#############################################################################
##
#W  pipeobj.g           OpenMath Sharepackage         Andrew Solomon
##
#H  @(#)$Id: pipeobj.g,v 1.3 2000/03/24 21:39:41 andrews Exp $
##
#Y  Copyright (C)  1997,  Lehrstuhl D fuer Mathematik,  RWTH Aachen,  Germany
#Y  (C) 1998 School Math and Comp. Sci., University of St.  Andrews, Scotland
##
##  Pipe an XML encoded OpenMath object from <input stream> to 
##  <output stream>. This is used to put precisely one object
##  on the input to the lexical analyser gpipe.
##
##


#############################################################################
##
#F  ReadCharSkipSpace(<input>)
##
##  Reads bytes from <input> until one is found that is not a space,
##  newline, tabstop or carriage return, and returns that byte as a
##  character. Returns fail if the stream runs out first.
##
BindGlobal("ReadCharSkipSpace", 
function(input)
	local
		byte,	# value returned by ReadByte
		ch;	# the character corresponding to byte

	repeat
		byte := ReadByte(input);
		if byte = fail then
			# end of stream before any non-space character
			return fail;
		fi;
		ch := CHAR_INT(byte);
	until not (ch in [' ','\n','\t','\r']);

	return ch;
end);
	
#############################################################################
##
#F  ReadChar(<input>)
##
##  Reads the next byte from <input> and returns it as a character,
##  or returns fail if ReadByte returns fail.
##
BindGlobal("ReadChar", 
function(input)
	local
		byte;	# value returned by ReadByte

	byte := ReadByte(input);
	if byte = fail then
		return fail;
	fi;

	return CHAR_INT(byte);
end);

#############################################################################
##
#F  CharIsSpace(<c>)
##
##  True iff c is a space, newline, tabstop or carriage return, i.e. one
##  of the four XML white space characters. This is the same set of
##  characters that ReadCharSkipSpace skips, so input with CR/LF line
##  endings is treated consistently.
##
BindGlobal("CharIsSpace", c -> c in  [' ','\n','\t','\r']);

#############################################################################
##
#F  ReadTag(<input>)
##
##  Read a tag of the form < tag >
##  return "<tag>" - no spaces
##
##  White space in front of the tag is skipped, and so is any number of
##  encoding nodes (<? ... ?>) preceding it. An encoding node ends at the
##  first occurrence of the two characters "?>", so it may itself contain
##  question marks.
##
##  Returns fail if the input ends early, if the first non-space character
##  is not '<', or if the tag name is followed by anything other than
##  white space and '>'.
##
BindGlobal("ReadTag", 
function(input)
	local
		s,	# the string to return
		c,	# the character read
		prev;	# the character read just before c inside <? ... ?>

	# find the first '<'
	c := ReadCharSkipSpace(input);
	if c <> '<' then
		return fail;
	fi;
	c := ReadCharSkipSpace(input);

	# ensure encoding nodes (i.e <? ... ?>) are ignored
	while c = '?' do
		# Scan for the terminating "?>". prev starts as fail so that the
		# '?' which opened the node is never taken as its closing '?'.
		prev := fail;
		c := ReadChar(input);
		while not (c = fail or (prev = '?' and c = '>')) do
			prev := c;
			c := ReadChar(input);
		od;
		if c = fail then
			return fail;
		fi;

		# now find the real beginning of the tag
		c := ReadCharSkipSpace(input);
		if c <> '<' then
			return fail;
		fi;
		c := ReadCharSkipSpace(input);
	od;

	# collect the tag up to the first white space, or up to and
	# including '>' if that comes first
	s := "<";
	while not (c = fail or CharIsSpace(c)) do
		Add(s, c);
		if c = '>' then
			return s;
		fi;
		c := ReadChar(input);
	od;

	# now just read the final >
	c := ReadCharSkipSpace(input);
	if c <> '>' then
		return fail;
	fi;

	Add(s, c);
	return s;
end);
	

#############################################################################
##
#F  PipeOpenMathObject(<input>, <output>)
##
##  Return true if we succeed in piping an OMOBJ from
##  input to output, fail otherwise.
##
##  The opening tag is read with ReadTag and must be "<OMOBJ>"; it is
##  written to <output> as "<OMOBJ>" followed by a newline. Every
##  character read after that is appended to <output> unchanged.
##
##  Based on a finite state automaton which accepts "<OMOBJ>", then
##  any amount of stuff, and terminates with "</OMOBJ>" (white space is
##  allowed before the final '>') unless that text is inside a comment
##  "<!-- -->". If the input runs out before the closing tag is seen,
##  fail is returned; whatever was read up to then has already been
##  written to <output>.
##
BindGlobal("PipeOpenMathObject",
function(input,output)

	local
		s,	# the opening tag as returned by ReadTag
		EndOMOBJstates, # list of states all of which behave almost the same way
		lastEndState,	# the state in which all of "</OMOBJ" has been read
		state,	# current state
		nextchar, # the next character we expect
		c;	# the last character read

	# first read " <OMOBJ >"
	s  := ReadTag(input);
	if s <> "<OMOBJ>" then
		return fail;
	fi;
	AppendTo(output, "<OMOBJ>\n");


	# Each state name ends with the part of "</OMOBJ" matched so far, so
	# the next expected character can be looked up in the last state name.
	EndOMOBJstates:= ["InXMLRead</","InXMLRead</O","InXMLRead</OM",
		"InXMLRead</OMO","InXMLRead</OMOB","InXMLRead</OMOBJ"];
	lastEndState := EndOMOBJstates[Length(EndOMOBJstates)];

	# start state
	state := "InXML";

	c := ReadChar(input);
	while c <> fail do

		## Start state
		if state =  "InXML" then
			if c = '<' then
				state := "InXMLRead<";
			fi;

		## Read a `<`
		elif state = "InXMLRead<" then
			if c = '!' then
				state := "InXMLRead<!";
			elif c = '/' then
				state := "InXMLRead</";
			elif c <> '<' then # if c = '<' then we stay in this state
				state := "InXML";
			fi; 


		## Read some part of InXMLRead</OMOBJ
		## these states are all dealt with together
		elif state in  EndOMOBJstates then
			if state <> lastEndState then # this isn't the last one
				nextchar := lastEndState[Length(state)+1];
			else
				nextchar := '>';
				# skip to next nonblank, echoing the blanks
				while c <> fail and CharIsSpace(c) do
					AppendTo(output, [c]);
					c := ReadChar(input);
				od;
				if c = fail then 
					return fail;
				elif c = nextchar then
					# the closing tag is complete
					AppendTo(output, [c]);
					return true;
				fi;
			fi;

			if c = nextchar then
				state := Concatenation(state, [nextchar]);
			elif c = '<' then
				state := "InXMLRead<";
			else 
				state := "InXML";
			fi;


		## now on to the comments
		elif state = "InXMLRead<!" then
			if c = '-' then
				state := "InXMLRead<!-";
			elif c = '<' then
				state := "InXMLRead<";
			else
				state := "InXML";
			fi;

		elif state = "InXMLRead<!-" then
			if c = '-' then
				state := "InComment";
			elif c = '<' then
				state := "InXMLRead<";
			else
				state := "InXML";
			fi;


		elif state = "InComment" then
			if c = '-' then
				state := "InComment-";
			fi;

		elif state = "InComment-" then
			if c = '-' then
				state := "InComment--";
			else 
				state := "InComment";
			fi;

		elif state = "InComment--" then
			if c = '>' then
				state := "InXML";
			elif c <> '-' then
				state := "InComment";
			fi;
			# On another '-' we stay in this state: the last two characters
			# read are still "--", so a comment ending in "--->" is closed
			# by the '>' that follows.

		else
			Error("Invalid state:",state);
		fi;


		# finally send the character off to the output and get the next one
	
		AppendTo(output, [c]);
		c := ReadChar(input);
	od;

	# input ended before the closing tag was found
	return fail;
end);

#############################################################################
#E
