
/******************************************************************************
* MODULE     : fromtm.gen.cc
* DESCRIPTION: conversion from the TeXmacs file format to TeXmacs trees
*              older versions are automatically converted into the present one
* COPYRIGHT  : (C) 1999  Joris van der Hoeven
*******************************************************************************
* This software falls under the GNU general public license and comes WITHOUT
* ANY WARRANTY WHATSOEVER. See the file $TEXMACS_PATH/LICENSE for more details.
* If you don't have this file, write to the Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
******************************************************************************/

#module code_fromtm

/******************************************************************************
* Conversion of TeXmacs strings of the present format to TeXmacs trees
******************************************************************************/

// State of a reader which turns a string in the TeXmacs file format
// into a TeXmacs tree; see the member functions defined after this struct.
struct tm_reader {
  path    version;            // version which was used to write the document
  hashmap<string,int> codes;  // function names -> codes (cast to tree_label)
  tree_label EXPAND_APPLY;    // APPLY when version < 0.3.3.22, else EXPAND
  string  buf;                // the text which is being parsed
  int     pos;                // index in buf of the next unread character
  string  last;               // token most recently stored by the readers

  // reader for a string which is taken to be in the format of version "1.0"
  tm_reader (string buf2):
    version (as_path ("1.0")), codes (CONSTRUCTOR_CODE),
    EXPAND_APPLY (EXPAND), buf (buf2), pos (0), last ("") {}

  // reader for a string which was written using the version 'version2'
  tm_reader (string buf2, path version2):
    version (version2), codes (get_codes (version2)),
    EXPAND_APPLY (path_inf (version2, as_path ("0.3.3.22"))? APPLY: EXPAND),
    buf (buf2), pos (0), last ("") {}

  int    skip_blank ();                          // skip blanks, count newlines
  string decode (string s);                      // undo backslash escapes
  string read_char ();                           // next single character
  string read_next ();                           // next token
  string read_function_name ();                  // name inside a tag
  tree   read_apply (string s, bool skip_flag);  // arguments of a tag
  tree   read (bool skip_flag);                  // text up to a delimiter
};

// Advance 'pos' past any run of spaces, tabs and newlines in 'buf'.
// Returns the number of newline characters which were skipped.
int
tm_reader::skip_blank () {
  int newlines= 0;
  while (pos < N(buf)) {
    char c= buf[pos];
    if (c == '\n') newlines++;
    else if ((c != ' ') && (c != '\t')) break;
    pos++;
  }
  return newlines;
}

// Replace the backslash escapes in 's' by the characters they stand for:
//   \;  -> nothing        \0  -> NUL character
//   \t  -> tab            \n  -> newline
//   \x  -> x              (any other character x)
// A backslash which is the very last character of 's' is copied unchanged.
string
tm_reader::decode (string s) {
  string out;
  int len= N(s);
  int k= 0;
  while (k < len) {
    char c= s[k++];
    // ordinary character, or a trailing backslash with nothing to escape
    if ((c != '\\') || (k >= len)) {
      out << c;
      continue;
    }
    char esc= s[k++];
    switch (esc) {
    case ';': break;
    case '0': out << '\0'; break;
    case 't': out << '\t'; break;
    case 'n': out << '\n'; break;
    default : out << esc;
    }
  }
  return out;
}

// Return the next character of 'buf' as a string of length one and advance
// 'pos' beyond it; the empty string is returned at the end of the input.
// Any backslash immediately followed by a newline is consumed first, after
// which skip_spaces is applied to (buf, pos).
string
tm_reader::read_char () {
  for (;;) {
    if ((pos+1) >= N(buf)) break;
    if ((buf[pos] != '\\') || (buf[pos+1] != '\n')) break;
    pos += 2;
    skip_spaces (buf, pos);
  }
  if (pos >= N(buf)) return "";
  int start= pos++;
  return buf (start, start+1);
}

// Return the next token of 'buf', starting at 'pos', and advance 'pos'.
// The possible results are:
//   ""            at the end of the input
//   " " or "\n"   for a run of blanks; "\n" when skip_blank counted more
//                 than one newline in the run, " " otherwise
//   "<", "<\", "<|", "</", "<c", "<c\", "<c|", "<c/"
//                 for the start of a tag (c being a single character)
//   "|" or ">"    for these delimiters
//   a word        any other run of characters up to the next blank, '<',
//                 '|' or '>'; backslash escapes are kept undecoded
string
tm_reader::read_next () {
  int old_pos= pos;
  string c= read_char ();
  if (c == "") return c;
  switch (c[0]) {
  case '\t':
  case '\n':
  case ' ': 
    // put the blank back, so that skip_blank sees the entire run
    pos--;
    if (skip_blank () <= 1) return " ";
    else return "\n";
  case '<':
    {
      old_pos= pos;
      c= read_char ();
      if (c == "") return "";
      if ((c == "\\") || (c == "|") || (c == "/")) return "<" * c;
      // the tag name (or '>') follows directly: leave it for the caller
      if (is_iso_alpha (c[0]) || (c == ">")) {
	pos= old_pos;
	return "<";
      }
      // some other character c follows '<'; keep it and look one further
      old_pos= pos;
      string d= read_char ();
      if ((d == "\\") || (d == "|") || (d == "/")) return "<" * c * d;
      // d does not belong to the tag marker: put it back
      pos= old_pos;
      return "<" * c;
    }
  case '|':
  case '>':
    return c;
  }

  // ordinary text: restart from the first character and collect a word
  string r;
  pos= old_pos;
  while (TRUE) {
    old_pos= pos;
    c= read_char ();
    if (c == "") return r;
    // an escape: copy the backslash and the next character as they are
    else if (c == "\\") r << c << read_char ();
    else if (c == "\t") break;
    else if (c == "\n") break;
    else if (c == " ") break;
    else if (c == "<") break;
    else if (c == "|") break;
    else if (c == ">") break;
    else r << c;
  }
  // the delimiter which ended the word is left unread
  pos= old_pos;
  return r;
}

// Read a function name: the next token, decoded, is the name. All further
// tokens are discarded until one of "", "|" or ">" is met; that token is
// left in 'last' so that the caller can see how the name was terminated.
string
tm_reader::read_function_name () {
  string name= decode (read_next ());
  do last= read_next ();
  while ((last != "") && (last != "|") && (last != ">"));
  return name;
}

// Append to 'u' every binary ASSOCIATE node which is found in 't', where
// the search only descends through COLLECTION, DOCUMENT and CONCAT nodes.
// Any other kind of node is ignored.
static void
get_collection (tree& u, tree t) {
  if (is_func (t, ASSOCIATE, 2)) {
    u << t;
    return;
  }
  bool container=
    is_func (t, COLLECTION) || is_func (t, DOCUMENT) || is_func (t, CONCAT);
  if (!container) return;
  for (int k=0; k<N(t); k++)
    get_collection (u, t[k]);
}

// Read the arguments of the function 'name' and return the resulting tree.
// The root is the tree label registered for 'name' in 'codes' when there is
// one, and otherwise an EXPAND_APPLY node whose first child is 'name'.
// Arguments are read using 'read' until the end of the input or until the
// construct is terminated, as indicated by 'last':
//   - when skip_flag is FALSE, a ">" terminates the construct;
//   - when skip_flag is TRUE, a "/>" or "/|" has to be seen first, after
//     which a ">" or "/>" terminates the construct.
// When skip_flag is TRUE, blanks before an argument are skipped, except if
// the previous token ends with '|'. A resulting COLLECTION is flattened
// using get_collection.
tree
tm_reader::read_apply (string name, bool skip_flag) {
  tree result= codes->contains (name)?
    tree ((tree_label) codes [name]): tree (EXPAND_APPLY, name);

  bool terminated= !skip_flag;
  while (pos < N(buf)) {
    bool inner_skip= FALSE;
    if (skip_flag)
      inner_skip= (last == "") || (last[N(last)-1] != '|');
    if (inner_skip) (void) skip_blank ();
    result << read (inner_skip);

    bool slash_close= (last == "/>");
    if (slash_close || (last == "/|")) terminated= TRUE;
    if (terminated && (slash_close || (last == ">"))) break;
  }

  if (!is_func (result, COLLECTION)) return result;
  tree flat (COLLECTION);
  get_collection (flat, result);
  return flat;
}

// Move pending text and pending line breaks into the trees being built.
//   D         the DOCUMENT which collects the finished lines
//   C         the CONCAT which collects the items of the current line
//   S         text which was not yet added to C
//   spc_flag  a space is pending; it is appended to S
//   ret_flag  a line break is pending; C is then moved into D
// Non empty text is appended to the last child of C when that child is
// atomic, and added as a new child otherwise. On a line break, D receives
// "" for an empty C, the only child of a singleton C, and C itself in all
// other cases; C then restarts as an empty CONCAT.
static void
flush (tree& D, tree& C, string& S, bool& spc_flag, bool& ret_flag) {
  if (spc_flag) S << " ";
  if (S != "") {
    int tail= N(C) - 1;
    bool merge= (tail >= 0) && is_atomic (C[tail]);
    if (merge) C[tail]->label << S;
    else C << S;
    S= "";
    spc_flag= FALSE;
  }

  if (!ret_flag) return;
  switch (N(C)) {
  case 0 : D << ""; break;
  case 1 : D << C[0]; break;
  default: D << C;
  }
  C= tree (CONCAT);
  ret_flag= FALSE;
}

// Read text and nested tags from 'buf' until the end of the input, a "|",
// a ">", or a tag whose marker ends with '|' or '/'. The token which ended
// the reading is left in 'last' (rewritten as "|>", "||", "/>" or "/|" in
// the case of such a tag), so that read_apply can inspect it.
//
// Words are gathered in S, the items of the current line in the CONCAT C
// and finished lines in the DOCUMENT D; see 'flush'. When skip_flag is
// TRUE, a space or line break which is still pending at the end is dropped.
//
// The result is "" when nothing was read, and the single line which was
// read when D has one child and one of the following holds: skip_flag is
// FALSE, the version is at most 0.3.4.10, or that line is a COLLECTION.
// In all other cases the DOCUMENT D is returned.
tree
tm_reader::read (bool skip_flag) {
  tree   D (DOCUMENT);
  tree   C (CONCAT);
  string S ("");
  bool   spc_flag= FALSE;
  bool   ret_flag= FALSE;

  while (TRUE) {
    last= read_next ();
    // cout << "--> " << last << "\n";
    if (last == "") break;
    if (last == "|") break;
    if (last == ">") break;
    
    if (last[0] == '<') {
      // marker ending with '\': a tag whose arguments are read with
      // skip_flag set; 'last' records how the function name was terminated
      if (last[N(last)-1] == '\\') {
	flush (D, C, S, spc_flag, ret_flag);
	string name= read_function_name ();
	if (last == ">") last= "\\>";
	else last= "\\|";
	C << read_apply (name, TRUE);
      }
      // marker ending with '|': the name is discarded and reading stops
      else if (last[N(last)-1] == '|') {
	(void) read_function_name ();
	if (last == ">") last= "|>";
	else last= "||";
	break;
      }
      // marker ending with '/': the name is discarded and reading stops;
      // read_apply treats "/>" and "/|" as closing the construct
      else if (last[N(last)-1] == '/') {
	(void) read_function_name ();
	if (last == ">") last= "/>";
	else last= "/|";
	break;
      }
      // any other marker: a tag of the form <name> or <name|...>
      else {
	flush (D, C, S, spc_flag, ret_flag);
	string name= decode (read_next ());
	string sep = ">";
	// a ">" read instead of a name means that the name is empty
	if (name == ">") name= "";
	else sep = read_next ();
	// cout << "==> " << name << "\n";
	// cout << "~~> " << sep << "\n";
	if (sep == '|') {
	  // arguments follow: read them with skip_flag unset
	  last= "|";
	  C << read_apply (name, FALSE);
	}
	else {
	  // no arguments: the same root as the one built by read_apply
	  tree t (EXPAND_APPLY, name);
	  if (codes->contains (name))
	    t= tree ((tree_label) codes [name]);
	  C << t;
	}
      }
    }
    // blanks are only recorded here; flush inserts them when more follows
    else if (last == " ") spc_flag= TRUE;
    else if (last == "\n") ret_flag= TRUE;
    else {
      flush (D, C, S, spc_flag, ret_flag);
      // cout << "<<< " << last << "\n";
      // cout << ">>> " << decode (last) << "\n";
      S << decode (last);
      // a word which decodes to nothing still yields an empty string item
      if ((S == "") && (N(C) == 0)) C << "";
    }
  }

  // drop trailing blanks when skip_flag is set, then finish the last line
  if (skip_flag) spc_flag= ret_flag= FALSE;
  flush (D, C, S, spc_flag, ret_flag);
  if (N(C) == 1) D << C[0];
  else if (N(C)>1) D << C;
  // cout << "*** " << D << "\n";
  if (N(D)==0) return "";
  if (N(D)==1) {
    if (!skip_flag) return D[0];
    if (path_inf_eq (version, as_path ("0.3.4.10"))) return D[0];
    if (is_func (D[0], COLLECTION)) return D[0];
  }
  return D;
}

// Convert a string in the TeXmacs format into a tree, using the reader for
// which no version is specified (it takes the version to be "1.0").
tree
texmacs_to_tree (string s) {
  return tm_reader (s).read (TRUE);
}

// Convert a string in the TeXmacs format, as it was written by the given
// version, into a tree.
tree
texmacs_to_tree (string s, path version) {
  return tm_reader (s, version).read (TRUE);
}

/******************************************************************************
* Conversion of TeXmacs strings to TeXmacs trees
******************************************************************************/

// Convert the contents of a TeXmacs file into a DOCUMENT tree, which is
// upgraded from the version of the file by 'upgrade'. The atomic tree
// "error" is returned when the format is not recognized.
//
// Two kinds of files are recognized:
//   - files starting with "edit", "TeXmacs" or "\(\)(TeXmacs": these are
//     parsed using string_to_tree as version 0.0.0.0, and the resulting
//     fields are stored as body, style, initial, etc., depending on the
//     number of fields;
//   - files starting with "<TeXmacs|": the version is the text between this
//     prefix and the next '>', and the file is parsed by texmacs_to_tree.
tree
texmacs_document_to_tree (string s) {
  if (starts (s, "edit") ||
      starts (s, "TeXmacs") ||
      starts (s, "\\(\\)(TeXmacs"))
  {
    path version= as_path ("0.0.0.0");
    tree t= string_to_tree (s, version);
    // when a tuple is obtained, its first child is not one of the fields
    if (is_tuple (t) && (N(t)>0)) t= t (1, N(t));
    int n= arity (t);

    tree doc (DOCUMENT);
    if (n<3) return "error";
    else if (n<4)
      // with three fields the order is: style, initial, body
      doc << tree (EXPAND, "body", t[2])
	  << tree (EXPAND, "style", t[0])
	  << tree (EXPAND, "initial", t[1]);
    else if (n<7)
      doc << tree (EXPAND, "body", t[0])
	  << tree (EXPAND, "style", t[1])
	  << tree (EXPAND, "initial", t[2])
	  << tree (EXPAND, "references", t[3]);
    else
      doc << tree (EXPAND, "body", t[0])
	  << tree (EXPAND, "project", t[1])
	  << tree (EXPAND, "style", t[2])
	  << tree (EXPAND, "initial", t[3])
	  << tree (EXPAND, "final", t[4])
	  << tree (EXPAND, "references", t[5])
	  << tree (EXPAND, "auxiliary", t[6]);
    return upgrade (doc, version);
  }

  if (starts (s, "<TeXmacs|")) {
    // the version runs from behind "<TeXmacs|" until the next '>', or
    // until the end of the string when there is no '>'
    int i;
    for (i=9; i<N(s); i++)
      if (s[i] == '>') break;
    path version= as_path (s (9, i));
    tree doc= texmacs_to_tree (s, version);
    // A file which only contains the TeXmacs tag is read as that single
    // node; wrap it into a document. The reader produces APPLY nodes for
    // versions before 0.3.3.22 and EXPAND nodes otherwise, so both have
    // to be accepted here (as in 'extract').
    if ((is_func (doc, EXPAND, 2) || is_func (doc, APPLY, 2)) &&
	(doc[0] == "TeXmacs"))
      doc= tree (DOCUMENT, doc);
    if (!is_document (doc)) return "error";
    return upgrade (doc, version);
  }
  return "error";
}

/******************************************************************************
* Extracting attributes from a TeXmacs document tree
******************************************************************************/

// Return the value of the attribute 'attr' of the document tree 'doc'.
// The value is the second child of the first child of 'doc' which is a
// binary EXPAND or APPLY node with 'attr' as its first child, subject to
// the following normalizations:
//   - a "body" which is not a document is wrapped into a DOCUMENT;
//   - a "style" equal to "none", "" or "style" becomes an empty TUPLE, any
//     other atomic style becomes a TUPLE with one entry, and a compound
//     style which is not a TUPLE becomes an empty TUPLE.
// When the attribute is not found, a default value is returned: an empty
// document body, an empty TUPLE for "style", an empty COLLECTION for
// "initial", "final", "references" and "auxiliary", and "" otherwise.
tree
extract (tree doc, string attr) {
  int n= arity (doc);
  for (int k=0; k<n; k++) {
    tree entry= doc[k];
    if (!is_func (entry, EXPAND, 2) && !is_func (entry, APPLY, 2)) continue;
    if (!(entry[0] == attr)) continue;

    tree r= entry[1];
    if ((attr == "body") && (!is_document (r))) return tree (DOCUMENT, r);
    if (attr == "style") {
      if ((r == "none") || (r == "") || (r == "style")) return tree (TUPLE);
      if (is_atomic (r)) return tree (TUPLE, r);
      if (!is_func (r, TUPLE)) return tree (TUPLE);
    }
    return r;
  }

  // the attribute is missing: return its default value
  if (attr == "body") return tree (DOCUMENT, "");
  if (attr == "style") return tree (TUPLE);
  if ((attr == "initial") || (attr == "final") ||
      (attr == "references") || (attr == "auxiliary"))
    return tree (COLLECTION);
  return "";
}

#endmodule // code_fromtm
