
/******************************************************************************
* MODULE     : analyze.gen.cc
* DESCRIPTION: Properties of characters and strings
* COPYRIGHT  : (C) 1999  Joris van der Hoeven
*******************************************************************************
* This software falls under the GNU general public license and comes WITHOUT
* ANY WARRANTY WHATSOEVER. See the file $TEXMACS_PATH/LICENSE for more details.
* If you don't have this file, write to the Free Software Foundation, Inc.,
* 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
******************************************************************************/

#include <string.gen.h>

#module code_analyze
#import string

/******************************************************************************
* Tests for characters
******************************************************************************/

/* Tell whether c is an unaccented ASCII letter ('a'-'z' or 'A'-'Z').
 * The obsolete `register' specifier is dropped from the parameter: it never
 * affected the function's type and is no longer accepted by C++17. */
bool
is_alpha (char c) {
  const bool lower= (c>='a') && (c<='z');
  const bool upper= (c>='A') && (c<='Z');
  return lower || upper;
}

/* Tell whether c counts as a letter in the 8-bit encoding used here:
 * an ASCII letter, or any byte >= 128 other than 159, 189, 190 and 191.
 * The byte is examined as an unsigned value so that the result does not
 * depend on whether plain char is signed.  The obsolete `register'
 * specifier (rejected by C++17) is dropped from the parameter. */
bool
is_iso_alpha (char c) {
  const int code= (int) ((unsigned char) c);
  if (code >= 128)
    return (code != 159) && (code != 189) && (code != 190) && (code != 191);
  return ((code>='a') && (code<='z')) || ((code>='A') && (code<='Z'));
}

/* Tell whether c is a lowercase letter: 'a'-'z', or a byte in one of the
 * ranges 160..188 and 224..255.
 * The obsolete `register' specifier (rejected by C++17) is dropped. */
bool
is_locase (char c) {
  const int code= (int) ((unsigned char) c);
  if (code < 128) return (code>='a') && (code<='z');
  return ((code >= 160) && (code < 189)) || (code >= 224);
}

/* Tell whether c is an uppercase letter: 'A'-'Z', or a byte in one of the
 * ranges 128..158 and 192..223.
 * The obsolete `register' specifier (rejected by C++17) is dropped. */
bool
is_upcase (char c) {
  const int code= (int) ((unsigned char) c);
  if (code < 128) return (code>='A') && (code<='Z');
  // From here on code >= 128, so only the upper bounds need testing.
  return (code < 159) || ((code >= 192) && (code < 224));
}

/* Tell whether c is one of the decimal digits '0'-'9'.
 * The obsolete `register' specifier (rejected by C++17) is dropped. */
bool
is_digit (char c) {
  return !((c<'0') || (c>'9'));
}

/* Tell whether c may occur in an unsigned decimal number: a digit '0'-'9'
 * or the decimal point '.'.
 * The obsolete `register' specifier (rejected by C++17) is dropped. */
bool
is_numeric (char c) {
  return (c=='.') || ((c>='0') && (c<='9'));
}

/* Tell whether c is one of the punctuation marks  . , : ; ! ? ' `
 * The obsolete `register' specifier (rejected by C++17) is dropped. */
bool
is_ponctuation (char c) {
  const bool stop = (c=='.') || (c=='!') || (c=='?');
  const bool pause= (c==',') || (c==':') || (c==';');
  const bool quote= (c=='\'') || (c=='`');
  return stop || pause || quote;
}

/******************************************************************************
* Tests for strings
******************************************************************************/

/* TRUE when s is non-empty and every character passes is_alpha (char). */
bool
is_alpha (string s) {
  int pos= 0, len= N(s);
  if (len == 0) return FALSE;
  while (pos < len) {
    if (!is_alpha (s[pos])) return FALSE;
    pos++;
  }
  return TRUE;
}

/* TRUE when s is non-empty and every character passes is_iso_alpha (char). */
bool
is_iso_alpha (string s) {
  int pos= 0, len= N(s);
  if (len == 0) return FALSE;
  while (pos < len) {
    if (!is_iso_alpha (s[pos])) return FALSE;
    pos++;
  }
  return TRUE;
}

/* TRUE when s is non-empty and every character passes is_numeric (char),
 * i.e. is a digit or a decimal point. */
bool
is_numeric (string s) {
  int pos= 0, len= N(s);
  if (len == 0) return FALSE;
  while (pos < len) {
    if (!is_numeric (s[pos])) return FALSE;
    pos++;
  }
  return TRUE;
}

/******************************************************************************
* Changing cases
******************************************************************************/

/* Return s with its first character converted to upper case.  The string
 * is returned unchanged when it is empty or does not start with a lowercase
 * letter (as judged by is_locase). */
string
upcase_first (string s) {
  if (N(s)==0) return s;
  if (!is_locase (s[0])) return s;
  // The uppercase form sits 32 positions below the lowercase one.
  int code= (int) ((unsigned char) s[0]);
  return string ((char) (code-32)) * s (1, N(s));
}

/* Return s with its first character converted to lower case.  The string
 * is returned unchanged when it is empty or does not start with an uppercase
 * letter (as judged by is_upcase). */
string
locase_first (string s) {
  if (N(s)==0) return s;
  if (!is_upcase (s[0])) return s;
  // The lowercase form sits 32 positions above the uppercase one.
  int code= (int) ((unsigned char) s[0]);
  return string ((char) (code+32)) * s (1, N(s));
}

/* Return a copy of s in which every lowercase letter (per is_locase) is
 * replaced by the character 32 positions below it; all other characters
 * are copied unchanged. */
string
upcase_all (string s) {
  int k, len= N(s);
  string r (len);
  for (k=0; k<len; k++) {
    char c= s[k];
    r[k]= is_locase (c)? (char) (((int) ((unsigned char) c))-32): c;
  }
  return r;
}

/* Return a copy of s in which every uppercase letter (per is_upcase) is
 * replaced by the character 32 positions above it; all other characters
 * are copied unchanged. */
string
locase_all (string s) {
  int k, len= N(s);
  string r (len);
  for (k=0; k<len; k++) {
    char c= s[k];
    r[k]= is_upcase (c)? (char) (((int) ((unsigned char) c))+32): c;
  }
  return r;
}

/******************************************************************************
* Spanish in relation with ispell
******************************************************************************/

/* Convert the ASCII transliteration of Spanish used with ispell into 8-bit
 * accented characters: an apostrophe followed by one of A E I N O U Y (in
 * either case) becomes a single accented letter; an apostrophe followed by
 * any other character is copied through together with that character.  A
 * trailing apostrophe is copied as is.
 *
 * The accented letters are written as explicit byte values so that the
 * function no longer depends on the encoding in which this source file is
 * stored (the raw 8-bit literals do not survive re-encoding).
 * NOTE(review): the values are the ISO-8859-1 codes for the acute-accented
 * vowels and for N/n with tilde -- confirm against a pristine copy. */
string
ispanish_to_spanish (string s) {
  int i, n= N(s);
  string r;
  for (i=0; i<n; i++)
    if ((s[i] == '\'') && ((i+1)<n)) {
      switch (s[i+1]) {
      case 'A': r << '\xC1'; break;
      case 'E': r << '\xC9'; break;
      case 'I': r << '\xCD'; break;
      case 'N': r << '\xD1'; break;
      case 'O': r << '\xD3'; break;
      case 'U': r << '\xDA'; break;
      case 'Y': r << '\xDD'; break;
      case 'a': r << '\xE1'; break;
      case 'e': r << '\xE9'; break;
      case 'i': r << '\xED'; break;
      case 'n': r << '\xF1'; break;
      case 'o': r << '\xF3'; break;
      case 'u': r << '\xFA'; break;
      case 'y': r << '\xFD'; break;
      default : r << '\'' << s[i+1];
      }
      i++; // the character after the apostrophe has been consumed too
    }
    else r << s[i];
  return r;
}

/* Convert 8-bit accented Spanish text into the ASCII transliteration used
 * with ispell: each accented letter becomes an apostrophe followed by the
 * base letter; every other character is copied unchanged.
 *
 * The accented letters are written as explicit byte values so that the
 * case labels no longer depend on the encoding in which this source file is
 * stored (the raw 8-bit literals do not survive re-encoding).
 * NOTE(review): the values are the ISO-8859-1 codes for the acute-accented
 * vowels and for N/n with tilde -- confirm against a pristine copy. */
string
spanish_to_ispanish (string s) {
  int i, n= N(s);
  string r;
  for (i=0; i<n; i++)
    switch (s[i]) {
    case '\xC1': r << "'A"; break;
    case '\xC9': r << "'E"; break;
    case '\xCD': r << "'I"; break;
    case '\xD1': r << "'N"; break;
    case '\xD3': r << "'O"; break;
    case '\xDA': r << "'U"; break;
    case '\xDD': r << "'Y"; break;
    case '\xE1': r << "'a"; break;
    case '\xE9': r << "'e"; break;
    case '\xED': r << "'i"; break;
    case '\xF1': r << "'n"; break;
    case '\xF3': r << "'o"; break;
    case '\xFA': r << "'u"; break;
    case '\xFD': r << "'y"; break;
    default : r << s[i];
    }
  return r;
}

/******************************************************************************
* Iso latin 2 encoding for polish and czech
******************************************************************************/

/* Translation tables for bytes >= 128, indexed by (byte value - 128); they
 * are read by il2_to_cork (char) and cork_to_il2 (char) below.
 * NOTE(review): those accessors index up to position 127, so each table
 * needs 128 one-byte entries.  The literals look shorter and garbled in this
 * copy (probably 8-bit data damaged by re-encoding) -- verify them against
 * a pristine source before relying on the conversions. */
static string il2_to_cork_string=
  "   ĈǃɆ˅΄ЋԎ.ږݕ䨢/ ";
static string cork_to_il2_string=
  "áGť ئYIIg嵳 yi!?LAAAAEEIINOOOOUU Saaaaeeiinoooouu ";

/* Translate one character through il2_to_cork_string.  Bytes below 128 are
 * returned unchanged; any other byte is replaced by the table entry at
 * index (byte value - 128). */
static char
il2_to_cork (char c) {
  int code= (int) ((unsigned char) c);
  return (code<128)? c: il2_to_cork_string [code-128];
}

/* Translate one character through cork_to_il2_string.  Bytes below 128 are
 * returned unchanged; any other byte is replaced by the table entry at
 * index (byte value - 128). */
static char
cork_to_il2 (char c) {
  int code= (int) ((unsigned char) c);
  return (code<128)? c: cork_to_il2_string [code-128];
}

/* Return a string of the same length as s in which every character has
 * been translated by il2_to_cork (char). */
string
il2_to_cork (string s) {
  int len= N(s);
  string r (len);
  int k= 0;
  while (k < len) {
    r[k]= il2_to_cork (s[k]);
    k++;
  }
  return r;
}

/* Return a string of the same length as s in which every character has
 * been translated by cork_to_il2 (char). */
string
cork_to_il2 (string s) {
  int len= N(s);
  string r (len);
  int k= 0;
  while (k < len) {
    r[k]= cork_to_il2 (s[k]);
    k++;
  }
  return r;
}

/******************************************************************************
* Koi8 encoding for russian
******************************************************************************/

/* Translation tables for bytes >= 192, indexed by (byte value - 192); they
 * are read by koi8_to_iso (char) and iso_to_koi8 (char) below.
 * NOTE(review): those accessors index up to position 63, so each table
 * needs 64 one-byte entries.  Both literals appear empty in this copy
 * (probably 8-bit data lost during re-encoding) -- restore them from a
 * pristine source before relying on the conversions. */
static string koi8_to_iso_string=
  "";
static string iso_to_koi8_string=
  "";

/* Translate one character of the first encoding into the second.
 * Codes 156 and 188 are special-cased, other codes below 192 are returned
 * unchanged, and codes 192..255 are looked up in koi8_to_iso_string at
 * index (code - 192).
 * NOTE(review): the two character literals returned for 156 and 188 appear
 * empty in this copy (probably lost non-ASCII bytes) -- restore them from a
 * pristine source. */
static char
koi8_to_iso (char c) {
  int i= (int) ((unsigned char) c);  // byte value in 0..255
  if (i==156) return '';
  if (i==188) return '';
  if (i<192) return c;
  return koi8_to_iso_string [i-192];
}

/* Translate one character back; written as the inverse of
 * koi8_to_iso (char).  Two specific characters map to codes 156 and 188,
 * other codes below 192 are returned unchanged, and codes 192..255 are
 * looked up in iso_to_koi8_string at index (code - 192).
 * NOTE(review): the two character literals compared against c appear empty
 * in this copy (probably lost non-ASCII bytes) -- restore them from a
 * pristine source. */
static char
iso_to_koi8 (char c) {
  int i= (int) ((unsigned char) c);  // byte value in 0..255
  if (c=='') return (char) 156;
  if (c=='') return (char) 188;
  if (i<192) return c;
  return iso_to_koi8_string [i-192];
}

/* Return a string of the same length as s in which every character has
 * been translated by koi8_to_iso (char). */
string
koi8_to_iso (string s) {
  int len= N(s);
  string r (len);
  int k= 0;
  while (k < len) {
    r[k]= koi8_to_iso (s[k]);
    k++;
  }
  return r;
}

/* Return a string of the same length as s in which every character has
 * been translated by iso_to_koi8 (char). */
string
iso_to_koi8 (string s) {
  int len= N(s);
  string r (len);
  int k= 0;
  while (k < len) {
    r[k]= iso_to_koi8 (s[k]);
    k++;
  }
  return r;
}

/******************************************************************************
* Roman and alpha numbers
******************************************************************************/

/* Roman spellings of the units, tens and hundreds digits 0..9. */
static string ones[10]= {
  "", "i", "ii", "iii", "iv", "v", "vi", "vii", "viii", "ix" };
static string tens[10]= {
  "", "x", "xx", "xxx", "xl", "l", "lx", "lxx", "lxxx", "xc" };
static string hundreds[10]= {
  "", "c", "cc", "ccc", "cd", "d", "dc", "dcc", "dccc", "cm" };

/* Write nr as a lowercase roman numeral.
 *   - negative numbers are written as "-" followed by the numeral of -nr;
 *   - zero is written "o";
 *   - each full thousand contributes one leading "m";
 *   - the shortened forms "im" (999), "id" (499), "ic" (..99) and
 *     "il" (..49) are used instead of the longer subtractive spellings.
 * The negative branch used to recurse on nr itself and therefore never
 * terminated; it now recurses on -nr. */
string
roman_nr (int nr) {
  if (nr<0) return "-" * roman_nr (-nr);
  if (nr==0) return "o";
  if (nr>1000) return "m" * roman_nr (nr-1000);
  if (nr==1000) return "m";
  if (nr==999) return "im";
  if (nr==499) return "id";
  // From here on 1 <= nr <= 998, so nr/100 is a valid table index.
  if ((nr%100)==99) return hundreds[nr/100] * "ic";
  if ((nr%100)==49) return hundreds[nr/100] * "il";
  return hundreds[nr/100] * tens[(nr%100)/10] * ones[nr%10];
}

/* Uppercase variant of roman_nr: the numeral for nr passed through
 * upcase_all. */
string
Roman_nr (int nr) {
  string lower= roman_nr (nr);
  return upcase_all (lower);
}

/* Write nr with lowercase letters as digits: 1 -> "a", ..., 26 -> "z",
 * 27 -> "aa", 28 -> "ab", and so on.  Zero is written "0" and negative
 * numbers are written as "-" followed by the spelling of -nr.
 * The negative branch used to recurse on nr itself and therefore never
 * terminated; it now recurses on -nr. */
string
alpha_nr (int nr) {
  if (nr<0) return "-" * alpha_nr (-nr);
  if (nr==0) return "0";
  if (nr<=26) return string ((char) (((int) 'a')+ nr-1));
  // Leading letters come from (nr-1)/26, the last letter from (nr-1)%26.
  return alpha_nr ((nr-1)/26) * alpha_nr (((nr-1)%26)+1);
}

/* Uppercase variant of alpha_nr: the spelling for nr passed through
 * upcase_all. */
string
Alpha_nr (int nr) {
  string lower= alpha_nr (nr);
  return upcase_all (lower);
}

/******************************************************************************
* Convert between verbatim and TeXmacs encoding
******************************************************************************/

/* Escape verbatim text: every '<' becomes "<less>", every '>' becomes
 * "<gtr>", and all other characters are copied unchanged. */
string
tm_encode (string s) {
  int k, len= N(s);
  string out;
  for (k=0; k<len; k++)
    switch (s[k]) {
    case '<': out << "<less>"; break;
    case '>': out << "<gtr>"; break;
    default : out << s[k];
    }
  return out;
}

/* Undo tm_encode: "<less>" becomes '<' and "<gtr>" becomes '>'.  Any other
 * "<...>" group is dropped, stray '>' characters are skipped, and decoding
 * stops at a '<' that has no closing '>'. */
string
tm_decode (string s) {
  int pos= 0, len= N(s);
  string out;
  while (pos < len) {
    char c= s[pos];
    if (c == '<') {
      // Find the matching '>' and let end point just past it (or at len).
      int close= pos+1;
      while ((close < len) && (s[close] != '>')) close++;
      int end= (close < len)? close+1: close;
      string tag= s (pos, end);
      if (tag == "<less>") out << "<";
      else if (tag == "<gtr>") out << ">";
      pos= end-1;
      if (s[pos] != '>') return out;  // unterminated group: give up
    }
    else if (c != '>') out << c;
    pos++;
  }
  return out;
}

/* Filter s so that its angle brackets are well formed: a "<...>" group is
 * kept only when it contains no further '<', stray '>' characters are
 * dropped, and everything from an unterminated '<' onwards is discarded. */
string
tm_correct (string s) {
  int pos, len= N(s);
  string out;
  for (pos=0; pos<len; pos++) {
    if (s[pos]=='>') continue;
    if (s[pos]!='<') { out << s[pos]; continue; }
    // s[pos] opens a group: look for the '>' that closes it.
    int close= pos+1;
    while ((close<len) && (s[close]!='>')) close++;
    if (close==len) return out;
    bool nested= FALSE;
    for (int k=pos+1; (k<close) && !nested; k++)
      nested= (s[k]=='<');
    if (!nested) out << s (pos, close+1);
    pos= close;
  }
  return out;
}

/******************************************************************************
* Reading input from a string
******************************************************************************/

/* Tell whether the NUL-terminated text test occurs in s starting at
 * position i.  An empty test always matches. */
bool
test (string s, int i, const char* test) {
  int len= N(s);
  for (const char* p= test; *p != '\0'; p++, i++)
    if ((i>=len) || (s[i]!=*p)) return FALSE;
  return TRUE;
}

/* Tell whether the string test occurs in s starting at position i.
 * An empty test always matches. */
bool
test (string s, int i, string test) {
  int len= N(s), m= N(test), j;
  for (j=0; j<m; j++, i++)
    if ((i>=len) || (s[i]!=test[j])) return FALSE;
  return TRUE;
}

/* Tell whether s begins with the NUL-terminated text what. */
bool
starts (string s, const char* what) {
  const int origin= 0;
  return test (s, origin, what);
}

/* Tell whether s finishes with the NUL-terminated text what. */
bool
ends (string s, const char* what) {
  string suffix ((char*) what);
  int ls= N(s), lr= N(suffix);
  if (lr > ls) return FALSE;
  return s (ls-lr, ls) == suffix;
}

/* Tell whether s finishes with the string r. */
bool
ends (string s, string r) {
  int ls= N(s), lr= N(r);
  if (lr > ls) return FALSE;
  return s (ls-lr, ls) == r;
}

/* Try to read the NUL-terminated text test from s at position i.  On
 * success i is advanced past the text and TRUE is returned; otherwise i is
 * left untouched and FALSE is returned. */
bool
read (string s, int& i, const char* test) {
  int len= N(s), cursor= i;
  for (const char* p= test; *p != '\0'; p++, cursor++)
    if ((cursor>=len) || (s[cursor]!=*p)) return FALSE;
  i= cursor;
  return TRUE;
}

/* Try to read the string test from s at position i.  On success i is
 * advanced past the text and TRUE is returned; otherwise i is left
 * untouched and FALSE is returned. */
bool
read (string s, int& i, string test) {
  int len= N(s), m= N(test), j, cursor= i;
  for (j=0; j<m; j++, cursor++)
    if ((cursor>=len) || (s[cursor]!=test[j])) return FALSE;
  i= cursor;
  return TRUE;
}

/* Read from position i up to the next newline.  When a newline is found,
 * result receives the text before it, i is moved just past it and TRUE is
 * returned.  Otherwise result receives the remaining text, i ends at the
 * end of s and FALSE is returned. */
bool
read_line (string s, int& i, string& result) {
  int start= i, len= N(s);
  while (i < len) {
    if (s[i] == '\n') {
      result= s (start, i);
      i++;
      return TRUE;
    }
    i++;
  }
  result= s (start, i);
  return FALSE;
}

/* Read an optionally negative decimal integer from s at position i.
 * On success i is advanced past the digits, result receives the value
 * (converted with as_int) and TRUE is returned.  On failure i is left
 * untouched, result is 0 and FALSE is returned. */
bool
read_int (string s, int& i, int& result) {
  int len= N(s), from= i;
  result= 0;
  if (i == len) return FALSE;
  // Index at which the first digit is expected (after an optional '-').
  int first= (s[i] == '-')? i+1: i;
  if (first == len) return FALSE;
  if (!is_digit (s[first])) return FALSE;
  for (i= first; (i<len) && is_digit (s[i]); i++) ;
  result= as_int (s (from, i));
  return TRUE;
}

/* Read a floating point number from s at position i: an optional '-',
 * digits, an optional '.', more digits, and an optional exponent made of
 * 'e' or 'E', an optional '-' and at least one digit.
 * On success i is advanced past the number, result receives the value
 * (converted with as_double) and TRUE is returned.  On failure result is
 * 0.0, i is left at its starting position and FALSE is returned. */
bool
read_double (string s, int& i, double& result) {
  int len= N(s), from= i;
  result= 0.0;
  if (i == len) return FALSE;
  // Index at which the mantissa is expected (after an optional '-').
  int first= (s[i] == '-')? i+1: i;
  if (first == len) return FALSE;
  if (!is_numeric (s[first])) return FALSE;
  i= first;
  for (; (i<len) && is_digit (s[i]); i++) ;   // integer part
  if ((i<len) && (s[i]=='.')) i++;
  for (; (i<len) && is_digit (s[i]); i++) ;   // fractional part
  if ((i<len) && ((s[i]=='e') || (s[i]=='E'))) {
    i++;
    if ((i<len) && (s[i]=='-')) i++;
    // An exponent marker without digits invalidates the whole number.
    if ((i==len) || (!is_digit (s[i]))) { i= from; return FALSE; }
    for (; (i<len) && is_digit (s[i]); i++) ;
  }
  result= as_double (s (from, i));
  return TRUE;
}

/* Advance i past any run of spaces and tabs in s. */
void
skip_spaces (string s, int& i) {
  int len= N(s);
  for (; i<len; i++)
    if ((s[i]!=' ') && (s[i]!='\t')) break;
}

/* Advance i to just past the next newline in s, or to the end of s when
 * there is none. */
void
skip_line (string s, int& i) {
  int len= N(s);
  for (; i<len; i++)
    if (s[i]=='\n') { i++; break; }
}

/* Find the first occurrence of s in the string in at position pos or
 * later.  Returns the position of the match, or -1 when there is none. */
int
search_forwards (string s, int pos, string in) {
  // Last position at which s can still fit inside in.
  int limit= N(in) - N(s);
  for (; pos <= limit; pos++)
    if (test (in, pos, s)) return pos;
  return -1;
}

/* Find the first occurrence of s in the string in, scanning from the
 * start.  Returns the position of the match, or -1 when there is none. */
int
search_forwards (string s, string in) {
  const int origin= 0;
  return search_forwards (s, origin, in);
}

/* Find the last occurrence of s in the string in that starts at position
 * pos or earlier.  Returns the position of the match, or -1 when there is
 * none. */
int
search_backwards (string s, int pos, string in) {
  for (; pos >= 0; pos--)
    if (test (in, pos, s)) return pos;
  return -1;
}

/* Find the last occurrence of s in the string in, scanning from the
 * rightmost position at which s could fit.  Returns the position of the
 * match, or -1 when there is none. */
int
search_backwards (string s, string in) {
  int last= N(in)-N(s);
  return search_backwards (s, last, in);
}

#endmodule // code_analyze
