package Freenet.contrib.fproxy.filter;
import java.io.*;
import java.util.*;
/**
 * This class is designed to catch the following dangerous constructs in
 * Freenet HTML documents:
 * 
 * Any javascript code
 * Any embedded objects
 * Any embedded links to pages outside of Freenet or URLs that may interact
 * with the behavior of the proxy.
 * Any non-embedded links (these are considered "warnings")
 *
 * @author devrandom@hyper.to
 */
%%

%{
  // When true, the rule actions print each match to System.err.
  private boolean debug = false;
  // Receives the text of every token matched by the rules below.
  private StringBuffer buffer = new StringBuffer();
  // Receives the disallowed/warning findings and flags set by the rules.
  private FilterAnalysis analysis = new FilterAnalysis();

  /**
   * Returns the analysis object that the rule actions write their findings
   * into.
   *
   * @return this scanner's FilterAnalysis instance
   */
  public FilterAnalysis getAnalysis() {
      final FilterAnalysis collected = this.analysis;
      return collected;
  }

  /**
   * Returns the text appended to the output buffer so far.
   *
   * @return the buffer contents as a String
   */
  private String getResult() {
      final String text = this.buffer.toString();
      return text;
  }

  /**
   * Turns the System.err trace output of the rule actions on or off.
   *
   * @param debug true to print each match as it is found
   */
  public void setDebug(boolean debug) {
      this.debug = debug;
  }
  
  /**
   * Runs the scanner until yylex() returns null, then returns the text
   * accumulated in the output buffer.
   *
   * @return the accumulated text
   * @throws IOException if yylex() fails while reading the input
   */
  public String parse() throws IOException {
      boolean moreInput = true;
      while (moreInput) {
          // The actions do their work as a side effect of scanning; only
          // the null return is of interest here.
          moreInput = (yylex() != null);
      }
      return getResult();
  }

  /**
   * Prints every element of the enumeration to System.out, one per line,
   * each prefixed with " - ".  A null enumeration prints nothing.
   *
   * @param elements the items to print; may be null
   */
  public static void dumpElements(Enumeration elements) {
      // The parameter used to be called "enum", which is a reserved word
      // from Java 5 onwards and no longer compiles.
      if (elements == null) {
          return;
      }
      while (elements.hasMoreElements()) {
          System.out.println(" - " + elements.nextElement());
      }
  }


  /**
   * Command-line entry point.  Scans each file named in argv and prints its
   * disallowed elements, its warning elements and the analysis object to
   * System.out.  If anything throws, the stack trace is printed to
   * System.out and the JVM exits with status 1.
   *
   * @param argv paths of the files to scan
   */
  public static void main(String argv[]) {
      for (int i = 0; i < argv.length; i++) {
          Reader reader = null;
          try {
              reader = new FileReader(argv[i]);
              Parser finder = new Parser(reader);
              // The text returned by parse() is not printed; the call is
              // made for the analysis it fills in.
              finder.parse();
              FilterAnalysis analysis = finder.getAnalysis();
              System.out.println("Disallowed:");
              dumpElements(analysis.getDisallowedElements());
              System.out.println("Warning:");
              dumpElements(analysis.getWarningElements());
              System.out.println(analysis);
          }
          catch (Exception e) {
              e.printStackTrace(System.out);
              System.exit(1);
          }
          finally {
              // Release the file handle before moving on to the next file.
              if (reader != null) {
                  try {
                      reader.close();
                  }
                  catch (IOException ignored) {
                      // Nothing useful can be done if close() fails here.
                  }
              }
          }
      }
  }
%} 

/* Name of the generated scanner class; main() above instantiates it. */
%class Parser
/* Scan Unicode character input. */
%unicode
/* Match every pattern without regard to letter case. */
%ignorecase

/* A run of whitespace, possibly empty (note the trailing star).  Besides
 * newline, carriage return, space and tab, the class also lists \b
 * (backspace) and \012, the octal escape for the same character as \n. */
WS=[\n\r\ \t\b\012]*

/* Characters that can continue an unquoted attribute value: anything except
 * the close of tag (right angle bracket), whitespace, colon and question
 * mark.  Colon and question mark are left out of the class because the URL
 * patterns below match them explicitly.
 * NONSEP_NOQUOTE additionally excludes the double quote. */
NONSEP=[^>\n\r\ \t\b\012:?]
NONSEP_NOQUOTE=[^>\n\r\ \t\b\012:?"]

/* Alpha: one or more letters.  With %ignorecase in effect the range is not
 * limited to lower case. */
ALPHA_STRING=[a-z]+

/* Alphanumeric: one or more letters or digits.
 * NOTE(review): not referenced by any macro or rule visible in this file;
 * confirm whether it is still needed. */
ALPHANUM_STRING=[a-z0-9]+

/* Attribute names that are reported as scripts or other executable content.
 * The list holds the on... event handlers together with data, datasrc,
 * codebase, object and http-equiv. */
SCRIPT_KEYWORDS=data|datasrc|codebase|object|onblur|onchange|onclick|ondblclick|onkeydown|onkeypress|onkeyup|onload|onmousedown|onmousemove|onmouseout|onmouseover|onmouseup|onsubmit|onreset|onselect|onunload|onafterupdate|onbeforeupdate|onerrorupdate|onrowenter|onrowexit|onbeforeunload|ondatasetchanged|ondataavailable|ondatasetcomplete|http-equiv
/* One of the keywords above, optional whitespace, then an equals sign. */
SCRIPT_INTRO={SCRIPT_KEYWORDS}{WS}=
/* The opening of a tag that is always reported; only the left angle bracket
 * and the tag name are matched.
 * NOTE(review): "object" appears only in SCRIPT_KEYWORDS (as an attribute
 * name), not in this tag list; confirm that is intended. */
DISALLOWED_TAGS=<(script|applet|base|servlet|embed|param|bgsound)

/* Attributes that cause the client to automatically retrieve a page */
EMB_ATTRS=background|longdesc|lowsrc|profile|src
/* Catch any colon or question mark within the URL.
 * EMB_PATTERNS1 handles a value that opens with a double quote; the match
 * runs up to, but does not include, the closing quote.
 * EMB_PATTERNS2 handles a value that does not open with a double quote; the
 * match ends at whitespace, a right angle bracket, or a second colon or
 * question mark. */
EMB_PATTERNS1={EMB_ATTRS}{WS}={WS}["][^"?:]*[?:][^"]*
EMB_PATTERNS2={EMB_ATTRS}{WS}={WS}({NONSEP_NOQUOTE}{NONSEP}*)?[?:]{NONSEP}*
EMB_PATTERNS={EMB_PATTERNS1}|{EMB_PATTERNS2}

/* Attributes that cause the client to retrieve a page when activated by the
 * user */
LINK_ATTRS=action|cite|classid|href
/* URL schemes that may appear in a link attribute without a warning. */
ALLOWED_SCHEMES=mailto

/* A link attribute whose value starts with an allowed scheme and a colon.
 * ALLOWED_LINK_PATTERNS1 is the double-quoted form (closing quote not
 * included in the match); ALLOWED_LINK_PATTERNS2 is the unquoted form. */
ALLOWED_LINK_PATTERNS1={LINK_ATTRS}{WS}={WS}["]{ALLOWED_SCHEMES}:[^"]*
ALLOWED_LINK_PATTERNS2={LINK_ATTRS}{WS}={WS}{ALLOWED_SCHEMES}:{NONSEP}*
ALLOWED_LINK_PATTERNS={ALLOWED_LINK_PATTERNS1}|{ALLOWED_LINK_PATTERNS2}

/* Catch any colon or question mark within the URL of a link attribute.
 * The two forms mirror EMB_PATTERNS1 and EMB_PATTERNS2 above, with
 * LINK_ATTRS in place of EMB_ATTRS: LINK_PATTERNS1 is the double-quoted
 * form and LINK_PATTERNS2 covers values that do not open with a double
 * quote. */
LINK_PATTERNS1={LINK_ATTRS}{WS}={WS}["][^"?:]*[?:][^"]*
LINK_PATTERNS2={LINK_ATTRS}{WS}={WS}({NONSEP_NOQUOTE}{NONSEP}*)?[?:]{NONSEP}*
LINK_PATTERNS={LINK_PATTERNS1}|{LINK_PATTERNS2}

%% 
{SCRIPT_INTRO} {
    /* A script keyword followed by '=': copy the matched text to the output
     * buffer and record it as disallowed.  The text is taken from yytext()
     * rather than from the scanner's internal buffer fields, which are not
     * part of the documented scanner API. */
    final String matched = yytext();
    buffer.append(matched);
    analysis.addDisallowedElement("Scripts or other executable content: " + matched);
    if (debug) {
        System.err.println("found script " + matched);
    }
    analysis.found_script=true;
}

{DISALLOWED_TAGS} {
    /* The opening of a disallowed tag: copy the matched text to the output
     * buffer and record it as disallowed.  The text is taken from yytext()
     * rather than from the scanner's internal buffer fields, which are not
     * part of the documented scanner API. */
    final String matched = yytext();
    buffer.append(matched);
    analysis.addDisallowedElement("Scripts or other executable content: " + matched);
    if (debug) {
        System.err.println("found dangerous tags " + matched);
    }
    analysis.found_script=true;
}

{EMB_PATTERNS} {
    /* An automatically fetched attribute whose URL contains ':' or '?':
     * copy the matched text to the output buffer and record it as
     * disallowed.  The text is taken from yytext() rather than from the
     * scanner's internal buffer fields, which are not part of the
     * documented scanner API. */
    final String matched = yytext();
    buffer.append(matched);
    analysis.addDisallowedElement("Absolute URLs that are fetched automatically: " + matched);
    if (debug) {
        System.err.println("found absolute embedded link attributes " + matched);
    }
    analysis.found_embedded_absolute=true;
}

{ALLOWED_LINK_PATTERNS} {
    /* A link attribute using an allowed scheme: the text is copied to the
     * output buffer and nothing is recorded in the analysis.  This rule is
     * listed before the general link rule so that it is the one chosen when
     * both match the same text.  The unused local copy of the match has
     * been dropped, and yytext() replaces the scanner's internal buffer
     * fields, which are not part of the documented scanner API. */
    final String matched = yytext();
    buffer.append(matched);
    if (debug) {
        System.err.println("found allowed absolute linking: " + matched);
    }
}
{LINK_PATTERNS} {
    /* A link attribute whose URL contains ':' or '?': copy the matched text
     * to the output buffer and record it as a warning.  The unused local
     * copy of the match has been dropped, and yytext() replaces the
     * scanner's internal buffer fields, which are not part of the
     * documented scanner API. */
    final String matched = yytext();
    buffer.append(matched);
    analysis.addWarningElement(matched);
    if (debug) {
        System.err.println("found absolute linking: " + matched);
    }
    analysis.found_external_links=true;
}

{ALPHA_STRING} {/*Reduce backtracking*/
    /* A plain run of letters is consumed as one token and copied to the
     * output buffer unchanged.  yytext() is used instead of the scanner's
     * internal buffer fields, which are not part of the documented scanner
     * API. */
    buffer.append(yytext());
}

.|\n {
    /* Catch-all: any single character not consumed by an earlier rule is
     * copied to the output buffer unchanged.  yytext() is used instead of
     * the scanner's internal buffer fields, which are not part of the
     * documented scanner API. */
    buffer.append(yytext());
}

