%%% File    : wbxml.erl
%%% Author  : Johan Blom <johblo@dragon.cellpt.se>
%%% Purpose : Main WBXML encoder/decoder
%%% Created :  5 Jan 2001 by Johan Blom <johblo@dragon.cellpt.se>

-module(wbxml).
-author('johblo@dragon.cellpt.se').
-revision('$Revision: 1.3 $ ').
-rcsid('@(#) $Id: wbxml.erl,v 1.3 2001/07/17 12:55:49 johblo Exp $ ').
-modified('$Date: 2001/07/17 12:55:49 $ ').
-modified_by('$Author: johblo $ ').
-vsn("1").

-export([encode/4,decode/1,
	 add_string/2,lookup_index/2]).

-include("wae.hrl").
-include("wbxml.hrl").
-include("xmerl.hrl").
-include("utilslog.hrl").

%% User State for xmerl scanning
-record(user_state,{
	  str_table, % (ets) The Global String table
	  module,    % (atom) Main callback module, possibly inheriting others
	  wbxml,     % (uint8) WBXML version to be used for encoding
	  xml,       % (#xmlDecl{}) XML declaration
	  charset,   % (atom) Charset used to encode all text messages
	  cbs        % (list) Call back modules, set when fetching DTD
	 }).

%%% Usage of the various xmerl_scan functions
%% fetch_fun - to catch the DTD data
%% acc_fun - to accumulate the WBXML code
%% hook_fun - to make call back to specific module supporting the DTD
%% event_fun - to store the XML declaration record
%%% See manual for explanations on character translating scheme!
encode(Content,Accept,ExtCharset,DTDRules) ->
    {Mode,InCharset,C1}=wae_man:detect_charset(ExtCharset,Content),

    Acc=fun(X,Acc,S) -> {[X|Acc], S} end,
    Fetch=fun(DTDSpec,S) ->
		  case DTDSpec of
		      {public,PI,URI} ->
			  US=xmerl_scan:user_state(S),
			  {Wbxml,Mod}=get_module(PI),
			  US1=US#user_state{cbs=xmerl:callbacks(Mod),
					    module=Mod,wbxml=Wbxml},
			  {ok, xmerl_scan:user_state(US1,S)};
		      Other ->
			  {ok, S}
		  end
	  end,
    Hook=fun(ParsedEntity, S) ->
		 US=xmerl_scan:user_state(S),
		 WBXMLinfo=#wbxml_info{str_table=US#user_state.str_table,
				       charset=US#user_state.charset},
		 case xmerl:export_element(ParsedEntity,US#user_state.cbs,
					   [WBXMLinfo]) of
		     {Tag,{StrTbl,BinCodes}} ->
			 US1=US#user_state{str_table=StrTbl},
			 {{Tag,StrTbl,BinCodes},xmerl_scan:user_state(US1,S)};
		     {error,Reason} ->
			 throw({error,Reason})
		 end
	   end,
    Event=fun(#xmerl_event{data=Data}, S) ->
		  case Data of
		      Decl when record(Decl,xmlDecl) ->
			  US=xmerl_scan:user_state(S),
			  US1=US#user_state{xml=Decl},
			  xmerl_scan:user_state(US1,S);
		      _ ->
			  S
		  end
	  end,

    User_state=#user_state{str_table=create_erl_stringtable([])},
    {USret,C2}=xmerl_scan:string(C1,
				 [{acc_fun, Acc},
				  {fetch_fun, Fetch},
				  {event_fun,Event},
				  {user_state,User_state},
				  {prolog,stop}
				 ]),
    ?debug("xmerl_scan with prolog=stop Charset:~p Mode:~p~n",
	   [InCharset,Mode],encode),
    
    {OutCharset,C3}=
	case Mode of
	    undefined ->
		case get_xml_encoding(USret#user_state.xml) of
		    undefined -> % Character set undefined!
			guess_charset(C2);
		    XMLcharset ->
			?debug("XMLcharset=~p",[XMLcharset],encode),
			{XMLcharset,ucs:to_unicode(C2,XMLcharset)}
		end;
	    auto ->
		case get_xml_encoding(USret#user_state.xml) of
		    undefined -> % Assume Auto detected charset is correct
			{InCharset,C2};
		    XMLcharset ->
			{XMLcharset,ucs:to_unicode(C2,XMLcharset)}
		end;
	    external ->
		{InCharset,C2}
	end,
    ?debug("OutCharset: ~p Rest:~p~n",[OutCharset,C3],encode),
    
    USret2=USret#user_state{charset=OutCharset},
    Module=USret2#user_state.module,
    ModRules=
	case ets:lookup(DTDRules,Module) of
	    [{_,MR}] ->
		MR;
	    _ ->
		throw({error,dtd_not_supported})
	end,
    {Root,StrTbl2,WBXMLTokens}=
	xmerl_scan:string(C3,
			  [{acc_fun, Acc},
			   {hook_fun,Hook},
			   {user_state,USret2},
			   {rules,ModRules},
			   {prolog,continue}
			  ]),
    EncWBXMLVers=encode_wbxml_version(USret2#user_state.wbxml),
    EncPIcode=encode_public_identifier(Module),
    EncCharset=encode_charset(OutCharset),
    EncStrTbl=encode_stringtable(StrTbl2,OutCharset),
    remove_stringtable(User_state#user_state.str_table),
    {ok,
     EncWBXMLVers++EncPIcode++EncCharset++EncStrTbl++WBXMLTokens}.

%%% Some sites generate content without the XML Declaration (not "valid" XML)
get_xml_encoding(#xmlDecl{attributes=Attrlist}) ->
    lookup_attributelist(encoding,Attrlist);
get_xml_encoding(undefined) -> % No XML Declaration
    ?warning("No XML Declaration!",[],get_xml_encoding),
    undefined.

lookup_attributelist(Name,[]) ->
    undefined;
lookup_attributelist(Name,[#xmlAttribute{name=Name,value=Val}|Attrlist]) ->
    list_to_atom(httpd_util:to_lower(Val));
lookup_attributelist(Name,[H|Attrlist]) ->
    lookup_attributelist(Name,Attrlist).

%%% According to the XML standard the default character set is UTF-8.
%%% However, this have shown not be always true for actual WAP services.
%%% For now, assume it is UTF-8 encoded and if that fails try ISO-8859-1
guess_charset(C) ->
    ?debug("Assumes it is utf-8 encoded~n",[],encode),
    case ucs:to_unicode(C,'utf-8') of
	{error,Reason} ->
	    ?warning("Mal-formed XML document! (not UTF-8) got ~p",
		     [Reason],guess_charset), 
	    case ucs:to_unicode(C,'iso-8859-1') of
		{error,Reason1} ->
		    ?warning("... and not ISO-8859-1, got ~p",
			     [Reason1],guess_charset), 
		    {error,Reason1};
		C2 ->
		    {'iso-8859-1',C2}
	    end;
	C1 ->
	    {'utf-8',C1}
    end.


%%% ============================================================================
decode(Content) ->
    {Content1,WBXMLVers}=decode_wbxml_version(Content),
    BinWBXMLVers=get_bin(Content,Content1),
    io:format("WBXML version:~p from ~p~n",[WBXMLVers,BinWBXMLVers]),
    {Content2,PIcode}=decode_public_identifier(Content1),
    BinPIcode=get_bin(Content1,Content2),
    io:format("PI Code:~p from ~p~n",[PIcode,BinPIcode]),
    {Content3,Charset}=decode_charset(Content2),
    BinCharset=get_bin(Content2,Content3),
    io:format("Charset:~p from ~p~n",[Charset,BinCharset]),
    {Content4,StrTbl}=decode_stringtable(Content3),
    BinStrTbl=get_bin(Content3,Content4),
    io:format("StrTbl:~p from ~p~n",[StrTbl,BinStrTbl]),
    Module=decode_cb_module(PIcode,StrTbl),
    io:format("   using callback Module:~w~n",[Module]),

    case catch decode_tokens(Content4,StrTbl,Charset,Module) of
	Body when list(Body) ->
	    PI=decode_PI(PIcode,StrTbl),
	    remove_stringtable(StrTbl),
	    {ok,"<?xml version=\"1.0\"?>\n"++PI++Body};
	Error ->
	    ?error("ERROR decoding ~p, got ~p",[Content4,Error],decode),
	    remove_stringtable(StrTbl),
	    {error,cannot_decode}
    end.

%% FIXME! DTD identifer might be stored in the String Table
decode_cb_module(?WBXML_wml_10,_) -> wml_10;
decode_cb_module(?WBXML_wml_11,_) -> wml_11;
decode_cb_module(?WBXML_wml_12,_) -> wml_12;
decode_cb_module(?WBXML_wml_13,_) -> wml_13;
decode_cb_module(?WBXML_wta_event_10,_) -> wta_event_10;
decode_cb_module(?WBXML_si_10,_) -> si_10;
decode_cb_module(?WBXML_sl_10,_) -> sl_10;
decode_cb_module(?WBXML_co_10,_) -> co_10;
decode_cb_module(?WBXML_channel_11,_) -> channel_11;
decode_cb_module(?WBXML_channel_12,_) -> channel_12;
decode_cb_module(?WBXML_provisioning_10,_) -> provisioning_10;
decode_cb_module(?WBXML_wta_wml_12,_) -> wta_wml_12;
decode_cb_module(?WBXML_unknown,_) -> wml_11;
decode_cb_module(_,StrTbl) -> wml_11.


get_bin(Content,Content1) ->
    list_to_binary(lists:sublist(Content,length(Content)-length(Content1))).


%%% ----------------------------------------------------------------------------
decode_tokens(Content,StrTbl,Charset,Module) ->
    apply(Module,decode_wbxml,[Content,StrTbl,Charset]).


%%% ----------------------------------------------------------------------------
encode_wbxml_version(WBXMLVers) ->
    wap_common:pack_uintvar(WBXMLVers).

decode_wbxml_version(Content) ->
    wap_common:unpack_uintvar(Content).

%%% ----------------------------------------------------------------------------
%% Public Identifier code, or index in the string table.
encode_public_identifier(PI) ->
    [encode_PI(PI)].

decode_public_identifier([0|Content]) ->
    {C,Index}=wap_common:unpack_uintvar(Content),
    {C,{index,Index}};
decode_public_identifier(Content) ->
    wap_common:unpack_uintvar(Content).


%%% ----------------------------------------------------------------------------
encode_charset(Charset) ->
    case ucs:getMIB(Charset) of
	{error,undefined_charset} ->
	    throw({error,undefined_charset});
	MIBnum ->
	    wap_common:pack_uintvar(MIBnum)
    end.

decode_charset(Content) ->
    {C1,MIBnum}=wap_common:unpack_uintvar(Content),
    case ucs:getCharset(MIBnum) of
	{error,undefined_mibnum} ->
	    throw({error,undefined_mibnum});
	Charset ->
	    {C1,Charset}
    end.

%%% ----------------------------------------------------------------------------
encode_stringtable(StrTbl,Charset) ->
    EncStrTbl=create_wbxml_stringtable(StrTbl,Charset),
    wap_common:pack_uintvar(length(EncStrTbl))++EncStrTbl.

decode_stringtable(Content) ->
    {C1,Len}=wap_common:unpack_uintvar(Content),
    BinLen=get_bin(Content,C1),
    io:format("StrTbl Len:~w from ~p~n",[Len,BinLen]),
    {lists:nthtail(Len,C1),
     create_erl_stringtable(lists:sublist(C1,Len))}.

%%% ----------------------------------------------------------------------------
get_module("-//WAPFORUM//DTD WML 1.0//EN") -> {?WBXML_VERSION1,wml_10};
get_module("-//WAPFORUM//DTD WTA 1.1//EN") -> {?WBXML_VERSION1,wta_11};
get_module("-//WAPFORUM//DTD WML 1.1//EN") -> {?WBXML_VERSION1,wml_11};
get_module("-//WAPFORUM//DTD SI 1.0//EN") ->  {?WBXML_VERSION2,si_10};
get_module("-//WAPFORUM//DTD SL 1.0//EN") ->  {?WBXML_VERSION2,sl_10};
get_module("-//WAPFORUM//DTD CO 1.0//EN") ->  {?WBXML_VERSION2,co_10};
get_module("-//WAPFORUM//DTD CHANNEL 1.1//EN") -> {?WBXML_VERSION2,channel_11};
get_module("-//WAPFORUM//DTD WML 1.2//EN") ->     {?WBXML_VERSION2,wml_12};
get_module("-//WAPFORUM//DTD WML 1.3//EN") ->     {?WBXML_VERSION2,wml_13};
get_module("-//WAPFORUM//DTD PROV 1.0//EN") ->{?WBXML_VERSION2,provisioning_10};
get_module("-//WAPFORUM//DTD WTA-WML 1.2//EN") -> {?WBXML_VERSION2,wta_wml_12};
get_module("-//WAPFORUM//DTD CHANNEL 1.2//EN") -> {?WBXML_VERSION2,channel_12};
get_module(PI) ->
    throw({error,unknown_pi}).

%%% SGML Public Identifier
encode_PI(wml_10) -> ?WBXML_wml_10;
encode_PI(wta_event_10) -> ?WBXML_wta_event_10;
encode_PI(wml_11) -> ?WBXML_wml_11;
encode_PI(si_10) -> ?WBXML_si_10;
encode_PI(sl_10) -> ?WBXML_sl_10;
encode_PI(co_10) -> ?WBXML_co_10;
encode_PI(channel_11) -> ?WBXML_channel_11;
encode_PI(wml_12) -> ?WBXML_wml_12;
encode_PI(wml_13) -> ?WBXML_wml_13;
encode_PI(provisioning_10) -> ?WBXML_provisioning_10;
encode_PI(wta_wml_12) -> ?WBXML_wta_wml_12;
encode_PI(channel_12) -> ?WBXML_channel_12;
encode_PI(A) ->
    A.



decode_PI({index,Index},StrTab) ->
    lookup_index(StrTab,Index);
decode_PI(?WBXML_unknown,_) ->
    "";
decode_PI(?WBXML_wml_10,_) ->
    "<!DOCTYPE wml PUBLIC \"-//WAPFORUM//DTD WML 1.0//EN\" \"http://www.wapforum.org/DTD/wml_1.0.xml\">";
decode_PI(?WBXML_wta_event_10,_) ->
    "<!DOCTYPE wta PUBLIC \"-//WAPFORUM//DTD WTA 1.1//EN\" \"http://www.wapforum.org/DTD/wta_1.0.xml\">";
decode_PI(?WBXML_wml_11,_) ->
    "<!DOCTYPE wml PUBLIC \"-//WAPFORUM//DTD WML 1.1//EN\" \"http://www.wapforum.org/DTD/wml_1.1.xml\">";
decode_PI(?WBXML_si_10,_) ->
    "<!DOCTYPE si PUBLIC \"-//WAPFORUM//DTD SI 1.0//EN\" \"http://www.wapforum.org/DTD/si_1.0.xml\">";
decode_PI(?WBXML_sl_10,_) ->
    "<!DOCTYPE sl PUBLIC \"-//WAPFORUM//DTD SL 1.0//EN\" \"http://www.wapforum.org/DTD/sl_1.0.xml\">";
decode_PI(?WBXML_co_10,_) ->
    "<!DOCTYPE co PUBLIC \"-//WAPFORUM//DTD CO 1.0//EN\" \"http://www.wapforum.org/DTD/co_1.0.xml\">";
decode_PI(?WBXML_channel_11,_) ->
    "<!DOCTYPE channel PUBLIC \"-//WAPFORUM//DTD CHANNEL 1.1//EN\" \"http://www.wapforum.org/DTD/channel_1.1.xml\">";
decode_PI(?WBXML_wml_12,_) ->
    "<!DOCTYPE wml PUBLIC \"-//WAPFORUM//DTD WML 1.2//EN\" \"http://www.wapforum.org/DTD/wml_1.2.xml\">";
decode_PI(?WBXML_wml_13,_) ->
    "<!DOCTYPE wml PUBLIC \"-//WAPFORUM//DTD WML 1.3//EN\" \"http://www.wapforum.org/DTD/wml_1.3.xml\">";
decode_PI(?WBXML_provisioning_10,_) ->
    "<!DOCTYPE prov PUBLIC \"-//WAPFORUM//DTD PROV 1.0//EN\" \"http://www.wapforum.org/DTD/prov_1.0.xml\">";
decode_PI(?WBXML_wta_wml_12,_) ->
    "<!DOCTYPE wta-wml PUBLIC \"-//WAPFORUM//DTD WTA-WML 1.2//EN\" \"http://www.wapforum.org/DTD/wta-wml_1.2.xml\">";
decode_PI(?WBXML_channel_12,_) ->
    "<!DOCTYPE channel PUBLIC \"-//WAPFORUM//DTD CHANNEL 1.2//EN\" \"http://www.wapforum.org/DTD/channel_1.2.xml\">".


%%% ============================================================================
%%% String table handling
%% Note:
%% - Currently implemented as simple key/value list which is probably faster
%%   than using ets, because of the setup times and usually rather small tables
%%   anyway.
%% - String tables are created empty when encoding, but directly filled with
%%   content when decoding.
%% - StrTab is kept sorted!

create_erl_stringtable([]) ->
    [];
create_erl_stringtable(Data) ->
    create_erl_stringtable([],Data,0).

create_erl_stringtable(StrTab,[],_) ->
    StrTab;
create_erl_stringtable(StrTab,Data,Index) ->
    {Data1,Str}=wsp_bytecodes:decode_string(Data),
    create_erl_stringtable([{Index,Str}|StrTab],Data1,Index+length(Str)+1).

create_wbxml_stringtable([],_) ->
    [];
create_wbxml_stringtable(StrTab,Charset) ->
    create_wbxml_stringtable2(lists:reverse(StrTab),Charset).

create_wbxml_stringtable2([],_) ->
    [];
create_wbxml_stringtable2([{_,Str}|Rest],Charset) ->
    Str2=ucs:from_unicode(Str,Charset),
    binary_to_list(wsp_bytecodes:encode_string(Str2))++
	create_wbxml_stringtable2(Rest,Charset).

remove_stringtable(StrTbl) ->
    [].
% ets:delete(StrTbl).


add_string([],Val) ->
    {[{0,Val}],0};
add_string(StrTbl,Val) ->
    case lists:keysearch(Val,2,StrTbl) of
	{value,{Index,Val}} ->
	    {StrTbl,Index};
	_ ->
	    {I1,TblVal}=hd(StrTbl),
	    Index=I1+length(TblVal)+1,
	    {[{Index,Val}|StrTbl],Index}
    end.


lookup_index(StrTab,Index) ->
    case lists:keysearch(Index,1,StrTab) of
	{value,{_,Val}} ->
	    Val;
	_ ->
	    ?error("Index ~p not found in string table",
		   [Index],lookup_index),
	    throw({error,not_found})
    end.

%    case ets:lookup(StrTab,Key) of
%	[{Key,Val}]  ->
%	    Val;
%	[] ->
%	    ?error("Key ~p not found in string table",[Key],lookup_stringtable),
%	    throw({error,not_found})
%    end.



