%%% File    : http.erl
%%% Author  : Johan Blom <johblo@dragon.cellpt.se>
%%% Purpose : HTTP/1.1 Client
%%% Created : 31 Aug 2000 by Johan Blom <johblo@dragon.cellpt.se>

%%% TODO: (Known bugs!)
%% - Support for Persistent Connections, by checking the 'Connection' header
%% - Finish implementation of chunked transfers
%% - Should probably implement a simple cach as well

-module(http).
-author('johblo@dragon.cellpt.se').
-behaviour(gen_server).
-revision('$Revision: 1.2 $ ').
-rcsid('@(#) $Id: http.erl,v 1.2 2001/07/10 12:53:28 johblo Exp $ ').
-modified('$Date: 2001/07/10 12:53:28 $ ').
-modified_by('$Author: johblo $ ').
-vsn("1").

-export([request/4,request_sync/2,request_sync/3,
	 noProxy/2,cancel_request/1,
	 format_status/1,
	 getHeaderValue/2,getParameterValue/2]).

%% Internal gen_server callbacks
-export([init/1, handle_cast/2, handle_info/2, terminate/2]).

-define(HTTP_REQUEST_TIMEOUT,   5000).
-define(MAX_REDIRECTS, 4).
-define(START_OPTIONS,[]).
-define(CR,13).
-define(LF,10).
-define(DEBUG(_Par1, _Par2), 
        io:format(lists:concat(["\n<", ?MODULE, ":", ?LINE, ">\n",
                                _Par1, "\n</", ?MODULE, ">\n"]), _Par2)).

-include("utilslog.hrl").

-record(state, {bin,
		redircount,
		from,
		ref,
		method,
		httpcont
	       }).

%%%----------------------------------------------------------------------
%%% API
%%%----------------------------------------------------------------------
%% Spawns a process for each HTTP request and sends caller the message:
%% gen_server:cast(Caller,{Ref,{Status,Headers,ContentType,Body}}
%% whenever the reply comes back from the HTTP server
%% HTTPCont={Url,Headers,ContentType,Body} or {Url,Headers}
%% Method=atom() get,post etc.
request_sync(Method,HTTPReq) ->
    Timeout=?HTTP_REQUEST_TIMEOUT,
    request_sync(Method,HTTPReq,Timeout,0).

request_sync(Method,HTTPReq,Settings) ->
    case catch {get(timeout,Settings)} of
	{Timeout} when integer(Timeout) ->
	    request_sync(Method,HTTPReq,Timeout,0);
	Error ->
	    {error,invalid_command}
    end.



request_sync(Method,HTTPReq,Timeout,Redirects) ->
    TCPArgs=[binary,{packet,0},{active, false}],
    case http_request(Method,HTTPReq,TCPArgs) of
	{ok,Socket} ->
	    case catch receive_data(Socket,Timeout,Method,<<>>) of
		{ok,HTTPResp} ->
		    {ok,HTTPResp};
		{redirect,RedirUrl} ->
		    if 
			Redirects<?MAX_REDIRECTS ->
			    NewHTTPReq=create_newrequest(Method,HTTPReq,
							 RedirUrl),
			    request_sync(Method,NewHTTPReq,Timeout,Redirects+1);
			true ->
			    {status,500}
		    end;
		{Error,Reas} when Error==error;Error=='EXIT' ->
		    ?error("Got error when receiving data ~p",[Reas],
			   request_sync),
		    {status,500}
	    end;
	{error,Status} ->
	    {status,Status}
    end.

create_newrequest(Method,HTTPReq,RedirUrl) when Method==post;Method==put ->
    {Url,Hd,CT,Bo}=HTTPReq,
    {resolve_url(Url,RedirUrl),Hd,CT,Bo};
create_newrequest(_,HTTPReq,RedirUrl) ->
    {Url,Hd}=HTTPReq,
    {resolve_url(Url,RedirUrl),Hd}.

    
%%% ============================================================================
request(Ref,Method,HTTPCont,Timeout) ->
    {ok,Hc}=gen_server:start_link(?MODULE,
				  {Ref,Method,HTTPCont,self()},
				  ?START_OPTIONS),
    gen_server:cast(Hc,{request,Method,HTTPCont}),
    Hc.

cancel_request(Hc) ->
    gen_server:cast(Hc,stop).

%%%----------------------------------------------------------------------
%%% Callback functions from gen_server
%%%----------------------------------------------------------------------

%%----------------------------------------------------------------------
%% Func: init/1
%% Returns: {ok, State}          |
%%          {ok, State, Timeout} |
%%          ignore               |
%%          {stop, Reason}
%%----------------------------------------------------------------------
init({Ref,Method,HTTPCont,From}) ->
    {ok, #state{bin=[],redircount=0,
		from=From,ref=Ref,method=Method,httpcont=HTTPCont}}.


terminate(Reason,State) ->
    ok.

handle_cast({request,Method,HTTPCont}, State) ->
%%    TCPArgs=[binary,{packet,0},{active, false}],
    TCPArgs=[binary,{packet,0}],
    case catch http_request(Method,HTTPCont,TCPArgs) of
	{ok,_} ->
	    ok;
	{error,NewStatus} ->
	    gen_server:cast(State#state.from,
			    {State#state.ref,{status,NewStatus}})
    end,
    {noreply, State};
handle_cast(stop, State) ->
    {stop,normal, State}.


%%----------------------------------------------------------------------
%% Func: handle_info/2
%% Returns: {noreply, State}          |
%%          {noreply, State, Timeout} |
%%          {stop, Reason, State}            (terminate/2 is called)
%%----------------------------------------------------------------------
handle_info({tcp_error, Socket, Reason}, State) ->
    gen_server:cast(State#state.from,{State#state.ref,{status,503}}),
    {stop,normal, State};
handle_info({tcp_closed, Socket}, State) ->
    From=State#state.from,
    case catch http_result(State#state.bin,State#state.method) of
	{ok,HTTPCont} ->
	    gen_server:cast(From,{State#state.ref,HTTPCont}),
	    {stop,normal, State};
	{redirect,RedirUrl} ->
	    {Url,Hd,CT,Bo}=State#state.httpcont,
	    NewUrl=resolve_url(Url,RedirUrl),
	    ?trace("Redirecting: ~p ~p ~p ~p",[NewUrl,Url,Hd,CT],handle_info),
	    NewState=State#state{bin=[],redircount=State#state.redircount+1},
	    if
		NewState#state.redircount<?MAX_REDIRECTS ->
		    http_request(NewState#state.method,{NewUrl,Hd,CT,Bo},
				 [binary,{packet,0}]);
		true ->
		    ?warning("Too many redirects, aborting: ~p",
			     [NewUrl],handle_info),
		    gen_server:cast(From,{State#state.ref,{status,500}})
	    end,
	    {noreply, NewState};
	{error,NewStatus} ->
	    gen_server:cast(From,{State#state.ref,{status,NewStatus}}),
	    {stop,normal, State}
    end;
handle_info({tcp, Socket, NewData}, State) ->
    NewState=State#state{bin=concat_binary([State#state.bin,NewData])},
    {noreply, NewState}.



%% ===========================================================================
%% Internals

%%% Use HTTP/1.1 for now although it is not really supported as it additionally
%%%  - may send stuff back with byte_ranges
%%%  - URL/Host header is not properly supported by the WAP Gateway
%%% Host: field is required when addressing multi-homed sites ...
%%% It must not be present when the request is being made to a proxy.
http_request(Method,{Url,Headers,ContentType,Body},TCPArgs) ->
    case extractUrlParts(Url) of
	{http,Host,Port,Path} ->
	    PostData=
		if
		    Method==post;Method==put -> 
			content_type_header(ContentType) ++
			    content_length_header(length(Body)) ++ 
			    "\r\n" ++ Body;
		    true ->
			"\r\n"
		end,
	    Message=
		case useProxy(Url) of
		    false ->
			method(Method)++" "++Path++" "++"HTTP/1.1\r\n"++
			    host_header(Host)++connection_header()++
			    te_header()++
			    headers(Headers) ++ PostData;
		    _ ->
			method(Method)++" "++Path++" "++"HTTP/1.1\r\n"++
			    host_header(Host)++connection_header()++
			    te_header()++
			    headers(Headers)++PostData
		end,
	    tcp_connect(Host,Port,TCPArgs,Message);
	{error,Reason} ->
	    ?error("Got error when extracting URL ~p",[Reason],http_request),
	    {error,"Can't extract URL"}
    end;
http_request(Method,{Url,Headers},TCPArgs)->
    http_request(Method,{Url,Headers,[],[]},TCPArgs).


tcp_connect(Host,Port,TCPArgs,Message) ->
    case gen_tcp:connect(Host,Port,TCPArgs) of
	{error,Tag} -> 
	    ?error("Couldn't create TCP connection ~p",[Tag],tcp_connect),
	    {error,inet:format_error(Tag)};
	{ok,Socket} ->
	    case gen_tcp:send(Socket,Message) of
		ok ->
		    {ok,Socket};
		{error,Tag} ->
		    ?error("Couldn't send via TCP ~p",[Tag],tcp_connect),
		    {error,inet:format_error(Tag)}
	    end
    end.


http_result([],_) ->
    {error,503};
http_result(Bin,Method) ->
    {Status,Headers,CT,TransferBody}=split_data(Bin),
    case {Status,Method} of
	{Status,get} when 300=<Status,Status =<307 ->
	    RedirUrl=getHeaderValue('location',Headers),
	    {redirect,RedirUrl};
	{Status,head} when 300=<Status,Status =<307 ->
	    RedirUrl=getHeaderValue('location',Headers),
	    {redirect,RedirUrl};	    
	_ ->
	    TransferEncoding=getHeaderValue('transfer-encoding',Headers),
	    case transfer_decode_body(TransferEncoding,TransferBody) of 
		Body -> {ok,{Status,Headers,CT,Body}};
		{error,Reason} -> {error,503}
	    end
    end.

transfer_decode_body("chunked",TransferBody) ->
    decode_chunked_body(TransferBody);
transfer_decode_body([],TransferBody) ->
    TransferBody;
transfer_decode_body(_,TransferBody) ->
    {error,unknown_transfer_encoding}.

receive_data(Socket, Timeout, Method,Bin) ->
    case gen_tcp:recv(Socket, 0, Timeout) of
        {ok, B} ->
            receive_data(Socket, Timeout, Method,concat_binary([Bin,B]));
        {error, closed} ->
            http_result(Bin,Method);
        Other ->
	    gen_tcp:close(Socket),
            {error, {socket, Other}}
    end.

%%% ----------------------------------------------------------------------------
resolve_url(Url,"http://"++AbsUrl) ->
    "http://"++AbsUrl;
resolve_url(Url,RelUrl) ->
    url_parse:resolve(Url,RelUrl).


get(Key,Dictionary) when list(Dictionary) -> % 'Key' is not a list
    case lists:keysearch(Key,1,Dictionary) of
	{value,{Key,Value}} ->
	    Value;
	{value,Tuple} ->
	    %% If the element found wasn't a true dictionary value
	    %% (i.e. {Key,Value}) but a tuple with more than two
	    %% elements, we return the entire tuple as a fallback
	    %% solution.
	    Tuple;
	_Other ->
	    case Key of
		timeout ->
		    ?HTTP_REQUEST_TIMEOUT;
		_Other2 ->
		    throw({exception,{not_found,Key}})
	    end
    end;
get(_,Other) ->
    throw({exception,{not_a_list,Other}}).

%%% ----------------------------------------------------------------------------
%% Data should be a string starting with:
%% "HTTP/x.y statuscode Document Follows
%% \r\nContent-Length: 876\r\nContent-Type: text/html ... "
%% Returns {Status,Headers,ContentType,Body}
%%   Status -> integer,
%%   ContentType -> atom,
%%   HeaderList -> list of strings,
%%   Body -> string
%%% Body0 might have had multiple "sections" separated by double CRLFs ... 
%%% ... so strip white spaces around them.
%%% TBD will this affect multipart handling???
%%%		    Body = lists:concat(lists:map({string,strip},[],Body0)),
split_data(Bin) ->
    {Status,Headers,Body}=response(binary_to_list(Bin)),
    case extractHeaderValue('content-type',Headers) of
	{error,Reason} ->
	    {Status,Headers,[],Body};
	{H,CT} ->
	    {Status,H,CT,Body}
    end.

%%% From RFC 2616:
%%%       Response      = Status-Line               ; Section 6.1
%%%                       *(( general-header        ; Section 4.5
%%%                        | response-header        ; Section 6.2
%%%                        | entity-header ) CRLF)  ; Section 7.1
%%%                       CRLF
%%%                       [ message-body ]          ; Section 7.2
response(Cs) ->
    case status_line(Cs) of
	{ok, Status, Cs1} ->
	    {Headers,Body} = hsplit([],Cs1),
	    {Status,tagup_header(split_lines(Headers)),Body};
	Error ->
	    Error
    end.

%%% From RFC 2616:
%%%       Status-Line = HTTP-Version SP Status-Code SP Reason-Phrase CRLF
status_line(Cs) ->
    {{Type,Ver},[X1,X2,X3,$ |Cs1]} = version(Cs,[]),
    {Phrase,Cs2} = phrase(Cs1,[]),
    {ok, list_to_integer([X1,X2,X2]),Cs2}.


%%% From RFC 2616:
%%%       HTTP-Version   = "HTTP" "/" 1*DIGIT "." 1*DIGIT
version("HTTP/"++Cs,Acc) ->
    {Major,[$.|Cs1]} = num(Cs),
    {Minor,Cs2} = num(Cs1),
    {{'HTTP',{Major,Minor}},skip_lwsp(Cs2)}.

%%% From RFC 2616:
%%%      Reason-Phrase  = *<TEXT, excluding CR, LF>
phrase("\r\n"++Cs, Acc) ->
    {lists:reverse(Acc), Cs};
phrase([C | Cs], Acc) ->
    phrase(Cs, [C|Acc]).


%%% Split body from rest, on \r\n\r\n
hsplit(Accu,[]) -> {lists:reverse(Accu),[]};
hsplit(Accu,"\r\n\r\n"++Tail) -> {lists:reverse(Accu),Tail}; 
hsplit(Accu,[H|T]) -> hsplit([H|Accu],T).


%%% Split lines on \r\n
split_lines(Request) ->  split_lines(Request, [], []).

split_lines([],CAcc,Acc) ->
    lists:reverse([lists:reverse(CAcc)|Acc]);
split_lines("\r\n"++Rest,CAcc,Acc) ->
    split_lines(Rest,[],[lists:reverse(CAcc)|Acc]);
split_lines([Chr|Rest],CAcc,Acc) ->
    split_lines(Rest,[Chr|CAcc],Acc).


%% Create {Field-name,Field-value} tuples from "Field-name: Field-value" strings
tagup_header([]) -> [];
tagup_header([Line|Rest]) -> [tag(Line,[])|tagup_header(Rest)].

tag([],Tag) ->
    Field=list_to_atom(httpd_util:to_lower(lists:reverse(Tag))),
    {Field,""};
tag([$:|Rest],Tag) ->
    Field=list_to_atom(httpd_util:to_lower(lists:reverse(Tag))),
    {Field,get_value(Rest)};
tag([Chr|Rest],Tag) -> tag(Rest, [Chr|Tag]).

get_value([]) ->        [];
get_value([32|Line]) -> get_value2(Line);
get_value([C|Line]) ->  get_value(Line).

get_value2([]) ->     [];
get_value2([L|Ls]) -> [L|get_value2(Ls)].


%% Return 1*DIGIT as a number
num([C|Cs]) when $0=<C,C=<$9 -> num(Cs,C-$0).

num([C|Cs],N) when $0=<C,C=<$9 -> num(Cs,N*10+(C-$0));
num(Cs,N) -> {N,Cs}.


%% skip space & tab
skip_lwsp([$ | Cs]) -> skip_lwsp(Cs);
skip_lwsp([$\t | Cs]) -> skip_lwsp(Cs);
skip_lwsp(Cs) -> Cs.

%% =============================================================================

%%% Extracts and removes Status line from Headers and converts the rest of the
%%% headers to a list onj the form [{Fieldname,Fieldvalue},...]
%%% All Fieldnames are also converted to Uppercase.
extractHeaderValue(Attr,Headers) ->
    case lists:keysearch(Attr,1,Headers) of
	{value,{Attr,Val}} ->
	    {lists:keydelete(Attr,1,Headers),Val};
	_ ->
	    {error,no_content_type}
    end.

%% lists:keysearch(Attr,1,List).
getHeaderValue(Attr,[]) ->
    [];
getHeaderValue(Attr,[{Attr,Value}|Rest]) ->
    Value;
getHeaderValue(Attr,[_|Rest]) ->
    getHeaderValue(Attr,Rest).

getParameterValue(Attr,undefined) ->
    undefined;
getParameterValue(Attr,List) ->
    case lists:keysearch(Attr,1,List) of
	{value,{Attr,Val}} ->
	    Val;
	A ->
	    undefined
    end.

headers([]) -> [];
headers([{Key,Value}|Rest]) when atom(Key) ->
    Head = lists:concat([atom_to_list(Key), ": ", Value, "\r\n"]),
    Head ++ headers(Rest);
headers([{Key,Value}|Rest]) ->
    Head = lists:concat([Key, ": ", Value, "\r\n"]),
    Head ++ headers(Rest).

host_header(Host) ->
    "Host: "++lists:concat([Host])++"\r\n".
connection_header() ->
    "Connection: close \r\n".
content_type_header(ContentType) ->
    "Content-Type: " ++ ContentType ++ "\r\n".
content_length_header(ContentLength) ->
    "Content-Length: "++integer_to_list(ContentLength) ++ "\r\n".
te_header() ->
    "TE: \r\n".
    

method(Method) ->
    httpd_util:to_upper(atom_to_list(Method)).


%% =============================================================================
%% Check to see if the given URL is in the NoProxyList
%% returns {Host,Port,Path}
%%% Default NoProxyList: []
%%% Default UseProxy: false
extractUrlParts(Url) ->
    NoProxyList=application:get_key(http_noproxylist),
    UseProxy=application:get_key(http_useproxy),
    case UseProxy of
	true ->
	    case noProxy(Url,NoProxyList) of
		true ->
		    url_parse:parse(Url);
		_ ->
		    {Host,Port}=application:get_key(http_proxy),
		    {http,Host,Port,Url}
	    end;
	_ ->
	    url_parse:parse(Url)
    end.

%% =============================================================================
%%% Default NoProxyList: []
useProxy(Url) ->
    NoProxyList=application:get_key(http_noproxylist),
    case noProxy(Url,NoProxyList) of
	true -> false;
	_ -> true
    end.

noProxy(Url,undefined) ->
    false;
noProxy(Url,[]) ->
    false;
noProxy(Url,[Host|Rest]) ->
    HostPort = url:host(Url) ++ ":" ++ url:port(Url),
    case HostPort of
	Host -> true;
	_ -> case string:str(Url,Host) of
		 0 -> noProxy(Url,Rest);
		 _ -> true
	     end
    end.

%%% Returns the Timeout value, as found in http_proxy_ini, in milliseconds
%%% Default Timeout: 60000 (= 60 seconds)
get_timeout() ->
    application:get_key(http_timeout).


%%% ============================================================================
%%% FIXME! Only partially implemented!
decode_chunked_body(ChunkBody) ->
    decode_chunked_body(ChunkBody,[]).

%% Note:
%% - Just throws away anything after a zero length chunk, should probably check
%%   so that the request is ok, ie decode any extensions and check for CRLF
%% - Only chunked transfer encoding, with no trailers, is currently supported
decode_chunked_body(ChunkBody,Body) ->
    {Input1,ChunkSize}=decode_hexstr(ChunkBody),
    case ChunkSize of
	0 ->
	    Body;
	_ ->
	    {Input2,ChunkExtension}=decode_chunkextension(Input1),
	    {Input3,Out}=decode_chunk(Input2,ChunkSize),
	    decode_chunked_body(Input3,Body++Out)
    end.

decode_hexstr(Input) ->
    decode_hexstr(Input,0).

decode_hexstr([C|Input],Int) when $0=<C,C=<$9 ->
    decode_hexstr(Input,16*Int+(C-$0));
decode_hexstr([C|Input],Int) when $a=<C,C=<$f ->
    decode_hexstr(Input,16*Int+10+(C-$a));
decode_hexstr([C|Input],Int) when $A=<C,C=<$F ->
    decode_hexstr(Input,16*Int+10+(C-$A));
decode_hexstr(Input,Int) ->
    {Input,Int}.

decode_chunkextension([?CR,?LF|Input]) ->
    {Input,none};
decode_chunkextension([$;|Input]) ->
    throw({error,no_support_of_chunk_extensions_implemented});
decode_chunkextension([A|Input]) when A==$ ;A==$\t -> % FIXME: Check if this ok
    decode_chunkextension(Input);
decode_chunkextension(_) ->
    throw({error,illegal_chunk}).

decode_chunk(Input,ChunkSize) ->
    Chunk=lists:sublist(Input,ChunkSize),
    [?CR,?LF|Rest]=lists:nthtail(ChunkSize,Input),
    {Rest,Chunk}.

%%% ============================================================================
format_status(100) ->   "Continue";
format_status(101) ->   "Switching Protocols" ;
format_status(200) ->   "OK" ;
format_status(201) ->   "Created" ;
format_status(202) ->   "Accepted" ;
format_status(203) ->   "Non-Authoritative Information" ;
format_status(204) ->   "No Content" ;
format_status(205) ->   "Reset Content" ;
format_status(206) ->   "Partial Content" ;
format_status(300) ->   "Multiple Choices" ;
format_status(301) ->   "Moved Permanently" ;
format_status(302) ->   "Moved Temporarily" ;
format_status(303) ->   "See Other" ;
format_status(304) ->   "Not Modified" ;
format_status(305) ->   "Use Proxy" ;
format_status(306) ->   "(unused)" ;
format_status(307) ->   "Temporary Redirect" ;
format_status(400) ->   "Bad Request";
format_status(401) ->   "Unauthorized";
format_status(402) ->   "Payment Required";
format_status(403) ->   "Forbidden" ;
format_status(404) ->   "Object Not Found" ;
format_status(405) ->   "Method Not Allowed" ;
format_status(406) ->   "Not Acceptable" ;
format_status(407) ->   "Proxy Authentication Required" ;
format_status(408) ->   "Request Time-out" ;
format_status(409) ->   "Conflict" ;
format_status(410) ->   "Gone" ;
format_status(411) ->   "Length Required" ;
format_status(412) ->   "Precondition Failed" ;
format_status(413) ->   "Request Entity Too Large" ;
format_status(414) ->   "Request-URI Too Large" ;
format_status(415) ->   "Unsupported Media Type" ;
format_status(416) ->   "Requested Range Not Satisfiable" ;
format_status(417) ->   "Expectation Failed" ;
format_status(500) ->   "Internal Server Error" ;
format_status(501) ->   "Not Implemented" ;
format_status(502) ->   "Bad Gateway" ;
format_status(503) ->   "Service Unavailable" ;
format_status(504) ->   "Gateway Time-out" ;
format_status(505) ->   "HTTP Version not supported".





