//<copyright>
//
// Copyright (c) 1995,96
// Institute for Information Processing and Computer Supported New Media (IICM),
// Graz University of Technology, Austria.
//
// This file is part of VRweb.
//
// VRweb is free software; you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation; either version 2, or (at your option)
// any later version.
//
// VRweb is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License for more details.
//
// You should have received a copy of the GNU General Public License
// along with VRweb; see the file LICENCE. If not, write to the
// Free Software Foundation, Inc., 59 Temple Place - Suite 330,
// Boston, MA 02111-1307, USA.
//
// Note that the GNU General Public License does not permit incorporating
// the Software into proprietary or commercial programs. Such usage
// requires a separate license from IICM.
//
//</copyright>

//<file>
//
// Name:        httpreader.C
//
// Purpose:     get a URL via the HTTP protocol
//
// Created:     30 Nov 95   Michael Pichler
//
// Changed:     30 Jul 96   Michael Pichler
//
// $Id: httpreader.C,v 1.9 1997/02/25 17:03:58 mpichler Exp $
//
//</file>


#include "httpreader.h"

#include <hyperg/utils/str.h>
/* #include <hyperg/utils/hgunistd.h> */
#include <hyperg/utils/inetsocket.h>
#include <hyperg/http/httpresponse.h>

#include <hyperg/WWW/HTParse.h>
#undef DEBUG  /* also defined in HTParse.h */
#undef CR  /* do I hate header files defining useless symbols? */
#include <hyperg/utils/verbose.h>

#include <hyperg/Dispatch/dispatcher.h>
#include <hyperg/Dispatch/iohandler.h>

#include <string.h>
#include <stdlib.h>



/***** HTTPIOHandler ****/


// HTTPIOHandler: helper class fetching data from an HTTP socket connection.
// Incoming bytes are fed into the reader's HTTPResponse parser until the
// header is complete; the bytes following the header (entity body) are
// written to the reader's output stream (see inputReady).
// The constructor links the handler to the Dispatcher for input on the
// socket, the destructor unlinks it again.

class HTTPIOHandler: public IOHandler
{
  public:
    HTTPIOHandler (                     // constructor
      HTTPReader* reader,               //   master
      const INETSocketPtr& conn         //   socket for HTTP connection
    );
    ~HTTPIOHandler ();

    // IOHandler
    int inputReady (int fd);

  private:
    // not copyable: the destructor of a copy would unlink the same socket
    // from the Dispatcher a second time (declared only, never defined)
    HTTPIOHandler (const HTTPIOHandler&);
    HTTPIOHandler& operator= (const HTTPIOHandler&);

    HTTPReader* reader_;                // master, notified about progress and completion
    INETSocketPtr conn_;                // keeps the socket alive as long as the handler exists

}; // HTTPIOHandler



// constructor: remembers the master reader and the connection and
// registers this handler with the Dispatcher for input on the socket

HTTPIOHandler::HTTPIOHandler (HTTPReader* reader, const INETSocketPtr& conn)
: reader_ (reader),
  conn_ (conn)
{
  DEBUGNL ("HTTPIOHandler created");

  // from now on inputReady is called whenever the socket becomes readable
  const int sockfd = conn_.ptr ()->fd ();
  Dispatcher::instance().link (sockfd, Dispatcher::ReadMask, this);
}


// destructor: takes the socket out of the Dispatcher again
// (counterpart of the link call in the constructor)

HTTPIOHandler::~HTTPIOHandler ()
{
  DEBUGNL ("~HTTPIOHandler: unlinking from Dispatcher");

  const int sockfd = conn_.ptr ()->fd ();
  Dispatcher::instance().unlink (sockfd);
}


// inputReady: reads the next chunk of data (up to 1024 bytes) from the
// HTTP socket.
// While the response header is incomplete the bytes are fed into the
// reader's HTTPResponse parser; bytes following the header are written
// to the reader's output stream.
// As soon as the header is complete the status class (first character of
// the status code) decides how to go on:
//   2xx   headerComplete is called, the entity body will be fetched
//   3xx   the request is initialised anew for the Location given
//   else  the reader is told about the failure (err_servererror)
// EOF or a read error ends the transfer: success when the header was
// complete, failure (err_badheader) otherwise.
// Always returns 0.
//
// Caution: reader->success (), reader->failure () and reader->init ()
// destroy this handler. The reader pointer is therefore copied into a
// local variable and no member is touched after one of these calls.

int HTTPIOHandler::inputReady (int)
{
  const int buffersize = 1024;
  char tmpbuf [buffersize];

  int nread = conn_.ptr ()->read (tmpbuf, buffersize);
  int nhdrbytes = 0;
  HTTPReader* reader = reader_;  // local copy, this may be destroyed below
  HTTPResponse* response = reader->response_;

  if (nread <= 0)  // EOF or error
  {
    if (response && !response->complete ())
    {
      // connection ended before the header was complete: there is no
      // document at all, so this must not be reported as success
      DEBUGNL ("HTTPIOHandler::inputReady: no more input, header incomplete (failure).");
      reader->error_ = HTTPReader::err_badheader;
      reader->failure ();  // will destroy me too, unlink in destructor
      return 0;
    }

    DEBUGNL ("HTTPIOHandler::inputReady: no more input (success).");
    // NOTE(review): a read error (nread < 0) after a complete header is
    // still reported as success; no error code for it is visible here
    reader->success ();  // will destroy me too, unlink in destructor
    return 0;
  }

  if (!response->complete ())
  {
    nhdrbytes = response->add (tmpbuf, nread);  // feed some bytes into response parser

    if (!response->ok ())
    {
      // a simple response without header (HTTP/0.9 protocol) is also considered an error
      DEBUGNL ("HTTPIOHandler::inputReady: error in HTTP header (failure).");
      reader->error_ = HTTPReader::err_badheader;
      reader->failure ();  // will destroy me too, unlink in destructor
      return 0;
    }

    if (response->complete ())
    {
      HTTPResponse* resp = reader->response ();
      const RString& code = resp->code ();
      DEBUGNL ("HTTPIOHandler::inputReady: header complete. response code: " << code);
      char status = *code.string ();  // status class (first char of status code)

      switch (status)
      {
        case '2':  // OK
        {
          const RString& contentlen = resp->hdr ().contentLength ();
          DEBUGNL ("  response 2xx: all ok, document will follow; length: " << contentlen);
          reader->bytestotal_ = ::atol (contentlen.string ());
          reader->headerComplete ();
        }
        break;  // data behind the header (if any) is handled below

        case '3':  // redirect
        {
          DEBUGNL ("  response 3xx: will carry out redirect");
          // reader->error_ = HTTPReader::err_redirect;  // info for headerComplete
          // reader->headerComplete ();

          const HTTPHeader& header = resp->hdr ();
          const RString& newlocation = header.location ();
          DEBUGNL ("redirect to " << newlocation.string ());
          // init the new request; destroys (and unlinks) me
          // NOTE(review): init may leave reader->error_ set (e.g. target is
          // not http); presumably redirectRequest copes with that - confirm
          reader->init (newlocation.string ());
          // this will create a new HTTPIOHandler for the redirected request
          reader->redirectRequest ();
          // reader keeps alive, do not call success or failure
          // there is usually no redirect when contacting a proxy
          // (even if it chooses not to cache a specific host)
        }
        return 0;

        default:  // error code
          DEBUGNL ("  response 4xx: server error condition. no data will arrive.");
          // no data will be fetched, headerComplete is not called
          reader->error_ = HTTPReader::err_servererror;
          cerr << "error indicated by server. response: " + resp->version () + " " + code + " " + resp->reason () << endl;
          reader->failure ();  // will destroy me too, unlink in destructor
        return 0;
      }
    } // header completed
  } // header incomplete

  ostream* os = reader->os_;
  if (!os)
  { cerr << "HTTPIOHandler: error: output stream not set" << endl;
    return 0;
  }

/*DEBUGNL ("HTTPIOHandler::inputReady: " << nread << " bytes total read, thereof " << nhdrbytes << " for header");*/

  // remaining bytes: data (header complete)
  nread -= nhdrbytes;
  if (nread > 0)
  {
    os->write (tmpbuf + nhdrbytes, nread);
    reader->bytesread_ += nread;
    reader->documentData ();  // tell the reader that new data arrived
  }

  return 0;

} // inputReady



/***** HTTPReader *****/


// static members: proxy settings, changed by setProxy and read by connect
RString HTTPReader::proxyhost_;  // proxy host name; empty: hosts are contacted directly
int HTTPReader::proxyport_ = 80;  // proxy port number


// constructor: stores the parameters of the request and prepares it (init)
//   url        URL to fetch, possibly relative to parenturl
//   parenturl  base for relative URLs; also used for the Referer field
//   appname    used for the User-Agent field
//   os         stream the document data is written to
// the transfer itself is not started before connect is called

HTTPReader::HTTPReader (const char* url, const char* parenturl, const char* appname, ostream* os)
{
  DEBUGNL ("HTTPReader created");

  // no connection and no response yet
  // (ioh_ has to be cleared before init, which deletes it)
  response_ = 0;
  ioh_ = 0;

  // counters for progress feedback
  bytesread_ = 0L;
  bytestotal_ = 0L;

  os_ = os;
  appname_ = appname;
  parenturl_ = parenturl;
  fullurl_ = url;  // composed anew in init

  init (url);
}


// destructor: gets rid of the IO handler (which unlinks itself from the
// Dispatcher in its destructor) and of the response

HTTPReader::~HTTPReader ()
{
  DEBUGNL ("~HTTPReader ...");

  delete ioh_;       // handler first, as in the order of creation reversed
  delete response_;

  DEBUGNL ("... ~HTTPReader finished.");
}


// header: header fields of the current response
// must not be called before the header is complete
// (response_ is nil until connect has been called)

const HTTPHeader& HTTPReader::header ()
{
  HTTPResponse* resp = response_;
  return resp->hdr ();
}


// init: prepare a HTTP request; error should be checked afterwards

void HTTPReader::init (const char* url)
{
  char* str;
  host_ = "";
  port_ = 80;
  path_ = "";
  // on redirection: clear old IOhandler
  delete ioh_;
  ioh_ = 0;

  // check protocol (must be http)
  // note: proxy may be able to fetch data from other protocols and send them via http
  str = HTParse (url, parenturl_, PARSE_ACCESS);
  if (strcmp (str, "http"))
  { error_ = err_nothttp;
    free (str);
    return;
  }
  free (str);

  RString portstr;  // if non-default, with leading ":"

  // get host name and port number (host:port)
  str = HTParse (url, parenturl_, PARSE_HOST);
  char* colpos = strchr (str, ':');
  if (colpos)
  {
    portstr = colpos;
    *colpos = '\0';
    port_ = atoi (colpos + 1);
  }
  // else host only; default port

  host_ = str;

  free (str);

  // path
  str = HTParse (url, parenturl_, PARSE_PATH);
  if (*str == '/')
    path_ = str;
  else  // absolute path should begin with '/' (absolute)
    path_ = RString ("/") + str;
  free (str);

  DEBUGNL ("HTTPReader: trying connection to host " << host_ << ':' << port_);
  DEBUGNL ("  path: " << path_);

  fullurl_ = "http:/""/" + host_ + portstr + path_;
  DEBUGNL ("complete URL: " << fullurl_);

  error_ = err_none;  // ready for connect

} // init


// setProxy host:port

void HTTPReader::setProxy (const RString& hostport)
{
  const char* hpstr = hostport.string ();  // non-nil
  char* colpos = (char*) strrchr (hpstr, ':');
  proxyport_ = 80;
  if (colpos)  // host:port
  {
    sscanf (colpos + 1, "%d", &proxyport_);
    *colpos = '\0';
    proxyhost_ = hpstr;
    *colpos = ':';
  }
  else  // host only
    proxyhost_ = hostport;
  DEBUGNL ("http connections made via proxy host " << proxyhost_ << ", port " << proxyport_);
}


// connect: issue HTTP request (connect is blocking, transfer non-blocking)
// Opens a socket to the proxy (if one was set with setProxy) or to the
// host itself, sends a GET request (HTTP/1.0) and creates the
// HTTPIOHandler which receives the response.
// init must have succeeded before. Afterwards error_ is err_noconnect
// when the connection could not be established, err_none otherwise.

void HTTPReader::connect ()
{
  if (error_)
  { cerr << "internal error: trial to connect to " << fullurl_ << endl;
    cerr << "without call of init or on error in init" << endl;
    return;
  }

  // open connection
  INETSocketPtr conn;
  if (proxyhost_.length ())  // go through proxy
  { DEBUGNL ("fetching " << fullurl_ << " via proy " << proxyhost_ << ':' << proxyport_);
    conn = new INETSocket (proxyhost_, proxyport_);
  }
  else  // contact host directly
  { DEBUGNL ("fetching " << fullurl_ << " directly from host " << host_);
     conn = new INETSocket (host_, port_);
  }
  if (!conn.ptr ()->ok ())
  {
    DEBUGNL ("HTTPReader: connection failed");
    error_ = err_noconnect;
    return;
  }

  // compose the request; each line is terminated by CR LF as demanded
  // by the HTTP specification (a bare LF is merely tolerated by servers)
  RString request ("GET ");
  if (proxyhost_.length ())
    request += fullurl_;  // a proxy needs the complete URL
  else
    request += path_;
  request += " HTTP/1.0\r\n";
  if (appname_.length ())
    request += RString ("User-Agent: ") + appname_ + "\r\n";
  // some (buggy?) servers need an Accept field; mpichler, 19960627
  // "*/*" is the valid media range for "anything" (a lone "*" is not)
  request += "Accept: */*\r\n";
  if (parenturl_.length ())
    request += RString ("Referer: ") + parenturl_ + "\r\n";
  request += "\r\n";  // empty line completes request

  DEBUGNL ("HTTPReader: sending HTTP request...\n" << request);

  conn.ptr ()->write (request, request.length ());

  delete response_;
  response_ = new HTTPResponse;  // takes the response line and header

  delete ioh_;  // should be NULL
  ioh_ = new HTTPIOHandler (this, conn);
  // connection (socket class) will be deleted via smart pointer mechanism

  error_ = err_none;  // request sent, data will arrive asynchronously via Dispatcher loop

} // connect
