libwww_util.h

00001 /* File:      libwww_util.h
00002 ** Author(s): kifer
00003 ** Contact:   xsb-contact@cs.sunysb.edu
00004 ** 
00005 ** Copyright (C) The Research Foundation of SUNY, 2000
00006 ** 
00007 ** XSB is free software; you can redistribute it and/or modify it under the
00008 ** terms of the GNU Library General Public License as published by the Free
00009 ** Software Foundation; either version 2 of the License, or (at your option)
00010 ** any later version.
00011 ** 
00012 ** XSB is distributed in the hope that it will be useful, but WITHOUT ANY
00013 ** WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00014 ** FOR A PARTICULAR PURPOSE.  See the GNU Library General Public License for
00015 ** more details.
00016 ** 
00017 ** You should have received a copy of the GNU Library General Public License
00018 ** along with XSB; if not, write to the Free Software Foundation,
00019 ** Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
00020 **
00021 ** $Id: libwww_util.h,v 1.12 2005/02/22 06:22:02 kifer Exp $
00022 ** 
00023 */
00024 
00025 
00026 
00027 #include "WWWLib.h"
00028 #include "WWWHTTP.h"
00029 #include "WWWInit.h"
00030 #include "HTAABrow.h"
00031 #include "WWWApp.h"
00032 #include "WWWXML.h"
00033 #include "HTUtils.h"
00034 #include "HTTPReq.h"
00035 #include <stdio.h>
00036 #include <string.h>
00037 #include <time.h>
00038 #include "basictypes.h"
00039 #include "basicdefs.h"
00040 #include "auxlry.h"
00041 #include "xsb_config.h"
00042 #include "wind2unix.h"
00043 #include "cell_xsb.h"
00044 #include "error_xsb.h"
00045 #include "cinterf.h"
00046 #include "varstring_xsb.h"
00047 
00048 /* XSB_LIBWWW_PACKAGE is used in http_errors.h */
00049 #define XSB_LIBWWW_PACKAGE
00050 #include "../prolog_includes/http_errors.h"
00051 
00052 
00053 /* definitions and macros included in all files */
00054 
00055 extern int total_number_of_requests;
00056 extern int event_loop_runnung;
00057 HTList *XML_converter=NULL, *RDF_converter=NULL, *HTML_converter=NULL;
00058 
/*
** Debug levels.  The following lines are kept (disabled) as the recipe for
** switching debugging on:
**
**     #define LIBWWW_DEBUG_VERBOSE
**     #define LIBWWW_DEBUG
**     #define LIBWWW_DEBUG_TERSE
**     #include "debug_xsb.h"
**
** The levels cascade: LIBWWW_DEBUG_VERBOSE switches on LIBWWW_DEBUG, and
** LIBWWW_DEBUG in turn switches on LIBWWW_DEBUG_TERSE.
*/
#if defined(LIBWWW_DEBUG_VERBOSE)
#  define LIBWWW_DEBUG
#endif
#if defined(LIBWWW_DEBUG)
#  define LIBWWW_DEBUG_TERSE
#endif
00071 
/* Special tag type that we use to wrap around text.
   The negative literal is parenthesized so that the expansion is always a
   single operand, whatever operators surround the point of use. */
#define PCDATA_SPECIAL    (-77)
00074 
00075 
/* From HTTP.c.
   Calls the `_free` method stored in the class record (`isa`) of t->target,
   passing t->target itself as the argument.
   `t` is parenthesized so that any pointer expression (e.g. `base + 1`) can be
   passed.  It is expanded twice, so do not pass an expression with side
   effects. */
#define FREE_TARGET(t)  ((*((t)->target->isa->_free))((t)->target))
00078 
00079 /* Must define this, since HTStream is just a name aliased to _HTStream */
00080 struct _HTStream {
00081     const HTStreamClass *       isa;
00082 };
00083 
/* The kinds of request; stored in REQUEST_CONTEXT.type and HASH_TABLE.type. */
typedef enum request_type {
  FETCH,
  HTMLPARSE,
  XMLPARSE,
  RDFPARSE,
  HEADER
} REQUEST_TYPE;
00086 
/* A hash-table key: either an integer or a string, sharing the same storage. */
typedef union hkey {
  int   intkey;
  char *strkey;
} HKEY;
00092 struct hash_table {
00093   int          size;
00094   REQUEST_TYPE type;
00095   HKEY         *table;
00096 };
00097 typedef struct hash_table HASH_TABLE;
00098 
/* One authentication record.  Records are chained through `next`. */
typedef struct auth {
  char        *realm;
  char        *uid;    /* username */
  char        *pw;     /* password */
  struct auth *next;   /* next authorization record (used for subrequests) */
} AUTHENTICATION;
00106 
00107 /* used to pass the input info to request and get output info from request back
00108    to the Prolog side*/
00109 struct request_context {
00110   int  request_id;
00111   int  subrequest_id;
00112   int  suppress_is_default;
00113   int  convert2list;    /* if convert pcdata to Prolog lists on exit */
00114   int  is_subrequest;  /* In XML parsing, we might need to go to a different
00115                           URI to fetch an external reference. This spawns a new
00116                           blocking subrequest with the same context. */
00117   int  statusOverride; /* If set, this status code should replace the one
00118                           returned by libwww */
00119   time_t last_modtime; /* page modtime */
00120   /* data structure where we build parsed terms, etc. */
00121   void *userdata;
00122   /* input */
00123   REQUEST_TYPE type;        /* request type: html/xml parsing, fetching page */
00124   int  timeout;
00125   time_t user_modtime;      /* oldest modtime the user can tolerate */
00126   prolog_term formdata;
00127   AUTHENTICATION auth_info; /* list of name/pw pairs */
00128   int            retry;     /* whether to retry authentication */
00129   HTMethod   method;
00130   HASH_TABLE selected_tags_tbl;
00131   HASH_TABLE suppressed_tags_tbl;
00132   HASH_TABLE stripped_tags_tbl;
00133   /* output */
00134   prolog_term status_term;
00135   prolog_term result_params;  /* additional params returned in the result */
00136   prolog_term request_result; /* either the parse tree of a string containing
00137                                  the HTML page */
00138   HTChunk     *result_chunk;  /* used only by the FETCH method. Here we get the
00139                                  resulting page before converting it to
00140                                  prolog_term */
00141 };
00142 typedef struct request_context REQUEST_CONTEXT;
00143 
/* Function type taking a `userdata` pointer and returning nothing.
   NOTE(review): judging by the name, this is the destructor type for
   REQUEST_CONTEXT.userdata -- confirm at the call sites. */
typedef void DELETE_USERDATA(void *userdata);
00145 
/* Works like strcpy(to, from), except that the copied text is also converted
   to lowercase. */
void strcpy_lower(char *to, const char *from);
00148 
00149 
00150 int add_to_htable(HKEY item, HASH_TABLE *htable);
00151 int is_in_htable(const HKEY item, HASH_TABLE *htable);
00152 
00153 
00154 void print_prolog_term(prolog_term term, char *message);
00155 
/* Takes no arguments and returns nothing.
   NOTE(review): by its name this installs the callbacks used for HTML parsing;
   the implementation is not visible here.
   Declared with an explicit (void): an empty `()` list is an old-style
   declaration that lets callers pass any arguments unchecked.  This also
   matches the set_*_conversions(void) prototypes in this header. */
void html_register_callbacks(void);
00157 void HTXML_newInstance (HTStream *              me,
00158                         HTRequest *             request,
00159                         HTFormat                target_format,
00160                         HTStream *              target_stream,
00161                         XML_Parser              xmlparser,
00162                         void *                  context);
00163 void libwww_newRDF_parserHandler (HTStream *            me,
00164                                   HTRequest *           request,
00165                                   HTFormat              target_format,
00166                                   HTStream *            target_stream,
00167                                   HTRDF *               rdfparser,
00168                                   void *                context);
00169 void add_result_param(prolog_term *result_param, 
00170                       char *functor, int cnt, ...);
00171 void report_asynch_subrequest_status(HTRequest *request, int status);
00172 void report_synch_subrequest_status(HTRequest *request, int status);
00173 int verifyMIMEformat(HTRequest *request, REQUEST_TYPE type);
00174 char *RequestID(HTRequest *request);
00175 
00176 int xml_entity_termination_handler(HTRequest   *request,
00177                                    HTResponse  *response,
00178                                    void      *param,
00179                                    int       status);
00180 REQUEST_CONTEXT *set_subrequest_context(HTRequest *request,
00181                                         HTRequest *subrequest,
00182                                         prolog_term result_term);
00183 void setup_termination_filter(HTRequest *request, HTNetAfter *filter);
/* Conversion set-up entry points, one per format (XML, RDF, HTML).  None of
   them takes arguments or returns a value.
   NOTE(review): presumably these fill the XML_converter / RDF_converter /
   HTML_converter lists -- confirm in the implementation. */
void set_xml_conversions(void);
void set_rdf_conversions(void);
void set_html_conversions(void);
00187 
/* Non-zero when `status` equals one of the eight codes listed: the
   access/re-authentication codes (HT_NO_ACCESS, HT_NO_PROXY_ACCESS, HT_REAUTH,
   HT_PROXY_REAUTH) or the redirection codes (HT_SEE_OTHER, HT_PERM_REDIRECT,
   HT_FOUND, HT_TEMP_REDIRECT).
   `status` is parenthesized at every use, so any expression (for example a
   conditional expression) may be passed.  It can still be expanded up to
   eight times, so do not pass an expression with side effects. */
#define AUTH_OR_REDIRECTION(status) \
    (((status) == HT_NO_ACCESS) || ((status) == HT_NO_PROXY_ACCESS) \
       || ((status) == HT_REAUTH) || ((status) == HT_PROXY_REAUTH) \
       || ((status) == HT_SEE_OTHER) || ((status) == HT_PERM_REDIRECT) \
       || ((status) == HT_FOUND) || ((status) == HT_TEMP_REDIRECT))

Generated on Wed Jul 26 13:30:45 2006 for XSB by  doxygen 1.4.5