libwww_parse_xml.c

00001 /* File:      libwww_parse_xml.c
00002 ** Author(s): kifer, Yang Yang
00003 ** Contact:   xsb-contact@cs.sunysb.edu
00004 ** 
00005 ** Copyright (C) The Research Foundation of SUNY, 2000
00006 ** 
00007 ** XSB is free software; you can redistribute it and/or modify it under the
00008 ** terms of the GNU Library General Public License as published by the Free
00009 ** Software Foundation; either version 2 of the License, or (at your option)
00010 ** any later version.
00011 ** 
00012 ** XSB is distributed in the hope that it will be useful, but WITHOUT ANY
00013 ** WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00014 ** FOR A PARTICULAR PURPOSE.  See the GNU Library General Public License for
00015 ** more details.
00016 ** 
00017 ** You should have received a copy of the GNU Library General Public License
00018 ** along with XSB; if not, write to the Free Software Foundation,
00019 ** Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
00020 **
00021 ** $Id: libwww_parse_xml.c,v 1.15 2003/12/31 01:04:51 kifer Exp $
00022 ** 
00023 */
00024 
00025 
00026 #include "libwww_util.h"
00027 #include "libwww_parse.h"
00028 #include "libwww_parse_xml.h"
00029 
00030 
00031 /* BOOL, PRIVATE, PUBLIC, etc., are defined in a Libwww header */
00032 
00033 /* ------------------------------------------------------------------------- */
00034 /*                           HTXML STREAM HANDLERS                           */
00035 /* ------------------------------------------------------------------------- */
00036 
00037 PRIVATE void HTXML_setHandlers (XML_Parser me)
00038 {
00039   XML_SetElementHandler(me, xml_beginElement, xml_endElement);
00040   XML_SetCharacterDataHandler(me, xml_addText);
00041   XML_SetProcessingInstructionHandler(me, xml_processingInstruction);
00042   XML_SetUnparsedEntityDeclHandler(me, xml_unparsedEntityDecl);
00043   XML_SetNotationDeclHandler(me, xml_notationDecl);
00044   XML_SetExternalEntityRefHandler(me, xml_externalEntityRef);
00045   XML_SetUnknownEncodingHandler(me, xml_unknownEncoding, NULL);
00046 
00047   /* This exists only in expat 1.1. This version doesn't prohibit expansion of
00048      internal entities. Commented until expat 1.1 is included in libwww
00049 
00050      XML_SetDefaultHandlerExpand(me, xml_default);
00051   */
00052 }
00053 
00054 void HTXML_newInstance (HTStream *              me,
00055                         HTRequest *             request,
00056                         HTFormat                target_format,
00057                         HTStream *              target_stream,
00058                         XML_Parser              xmlparser,
00059                         void *                  context)
00060 {
00061   USERDATA *userdata = xml_create_userData(xmlparser, request, target_stream);
00062   XML_SetUserData(xmlparser, (void *) userdata);
00063   if (me && xmlparser) HTXML_setHandlers(xmlparser);
00064 }
00065 
00066 
00067 
00068 /* This is the callback that captures start tag events */
00069 PRIVATE void xml_beginElement(void  *userdata, /* where we build everything */
00070                               const XML_Char *tag, /* tag */
00071                               const XML_Char **attributes)
00072 {
00073   USERDATA *userdata_obj = (USERDATA *) userdata;
00074 
00075 #ifdef LIBWWW_DEBUG
00076   xsb_dbgmsg((LOG_DEBUG,"***In xml_beginElement(%s): stackptr=%d tag=%s suppress=%d choose=%d",
00077              RequestID(userdata_obj->request),
00078              userdata_obj->stackptr, tag,
00079              IS_SUPPRESSED_TAG((HKEY)(char *)tag, userdata_obj->request),
00080              IS_SELECTED_TAG((HKEY)(char *)tag, userdata_obj->request)
00081               ));
00082 #endif
00083 
00084   if (IS_STRIPPED_TAG((HKEY)(char *)tag, userdata_obj->request)) return;
00085 
00086   if ((suppressing(userdata_obj)
00087        && !IS_SELECTED_TAG((HKEY)(char *)tag, userdata_obj->request))
00088       || (parsing(userdata_obj)
00089           && IS_SUPPRESSED_TAG((HKEY)(char *)tag, userdata_obj->request))) {
00090     xml_push_suppressed_element(userdata_obj, tag);
00091     return;
00092   }
00093 
00094   /* parsing or suppressing & found a selected tag */
00095   if ((parsing(userdata_obj)
00096        && !IS_SUPPRESSED_TAG((HKEY)(char *)tag, userdata_obj->request))
00097       || (suppressing(userdata_obj) 
00098           && IS_SELECTED_TAG((HKEY)(char *)tag, userdata_obj->request))) {
00099     xml_push_element(userdata_obj,tag,attributes);
00100     return;
00101   }
00102 }
00103 
00104 
00105 /* The callback for the end-tag event */
00106 PRIVATE void xml_endElement (void *userdata, const XML_Char *tag)
00107 {
00108   USERDATA *userdata_obj = (USERDATA *) userdata;
00109 
00110 #ifdef LIBWWW_DEBUG
00111   xsb_dbgmsg((LOG_DEBUG,"***In xml_endElement(%s): stackptr=%d, tag=%s",
00112              RequestID(userdata_obj->request),
00113              userdata_obj->stackptr, tag
00114               ));
00115 #endif
00116 
00117   if (IS_STRIPPED_TAG((HKEY)(char *)tag, userdata_obj->request)) return;
00118 
00119   /* Expat does checking for tag mismatches, so we don't have to */
00120   if (parsing(userdata_obj))
00121     xml_pop_element(userdata_obj);
00122   else
00123     xml_pop_suppressed_element(userdata_obj);
00124 
00125 #ifdef LIBWWW_DEBUG_VERBOSE
00126   if (userdata_obj->stackptr >= 0) {
00127     if (!STACK_TOP(userdata_obj).suppress)
00128       print_prolog_term(STACK_TOP(userdata_obj).elt_term, "elt_term");
00129   }
00130 #endif
00131 
00132   return;
00133 }
00134 
00135 
00136 
00137 /* The callback to capture text events */
00138 PRIVATE void xml_addText (void           *userdata,
00139                           const XML_Char *textbuf,
00140                           int            len)
00141 {
00142   USERDATA *userdata_obj = (USERDATA *) userdata;
00143   static XSB_StrDefine(pcdata_buf);
00144   int shift = 0;
00145   REQUEST_CONTEXT *context =
00146     (REQUEST_CONTEXT *)HTRequest_context(userdata_obj->request);
00147 
00148 #ifdef LIBWWW_DEBUG
00149   xsb_dbgmsg((LOG_DEBUG,"***In xml_addText (%s)", RequestID(userdata_obj->request)));
00150 #endif
00151 
00152   if (IS_STRIPPED_TAG((HKEY)"pcdata", userdata_obj->request)) return;
00153   if (suppressing(userdata_obj)) return;
00154 
00155   /* strip useless newlines */
00156   if (strncmp(textbuf,"\n", len) == 0) return;
00157 
00158   if (!xml_push_element(userdata_obj, "pcdata", NULL))
00159     return;
00160 
00161   /* copy textbuf (which isn't null-terminated) into a variable length str */
00162   XSB_StrEnsureSize(&pcdata_buf, len+1);
00163   strncpy(pcdata_buf.string, textbuf, len);
00164   pcdata_buf.length = len;
00165   XSB_StrNullTerminate(&pcdata_buf);
00166 
00167   /* if string starts with a newline, skip the newline */
00168   if (strncmp(textbuf,"\n", strlen("\n")) == 0)
00169     shift = strlen("\n");
00170 
00171 #ifdef LIBWWW_DEBUG_VERBOSE
00172   xsb_dbgmsg((LOG_DEBUG,"***In addText: pcdata=%s", pcdata_buf.string+shift));
00173 #endif
00174 
00175   /* put the text string into the elt term and then pop it */
00176   if (context->convert2list)
00177     c2p_chars(pcdata_buf.string+shift,
00178               p2p_arg(STACK_TOP(userdata_obj).elt_term,3));
00179   else
00180     c2p_string(pcdata_buf.string+shift,
00181                p2p_arg(STACK_TOP(userdata_obj).elt_term,3));
00182 
00183   xml_pop_element(userdata_obj);
00184   return;
00185 }
00186 
00187 
00188 /* Collect tag's attributes and make them into a list of the form
00189    [attval(attr,val), ...]; bind it to Arg 2 of ELT_TERM */
00190 PRIVATE void collect_xml_attributes (prolog_term     elt_term,
00191                                      const XML_Char  **attrs)
00192 {
00193   static XSB_StrDefine(attrname);
00194   prolog_term
00195     prop_list = p2p_arg(elt_term,2),
00196     prop_list_tail = prop_list,
00197     prop_list_head;
00198 
00199   c2p_list(prop_list_tail);
00200 
00201   while (attrs && *attrs) {
00202     XSB_StrEnsureSize(&attrname, strlen((char *)*attrs));
00203     strcpy_lower(attrname.string, (char *)*attrs);
00204     
00205 #ifdef LIBWWW_DEBUG_VERBOSE
00206     xsb_dbgmsg((LOG_DEBUG,"***attr=%s", attrname.string));
00207 #endif
00208     prop_list_head = p2p_car(prop_list_tail);
00209     c2p_functor("attval",2,prop_list_head);
00210     c2p_string(attrname.string, p2p_arg(prop_list_head,1));
00211     /* get value */
00212     attrs++;
00213     /* if *attrs=NULL, then it is an error: expat will stop */
00214     if (*attrs)
00215       c2p_string((char *)*attrs, p2p_arg(prop_list_head, 2));
00216     
00217     prop_list_tail = p2p_cdr(prop_list_tail);
00218     c2p_list(prop_list_tail);
00219     attrs++;
00220   }
00221   
00222   /* Terminate the property list */
00223   c2p_nil(prop_list_tail);
00224   return;
00225 }
00226 
00227 
00228 /* push element onto USERDATA->stack */
00229 PRIVATE int xml_push_element (USERDATA    *userdata,
00230                                const XML_Char  *tag,
00231                                const XML_Char  **attrs)
00232 {
00233   static XSB_StrDefine(lower_tagname);
00234   prolog_term location;
00235 
00236   /*   If tag is not valid */
00237   if (tag == NULL) return TRUE;
00238 
00239   if (userdata->stackptr < 0)
00240     location = userdata->parsed_term_tail;
00241   else 
00242     location = STACK_TOP(userdata).content_list_tail;
00243 
00244   userdata->stackptr++;
00245 
00246 #ifdef LIBWWW_DEBUG
00247   xsb_dbgmsg((LOG_DEBUG,"***In xml_push_element(%s): stackptr=%d tag=%s",
00248               RequestID(userdata->request), userdata->stackptr, tag));
00249 #endif
00250 
00251   CHECK_STACK_OVERFLOW(userdata);
00252 
00253   /* wire the new elt into where it should be in the content list */
00254   STACK_TOP(userdata).elt_term = p2p_car(location);
00255 
00256   STACK_TOP(userdata).tag = (XML_Char *)tag; /* cast to discard const
00257                                                 declaration */
00258   STACK_TOP(userdata).suppress = FALSE;
00259 
00260   /* lowercase the tag */
00261   XSB_StrEnsureSize(&lower_tagname, strlen(tag)+1);
00262   strcpy_lower(lower_tagname.string, tag);
00263 
00264   /* normal tags look like elt(tagname, attrlist, contentlist);
00265      pcdata tags are: elt(pcdata,[],text); */
00266   if (XSB_StrCmp(&lower_tagname, "pcdata")==0)
00267     c2p_functor("elt",3,STACK_TOP(userdata).elt_term);
00268   else /* normal elt */
00269     c2p_functor("elt",3,STACK_TOP(userdata).elt_term);
00270 
00271   c2p_string(lower_tagname.string, p2p_arg(STACK_TOP(userdata).elt_term, 1));
00272   collect_xml_attributes(STACK_TOP(userdata).elt_term, attrs);
00273   
00274 #ifdef LIBWWW_DEBUG_VERBOSE
00275   xsb_dbgmsg((LOG_DEBUG,"***elt_name=%s", lower_tagname.string));
00276   print_prolog_term(STACK_TOP(userdata).elt_term, "elt_term");
00277 #endif
00278 
00279   /* normal element */
00280   if (XSB_StrCmp(&lower_tagname, "pcdata")!=0) {
00281     STACK_TOP(userdata).content_list_tail =
00282       p2p_arg(STACK_TOP(userdata).elt_term,3);
00283     c2p_list(STACK_TOP(userdata).content_list_tail);
00284   }
00285   return TRUE;
00286 }
00287 
00288 
00289 /* When done with an elt, close its contents list and pop the stack */
00290 PRIVATE void xml_pop_element(USERDATA *userdata)
00291 {
00292 #ifdef LIBWWW_DEBUG_VERBOSE
00293   xsb_dbgmsg((LOG_DEBUG,"***In xml_pop_element(%s): stackptr=%d, elt_name=%s",
00294              RequestID(userdata->request),
00295              userdata->stackptr,
00296               STACK_TOP(userdata).tag));
00297 #endif
00298   /* close the property list, for normal elements */
00299   if (strcasecmp(STACK_TOP(userdata).tag, "pcdata")!=0) {
00300     c2p_nil(STACK_TOP(userdata).content_list_tail);
00301   }
00302 
00303   /* insert new list cell into the tail and change content_list_tail to point
00304      to the new tail */
00305   if (userdata->stackptr > 0) {
00306     STACK_PREV(userdata).content_list_tail =
00307       p2p_cdr(STACK_PREV(userdata).content_list_tail);
00308     c2p_list(STACK_PREV(userdata).content_list_tail);
00309   } else {
00310     userdata->parsed_term_tail = p2p_cdr(userdata->parsed_term_tail);
00311     c2p_list(userdata->parsed_term_tail);
00312   }
00313 
00314   userdata->stackptr--;
00315 
00316 #ifdef LIBWWW_DEBUG_VERBOSE
00317   if (userdata->stackptr >= 0)
00318     print_prolog_term(STACK_TOP(userdata).content_list_tail,
00319                       "content_list_tail");
00320   else
00321     print_prolog_term(userdata->parsed_term_tail, "parsed_term_tail");
00322 #endif
00323 
00324   return;
00325 }
00326 
00327 
00328 /* Push tag, but keep only the tag info; don't convert to prolog term */
00329 PRIVATE void xml_push_suppressed_element(USERDATA   *userdata,
00330                                          const XML_Char *tag)
00331 {
00332   /* non-empty tag */
00333   userdata->stackptr++; /* advance ptr, but don't push tag */
00334 
00335   STACK_TOP(userdata).tag = (XML_Char *)tag; /* cast to discard const
00336                                                 declaration */
00337   STACK_TOP(userdata).suppress = TRUE;
00338 
00339   /* passing content list tail through suppressed elements */
00340   if (userdata->stackptr == 0)
00341     STACK_TOP(userdata).content_list_tail = userdata->parsed_term_tail;
00342   else 
00343     STACK_TOP(userdata).content_list_tail =
00344       STACK_PREV(userdata).content_list_tail;
00345 
00346   return;
00347 }
00348 
00349 
00350 PRIVATE void xml_pop_suppressed_element(USERDATA *userdata)
00351 {
00352   /* chain the list tails back through the sequence of suppressed tags */
00353   if (userdata->stackptr > 0) {
00354     STACK_PREV(userdata).content_list_tail = STACK_TOP(userdata).content_list_tail;
00355   } else {
00356     userdata->parsed_term_tail = STACK_TOP(userdata).content_list_tail;
00357   }
00358 
00359   userdata->stackptr--;
00360 
00361 #ifdef LIBWWW_DEBUG_VERBOSE
00362   xsb_dbgmsg((LOG_DEBUG,"***In xml_pop_suppressed_element(%s): stackptr=%d",
00363               RequestID(userdata->request), userdata->stackptr));
00364   if (userdata->stackptr >= 0)
00365     print_prolog_term(STACK_TOP(userdata).content_list_tail, "content_list_tail");
00366   else
00367     print_prolog_term(userdata->parsed_term_tail, "parsed_term_tail");
00368 #endif
00369 
00370   return;
00371 }
00372 
00373 
00374 PRIVATE USERDATA *xml_create_userData(XML_Parser parser,
00375                                       HTRequest *request,
00376                                       HTStream  *target_stream)
00377 {
00378   USERDATA *me = NULL;
00379 #ifdef LIBWWW_DEBUG
00380   xsb_dbgmsg((LOG_DEBUG,"***Start xml_create_userData: Request %s", RequestID(request)));
00381 #endif
00382   if (parser) {
00383     /* make sure that MIME type is appropriate for XML */
00384     if (!verifyMIMEformat(request, XMLPARSE)) {
00385       /*
00386         HTStream * input = HTRequest_inputStream(request);
00387         (*input->isa->abort)(input, NULL);
00388         HTRequest_setInputStream(request,NULL);
00389         HTRequest_kill(request);
00390         return NULL;
00391       */
00392       xsb_abort("[LIBWWW_REQUEST] Bug: Request type/MIME type mismatch");
00393     }
00394     if ((me = (USERDATA *) HT_CALLOC(1, sizeof(USERDATA))) == NULL)
00395       HT_OUTOFMEM("libwww_parse_xml");
00396     me->delete_method = xml_delete_userData;
00397     me->parser = parser;
00398     me->request = request;
00399     me->target = target_stream;
00400     me->suppress_is_default = 
00401       ((REQUEST_CONTEXT *)HTRequest_context(request))->suppress_is_default;
00402     me->parsed_term = p2p_new();
00403     c2p_list(me->parsed_term);
00404     me->parsed_term_tail = me->parsed_term;
00405     SETUP_STACK(me);
00406   }
00407 
00408 #ifdef LIBWWW_DEBUG
00409   xsb_dbgmsg((LOG_DEBUG,"***End xml_create_userData: Request %s", RequestID(request)));
00410 #endif
00411 
00412   /* Hook up userdata to the request context */
00413   ((REQUEST_CONTEXT *)HTRequest_context(request))->userdata = (void *)me;
00414 
00415   return me;
00416 }
00417 
00418 
00419 PRIVATE void xml_delete_userData(void *userdata)
00420 {
00421   prolog_term parsed_result, status_term;
00422   USERDATA *me = (USERDATA *)userdata;
00423   HTRequest *request = me->request;
00424 
00425   if (me->request) {
00426     parsed_result =
00427       ((REQUEST_CONTEXT *)HTRequest_context(request))->request_result;
00428     status_term =
00429       ((REQUEST_CONTEXT *)HTRequest_context(request))->status_term;
00430   } else return;
00431 
00432 #ifdef LIBWWW_DEBUG
00433   xsb_dbgmsg((LOG_DEBUG,"***In xml_delete_userData(%s): stackptr=%d",
00434               RequestID(request), me->stackptr));
00435 #endif
00436 
00437   /* if the status code says the doc was loaded fine, but stackptr is != -1,
00438      it means the doc is ill-formed */
00439   if (me->stackptr >= 0 && (me->status == HT_LOADED)) {
00440     c2p_int(WWW_DOC_SYNTAX,status_term);
00441   }
00442 
00443   /* terminate the parsed prolog terms list */
00444   c2p_nil(me->parsed_term_tail);
00445 
00446   /* pass the result to the outside world */
00447   if (is_var(me->parsed_term))
00448     p2p_unify(parsed_result, me->parsed_term);
00449   else
00450     xsb_abort("[LIBWWW_REQUEST] Request %s: Arg 4 (Result) must be unbound variable",
00451               RequestID(request));
00452 
00453   if (me->target) FREE_TARGET(me);
00454   if (me->stack) HT_FREE(me->stack);
00455   HT_FREE(me);
00456 
00457 #ifdef LIBWWW_DEBUG
00458   xsb_dbgmsg((LOG_DEBUG,"***Request %s: freed the USERDATA object", RequestID(request)));
00459 #endif
00460 
00461   return;
00462 }
00463 
00464 
00465 /* Unused handlers, which might get used later in the development */
00466 
00467 PRIVATE void xml_processingInstruction (void * userData,
00468                                         const XML_Char * target,
00469                                         const XML_Char * data)
00470 {
00471   return;
00472 }
00473 
00474 /* 
00475 ** This is called for a declaration of an unparsed (NDATA)
00476 ** entity.  The base argument is whatever was set by XML_SetBase.
00477 ** The entityName, systemId and notationName arguments will never be null.
00478 ** The other arguments may be.
00479 */
00480 PRIVATE void xml_unparsedEntityDecl (void * userData,
00481                                      const XML_Char * entityName,
00482                                      const XML_Char * base,
00483                                      const XML_Char * systemId,
00484                                      const XML_Char * publicId,
00485                                      const XML_Char * notationName)
00486 {
00487   return;
00488 }
00489 
00490 /* 
00491 ** This is called for a declaration of notation.
00492 ** The base argument is whatever was set by XML_SetBase.
00493 ** The notationName will never be null.  The other arguments can be.
00494 */
00495 PRIVATE void xml_notationDecl (void * userData,
00496                                const XML_Char * notationName,
00497                                const XML_Char * base,
00498                                const XML_Char * systemId,
00499                                const XML_Char * publicId)
00500 {
00501   return;
00502 }
00503 
00504 /* 
00505 ** This is called for a reference to an external parsed general entity.  The
00506 ** referenced entity is not automatically parsed.  The application can parse it
00507 ** immediately or later using XML_ExternalEntityParserCreate.  The parser
00508 ** argument is the parser parsing the entity containing the reference; it can
00509 ** be passed as the parser argument to XML_ExternalEntityParserCreate.  The
00510 ** systemId argument is the system identifier as specified in the entity
00511 ** declaration; it will not be null.  The base argument is the system
00512 ** identifier that should be used as the base for resolving systemId if
00513 ** systemId was relative; this is set by XML_SetBase; it may be null.  The
00514 ** publicId argument is the public identifier as specified in the entity
00515 ** declaration, or null if none was specified; the whitespace in the public
00516 ** identifier will have been normalized as required by the XML spec.  The
00517 ** openEntityNames argument is a space-separated list of the names of the
00518 ** entities that are open for the parse of this entity (including the name of
00519 ** the referenced entity); this can be passed as the openEntityNames argument
00520 ** to XML_ExternalEntityParserCreate; openEntityNames is valid only until the
00521 ** handler returns, so if the referenced entity is to be parsed later, it must
00522 ** be copied.  The handler should return 0 if processing should not continue
00523 ** because of a fatal error in the handling of the external entity.  In this
00524 ** case the calling parser will return an XML_ERROR_EXTERNAL_ENTITY_HANDLING
00525 ** error.  Note that unlike other handlers the first argument is the parser,
00526 ** not userData.  */
00527 PRIVATE int xml_externalEntityRef (XML_Parser     parser,
00528                                    const XML_Char *openEntityNames,
00529                                    const XML_Char *base,
00530                                    const XML_Char *systemId,
00531                                    const XML_Char *publicId)
00532 {
00533   XML_Parser extParser =
00534     XML_ExternalEntityParserCreate(parser, openEntityNames, 0);
00535   HTAnchor  *anchor = NULL;
00536   HTRequest *request = HTRequest_new();
00537   char      *uri;
00538   USERDATA  *userdata = XML_GetUserData(parser);
00539   HTRequest *parent_request = userdata->request;
00540   char      *cwd = HTGetCurrentDirectoryURL();
00541   USERDATA *subuserdata;
00542 
00543   uri = HTParse((char *)systemId, cwd, PARSE_ALL);
00544   anchor = HTAnchor_findAddress(uri);
00545 
00546   HTRequest_setOutputFormat(request, WWW_SOURCE);
00547   set_subrequest_context(parent_request,request,xml_push_dummy(userdata));
00548   setup_termination_filter(request,xml_entity_termination_handler);
00549   subuserdata = xml_create_userData(extParser, request, NULL);
00550   XML_SetUserData(extParser, (void *) subuserdata);
00551   total_number_of_requests++;
00552 
00553 #ifdef LIBWWW_DEBUG
00554   xsb_dbgmsg((LOG_DEBUG,"***In xml_externalEntityRef(%s): uri=%s", RequestID(request), uri));
00555 #endif
00556 
00557   /* libwww breaks when a local file request is issued concurrently with an
00558      existing request to another local file. So, we ignore such subrequests. */
00559   if ((strncmp(uri,"file:/",6) != 0) || !HTRequest_preemptive(parent_request)) {
00560     if (strncmp(uri,"file:/",6) == 0)
00561       HTRequest_setPreemptive(request,YES);
00562     HTLoadAnchor(anchor,request);
00563   } else {
00564     HTRequest_setAnchor(request,anchor);
00565     xml_entity_termination_handler(request,NULL,NULL,WWW_EXTERNAL_ENTITY);
00566   }
00567 
00568   HT_FREE(uri);
00569   HT_FREE(cwd);
00570   return TRUE;
00571 }
00572 
00573 
00574 /* 
00575 ** This is called for an encoding that is unknown to the parser.
00576 ** The encodingHandlerData argument is that which was passed as the
00577 ** second argument to XML_SetUnknownEncodingHandler.
00578 ** The name argument gives the name of the encoding as specified in
00579 ** the encoding declaration.
00580 ** If the callback can provide information about the encoding,
00581 ** it must fill in the XML_Encoding structure, and return 1.
00582 ** Otherwise it must return 0.
00583 ** If info does not describe a suitable encoding,
00584 ** then the parser will return an XML_UNKNOWN_ENCODING error.
00585 */
00586 PRIVATE int xml_unknownEncoding (void           *encodingHandlerData,
00587                                  const XML_Char *name,
00588                                  XML_Encoding   *info)
00589 {
00590   return 0;
00591 }
00592 
00593 /* Default is commented out so that expat will parse entities.
00594 PRIVATE void xml_default (void * userData, const XML_Char * str, int len)
00595 {
00596   XSB_StrDefine(unparsed);
00597 
00598   XSB_StrEnsureSize(&unparsed, len+1);
00599   strncpy(unparsed.string, str, len);
00600   unparsed.length = len;
00601   XSB_StrNullTerminate(&unparsed);
00602 #ifdef LIBWWW_DEBUG
00603   xsb_dbgmsg((LOG_DEBUG,"***In xml_default: Request: %s: Unparsed: %s",
00604              RequestID(((USERDATA *)userData)->request), unparsed.string));
00605 #endif
00606 
00607   return;
00608 }
00609 */
00610 
00611 
00612 
00613 /* Pushes an open prolog term onto the stack and return that term */
00614 PRIVATE prolog_term xml_push_dummy(USERDATA    *userdata)
00615 {
00616   prolog_term location;
00617 
00618   if (userdata->stackptr < 0)
00619     location = userdata->parsed_term_tail;
00620   else 
00621     location = STACK_TOP(userdata).content_list_tail;
00622 
00623   userdata->stackptr++;
00624 
00625 #ifdef LIBWWW_DEBUG
00626   xsb_dbgmsg((LOG_DEBUG,"***In xml_push_dummy(%s): stackptr=%d",
00627               RequestID(userdata->request), userdata->stackptr));
00628 #endif
00629 
00630   CHECK_STACK_OVERFLOW(userdata);
00631 
00632   /* wire the new elt into where it should be in the content list */
00633   STACK_TOP(userdata).elt_term = location;
00634   STACK_TOP(userdata).tag = "extentity";
00635 
00636   /* insert new list cell into the tail and change content_list_tail to point
00637      to the new tail */
00638   if (userdata->stackptr > 0) {
00639     STACK_PREV(userdata).content_list_tail =
00640       p2p_cdr(STACK_PREV(userdata).content_list_tail);
00641     c2p_list(STACK_PREV(userdata).content_list_tail);
00642   } else {
00643     userdata->parsed_term_tail = p2p_cdr(userdata->parsed_term_tail);
00644     c2p_list(userdata->parsed_term_tail);
00645   }
00646 
00647   userdata->stackptr--;
00648 
00649 #ifdef LIBWWW_DEBUG_VERBOSE
00650   if (userdata->stackptr >= 0)
00651     print_prolog_term(STACK_TOP(userdata).content_list_tail,
00652                       "content_list_tail");
00653   else
00654     print_prolog_term(userdata->parsed_term_tail, "parsed_term_tail");
00655 #endif
00656 
00657   return location;
00658 }
00659 
00660 
00661 int xml_entity_termination_handler(HTRequest   *request,
00662                                    HTResponse  *response,
00663                                    void        *param,
00664                                    int         status)
00665 {
00666   char *ext_entity_expansion=NULL;
00667   REQUEST_CONTEXT *context = (REQUEST_CONTEXT *)HTRequest_context(request);
00668   USERDATA *userdata = context->userdata;
00669   XML_Parser extParser = userdata->parser;
00670 
00671   /* the following conditions are handled by standard libwww filters */
00672   if (context->retry && AUTH_OR_REDIRECTION(status))
00673     return HT_OK; /* this causes other filters to be used */
00674 
00675   if (status==HT_LOADED) {
00676     ext_entity_expansion = HTChunk_toCString(context->result_chunk);
00677 #ifdef LIBWWW_DEBUG
00678     xsb_dbgmsg((LOG_DEBUG,"***In xml_entity_termination_handler(%s): entity=%s", 
00679                 RequestID(request), ext_entity_expansion));
00680 #endif
00681     XML_Parse(extParser,ext_entity_expansion,strlen(ext_entity_expansion),1);
00682     HT_FREE(ext_entity_expansion);
00683   } else {
00684     prolog_term request_result = p2p_car(context->request_result);
00685     char *uri = HTAnchor_address((HTAnchor *)HTRequest_anchor(request));
00686 #ifdef LIBWWW_DEBUG
00687     xsb_dbgmsg((LOG_DEBUG,"***In xml_entity_termination_handler(%s): request failed",
00688                 RequestID(request)));
00689 #endif
00690     c2p_functor("unexpanded_entity",2,request_result);
00691     c2p_string(uri,p2p_arg(request_result,1));
00692     c2p_int(status,p2p_arg(request_result,2));
00693   }
00694 
00695   report_asynch_subrequest_status(request, status);
00696 
00697   XML_ParserFree(extParser);
00698   if (userdata)
00699     (((USERDATA *)userdata)->delete_method)(userdata);
00700 
00701   if (total_number_of_requests > 0)
00702     total_number_of_requests--;
00703   /* when the last request is done, stop the event loop */
00704   if ((total_number_of_requests == 0) && event_loop_runnung) {
00705     HTEventList_stopLoop();
00706     event_loop_runnung = FALSE;
00707 #ifdef LIBWWW_DEBUG
00708     xsb_dbgmsg((LOG_DEBUG,"***In xml_entity_termination_handler: event loop halted, status=%d, HTNetCount=%d",
00709                 status, HTNet_count()));
00710 #endif
00711   }
00712 
00713   return !HT_OK;
00714 }
00715 
00716 
00717 void set_xml_conversions()
00718 {
00719   /* Must delete old converter and create new. Apparently something in libwww
00720      releases the atoms used in thes converters, which causes it to crash 
00721      in HTStreamStack() on the second call to xmlparse. */
00722   HTPresentation_deleteAll(XML_converter);
00723   XML_converter = HTList_new();
00724 
00725   HTConversion_add(XML_converter,"*/*", "www/debug",
00726                    HTBlackHoleConverter, 1.0, 0.0, 0.0);
00727   HTConversion_add(XML_converter,"message/rfc822", "*/*",
00728                    HTMIMEConvert, 1.0, 0.0, 0.0);
00729   HTConversion_add(XML_converter,"message/x-rfc822-foot", "*/*",
00730                    HTMIMEFooter, 1.0, 0.0, 0.0);
00731   HTConversion_add(XML_converter,"message/x-rfc822-head", "*/*",
00732                    HTMIMEHeader, 1.0, 0.0, 0.0);
00733   HTConversion_add(XML_converter,"message/x-rfc822-cont", "*/*",
00734                    HTMIMEContinue, 1.0, 0.0, 0.0);
00735   HTConversion_add(XML_converter,"message/x-rfc822-upgrade","*/*",
00736                    HTMIMEUpgrade, 1.0, 0.0, 0.0);
00737   HTConversion_add(XML_converter,"message/x-rfc822-partial", "*/*",
00738                    HTMIMEPartial, 1.0, 0.0, 0.0);
00739   HTConversion_add(XML_converter,"multipart/*", "*/*",
00740                    HTBoundary, 1.0, 0.0, 0.0);
00741   HTConversion_add(XML_converter,"text/x-http", "*/*",
00742                    HTTPStatus_new, 1.0, 0.0, 0.0);
00743   /* www/xml is invented for servers that don't recognize XML */
00744   HTConversion_add(XML_converter,"text/plain", "www/xml",
00745                    HTXML_new, 1.0, 0.0, 0.0);
00746   HTConversion_add(XML_converter,"text/html", "www/xml",
00747                    HTXML_new, 1.0, 0.0, 0.0);
00748   HTConversion_add(XML_converter,"www/present", "www/xml",
00749                    HTXML_new, 1.0, 0.0, 0.0);
00750   HTConversion_add(XML_converter, "text/xml", "*/*", 
00751                    HTXML_new, 1.0, 0.0, 0.0);
00752   HTConversion_add(XML_converter, "application/xml", "*/*",
00753                    HTXML_new, 1.0, 0.0, 0.0);
00754 }
00755 
00756 

Generated on Wed Jul 26 13:30:45 2006 for XSB by  doxygen 1.4.5