00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026 #include "libwww_util.h"
00027 #include "libwww_parse.h"
00028 #include "libwww_parse_html.h"
00029
00030
00031
00032
00033
00034 PRIVATE void html_beginElement(USERDATA *htext,
00035 int element_number,
00036
00037 const BOOL *present,
00038
00039
00040 const char **value)
00041 {
00042 #ifdef LIBWWW_DEBUG
00043 HTTag *tag = SGML_findTag(htext->dtd, element_number);
00044 xsb_dbgmsg((LOG_DEBUG,"***In html_beginElement(%s): stackptr=%d tag=%s suppress=%d choose=%d",
00045 RequestID(htext->request),
00046 htext->stackptr, HTTag_name(tag),
00047 IS_SUPPRESSED_TAG((HKEY)element_number, htext->request),
00048 IS_SELECTED_TAG((HKEY)element_number, htext->request)
00049 ));
00050 #endif
00051
00052 if (IS_STRIPPED_TAG((HKEY)element_number, htext->request)) return;
00053
00054 if ((suppressing(htext) && !IS_SELECTED_TAG((HKEY)element_number, htext->request))
00055 || (parsing(htext) && IS_SUPPRESSED_TAG((HKEY)element_number, htext->request))) {
00056 html_push_suppressed_element(htext, element_number);
00057 return;
00058 }
00059
00060
00061 if ((parsing(htext) && !IS_SUPPRESSED_TAG((HKEY)element_number,htext->request))
00062 || (suppressing(htext)
00063 && IS_SELECTED_TAG((HKEY)element_number, htext->request))) {
00064 html_push_element(htext,element_number,present,value);
00065 return;
00066 }
00067 }
00068
00069
00070
00071 PRIVATE void html_endElement (USERDATA *htext, int element_number)
00072 {
00073 int i, match;
00074
00075 #ifdef LIBWWW_DEBUG
00076 xsb_dbgmsg((LOG_DEBUG,"***In html_endElement(%s): stackptr=%d",
00077 RequestID(htext->request), htext->stackptr));
00078 #endif
00079
00080 if (IS_STRIPPED_TAG((HKEY)element_number, htext->request)) return;
00081
00082 match = find_matching_elt(htext, element_number);
00083
00084 if (match < 0) return;
00085
00086 #ifdef LIBWWW_DEBUG_VERBOSE
00087 xsb_dbgmsg((LOG_DEBUG,"***match=%d", match));
00088 #endif
00089
00090 for (i=htext->stackptr; i>=match; i--)
00091 if (parsing(htext))
00092 html_pop_element(htext);
00093 else
00094 html_pop_suppressed_element(htext);
00095
00096 #ifdef LIBWWW_DEBUG_VERBOSE
00097 if (htext->stackptr >= 0) {
00098 if (!STACK_TOP(htext).suppress)
00099 print_prolog_term(STACK_TOP(htext).elt_term, "elt_term");
00100 }
00101 #endif
00102
00103 return;
00104 }
00105
00106
00107
00108
00109 PRIVATE void html_addText (USERDATA *htext, const char *textbuf, int len)
00110 {
00111 static XSB_StrDefine(pcdata_buf);
00112 int shift = 0;
00113 REQUEST_CONTEXT *context =
00114 (REQUEST_CONTEXT *)HTRequest_context(htext->request);
00115
00116 #ifdef LIBWWW_DEBUG_VERBOSE
00117 xsb_dbgmsg((LOG_DEBUG,"***In html_addText: Request %s", RequestID(htext->request)));
00118 #endif
00119
00120 if (IS_STRIPPED_TAG((HKEY)PCDATA_SPECIAL, htext->request)) return;
00121 if (suppressing(htext)) return;
00122
00123
00124
00125 if (strncmp(textbuf,"\n", len) == 0) return;
00126
00127 html_push_element(htext, PCDATA_SPECIAL, NULL, NULL);
00128
00129
00130 XSB_StrEnsureSize(&pcdata_buf, len+1);
00131 strncpy(pcdata_buf.string, textbuf, len);
00132 pcdata_buf.length = len;
00133 XSB_StrNullTerminate(&pcdata_buf);
00134
00135
00136 if (strncmp(textbuf,"\n", strlen("\n")) == 0)
00137 shift = strlen("\n");
00138
00139
00140 if (context->convert2list)
00141 c2p_chars(pcdata_buf.string+shift, p2p_arg(STACK_TOP(htext).elt_term,3));
00142 else
00143 c2p_string(pcdata_buf.string+shift, p2p_arg(STACK_TOP(htext).elt_term,3));
00144
00145 html_pop_element(htext);
00146 return;
00147 }
00148
00149
00150
00151
00152 PRIVATE void collect_html_attributes ( prolog_term elt_term,
00153 HTTag *tag,
00154 const BOOL *present,
00155 const char **value)
00156 {
00157 int tag_attributes_number = HTTag_attributes(tag);
00158 static XSB_StrDefine(attrname);
00159 int cnt;
00160 prolog_term
00161 prop_list = p2p_arg(elt_term,2),
00162 prop_list_tail = prop_list,
00163 prop_list_head;
00164
00165 c2p_list(prop_list_tail);
00166
00167 #ifdef LIBWWW_DEBUG_VERBOSE
00168 xsb_dbgmsg((LOG_DEBUG,"***In collect_html_attributes: tag_attributes_number=%d",
00169 tag_attributes_number));
00170 #endif
00171
00172 for (cnt=0; cnt<tag_attributes_number; cnt++) {
00173 if (present[cnt]) {
00174 XSB_StrEnsureSize(&attrname, strlen(HTTag_attributeName(tag, cnt)));
00175 strcpy_lower(attrname.string, HTTag_attributeName(tag, cnt));
00176
00177 #ifdef LIBWWW_DEBUG_VERBOSE
00178 xsb_dbgmsg((LOG_DEBUG,"***attr=%s, val=%s ",
00179 attrname.string, (char *)value[cnt]));
00180 #endif
00181 prop_list_head = p2p_car(prop_list_tail);
00182 c2p_functor("attval",2,prop_list_head);
00183 c2p_string(attrname.string, p2p_arg(prop_list_head,1));
00184
00185
00186 if ((char *)value[cnt])
00187 c2p_string((char *)value[cnt], p2p_arg(prop_list_head, 2));
00188
00189 prop_list_tail = p2p_cdr(prop_list_tail);
00190 c2p_list(prop_list_tail);
00191 }
00192 }
00193
00194
00195 c2p_nil(prop_list_tail);
00196 return;
00197 }
00198
00199
00200
00201 PRIVATE void html_push_element (USERDATA *htext,
00202 int element_number,
00203 const BOOL *present,
00204 const char **value)
00205 {
00206 static XSB_StrDefine(tagname);
00207 HTTag *tag = special_find_tag(htext, element_number);
00208 prolog_term location;
00209
00210
00211 if (tag == NULL) return;
00212
00213 if (htext->stackptr < 0)
00214 location = htext->parsed_term_tail;
00215 else
00216 location = STACK_TOP(htext).content_list_tail;
00217
00218 htext->stackptr++;
00219
00220 #ifdef LIBWWW_DEBUG_VERBOSE
00221 xsb_dbgmsg((LOG_DEBUG,"***In html_push_element(%s): stackptr=%d",
00222 RequestID(htext->request), htext->stackptr));
00223 #endif
00224
00225 CHECK_STACK_OVERFLOW(htext);
00226
00227
00228 STACK_TOP(htext).elt_term = p2p_car(location);
00229
00230 STACK_TOP(htext).element_number = element_number;
00231 STACK_TOP(htext).suppress = FALSE;
00232
00233
00234
00235
00236 STACK_TOP(htext).element_type = HTTag_content(tag);
00237 c2p_functor("elt",3,STACK_TOP(htext).elt_term);
00238
00239 XSB_StrEnsureSize(&tagname, strlen(HTTag_name(tag)));
00240 strcpy_lower(tagname.string, HTTag_name(tag));
00241 c2p_string(tagname.string, p2p_arg(STACK_TOP(htext).elt_term, 1));
00242 collect_html_attributes(STACK_TOP(htext).elt_term, tag, present, value);
00243 #ifdef LIBWWW_DEBUG_VERBOSE
00244 xsb_dbgmsg((LOG_DEBUG,"***elt_name=%s", HTTag_name(tag)));
00245 print_prolog_term(STACK_TOP(htext).elt_term, "elt_term");
00246 #endif
00247
00248 switch (STACK_TOP(htext).element_type) {
00249 case SGML_EMPTY:
00250 c2p_nil(p2p_arg(STACK_TOP(htext).elt_term,3));
00251 html_pop_element(htext);
00252 break;
00253 case PCDATA_SPECIAL:
00254
00255 break;
00256 default:
00257 STACK_TOP(htext).content_list_tail = p2p_arg(STACK_TOP(htext).elt_term,3);
00258 c2p_list(STACK_TOP(htext).content_list_tail);
00259 }
00260 }
00261
00262
00263
00264 PRIVATE void html_pop_element(USERDATA *htext)
00265 {
00266 #ifdef LIBWWW_DEBUG_VERBOSE
00267 xsb_dbgmsg((LOG_DEBUG,"***In html_pop_element(%s): stackptr=%d, elt_name=%s",
00268 RequestID(htext->request),
00269 htext->stackptr,
00270 HTTag_name(special_find_tag(htext, STACK_TOP(htext).element_number))));
00271 #endif
00272
00273 switch (STACK_TOP(htext).element_type) {
00274 case SGML_EMPTY:
00275 break;
00276 case PCDATA_SPECIAL:
00277 break;
00278 default:
00279 c2p_nil(STACK_TOP(htext).content_list_tail);
00280 }
00281
00282
00283
00284 if (htext->stackptr > 0) {
00285 STACK_PREV(htext).content_list_tail =
00286 p2p_cdr(STACK_PREV(htext).content_list_tail);
00287 c2p_list(STACK_PREV(htext).content_list_tail);
00288 } else {
00289 htext->parsed_term_tail = p2p_cdr(htext->parsed_term_tail);
00290 c2p_list(htext->parsed_term_tail);
00291 }
00292
00293 htext->stackptr--;
00294
00295 #ifdef LIBWWW_DEBUG_VERBOSE
00296 if (htext->stackptr >= 0)
00297 print_prolog_term(STACK_TOP(htext).content_list_tail, "content_list_tail");
00298 else
00299 print_prolog_term(htext->parsed_term_tail, "parsed_term_tail");
00300 #endif
00301
00302 return;
00303 }
00304
00305
00306
00307 PRIVATE void html_push_suppressed_element(USERDATA *htext, int element_number)
00308 {
00309
00310 if (SGML_findTagContents(htext->dtd, element_number) == SGML_EMPTY)
00311 return;
00312
00313 htext->stackptr++;
00314
00315 STACK_TOP(htext).element_number = element_number;
00316 STACK_TOP(htext).suppress = TRUE;
00317
00318
00319 if (htext->stackptr == 0)
00320 STACK_TOP(htext).content_list_tail = htext->parsed_term_tail;
00321 else
00322 STACK_TOP(htext).content_list_tail = STACK_PREV(htext).content_list_tail;
00323
00324 return;
00325 }
00326
00327
00328 PRIVATE void html_pop_suppressed_element(USERDATA *htext)
00329 {
00330
00331 if (htext->stackptr > 0) {
00332 STACK_PREV(htext).content_list_tail = STACK_TOP(htext).content_list_tail;
00333 } else {
00334 htext->parsed_term_tail = STACK_TOP(htext).content_list_tail;
00335 }
00336
00337 htext->stackptr--;
00338
00339 #ifdef LIBWWW_DEBUG_VERBOSE
00340 xsb_dbgmsg((LOG_DEBUG,"***In html_pop_suppressed_element(%s): stackptr=%d",
00341 RequestID(htext->request), htext->stackptr));
00342 if (htext->stackptr >= 0)
00343 print_prolog_term(STACK_TOP(htext).content_list_tail, "content_list_tail");
00344 else
00345 print_prolog_term(htext->parsed_term_tail, "parsed_term_tail");
00346 #endif
00347
00348 return;
00349 }
00350
00351
00352 PRIVATE int find_matching_elt(USERDATA *htext, int elt_number)
00353 {
00354 int i;
00355 for (i=htext->stackptr; i>=0; i--) {
00356 #ifdef LIBWWW_DEBUG_VERBOSE
00357 xsb_dbgmsg((LOG_DEBUG,"***In find_matching_elt"));
00358 xsb_dbgmsg((LOG_DEBUG,"***i=%d htext->stack[i].element_number=%d(%s) elt_number=%d(%s)",
00359 i,
00360 htext->stack[i].element_number,
00361 SGML_findTagName(htext->dtd, htext->stack[i].element_number),
00362 elt_number,
00363 SGML_findTagName(htext->dtd, elt_number)));
00364 #endif
00365 if (htext->stack[i].element_number == elt_number)
00366 return i;
00367 }
00368 return -1;
00369 }
00370
00371
00372 PRIVATE inline HTTag *special_find_tag(USERDATA *htext, int element_number)
00373 {
00374 static HTTag pcdata_tag = {"pcdata", NULL, 0, PCDATA_SPECIAL};
00375 if (element_number == PCDATA_SPECIAL)
00376 return &pcdata_tag;
00377 return SGML_findTag(htext->dtd, element_number);
00378 }
00379
00380
00381
00382 USERDATA *html_create_userData( HTRequest * request,
00383 HTParentAnchor * anchor,
00384 HTStream * output_stream)
00385 {
00386 USERDATA *me = NULL;
00387
00388 #ifdef LIBWWW_DEBUG
00389 xsb_dbgmsg((LOG_DEBUG,"***Start html_create_userData(%s):", RequestID(request)));
00390 #endif
00391 if (request) {
00392
00393 if (!verifyMIMEformat(request, HTMLPARSE)) {
00394
00395
00396
00397
00398
00399
00400 xsb_abort("[LIBWWW_REQUEST] Bug: Request type/MIME type mismatch");
00401 }
00402 if ((me = (USERDATA *) HT_CALLOC(1, sizeof(USERDATA))) == NULL)
00403 HT_OUTOFMEM("libwww_parse_html");
00404 me->delete_method = html_delete_userData;
00405 me->request = request;
00406 me->node_anchor = anchor;
00407 me->target = output_stream;
00408 me->dtd = HTML_dtd();
00409 me->suppress_is_default =
00410 ((REQUEST_CONTEXT *)HTRequest_context(request))->suppress_is_default;
00411 me->parsed_term = p2p_new();
00412 c2p_list(me->parsed_term);
00413 me->parsed_term_tail = me->parsed_term;
00414 SETUP_STACK(me);
00415 }
00416
00417 #ifdef LIBWWW_DEBUG
00418 xsb_dbgmsg((LOG_DEBUG,"***In html_create_userData(%s):", RequestID(request)));
00419 #endif
00420
00421
00422 ((REQUEST_CONTEXT *)HTRequest_context(request))->userdata = (void *)me;
00423 return me;
00424 }
00425
00426
00427 PRIVATE void html_delete_userData(void *userdata)
00428 {
00429 int i;
00430 prolog_term parsed_result, status_term;
00431 USERDATA *me = (USERDATA *)userdata;
00432 HTRequest *request = me->request;
00433
00434 if (me->request) {
00435 parsed_result =
00436 ((REQUEST_CONTEXT *)HTRequest_context(request))->request_result;
00437 status_term =
00438 ((REQUEST_CONTEXT *)HTRequest_context(request))->status_term;
00439 } else return;
00440
00441 #ifdef LIBWWW_DEBUG
00442 xsb_dbgmsg((LOG_DEBUG,"***In html_delete_userData(%s): stackptr=%d",
00443 RequestID(request), me->stackptr));
00444 #endif
00445
00446
00447 for (i=me->stackptr; i>=0; i--)
00448 if (parsing(me))
00449 html_pop_element(me);
00450 else
00451 html_pop_suppressed_element(me);
00452
00453
00454 c2p_nil(me->parsed_term_tail);
00455
00456
00457 if (is_var(me->parsed_term))
00458 p2p_unify(parsed_result, me->parsed_term);
00459 else
00460 xsb_abort("[LIBWWW_REQUEST] Request %s: Arg 4 (Result) must be unbound variable",
00461 RequestID(request));
00462
00463
00464 if (me->target) FREE_TARGET(me);
00465 if (me->stack) HT_FREE(me->stack);
00466 HT_FREE(me);
00467
00468 #ifdef LIBWWW_DEBUG
00469 xsb_dbgmsg((LOG_DEBUG,"***Request %s: freed the USERDATA obj", RequestID(request)));
00470 #endif
00471
00472 return;
00473 }
00474
00475
00476 void html_register_callbacks()
00477 {
00478
00479 HText_registerElementCallback(html_beginElement, html_endElement);
00480
00481 HText_registerTextCallback(html_addText);
00482
00483
00484 HText_registerCDCallback(html_create_userData,
00485 (HText_delete *)html_delete_userData);
00486 return;
00487 }
00488
00489
00490 void set_html_conversions()
00491 {
00492
00493
00494
00495 HTPresentation_deleteAll(HTML_converter);
00496 HTML_converter = HTList_new();
00497
00498 HTConversion_add(HTML_converter,"*/*", "www/debug",
00499 HTBlackHoleConverter, 1.0, 0.0, 0.0);
00500 HTConversion_add(HTML_converter,"message/rfc822", "*/*",
00501 HTMIMEConvert, 1.0, 0.0, 0.0);
00502 HTConversion_add(HTML_converter,"message/x-rfc822-foot", "*/*",
00503 HTMIMEFooter, 1.0, 0.0, 0.0);
00504 HTConversion_add(HTML_converter,"message/x-rfc822-head", "*/*",
00505 HTMIMEHeader, 1.0, 0.0, 0.0);
00506 HTConversion_add(HTML_converter,"message/x-rfc822-cont", "*/*",
00507 HTMIMEContinue, 1.0, 0.0, 0.0);
00508 HTConversion_add(HTML_converter,"message/x-rfc822-upgrade","*/*",
00509 HTMIMEUpgrade, 1.0, 0.0, 0.0);
00510 HTConversion_add(HTML_converter,"message/x-rfc822-partial", "*/*",
00511 HTMIMEPartial, 1.0, 0.0, 0.0);
00512 HTConversion_add(HTML_converter,"multipart/*", "*/*",
00513 HTBoundary, 1.0, 0.0, 0.0);
00514 HTConversion_add(HTML_converter,"text/x-http", "*/*",
00515 HTTPStatus_new, 1.0, 0.0, 0.0);
00516
00517 HTConversion_add(HTML_converter,"text/html", "www/html",
00518 HTMLPresent, 1.0, 0.0, 0.0);
00519 HTConversion_add(HTML_converter,"text/plain", "www/html",
00520 HTMLPresent, 1.0, 0.0, 0.0);
00521 HTConversion_add(HTML_converter,"www/present", "www/html",
00522 HTMLPresent, 1.0, 0.0, 0.0);
00523 HTConversion_add(HTML_converter,"text/xml", "www/html",
00524 HTMLPresent, 1.0, 0.0, 0.0);
00525 HTConversion_add(HTML_converter,"text/rdf", "www/html",
00526 HTMLPresent, 1.0, 0.0, 0.0);
00527 HTConversion_add(HTML_converter, "application/html", "*/*",
00528 HTMLPresent, 1.0, 0.0, 0.0);
00529 }
00530
00531