00001
00002
00003
00004
00005
00006
00007
00008
00009
00010 #include "xsb_config.h"
00011 #ifdef WIN_NT
00012 #define XSB_DLL
00013 #endif
00014 #include "cinterf.h"
00015 #include <stdlib.h>
00016 #include "dtd.h"
00017 #include <stdio.h>
00018 #include <string.h>
00019 #include <stdio.h>
00020 #include <assert.h>
00021 #include "fetch_file.c"
00022 #include "parser.c"
00023 #include "charmap.c"
00024 #include "util.c"
00025 #include "xmlns.c"
00026 #include "model.c"
00027 #include "error_term.h"
00028 #include "util.h"
00029 #include "basic_defs.h"
00030
00031 #ifndef WIN_NT
00032 #include <sys/stat.h>
00033 #endif
00034
00035
00036
00037
00038
/* Tag stored in parser_data.magic while the record is live */
#define PD_MAGIC      0x36472ba1

/* Default limits copied into every record made by new_parser_data() */
#define MAX_ERRORS    50
#define MAX_WARNINGS  50

/* Size of the fixed server / file name buffers used when parsing URLs */
#define MAXSTRLEN     256
00044
00045
/* How on_error() reports problems (selected by the syntax_errors option) */
typedef enum
{
  EM_QUIET = 0,   /* count errors/warnings but build no report term */
  EM_PRINT = 1,   /* report errors and warnings */
  EM_STYLE = 2    /* additionally report style (informational) messages */
} errormode;
00051
/* Point at which a parse run stops (selected by the parse(...) option) */
typedef enum
{
  SA_FILE    = 0,   /* parse(file)        */
  SA_INPUT   = 1,   /* parse(input)       */
  SA_ELEMENT = 2,   /* parse(element)     */
  SA_CONTENT = 3,   /* parse(content)     */
  SA_DECL    = 4    /* parse(declaration) */
} stopat;
00059
00060 typedef struct _env
00061 { prolog_term tail;
00062 struct _env *parent;
00063 } env;
00064
00065 typedef struct _parser_data
00066 { int magic;
00067 dtd_parser *parser;
00068
00069 int warnings;
00070 int errors;
00071 int max_errors;
00072 int max_warnings;
00073 errormode error_mode;
00074 int positions;
00075
00076 predicate_t on_begin;
00077 predicate_t on_end;
00078 predicate_t on_cdata;
00079 predicate_t on_entity;
00080 predicate_t on_pi;
00081 predicate_t on_urlns;
00082 predicate_t on_error;
00083 predicate_t on_decl;
00084
00085 stopat stopat;
00086 int stopped;
00087
00088 void* source;
00089 int its_a_url;
00090
00091 prolog_term list;
00092 prolog_term tail;
00093 env *stack;
00094 int free_on_close;
00095 } parser_data;
00096
00097
00098 dtd_parser * parser_error = NULL;
00099
00100 #include "error.c"
00101
00102
00103 dtd *
00104 new_dtd(const ichar *doctype);
00105
00106 static int
00107 get_dtd(prolog_term t, dtd **dtdp);
00108
00109 dtd_parser *
00110 new_dtd_parser(dtd *dtd);
00111
00112 int unify_dtd( prolog_term t, dtd * dtd);
00113
00114 int unify_parser( prolog_term t, dtd_parser * p);
00115
00116
00117 static int
00118 on_begin(dtd_parser *p, dtd_element *e, int argc, sgml_attribute *argv);
00119
00120 static int
00121 on_end(dtd_parser *p, dtd_element *e);
00122
00123 static int
00124 on_entity(dtd_parser *p, dtd_entity *e, int chr);
00125
00126 static int
00127 on_pi(dtd_parser *p, const ichar *pi);
00128
00129 static int
00130 on_cdata(dtd_parser *p, data_type type, int len, const ochar *data);
00131
00132 static void
00133 put_element_name(dtd_parser *p, prolog_term t, dtd_element *e);
00134
00135 static int
00136 unify_attribute_list(dtd_parser *p, prolog_term alist,
00137 int argc, sgml_attribute *argv);
00138 static parser_data *
00139 new_parser_data(dtd_parser *p);
00140
00141 static void
00142 put_url(dtd_parser *p, prolog_term t, const ichar *url);
00143
00144 static int
00145 on_error(dtd_parser *p, dtd_error *error);
00146
00147 static int
00148 on_decl(dtd_parser *p, const ichar *decl);
00149
00150 static void
00151 put_attribute_name(dtd_parser *p, prolog_term t, dtd_symbol *nm);
00152
00153 static void
00154 put_attribute_value(dtd_parser *p, prolog_term t, sgml_attribute *a);
00155
00156
00157 static ichar *
00158 istrblank(const ichar *s);
00159
00160 static int
00161 unify_listval(dtd_parser *p, prolog_term t, attrtype type, int len, const char *text);
00162
00163 static dtd_srcloc *
00164 file_location(dtd_parser *p, dtd_srcloc *l);
00165
00166 static int
00167 can_end_omitted(dtd_parser *p);
00168
00169 static int
00170 set_option_dtd( dtd *dtd, dtd_option option, char *set);
00171
00172
00173
00180 DllExport int call_conv pl_new_sgml_parser()
00181 {
00182
00183 prolog_term head, tail, tmp, ref, tmp1;
00184
00185
00186 dtd *dtd = NULL;
00187 dtd_parser *p;
00188
00189 char *str;
00190
00191 tail = reg_term(2);
00192
00193
00194 while(is_list(tail))
00195 {
00196 head = p2p_car(tail);
00197 tmp1 = p2p_cdr(tail);
00198 tail = tmp1;
00199 if(is_functor( head))
00200 {
00201
00202
00203
00204 str = p2c_functor( head);
00205 if(strcmp( str, "dtd_struct"))
00206 {
00207 return FALSE;
00208 }
00209 tmp = p2p_arg(head, 1);
00210 if( is_var( tmp))
00211 {
00212 dtd = new_dtd(NULL);
00213 dtd->references++;
00214 c2p_int( (int)dtd, tmp);
00215 }
00216 else
00217 {
00218 if( !get_dtd( head, &dtd))
00219 return FALSE;
00220
00221 }
00222 }
00223 }
00224
00225 ref = reg_term(1);
00226 p = new_dtd_parser(dtd);
00227
00228 parser_error = p;
00229 return unify_parser(ref, p);
00230
00231 }
00232
00239 int unify_parser( prolog_term t, dtd_parser *p)
00240 {
00241 prolog_term tmp, tmp1;
00242
00243
00244 tmp1 = p2p_new();
00245 tmp = p2p_new();
00246
00247
00248 c2p_functor( "sgml_parser", 1, tmp1);
00249 c2p_int( (int) p, p2p_arg( tmp1, 1));
00250
00251 return p2p_unify( t, tmp1);
00252 }
00253
00254
00261 int unify_dtd( prolog_term t, dtd * d)
00262 {
00263
00264 prolog_term tmp, tmp1, tmp2;
00265
00266 tmp1 = p2p_new();
00267 tmp = p2p_new();
00268 tmp2 = p2p_new();
00269
00270
00271
00272 if(d->doctype)
00273 {
00274 c2p_functor( "dtd_struct", 2, tmp1);
00275 c2p_int( (int) d, p2p_arg( tmp1, 1));
00276 c2p_string( d->doctype, p2p_arg( tmp1, 2));
00277 }
00278
00279 else
00280 {
00281 c2p_functor( "dtd_struct", 1, tmp1);
00282 c2p_int( (int) d, tmp);
00283 p2p_unify( p2p_arg( tmp1, 1), tmp);
00284 }
00285
00286 return p2p_unify( t, tmp1);
00287 }
00288
00295 DllExport int call_conv pl_new_dtd()
00296 { char *dt;
00297 dtd *dtd;
00298 prolog_term doctype;
00299 prolog_term ref;
00300
00301 doctype = reg_term(1);
00302 ref = reg_term(2);
00303
00304
00305 if ( !(dt = p2c_string( doctype) ))
00306 return sgml2pl_error(ERR_TYPE, "atom", doctype);
00307
00308
00309 if ( !(dtd=new_dtd(dt)) )
00310 return FALSE;
00311
00312 dtd->references++;
00313
00314 return unify_dtd(ref, dtd);
00315 }
00316
00322 dtd *
00323 new_dtd(const ichar *doctype)
00324 { dtd *dtd = calloc(1, sizeof(*dtd));
00325
00326 dtd->magic = SGML_DTD_MAGIC;
00327 dtd->implicit = TRUE;
00328 dtd->dialect = DL_SGML;
00329 if ( doctype )
00330 dtd->doctype = istrdup(doctype);
00331 dtd->symbols = new_symbol_table();
00332 dtd->charclass = new_charclass();
00333 dtd->charfunc = new_charfunc();
00334 dtd->charmap = new_charmap();
00335 dtd->space_mode = SP_SGML;
00336 dtd->ent_case_sensitive = TRUE;
00337 dtd->shorttag = TRUE;
00338 dtd->number_mode = NU_TOKEN;
00339 return dtd;
00340 }
00341
00350 dtd_parser *
00351 new_dtd_parser(dtd *dtd)
00352 {
00353 dtd_parser *p = calloc(1, sizeof(*p));
00354
00355 if ( !dtd )
00356 dtd = new_dtd(NULL);
00357 dtd->references++;
00358
00359 p->magic = SGML_PARSER_MAGIC;
00360 p->dtd = dtd;
00361 p->state = S_PCDATA;
00362 p->mark_state = MS_INCLUDE;
00363 p->dmode = DM_DTD;
00364 p->encoding = ENC_ISO_LATIN1;
00365 p->buffer = new_icharbuf();
00366 p->cdata = new_ocharbuf();
00367 p->event_class = EV_EXPLICIT;
00368 set_src_dtd_parser(p, IN_NONE, NULL);
00369
00370 return p;
00371 }
00372
00379 static int
00380 get_dtd(prolog_term t, dtd **dtdp)
00381 {
00382 char * str;
00383
00384 if ( is_functor(t))
00385 {
00386
00387 prolog_term temp_term;
00388 void *ptr;
00389
00390 str = p2c_functor(t);
00391
00392 if(strcmp( str, "dtd_struct"))
00393 return FALSE;
00394
00395 temp_term = p2p_arg(t, 1);
00396
00397 if ((ptr = (void *) p2c_int(temp_term) ))
00398 {
00399 dtd *tmp = ptr;
00400 if ( tmp->magic == SGML_DTD_MAGIC )
00401 {
00402 *dtdp = tmp;
00403
00404 return TRUE;
00405 }
00406 return sgml2pl_error(ERR_EXISTENCE, "dtd_struct", t);
00407 }
00408 }
00409
00410 return sgml2pl_error(ERR_TYPE, "dtd_struct", t);
00411 }
00412
00419 static int
00420 get_parser(prolog_term parser, dtd_parser **p)
00421 {
00422
00423 prolog_term temp_term;
00424 void *ptr;
00425 char *str = NULL;
00426
00427 if(is_functor(parser))
00428 {
00429
00430
00431 str = p2c_functor( parser);
00432
00433 if(strcmp(str,"sgml_parser"))
00434 {
00435 return FALSE;
00436 }
00437 temp_term = p2p_arg( parser, 1);
00438
00439 if( (ptr = (void *) p2c_int(temp_term)))
00440 {
00441 dtd_parser *tmp = ptr;
00442 if ( tmp->magic == SGML_PARSER_MAGIC )
00443 {
00444 *p = tmp;
00445 return TRUE;
00446 }
00447 return sgml2pl_error(ERR_EXISTENCE, "sgml_parser", parser);
00448 }
00449 }
00450
00451 return sgml2pl_error(ERR_TYPE, "sgml_parser", parser);
00452 }
00453
00460 DllExport int call_conv pl_doctype()
00461 {
00462 dtd_parser *p;
00463 prolog_term parser, doctype;
00464 dtd * dtd;
00465
00466 parser = reg_term(1);
00467 doctype = reg_term(2);
00468
00469
00470
00471 if ( !get_parser(parser, &p) )
00472 return FALSE;
00473 dtd = p->dtd;
00474
00475 if(is_var(doctype) && dtd->doctype)
00476 {
00477 c2p_string( dtd->doctype, doctype);
00478 }
00479 return TRUE;
00480 }
00481
00487 DllExport int call_conv pl_set_sgml_parser()
00488 {
00489 dtd_parser *p;
00490 prolog_term parser, options, temp_term;
00491
00492 parser = reg_term(1);
00493 options = reg_term(2);
00494
00495
00496 if ( !get_parser(parser, &p) )
00497 return FALSE;
00498
00499 if( is_functor(options))
00500 {
00501 char *funcname;
00502
00503 funcname = p2c_functor( options);
00504
00505
00506 if( streq( funcname, "dialect"))
00507 {
00508 char *s;
00509 temp_term = p2p_arg(options, 1);
00510 s=p2c_string( temp_term);
00511 if ( streq(s, "xml") )
00512 set_dialect_dtd(p->dtd, DL_XML);
00513 else if ( streq(s, "xmlns") )
00514 set_dialect_dtd(p->dtd, DL_XMLNS);
00515 else if ( streq(s, "sgml") )
00516 set_dialect_dtd(p->dtd, DL_SGML);
00517 else
00518 return sgml2pl_error(ERR_DOMAIN, "sgml_dialect", temp_term);
00519 }
00520
00521 else if( streq( funcname, "shorttag"))
00522 {
00523 char *booleanstring=NULL;
00524
00525 temp_term = p2p_arg( options, 1);
00526
00527 booleanstring = p2c_string( temp_term);
00528 if( !booleanstring){
00529 return sgml2pl_error(ERR_TYPE, "boolen", temp_term);
00530 }
00531 if( strcmp( booleanstring, "false") &&
00532 strcmp( booleanstring, "true") &&
00533 strcmp( booleanstring, "FALSE") &&
00534 strcmp( booleanstring, "TRUE"))
00535 {
00536 return sgml2pl_error( ERR_TYPE, "boolean", temp_term);
00537 }
00538
00539 set_option_dtd( p->dtd, OPT_SHORTTAG, booleanstring);
00540
00541 }
00542
00543 else if( streq( funcname, "file"))
00544 {
00545 char * file;
00546
00547 temp_term = p2p_arg( options, 1);
00548 file=p2c_string( temp_term);
00549 set_src_dtd_parser( p, IN_FILE, file);
00550 }
00551
00552 else if ( streq( funcname, "line"))
00553 {
00554 temp_term = p2p_arg( options, 1);
00555
00556 (p->location.line = p2c_int( temp_term));
00557 }
00558
00559 else if ( streq( funcname, "charpos"))
00560 {
00561 temp_term = p2p_arg( options, 1);
00562
00563 p->location.charpos = p2c_int( temp_term);
00564
00565 }
00566
00567 else if( streq( funcname, "space"))
00568 {
00569 char *s;
00570 temp_term =p2p_arg(options, 1);
00571 s=p2c_string( temp_term);
00572
00573 if ( streq(s, "preserve") )
00574 p->dtd->space_mode = SP_PRESERVE;
00575 else if ( streq(s, "default") )
00576 p->dtd->space_mode = SP_DEFAULT;
00577 else if ( streq(s, "remove") )
00578 p->dtd->space_mode = SP_REMOVE;
00579 else if ( streq(s, "sgml") )
00580 p->dtd->space_mode = SP_SGML;
00581 else
00582 return FALSE;
00583
00584 }
00585
00586 else if( streq( funcname, "defaults"))
00587 {
00588 int val;
00589
00590 temp_term =p2p_arg(options, 1);
00591
00592 val=p2c_int( temp_term);
00593
00594 if ( val )
00595 p->flags &= ~SGML_PARSER_NODEFS;
00596 else
00597 p->flags |= SGML_PARSER_NODEFS;
00598
00599 }
00600
00601 else if( streq( funcname, "number"))
00602 {
00603 char *s;
00604 temp_term = p2p_arg(options, 1);
00605 s=p2c_string( temp_term);
00606
00607 if ( streq(s, "token") )
00608 p->dtd->number_mode = NU_TOKEN;
00609 else if ( streq(s, "integer") )
00610 p->dtd->number_mode = NU_INTEGER;
00611
00612 else
00613 return FALSE;
00614
00615 }
00616
00617 else if( streq( funcname, "doctype"))
00618 {
00619 char *s;
00620 temp_term = p2p_arg(options, 1);
00621
00622 if( is_var( temp_term))
00623 p->enforce_outer_element = NULL;
00624 else
00625 {
00626 if( !(s=p2c_string(temp_term) ))
00627 return FALSE;
00628 p->enforce_outer_element = dtd_add_symbol(p->dtd, s);
00629
00630 }
00631
00632 }
00633
00634 }
00635 return TRUE;
00636
00637 }
00638
00645 DllExport int call_conv pl_allocate_error_term()
00646 {
00647 global_error_term = reg_term(1);
00648 global_warning_term = reg_term(2);
00649 return TRUE;
00650 }
00651
00656 DllExport int call_conv pl_finalize_warn()
00657 {
00658
00659 prolog_term tmp, tmp1;
00660
00661 tmp = reg_term(1);
00662 while( is_list( tmp)){
00663 tmp1 = p2p_cdr( tmp);
00664 tmp = tmp1;
00665 }
00666 if( is_var( tmp)){
00667 c2p_nil(tmp);
00668 }
00669 return TRUE;
00670 }
00671
00672
00680 DllExport int call_conv pl_sgml_parse()
00681 {
00682 dtd_parser *p;
00683 parser_data *pd;
00684 parser_data *oldpd;
00685
00686 prolog_term head , parser, options, tail, tmp1;
00687 FILE *in = NULL;
00688 struct stat stbuf;
00689
00690 int recursive, has_content_length = FALSE, content_length = 0, its_a_url = 0, source_len = 0;
00691
00692 char *str, *source=NULL, fname[MAXSTRLEN], *tmpsource=NULL;
00693
00694 parser = reg_term(1);
00695 options = reg_term(2);
00696 tail = options;
00697
00698
00699 if ( !get_parser(parser, &p) )
00700 return FALSE;
00701
00702
00703 if( p->closure)
00704 {
00705 recursive = TRUE;
00706 oldpd = p->closure;
00707
00708 if ( oldpd->magic != PD_MAGIC || oldpd->parser != p )
00709 return sgml2pl_error(ERR_MISC, "sgml", "Parser associated with illegal data");
00710
00711 pd = calloc(1, sizeof(*pd));
00712 *pd = *oldpd;
00713 p->closure = pd;
00714
00715 its_a_url = pd->its_a_url;
00716 if(its_a_url == 1)
00717 source = (char *) pd->source;
00718 else if( its_a_url == 0)
00719 in = pd->source;
00720
00721 }
00722 else
00723 {
00724 recursive = FALSE;
00725 oldpd = NULL;
00726
00727 set_mode_dtd_parser(p, DM_DATA);
00728
00729
00730 p->on_begin_element = on_begin;
00731 p->on_end_element = on_end;
00732 p->on_entity = on_entity;
00733 p->on_pi = on_pi;
00734 p->on_data = on_cdata;
00735 p->on_error = on_error;
00736 p->on_decl = on_decl;
00737 pd = new_parser_data(p);
00738 }
00739
00740
00741
00742 if(!is_list(tail))
00743 return sgml2pl_error( ERR_DOMAIN, "source", tail);
00744
00745 while(is_list(tail)){
00746 head = p2p_car(tail);
00747 tmp1 = p2p_cdr(tail);
00748 tail = tmp1;
00749
00750
00751 if(is_functor( head)){
00752 str = p2c_functor( head);
00753
00754
00755
00756 if(!strcmp(str,"document")){
00757 pd->list = p2p_arg( head, 1);
00758 pd->tail = pd->list;
00759 pd->stack = NULL;
00760 }
00761
00762 else if(!strcmp(str,"source")){
00763
00764 prolog_term temp_term1, temp_term2 = 0;
00765 char server[MAXSTRLEN], * tmpstr=NULL;
00766
00767 temp_term1 = p2p_arg( head, 1);
00768
00769
00770 if( is_functor( temp_term1)){
00771
00772 tmpstr = p2c_functor( temp_term1);
00773
00774
00775 if( !strcmp("url", tmpstr)){
00776
00777 temp_term2 = p2p_arg(temp_term1, 1);
00778 tmpsource = p2c_string(temp_term2);
00779 source = malloc( strlen(tmpsource));
00780 strcpy( source, tmpsource);
00781
00782
00783 if(parse_url( source, server, fname) != FALSE)
00784 {
00785
00786 if( !strcmp( server, "file")){
00787 if(!(in = fopen( fname, "rb"))){
00788 return sgml2pl_error(ERR_EXISTENCE, "file", temp_term2);
00789 }
00790 its_a_url = 0;
00791 fstat( fileno( in), &stbuf);
00792 source_len = stbuf.st_size;
00793 }
00794
00795 else{
00796
00797 if(get_file_www( server, fname, &source) == FALSE){
00798 return sgml2pl_error(ERR_MISC, "url", source);
00799 }
00800 else{
00801 source_len = strlen( source);
00802 its_a_url = 1;
00803 }
00804 }
00805 }
00806 else
00807 {
00808 return sgml2pl_error(ERR_DOMAIN, "url", temp_term2);
00809 }
00810 }
00811
00812 else if ( !strcmp( "file", tmpstr)){
00813
00814 temp_term2 = p2p_arg(temp_term1, 1);
00815 source = p2c_string(temp_term2);
00816 if(!(in = fopen( source, "rb"))){
00817 return sgml2pl_error(ERR_EXISTENCE, "file", temp_term2);
00818 }
00819 its_a_url = 0;
00820 fstat( fileno( in), &stbuf);
00821 source_len = stbuf.st_size;
00822 set_src_dtd_parser(p, IN_FILE, source);
00823 }
00824
00825 else if ( !strcmp( "string", tmpstr)){
00826
00827 temp_term2 = p2p_arg(temp_term1, 1);
00828 source = p2c_string( temp_term2);
00829 source_len = strlen( source);
00830 its_a_url = 1;
00831 }
00832 else{
00833 return sgml2pl_error( ERR_MISC, "source", temp_term2);
00834 }
00835 }
00836 else{
00837 return sgml2pl_error( ERR_MISC, "source", "Improper input format");
00838 }
00839 }
00840
00841 else if( !strcmp(str,"content_length")){
00842
00843 prolog_term temp_term1, temp_term2;
00844 char * tmp;
00845
00846 temp_term1 = p2p_arg( head, 1);
00847 tmp = p2c_functor( temp_term1);
00848 temp_term2 = p2p_arg( temp_term1, 1);
00849 tmp = p2c_functor( temp_term2);
00850 content_length = p2c_int( temp_term2);
00851 has_content_length = TRUE;
00852
00853 }
00854
00855 else if( !strcmp(str,"parse"))
00856 {
00857 char *s;
00858
00859 prolog_term temp_term;
00860
00861 temp_term = p2p_arg( head, 1);
00862
00863 s = p2c_string(temp_term);
00864
00865 if(streq(s,"element"))
00866 pd->stopat = SA_ELEMENT;
00867 else if ( streq(s, "content") )
00868 pd->stopat = SA_CONTENT;
00869 else if ( streq(s, "file") )
00870 pd->stopat = SA_FILE;
00871 else if ( streq(s, "input") )
00872 pd->stopat = SA_INPUT;
00873 else if ( streq(s, "declaration") )
00874 pd->stopat = SA_DECL;
00875 else
00876 {
00877 return sgml2pl_error(ERR_DOMAIN, "parse", temp_term);
00878 }
00879
00880 }
00881
00882 else if( !strcmp( str, "syntax_errors")){
00883 char *s;
00884
00885 prolog_term temp_term;
00886
00887 temp_term = p2p_arg( head, 1);
00888
00889 s = p2c_string(temp_term);
00890
00891 if ( streq(s, "quiet") )
00892 pd->error_mode = EM_QUIET;
00893 else if ( streq(s, "print") )
00894 pd->error_mode = EM_PRINT;
00895 else if ( streq(s, "style") )
00896 pd->error_mode = EM_STYLE;
00897 else
00898 return sgml2pl_error(ERR_DOMAIN, "syntax_error", temp_term);
00899
00900 }
00901
00902 else if( !strcmp( str, "positions")){
00903 char *s=NULL;
00904
00905 prolog_term temp_term = 0;
00906
00907 temp_term = p2p_arg( head, 1);
00908
00909 s = p2c_string(temp_term);
00910
00911 if ( streq(s, "true") )
00912 pd->positions = TRUE;
00913 else if ( streq(s, "false") )
00914 pd->positions = FALSE;
00915 else
00916 return sgml2pl_error(ERR_DOMAIN, "positions", temp_term);
00917 }
00918
00919 }
00920 else{
00921 return sgml2pl_error(ERR_DOMAIN, "source", head);
00922 }
00923 }
00924
00925 #define CHECKERROR \
00926 if ( pd->errors > pd->max_errors && pd->max_errors >= 0 ) \
00927 return sgml2pl_error(ERR_LIMIT, "max_errors", (long)pd->max_errors);
00928
00929 if ( pd->stopat == SA_CONTENT && p->empty_element )
00930 goto out;
00931
00932
00933 if(in || its_a_url)
00934 {
00935 int eof = FALSE;
00936 int i = 0;
00937
00938 if(!recursive)
00939 {
00940 pd->its_a_url = its_a_url;
00941 if ( its_a_url ==1)
00942 {
00943 pd->source = source;
00944 }
00945 else if( its_a_url ==0)
00946 pd->source = in;
00947 }
00948
00949 while( !eof)
00950 {
00951 char c=0;
00952 char ateof = FALSE;
00953
00954 if ( has_content_length )
00955 {
00956 if ( content_length <= 0 )
00957 c = EOF;
00958 else
00959 {
00960 if (its_a_url == 1)
00961 {
00962 c = source[i++];
00963 if (i == source_len)
00964 {
00965 ateof = TRUE;
00966 }
00967 }
00968 else if(its_a_url == 0)
00969 {
00970 c = fgetc(in);
00971 source_len=source_len -1;
00972 if( source_len <= 0)
00973 ateof = TRUE;
00974
00975 }
00976 }
00977
00978 if(!ateof)
00979 ateof = (--content_length <= 0);
00980
00981 }
00982 else
00983 {
00984 if (its_a_url == 1)
00985 {
00986 c = source[i++];
00987 if (i == source_len)
00988 {
00989 ateof = TRUE;
00990 }
00991 }
00992 else if( its_a_url ==0)
00993 {
00994 c = fgetc(in);
00995 source_len=source_len -1;
00996 if( source_len <= 0)
00997 ateof = TRUE;
00998
00999
01000 }
01001 }
01002
01003 if(ateof)
01004 {
01005 eof = TRUE;
01006 if ( c == LF )
01007 {
01008 c = CR;
01009 }
01010 else if ( c != CR )
01011 {
01012 putchar_dtd_parser(p, c);
01013 if ( pd->stopped )
01014 goto stopped;
01015 c = CR;
01016
01017 }
01018 }
01019 putchar_dtd_parser( p, c);
01020 if ( pd->stopped )
01021 {
01022 stopped:
01023
01024 pd->stopped = FALSE;
01025 if ( pd->stopat != SA_CONTENT )
01026 reset_document_dtd_parser(p);
01027 goto out;
01028 }
01029
01030 }
01031 if ( !recursive && pd->stopat != SA_INPUT )
01032 end_document_dtd_parser(p);
01033
01034 out:
01035
01036 if( !is_nil( pd->tail))
01037 {
01038 c2p_nil(pd->tail);
01039 }
01040 if ( recursive )
01041 {
01042 p->closure = oldpd;
01043 }
01044 else
01045 {
01046 p->closure = NULL;
01047 }
01048
01049 pd->magic = 0;
01050 free(pd);
01051
01052 if (its_a_url == 0)
01053 fclose(in);
01054 return TRUE;
01055 }
01056
01057 return TRUE;
01058 }
01059
01066 DllExport int call_conv pl_open_dtd()
01067 { dtd *dtd;
01068 dtd_parser *p;
01069 parser_data *pd;
01070
01071
01072 prolog_term ref, options, tail, head, tmp1;
01073
01074 FILE * in = NULL;
01075
01076 char *str, file[MAXSTRLEN], server[MAXSTRLEN], *fname=NULL, *tmpfname=NULL;
01077 int its_a_url = 0;
01078 struct stat stbuf;
01079 int source_len = 0;
01080
01081 ref = reg_term(1);
01082 options = reg_term(2);
01083
01084
01085 if ( !get_dtd(ref, &dtd) )
01086 return FALSE;
01087
01088
01089 p = new_dtd_parser(dtd);
01090 p->dmode = DM_DTD;
01091 pd = new_parser_data(p);
01092 pd->free_on_close = TRUE;
01093
01094 tail = options;
01095
01096 while(is_list(tail))
01097 {
01098 head = p2p_car(tail);
01099 tmp1 = p2p_cdr(tail);
01100 tail = tmp1;
01101
01102
01103 if(is_functor( head)){
01104 str = p2c_functor( head);
01105
01106
01107 if(!strcmp(str,"source")){
01108
01109 prolog_term temp_term1, temp_term2;
01110 char * tmpstr = NULL;
01111 temp_term1 = p2p_arg( head, 1);
01112 tmpstr = p2c_functor(temp_term1);
01113
01114
01115
01116 if(!strcmp( tmpstr, "url")){
01117 temp_term2 = p2p_arg(temp_term1, 1);
01118 tmpfname = p2c_string(temp_term2);
01119 fname = malloc( strlen(tmpfname));
01120 strcpy( fname, tmpfname);
01121 if( parse_url( fname, server, file) != FALSE) {
01122
01123 source_len = 0;
01124
01125
01126 if( !strcmp( server, "file")){
01127 if(!(in = fopen( file, "rb"))){
01128 return sgml2pl_error(ERR_EXISTENCE, "file", temp_term2);
01129 }
01130 its_a_url = 0;
01131 fstat( fileno( in), &stbuf);
01132 source_len = stbuf.st_size;
01133 }
01134
01135 else{
01136
01137 if( get_file_www( server, file, &fname) == FALSE){
01138 return sgml2pl_error( ERR_MISC, "url", fname);
01139 }
01140 else{
01141 its_a_url = 1;
01142 source_len = strlen( fname);
01143 }
01144 }
01145 }
01146 else
01147 {
01148 return sgml2pl_error( ERR_DOMAIN, "url", temp_term2);
01149 }
01150 }
01151
01152 else if( !strcmp( tmpstr, "file")){
01153 temp_term2 = p2p_arg( temp_term1, 1);
01154 fname = p2c_string( temp_term2);
01155 its_a_url = 0;
01156 if(!(in = fopen( fname, "r"))){
01157 return sgml2pl_error(ERR_EXISTENCE, "File", temp_term2);
01158
01159 }
01160 fstat( fileno( in), &stbuf);
01161 source_len = stbuf.st_size;
01162 }
01163
01164 else if(!strcmp( tmpstr, "string")){
01165 its_a_url = 1;
01166 temp_term2 = p2p_arg( temp_term1, 1);
01167 fname = p2c_string( temp_term2);
01168 source_len = strlen( fname );
01169 }
01170 else{
01171 return FALSE;
01172 }
01173 }
01174 }
01175 }
01176
01177
01178
01179 if ( !pd->parser || pd->parser->magic != SGML_PARSER_MAGIC ){
01180 errno = EINVAL;
01181 return FALSE;
01182 }
01183
01184 if ( (pd->errors > pd->max_errors && pd->max_errors >= 0) || pd->stopped ){
01185 errno = EIO;
01186 return FALSE;
01187 }
01188
01189
01190 if (its_a_url == 1) {
01191 int i = 0;
01192 source_len = strlen(fname);
01193
01194 for( i=0; i<source_len ; i++){
01195 putchar_dtd_parser(pd->parser, fname[i]);
01196 }
01197 }
01198
01199 else if( its_a_url == 0)
01200 {
01201 char c;
01202 int i = 0;
01203
01204 for( i=0;i<source_len;i++)
01205 {
01206 c = fgetc(in);
01207 putchar_dtd_parser(pd->parser, c);
01208 }
01209 fclose(in);
01210 }
01211 return TRUE;
01212 }
01213
01219 DllExport int call_conv pl_free_sgml_parser()
01220 {
01221 dtd_parser *p;
01222 prolog_term parser;
01223
01224 parser = reg_term(1);
01225
01226 if ( get_parser(parser, &p) )
01227 {
01228 free_dtd_parser(p);
01229 return TRUE;
01230 }
01231
01232 return FALSE;
01233 }
01234
01239 DllExport int call_conv pl_free_dtd()
01240 { dtd *dtd;
01241
01242 prolog_term dtd_term;
01243
01244 dtd_term = reg_term(1);
01245
01246 if ( get_dtd(dtd_term, &dtd) )
01247 {
01248 free_dtd(dtd);
01249 return TRUE;
01250 }
01251
01252 return FALSE;
01253 }
01254
01255
01261 static parser_data *
01262 new_parser_data(dtd_parser *p)
01263 {
01264 parser_data *pd;
01265
01266 pd = calloc(1, sizeof(*pd));
01267 pd->magic = PD_MAGIC;
01268 pd->parser = p;
01269 pd->max_errors = MAX_ERRORS;
01270 pd->max_warnings = MAX_WARNINGS;
01271 pd->error_mode = EM_PRINT;
01272 p->closure = pd;
01273
01274 return pd;
01275 }
01276
01280 static dtd_srcloc *
01281 file_location(dtd_parser *p, dtd_srcloc *l)
01282 {
01283 while(l->parent && l->type != IN_FILE)
01284 l = l->parent;
01285
01286 return l;
01287 }
01288
01289 static int
01290 can_end_omitted(dtd_parser *p)
01291 { sgml_environment *env;
01292
01293 for(env=p->environments; env; env = env->parent)
01294 {
01295 dtd_element *e = env->element;
01296
01297 if ( !(e->structure && e->structure->omit_close) )
01298 return FALSE;
01299 }
01300
01301 return TRUE;
01302 }
01303
01311 static int
01312 on_error(dtd_parser *p, dtd_error *error)
01313 {
01314 parser_data *pd = p->closure;
01315 const char *severity;
01316
01317 if ( pd->stopped )
01318 return TRUE;
01319
01320 if ( pd->stopat == SA_ELEMENT &&
01321 (error->minor == ERC_NOT_OPEN || error->minor == ERC_NOT_ALLOWED) && can_end_omitted(p) )
01322 {
01323 end_document_dtd_parser(p);
01324 sgml_cplocation(&p->location, &p->startloc);
01325 pd->stopped = TRUE;
01326 return TRUE;
01327 }
01328
01329 switch(error->severity)
01330 {
01331 case ERS_STYLE:
01332 if ( pd->error_mode != EM_STYLE )
01333 return TRUE;
01334 severity = "informational";
01335 break;
01336 case ERS_WARNING:
01337 pd->warnings++;
01338 severity = "warning";
01339 break;
01340 case ERS_ERROR:
01341 default:
01342 pd->errors++;
01343 severity = "error";
01344 break;
01345 }
01346
01347
01348 if ( pd->error_mode != EM_QUIET )
01349 {
01350
01351
01352 prolog_term temp_term1 = p2p_new();
01353 prolog_term temp_term2 = p2p_new();
01354 prolog_term tmptail, tmp;
01355 dtd_srcloc *l = file_location(p, &p->startloc);
01356
01357
01358
01359 c2p_functor( "sgml", 4, temp_term1);
01360 unify_parser(p2p_arg(temp_term1, 1), p);
01361 c2p_string( (l->name ? (char*) l->name : "[]"), p2p_arg( temp_term1, 2));
01362 c2p_int( l->line, p2p_arg( temp_term1, 3));
01363 c2p_string( error->plain_message, p2p_arg( temp_term1, 4));
01364
01365 c2p_functor( (char*)severity, 1, temp_term2);
01366 p2p_unify( temp_term1, p2p_arg( temp_term2, 1));
01367
01368
01369 if(!strcmp(severity, "error")){
01370 p2p_unify( global_error_term, temp_term2);
01371 }
01372 else
01373 {
01374 tmptail = global_warning_term;
01375 while( is_list( tmptail))
01376 {
01377 tmp = p2p_cdr(tmptail);
01378 tmptail = tmp;
01379 }
01380 c2p_list(tmptail);
01381 p2p_unify( p2p_car(tmptail), temp_term2);
01382 }
01383 }
01384
01385 return TRUE;
01386 }
01387
01393 static int
01394 on_decl(dtd_parser *p, const ichar *decl)
01395 {
01396 parser_data *pd = p->closure;
01397
01398 if ( pd->stopped )
01399 return TRUE;
01400
01401 if ( pd->stopat == SA_DECL )
01402 pd->stopped = TRUE;
01403
01404 return TRUE;
01405
01406 }
01407
01414 static int
01415 on_begin(dtd_parser *p, dtd_element *e, int argc, sgml_attribute *argv)
01416 {
01417 parser_data *pd = p->closure;
01418 env *env1;
01419
01420
01421 if ( pd->stopped )
01422 return TRUE;
01423
01424 if(pd->tail)
01425 {
01426
01427 prolog_term et = p2p_new();
01428
01429
01430 prolog_term tmp, content;
01431
01432 tmp = p2p_new();
01433
01434
01435
01436 c2p_functor( "element", 3, et);
01437
01438 put_element_name(p, p2p_arg( et, 1) , e);
01439
01440
01441
01442 unify_attribute_list( p, p2p_arg( et, 2), argc, argv);
01443
01444 c2p_list(tmp);
01445
01446 if(!p2p_unify( pd->tail, tmp))
01447 return FALSE;
01448
01449 tmp = p2p_car( pd->tail);
01450
01451 if(!p2p_unify( tmp, et))
01452 return FALSE;
01453
01454 content = p2p_arg( tmp, 3);
01455
01456 tmp = p2p_cdr( pd->tail);
01457 pd->tail = tmp;
01458
01459
01460 env1 = sgml_calloc(1, sizeof(struct _env *));
01461 env1->tail = pd->tail;
01462 env1->parent = pd->stack;
01463 pd->stack = env1;
01464
01465
01466 pd->tail = content;
01467 }
01468 return TRUE;
01469 }
01470
01471
01477 static int
01478 unify_attribute_list(dtd_parser *p, prolog_term alist,
01479 int argc, sgml_attribute *argv)
01480 {
01481 int i;
01482
01483
01484 prolog_term tail = alist;
01485 prolog_term temp_term[2];
01486 prolog_term tmp, tmp1;
01487
01488 for( i = 0 ; i<argc; i++)
01489 {
01490 tmp = p2p_new();
01491 temp_term[0] = p2p_new();
01492 temp_term[1] = p2p_new();
01493 tmp1 = p2p_new();
01494
01495 put_attribute_name(p, temp_term[0], argv[i].definition->name);
01496 put_attribute_value(p, temp_term[1], &argv[i]);
01497
01498
01499 c2p_functor( "=", 2, tmp);
01500 p2p_unify( p2p_arg( tmp, 1), temp_term[0]);
01501 p2p_unify( p2p_arg( tmp, 2), temp_term[1]);
01502
01503 c2p_list( tmp1);
01504
01505 if( !p2p_unify( tail, tmp1))
01506 return FALSE;
01507
01508 tmp1 = p2p_car( tail);
01509
01510 if(!p2p_unify( tmp1, tmp))
01511 return FALSE;
01512
01513 tmp1 = p2p_cdr(tail);
01514 tail = tmp1;
01515 }
01516
01517 tmp1 = p2p_new();
01518 c2p_nil( tmp1);
01519
01520 if(!p2p_unify( tail, tmp1))
01521 return FALSE;
01522
01523 return TRUE;
01524 }
01525
01530 static void
01531 put_attribute_name(dtd_parser *p, prolog_term t, dtd_symbol *nm)
01532 {
01533
01534 const ichar *url, *local;
01535
01536 if ( p->dtd->dialect == DL_XMLNS )
01537 {
01538 xmlns_resolve_attribute(p, nm, &local, &url);
01539 if(url)
01540 {
01541
01542 c2p_functor( ":", 2, t);
01543 put_url(p, p2p_arg( t, 1), url);
01544 c2p_string( (char*)local, p2p_arg( t, 2));
01545 }
01546 else
01547 {
01548 c2p_string( (char*)local, t);
01549 }
01550
01551 }
01552 else
01553 {
01554 c2p_string( nm->name, t);
01555 }
01556 }
01557
01562 static ichar *
01563 istrblank(const ichar *s)
01564 { for( ; *s; s++ )
01565 { if ( isspace(*s) )
01566 return (ichar *)s;
01567 }
01568
01569 return NULL;
01570 }
01571
01575 static void
01576 put_attribute_value(dtd_parser *p, prolog_term t, sgml_attribute *a)
01577 {
01578 switch(a->definition->type)
01579 { case AT_CDATA:
01580 c2p_string( a->value.cdata, t);
01581 break;
01582 case AT_NUMBER:
01583 {
01584 if ( a->value.text )
01585 c2p_string( a->value.text, t);
01586 else
01587 c2p_int( a->value.number, t);
01588 break;
01589 }
01590 default:
01591 {
01592 const ichar *val = a->value.text;
01593 const ichar *e;
01594 prolog_term tmp;
01595
01596 if ( a->definition->islist )
01597 {
01598 prolog_term tail, head;
01599
01600 tail = t;
01601 for(e=istrblank(val); e; val = e+1, e=istrblank(val))
01602 {
01603 if ( e == val )
01604 continue;
01605
01606 tmp = p2p_new();
01607 c2p_list( tmp);
01608
01609 p2p_unify( tail, tmp);
01610
01611 head = p2p_car( tail);
01612 tmp = p2p_cdr( tail);
01613 tail = tmp;
01614 unify_listval(p, head, a->definition->type, e-val, val);
01615 }
01616
01617 tmp = p2p_new();
01618 c2p_list( tmp);
01619
01620 p2p_unify( tail, tmp);
01621
01622
01623 head = p2p_car( tail);
01624 tmp = p2p_cdr( tail);
01625 tail = tmp;
01626 unify_listval(p, head, a->definition->type, e-val, val);
01627
01628 c2p_nil( tmp);
01629 p2p_unify( tmp, tail);
01630
01631 }
01632 else
01633 c2p_string( (char*)val, t);
01634
01635 }
01636 }
01637
01638 }
01639
01640 static int
01641 unify_listval(dtd_parser *p, prolog_term t, attrtype type, int len, const char *text)
01642 {
01643 prolog_term tmp = p2p_new();
01644 if ( type == AT_NUMBERS && p->dtd->number_mode == NU_INTEGER )
01645 {
01646 char *e;
01647 long v = strtol(text, &e, 10);
01648
01649 if ( e-text == len && errno != ERANGE )
01650 {
01651 c2p_int( v, tmp);
01652 return p2p_unify( t, tmp);
01653 }
01654
01655 }
01656
01657 c2p_string( (char*)text, tmp);
01658
01659
01660 return p2p_unify( t, tmp);
01661 }
01662
01668 static int
01669 on_entity(dtd_parser *p, dtd_entity *e, int chr)
01670 {
01671 parser_data *pd = p->closure;
01672
01673 if ( pd->stopped )
01674 return TRUE;
01675
01676 if(pd->tail)
01677 {
01678
01679 prolog_term h, tmp, tmp2, tmp1;
01680
01681 tmp1 = p2p_new();
01682 c2p_list(tmp1);
01683
01684
01685 if(p2p_unify( pd->tail, tmp1))
01686 {
01687 h = p2p_car(pd->tail);
01688 tmp = p2p_cdr(pd->tail);
01689 pd->tail = tmp;
01690 tmp2 = p2p_new();
01691
01692
01693 if(e)
01694 {
01695
01696 c2p_functor( "entity", 1 , tmp2);
01697 c2p_string( e->name->name, p2p_arg( tmp2, 1));
01698 p2p_unify( h, tmp2);
01699
01700 }
01701 else
01702 {
01703 c2p_functor( "entity", 1, tmp2);
01704 c2p_int( chr, p2p_arg( tmp2, 1));
01705 p2p_unify( h, tmp2);
01706 }
01707 }
01708 }
01709 return TRUE;
01710 }
01711
01719 static int
01720 on_pi(dtd_parser *p, const ichar *pi)
01721 {
01722 parser_data *pd = p->closure;
01723 if ( pd->stopped )
01724 return TRUE;
01725
01726 if ( pd->tail )
01727 {
01728 prolog_term head, tmp1, tmp;
01729
01730 tmp = p2p_new();
01731 c2p_list( tmp);
01732
01733
01734 if( p2p_unify(pd->tail, tmp))
01735 {
01736 head = p2p_car(pd->tail);
01737 tmp = p2p_cdr(pd->tail);
01738 pd->tail = tmp;
01739
01740 tmp1 = p2p_new();
01741
01742 c2p_functor("pi", 1, tmp1);
01743 c2p_string( (char*)pi, p2p_arg( tmp1, 1));
01744
01745 p2p_unify( head, tmp1);
01746 }
01747 }
01748 return TRUE;
01749 }
01750
01756 static int
01757 on_cdata(dtd_parser *p, data_type type, int len, const ochar *data)
01758 {
01759 parser_data *pd = p->closure;
01760 int rval=0;
01761 if ( pd->tail && !pd->stopped )
01762 {
01763
01764 prolog_term head, tmp, tmp1;
01765
01766 tmp1 = p2p_new();
01767
01768 tmp = p2p_new();
01769 c2p_list( tmp);
01770
01771
01772 if(p2p_unify(pd->tail, tmp))
01773 {
01774 head = p2p_car( pd->tail);
01775 tmp = p2p_cdr( pd->tail);
01776 pd->tail = tmp;
01777
01778 switch(type)
01779 {
01780 case EC_CDATA:
01781 c2p_string( (char*)data, tmp1);
01782 p2p_unify( tmp1, head);
01783 break;
01784 case EC_SDATA:
01785 {
01786 prolog_term data_term = p2p_new();
01787
01788 c2p_functor( "sdata", 1, data_term);
01789 c2p_string( (char*)data, p2p_arg( data_term, 1));
01790
01791 rval = p2p_unify( head, data_term);
01792 break;
01793 }
01794 case EC_NDATA:
01795 {
01796 prolog_term data_term = p2p_new();
01797
01798 c2p_functor( "ndata", 1, data_term);
01799 c2p_string( (char*)data, p2p_arg( data_term, 1));
01800
01801 rval = p2p_unify( head, data_term);
01802 break;
01803 }
01804 default:
01805 rval = FALSE;
01806 assert(0);
01807 }
01808 if (rval)
01809 {
01810 return TRUE;
01811 }
01812 }
01813
01814 }
01815 return FALSE;
01816 }
01817
01823 static int
01824 on_end(dtd_parser *p, dtd_element *e)
01825 {
01826 parser_data *pd = p->closure;
01827
01828
01829
01830 prolog_term tmp;
01831
01832 tmp = p2p_new();
01833 c2p_nil(tmp);
01834
01835 if(pd->stopped)
01836 return TRUE;
01837
01838 if ( pd->tail && !pd->stopped )
01839 {
01840 if( !is_nil( pd->tail))
01841 {
01842 p2p_unify( pd->tail, tmp);
01843 }
01844 if ( pd->stack )
01845 {
01846 env *parent = pd->stack->parent;
01847 pd->tail = pd->stack->tail;
01848 sgml_free(pd->stack);
01849 pd->stack = parent;
01850 }
01851 else
01852 {
01853 if ( pd->stopat == SA_CONTENT )
01854 pd->stopped = TRUE;
01855 }
01856 }
01857
01858 if ( pd->stopat == SA_ELEMENT && !p->environments->parent )
01859 pd->stopped = TRUE;
01860
01861 return TRUE;
01862 }
01863
01864
01870 static void
01871 put_element_name(dtd_parser *p, prolog_term t, dtd_element *e)
01872 {
01873 const ichar *url, *local;
01874
01875 if ( p->dtd->dialect == DL_XMLNS)
01876 {
01877 assert(p->environments->element == e);
01878 xmlns_resolve_element(p, &local, &url);
01879
01880 if(url)
01881 {
01882
01883 c2p_functor( ":", 2, t);
01884 put_url(p, p2p_arg( t, 1), url);
01885 c2p_string( (char*)local, p2p_arg( t, 2));
01886
01887 }
01888 else
01889 {
01890 c2p_string( (char*)local, t);
01891 }
01892
01893 }
01894 else
01895 c2p_string ( e->name->name, t);
01896
01897 return;
01898 }
01899
01900
01912 static void
01913 put_url(dtd_parser *p, prolog_term t, const ichar *url)
01914 {
01915 parser_data *pd = p->closure;
01916
01917 if ( !pd->on_urlns )
01918 {
01919 c2p_string( (char*) url, t);
01920 return;
01921 }
01922 }
01923
01924
01929 #define CHARSET MAXSTRLEN
01930
01931 static int
01932 do_quote(prolog_term in, prolog_term quoted, char **map)
01933 { char *ins;
01934 unsigned len;
01935 unsigned char *s;
01936 char outbuf[1024];
01937 char *out = outbuf;
01938 int outlen = sizeof(outbuf);
01939 int o = 0;
01940 int changes = 0;
01941
01942 prolog_term tmp = 0;
01943
01944 ins = p2c_string( in);
01945
01946 len = strlen( ins);
01947
01948 if ( len == 0 )
01949 return p2p_unify(in, quoted);
01950
01951 for(s = (unsigned char*)ins ; len-- > 0; s++ )
01952 { int c = *s;
01953
01954 if ( map[c] )
01955 { int l = strlen(map[c]);
01956 if ( o+l >= outlen )
01957 { outlen *= 2;
01958
01959 if ( out == outbuf )
01960 { out = malloc(outlen);
01961 memcpy(out, outbuf, sizeof(outbuf));
01962 } else
01963 { out = realloc(out, outlen);
01964 }
01965 }
01966 memcpy(&out[o], map[c], l);
01967 o += l;
01968 changes++;
01969 } else
01970 { if ( o >= outlen-1 )
01971 { outlen *= 2;
01972
01973 if ( out == outbuf )
01974 { out = malloc(outlen);
01975 memcpy(out, outbuf, sizeof(outbuf));
01976 } else
01977 { out = realloc(out, outlen);
01978 }
01979 }
01980 out[o++] = c;
01981 }
01982 }
01983 out[o]= 0;
01984
01985 if ( changes > 0 )
01986 {
01987 c2p_string( out, tmp);
01988 return p2p_unify( quoted, tmp);
01989 }
01990 else
01991 return p2p_unify(in, quoted);
01992 }
01993
01997 DllExport int call_conv pl_xml_quote_attribute()
01998 {
01999 prolog_term in = reg_term(1);
02000 prolog_term out = reg_term(2);
02001 static char **map;
02002
02003 if ( !map )
02004 { int i;
02005
02006 if ( !(map = malloc(CHARSET*sizeof(char*))) )
02007 return sgml2pl_error(ERR_ERRNO, errno);
02008
02009 for(i=0; i<CHARSET; i++)
02010 map[i] = NULL;
02011
02012 map['<'] = "<";
02013 map['>'] = ">";
02014 map['&'] = "&";
02015 map['\''] = "'";
02016 map['"'] = """;
02017 }
02018
02019 return do_quote(in, out, map);
02020 }
02021
02025 DllExport int call_conv pl_xml_quote_cdata()
02026 {
02027 prolog_term in = reg_term(1);
02028 prolog_term out = reg_term(2);
02029 static char **map;
02030
02031 if ( !map )
02032 { int i;
02033
02034 if ( !(map = malloc(CHARSET*sizeof(char*))) )
02035 return sgml2pl_error(ERR_ERRNO, errno);
02036
02037 for(i=0; i<CHARSET; i++)
02038 map[i] = NULL;
02039
02040 map['<'] = "<";
02041 map['>'] = ">";
02042 map['&'] = "&";
02043 }
02044
02045 return do_quote(in, out, map);
02046 }
02047
02048 DllExport int call_conv pl_xml_name()
02049 { char *ins;
02050 unsigned len;
02051 static dtd_charclass *map;
02052 unsigned int i;
02053 prolog_term in = reg_term(1);
02054
02055
02056 if ( !map )
02057 map = new_charclass();
02058
02059 ins = p2c_string( in);
02060
02061 len = strlen( ins);
02062
02063 if ( len == 0 )
02064 return FALSE;
02065 if ( !(map->class[ins[0] & 0xff] & CH_NMSTART) )
02066 return FALSE;
02067 for(i=1; i<len; i++)
02068 {
02069 if ( !(map->class[ins[i] & 0xff] & CH_NAME) )
02070 return FALSE;
02071 }
02072
02073 return TRUE;
02074 }
02075
02076
02077
02078
02079
02080
02081
02082
02083
02084
02085