sgml2pl.c

00001 /***************************************************************************
00002  *                           sgml2pl.c
00003  * This is the main file. It contains the interface to the parser internal
00004  * functions. It also contains the callback functions which are invoked 
00005  * when certain parts of the xml document are encountered.
00006  *
00007  *************************************************************************/
00008 
00009 
00010 #include "xsb_config.h"
00011 #ifdef WIN_NT
00012 #define XSB_DLL
00013 #endif
00014 #include "cinterf.h"
00015 #include <stdlib.h>
00016 #include "dtd.h"
00017 #include <stdio.h>
00018 #include <string.h>
00019 #include <stdio.h>
00020 #include <assert.h>
00021 #include "fetch_file.c"
00022 #include "parser.c"
00023 #include "charmap.c" 
00024 #include "util.c"
00025 #include "xmlns.c"
00026 #include "model.c"
00027 #include "error_term.h"
00028 #include "util.h"
00029 #include "basic_defs.h"
00030 
00031 #ifndef WIN_NT
00032 #include <sys/stat.h>
00033 #endif
00034 
00035 /*
00036 #include "socketcall.h"
00037 */
00038                                                                                 
/* Tag kept in parser_data.magic; pl_sgml_parse() compares against it before
   trusting a closure pointer found on a parser. */
#define PD_MAGIC        0x36472ba1

/* Numeric limits. MAXSTRLEN sizes the fixed server/file-name buffers used
   while decoding source options. */
#define MAX_ERRORS      50
#define MAX_WARNINGS    50
#define MAXSTRLEN       256
00044                                                                               
00045 
/* Selects how diagnostics are reported (see the syntax_errors option). */
typedef enum
{
  EM_QUIET = 0,   /* suppress messages */
  EM_PRINT,       /* print messages */
  EM_STYLE        /* additionally include style messages */
} errormode;
00051 
/* Selects the point at which a parse run stops (see the parse option). */
typedef enum
{
  SA_FILE = 0,    /* stop at end-of-file */
  SA_INPUT,       /* do not complete the input */
  SA_ELEMENT,     /* stop after the first element */
  SA_CONTENT,     /* stop after the close */
  SA_DECL         /* stop after a declaration */
} stopat;
00059 
00060 typedef struct _env
00061 { prolog_term        tail;
00062   struct _env *parent;
00063 } env;
00064                                                                                 
00065 typedef struct _parser_data
00066 { int         magic;                    /* PD_MAGIC */
00067   dtd_parser *parser;                   /* parser itself */
00068                                                                                 
00069   int         warnings;                 /* #warnings seen */
00070   int         errors;                   /* #errors seen */
00071   int         max_errors;               /* error limit */
00072   int         max_warnings;             /* warning limit */
00073   errormode   error_mode;               /* how to handle errors */
00074   int         positions;                /* report file-positions */
00075                                                                                 
00076   predicate_t on_begin;                 /* begin element */
00077   predicate_t on_end;                   /* end element */
00078   predicate_t on_cdata;                 /* cdata */
00079   predicate_t on_entity;                /* entity */
00080   predicate_t on_pi;                    /* processing instruction */
00081   predicate_t on_urlns;                 /* url --> namespace */
00082   predicate_t on_error;                 /* errors */
00083   predicate_t on_decl;                  /* declarations */
00084   
00085   stopat      stopat;                   /* Where to stop */
00086   int         stopped;                  /* Environment is complete */
00087   
00088   void*   source;                  /* Where we are reading from */
00089   int its_a_url;
00090   
00091   prolog_term      list;                     /* output term (if any) */
00092   prolog_term      tail;                     /* tail of the list */
00093   env        *stack;                    /* environment stack */
00094   int         free_on_close;            /* sgml_free parser on close */
00095 } parser_data;
00096 
00097 
00098 dtd_parser * parser_error = NULL;
00099 
00100 #include "error.c"
00101 
00102 
00103 dtd *
00104 new_dtd(const ichar *doctype);
00105                                                                                                  
00106 static int
00107 get_dtd(prolog_term t, dtd **dtdp);
00108 
00109 dtd_parser *
00110 new_dtd_parser(dtd *dtd);
00111 
00112 int unify_dtd( prolog_term t, dtd * dtd);
00113 
00114 int unify_parser( prolog_term t, dtd_parser * p);
00115 
00116 
00117 static int
00118 on_begin(dtd_parser *p, dtd_element *e, int argc, sgml_attribute *argv);
00119 
00120 static int
00121 on_end(dtd_parser *p, dtd_element *e);
00122                                                                           
00123 static int
00124 on_entity(dtd_parser *p, dtd_entity *e, int chr);
00125                                                                           
00126 static int
00127 on_pi(dtd_parser *p, const ichar *pi);
00128                                                                           
00129 static int
00130 on_cdata(dtd_parser *p, data_type type, int len, const ochar *data);
00131                                                                                 
00132 static void
00133 put_element_name(dtd_parser *p, prolog_term t, dtd_element *e);
00134 
00135 static int
00136 unify_attribute_list(dtd_parser *p, prolog_term alist,
00137                      int argc, sgml_attribute *argv);
00138 static parser_data *
00139 new_parser_data(dtd_parser *p);
00140 
00141 static void
00142 put_url(dtd_parser *p, prolog_term t, const ichar *url);
00143 
00144 static int
00145 on_error(dtd_parser *p, dtd_error *error);
00146                                                                           
00147 static int
00148 on_decl(dtd_parser *p, const ichar *decl);
00149 
00150 static void
00151 put_attribute_name(dtd_parser *p, prolog_term t, dtd_symbol *nm);
00152 
00153 static void
00154 put_attribute_value(dtd_parser *p, prolog_term t, sgml_attribute *a);
00155 
00156 
00157 static ichar *
00158 istrblank(const ichar *s);
00159 
00160 static int
00161 unify_listval(dtd_parser *p,  prolog_term t, attrtype type, int len, const char *text);
00162 
00163 static dtd_srcloc *
00164 file_location(dtd_parser *p, dtd_srcloc *l);
00165 
00166 static int
00167 can_end_omitted(dtd_parser *p);
00168 
00169 static int
00170 set_option_dtd( dtd *dtd, dtd_option option, char *set);
00171 
00172 
00173 
00180 DllExport int call_conv pl_new_sgml_parser()
00181 { 
00182   /*Temporary terms to parse the input from prolog side*/
00183   prolog_term head, tail, tmp, ref, tmp1;   
00184 
00185   /*Pointer to dtd and parser objects*/
00186   dtd *dtd = NULL;
00187   dtd_parser *p;
00188         
00189   char *str;
00190 
00191   tail = reg_term(2);
00192  
00193   /*Parsing the options list*/
00194   while(is_list(tail))
00195     {
00196       head = p2p_car(tail);
00197       tmp1 = p2p_cdr(tail);
00198       tail = tmp1;
00199       if(is_functor( head))
00200         {
00201 
00202           /*Extract the dtd pointer if present. Otherwise create the 
00203             dtd object*/
00204           str = p2c_functor( head);
00205           if(strcmp( str, "dtd_struct"))
00206             {
00207               return FALSE;
00208             }   
00209           tmp = p2p_arg(head, 1);
00210           if( is_var( tmp))
00211             {
00212               dtd = new_dtd(NULL);              
00213               dtd->references++;
00214               c2p_int( (int)dtd, tmp);
00215             }
00216           else
00217             {
00218               if( !get_dtd( head,  &dtd))
00219                 return FALSE;
00220 
00221             }
00222         }
00223     } 
00224   
00225   ref = reg_term(1);
00226   p = new_dtd_parser(dtd);
00227 
00228   parser_error = p;
00229   return unify_parser(ref, p);
00230 
00231 }
00232 
00239 int unify_parser( prolog_term t, dtd_parser *p)
00240 {
00241   prolog_term tmp, tmp1;
00242 
00243   /*Temporary prolog terms to create the output terms*/
00244   tmp1 = p2p_new();
00245   tmp = p2p_new();
00246 
00247   /*Create the prolog term*/    
00248   c2p_functor( "sgml_parser", 1, tmp1);
00249   c2p_int( (int) p, p2p_arg( tmp1, 1));
00250 
00251   return p2p_unify( t, tmp1); 
00252 }
00253 
00254 
00261 int unify_dtd( prolog_term t, dtd * d)
00262 {
00263   /*Temporary prolog term to create the output term*/
00264   prolog_term tmp, tmp1, tmp2;
00265 
00266   tmp1 = p2p_new();
00267   tmp = p2p_new();
00268   tmp2 = p2p_new();
00269 
00270   /*dtd_struct/2 if doctype is specified*/
00271 
00272   if(d->doctype)
00273     {
00274       c2p_functor( "dtd_struct", 2, tmp1);
00275       c2p_int( (int) d, p2p_arg( tmp1, 1));
00276       c2p_string( d->doctype, p2p_arg( tmp1, 2));
00277     }
00278   /* dtd_struct/1 if no doctype is specified */
00279   else
00280     {
00281       c2p_functor( "dtd_struct", 1, tmp1);
00282       c2p_int( (int) d, tmp);
00283       p2p_unify( p2p_arg( tmp1, 1), tmp);
00284     }
00285         
00286   return p2p_unify( t, tmp1);
00287 }
00288 
00295 DllExport int call_conv pl_new_dtd()
00296 { char *dt;
00297   dtd *dtd;
00298   prolog_term doctype;
00299   prolog_term ref;
00300 
00301   doctype = reg_term(1);
00302   ref = reg_term(2);
00303 
00304   /*Extract the doctype*/
00305   if ( !(dt = p2c_string( doctype) ))
00306     return sgml2pl_error(ERR_TYPE, "atom", doctype);
00307 
00308   /*Create the dtd*/
00309   if ( !(dtd=new_dtd(dt)) )
00310     return FALSE;
00311   
00312   dtd->references++;
00313                                                                               
00314   return unify_dtd(ref, dtd);
00315 }
00316 
00322 dtd *
00323 new_dtd(const ichar *doctype)
00324 { dtd *dtd = calloc(1, sizeof(*dtd));
00325                                                                                 
00326   dtd->magic     = SGML_DTD_MAGIC;
00327   dtd->implicit  = TRUE;
00328   dtd->dialect   = DL_SGML;
00329   if ( doctype )
00330     dtd->doctype = istrdup(doctype);
00331   dtd->symbols   = new_symbol_table();
00332   dtd->charclass = new_charclass();
00333   dtd->charfunc  = new_charfunc();
00334   dtd->charmap   = new_charmap();
00335   dtd->space_mode = SP_SGML;
00336   dtd->ent_case_sensitive = TRUE;      
00337   dtd->shorttag    = TRUE;              
00338   dtd->number_mode = NU_TOKEN;
00339   return dtd;
00340 }
00341                                                                                
00350 dtd_parser *
00351 new_dtd_parser(dtd *dtd)
00352 {
00353   dtd_parser *p = calloc(1, sizeof(*p));
00354   
00355   if ( !dtd )
00356     dtd = new_dtd(NULL);
00357   dtd->references++;
00358   
00359   p->magic       = SGML_PARSER_MAGIC;
00360   p->dtd         = dtd;
00361   p->state       = S_PCDATA;
00362   p->mark_state  = MS_INCLUDE;
00363   p->dmode       = DM_DTD;
00364   p->encoding    = ENC_ISO_LATIN1;
00365   p->buffer      = new_icharbuf();
00366   p->cdata       = new_ocharbuf();
00367   p->event_class = EV_EXPLICIT;
00368   set_src_dtd_parser(p, IN_NONE, NULL);
00369   
00370   return p;
00371 }
00372 
00379 static int
00380 get_dtd(prolog_term t, dtd **dtdp)
00381 {
00382   char * str;
00383   
00384   if ( is_functor(t))
00385     {
00386       /*Temporary prolog terms to parse the inputs*/
00387       prolog_term temp_term;
00388       void *ptr;
00389 
00390       str = p2c_functor(t);
00391 
00392       if(strcmp( str, "dtd_struct"))
00393         return FALSE;       
00394 
00395       temp_term = p2p_arg(t, 1);
00396       /*Extract the dtd object pointer from prolog term*/
00397       if ((ptr = (void *) p2c_int(temp_term) ))
00398         { 
00399           dtd *tmp = ptr;
00400           if ( tmp->magic == SGML_DTD_MAGIC )
00401             {
00402               *dtdp = tmp;
00403                                                                             
00404               return TRUE;
00405             }
00406           return sgml2pl_error(ERR_EXISTENCE, "dtd_struct", t);
00407         }
00408     }
00409                                                                          
00410   return sgml2pl_error(ERR_TYPE, "dtd_struct", t);
00411 }
00412 
00419 static int
00420 get_parser(prolog_term parser, dtd_parser **p)
00421 {
00422   /*Temporary terms to parse the prolog input*/
00423   prolog_term temp_term;
00424   void *ptr;
00425   char *str = NULL;
00426 
00427   if(is_functor(parser))
00428     {
00429 
00430       /*Extract the parser object pointer from prolog term*/
00431       str = p2c_functor( parser);
00432    
00433       if(strcmp(str,"sgml_parser"))
00434         {
00435           return FALSE;
00436         }
00437       temp_term = p2p_arg( parser, 1);
00438         
00439       if( (ptr = (void *) p2c_int(temp_term)))
00440         {
00441           dtd_parser *tmp = ptr;
00442           if ( tmp->magic == SGML_PARSER_MAGIC )
00443             {
00444               *p = tmp;
00445               return TRUE;
00446             }
00447           return sgml2pl_error(ERR_EXISTENCE, "sgml_parser", parser);
00448         }
00449     }
00450  
00451   return sgml2pl_error(ERR_TYPE, "sgml_parser", parser);
00452 }
00453 
00460 DllExport int call_conv pl_doctype()
00461 {
00462   dtd_parser *p;
00463   prolog_term parser, doctype;
00464   dtd * dtd;
00465 
00466   parser = reg_term(1);
00467   doctype = reg_term(2);
00468 
00469 
00470   /*Extract parser from the parser prolog term*/
00471   if ( !get_parser(parser, &p) )
00472     return FALSE;
00473   dtd = p->dtd;
00474 
00475   if(is_var(doctype) && dtd->doctype)
00476     {
00477       c2p_string( dtd->doctype, doctype);       
00478     }
00479   return TRUE;
00480 }
00481 
00487 DllExport int call_conv pl_set_sgml_parser()
00488 {
00489   dtd_parser *p;
00490   prolog_term parser, options, temp_term;
00491 
00492   parser = reg_term(1);
00493   options = reg_term(2);
00494 
00495   /*Extract the parser object pointer from the prolog term*/
00496   if ( !get_parser(parser, &p) )
00497     return FALSE;
00498 
00499   if( is_functor(options)) 
00500     {
00501       char *funcname;
00502                 
00503       funcname = p2c_functor( options);
00504 
00505       /*Set the dialect in the parser. Dialect may be xml, xmlns or sgml*/
00506       if( streq( funcname, "dialect"))
00507         {
00508           char *s;                     
00509           temp_term = p2p_arg(options, 1);
00510           s=p2c_string( temp_term);
00511           if ( streq(s, "xml") )
00512             set_dialect_dtd(p->dtd, DL_XML);
00513           else if ( streq(s, "xmlns") )
00514             set_dialect_dtd(p->dtd, DL_XMLNS);
00515           else if ( streq(s, "sgml") )
00516             set_dialect_dtd(p->dtd, DL_SGML);
00517           else
00518             return sgml2pl_error(ERR_DOMAIN, "sgml_dialect", temp_term);
00519         }
00520       /*Sets the shorttag handling option to FALSE or true*/
00521       else if( streq( funcname, "shorttag"))
00522         {
00523           char *booleanstring=NULL;
00524           
00525           temp_term = p2p_arg( options, 1);
00526           
00527           booleanstring = p2c_string( temp_term);
00528           if( !booleanstring){
00529             return sgml2pl_error(ERR_TYPE, "boolen", temp_term);
00530           }
00531           if( strcmp( booleanstring, "false") && 
00532               strcmp( booleanstring, "true") &&
00533               strcmp( booleanstring, "FALSE") &&
00534               strcmp( booleanstring, "TRUE"))
00535             {
00536               return sgml2pl_error( ERR_TYPE, "boolean", temp_term);
00537             }
00538 
00539           set_option_dtd( p->dtd, OPT_SHORTTAG, booleanstring);
00540           
00541         }
00542       /*Set the file name which is displayed as the source of errors*/
00543       else if( streq( funcname, "file"))
00544         {
00545           char * file;
00546                         
00547           temp_term = p2p_arg( options, 1);
00548           file=p2c_string( temp_term);
00549           set_src_dtd_parser( p, IN_FILE, file);
00550         }
00551       /*Set the current line to parse*/
00552       else if ( streq( funcname, "line"))
00553         {
00554           temp_term = p2p_arg( options, 1);
00555                         
00556           (p->location.line = p2c_int( temp_term));
00557         }               
00558       /*Set the current character position to parse*/
00559       else if ( streq( funcname, "charpos"))
00560         {
00561           temp_term = p2p_arg( options, 1);
00562                                                                               
00563           p->location.charpos = p2c_int( temp_term);
00564                                                                                
00565         }
00566       /*Set the space handling*/
00567       else if( streq( funcname, "space"))
00568         {
00569           char *s;
00570           temp_term =p2p_arg(options, 1);
00571           s=p2c_string( temp_term);
00572           
00573           if ( streq(s, "preserve") )
00574             p->dtd->space_mode = SP_PRESERVE;
00575           else if ( streq(s, "default") )
00576             p->dtd->space_mode = SP_DEFAULT;
00577           else if ( streq(s, "remove") )
00578             p->dtd->space_mode = SP_REMOVE;
00579           else if ( streq(s, "sgml") )
00580             p->dtd->space_mode = SP_SGML;
00581           else 
00582             return FALSE;
00583 
00584         }
00585       /*Set the defaults*/
00586       else if( streq( funcname, "defaults"))
00587         {
00588           int val;
00589                         
00590           temp_term =p2p_arg(options, 1);
00591 
00592           val=p2c_int( temp_term);
00593 
00594           if ( val )
00595             p->flags &= ~SGML_PARSER_NODEFS;
00596           else
00597             p->flags |= SGML_PARSER_NODEFS;
00598 
00599         }
00600       /*Set the number option*/
00601       else if( streq( funcname, "number"))
00602         {
00603           char *s;
00604           temp_term = p2p_arg(options, 1);
00605           s=p2c_string( temp_term);
00606                                                                                
00607           if ( streq(s, "token") )
00608             p->dtd->number_mode = NU_TOKEN;
00609           else if ( streq(s, "integer") )
00610             p->dtd->number_mode = NU_INTEGER;
00611 
00612           else 
00613             return FALSE;
00614                                                                                
00615         }
00616       /*Set the doctype*/
00617       else if( streq( funcname, "doctype"))
00618         {
00619           char *s;
00620           temp_term = p2p_arg(options, 1);
00621 
00622           if( is_var( temp_term))
00623             p->enforce_outer_element = NULL;
00624           else
00625             {
00626               if( !(s=p2c_string(temp_term) ))
00627                 return FALSE;
00628               p->enforce_outer_element = dtd_add_symbol(p->dtd, s);
00629 
00630             }
00631                          
00632         }       
00633 
00634     }
00635   return TRUE;  
00636         
00637 }
00638 
00645 DllExport int call_conv pl_allocate_error_term()
00646 {
00647   global_error_term = reg_term(1);
00648   global_warning_term = reg_term(2);
00649   return TRUE;
00650 }
00651 
00656 DllExport int call_conv pl_finalize_warn()
00657 {
00658   /*Temporary prolog terms to iterate the warnings list*/
00659   prolog_term tmp, tmp1;
00660 
00661   tmp = reg_term(1);
00662   while( is_list( tmp)){
00663     tmp1 = p2p_cdr( tmp);
00664     tmp = tmp1;
00665   }
00666   if( is_var( tmp)){
00667     c2p_nil(tmp);
00668   }
00669   return TRUE;
00670 }
00671 
00672 
00680 DllExport int call_conv pl_sgml_parse()
00681 {
00682   dtd_parser *p;
00683   parser_data *pd;
00684   parser_data *oldpd;
00685   /*Temporary prolog terms to parse the options list*/
00686   prolog_term head , parser, options, tail, tmp1;
00687   FILE *in = NULL;
00688   struct stat stbuf;
00689 
00690   int  recursive, has_content_length = FALSE, content_length = 0, its_a_url = 0, source_len = 0;
00691 
00692   char *str, *source=NULL, fname[MAXSTRLEN], *tmpsource=NULL;
00693         
00694   parser = reg_term(1);
00695   options = reg_term(2);
00696   tail = options;
00697 
00698   /*Extract the parser from input prolog term*/
00699   if ( !get_parser(parser, &p) )
00700     return FALSE;
00701 
00702 
00703   if( p->closure)
00704     {
00705       recursive = TRUE;
00706       oldpd = p->closure;
00707 
00708       if ( oldpd->magic != PD_MAGIC || oldpd->parser != p )
00709         return sgml2pl_error(ERR_MISC, "sgml", "Parser associated with illegal data");
00710 
00711       pd = calloc(1, sizeof(*pd));
00712       *pd = *oldpd;
00713       p->closure = pd;
00714 
00715       its_a_url = pd->its_a_url;
00716       if(its_a_url == 1)
00717         source = (char *) pd->source;
00718       else if( its_a_url == 0)
00719         in = pd->source;
00720 
00721     }
00722   else
00723     {
00724       recursive = FALSE;
00725       oldpd = NULL;              
00726                 
00727       set_mode_dtd_parser(p, DM_DATA);
00728                 
00729       /*Set the call back functions in the parser*/
00730       p->on_begin_element = on_begin;
00731       p->on_end_element   = on_end;
00732       p->on_entity        = on_entity;
00733       p->on_pi            = on_pi;
00734       p->on_data          = on_cdata;
00735       p->on_error         = on_error;
00736       p->on_decl          = on_decl;
00737       pd = new_parser_data(p);
00738     }
00739 
00740 
00741   /*Validate the options list*/
00742   if(!is_list(tail))
00743     return sgml2pl_error( ERR_DOMAIN, "source", tail);
00744 
00745   while(is_list(tail)){
00746     head = p2p_car(tail);
00747     tmp1 = p2p_cdr(tail);
00748     tail = tmp1;
00749 
00750           
00751     if(is_functor( head)){
00752       str = p2c_functor( head);
00753 
00754       /*Assign the output prolog term to the parser object. The parser creates the output in this term*/
00755                     
00756       if(!strcmp(str,"document")){
00757         pd->list = p2p_arg( head, 1);
00758         pd->tail = pd->list;
00759         pd->stack = NULL;
00760       }
00761       /*Set the source in the relevant field of the parser*/
00762       else if(!strcmp(str,"source")){
00763         /*Temporary terms used to parse the prolog input*/
00764         prolog_term temp_term1, temp_term2 = 0;   
00765         char server[MAXSTRLEN], * tmpstr=NULL;
00766    
00767         temp_term1 = p2p_arg( head, 1);
00768      
00769 
00770         if( is_functor( temp_term1)){
00771                 
00772           tmpstr = p2c_functor( temp_term1);
00773         
00774           /*Source is a url*/                   
00775           if( !strcmp("url", tmpstr)){
00776 
00777             temp_term2 = p2p_arg(temp_term1, 1);
00778             tmpsource = p2c_string(temp_term2);
00779             source = malloc( strlen(tmpsource));
00780             strcpy( source, tmpsource);
00781             
00782             /*Validate the url*/
00783             if(parse_url( source, server, fname) != FALSE)
00784               {
00785                 /*Url is of the form file:// */
00786                 if( !strcmp( server, "file")){
00787                   if(!(in = fopen( fname, "rb"))){
00788                     return sgml2pl_error(ERR_EXISTENCE, "file", temp_term2);
00789                   }
00790                   its_a_url = 0;
00791                   fstat( fileno( in), &stbuf);
00792                   source_len = stbuf.st_size;
00793                 }
00794 
00795                 else{
00796                   /*Url is of the from http://  */
00797                   if(get_file_www( server, fname, &source) == FALSE){
00798                     return sgml2pl_error(ERR_MISC, "url", source);
00799                   }
00800                   else{
00801                     source_len = strlen( source);
00802                     its_a_url = 1;
00803                   }
00804                 }
00805               }
00806             else
00807               {
00808                 return sgml2pl_error(ERR_DOMAIN, "url", temp_term2);
00809               }
00810           }
00811           /*Source is a file*/
00812           else if ( !strcmp( "file", tmpstr)){
00813 
00814             temp_term2 = p2p_arg(temp_term1, 1);
00815             source = p2c_string(temp_term2);
00816             if(!(in = fopen( source, "rb"))){
00817               return sgml2pl_error(ERR_EXISTENCE, "file", temp_term2);
00818             }
00819             its_a_url = 0;
00820             fstat( fileno( in), &stbuf);
00821             source_len = stbuf.st_size;                 
00822             set_src_dtd_parser(p, IN_FILE, source);
00823           }
00824           /*Input is a string*/
00825           else if ( !strcmp( "string", tmpstr)){
00826 
00827             temp_term2 = p2p_arg(temp_term1, 1);
00828             source = p2c_string( temp_term2);
00829             source_len = strlen( source);
00830             its_a_url = 1;
00831           }
00832           else{
00833             return sgml2pl_error( ERR_MISC, "source", temp_term2);
00834           }
00835         }
00836         else{
00837           return sgml2pl_error( ERR_MISC, "source", "Improper input format");
00838         }
00839       }
00840       /*Set the content length to parse*/  
00841       else if( !strcmp(str,"content_length")){
00842         /*Temporary prolog term to parse the options list*/
00843         prolog_term temp_term1, temp_term2;
00844         char * tmp;
00845 
00846         temp_term1 = p2p_arg( head, 1);
00847         tmp = p2c_functor( temp_term1);
00848         temp_term2 = p2p_arg( temp_term1, 1);
00849         tmp = p2c_functor( temp_term2);
00850         content_length = p2c_int( temp_term2);
00851         has_content_length = TRUE;
00852               
00853       }
00854       /*Sets how much of the current input should be parsed*/
00855       else if( !strcmp(str,"parse"))
00856         {
00857           char *s;
00858           /*Temporary prolog terms to parse the options list*/
00859           prolog_term temp_term;
00860 
00861           temp_term = p2p_arg( head, 1);
00862 
00863           s = p2c_string(temp_term);
00864 
00865           if(streq(s,"element"))
00866             pd->stopat = SA_ELEMENT;
00867           else if ( streq(s, "content") )
00868             pd->stopat = SA_CONTENT;
00869           else if ( streq(s, "file") )
00870             pd->stopat = SA_FILE;
00871           else if ( streq(s, "input") )
00872             pd->stopat = SA_INPUT;
00873           else if ( streq(s, "declaration") )
00874             pd->stopat = SA_DECL;
00875           else
00876             {
00877               return sgml2pl_error(ERR_DOMAIN, "parse", temp_term);
00878             }
00879 
00880         }
00881       /*Set how the syntax errors should be handled*/
00882       else if( !strcmp( str, "syntax_errors")){
00883         char *s;
00884         /*Temporary prolog term to parse the options list*/
00885         prolog_term temp_term;
00886                           
00887         temp_term = p2p_arg( head, 1);
00888                           
00889         s = p2c_string(temp_term);
00890                           
00891         if ( streq(s, "quiet") )
00892           pd->error_mode = EM_QUIET;
00893         else if ( streq(s, "print") )
00894           pd->error_mode = EM_PRINT;
00895         else if ( streq(s, "style") )
00896           pd->error_mode = EM_STYLE;
00897         else
00898           return sgml2pl_error(ERR_DOMAIN, "syntax_error", temp_term);
00899                           
00900       }
00901       /*Set the positions option*/
00902       else if( !strcmp( str, "positions")){
00903         char *s=NULL;
00904         /*Temporary prolog terms to parse the options list*/
00905         prolog_term temp_term = 0;
00906 
00907         temp_term = p2p_arg( head, 1);
00908                           
00909         s = p2c_string(temp_term);
00910               
00911         if ( streq(s, "true") )
00912           pd->positions = TRUE;
00913         else if ( streq(s, "false") )
00914           pd->positions = FALSE;
00915         else
00916           return sgml2pl_error(ERR_DOMAIN, "positions", temp_term);
00917       }
00918             
00919     }
00920     else{
00921       return sgml2pl_error(ERR_DOMAIN, "source", head);
00922     }
00923   }
00924 
00925 #define CHECKERROR                                                      \
00926   if ( pd->errors > pd->max_errors && pd->max_errors >= 0 )             \
00927     return sgml2pl_error(ERR_LIMIT, "max_errors", (long)pd->max_errors);
00928         
00929   if ( pd->stopat == SA_CONTENT && p->empty_element )
00930     goto out;
00931 
00932 
00933   if(in || its_a_url)
00934     {
00935       int eof = FALSE;
00936       int i = 0;
00937 
00938       if(!recursive)
00939         {
00940           pd->its_a_url = its_a_url;
00941           if ( its_a_url ==1)
00942             {
00943               pd->source = source;
00944             }
00945           else if( its_a_url ==0)
00946             pd->source = in;
00947         }
00948       /*Read the source character by character and parse xml*/
00949       while( !eof)
00950         {
00951           char c=0;
00952           char ateof = FALSE;   
00953         
00954           if ( has_content_length )
00955             {
00956               if ( content_length <= 0 )
00957                 c = EOF;
00958               else
00959                 {
00960                   if (its_a_url == 1)   
00961                     {
00962                       c = source[i++];
00963                       if (i == source_len)
00964                         {
00965                           ateof = TRUE;
00966                         }
00967                     }
00968                   else if(its_a_url == 0)
00969                     {
00970                       c = fgetc(in);
00971                       source_len=source_len -1;  
00972                       if( source_len <= 0)
00973                         ateof = TRUE;
00974                      
00975                     }
00976                 }
00977 
00978               if(!ateof)
00979                 ateof = (--content_length <= 0);
00980                                         
00981             }
00982           else
00983             { 
00984               if (its_a_url == 1)
00985                 {
00986                   c = source[i++];
00987                   if (i == source_len)
00988                     {
00989                       ateof = TRUE;
00990                     }
00991                 }
00992               else if( its_a_url ==0)
00993                 {
00994                   c = fgetc(in);
00995                   source_len=source_len -1;
00996                   if( source_len <= 0)
00997                     ateof = TRUE;
00998 
00999                         
01000                 }
01001             }
01002 
01003           if(ateof)
01004             {
01005               eof = TRUE;
01006               if ( c == LF )                  /* file ends in LF */            
01007                 {       
01008                   c = CR;
01009                 }
01010               else if ( c != CR )             /* file ends in normal char */
01011                 { 
01012                   putchar_dtd_parser(p, c);
01013                   if ( pd->stopped )
01014                     goto stopped;
01015                   c = CR;
01016                                 
01017                 }
01018             }
01019           putchar_dtd_parser( p, c);
01020           if ( pd->stopped )
01021             { 
01022             stopped:
01023                                 
01024               pd->stopped = FALSE;
01025               if ( pd->stopat != SA_CONTENT )
01026                 reset_document_dtd_parser(p); /* ensure a clean start */
01027               goto out;
01028             }
01029         
01030         }
01031       if ( !recursive && pd->stopat != SA_INPUT )
01032         end_document_dtd_parser(p);
01033 
01034     out:
01035       /*Remove the ununified portions of the output prolog term*/
01036       if( !is_nil( pd->tail))
01037         {
01038           c2p_nil(pd->tail);
01039         }                                                                
01040       if ( recursive )
01041         {
01042           p->closure = oldpd;
01043         }
01044       else
01045         {
01046           p->closure = NULL;
01047         }
01048       
01049       pd->magic = 0;                      /* invalidate */
01050       free(pd);
01051 
01052       if (its_a_url == 0)
01053         fclose(in);
01054       return TRUE;
01055     }
01056 
01057   return TRUE;
01058 }
01059 
01066 DllExport int call_conv pl_open_dtd()
01067 { dtd *dtd;
01068   dtd_parser *p;
01069   parser_data *pd;
01070  
01071   /*Prolog terms to parse the options list*/
01072   prolog_term ref, options, tail, head, tmp1;
01073 
01074   FILE * in = NULL;                                          
01075 
01076   char *str, file[MAXSTRLEN], server[MAXSTRLEN], *fname=NULL, *tmpfname=NULL;
01077   int its_a_url = 0;
01078   struct stat stbuf;
01079   int source_len = 0;
01080 
01081   ref = reg_term(1);
01082   options = reg_term(2);
01083 
01084   /*Extract the Dtd*/                                                                           
01085   if ( !get_dtd(ref, &dtd) )
01086     return FALSE;
01087 
01088   /*Create a new parser object*/
01089   p = new_dtd_parser(dtd);
01090   p->dmode = DM_DTD;
01091   pd = new_parser_data(p);
01092   pd->free_on_close = TRUE;
01093 
01094   tail = options;
01095 
01096   while(is_list(tail))
01097     { 
01098       head = p2p_car(tail);
01099       tmp1 = p2p_cdr(tail);
01100       tail = tmp1;
01101          
01102       /*Go through the list of options*/
01103       if(is_functor( head)){
01104         str = p2c_functor( head);
01105              
01106 
01107         if(!strcmp(str,"source")){
01108           /*Temporary prolog terms to parse the options list*/
01109           prolog_term temp_term1, temp_term2;
01110           char * tmpstr = NULL;
01111           temp_term1 = p2p_arg( head, 1);
01112           tmpstr = p2c_functor(temp_term1);
01113           
01114 
01115           /*The source is a url*/
01116           if(!strcmp( tmpstr, "url")){   
01117             temp_term2 = p2p_arg(temp_term1, 1);        
01118             tmpfname = p2c_string(temp_term2);
01119             fname = malloc( strlen(tmpfname));
01120             strcpy( fname, tmpfname);
01121             if( parse_url( fname, server, file) != FALSE) {
01122                              
01123               source_len = 0;
01124 
01125               /*The url is of the form file:// */
01126               if( !strcmp( server, "file")){
01127                 if(!(in = fopen( file, "rb"))){
01128                   return sgml2pl_error(ERR_EXISTENCE, "file", temp_term2);
01129                 }
01130                 its_a_url = 0;
01131                 fstat( fileno( in), &stbuf);
01132                 source_len = stbuf.st_size;
01133               }
01134                                 
01135               else{
01136                 /*Source is a url of the form http://...*/
01137                 if( get_file_www( server, file, &fname) == FALSE){
01138                   return sgml2pl_error( ERR_MISC, "url", fname);
01139                 }
01140                 else{
01141                   its_a_url = 1;
01142                   source_len = strlen( fname);
01143                 }
01144               }
01145             }
01146             else
01147               {
01148                 return sgml2pl_error( ERR_DOMAIN, "url", temp_term2);
01149               }
01150           }
01151           /*Source is a file*/
01152           else if( !strcmp( tmpstr, "file")){
01153             temp_term2 = p2p_arg( temp_term1, 1);
01154             fname = p2c_string( temp_term2);
01155             its_a_url = 0;
01156             if(!(in = fopen( fname, "r"))){
01157               return sgml2pl_error(ERR_EXISTENCE, "File", temp_term2);
01158                          
01159             }
01160             fstat( fileno( in), &stbuf);
01161             source_len = stbuf.st_size;
01162           }
01163           /*Source is a string*/
01164           else if(!strcmp( tmpstr, "string")){
01165             its_a_url = 1;
01166             temp_term2 = p2p_arg( temp_term1, 1);
01167             fname = p2c_string( temp_term2);
01168             source_len = strlen( fname );
01169           }
01170           else{
01171             return FALSE;
01172           }
01173         }
01174       }
01175     }        
01176 
01177     
01178 
01179   if ( !pd->parser || pd->parser->magic != SGML_PARSER_MAGIC ){ 
01180     errno = EINVAL;
01181     return FALSE;
01182   }
01183 
01184   if ( (pd->errors > pd->max_errors && pd->max_errors >= 0) || pd->stopped ){ 
01185     errno = EIO;
01186     return FALSE;
01187   }
01188   
01189   /*Parse the dtd contents*/
01190   if (its_a_url == 1) {
01191     int i = 0;
01192     source_len = strlen(fname);
01193 
01194     for( i=0; i<source_len ; i++){
01195       putchar_dtd_parser(pd->parser, fname[i]);
01196     }
01197   }        
01198           
01199   else if( its_a_url == 0)
01200     {
01201       char c;
01202       int i = 0;
01203 
01204       for( i=0;i<source_len;i++)        
01205         {
01206           c = fgetc(in);
01207           putchar_dtd_parser(pd->parser, c);
01208         }
01209       fclose(in);
01210     }
01211   return TRUE;
01212 }
01213 
01219 DllExport int call_conv pl_free_sgml_parser()
01220 { 
01221   dtd_parser *p;
01222   prolog_term parser;
01223 
01224   parser = reg_term(1);
01225 
01226   if ( get_parser(parser, &p) )
01227     { 
01228       free_dtd_parser(p);
01229       return TRUE;
01230     }
01231 
01232   return FALSE;
01233 }
01234 
01239 DllExport int call_conv pl_free_dtd()
01240 { dtd *dtd;
01241 
01242   prolog_term dtd_term;
01243 
01244   dtd_term  = reg_term(1);
01245 
01246   if ( get_dtd(dtd_term, &dtd) )
01247     { 
01248       free_dtd(dtd);
01249       return TRUE;
01250     }
01251 
01252   return FALSE;
01253 }
01254 
01255 
01261 static parser_data *
01262 new_parser_data(dtd_parser *p)
01263 { 
01264   parser_data *pd;
01265                                                                              
01266   pd = calloc(1, sizeof(*pd));
01267   pd->magic = PD_MAGIC;
01268   pd->parser = p;
01269   pd->max_errors = MAX_ERRORS;
01270   pd->max_warnings = MAX_WARNINGS;
01271   pd->error_mode = EM_PRINT;
01272   p->closure = pd;
01273   
01274   return pd;
01275 }
01276 
01280 static dtd_srcloc *
01281 file_location(dtd_parser *p, dtd_srcloc *l)
01282 { 
01283   while(l->parent && l->type != IN_FILE)
01284     l = l->parent;
01285                                                                              
01286   return l;
01287 }
01288 
01289 static int
01290 can_end_omitted(dtd_parser *p)
01291 { sgml_environment *env;
01292                                                                                
01293   for(env=p->environments; env; env = env->parent)
01294     { 
01295       dtd_element *e = env->element;
01296                                                                                
01297       if ( !(e->structure && e->structure->omit_close) )
01298         return FALSE;
01299     }
01300                                                                                
01301   return TRUE;
01302 }
01303 
01311 static int
01312 on_error(dtd_parser *p, dtd_error *error)
01313 {
01314   parser_data *pd = p->closure;
01315   const char *severity;
01316   
01317   if ( pd->stopped )
01318     return TRUE;
01319         
01320   if ( pd->stopat == SA_ELEMENT &&
01321        (error->minor == ERC_NOT_OPEN || error->minor == ERC_NOT_ALLOWED) && can_end_omitted(p) )
01322     { 
01323       end_document_dtd_parser(p);
01324       sgml_cplocation(&p->location, &p->startloc);
01325       pd->stopped = TRUE;
01326       return TRUE;
01327     }
01328 
01329   switch(error->severity)
01330     { 
01331     case ERS_STYLE:
01332       if ( pd->error_mode != EM_STYLE )
01333         return TRUE;
01334       severity = "informational";
01335       break;
01336     case ERS_WARNING:
01337       pd->warnings++;
01338       severity = "warning";
01339       break;
01340     case ERS_ERROR:
01341     default:                            /* make compiler happy */
01342       pd->errors++;
01343       severity = "error";
01344       break;
01345     }
01346 
01347   /*Create the error(...) term in the allocated error term*/
01348   if ( pd->error_mode != EM_QUIET )
01349     {
01350 
01351       /*Temporary prolog variables to create the error term*/
01352       prolog_term temp_term1 = p2p_new();
01353       prolog_term temp_term2 = p2p_new();
01354       prolog_term tmptail, tmp;
01355       dtd_srcloc *l = file_location(p, &p->startloc);
01356 
01357 
01358       /*Create the error term*/
01359       c2p_functor( "sgml", 4, temp_term1);
01360       unify_parser(p2p_arg(temp_term1, 1), p);
01361       c2p_string( (l->name ? (char*) l->name : "[]"), p2p_arg( temp_term1, 2));
01362       c2p_int( l->line, p2p_arg( temp_term1, 3));
01363       c2p_string( error->plain_message, p2p_arg( temp_term1, 4));
01364 
01365       c2p_functor( (char*)severity, 1, temp_term2);
01366       p2p_unify( temp_term1, p2p_arg( temp_term2, 1));
01367 
01368       /*Generate an error or a warning based on severity*/
01369       if(!strcmp(severity, "error")){
01370         p2p_unify( global_error_term, temp_term2);
01371       }
01372       else
01373         {
01374           tmptail = global_warning_term;
01375           while( is_list( tmptail))
01376             {
01377               tmp = p2p_cdr(tmptail);
01378               tmptail = tmp;
01379             }
01380           c2p_list(tmptail);
01381           p2p_unify( p2p_car(tmptail), temp_term2);
01382         }
01383     }
01384 
01385   return TRUE;
01386 }
01387 
01393 static int
01394 on_decl(dtd_parser *p, const ichar *decl)
01395 {
01396   parser_data *pd = p->closure;
01397 
01398   if ( pd->stopped )
01399     return TRUE;
01400 
01401   if ( pd->stopat == SA_DECL )
01402     pd->stopped = TRUE;
01403   
01404   return TRUE;
01405 
01406 }
01407 
01414 static int
01415 on_begin(dtd_parser *p, dtd_element *e, int argc, sgml_attribute *argv)
01416 {
01417   parser_data *pd = p->closure;
01418   env *env1;
01419            
01420                                                                      
01421   if ( pd->stopped )
01422     return TRUE;
01423 
01424   if(pd->tail)
01425     {
01426       /*Prolog term representing the element term created*/
01427       prolog_term et = p2p_new();
01428         
01429       /*Temporary prolog terms to create the output terms*/                    
01430       prolog_term tmp, content;
01431 
01432       tmp = p2p_new();
01433 
01434 
01435       /*Create an element(...) term in the output*/
01436       c2p_functor( "element", 3, et);
01437         
01438       put_element_name(p, p2p_arg( et, 1) , e);
01439         
01440 
01441       /*Create the attribute list for the element*/
01442       unify_attribute_list( p, p2p_arg( et, 2), argc, argv);
01443 
01444       c2p_list(tmp);
01445 
01446       if(!p2p_unify( pd->tail, tmp))
01447         return FALSE;
01448 
01449       tmp = p2p_car( pd->tail);
01450 
01451       if(!p2p_unify( tmp, et))
01452         return FALSE;
01453 
01454       content = p2p_arg( tmp, 3);
01455 
01456       tmp = p2p_cdr( pd->tail);
01457       pd->tail = tmp;
01458 
01459       /*Adjust the output term to handle the recursive nature of an xml document*/
01460       env1 = sgml_calloc(1, sizeof(struct _env *));
01461       env1->tail   = pd->tail;
01462       env1->parent = pd->stack;
01463       pd->stack   = env1;
01464            
01465                 
01466       pd->tail = content;
01467     }
01468   return TRUE;
01469 }
01470 
01471 
01477 static int
01478 unify_attribute_list(dtd_parser *p, prolog_term alist,
01479                      int argc, sgml_attribute *argv)
01480 {
01481   int i;
01482 
01483   /*Temporary prolog terms*/
01484   prolog_term tail = alist;
01485   prolog_term temp_term[2];
01486   prolog_term tmp, tmp1;
01487 
01488   for( i = 0 ; i<argc;  i++)
01489     {
01490       tmp = p2p_new();
01491       temp_term[0] = p2p_new();
01492       temp_term[1] = p2p_new();
01493       tmp1 = p2p_new();
01494 
01495       put_attribute_name(p, temp_term[0], argv[i].definition->name);
01496       put_attribute_value(p, temp_term[1], &argv[i]);
01497 
01498       /*Create a list of attributes with '=' as functor*/               
01499       c2p_functor( "=", 2, tmp);
01500       p2p_unify( p2p_arg( tmp, 1), temp_term[0]);
01501       p2p_unify( p2p_arg( tmp, 2), temp_term[1]);
01502                 
01503       c2p_list( tmp1);  
01504 
01505       if( !p2p_unify( tail, tmp1))
01506         return FALSE;
01507 
01508       tmp1 = p2p_car( tail);
01509                 
01510       if(!p2p_unify( tmp1, tmp))
01511         return FALSE;
01512 
01513       tmp1 = p2p_cdr(tail);     
01514       tail = tmp1;      
01515     }
01516 
01517   tmp1 = p2p_new();
01518   c2p_nil( tmp1);
01519         
01520   if(!p2p_unify( tail, tmp1))
01521     return FALSE;
01522 
01523   return TRUE;
01524 }
01525 
01530 static void
01531 put_attribute_name(dtd_parser *p, prolog_term t, dtd_symbol *nm)
01532 {
01533 
01534   const ichar *url, *local;
01535 
01536   if ( p->dtd->dialect == DL_XMLNS )
01537     {
01538       xmlns_resolve_attribute(p, nm, &local, &url);
01539       if(url)
01540         {
01541 
01542           c2p_functor( ":", 2, t);
01543           put_url(p, p2p_arg( t, 1), url);
01544           c2p_string( (char*)local, p2p_arg( t, 2));
01545         }
01546       else
01547         {
01548           c2p_string( (char*)local, t);
01549         }
01550 
01551     }
01552   else
01553     {
01554       c2p_string( nm->name, t); 
01555     }
01556 }
01557 
01562 static ichar *
01563 istrblank(const ichar *s)
01564 { for( ; *s; s++ )
01565     { if ( isspace(*s) )
01566         return (ichar *)s;
01567     }
01568 
01569   return NULL;
01570 }
01571 
01575 static void
01576 put_attribute_value(dtd_parser *p, prolog_term t, sgml_attribute *a)
01577 {
01578   switch(a->definition->type)
01579     { case AT_CDATA:
01580         c2p_string( a->value.cdata, t);
01581         break;
01582     case AT_NUMBER:
01583       { 
01584         if ( a->value.text )
01585           c2p_string( a->value.text, t);
01586         else
01587           c2p_int( a->value.number, t);
01588         break;
01589       }
01590     default:
01591       { 
01592         const ichar *val = a->value.text;
01593         const ichar *e;
01594         prolog_term tmp;
01595 
01596         if ( a->definition->islist )    /* multi-valued attribute */
01597           {
01598             prolog_term tail, head;
01599                                         
01600             tail = t;
01601             for(e=istrblank(val); e; val = e+1, e=istrblank(val))
01602               { 
01603                 if ( e == val )
01604                   continue;                     /* skip spaces */
01605                                  
01606                 tmp = p2p_new();
01607                 c2p_list( tmp);
01608 
01609                 p2p_unify( tail, tmp);
01610                                          
01611                 head = p2p_car( tail);
01612                 tmp = p2p_cdr( tail);
01613                 tail = tmp;
01614                 unify_listval(p, head, a->definition->type, e-val, val);
01615               }
01616 
01617             tmp = p2p_new();
01618             c2p_list( tmp);
01619 
01620             p2p_unify( tail, tmp);
01621 
01622 
01623             head = p2p_car( tail);
01624             tmp = p2p_cdr( tail);
01625             tail = tmp;
01626             unify_listval(p, head, a->definition->type, e-val, val);
01627 
01628             c2p_nil( tmp);
01629             p2p_unify( tmp, tail);
01630 
01631           }
01632         else
01633           c2p_string( (char*)val, t);
01634 
01635       }
01636     }
01637         
01638 }
01639 
01640 static int
01641 unify_listval(dtd_parser *p,  prolog_term t, attrtype type, int len, const char *text)
01642 { 
01643   prolog_term tmp = p2p_new();
01644   if ( type == AT_NUMBERS && p->dtd->number_mode == NU_INTEGER )
01645     { 
01646       char *e;
01647       long v = strtol(text, &e, 10);
01648 
01649       if ( e-text == len && errno != ERANGE )
01650         {
01651           c2p_int( v, tmp);
01652           return p2p_unify( t, tmp);
01653         }
01654       /* TBD: Error!? */
01655     }
01656 
01657   c2p_string( (char*)text, tmp);
01658 
01659 
01660   return p2p_unify( t, tmp);
01661 }
01662 
01668 static int
01669 on_entity(dtd_parser *p, dtd_entity *e, int chr)
01670 {
01671   parser_data *pd = p->closure;
01672 
01673   if ( pd->stopped )
01674     return TRUE;
01675 
01676   if(pd->tail)
01677     {
01678       /*Temporary prolog terms to parse prolog inputs*/
01679       prolog_term h, tmp, tmp2, tmp1;
01680         
01681       tmp1 = p2p_new();
01682       c2p_list(tmp1);
01683 
01684       /*Create a term entity(...) in the output*/
01685       if(p2p_unify( pd->tail, tmp1))
01686         {
01687           h = p2p_car(pd->tail);
01688           tmp = p2p_cdr(pd->tail);
01689           pd->tail = tmp;
01690           tmp2 = p2p_new();
01691           
01692           /*Creating the output term for the entity*/
01693           if(e)
01694             {
01695                         
01696               c2p_functor( "entity", 1 , tmp2);
01697               c2p_string( e->name->name, p2p_arg( tmp2, 1));
01698               p2p_unify( h, tmp2);
01699 
01700             }
01701           else
01702             {
01703               c2p_functor( "entity", 1, tmp2);
01704               c2p_int( chr, p2p_arg( tmp2, 1));
01705               p2p_unify( h, tmp2);
01706             }
01707         }
01708     }
01709   return TRUE;
01710 }
01711 
01719 static int
01720 on_pi(dtd_parser *p, const ichar *pi)
01721 {
01722   parser_data *pd = p->closure;
01723   if ( pd->stopped )
01724     return TRUE;
01725 
01726   if ( pd->tail )
01727     { 
01728       prolog_term head, tmp1, tmp;      
01729 
01730       tmp = p2p_new();
01731       c2p_list( tmp);
01732 
01733       /*Create a term of the form pi(...) in the output*/
01734       if( p2p_unify(pd->tail, tmp))
01735         {
01736           head = p2p_car(pd->tail);
01737           tmp = p2p_cdr(pd->tail);
01738           pd->tail = tmp;
01739 
01740           tmp1 = p2p_new();
01741 
01742           c2p_functor("pi", 1, tmp1);
01743           c2p_string( (char*)pi, p2p_arg( tmp1, 1));
01744 
01745           p2p_unify( head, tmp1);
01746         }
01747     }
01748   return TRUE;
01749 }
01750 
01756 static int
01757 on_cdata(dtd_parser *p, data_type type, int len, const ochar *data)
01758 {
01759   parser_data *pd = p->closure;
01760   int rval=0;
01761   if ( pd->tail && !pd->stopped )
01762     {
01763       /*Temporary prolog terms used to create the output terms*/
01764       prolog_term head, tmp, tmp1;
01765                 
01766       tmp1 = p2p_new();
01767 
01768       tmp = p2p_new();
01769       c2p_list( tmp);
01770 
01771       /*Create cdata(...)/sdata(...)/ndata(...) terms in the output*/
01772       if(p2p_unify(pd->tail, tmp))
01773         {
01774           head = p2p_car( pd->tail);
01775           tmp = p2p_cdr( pd->tail);
01776           pd->tail = tmp;
01777 
01778           switch(type)
01779             {
01780             case EC_CDATA:
01781               c2p_string( (char*)data, tmp1);
01782               p2p_unify( tmp1, head);
01783               break;
01784             case EC_SDATA:
01785               { 
01786                 prolog_term data_term = p2p_new();
01787                                         
01788                 c2p_functor( "sdata", 1, data_term);
01789                 c2p_string( (char*)data, p2p_arg( data_term, 1));
01790 
01791                 rval =  p2p_unify( head, data_term);
01792                 break;
01793               }
01794             case EC_NDATA:
01795               { 
01796                 prolog_term data_term = p2p_new();
01797                                         
01798                 c2p_functor( "ndata", 1, data_term);
01799                 c2p_string( (char*)data, p2p_arg( data_term, 1));
01800 
01801                 rval =  p2p_unify( head, data_term);
01802                 break;
01803               }
01804             default:
01805               rval = FALSE;
01806               assert(0);
01807             }
01808           if (rval)
01809             {
01810               return TRUE;
01811             }
01812         }
01813                                                                               
01814     }
01815   return FALSE;
01816 }
01817 
01823 static int 
01824 on_end(dtd_parser *p, dtd_element *e)
01825 {
01826   parser_data *pd = p->closure;
01827 
01828   /*Temporary prolog terms used to delete the ununified parts of the output
01829     term*/
01830   prolog_term tmp;
01831 
01832   tmp = p2p_new();
01833   c2p_nil(tmp);
01834 
01835   if(pd->stopped)
01836     return TRUE;
01837 
01838   if ( pd->tail && !pd->stopped )
01839     {
01840       if( !is_nil( pd->tail))
01841         {
01842           p2p_unify( pd->tail, tmp);
01843         }
01844       if ( pd->stack )
01845         { 
01846           env *parent = pd->stack->parent;
01847           pd->tail = pd->stack->tail;
01848           sgml_free(pd->stack);
01849           pd->stack = parent;
01850         } 
01851       else
01852         { 
01853           if ( pd->stopat == SA_CONTENT )
01854             pd->stopped = TRUE;
01855         }
01856     }
01857                                                                                
01858   if ( pd->stopat == SA_ELEMENT && !p->environments->parent )
01859     pd->stopped = TRUE;
01860                                                                                
01861   return TRUE;
01862 }
01863 
01864 
01870 static void
01871 put_element_name(dtd_parser *p, prolog_term t, dtd_element *e)
01872 {
01873   const ichar *url, *local;
01874 
01875   if ( p->dtd->dialect == DL_XMLNS)
01876     {
01877       assert(p->environments->element == e);
01878       xmlns_resolve_element(p, &local, &url);
01879 
01880       if(url)
01881         {
01882 
01883           c2p_functor( ":", 2, t);
01884           put_url(p, p2p_arg( t, 1), url);
01885           c2p_string( (char*)local, p2p_arg( t, 2));
01886                         
01887         }
01888       else
01889         {
01890           c2p_string( (char*)local, t);
01891         }
01892 
01893     }
01894   else
01895     c2p_string ( e->name->name, t);
01896 
01897   return;
01898 }
01899 
01900 
01912 static void
01913 put_url(dtd_parser *p, prolog_term t, const ichar *url)
01914 {
01915   parser_data *pd = p->closure;
01916         
01917   if ( !pd->on_urlns )
01918     { 
01919       c2p_string( (char*) url, t);
01920       return;
01921     }
01922 }
01923 
01924 
01929 #define CHARSET MAXSTRLEN
01930 
01931 static int
01932 do_quote(prolog_term in, prolog_term quoted, char **map)
01933 { char *ins;
01934   unsigned len;
01935   unsigned  char *s;
01936   char outbuf[1024];
01937   char *out = outbuf;
01938   int outlen = sizeof(outbuf);
01939   int o = 0;
01940   int changes = 0;
01941 
01942   prolog_term tmp = 0;
01943 
01944   ins = p2c_string( in);
01945 
01946   len = strlen( ins);
01947 
01948   if ( len == 0 )
01949     return p2p_unify(in, quoted);
01950 
01951   for(s = (unsigned char*)ins ; len-- > 0; s++ )
01952     { int c = *s;
01953 
01954       if ( map[c] )
01955         { int l = strlen(map[c]);
01956           if ( o+l >= outlen )
01957             { outlen *= 2;
01958 
01959               if ( out == outbuf )
01960                 { out = malloc(outlen);
01961                   memcpy(out, outbuf, sizeof(outbuf));
01962                 } else
01963                 { out = realloc(out, outlen);
01964                 }
01965             }
01966           memcpy(&out[o], map[c], l);
01967           o += l;
01968           changes++;
01969         } else
01970         { if ( o >= outlen-1 )
01971             { outlen *= 2;
01972 
01973               if ( out == outbuf )
01974                 { out = malloc(outlen);
01975                   memcpy(out, outbuf, sizeof(outbuf));
01976                 } else
01977                 { out = realloc(out, outlen);
01978                 }
01979             }
01980           out[o++] = c;
01981         }
01982     }
01983   out[o]= 0;
01984 
01985   if ( changes > 0 )
01986     {
01987       c2p_string( out, tmp);
01988       return p2p_unify( quoted, tmp);
01989     }
01990   else
01991     return p2p_unify(in, quoted);
01992 }
01993 
01997 DllExport int call_conv pl_xml_quote_attribute()
01998 {
01999   prolog_term in = reg_term(1);
02000   prolog_term out = reg_term(2);
02001   static char **map;
02002 
02003   if ( !map )
02004     { int i;
02005 
02006       if ( !(map = malloc(CHARSET*sizeof(char*))) )
02007         return sgml2pl_error(ERR_ERRNO, errno);
02008 
02009       for(i=0; i<CHARSET; i++)
02010         map[i] = NULL;
02011 
02012       map['<']  = "&lt;";
02013       map['>']  = "&gt;";
02014       map['&']  = "&amp;";
02015       map['\''] = "&apos;";
02016       map['"']  = "&quot;";
02017     }
02018 
02019   return do_quote(in, out, map);
02020 }
02021 
02025 DllExport int call_conv pl_xml_quote_cdata()
02026 {
02027   prolog_term in = reg_term(1);
02028   prolog_term out = reg_term(2);
02029   static char **map;
02030 
02031   if ( !map )
02032     { int i;
02033 
02034       if ( !(map = malloc(CHARSET*sizeof(char*))) )
02035         return sgml2pl_error(ERR_ERRNO, errno);
02036 
02037       for(i=0; i<CHARSET; i++)
02038         map[i] = NULL;
02039 
02040       map['<']  = "&lt;";
02041       map['>']  = "&gt;";
02042       map['&']  = "&amp;";
02043     }
02044 
02045   return do_quote(in, out, map);
02046 }
02047 
02048 DllExport int call_conv pl_xml_name()
02049 { char *ins;
02050   unsigned len;
02051   static dtd_charclass *map;
02052   unsigned int i;
02053   prolog_term in = reg_term(1);
02054 
02055 
02056   if ( !map )
02057     map = new_charclass();
02058 
02059   ins = p2c_string( in);
02060 
02061   len = strlen( ins);
02062 
02063   if ( len == 0 )
02064     return FALSE;
02065   if ( !(map->class[ins[0] & 0xff] & CH_NMSTART) )
02066     return FALSE;
02067   for(i=1; i<len; i++)
02068     {
02069       if ( !(map->class[ins[i] & 0xff] & CH_NAME) )
02070         return FALSE;
02071     }
02072 
02073   return TRUE;
02074 }
02075 
02076 
02077 
02078 
02079 
02080 
02081 
02082 
02083 
02084 
02085 

Generated on Wed Jul 26 13:30:45 2006 for XSB by  doxygen 1.4.5