parser.c

00001 /*****************************************************************************
00002  *                           parser.c
00003  * This file contains the internal functions of the parser. It also contains
00004  * certain other functions which aid in processing the xml document.
00005  *
00006  ****************************************************************************/
00007 
00008 #define DTD_IMPLEMENTATION 1
00009 
00010 #include "xsb_config.h"
00011 #include "dtd.h"
00012 #include "parser.h"
00013 #include "model.h"
00014 #include "sgmldefs.h"
00015 #include <assert.h>
00016 #include <ctype.h>
00017 #include <errno.h>
00018 #include "utf8.h"
00019 #include "utf8.c"
00020 #include <stdarg.h>
00021 #include <ctype.h>
00022 #include <string.h>
00023 #include <stdlib.h>
00024 #include "util.h"
00025 /*
00026 #include "socketcall.h"
00027 */
00028 #include "error_term.h"
00029 #include "cinterf.h"
00030 
00031 
00032 #define MAXSTRLEN 256                                                          
00033 
00034 static dtd_parser *current_parser;      /* For gripes */
00035 
/*
 * HasClass(dtd, chr, mask)
 *   Yields a non-zero value when the entry for character `chr` in
 *   dtd->charclass->class[] has at least one of the bits in `mask` set.
 *   All three parameters are parenthesized so that arbitrary expressions
 *   (e.g. `&some_dtd`) can be passed safely.
 */
#define HasClass(dtd, chr, mask)                \
  ((dtd)->charclass->class[(chr)] & (mask))
00038 
/*
 * WITH_PARSER(p, g)
 *   Installs `p` as the file-wide current_parser (the parser consulted
 *   "for gripes"), executes statement `g`, and then writes the value that
 *   was saved on entry back into current_parser.
 *
 *   NOTE(review): the value saved on entry is `p` itself, not the previous
 *   contents of current_parser, so current_parser still equals `p` after
 *   the macro has run.  Confirm whether restoring the previous parser was
 *   the real intention before changing this.
 *
 *   The expansion is a plain brace block; no work is done on a copy of
 *   the parser.
 */
#define WITH_PARSER(p, g)                       \
  { dtd_parser *_entry_parser = p;              \
    current_parser = _entry_parser;             \
    g;                                          \
    current_parser = _entry_parser;             \
  }
/*
 * WITH_CLASS(p, c, g)
 *   Executes statement `g` while p->event_class is temporarily set to `c`;
 *   the event class that was in effect on entry is put back afterwards.
 *
 *   The expansion is deliberately a bare brace block (not do/while(0)) so
 *   that it can be invoked with or without a trailing semicolon.
 *   `p` is evaluated several times: pass an expression without side
 *   effects.  The parameters are parenthesized so that expressions such as
 *   `&parser` expand correctly.
 */
#define WITH_CLASS(p, c, g)                     \
  { sgml_event_class _oc = (p)->event_class;    \
    (p)->event_class = (c);                     \
    g;                                          \
    (p)->event_class = _oc;                     \
  }
00052                                                                                
00053                                                                         
00054 typedef struct locbuf
00055 { dtd_srcloc start;                     /* p->startloc */
00056   dtd_srcloc here;                      /* p->location */
00057 } locbuf;
00058 
00059 static void
00060 empty_cdata(dtd_parser *p);
00061                                                                         
00062 extern int 
00063 parse_url( const char * url, char * server, char *fname);
00064 
00065 extern int 
00066 get_file_www(char *server, char* fname, char ** buf);
00067 
00068 static int
00069 match_shortref(dtd_parser *p);
00070 
00071 static int
00072 prepare_cdata(dtd_parser *p);
00073                                                                                
00074 static int
00075 process_declaration(dtd_parser *p, const ichar *decl);
00076                                                                                
00077 static int
00078 process_begin_element(dtd_parser *p, const ichar *decl);
00079 
00080 static const ichar *
00081 itake_name(dtd *dtd, const ichar *in, dtd_symbol **id);
00082 
00083 static const ichar *
00084 iskip_layout(dtd *dtd, const ichar *in);
00085 
00086 static dtd_element *
00087 find_element(dtd *dtd, dtd_symbol *id);
00088 
00089 static dtd_element *
00090 def_element(dtd *dtd, dtd_symbol *id);
00091 
00092 static int
00093 open_element(dtd_parser *p, dtd_element *e, int warn);
00094 
00095 static sgml_environment *
00096 push_element(dtd_parser *p, dtd_element *e, int callback);
00097 
00098 void
00099 sgml_cplocation(dtd_srcloc *d, dtd_srcloc *loc);
00100 
00101 static const ichar *
00102 isee_func(dtd *dtd, const ichar *in, charfunc func);
00103 
00104 static const ichar *
00105 process_attributes(dtd_parser *p, dtd_element *e, const ichar *decl,
00106                    sgml_attribute *atts, int *argc);
00107 
00108 static void
00109 allow_for(dtd_element *in, dtd_element *e);
00110                                                                                
00111 static ichar const *
00112 get_attribute_value(dtd_parser *p, ichar const *decl, sgml_attribute *att);
00113 
00114 static const ichar *
00115 itake_nmtoken(dtd *dtd, const ichar *in, dtd_symbol **id);
00116 
00117 static dtd_attr *
00118 find_attribute(dtd_element *e, dtd_symbol *name);
00119 
00120 static int
00121 add_default_attributes(dtd_parser *p, dtd_element *e,
00122                        int natts, sgml_attribute *atts);
00123 
00124 static void
00125 set_element_properties(dtd_element *e, dtd_attr *a);
00126 
00127 static void
00128 free_attribute_values(int argc, sgml_attribute *argv);
00129 
00130 static void
00131 free_attribute(dtd_attr *a);
00132 
00133 static const ichar *
00134 itake_string(dtd *dtd, const ichar *in, ichar *out, int len);
00135 
00136 static void
00137 add_submodel(dtd_model *m, dtd_model *sub);
00138                                                                                
00139 static void
00140 free_environment(sgml_environment *env);
00141 
00142 static void
00143 validate_completeness(sgml_environment *env);
00144                                                                                
00145 static int
00146 emit_cdata(dtd_parser *p, int last);
00147 
00148 static int
00149 complete(sgml_environment *env);
00150 
00151 static void                             /* TBD: also handle startloc */
00152 push_location(dtd_parser *p, locbuf *save);
00153 
00154 static void
00155 pop_location(dtd_parser *p, locbuf *saved);
00156 
00157 static void
00158 inc_location(dtd_srcloc *l, int chr);
00159                                                                                
00160 static void
00161 dec_location(dtd_srcloc *l, int chr);
00162 
00163 static __inline void
00164 _sgml_cplocation(dtd_srcloc *d, dtd_srcloc *loc);
00165 
00166                                                                              
00167 void
00168 sgml_cplocation(dtd_srcloc *d, dtd_srcloc *loc);
00169                                                                                
00170 #define sgml_cplocation(d,s) _sgml_cplocation(d, s)
00171 
00172 static int
00173 close_element(dtd_parser *p, dtd_element *e, int conref);
00174 
00175 static int
00176 process_entity(dtd_parser *p, const ichar *name);
00177 
00178 static int
00179 process_entity_declaration(dtd_parser *p, const ichar *decl);
00180                                                                                
00181 static dtd_symbol *
00182 dtd_find_entity_symbol(dtd *dtd, const ichar *name);
00183                                                                                
00184 static const ichar *
00185 isee_identifier(dtd *dtd, const ichar *in, char *id);
00186 
00187 static const ichar *
00188 itake_entity_name(dtd *dtd, const ichar *in, dtd_symbol **id);
00189 
00190                                                                                
00191 static ichar *
00192 baseurl(dtd_parser *p);
00193 
00194 static dtd_entity *
00195 find_pentity(dtd *dtd, dtd_symbol *id);
00196 
00197 void
00198 set_src_dtd_parser(dtd_parser *p, input_type type, const char *name);
00199 
00200 static const ichar *
00201 process_entity_value_declaration(dtd_parser *p,
00202                                  const ichar *decl, dtd_entity *e);
00203 
00204 static dtd_symbol_table *
00205 new_symbol_table();
00206 
00207 static int
00208 expand_pentities(dtd_parser *p, const ichar *in, ichar *out, int len);
00209 
00210 static const ichar *
00211 entity_value(dtd_parser *p, dtd_entity *e, int *len);
00212 
00213                                                                                
00214 static const ichar *
00215 isee_character_entity(dtd *dtd, const ichar *in, int *chr);
00216 
00217 static int
00218 char_entity_value(const ichar *decl);
00219 
00220                                                                                
00221 int
00222 sgml_process_file(dtd_parser *p, const char *file, unsigned flags);
00223 
00224 static int
00225 process_cdata(dtd_parser *p, int last);
00226 
00227 int
00228 end_document_dtd_parser_(dtd_parser *p);
00229 
00230 int
00231 end_document_dtd_parser(dtd_parser *p);
00232 int
00233 sgml_process_stream(dtd_parser *p, char * buf, unsigned flags, int source_len);
00234 
00235 static const ichar *
00236 itake_url(dtd *dtd, const ichar *in, ichar **out);
00237                                                                                
00238 static int
00239 pop_to(dtd_parser *p, sgml_environment *to, dtd_element *e0);
00240                                                                              
00241 static const char *
00242 entity_file(dtd *dtd, dtd_entity *e);
00243 
00244 static int
00245 representable_char(dtd_parser *p, int chr);
00246 
00247 static int
00248 process_net(dtd_parser *p);
00249 
00250 static void
00251 update_space_mode(dtd_parser *p, dtd_element *e, int natts, sgml_attribute *atts);
00252 
00253 static dtd_space_mode
00254 istr_to_space_mode(const ichar *val);
00255 
00256 
00257 static int
00258 process_element_declaraction(dtd_parser *p, const ichar *decl);
00259 
00260 static int                              /* <!DOCTYPE ...> */
00261 process_doctype(dtd_parser *p, const ichar *decl, const ichar *decl0);
00262 
00263 static const ichar *
00264 itake_el_or_model_element_list(dtd *dtd, const ichar *decl, dtd_symbol **names,
00265                                int *n);
00266 
00267 
00268 static dtd_model *
00269 make_model(dtd *dtd, const ichar *decl, const ichar **end);
00270 
00271                                                                                
00272 static void
00273 free_elements(dtd_element *e);
00274 
00275 static void
00276 free_element_definition(dtd_edef *def);
00277 
00278 static void
00279 free_model(dtd_model *m);
00280 
00281 static void
00282 free_element_list(dtd_element_list *l);
00283 
00284 
00285 static void
00286 for_elements_in_model(dtd_model *m,
00287                       void (*f)(dtd_element *e, void *closure),
00288                       void *closure);
00289                                                                            
00290                                                                                
00291 static void
00292 add_list_element(dtd_element *e, void *closure);
00293 
00294 static const ichar *
00295 process_model(dtd *dtd, dtd_edef *e, const ichar *decl);
00296 
00297                                                                                
00298 static const ichar *
00299 itake_namegroup(dtd *dtd, charfunc sep, const ichar *decl,
00300                 dtd_symbol **names, int *n);
00301 
00302                                                                                
00303 static void
00304 add_element_list(dtd_element_list **l, dtd_element *e);
00305 
00306 
00307 static void
00308 free_attribute_list(dtd_attr_list *l);
00309 
00310                                                                             
00311 static void
00312 process_marked_section(dtd_parser *p);
00313                                                                                
00314 static void
00315 free_name_list(dtd_name_list *nl);
00316 
00317 
00318 static void
00319 recover_parser(dtd_parser *p);
00320 
00321 static int
00322 close_current_element(dtd_parser *p);
00323 
00324 
00325 static dtd_parser *
00326 clone_dtd_parser(dtd_parser *p);
00327                                                                                
00328 void
00329 reset_document_dtd_parser(dtd_parser *p);
00330 
00331 
00332 static const ichar *
00333 itake_nmtoken_chars(dtd *dtd, const ichar *in, ichar *out, int len);
00334 
00335 static int
00336 process_attlist_declaraction(dtd_parser *p, const ichar *decl);
00337 
00338 static int
00339 process_pi(dtd_parser *p, const ichar *decl);
00340 
00341 static int
00342 match_map(dtd *dtd, dtd_map *map, int len, ichar *data);
00343 
00344 typedef enum
00345   { IE_NORMAL,
00346     IE_INCLUDED,                          /* is included */
00347     IE_EXCLUDED                           /* is excluded */
00348   } includetype;
00349 
00350 
00351                                                                               
00352 static void
00353 add_name_list(dtd_name_list **nl, dtd_symbol *s);                                                                       
00354                                                    
00355 static includetype
00356 in_or_excluded(sgml_environment *env, dtd_element *e);
00357 
00358 static void
00359 pop_marked_section(dtd_parser *p);
00360 
00361 
00362 static const ichar *
00363 isee_ngsep(dtd *dtd, const ichar *decl, charfunc *sep);
00364 
00365 static const ichar *
00366 itake_nutoken(dtd *dtd, const ichar *in, dtd_symbol **id);
00367 
00368 
00369 static const ichar *
00370 itake_number(dtd *dtd, const ichar *in, dtd_attr *at);
00371 
00372 static void
00373 add_verbatim_cdata(dtd_parser *p, int chr);
00374 
00375 static void
00376 set_encoding(dtd_parser *p, const ichar *enc);
00377 
00378 static void
00379 init_decoding(dtd_parser *p);
00380 
00381 static int
00382 process_notation_declaration(dtd_parser *p, const ichar *decl);
00383 
00384 static dtd_notation *
00385 find_notation(dtd *dtd, dtd_symbol *name);
00386 
00387 static const ichar *
00388 itake_dubbed_string(dtd *dtd, const ichar *in, ichar **out);
00389 
00390 static int
00391 process_end_element(dtd_parser *p, const ichar *decl);
00392                                        
00393 static void
00394 add_notation(dtd *dtd, dtd_notation *not);
00395 
00396                                                                                
00397 static int
00398 process_chars(dtd_parser *p, input_type in, const ichar *name, const ichar *s);
00399 
00400 typedef struct
00401 { dtd_symbol **list;
00402   int size;
00403 } namelist;
00404                                                                               
00405 static int
00406 process_include(dtd_parser *p, const ichar *entity_name);
00407 
00408                                                       
00409 static dtd_shortref *
00410 def_shortref(dtd_parser *p, dtd_symbol *name);
00411 
00412 static int
00413 process_shortref_declaration(dtd_parser *p, const ichar *decl);
00414 
00415 static const ichar *
00416 shortref_add_map(dtd *dtd, const ichar *decl, dtd_shortref *sr);
00417 
00418                                                                                
00419 static void
00420 compile_map(dtd *dtd, dtd_shortref *sr);
00421 
00422 static int
00423 process_usemap_declaration(dtd_parser *p, const ichar *decl);
00424 
00425 static dtd_shortref *
00426 find_map(dtd *dtd, dtd_symbol *name);
00427 
00428 static void
00429 set_map_element(dtd_element *e, void *closure);
00430 
00431 static int
00432 expand_entities(dtd_parser *p, const ichar *in, ochar *out, int len);
00433 
00434 static ichar const *
00435 itake_unquoted(dtd *dtd, ichar const *in, ichar *out, int len);
00436 
00437 void
00438 free_dtd_parser(dtd_parser *p);
00439 
00440 void
00441 free_dtd(dtd *dtd);
00442 
00443 static void
00444 free_entity_list(dtd_entity *e);
00445 
00446 static void
00447 free_notations(dtd_notation *n);
00448 
00449 static void
00450 free_shortrefs(dtd_shortref *sr);
00451 
00452 static void
00453 free_maps(dtd_map *map);
00454 
00455 
00456 static void
00457 free_symbol_table(dtd_symbol_table *t);
00458 
00459 int
00460 is_absolute_path(const char *name);
00461 
00462 #ifdef UTF8
00463 static void
00464 process_utf8(dtd_parser *p, int chr);
00465 #endif
00466 
00467 char *
00468 localpath(const char *ref, const char *name);
00469 
00470 static char *
00471 DirName(const char *f, char *dir);
00472 
00473 static char *
00474 format_location(char *s, dtd_srcloc *l);
00475 
00476 static void
00477 format_message(dtd_error *e);
00478 
00479 int
00480 gripe(dtd_error_id e, ...);
00481                                                                                
00482  
00483 static int
00484 set_option_dtd(dtd *dtd, dtd_option option, char * set);
00485 
00486 #ifdef WIN_NT
00487 #define isDirSep(c) ((c) == '/' || (c) == '\\')
00488 #define DIRSEPSTR "\\"
00489 #else
00490 #define isDirSep(c) ((c) == '/')
00491 #define DIRSEPSTR "/"
00492 #endif
00493 
00494 
00495 #ifndef EOS
00496 #define EOS '\0'
00497 #endif
00498 
00499 #ifndef TRUE
00500 #define TRUE 1
00501 #define FALSE 0
00502 #endif
00503 
00509 void
00510 set_mode_dtd_parser(dtd_parser *p, data_mode m)
00511 { p->dmode = m;                         /* DM_DTD or DM_DATA */
00512   p->state = S_PCDATA;
00513   p->blank_cdata = TRUE;
00514 }
00515 
00519 static char *xml_entities[] =
00520   { "lt CDATA \"&#60;\"",                 /* < */
00521     "gt CDATA \"&#62;\"",                 /* > */
00522     "amp CDATA \"&#38;\"",                /* & */
00523     "apos CDATA \"&#39;\"",               /* ' */
00524     "quot CDATA \"&#34;\"",               /* " */
00525     NULL
00526   };
00527 
/* streq(s1, s2): true when the two NUL-terminated strings are identical. */
#define streq(s1, s2) (strcmp((s1), (s2)) == 0)
00529 
00535 static __inline void
00536 setlocation(dtd_srcloc *d, dtd_srcloc *loc, int line, int lpos)
00537 { d->line    = line;
00538   d->linepos = lpos;
00539   d->charpos = loc->charpos - 1;
00540   d->type    = loc->type;
00541   d->name    = loc->name;
00542 }
00543 
00544   
00550 static dtd_symbol_table *
00551 new_symbol_table()
00552 { 
00553   dtd_symbol_table *t = sgml_calloc(1, sizeof(*t));
00554   t->size    = SYMBOLHASHSIZE;
00555   t->entries = calloc(t->size, sizeof(dtd_symbol*));
00556                                                                                
00557   return t;
00558 }
00559 
00566 int
00567 set_dialect_dtd(dtd *dtd, dtd_dialect dialect)
00568 { 
00569   dtd->dialect = dialect;
00570   
00571   switch(dialect)
00572     { 
00573     case DL_SGML:
00574       { 
00575         dtd->case_sensitive = FALSE;
00576         dtd->space_mode = SP_SGML;
00577         dtd->shorttag = TRUE;
00578         break;
00579       }
00580     case DL_XML:
00581     case DL_XMLNS:
00582       { 
00583         char **el;
00584         dtd_parser p;
00585         
00586         dtd->case_sensitive = TRUE;
00587         dtd->encoding = ENC_UTF8;
00588         dtd->space_mode = SP_PRESERVE;
00589         dtd->shorttag = FALSE;
00590                                                                                
00591         memset(&p, 0, sizeof(p));
00592         p.dtd = dtd;
00593                 
00594         for(el = xml_entities; *el; el++)
00595           {
00596             process_entity_declaration(&p, *el);
00597           }     
00598                                                                                
00599         break;
00600       }
00601     }
00602   return TRUE;
00603 }
00604          
00611 static void
00612 add_cdata(dtd_parser *p, int chr)
00613 {
00614   if ( p->mark_state == MS_INCLUDE )
00615     {
00616       ocharbuf *buf = p->cdata;
00617                 
00618       if ( p->blank_cdata == TRUE && !HasClass(p->dtd, chr, CH_BLANK))
00619         {
00620 
00621           p->cdata_must_be_empty = !open_element(p, CDATA_ELEMENT, FALSE);
00622           p->blank_cdata = FALSE;
00623                 
00624         } 
00625 
00626       if ( chr == '\n' )                  /* insert missing CR */
00627         { 
00628           int sz;
00629                                                                                
00630           if ( (sz=buf->size) == 0 || buf->data[sz-1] != CR )
00631             add_cdata(p, CR);
00632         }
00633 
00634       add_ocharbuf(buf, chr);
00635                 
00636       if ( p->map && p->map->ends[chr]  && match_shortref(p) )
00637         {
00638           return;
00639         }
00640                 
00641       if ( chr == '\n' )                  /* dubious.  Whould we do that */
00642         {
00643           int sz;                           /* here or in space-handling? */
00644           if ( (sz=buf->size) > 1 &&
00645                buf->data[sz-1] == LF &&
00646                buf->data[sz-2] == CR )
00647             { 
00648               buf->data[sz-2] = LF;
00649               buf->size--;
00650             }
00651                                                                        
00652         }
00653 
00654     }  
00655 }
00656 
00660 static int
00661 match_map(dtd *dtd, dtd_map *map, int len, ichar *data)
00662 { ichar *e = data+len-1;
00663   ichar *m = map->from+map->len-1;
00664                                                                                
00665   while( m >= map->from )
00666     { if ( e < data )
00667         return 0;
00668                                                                                
00669       if ( *m == *e )
00670         { m--;
00671           e--;
00672           continue;
00673         }
00674       if ( *m == CHR_DBLANK )
00675         { if ( e>data && HasClass(dtd, *e, CH_WHITE) )
00676             e--;
00677           else
00678             return FALSE;
00679           goto blank;
00680         }
00681       if ( *m == CHR_BLANK )
00682         { blank:
00683           while( e>data && HasClass(dtd, *e, CH_WHITE) )
00684             e--;
00685           m--;
00686           continue;
00687         }
00688       return 0;
00689     }
00690                                                                                
00691   return data+len-1-e;
00692 }
00693 
00694 
00695 
00696 static int
00697 match_shortref(dtd_parser *p)
00698 {
00699   dtd_map *map;
00700                                                                                
00701   for(map = p->map->map; map; map = map->next)
00702     { 
00703       int len;
00704                                                                                
00705       if ( (len=match_map(p->dtd, map,
00706                           p->cdata->size, (ichar *)p->cdata->data)) )
00707         { p->cdata->size -= len;
00708                                                                                
00709           if ( p->cdata_must_be_empty )
00710             { 
00711               int blank = TRUE;
00712               const ichar *s;
00713               int i;
00714                                                                                
00715               for(s = p->cdata->data, i=0; i++ < p->cdata->size; s++)
00716                 { 
00717                   if ( !HasClass(p->dtd, *s, CH_BLANK) )
00718                     { blank = FALSE;
00719                       break;
00720                     }
00721                 }
00722                                                                                
00723               p->blank_cdata = blank;
00724             }
00725           WITH_CLASS(p, EV_SHORTREF,
00726                      { 
00727                        sgml_cplocation(&p->startloc, &p->location);
00728                        p->startloc.charpos -= len;
00729                        p->startloc.linepos -= len;
00730                        if ( p->startloc.linepos < 0 )
00731                          { p->startloc.line--;
00732                            p->startloc.linepos = 0; /* not correct! */
00733                          }
00734                        process_entity(p, map->to->name);
00735                      })                     /* TBD: optimise */
00736             return TRUE;
00737         }
00738     }
00739 
00740   return FALSE;
00741 
00742 }
00743 
00747 static void
00748 pop_marked_section(dtd_parser *p)
00749 { dtd_marked *m = p->marked;
00750                                                                                
00751   if ( m )
00752     { p->marked = m->parent;
00753       free(m);
00754       p->mark_state = (p->marked ? p->marked->type : MS_INCLUDE);
00755     }
00756 }
00757 
00758 
00759 static int
00760 complete(sgml_environment *env)
00761 { if ( env->element->structure &&
00762        !env->element->undefined &&
00763        env->element->structure->type != C_ANY )
00764     { dtd_edef *def = env->element->structure;
00765                                                                                 
00766       if ( !same_state(def->final_state, env->state) )
00767         return FALSE;
00768     }
00769                                                                                
00770   return TRUE;
00771 }
00772 
00785 static void
00786 process_marked_section(dtd_parser *p)
00787 { ichar buf[MAXSTRLEN];
00788   dtd *dtd = p->dtd;
00789   const ichar *decl = p->buffer->data;
00790   const ichar *s;
00791                                                                                
00792   if ( (decl=isee_func(dtd, decl, CF_MDO2)) && /* ! */
00793        (decl=isee_func(dtd, decl, CF_DSO)) && /* [ */
00794        expand_pentities(p, decl, buf, sizeof(buf)) )
00795     { dtd_symbol *kwd;
00796                                                                                
00797       decl = buf;
00798         
00799       if ( (s=itake_name(dtd, decl, &kwd)) &&
00800            isee_func(dtd, s, CF_DSO) )    /* [ */
00801         { dtd_marked *m = sgml_calloc(1, sizeof(*m));
00802                                                                                
00803           m->keyword = kwd;                 /* push on the stack */
00804           m->parent = p->marked;
00805           p->marked = m;
00806                                                                                
00807           if ( istrcaseeq(kwd->name, "IGNORE") )
00808             m->type = MS_IGNORE;
00809           else if ( istrcaseeq(kwd->name, "INCLUDE") )
00810             m->type = MS_INCLUDE;
00811           else if ( istrcaseeq(kwd->name, "TEMP") )
00812             m->type = MS_INCLUDE;
00813           else if ( istrcaseeq(kwd->name, "CDATA") )
00814             m->type = MS_CDATA;
00815           else if ( istrcaseeq(kwd->name, "RCDATA") )
00816             m->type = MS_RCDATA;
00817           else
00818             m->type = MS_INCLUDE;           /* default */
00819           empty_icharbuf(p->buffer);
00820           if ( m->type == MS_CDATA )
00821             p->state = S_MSCDATA;
00822           else
00823             p->state = S_PCDATA;
00824           if ( p->mark_state != MS_IGNORE )
00825             p->mark_state = m->type;
00826         }
00827     } else
00828     { decl = p->buffer->data;
00829      
00830       if ( (decl=isee_func(dtd, decl, CF_MDO2)) && /* ! */
00831            !isee_func(dtd, decl, CF_DSO) ) /* [ */
00832         { p->state = S_GROUP;
00833           p->grouplevel = 1;
00834         }
00835     }
00836 }
00837 
00838 
00839 
00849 static int
00850 process_net(dtd_parser *p)
00851 { sgml_environment *env;
00852 
00853   prepare_cdata(p);
00854   for(env = p->environments; env; env=env->parent)
00855     { if ( env->wants_net )
00856         { sgml_environment *parent;
00857                                                                                
00858           pop_to(p, env, NULL);             /* close parents */
00859           validate_completeness(env);
00860           parent = env->parent;
00861                                                                                
00862           emit_cdata(p, TRUE);
00863           p->first = FALSE;
00864                                                                                
00865           if ( p->on_end_element )
00866             { WITH_CLASS(p, EV_SHORTTAG,
00867                          (*p->on_end_element)(p, env->element));
00868             }
00869                                                                                
00870           free_environment(env);
00871           p->environments = parent;
00872           p->map = (parent ? parent->map : NULL);
00873                                                                                
00874           return TRUE;
00875         }
00876     }
00877                                                                                
00878   return FALSE;
00879 }
00880 
00881                                                                                
00888 static void
00889 recover_parser(dtd_parser *p)
00890 { const ichar *s;
00891   dtd *dtd = p->dtd;
00892 
00893   terminate_icharbuf(p->buffer);
00894   add_cdata(p, dtd->charmap->map[p->saved]);
00895   for(s=p->buffer->data; *s; s++)
00896     add_cdata(p, dtd->charmap->map[*s]);
00897   p->state = S_PCDATA;
00898 }
00899                                                                                
00910 static int
00911 process_pi(dtd_parser *p, const ichar *decl)
00912 { const ichar *s;
00913   dtd *dtd = p->dtd;
00914                                                                                
00915   if ( (s=isee_identifier(dtd, decl, "xml")) ) /* <?xml version="1.0"?> */
00916     { decl = s;
00917                                                  
00918       /*Predefine certain xml specific standards*/
00919       while(*decl)
00920         { dtd_symbol *nm;
00921                                                                                
00922  
00923           if ( (s=itake_name(dtd, decl, &nm)) &&
00924                (s=isee_func(dtd, s, CF_VI)) )
00925             { ichar buf[MAXSTRINGLEN];
00926               const ichar *end;
00927                                                                                
00928 
00929               if ( !(end=itake_string(dtd, s, buf, sizeof(buf))) )
00930                 end=itake_nmtoken_chars(dtd, s, buf, sizeof(buf));
00931                                                                                
00932               if ( end )
00933                 { decl = end;
00934                                                                                
00935                   if ( istrcaseeq(nm->name, "encoding") )
00936                     set_encoding(p, buf);
00937                                                                                
00938                   continue;
00939                 }
00940             }
00941           gripe(ERC_SYNTAX_ERROR, "Illegal XML parameter", decl);
00942           break;
00943         }
00944       /*Set the xml dialect based on the <?...?> declaration*/
00945       switch(dtd->dialect)
00946         { 
00947         case DL_SGML:
00948           set_dialect_dtd(dtd, DL_XML);
00949           break;
00950         case DL_XML:
00951         case DL_XMLNS:
00952           break;
00953         }
00954       return TRUE;
00955     }
00956   if ( p->on_pi )
00957     (*p->on_pi)(p, decl);
00958   
00959   return FALSE;                         /* Warn? */
00960 }
00961 
00962 
00968 static void
00969 set_encoding(dtd_parser *p, const ichar *enc)
00970 { 
00971   dtd *dtd = p->dtd;
00972                                                                                
00973   if ( istrcaseeq(enc, "iso-8859-1") )
00974     { dtd->encoding = ENC_ISO_LATIN1;
00975     } else if ( istrcaseeq(enc, "utf-8") )
00976     { dtd->encoding = ENC_UTF8;
00977     } else
00978     gripe(ERC_EXISTENCE, "character encoding", enc);
00979                                                                                
00980   init_decoding(p);
00981 }
00982     
00987 static void
00988 init_decoding(dtd_parser *p)
00989 {
00990 #ifdef UTF8
00991   int decode;
00992   dtd *dtd = p->dtd;
00993                                                                                
00994   if ( dtd->encoding == ENC_UTF8 &&
00995        p->encoding   == ENC_ISO_LATIN1 )
00996     decode = TRUE;
00997   else
00998     decode = FALSE;
00999                                                                                
01000   if ( p->utf8_decode != decode )
01001     {/* DEBUG(fprintf(stderr, "%s UTF-8 decoding on %p\n",
01002         decode ? "Enable" : "Disable",
01003         p));
01004      */                                                                         
01005       p->utf8_decode = decode;
01006     }
01007 #endif
01008 }
01009                                                                             
01010 
01016 void
01017 reset_document_dtd_parser(dtd_parser *p)
01018 { 
01019   /*Free the parser environments*/
01020   if ( p->environments )
01021     { 
01022       sgml_environment *env, *parent;
01023                                                                                
01024       for(env = p->environments; env; env=parent)
01025         { 
01026           parent = env->parent;
01027                                                                               
01028           free_environment(env);
01029         }
01030                                                                                
01031       p->environments = NULL;
01032     }
01033   while(p->marked)
01034     pop_marked_section(p);
01035           
01036   /*Empty the parser buffers*/              
01037   empty_icharbuf(p->buffer);
01038   empty_ocharbuf(p->cdata);
01039                                                                     
01040   /*Reset the parser state*/           
01041   p->mark_state    = MS_INCLUDE;
01042   p->state         = S_PCDATA;
01043   p->grouplevel    = 0;
01044   p->blank_cdata   = TRUE;
01045   p->event_class   = EV_EXPLICIT;
01046   p->dmode         = DM_DATA;
01047                                                                                
01048   //begin_document_dtd_parser(p);
01049 }
01050 
01057 static void
01058 add_verbatim_cdata(dtd_parser *p, int chr)
01059 { if ( p->mark_state != MS_IGNORE )
01060     { ocharbuf *buf = p->cdata;
01061                                                                                
01062       if ( p->blank_cdata == TRUE && !HasClass(p->dtd, chr, CH_BLANK) )
01063         { p->cdata_must_be_empty = !open_element(p, CDATA_ELEMENT, FALSE);
01064           p->blank_cdata = FALSE;
01065         }
01066                                                                                
01067       if ( chr == '\n' && buf->size > 0 && buf->data[buf->size-1] == '\r' )
01068         buf->size--;
01069                                                                                
01070       add_ocharbuf(buf, chr);
01071     }
01072 }
01073 
/*
 * putchar_dtd_parser(p, chr)
 *
 * Feed one input character to parser `p`.
 *
 * The function first updates p->location (line, linepos, charpos),
 * masks `chr` down to its low 8 bits and then dispatches on p->state.
 * Each case either consumes the character (accumulating it in
 * p->buffer or p->cdata and/or switching state) or changes state and
 * jumps back to `reprocess` so that the same character is handled
 * again under the new state.
 *
 * Complete constructs are handed to process_declaration(),
 * process_pi(), process_entity(), process_include(),
 * process_end_element() and process_net(); these calls are only made
 * while p->mark_state == MS_INCLUDE.
 *
 * Parameters:
 *   p   - parser whose state is advanced
 *   chr - next input character; only the low 8 bits are used by the
 *         state machine
 */
01080 void
01081 putchar_dtd_parser(dtd_parser *p, int chr)
01082 {
01083   dtd *dtd = p->dtd;
01084   const ichar *f = dtd->charfunc->func;
  /* Location BEFORE this character; passed to setlocation() below so
     that start locations point at the character that opened a construct */
01085   int line = p->location.line;
01086   int lpos = p->location.linepos;
01087 
01088 
01089   /*Adjust the current line and character position*/
01090   if ( f[CF_RS] == chr )
01091     { 
01092       p->location.line++;
01093       p->location.linepos = 0;
01094     } else
01095     { 
01096       if ( f[CF_RE] == chr )
01097         p->location.linepos = 0;
01098       else
01099         p->location.linepos++;
01100     }
01101 
01102   p->location.charpos++;
  /* Note: the line/column bookkeeping above compares the unmasked value;
     from here on only the low byte of chr is used */
01103   chr &= 0x000000ff;
  /* Re-entry point: cases that change p->state without consuming chr
     jump here to dispatch the same character again */
01104  reprocess:
01105 
01106   /*The parser is a state engine. The states of the parser change as the input is parsed*/
01107   switch(p->state)
01108     { 
01109     case S_PCDATA:
01110       {
01111         if ( f[CF_MDO1] == chr )             /*<*/          
01112           { 
01113             setlocation(&p->startloc, &p->location, line, lpos);
01114             p->state = S_DECL0;
01115             empty_icharbuf(p->buffer);
01116             return;
01117           }
01118 
01119         if ( p->dmode == DM_DTD )
01120           { 
01121             if ( f[CF_PERO] == chr )        /* % */
01122               { 
01123                 setlocation(&p->startloc, &p->location, line, lpos);
01124                 p->state = S_PENT;
01125                 return; 
01126               }
01127           } 
01128         else
01129           { 
01130             if ( f[CF_ERO] == chr )         /* & */
01131               { 
01132                 p->state = S_ENT0;
01133                 return;
01134               }
01135           }
01136 
01137         if ( p->marked && f[CF_DSC] == chr ) /* ] in marked section */
01138           { 
01139             empty_icharbuf(p->buffer);
01140             p->state = S_EMSC1;
01141             p->saved = chr;                 /* for recovery */
01142             return;
01143           }
01144 
01145 
01146         if ( p->waiting_for_net && f[CF_ETAGO2] == chr ) /* shorttag */
01147           {
01148             setlocation(&p->startloc, &p->location, line, lpos);
01149             WITH_PARSER( p,     process_net(p));
01150             return;
01151           }
01152 
01153 #ifdef UTF8
01154         if ( p->utf8_decode && ISUTF8_MB(chr) )
01155           { 
01156             process_utf8(p, chr);
01157             return;
01158           }
01159 #endif
01160         
        /* Ordinary character data: remember where the run started, then
           store the character after translation through the charmap */
01161         if ( p->cdata->size == 0 )
01162           setlocation(&p->startcdata, &p->location, line, lpos);
01163         add_cdata(p, dtd->charmap->map[chr]);
01164         return;
01165       }
01166     case S_ECDATA2:                     /* Seen </ in CDATA/RCDATA */
01167       { 
        /* p->buffer holds the name seen after "</"; the element ends only
           when it matches p->etag exactly (case-insensitively) and chr
           is the closing delimiter */
01168         if ( f[CF_MDC] == chr &&
01169              p->etaglen == p->buffer->size &&
01170              istrncaseeq(p->buffer->data, p->etag, p->etaglen) )
01171           { 
01172             p->cdata->size -= p->etaglen+2; /* 2 for </ */
01173             terminate_ocharbuf(p->cdata);
01174             terminate_icharbuf(p->buffer);
01175             if ( p->mark_state == MS_INCLUDE )
01176               { 
01177                 WITH_PARSER(p,
01178                             process_cdata(p, TRUE);
01179                             process_end_element(p, p->buffer->data));
01180                 empty_cdata(p);
01181               }
01182             empty_icharbuf(p->buffer);
01183             p->cdata_state = p->state = S_PCDATA;
01184           } 
01185         else
01186           { 
01187             add_verbatim_cdata(p, dtd->charmap->map[chr]);
01188             if ( p->etaglen < p->buffer->size || !HasClass(dtd, chr, CH_NAME))
01189               { 
01190                 empty_icharbuf(p->buffer);    /* mismatch */
01191                 p->state = p->cdata_state;
01192               } 
01193             else
01194               add_icharbuf(p->buffer, chr);
01195           }
01196         return;
01197       }
01198 
01199     case S_ECDATA1:                     /* seen < in CDATA */
01200       { 
01201         add_verbatim_cdata(p, dtd->charmap->map[chr]);
01202         if ( f[CF_ETAGO2] == chr )        /* / */
01203           {     
01204             empty_icharbuf(p->buffer);
01205             p->state = S_ECDATA2;
01206           } 
01207         else if ( f[CF_ETAGO1] != chr ) /* <: do not change state */
01208           p->state = p->cdata_state;
01209         return;
01210       }
01211 
01212     case S_RCDATA:
01213       { 
01214         if ( f[CF_ERO] == chr ) /* & */
01215           { 
01216             setlocation(&p->startloc, &p->location, line, lpos);
01217             p->state = S_ENT0;
01218             return;
01219           }
        /* Anything other than the entity-reference opener is handled
           exactly as in S_CDATA */
01220         /*FALLTHROUGH*/
01221       }
01222 
01223     case S_CDATA:
01224       { 
01225         add_verbatim_cdata(p, dtd->charmap->map[chr]);
01226 
01227         if ( f[CF_MDO1] == chr )          /* < */
01228           { 
01229             setlocation(&p->startloc, &p->location, line, lpos);
01230             p->state = S_ECDATA1;
01231           }
01232 
01233         /* / in CDATA shorttag element */
01234         if ( p->waiting_for_net && f[CF_ETAGO2] == chr )
01235           { 
01236             setlocation(&p->startloc, &p->location, line, lpos);
            /* drop the character that add_verbatim_cdata() stored above */
01237             p->cdata->size--;
01238             terminate_ocharbuf(p->cdata);
01239             terminate_icharbuf(p->buffer);
01240             if ( p->mark_state == MS_INCLUDE )
01241               { 
01242                 WITH_PARSER(p,
01243                             process_cdata(p, TRUE);
01244                             process_net(p));
01245                 empty_cdata(p);
01246               }
01247             empty_icharbuf(p->buffer);
01248             p->cdata_state = p->state = S_PCDATA;
01249           }
01250 
01251         return;
01252       }
01253     case S_PENT:                        /* %parameter entity; */
01254       {
01255 
01256         if ( f[CF_ERC] == chr )
01257           { 
01258             p->state = S_PCDATA;
01259             terminate_icharbuf(p->buffer);
01260             if ( p->mark_state == MS_INCLUDE )
01261               { 
01262                 WITH_PARSER(p, process_include(p, p->buffer->data));
01263               }
01264             empty_icharbuf(p->buffer);
01265             return;
01266           }
01267         if ( HasClass(dtd, chr, CH_NAME) )
01268           {
01269             add_icharbuf(p->buffer, chr);
01270             return;
01271           }
01272 
        /* Neither a name character nor the reference terminator.
           NOTE(review): p->state is left at S_PENT on this error path */
01273         terminate_icharbuf(p->buffer);
01274         gripe(ERC_SYNTAX_ERROR, "Illegal parameter entity", p->buffer->data);
01275         break;
01276       }
01277         
01278     case  S_DECL0:
01279       {
01280         if ( f[CF_ETAGO2] == chr )        /* </ */
01281           {
01282             add_icharbuf(p->buffer, chr);
01283             p->state = S_DECL;
01284           }
01285         else if ( HasClass(dtd, chr, CH_NAME) ) /* <letter> */
01286           { 
01287             add_icharbuf(p->buffer, chr);
01288             p->state = S_DECL;
01289           } 
01290         else if ( f[CF_MDO2] == chr )   /* <! */
01291           { 
01292             p->state = S_MDECL0;
01293           }
01294         else if ( f[CF_PRO2] == chr )   /* <? */
01295           {
01296             p->state = S_PI;
01297           } 
01298         else                            /* recover */
01299           { 
            /* Not markup after all: emit the opening delimiter and this
               character as character data */
01300             add_cdata(p, f[CF_MDO1]);
01301             add_cdata(p, chr);
01302             p->state = S_PCDATA;
01303           }
01304 
01305         return;
        /* unreachable: follows an unconditional return */
01306         break;
01307       }
01308     case S_PI:
01309       { 
01310         add_icharbuf(p->buffer, chr);
01311         if ( f[CF_PRO2] == chr )          /* <? ... ? */
01312           p->state = S_PI2;
01313         if ( f[CF_PRC] == chr )           /* no ? is ok too (XML/SGML) */
01314           goto pi;
01315         return;
01316       }
01317     case S_PI2:
01318       { 
01319         if ( f[CF_PRC] == chr )
01320           { 
            /* Shared end-of-PI handling; also entered from S_PI via goto */
01321           pi:
01322             process_cdata(p, FALSE);
01323             p->state = S_PCDATA;
            /* Remove the last buffered character: when coming from S_PI
               that is the closing delimiter just added, when coming from
               S_PI2 it is the CF_PRO2 character added by S_PI */
01324             p->buffer->size--;
01325             terminate_icharbuf(p->buffer);
01326             if ( p->mark_state == MS_INCLUDE )
01327               { 
01328                 WITH_PARSER(p, process_pi(p, p->buffer->data));
01329               }
01330             empty_icharbuf(p->buffer);
01331             return;
01332           }
01333         add_icharbuf(p->buffer, chr);
01334         p->state = S_PI;
01335         return;
01336       }
01337 
01338     case S_MDECL0:
01339       { 
01340         if ( f[CF_CMT] == chr )           /* <!- */
01341           { 
01342             p->state = S_CMTO;
01343             return;
01344           }
01345         add_icharbuf(p->buffer, f[CF_MDO2]);
01346         add_icharbuf(p->buffer, chr);
01347         p->state = S_DECL;
01348         return;
01349       }
01350 
01351     case S_DECL:                         /*Processing declarations*/
01352       {
01353         if(f[CF_MDC] == chr)
01354           {
01355             prepare_cdata( p);
01356             p->state = S_PCDATA;
01357             terminate_icharbuf( p->buffer);
01358 
01359             if ( p->mark_state == MS_INCLUDE )
01360               { 
01361                 WITH_PARSER(p, process_declaration(p, p->buffer->data));
01362               }
01363             empty_icharbuf( p->buffer);
01364             return;
01365           }
01366 
        /* With shorttag enabled, the CF_ETAGO2 character after a non-empty
           name closes the tag; the parser then waits for the matching
           NET character (p->waiting_for_net) */
01367         if ( dtd->shorttag && f[CF_ETAGO2] == chr && p->buffer->size > 0 )
01368           { 
01369             prepare_cdata(p);
01370             p->state = S_PCDATA;
01371             terminate_icharbuf(p->buffer);
01372             if ( p->mark_state == MS_INCLUDE )
01373               { WITH_CLASS(p, EV_SHORTTAG,
01374                            WITH_PARSER(p, process_declaration(p, p->buffer->data)));
01375               }
01376             empty_icharbuf(p->buffer);
01377             p->waiting_for_net = TRUE;
01378             return;
01379           }
01380 
01381         add_icharbuf(p->buffer, chr);
01382         
        /* Quoted literals are skipped by S_STRING, which returns to
           p->lit_saved_state when the quote stored in p->saved recurs */
01383         if ( f[CF_LIT] == chr )           /* " */
01384           { 
01385             p->state = S_STRING;
01386             p->saved = chr;
01387             p->lit_saved_state = S_DECL;
01388           } 
01389         else if ( f[CF_LITA] == chr )   /* ' */
01390           { 
01391             p->state = S_STRING;
01392             p->saved = chr;
01393             p->lit_saved_state = S_DECL;
01394             return;
01395           }
01396         
01397         else if ( f[CF_DSO] == chr )    /* [: marked section */
01398           { 
01399             terminate_icharbuf(p->buffer);
01400 
01401             process_marked_section(p);
01402           }
01403 
01404         else if ( f[CF_CMT] == chr &&   /* - */
01405                   p->buffer->data[0] == f[CF_MDO2] ) /* Started <! */
01406           { 
01407             p->state = S_DECLCMT0;
01408           }                                                                 
01409         break;
01410 
01411         
01412       }
01413     case S_ENT0:                               /*Processing entities*/
01414       { 
01415         if ( chr == '#' || HasClass(dtd, chr, CH_NAME) )
01416           { 
01417             empty_icharbuf(p->buffer);
01418             add_icharbuf(p->buffer, chr);
01419             p->state = S_ENT;
01420           } 
01421         else
01422           {
            /* Not the start of a reference: emit the opener literally and
               handle chr again in the saved data state */
01423             add_cdata(p, f[CF_ERO]);
01424             p->state = p->cdata_state;
01425             goto reprocess;
01426           }
01427 
01428         return;
01429       }
01430     case S_ENT:                              /*Processing entities*/
01431       {
01432                 
01433         if ( HasClass(dtd, chr, CH_NAME) )
01434           { 
01435             add_icharbuf(p->buffer, chr);
01436             return;
01437           }
01438         terminate_icharbuf(p->buffer);
01439         p->state = p->cdata_state;
01440 
01441         /* Added to handle cases where there is cdata of the form &xyz
01442            which is not an entity*/
01443 
        /* The name counts as "not an entity" when the character that ended
           it is none of: the reference terminator, '@', a CH_WHITE/CH_RE/
           CH_RS character, or another reference opener.  The buffered
           name is then emitted as character data after a literal '&'.
           NOTE(review): p->buffer is not emptied on this path */
01444         if(  (f[CF_ERC] != chr) && (chr != '@') && !HasClass( dtd, chr, CH_WHITE) && !HasClass( dtd, chr, CH_RE) && !HasClass( dtd, chr, CH_RS) && (f[CF_ERO] != chr))
01445           {
01446             int i = 0; 
01447 
01448             add_ocharbuf( p->cdata, '&');
01449             for( i = 0; i < p->buffer->size; i++)
01450               {
01451                 add_cdata(p, dtd->charmap->map[p->buffer->data[i]]);
01452               }
01453             goto reprocess;
01454           }
01455 
01456         if ( p->mark_state == MS_INCLUDE )
01457           { 
01458             WITH_PARSER(p, process_entity(p, p->buffer->data));
01459           }
01460          
01461         empty_icharbuf(p->buffer);
01462 
        /* The terminator and a '\n' are consumed; CR defers to S_ENTCR so a
           following LF can be swallowed; anything else is reprocessed */
01463         if ( chr == CR ){
01464           p->state = S_ENTCR;
01465           break;
01466         }
01467         else if ( f[CF_ERC] != chr && chr != '\n' ) {
01468           goto reprocess;
01469         }
01470 
01471         break;
01472       }
01473 
01474     case S_ENTCR:                       /* seen &entCR, eat the LF */
01475       { 
01476         p->state = p->cdata_state;
01477         if ( chr != LF )
01478           goto reprocess;
01479         break;
01480       }
01481 
01482     case S_DECLCMT0:                    /* <...- */
01483       { 
01484         if ( f[CF_CMT] == chr )
01485           { 
            /* second comment character: remove the first one, which S_DECL
               had already buffered */
01486             p->buffer->size--;
01487             p->state = S_DECLCMT;
01488           } 
01489         else
01490           { 
01491             add_icharbuf(p->buffer, chr);
01492             p->state = S_DECL;
01493           }
01494         break;
01495       }
01496 
01497     case S_DECLCMT:                     /* <...--.. */
01498       { 
01499         if ( f[CF_CMT] == chr )
01500           p->state = S_DECLCMTE0;
01501         break;
01502       }
01503     case S_DECLCMTE0:                   /* <...--..- */
01504       { 
01505         if ( f[CF_CMT] == chr )
01506           p->state = S_DECL;
01507         else
01508           p->state = S_DECLCMT;
01509         break;
01510       }
01511 
01512     case S_CMTO:                        /* Seen <!- */
01513       { 
01514         if ( f[CF_CMT] == chr )           /* - */
01515           { 
01516             p->state = S_CMT;
01517             return;
01518           } 
01519         else
01520           { 
            /* Not a comment: emit the three delimiter characters consumed
               so far plus chr as character data */
01521             add_cdata(p, f[CF_MDO1]);
01522             add_cdata(p, f[CF_MDO2]);
01523             add_cdata(p, f[CF_CMT]);
01524             add_cdata(p, chr);
01525             p->state = S_PCDATA;
01526             return;
01527           }
01528       }
01529     case S_CMT:
01530       { 
01531         if ( f[CF_CMT] == chr )
01532           p->state = S_CMTE0;             /* <!--...- */
01533         break;
01534       }
01535     case S_CMTE0:                       /* <!--... -- */
01536       { 
01537         if ( f[CF_CMT] == chr )
01538           p->state = S_CMTE1;
01539         else
01540           p->state = S_CMT;
01541         break;
01542       }
01543     case S_CMTE1:                       /* <!--...-- seen */
01544       {         
01545         if ( f[CF_MDC] == chr )           /* > */
01546           { 
            /* comment text is not collected; the handler receives "" */
01547             if ( p->on_decl )
01548               (*p->on_decl)(p, "");
01549             p->state = S_PCDATA;
01550 
01551           } 
01552         else
01553           p->state = S_CMT;
01554         break;
01555       }
01556 
01557     case S_EMSC1:
01558       { 
01559         if ( f[CF_DSC] == chr )           /* ]] in marked section */
01560           { 
01561             p->state = S_EMSC2;
01562             return;
01563           } 
01564         else
01565           { 
01566             add_icharbuf(p->buffer, chr);
01567             recover_parser(p);
01568             return;
01569           }
01570       }
01571 
01572     case S_EMSC2:
01573       { 
01574         if ( f[CF_MDC] == chr )           /* ]]> in marked section */
01575           { 
01576             pop_marked_section(p);
01577             p->state = S_PCDATA;
01578             return;
01579           } else
01580           { 
01581             add_icharbuf(p->buffer, chr);
01582             recover_parser(p);
01583             return;
01584           }
01585       }
01586 
01587 
01588     case S_GROUP:                       /* [...] in declaration */
01589       { 
01590         add_icharbuf(p->buffer, chr);
01591         if ( f[CF_DSO] == chr )
01592           { 
01593             p->grouplevel++;
01594           } 
01595         else if ( f[CF_DSC] == chr )
01596           { 
01597             if ( --p->grouplevel == 0 )
01598               p->state = S_DECL;
01599           } 
01600         else if ( f[CF_LIT] == chr )    /* " */
01601           { 
01602             p->state = S_STRING;
01603             p->saved = chr;
01604             p->lit_saved_state = S_GROUP;
01605           } 
01606         else if ( f[CF_LITA] == chr )   /* ' */
01607           { 
01608             p->state = S_STRING;
01609             p->saved = chr;
01610             p->lit_saved_state = S_GROUP;
01611             return;
01612           }
01613         break;
01614       }
01615     case S_STRING:
01616       { 
        /* inside a quoted literal: buffer everything until the opening
           quote character (p->saved) is seen again */
01617         add_icharbuf(p->buffer, chr);
01618         if ( chr == p->saved )
01619           p->state = p->lit_saved_state;
01620         break;
01621       }
01622 
01623     case S_MSCDATA:
01624       { 
01625         add_verbatim_cdata(p, dtd->charmap->map[chr]);
01626         if ( f[CF_DSC] == chr )           /* ] */
01627           p->state = S_EMSCDATA1;
01628         return;
01629       }
01630     case S_EMSCDATA1:
01631       {         
01632         add_verbatim_cdata(p, dtd->charmap->map[chr]);
01633         if ( f[CF_DSC] == chr )           /* ]] */
01634           p->state = S_EMSCDATA2;
01635         else
01636           p->state = S_MSCDATA;
01637         return;
01638       }
01639     case S_EMSCDATA2:
01640       { 
01641         add_verbatim_cdata(p, dtd->charmap->map[chr]);
01642         if ( f[CF_MDC] == chr )           /* ]]> */
01643           { 
            /* removes the three characters stored for the end marker,
               including the one added just above */
01644             p->cdata->size -= 3;            /* Delete chars for ]] */
01645             pop_marked_section(p);
01646             p->state = S_PCDATA;
01647           } 
01648         else if ( f[CF_DSC] != chr )    /* if ]]], stay in this state */
01649           p->state = S_MSCDATA;
01650         return;
01651       }
01652 
01653 #ifdef UTF8
01654     case S_UTF8:
      /* Continuation byte of a multi-byte sequence: a malformed byte is
         reported, but its low 6 bits are still accumulated */
01655       if ( (chr & 0xc0) != 0x80 )       /* TBD: recover */
01656         gripe(ERC_SYNTAX_ERROR, "Bad UTF-8 sequence", "");
01657       p->utf8_char <<= 6;
01658       p->utf8_char |= (chr & ~0xc0);
01659       if ( --p->utf8_left == 0 )
01660         { 
          /* Sequence complete.  Code points at or beyond
             OUTPUT_CHARSET_SIZE are passed to the on_entity handler when
             one is installed; otherwise an error is reported and the
             value is still added as character data */
01661           if ( p->utf8_char >= OUTPUT_CHARSET_SIZE &&
01662                p->mark_state == MS_INCLUDE )
01663             { 
01664               if ( p->on_entity )
01665                 { 
01666                   process_cdata(p, FALSE);
01667                   (*p->on_entity)(p, NULL, p->utf8_char);
01668                   goto utf8_done;
01669                 } 
01670               else
01671                 gripe(ERC_REPRESENTATION, "character");
01672             }
01673           add_cdata(p, p->utf8_char);   /* verbatim? */
01674         utf8_done:
01675           p->state = p->utf8_saved_state;
01676         }
01677 
01678       break;
01679 #endif
01680 
    /* no default case: a state value not listed above leaves the
       character unprocessed */
01681     }
01682 }
01683 
01692 int
01693 gripe(dtd_error_id e, ...)
01694 { va_list args;
01695   char buf[1024];
01696   dtd_error error;
01697   int dtdmode = FALSE;
01698                                                                                
01699   va_start(args, e);
01700                                                                                
01701   memset(&error, 0, sizeof(error));
01702   error.minor = e;                      /* detailed error code */
01703     
01704   /*Save the current parsing location*/            
01705   if ( current_parser )
01706     { 
01707       error.location = &current_parser->location;
01708       if ( current_parser->dmode == DM_DTD )
01709         dtdmode = TRUE;
01710     } 
01711   else
01712     { 
01713       error.location = NULL;
01714     }
01715   /*Create the error term based on the error code sent*/
01716   switch(e)
01717     { 
01718 
01719       /*Not enough resource*/
01720     case ERC_REPRESENTATION:
01721     case ERC_RESOURCE:
01722       error.severity = ERS_ERROR;
01723       error.argv[0]  = va_arg(args, char *);
01724       break;
01725       /*Limit exceeded*/
01726     case ERC_LIMIT:
01727       error.severity = ERS_WARNING;
01728       error.argv[0]  = va_arg(args, char *);
01729       break;
01730       /*Syntax error*/
01731     case ERC_SYNTAX_ERROR:
01732     case ERC_SYNTAX_WARNING:
01733       { char *m = va_arg(args, char *);
01734         const char *s = va_arg(args, const char *);
01735                                                                                
01736         if ( s && *s )
01737           { sprintf(buf, "%s, found \"%s\"", m, str_summary(s, 25));
01738             error.argv[0] = buf;
01739           } else
01740           error.argv[0] = m;
01741                                                                                
01742         error.severity = (e == ERC_SYNTAX_WARNING ? ERS_WARNING : ERS_ERROR);
01743         e = ERC_SYNTAX_ERROR;
01744         break;
01745       }
01746       /*Functor domain error*/
01747     case ERC_DOMAIN:
01748       { const char *expected = va_arg(args, const char *);
01749         const char *found    = str_summary(va_arg(args, const char *), 25);
01750                                                                                
01751         sprintf(buf, "Expected type %s, found \"%s\"", expected, found);
01752         error.argv[0] = buf;
01753         error.severity = ERS_ERROR;
01754         e = (dtdmode ? ERC_SYNTAX_ERROR : ERC_VALIDATE);
01755         break;
01756       }
01757       /*Redefinition error*/
01758     case ERC_REDEFINED:
01759       { dtd_symbol *name;
01760         error.argv[0] = va_arg(args, char *); /* type */
01761         name = va_arg(args, dtd_symbol *); /* name */
01762         error.argv[1] = (char *)name->name;
01763         error.severity = ERS_STYLE;
01764         break;
01765       }
01766       /*Existence error*/
01767     case ERC_EXISTENCE:
01768       { error.argv[0] = va_arg(args, char *); /* type */
01769         error.argv[1] = va_arg(args, char *); /* name */
01770         error.severity = ERS_ERROR;
01771         break;
01772       }
01773       /*Validation error*/
01774     case ERC_VALIDATE:
01775       { error.argv[0] = va_arg(args, char *); /* message */
01776         error.severity = ERS_WARNING;
01777         break;
01778       }
01779       /*The closing tag has been omitted*/
01780     case ERC_OMITTED_CLOSE:
01781       { const char *element = va_arg(args, const char *);
01782                                                                                
01783         sprintf(buf, "Inserted omitted end-tag for \"%s\"", element);
01784         error.argv[0] = buf;
01785         error.severity = ERS_WARNING;
01786         e = ERC_VALIDATE;
01787         break;
01788       }
01789       /*The starting tag has been omitted*/
01790     case ERC_OMITTED_OPEN:
01791       { const char *element = va_arg(args, const char *);
01792                                                                                
01793         sprintf(buf, "Inserted omitted start-tag for \"%s\"", element);
01794         error.argv[0] = buf;
01795         error.severity = ERS_WARNING;
01796         e = ERC_VALIDATE;
01797         break;
01798       }
01799       /*The opening tag has been omitted*/
01800     case ERC_NOT_OPEN:
01801       { const char *element = va_arg(args, const char *);
01802                                                                                
01803         sprintf(buf, "Ignored end-tag for \"%s\" which is not open", element);
01804         error.argv[0] = buf;
01805         error.severity = ERS_WARNING;
01806         e = ERC_VALIDATE;
01807         break;
01808       }
01809       /*A Syntax error*/
01810     case ERC_NOT_ALLOWED:
01811       { const char *element = va_arg(args, const char *);
01812                                                                                
01813         sprintf(buf, "Element \"%s\" not allowed here", element);
01814         error.argv[0] = buf;
01815         error.severity = ERS_WARNING;
01816         e = ERC_VALIDATE;
01817         break;
01818       }
01819       /*PCDATA misplaced*/
01820     case ERC_NOT_ALLOWED_PCDATA:
01821       {   char *text = va_arg(args, char *);
01822         text[ strlen(text) - 1] = '\0';
01823         sprintf(buf, "#PCDATA (\"%s\") not allowed here", str_summary(text,25));
01824         error.argv[0] = buf;
01825         error.severity = ERS_WARNING;
01826         e = ERC_VALIDATE;
01827         break;
01828       }
01829       /*No attribute where expected*/
01830     case ERC_NO_ATTRIBUTE:
01831       { const char *elem = va_arg(args, char *); /* element */
01832         const char *attr = va_arg(args, char *); /* attribute */
01833                                                                                
01834         sprintf(buf, "Element \"%s\" has no attribute \"%s\"", elem, attr);
01835         error.argv[0] = buf;
01836         error.severity = ERS_WARNING;
01837                                                                                
01838         e = ERC_VALIDATE;
01839         break;
01840       }
01841       /*Attribute has no value*/
01842     case ERC_NO_ATTRIBUTE_VALUE:
01843       { const char *elem  = va_arg(args, char *); /* element */
01844         const char *value = va_arg(args, char *); /* attribute value */
01845                                                                                
01846         sprintf(buf, "Element \"%s\" has no attribute with value \"%s\"",
01847                 elem, value);
01848         error.argv[0] = buf;
01849         error.severity = ERS_WARNING;
01850                                                                                
01851         e = ERC_VALIDATE;
01852         break;
01853       }
01854       /*Entity has no value*/
01855     case ERC_NO_VALUE:
01856       { error.argv[0] = "entity value";
01857         error.argv[1] = va_arg(args, char *); /* entity */
01858                                                                                
01859         error.severity = ERS_ERROR;
01860         e = ERC_EXISTENCE;
01861         break;
01862       }
01863       /*xml has no doctype*/
01864     case ERC_NO_DOCTYPE:
01865       { const char *doctype = va_arg(args, char *); /* element */
01866         const char *file    = va_arg(args, char *); /* DTD file */
01867                                                                                
01868         sprintf(buf, "No <!DOCTYPE ...>, assuming \"%s\" from DTD file \"%s\"",
01869                 doctype, file);
01870         error.argv[0] = buf;
01871         error.severity = ERS_WARNING;
01872                                                                                
01873         e = ERC_VALIDATE;
01874         break;
01875       }
01876    
01877     }  
01878   error.id      = e;
01879   format_message(&error);
01880   
01881   /*If the parser has been allocated invoke the error handling function*/                                                               
01882   if ( current_parser && current_parser->on_error )
01883     (*current_parser->on_error)(current_parser, &error);
01884   /*otherwise create the error term to throw*/
01885   else
01886     {
01887       /*Temporary terms used to create the output error term*/
01888       prolog_term av0, av1, av2;
01889       
01890       av0 = p2p_new();
01891       if(error.severity == ERS_ERROR)
01892         {
01893           c2p_functor("error",1,av0);
01894         }
01895       else if(error.severity == ERS_WARNING)
01896         {
01897           c2p_functor("warning",1,av0);
01898         }
01899       else      
01900         {
01901           return FALSE;
01902         }
01903       av1 = p2p_arg(av0, 1);
01904       c2p_functor("sgml", 1, av1);
01905       av2 = p2p_arg( av1, 1);
01906       c2p_functor( "miscellaneous", 1 , av2);
01907       c2p_string( error.message, p2p_arg(av2,1));
01908 
01909       if(error.severity == ERS_WARNING)
01910         {      
01911           av1 = global_warning_term;
01912           while( is_list( av1))
01913             {
01914               av2 = p2p_cdr(av1);
01915               av1 = av2;
01916             }
01917           c2p_list(av1);
01918           p2p_unify( p2p_car(av1), av0);
01919         }
01920       else if(error.severity == ERS_ERROR)
01921         {
01922           av1 = global_error_term;
01923           p2p_unify( av1, av0);
01924         }
01925       else
01926         {
01927           return FALSE;
01928         }
01929     }
01930   va_end(args);
01931                                                                                
01932   return FALSE;
01933 }
01934 
01941 static char *
01942 format_location(char *s, dtd_srcloc *l)
01943 { int first = TRUE;
01944                                                                                
01945   if ( !l || l->type == IN_NONE )
01946     return s;
01947                                                                                
01948   for( ; l && l->type != IN_NONE;
01949        l = l->parent, first = FALSE )
01950     { if ( !first )
01951         { sprintf(s, " (from ");
01952           s += strlen(s);
01953         }
01954       switch(l->type)
01955         { case IN_NONE:
01956             assert(0);
01957         case IN_FILE:
01958           sprintf(s, "%s:%d:%d", l->name, l->line, l->linepos);
01959           break;
01960         case IN_ENTITY:
01961           sprintf(s, "&%s;%d:%d", l->name, l->line, l->linepos);
01962           break;
01963         }
01964       s += strlen(s);
01965       if ( !first )
01966         { *s++ = ')';
01967         }
01968     }
01969                                                                                
01970   *s++ = ':';
01971   *s++ = ' ';
01972                                                                                
01973   return s;
01974 }
01975 
01982 static void
01983 format_message(dtd_error *e)
01984 { char buf[1024];
01985   char *s;
01986   int prefix_len;
01987                                                                                
01988   switch(e->severity)
01989     { case ERS_ERROR:
01990         strcpy(buf, "Error: ");
01991         break;
01992     case ERS_WARNING:
01993       strcpy(buf, "Warning: ");
01994       break;
01995     default:
01996       buf[0] = '\0';
01997     }
01998   s = buf+strlen(buf);
01999                                                                                
02000   s = format_location(s, e->location);
02001 
02002         
02003   prefix_len = s-buf;
02004 
02005   switch(e->id)
02006     { case ERC_REPRESENTATION:
02007         sprintf(s, "Cannot represent due to %s", e->argv[0]);
02008         break;
02009     case ERC_RESOURCE:
02010       sprintf(s, "Insufficient %s resources", e->argv[0]);
02011       break;
02012     case ERC_LIMIT:
02013       sprintf(s, "%s limit exceeded", e->argv[0]);
02014       break;
02015     case ERC_VALIDATE:
02016       sprintf(s, "%s", e->argv[0]);
02017       break;
02018     case ERC_SYNTAX_ERROR:
02019       sprintf(s, "Syntax error: %s", e->argv[0]);
02020       break;
02021     case ERC_EXISTENCE:
02022       sprintf(s, "%s \"%s\" does not exist", e->argv[0], e->argv[1]);
02023       break;
02024     case ERC_REDEFINED:
02025       sprintf(s, "Redefined %s \"%s\"", e->argv[0], e->argv[1]);
02026       break;
02027     default:
02028       ;
02029     }
02030   
02031   e->message = buf;
02032   e->plain_message = e->message + prefix_len;
02033 }
02034 
02040 static int
02041 process_entity(dtd_parser *p, const ichar *name)
02042 { 
02043 
02044   if ( name[0] == '#' )                 /* #charcode: character entity */
02045     {
02046       int v = char_entity_value(name);
02047     
02048                                                                        
02049       if ( v == FALSE )
02050         {
02051           return gripe(ERC_SYNTAX_ERROR, "Bad character entity", name);
02052         }
02053                                                                                
02054       if ( v >= OUTPUT_CHARSET_SIZE )
02055         { 
02056           /*Invoke the entity handling function of the parser*/
02057           if ( p->on_entity )
02058             {
02059               process_cdata(p, FALSE);
02060               (*p->on_entity)(p, NULL, v);
02061             } 
02062           else
02063             {
02064               return gripe(ERC_REPRESENTATION, "character");
02065             }
02066         } 
02067       else
02068         add_ocharbuf(p->cdata, v);
02069     }
02070   else
02071     {
02072       dtd_symbol *id;
02073       dtd_entity *e;
02074       dtd *dtd = p->dtd;
02075       int len;
02076       const ichar *text;
02077       const ichar *s;
02078       int   chr;
02079       const char *file;
02080               
02081       /*Find the new entity in the entity symbol table. If not found add it*/                                                                 
02082       if ( !(id=dtd_find_entity_symbol(dtd, name)) ||
02083            !(e=id->entity) )
02084         {
02085           if ( dtd->default_entity )
02086             e = dtd->default_entity;
02087           else
02088             {   
02089               return gripe(ERC_EXISTENCE, "entity", name);
02090             }
02091         }
02092       if ( !e->value &&
02093            e->content == EC_SGML &&
02094            (file=entity_file(p->dtd, e)) )
02095         { 
02096           empty_icharbuf(p->buffer);                /* dubious */
02097                                                                                
02098           return sgml_process_file(p, file, SGML_SUB_DOCUMENT);
02099         }
02100 
02101       if ( !(text = entity_value(p, e, &len)) )
02102         return gripe(ERC_NO_VALUE, e->name->name);
02103     
02104       /*Invoke the appropriate handling function based on the entity content*/
02105       switch ( e->content )
02106         { 
02107         case EC_SGML:
02108         case EC_CDATA:
02109 
02110           if ( (s=isee_character_entity(dtd, text, &chr)) && *s == '\0' )
02111             { 
02112               if ( p->blank_cdata == TRUE && !HasClass(dtd, chr, CH_BLANK) )
02113                 { p->cdata_must_be_empty = !open_element(p, CDATA_ELEMENT, FALSE);
02114                   p->blank_cdata = FALSE;
02115                 }
02116 
02117               if ( chr > 0 && chr < OUTPUT_CHARSET_SIZE )
02118                 { add_ocharbuf(p->cdata, chr);
02119                   return TRUE;
02120                 } else
02121                 { 
02122                   if ( p->on_entity )
02123                     { process_cdata(p, FALSE);
02124                       (*p->on_entity)(p, e, chr);
02125                     } else
02126                     return gripe(ERC_REPRESENTATION, "character");
02127                 }
02128               break;
02129             }
02130           if ( e->content == EC_SGML )
02131             { locbuf oldloc;
02132         
02133               push_location(p, &oldloc);
02134               set_src_dtd_parser(p, IN_ENTITY, e->name->name);
02135               empty_icharbuf(p->buffer);            /* dubious */
02136               for(s=text; *s; s++)
02137                 putchar_dtd_parser(p, *s);
02138               pop_location(p, &oldloc);
02139             } 
02140           else if ( *text )
02141             { const ochar *o;
02142                        
02143               if ( p->blank_cdata == TRUE )
02144                 { 
02145                   p->cdata_must_be_empty = !open_element(p, CDATA_ELEMENT, FALSE);
02146                   p->blank_cdata = FALSE;
02147                 }
02148                                                                                
02149               for(o=(const ochar *)text; *o; o++)
02150                 add_ocharbuf(p->cdata, *o);
02151             }
02152           break;
02153         case EC_SDATA:
02154         case EC_NDATA:
02155           process_cdata(p, FALSE);
02156           if ( p->on_data )
02157             (*p->on_data)(p, e->content, len, text);
02158           break;
02159         case EC_PI:
02160           process_cdata(p, FALSE);
02161           if ( p->on_pi )
02162             (*p->on_pi)(p, text);
02163         case EC_STARTTAG:
02164 #if 0
02165           prepare_cdata(p);
02166           process_begin_element(p, text);
02167 #endif
02168           break;
02169         case EC_ENDTAG:
02170 #if 0
02171           prepare_cdata(p);
02172           process_end_element(p, text);
02173 #endif
02174           break;
02175         }
02176                                                                                
02177       return TRUE;
02178     }
02179                                                                                
02180   return TRUE;
02181 }
02182 
02187 int
02188 sgml_process_file(dtd_parser *p, const char *file, unsigned flags)
02189 { int rval;
02190   locbuf oldloc;
02191 
02192   char fname[MAXSTRLEN];
02193   char server[MAXSTRLEN];
02194   
02195   char * buf = NULL;
02196 
02197   int n= 0;
02198 
02199   push_location(p, &oldloc);
02200   set_src_dtd_parser(p, IN_FILE, file);
02201   if ( !(flags & SGML_SUB_DOCUMENT) )
02202     set_mode_dtd_parser(p, DM_DATA);
02203  
02204   
02205   if( parse_url( file, server, fname) != FALSE)
02206     {
02207 
02208       if( get_file_www( server, fname, &buf) == FALSE){
02209         rval = FALSE; 
02210       }
02211       else{
02212         n = strlen( buf);
02213       }
02214     }
02215 
02216   if ( buf )
02217     {
02218       rval = sgml_process_stream(p, buf, flags, n);
02219     }
02220   else
02221     rval = FALSE;
02222                                                                                
02223   pop_location(p, &oldloc);
02224                                                                                
02225   return rval;
02226 }
02227                         
02228    
02232 int
02233 sgml_process_stream(dtd_parser *p, char *buf, unsigned flags, int source_len)
02234 { int p0, p1, i=0 ;
02235                                                                                
02236   if ( (p0 = buf[i]) == EOF )
02237     return TRUE;
02238   i++;
02239   if ( (p1 = buf[i]) == EOF )
02240     { putchar_dtd_parser(p, p0);
02241       return end_document_dtd_parser(p);
02242     }
02243   i++;                                             
02244   for( ; i<=source_len ; i++)
02245     { int p2 = buf[i];
02246                                                                                
02247       if ( p2 == EOF || p2 == '\0')
02248         { putchar_dtd_parser(p, p0);
02249           if ( p1 != LF )
02250             putchar_dtd_parser(p, p1);
02251           else if ( p0 != CR )
02252             putchar_dtd_parser(p, CR);
02253                                                                                
02254           if ( flags & SGML_SUB_DOCUMENT )
02255             return TRUE;
02256           else
02257             return end_document_dtd_parser(p);
02258         }
02259                                                                                
02260       putchar_dtd_parser(p, p0);
02261       p0 = p1;
02262       p1 = p2;
02263     }
02264   return TRUE;
02265 }
02266 
02271 int
02272 end_document_dtd_parser(dtd_parser *p)
02273 { int rval;
02274                                                                                
02275   WITH_PARSER(p, rval = end_document_dtd_parser_(p));
02276                                                                               
02277   return rval;
02278 }
02279 
02280 
02281 
02291 int
02292 end_document_dtd_parser_(dtd_parser *p)
02293 { int rval;
02294 
02295   switch(p->state)
02296     { case S_RCDATA:
02297     case S_CDATA:
02298     case S_PCDATA:
02299       rval = TRUE;
02300       break;
02301     case S_CMT:
02302     case S_CMTE0:
02303     case S_CMTE1:
02304     case S_DECLCMT0:
02305     case S_DECLCMT:
02306     case S_DECLCMTE0:
02307       rval = gripe(ERC_SYNTAX_ERROR, "Unexpected end-of-file in comment", "");
02308       break;
02309     case S_ECDATA1:
02310     case S_ECDATA2:
02311     case S_EMSC1:
02312     case S_EMSC2:
02313     case S_DECL0:
02314     case S_DECL:
02315     case S_MDECL0:
02316     case S_STRING:
02317     case S_CMTO:
02318     case S_GROUP:
02319     case S_PENT:
02320     case S_ENT:
02321     case S_ENT0:
02322       rval = gripe(ERC_SYNTAX_ERROR, "Unexpected end-of-file", "");
02323       break;
02324 #ifdef UTF8
02325     case S_UTF8:
02326       rval = gripe(ERC_SYNTAX_ERROR,"Unexpected end-of-file in UTF-8 sequence", "");
02327       break;
02328 #endif
02329     case S_MSCDATA:
02330     case S_EMSCDATA1:
02331     case S_EMSCDATA2:
02332       rval = gripe(ERC_SYNTAX_ERROR, "Unexpected end-of-file in CDATA marked section", "");
02333       break;
02334     case S_PI:
02335     case S_PI2:
02336       rval = gripe(ERC_SYNTAX_ERROR,"Unexpected end-of-file in processing instruction", "");
02337       break;
02338     default:
02339       rval = gripe(ERC_SYNTAX_ERROR, "Unexpected end-of-file in ???");
02340       break;
02341     }
02342   if ( p->dmode == DM_DATA )
02343     { sgml_environment *env;
02344       process_cdata(p, TRUE);
02345       if ( (env=p->environments) )
02346         { dtd_element *e;
02347           while(env->parent)
02348             env = env->parent;
02349           pop_to(p, env, CDATA_ELEMENT);
02350           e = env->element;
02351           if ( e->structure && !e->structure->omit_close )
02352             {
02353               gripe(ERC_OMITTED_CLOSE, e->name->name);
02354             }
02355 
02356           close_element(p, e, FALSE);
02357         }
02358     }
02359   return rval;
02360 }
02361 
02370 static int
02371 pop_to(dtd_parser *p, sgml_environment *to, dtd_element *e0)
02372 { sgml_environment *env, *parent;
02373   for(env = p->environments; env != to; env=parent)
02374     { dtd_element *e = env->element;
02375       validate_completeness(env);
02376       parent = env->parent;
02377       if ( e->structure && !e->structure->omit_close )
02378         {
02379           gripe(ERC_OMITTED_CLOSE, e->name->name);
02380         }
02381       if ( e0 != CDATA_ELEMENT )
02382         emit_cdata(p, TRUE);
02383       p->first = FALSE;
02384       p->environments = env;
02385       if ( p->dtd->shorttag )
02386         p->waiting_for_net = env->saved_waiting_for_net;
02387 
02388       WITH_CLASS(p, EV_OMITTED,
02389                  if ( p->on_end_element )
02390                    (*p->on_end_element)(p, e));
02391       free_environment(env);
02392     }
02393   p->environments = to;
02394   p->map = to->map;
02395   return TRUE;
02396 }
02397 
02398 
02399 
02400 static int
02401 process_cdata(dtd_parser *p, int last)
02402 {
02403   prepare_cdata(p);
02404   return emit_cdata(p, last);
02405 }
02406 
02413 static dtd_symbol *
02414 dtd_find_entity_symbol(dtd *dtd, const ichar *name)
02415 { dtd_symbol_table *t = dtd->symbols;
02416   if ( dtd->ent_case_sensitive )
02417     { int k = istrhash(name, t->size);
02418       dtd_symbol *s;
02419               
02420                                                                   
02421       for(s=t->entries[k]; s; s = s->next)
02422         { 
02423           if ( istreq(s->name, name) )
02424             {
02425               return s;
02426             }
02427         }
02428     } else
02429     { int k = istrcasehash(name, t->size);
02430       dtd_symbol *s;
02431         
02432       for(s=t->entries[k]; s; s = s->next)
02433         { if ( istrcaseeq(s->name, name) )
02434             {
02435 
02436               return s;
02437             }
02438         }
02439     }
02440 
02441   return NULL;
02442 }
02443 
02451 static void
02452 add_attribute(dtd *dtd, dtd_element *e, dtd_attr *a)
02453 {
02454   dtd_attr_list **l;
02455   dtd_attr_list *n;
02456   for(l = &e->attributes; *l; l = &(*l)->next)
02457     { 
02458       if ( (*l)->attribute->name == a->name )
02459         { 
02460           char temp[30];
02461           sprintf(temp, "Attribute %s redefined\n", a->name->name);
02462           gripe(ERC_VALIDATE, temp);                            
02463           (a->references)++;                  /* attempt to redefine attribute: */
02464           free_attribute(a);                /* first wins according to standard */
02465           return;
02466         }
02467     }
02468 
02469   n = calloc(1, sizeof(*n));
02470   n->attribute = a;
02471   a->references++;
02472   *l = n;
02473   set_element_properties(e, a);
02474 }
02475         
02480 static void
02481 free_attribute_values(int argc, sgml_attribute *argv)
02482 { int i;
02483 
02484   for(i=0; i<argc; i++, argv++)
02485     { 
02486       if ( (argv->flags & SGML_AT_DEFAULT) )
02487         continue;                         /* shared with the DTD */
02488       if ( argv->value.cdata )
02489         free(argv->value.cdata);
02490       if ( argv->value.text )
02491         free(argv->value.text);
02492     }
02493 }
02494                                           
02499 static void
02500 add_list_element(dtd_element *e, void *closure)
02501 { namelist *nl = closure;
02502                                                                               
02503   nl->list[nl->size++] = e->name;
02504 }
02505 
02506 static void
02507 add_submodel(dtd_model *m, dtd_model *sub)
02508 { dtd_model **d;
02509   for( d = &m->content.group; *d; d = &(*d)->next )
02510     ;
02511   *d = sub;
02512 }
02513                                       
02514                                                                             
02515 static void
02516 free_name_list(dtd_name_list *nl)
02517 { dtd_name_list *next;
02518                                                                             
02519   for( ; nl; nl=next)
02520     { next = nl->next;
02521                                                                             
02522       free(nl);
02523     }
02524 }
02525 
02526 
02527 static void
02528 free_attribute(dtd_attr *a)
02529 { 
02530   if ( --a->references == 0 )
02531     {
02532       switch(a->type)
02533         { 
02534         case AT_NAMEOF:
02535         case AT_NOTATION:
02536           free_name_list(a->typeex.nameof);
02537         default:
02538           ;
02539         }
02540       switch(a->def)
02541         { 
02542         case AT_DEFAULT:
02543           { 
02544             if ( a->type == AT_CDATA )
02545               free(a->att_def.cdata);
02546             else if ( a->islist )
02547               free(a->att_def.list);
02548           }
02549         default:
02550           ;
02551         }
02552                                                                             
02553       free(a);
02554 
02555     }
02556 
02557 }
02558 
02565 static int
02566 prepare_cdata(dtd_parser *p)
02567 {
02568         
02569   if ( p->cdata->size == 0 )
02570     return TRUE;
02571 
02572   terminate_ocharbuf(p->cdata);
02573 
02574   if ( p->mark_state == MS_INCLUDE )
02575     {
02576       dtd *dtd = p->dtd;
02577       dtd_element *e;
02578                 
02579 
02580       /*Handle the environment*/                                                                
02581       if ( p->environments )              /* needed for <img> <img> */
02582         { 
02583           e = p->environments->element;
02584 
02585           if ( e->structure && e->structure->type == C_EMPTY && !e->undefined )
02586             {
02587 
02588               close_element(p, e, FALSE);
02589             }
02590         }
02591                 
02592       /*Handle the cdata in the parser buffer*/
02593       if ( p->blank_cdata == TRUE )
02594         { 
02595           int blank = TRUE;
02596           const ichar *s;
02597                                 
02598           for(s = p->cdata->data; *s; s++)
02599             { 
02600               if ( !HasClass(dtd, *s, CH_BLANK) )
02601                 { 
02602                   blank = FALSE;
02603                   break;
02604                 }
02605             }
02606 
02607           p->blank_cdata = blank;
02608           if ( !blank )
02609             { 
02610               if ( p->dmode == DM_DTD )
02611                 gripe(ERC_SYNTAX_ERROR, "CDATA in DTD", p->cdata->data);
02612               else
02613                 {
02614                                         
02615                   open_element(p, CDATA_ELEMENT, TRUE);
02616                 }
02617             }   
02618         }
02619     }
02620         
02621   return TRUE;  
02622 }
02623 
02630 static int
02631 close_element(dtd_parser *p, dtd_element *e, int conref)
02632 { 
02633   sgml_environment *env;
02634 
02635   for(env = p->environments; env; env=env->parent)
02636     {
02637                 
02638       if ( env->element == e )            /* element is open */
02639         {
02640           sgml_environment *parent;
02641                                 
02642           for(env = p->environments; ; env=parent)
02643             {
02644               dtd_element *ce = env->element;
02645     
02646               /*Validate the element*/
02647               if ( !(conref && env == p->environments) )
02648                 validate_completeness(env);
02649                                 
02650               parent = env->parent;
02651 
02652               p->first = FALSE;
02653                                 
02654               /*Call the handler function*/
02655               if ( p->on_end_element )
02656                 (*p->on_end_element)(p, env->element);
02657                                 
02658               free_environment(env);
02659                 
02660               p->environments = parent;
02661                                             
02662               if ( ce == e )                  /* closing current element */
02663                 { 
02664                   p->map = (parent ? parent->map : NULL);
02665                   return TRUE;
02666                 } 
02667               else                          /* omited close */
02668                 {
02669                   if ( ce->structure && !ce->structure->omit_close )
02670                     gripe(ERC_OMITTED_CLOSE, ce->name->name);
02671                 }
02672                                 
02673             }     
02674           
02675         }
02676     }
02677   return gripe(ERC_NOT_OPEN, e->name->name);
02678 }                         
02679         
02684 static void
02685 validate_completeness(sgml_environment *env)
02686 { if ( !complete(env) )
02687     { 
02688       char buf[MAXSTRLEN];
02689       sprintf(buf, "Incomplete <%s> element", env->element->name->name);
02690     
02691       gripe(ERC_VALIDATE, buf);           /* TBD: expected */
02692     }
02693 }
02694 
02700 static void
02701 free_environment(sgml_environment *env)
02702 {
02703 #ifdef XMLNS
02704   if ( env->xmlns )
02705     xmlns_free(env);
02706 #endif
02707   free(env);
02708 }
02709 
02716 static int
02717 process_end_element(dtd_parser *p, const ichar *decl)
02718 {
02719   dtd *dtd = p->dtd;
02720   dtd_symbol *id;
02721   const ichar *s;
02722   char temp[30];
02723 
02724   emit_cdata( p, TRUE);
02725 
02726         
02727   if ( (s=itake_name(dtd, decl, &id)) && *s == '\0' )
02728     return close_element(p, find_element(dtd, id), FALSE);
02729 
02730 
02731   if ( p->dtd->shorttag && *decl == '\0' ) /* </>: close current element */
02732     return close_current_element(p);
02733         
02734   sprintf( temp, "Bad close-element tag %s\n", decl);
02735   return gripe(ERC_SYNTAX_ERROR,  "Bad close-element tag %s\n", decl);
02736 }
02737     
02738 
02744 static int
02745 close_current_element(dtd_parser *p)
02746 { if ( p->environments )
02747     { dtd_element *e = p->environments->element;
02748       emit_cdata(p, TRUE);
02749       return close_element(p, e, FALSE);
02750     }
02751   return gripe(ERC_SYNTAX_ERROR, "No element to close", "");
02752 }
02753 
02760 static int
02761 process_declaration(dtd_parser *p, const ichar *decl)
02762 {
02763   const ichar *s;
02764   dtd *dtd = p->dtd;
02765                            
02766 
02767   /*Its either an opening or closing xml tag*/  
02768   if ( p->dmode != DM_DTD )
02769     {
02770       if ( HasClass(dtd, *decl, CH_NAME) ) 
02771         { 
02772           return process_begin_element(p, decl);
02773         }
02774       else if ( (s=isee_func(dtd,decl,CF_ETAGO2)) ) 
02775         {
02776           process_end_element(p, s);
02777           return TRUE;
02778         }
02779     }
02780         
02781   if ( (s=isee_func(dtd, decl, CF_MDO2)) ) /* <! ... >*/
02782     { 
02783       decl = s;
02784 
02785       if ( p->on_decl )
02786         (*p->on_decl)(p, decl);
02787 
02788       if ( (s = isee_identifier(dtd, decl, "entity")) ){
02789         process_entity_declaration(p, s);
02790       }
02791       else if ( (s = isee_identifier(dtd, decl, "element")) )
02792         {
02793           process_element_declaraction(p, s);
02794         }
02795       else if ( (s = isee_identifier(dtd, decl, "attlist")) )
02796         {
02797           process_attlist_declaraction(p, s);
02798         }
02799       else if ( (s = isee_identifier(dtd, decl, "notation")) )
02800         {
02801           process_notation_declaration(p, s);
02802         }
02803       else if ( (s = isee_identifier(dtd, decl, "shortref")) )
02804         {
02805           process_shortref_declaration(p, s);
02806         }
02807       else if ( (s = isee_identifier(dtd, decl, "usemap")) )
02808         {
02809           process_usemap_declaration(p, s);
02810         }
02811       else if ( (s = isee_identifier(dtd, decl, "doctype")) )
02812         { 
02813           if ( p->dmode != DM_DTD )
02814             process_doctype(p, s, decl-1);
02815         } 
02816       else
02817         { 
02818           s = iskip_layout(dtd, decl);
02819           if ( *s )
02820             gripe(ERC_SYNTAX_ERROR, "Invalid declaration", s);
02821         }
02822 
02823       return TRUE;
02824 
02825     }
02826 
02827   return gripe(ERC_SYNTAX_ERROR, "Invalid declaration", decl);  
02828 }
02829 
02830 
02836 static int
02837 process_usemap_declaration(dtd_parser *p, const ichar *decl)
02838 { dtd *dtd = p->dtd;
02839   ichar buf[MAXDECL];
02840   dtd_symbol *name;
02841   const ichar *s;
02842   dtd_symbol *ename;
02843   dtd_element *e;
02844   dtd_shortref *map;
02845 
02846 
02847   if ( !expand_pentities(p, decl, buf, sizeof(buf)) )
02848     return FALSE;
02849   decl = buf;
02850 
02851   if ( !(s=itake_name(dtd, decl, &name)) )
02852     { if ( (s=isee_identifier(dtd, decl, "#empty")) )
02853         name = NULL;
02854       else
02855         return gripe(ERC_SYNTAX_ERROR, "map-name expected", decl);
02856     }
02857   decl = s;
02858         
02859   if ( !(map = find_map(dtd, name)) )
02860     map = def_shortref(p, name);        /* make undefined map */
02861       
02862                                                                          
02863   if ( isee_func(dtd, decl, CF_GRPO) )  /* ( */
02864     { dtd_model *model;
02865       if ( (model = make_model(dtd, decl, &s)) )
02866         { for_elements_in_model(model, set_map_element, map);
02867           free_model(model);
02868           decl = s;
02869         } else
02870         return FALSE;
02871     } else if ( (s=itake_name(dtd, decl, &ename)) )
02872     { e = find_element(dtd, ename);
02873       e->map = map;
02874       decl = s;
02875     } else if ( p->environments )
02876     { 
02877       if ( !map->defined )
02878         {  gripe(ERC_EXISTENCE, "map", name->name);
02879         }
02880       
02881       p->environments->map = map;
02882       p->map = p->environments->map;
02883     } else
02884     return gripe(ERC_SYNTAX_ERROR, "element-name expected", decl);
02885   
02886   if ( *decl )
02887     {
02888       return gripe(ERC_SYNTAX_ERROR, "Unparsed", decl);
02889     }
02890   
02891   return TRUE;
02892 }
02893 
02894 static void
02895 set_map_element(dtd_element *e, void *closure)
02896 { e->map = closure;
02897 }
02898 
02899 
02900 
02901 static dtd_shortref *
02902 find_map(dtd *dtd, dtd_symbol *name)
02903 { dtd_shortref *sr;
02904  
02905   if ( !name )
02906     { static dtd_shortref *empty;
02907     
02908       if ( !empty )
02909         { empty = sgml_calloc(1, sizeof(*empty));
02910           empty->name = dtd_add_symbol(dtd, "#EMPTY");
02911           empty->defined = TRUE;
02912         }
02913       
02914       return empty;
02915     }
02916   for( sr = dtd->shortrefs; sr; sr = sr->next )
02917     { if ( sr->name == name )
02918         { if ( !sr->defined )
02919             break;
02920         
02921           return sr;
02922         }
02923     }
02924   
02925   return NULL;
02926 }
02927 
02933 static int
02934 process_shortref_declaration(dtd_parser *p, const ichar *decl)
02935 { dtd *dtd = p->dtd;
02936   ichar buf[MAXDECL];
02937   dtd_shortref *sr;
02938   dtd_symbol *name;
02939   const ichar *s;
02940   
02941   if ( !expand_pentities(p, decl, buf, sizeof(buf)) )
02942     return FALSE;
02943   decl = buf;
02944   
02945   if ( !(s=itake_name(dtd, decl, &name)) )
02946     return gripe(ERC_SYNTAX_ERROR, "Name expected", decl);
02947   decl = s;
02948   
02949   sr = def_shortref(p, name);
02950   if ( sr->defined )
02951     { gripe(ERC_REDEFINED, "shortref", name);
02952       return TRUE;
02953     }
02954   sr->defined = TRUE;
02955   
02956   while( *(decl = iskip_layout(dtd, decl)) != '\0'
02957          && (s=shortref_add_map(dtd, decl, sr)) )
02958     decl = s;
02959   compile_map(dtd, sr);
02960   
02961   if ( *decl )
02962     {
02963       return gripe(ERC_SYNTAX_ERROR, "Map expected", decl);
02964     }
02965   
02966   return TRUE;
02967 }
02968 
02977 static void
02978 compile_map(dtd *dtd, dtd_shortref *sr)
02979 { dtd_map *map;
02980  
02981   for(map = sr->map; map; map = map->next)
02982     { 
02983       ichar last = map->from[map->len-1];
02984     
02985       switch( last )
02986         { 
02987         case CHR_BLANK:
02988         case CHR_DBLANK:
02989           { int i;
02990             for( i=0; i< ICHARSET_SIZE; i++)
02991               { if ( HasClass(dtd, i, CH_BLANK) )
02992                   sr->ends[i] = TRUE;
02993               }
02994           }
02995             
02996         default:
02997           sr->ends[last] = TRUE;
02998         }
02999     }
03000 }
03001 
03005 static const ichar *
03006 shortref_add_map(dtd *dtd, const ichar *decl, dtd_shortref *sr)
03007 { ichar buf[MAXSTRINGLEN];
03008   ichar from[MAXMAPLEN];
03009   ichar *f = from;
03010   dtd_symbol *to;
03011   const ichar *s;
03012   const ichar *end;
03013   dtd_map **p;
03014   dtd_map *m;
03015   
03016   if ( !(s=itake_string(dtd, decl, buf, sizeof(buf))) )
03017     { gripe(ERC_SYNTAX_ERROR, "map-string expected", decl);
03018       return NULL;
03019     }
03020   decl = s;
03021   if ( !(s=itake_entity_name(dtd, decl, &to)) )
03022     { gripe(ERC_SYNTAX_ERROR, "map-to name expected", decl);
03023       return NULL;
03024     }
03025   end = s;
03026   
03027   for(decl=buf; *decl;)
03028     { if ( *decl == 'B' )         /* blank */
03029         { if ( decl[1] == 'B' )
03030             { *f++ = CHR_DBLANK;
03031               decl += 2;
03032               continue;
03033             }
03034           *f++ = CHR_BLANK;
03035           decl++;
03036         } else
03037         *f++ = *decl++;                   /* any other character */
03038     }
03039   *f = 0;
03040   for(p=&sr->map; *p; p = &(*p)->next)
03041     ;
03042   
03043   m = calloc(1, sizeof(*m));
03044   m->from = istrdup(from);
03045   m->len  = istrlen(from);
03046   m->to   = to;
03047   
03048   *p = m;
03049   
03050   return end;
03051 }
03052 
03053 static dtd_shortref *
03054 def_shortref(dtd_parser *p, dtd_symbol *name)
03055 { dtd *dtd = p->dtd;
03056   dtd_shortref *sr, **pr;
03057   
03058   for(pr=&dtd->shortrefs; *pr; pr = &(*pr)->next)
03059     { dtd_shortref *r = *pr;
03060     
03061       if ( r->name == name )
03062         return r;
03063     }
03064   
03065   sr = calloc(1, sizeof(*sr));
03066   sr->name = name;
03067   *pr = sr;
03068   
03069   return sr;
03070 }
03071 
03072 
03073 static const ichar *
03074 itake_dubbed_string(dtd *dtd, const ichar *in, ichar **out)
03075 { ichar buf[MAXSTRINGLEN];
03076   const ichar *end;
03077   
03078   if ( (end=itake_string(dtd, in, buf, sizeof(buf))) )
03079     *out = istrdup(buf);
03080   
03081   return end;
03082 }
03083 
03089 static int
03090 process_notation_declaration(dtd_parser *p, const ichar *decl)
03091 { dtd *dtd = p->dtd;
03092   dtd_symbol *nname;
03093   const ichar *s;
03094   ichar *system = NULL, *public = NULL;
03095   dtd_notation *not;
03096 
03097   if ( !(s=itake_name(dtd, decl, &nname)) )
03098     return gripe(ERC_SYNTAX_ERROR, "Notation name expected", decl);
03099   decl = s;
03100   
03101   /*Extract the notation declaration*/
03102   if ( find_notation(dtd, nname) )
03103     {
03104       gripe(ERC_REDEFINED, "notation", nname);
03105       return TRUE;
03106     }
03107 
03108   if ( (s=isee_identifier(dtd, decl, "system")) )
03109     { ;
03110     } else if ( (s=isee_identifier(dtd, decl, "public")) )
03111     { decl = s;
03112       if ( !(s=itake_dubbed_string(dtd, decl, &public)) )
03113         {
03114           return gripe(ERC_SYNTAX_ERROR, "Public identifier expected", decl);
03115         }
03116     } else
03117     {
03118       return gripe(ERC_SYNTAX_ERROR, "SYSTEM or PUBLIC expected", decl);
03119     }
03120 
03121   decl = s;
03122   if ( (s=itake_dubbed_string(dtd, decl, &system)) )
03123     decl = s;
03124 
03125   if ( *decl )
03126     return gripe(ERC_SYNTAX_ERROR, "Unexpected end of declaraction", decl);
03127   
03128   not = sgml_calloc(1, sizeof(*not));
03129   not->name = nname;
03130   not->system = system;
03131   not->public = public;
03132   not->next = NULL;
03133   add_notation(dtd, not);
03134   
03135   return TRUE;
03136 }
03137 
03143 static void
03144 add_notation(dtd *dtd, dtd_notation *not)
03145 { dtd_notation *n;
03146  
03147   for(n=dtd->notations; n; n = n->next)
03148     { if ( !n->next )
03149         { n->next = not;
03150           break;
03151         }
03152     }
03153 }
03154 
03160 static int
03161 process_include(dtd_parser *p, const ichar *entity_name)
03162 { dtd_symbol *id;
03163   dtd_entity *pe;
03164   dtd *dtd = p->dtd;
03165   
03166     
03167   /*Search entity in entity symbol table. If not present add it*/  
03168   if ( (id=dtd_find_entity_symbol(dtd, entity_name)) &&
03169        (pe=find_pentity(p->dtd, id)) )
03170     { 
03171       const char *file;
03172           
03173       /*Extract the filename in which entities have been declared*/ 
03174       if ( (file = entity_file(dtd, pe)) )
03175         {
03176           /*Parse the file*/
03177           return sgml_process_file(p, file, SGML_SUB_DOCUMENT);
03178         }
03179       else
03180         { 
03181           /*Extract the entity value*/
03182           const ichar *text = entity_value(p, pe, NULL);
03183         
03184           if ( !text )
03185             return gripe(ERC_NO_VALUE, pe->name->name);
03186           return process_chars(p, IN_ENTITY, entity_name, text);
03187         }
03188     }
03189   return gripe(ERC_EXISTENCE, "parameter entity", entity_name);
03190 }
03191 
03196 static int
03197 process_chars(dtd_parser *p, input_type in, const ichar *name, const ichar *s)
03198 { locbuf old;
03199  
03200   push_location(p, &old);
03201   set_src_dtd_parser(p, in, (char *)name);
03202   empty_icharbuf(p->buffer);            /* dubious */
03203   for(; *s; s++)
03204     putchar_dtd_parser(p, *s);
03205   pop_location(p, &old);
03206   
03207   return TRUE;
03208 }
03209 
03215 static dtd_notation *
03216 find_notation(dtd *dtd, dtd_symbol *name)
03217 { dtd_notation *n;
03218  
03219   for(n=dtd->notations; n; n = n->next)
03220     { if ( n->name == name )
03221         return n;
03222     }
03223   
03224   return NULL;
03225 }
03226 
03232 static int
03233 process_attlist_declaraction(dtd_parser *p, const ichar *decl)
03234 {
03235   dtd *dtd = p->dtd;
03236   dtd_symbol *eid[MAXATTELEM];
03237   int i, en;
03238   ichar buf[MAXDECL];
03239   const ichar *s;
03240   
03241 
03242   /* expand parameter entities */
03243   if ( !expand_pentities(p, decl, buf, sizeof(buf)) )
03244     return FALSE;
03245   decl = iskip_layout(dtd, buf);
03246 
03247   if ( !(decl=itake_el_or_model_element_list(dtd, decl, eid, &en)) )
03248     return FALSE;
03249 
03250   while(*decl)
03251     { 
03252       dtd_attr *at = calloc(1, sizeof(*at));
03253       
03254       /* name of attribute */
03255       if ( !(s = itake_name(dtd, decl, &at->name)) )
03256         return gripe(ERC_SYNTAX_ERROR, "Name expected", decl);
03257       decl = s;
03258       /* (name1|name2|...) type */
03259       if ( (s=isee_func(dtd, decl, CF_GRPO)) )
03260         { 
03261           charfunc ngs = CF_NG;
03262           at->type = AT_NAMEOF;
03263           decl=s;
03264 
03265           for(;;)
03266             { 
03267               dtd_symbol *nm;
03268               
03269               if ( !(s = itake_nmtoken(dtd, decl, &nm)) )
03270                 return gripe(ERC_SYNTAX_ERROR, "Name expected", decl);
03271               decl = s;
03272               add_name_list(&at->typeex.nameof, nm);
03273               if ( (s=isee_ngsep(dtd, decl, &ngs)) )
03274                 { 
03275                   decl = s;
03276                   continue;
03277                 }
03278               if ( (s = isee_func(dtd, decl, CF_GRPC)) )
03279                 { 
03280                   decl=s;
03281                   decl = iskip_layout(dtd, decl);
03282                   break;
03283                 }
03284               return gripe(ERC_SYNTAX_ERROR, "Illegal name-group", decl);
03285             }
03286 
03287         }
03288       /*else parse the type of attribute*/
03289       else if ( (s=isee_identifier(dtd, decl, "cdata")) )
03290         { 
03291           decl = s;
03292           at->type = AT_CDATA;
03293         } else if ( (s=isee_identifier(dtd, decl, "entity")) )
03294         { 
03295           decl = s;
03296           at->type = AT_ENTITY;
03297         } else if ( (s=isee_identifier(dtd, decl, "entities")) )
03298         { 
03299           decl = s;
03300           at->type = AT_ENTITIES;
03301           at->islist = TRUE;
03302         } else if ( (s=isee_identifier(dtd, decl, "id")) )
03303         { 
03304           decl = s;
03305           at->type = AT_ID;
03306         } else if ( (s=isee_identifier(dtd, decl, "idref")) )
03307         { 
03308           decl = s;
03309           at->type = AT_IDREF;
03310         } else if ( (s=isee_identifier(dtd, decl, "idrefs")) )
03311         { 
03312           decl = s;
03313           at->type = AT_IDREFS;
03314           at->islist = TRUE;
03315         } else if ( (s=isee_identifier(dtd, decl, "name")) )
03316         { 
03317           decl = s;
03318           at->type = AT_NAME;
03319         } else if ( (s=isee_identifier(dtd, decl, "names")) )
03320         { 
03321           decl = s;
03322           at->type = AT_NAMES;
03323           at->islist = TRUE;
03324         } else if ( (s=isee_identifier(dtd, decl, "nmtoken")) )
03325         { 
03326           decl = s;
03327           at->type = AT_NMTOKEN;
03328         } else if ( (s=isee_identifier(dtd, decl, "nmtokens")) )
03329         { 
03330           decl = s;
03331           at->type = AT_NMTOKENS;
03332           at->islist = TRUE;
03333         } else if ( (s=isee_identifier(dtd, decl, "number")) )
03334         { 
03335           decl = s;
03336           at->type = AT_NUMBER;
03337         } else if ( (s=isee_identifier(dtd, decl, "numbers")) )
03338         { 
03339           decl = s;
03340           at->type = AT_NUMBERS;
03341           at->islist = TRUE;
03342         } else if ( (s=isee_identifier(dtd, decl, "nutoken")) )
03343         { 
03344           decl = s;
03345           at->type = AT_NUTOKEN;
03346         } else if ( (s=isee_identifier(dtd, decl, "nutokens")) )
03347         { 
03348           decl = s;
03349           at->type = AT_NUTOKENS;
03350           at->islist = TRUE;
03351         } else if ( (s=isee_identifier(dtd, decl, "notation")) )
03352         { 
03353           dtd_symbol *ng[MAXNAMEGROUP];
03354           int ns;
03355           at->type = AT_NOTATION;
03356           decl=s;
03357           if ( (s=itake_namegroup(dtd, CF_OR, decl, ng, &ns)) )
03358             { 
03359               decl = s;
03360               for(i=0; i<ns; i++)
03361                 add_name_list(&at->typeex.nameof, ng[i]);
03362             } else
03363             {
03364               return gripe(ERC_SYNTAX_ERROR, "name-group expected", decl);
03365             }
03366         } else
03367         {
03368           return gripe(ERC_SYNTAX_ERROR, "Attribute-type expected", decl);
03369         }
03370 
03371       /* Attribute Defaults */
03372       if ( (s=isee_identifier(dtd, decl, "#fixed")) )
03373         {
03374           decl = s;
03375           at->def = AT_FIXED;
03376         } else if ( (s=isee_identifier(dtd, decl, "#required")) )
03377         {
03378           decl = s;
03379           at->def = AT_REQUIRED;
03380         } else if ( (s=isee_identifier(dtd, decl, "#current")) )
03381         { 
03382           decl = s;
03383           at->def = AT_CURRENT;
03384         } else if ( (s=isee_identifier(dtd, decl, "#conref")) )
03385         { 
03386           decl = s;
03387           at->def = AT_CONREF;
03388         } else if ( (s=isee_identifier(dtd, decl, "#implied")) )
03389         {       
03390           decl = s;
03391           at->def = AT_IMPLIED;
03392         } else                              /* real default */
03393         at->def = AT_DEFAULT;
03394 
03395       if ( at->def == AT_DEFAULT || at->def == AT_FIXED )
03396         { 
03397           ichar buf[MAXSTRINGLEN];
03398           const ichar *end;
03399           if ( !(end=itake_string(dtd, decl, buf, sizeof(buf))) )
03400             end=itake_nmtoken_chars(dtd, decl, buf, sizeof(buf));
03401           if ( !end )
03402             return gripe(ERC_SYNTAX_ERROR, "Bad attribute default", decl);
03403                         
03404           switch(at->type)
03405             { 
03406             case AT_CDATA:
03407               { 
03408                 at->att_def.cdata = istrdup(buf);
03409                 break;
03410               }
03411             case AT_ENTITY:
03412             case AT_NOTATION:
03413             case AT_NAME:
03414               { 
03415                 if ( !(s=itake_name(dtd, buf, &at->att_def.name)) || *s )
03416                   {
03417                     return gripe(ERC_DOMAIN, "name", decl);
03418                   }
03419                 break;
03420               }
03421             case AT_NMTOKEN:
03422             case AT_NAMEOF:
03423               { 
03424                 if ( !(s=itake_nmtoken(dtd, buf, &at->att_def.name)) || *s )
03425                   return gripe(ERC_DOMAIN, "nmtoken", decl);
03426                 break;
03427               }
03428             case AT_NUTOKEN:
03429               { 
03430                 if ( !(s=itake_nutoken(dtd, buf, &at->att_def.name)) || *s )
03431                   return gripe(ERC_DOMAIN, "nutoken", decl);
03432                 break;
03433               }
03434             case AT_NUMBER:
03435               { 
03436                 if ( !(s=itake_number(dtd, buf, at)) || *s )
03437                   return gripe(ERC_DOMAIN, "number", decl);
03438                 break;
03439               }
03440             case AT_NAMES:
03441             case AT_ENTITIES:
03442             case AT_IDREFS:
03443             case AT_NMTOKENS:
03444             case AT_NUMBERS:
03445             case AT_NUTOKENS:
03446               { at->att_def.list = istrdup(buf);
03447                 break;
03448               }
03449 
03450             default:
03451               {
03452                 return gripe(ERC_REPRESENTATION, "No default for type");
03453               }
03454             }
03455           decl = end;
03456         }
03457 
03458 
03459       for(i=0; i<en; i++)
03460         { 
03461           dtd_element *e = def_element(dtd, eid[i]);
03462           
03463           add_attribute(dtd, e, at);
03464         }
03465 
03466 
03467     }
03468   return TRUE;
03469 }
03470 
03477 static const ichar *
03478 itake_nutoken(dtd *dtd, const ichar *in, dtd_symbol **id)
03479 { ichar buf[MAXNMLEN];
03480   ichar *o = buf;
03481   
03482   in = iskip_layout(dtd, in);
03483   if ( !HasClass(dtd, *in, CH_DIGIT) )
03484     return NULL;
03485   if ( dtd->case_sensitive )
03486     { while( HasClass(dtd, *in, CH_NAME) )
03487         *o++ = *in++;
03488     } else
03489     { while( HasClass(dtd, *in, CH_NAME) )
03490         *o++ = tolower(*in++);
03491     }
03492   *o = '\0';
03493   if ( o - buf > 8 )
03494     gripe(ERC_LIMIT, "nutoken length");
03495   
03496   *id = dtd_add_symbol(dtd, buf);
03497   
03498   return iskip_layout(dtd, in);
03499 }
03500         
03501 
03508 static const ichar *
03509 itake_number(dtd *dtd, const ichar *in, dtd_attr *at)
03510 { in = iskip_layout(dtd, in);
03511  
03512   switch(dtd->number_mode)
03513     { case NU_TOKEN:
03514         { ichar buf[MAXNMLEN];
03515           ichar *o = buf;
03516           
03517           while( HasClass(dtd, *in, CH_DIGIT) )
03518             *o++ = *in++;
03519           if ( o == buf )
03520             return NULL;                    /* empty */
03521           *o = '\0';
03522           at->att_def.name = dtd_add_symbol(dtd, buf);
03523           
03524           return iskip_layout(dtd, (const ichar *)in);
03525         }
03526     case NU_INTEGER:
03527       { char *end;
03528       
03529         at->att_def.number = strtol((const char *)in, &end, 10);
03530         if ( end > (char *)in && errno != ERANGE )
03531           return iskip_layout(dtd, (const ichar *)end);
03532       }
03533     }
03534   
03535   return NULL;
03536 }
03537 
03543 static const ichar *
03544 itake_nmtoken_chars(dtd *dtd, const ichar *in, ichar *out, int len)
03545 { in = iskip_layout(dtd, in);
03546   if ( !HasClass(dtd, *in, CH_NAME) )
03547     return NULL;
03548   while( HasClass(dtd, *in, CH_NAME) )
03549     { if ( --len <= 0 )
03550         gripe(ERC_REPRESENTATION, "Name token too long");
03551       *out++ = (dtd->case_sensitive ? *in++ : tolower(*in++));
03552     }
03553   *out++ = '\0';
03554   
03555   return iskip_layout(dtd, in);
03556 }
03557 
03558 
03568 static const ichar *
03569 isee_ngsep(dtd *dtd, const ichar *decl, charfunc *sep)
03570 { const ichar *s;
03571  
03572   if ( (s=isee_func(dtd, decl, *sep)) )
03573     return iskip_layout(dtd, s);
03574   if ( *sep == CF_NG )                  /* undecided */
03575     { static const charfunc ng[] = { CF_SEQ, CF_OR, CF_AND };
03576       int n;
03577       
03578       for(n=0; n<3; n++)
03579         { if ( (s=isee_func(dtd, decl, ng[n])) )
03580             { *sep = ng[n];
03581               return iskip_layout(dtd, s);
03582             }
03583         }
03584     }
03585   
03586   return NULL;
03587 }
03588 
03589 
03594 static void
03595 add_name_list(dtd_name_list **nl, dtd_symbol *s)
03596 { dtd_name_list *n = sgml_calloc(1, sizeof(*n));
03597  
03598   n->value = s;
03599   
03600   for( ; *nl; nl = &(*nl)->next )
03601     ;
03602   
03603   *nl = n;
03604 }
03605 
03613 static const ichar *
03614 itake_nmtoken(dtd *dtd, const ichar *in, dtd_symbol **id)
03615 { ichar buf[MAXNMLEN];
03616   ichar *o = buf;
03617   
03618   in = iskip_layout(dtd, in);
03619   if ( !HasClass(dtd, *in, CH_NAME) )
03620     return NULL;
03621   if ( dtd->case_sensitive )
03622     { while( HasClass(dtd, *in, CH_NAME) )
03623         *o++ = *in++;
03624     } else
03625     { while( HasClass(dtd, *in, CH_NAME) )
03626         *o++ = tolower(*in++);
03627     }
03628   *o = '\0';
03629   
03630   *id = dtd_add_symbol(dtd, buf);
03631   
03632   return iskip_layout(dtd, in);
03633 }
03634 
03641 static int
03642 process_element_declaraction(dtd_parser *p, const ichar *decl)
03643 { dtd *dtd = p->dtd;
03644   ichar buf[MAXDECL];
03645   const ichar *s;
03646   dtd_symbol *eid[MAXATTELEM];
03647   dtd_edef *def;
03648   int en;
03649   int i;
03650 
03651   if ( !expand_pentities(p, decl, buf, sizeof(buf)) )
03652     return FALSE;
03653   decl = buf;
03654 
03655 
03656   if ( !(s=itake_el_or_model_element_list(dtd, decl, eid, &en)) )
03657     return gripe(ERC_SYNTAX_ERROR, "Name or name-group expected", decl);
03658   decl = s;
03659   if ( en == 0 )
03660     return TRUE;                        /* 0 elements */
03661   
03662   def = calloc(1, sizeof(*def));
03663 
03664   for(i=0; i<en; i++)
03665     { find_element(dtd, eid[i]);
03666       eid[i]->element->structure = def;
03667       eid[i]->element->undefined = FALSE;
03668     }
03669   def->references = en;                 /* for GC */
03670 
03671   if ( (s = isee_identifier(dtd, decl, "-")) )
03672     { def->omit_close = FALSE;
03673       goto seeclose;
03674     } else if ( (s = isee_identifier(dtd, decl, "o")) )
03675     { def->omit_open = TRUE;
03676     
03677     seeclose:
03678       decl = s;
03679       if ( (s = isee_identifier(dtd, decl, "-")) )
03680         { def->omit_close = FALSE;
03681         } else if ( (s = isee_identifier(dtd, decl, "o")) )
03682         { for(i=0; i<en; i++)
03683             def->omit_close = TRUE;
03684         } else
03685         return gripe(ERC_SYNTAX_ERROR, "Bad omit-tag declaration", decl);
03686       decl = s;
03687     }
03688 
03689   if ( !(decl=process_model(dtd, def, decl)) )
03690     return FALSE;
03691   
03692   if ( decl[0] == '-' || decl[0] == '+' )
03693     { dtd_symbol *ng[MAXNAMEGROUP];
03694       int ns;
03695       dtd_element_list **l;
03696       
03697       if ( decl[0] == '-' )
03698         l = &def->excluded;
03699       else
03700         l = &def->included;
03701       
03702       decl++;
03703       if ( (s=itake_namegroup(dtd, CF_OR, decl, ng, &ns)) )
03704         { int i;
03705         
03706           decl = s;
03707           
03708           for(i=0; i<ns; i++)
03709             add_element_list(l, find_element(dtd, ng[i]));
03710         } else
03711         { return gripe(ERC_SYNTAX_ERROR, "Name group expected", decl);
03712         }
03713     }
03714   
03715   if (*decl)
03716     {
03717       return gripe(ERC_SYNTAX_ERROR, "Unexpected end of declaration", decl);
03718     }
03719                                                                             
03720   return TRUE;
03721 
03722 }
03723 
03728 static const ichar *
03729 process_model(dtd *dtd, dtd_edef *e, const ichar *decl)
03730 { const ichar *s;
03731                                                                               
03732   decl = iskip_layout(dtd, decl);
03733   if ( (s = isee_identifier(dtd, decl, "empty")) )
03734     { e->type = C_EMPTY;
03735       return s;
03736     }
03737   if ( (s = isee_identifier(dtd, decl, "cdata")) )
03738     { e->type = C_CDATA;
03739       return s;
03740     }
03741   if ( (s = isee_identifier(dtd, decl, "rcdata")) )
03742     { e->type = C_RCDATA;
03743       return s;
03744     }
03745   if ( (s = isee_identifier(dtd, decl, "any")) )
03746     { e->type = C_ANY;
03747       return s;
03748     }
03749                                                                               
03750   e->type = C_PCDATA;
03751   if ( !(e->content = make_model(dtd, decl, &decl)) )
03752     return FALSE;
03753                                                                               
03754   return decl;
03755 }
03756                                                                               
03764 static const ichar *
03765 itake_namegroup(dtd *dtd, charfunc sep, const ichar *decl,
03766                 dtd_symbol **names, int *n)
03767 { const ichar *s;
03768   int en = 0;
03769                                                                               
03770   if ( (s=isee_func(dtd, decl, CF_GRPO)) )
03771     { for(;;)
03772         { if ( !(decl=itake_name(dtd, s, &names[en++])) )
03773             { gripe(ERC_SYNTAX_ERROR, "Name expected", s);
03774               return NULL;
03775             }
03776           if ( (s=isee_func(dtd, decl, sep)) )
03777             { decl = iskip_layout(dtd, s);
03778               continue;
03779             }
03780           if ( (s=isee_func(dtd, decl, CF_GRPC)) )
03781             { *n = en;
03782               decl = s;
03783               return iskip_layout(dtd, decl);
03784             }
03785 
03786           gripe(ERC_SYNTAX_ERROR, "Bad name-group", decl);
03787           return NULL;
03788         }
03789     }
03790                                                                               
03791   return NULL;
03792 }
03793 
03799 static void
03800 add_element_list(dtd_element_list **l, dtd_element *e)
03801 { dtd_element_list *n = sgml_calloc(1, sizeof(*n));
03802                                                                             
03803   n->value = e;
03804                                                                             
03805   for( ; *l; l = &(*l)->next )
03806     ;
03807   *l = n;
03808 }
03809 
03814 static const ichar *
03815 itake_el_or_model_element_list(dtd *dtd, const ichar *decl, dtd_symbol **names,
03816                                int *n)
03817 { const ichar *s;
03818  
03819   if ( isee_func(dtd, decl, CF_GRPO) )
03820     { dtd_model *model;
03821     
03822       if ( (model = make_model(dtd, decl, &s)) )
03823         { namelist nl;
03824         
03825           nl.list = names;
03826           nl.size = 0;
03827           for_elements_in_model(model, add_list_element, &nl);
03828           free_model(model);
03829           
03830           *n = nl.size;
03831           return s;
03832         } else
03833         return NULL;
03834     } else
03835     { if ( !(s = itake_name(dtd, decl, &names[0])) )
03836         { gripe(ERC_SYNTAX_ERROR, "Name expected", decl);
03837           return NULL;
03838         }
03839       *n = 1;
03840       return s;
03841     }
03842 }
03843 
03851 static void
03852 for_elements_in_model(dtd_model *m,
03853                       void (*f)(dtd_element *e, void *closure),
03854                       void *closure)
03855 { switch(m->type)
03856     { case MT_SEQ:
03857     case MT_AND:
03858     case MT_OR:
03859       { dtd_model *sub = m->content.group;
03860                                                                               
03861         for(; sub; sub = sub->next)
03862           for_elements_in_model(sub, f, closure);
03863         break;
03864       }
03865     case MT_ELEMENT:
03866       (*f)(m->content.element, closure);
03867       break;
03868     default:
03869       ;
03870     }
03871 }
03872                                                                               
03873 
03878 static dtd_model *
03879 make_model(dtd *dtd, const ichar *decl, const ichar **end)
03880 { const ichar *s;
03881   dtd_model *m = calloc(1, sizeof(*m));
03882   dtd_symbol *id;
03883 
03884   decl = iskip_layout(dtd, decl);
03885   
03886   if ( (s=isee_identifier(dtd, decl, "#pcdata")) )
03887     { m->type = MT_PCDATA;
03888       m->cardinality = MC_ONE;            /* actually don't care */
03889       *end = s;
03890       return m;
03891     }
03892         
03893   if ( (s=itake_name(dtd, decl, &id)) )
03894     { m->type = MT_ELEMENT;
03895       m->content.element = find_element(dtd, id);
03896       decl = s;
03897     } else
03898     { if ( !(s=isee_func(dtd, decl, CF_GRPO)) )
03899         { gripe(ERC_SYNTAX_ERROR, "Name group expected", decl);
03900           free_model(m);
03901           return NULL;
03902         }
03903       decl = s;
03904       for(;;)
03905         { dtd_model *sub;
03906           modeltype mt;
03907           
03908           if ( !(sub = make_model(dtd, decl, &s)) )
03909             return NULL;
03910           decl = s;
03911           add_submodel(m, sub);
03912           
03913           if ( (s = isee_func(dtd, decl, CF_OR)) )
03914             { decl = s;
03915               mt = MT_OR;
03916             } else if ( (s = isee_func(dtd, decl, CF_SEQ)) )
03917             { decl = s;
03918               mt = MT_SEQ;
03919             } else if ( (s = isee_func(dtd, decl, CF_AND)) )
03920             { decl = s;
03921               mt = MT_AND;
03922             } else if ( (s = isee_func(dtd, decl, CF_GRPC)) )
03923             { decl = s;
03924               break;
03925             } else
03926             { gripe(ERC_SYNTAX_ERROR, "Connector ('|', ',' or '&') expected", decl);
03927               free_model(m);
03928               return NULL;
03929             }
03930           decl = iskip_layout(dtd, decl);
03931           
03932           if ( m->type != mt )
03933             { if ( !m->type )
03934                 m->type = mt;
03935               else
03936                 { gripe(ERC_SYNTAX_ERROR, "Different connector types in model", decl);
03937                   free_model(m);
03938                   return NULL;
03939                 }
03940             }
03941         }
03942     }
03943   if ( (s = isee_func(dtd, decl, CF_OPT)) )
03944     { decl = s;
03945       m->cardinality = MC_OPT;
03946     } else if ( (s=isee_func(dtd, decl, CF_REP)) )
03947     { decl = s;
03948       m->cardinality = MC_REP;
03949     } else if ( (s=isee_func(dtd, decl, CF_PLUS)) )
03950     {                                     /* ROK: watch out for (x) +(y) */
03951       if ( isee_func(dtd, iskip_layout(dtd, s), CF_GRPO) == NULL )
03952         { decl = s;
03953           m->cardinality = MC_PLUS;
03954         }
03955     } else
03956     m->cardinality = MC_ONE;
03957   if ( m->type == MT_UNDEF )            /* simplify (e+), etc. */
03958     { dtd_model *sub = m->content.group;
03959       modelcard card;
03960       
03961       assert(!sub->next);
03962       if ( sub->cardinality == MC_ONE )
03963         card = m->cardinality;
03964       else if ( m->cardinality == MC_ONE )
03965         card = sub->cardinality;
03966       else
03967         { m->type = MT_OR;
03968           goto out;
03969         }
03970       
03971       *m = *sub;
03972       m->cardinality = card;
03973       free(sub);
03974     }
03975  out:
03976   *end = iskip_layout(dtd, decl);
03977   return m;
03978 }
03979 
03980 
03984 static void
03985 free_element_definition(dtd_edef *def)
03986 { if ( --def->references == 0 )
03987     { if ( def->content )
03988         free_model(def->content);
03989       free_element_list(def->included);
03990       free_element_list(def->excluded);
03991       free_state_engine(def->initial_state);
03992       
03993       free(def);
03994     }
03995 }
03996 
04001 static void
04002 free_element_list(dtd_element_list *l)
04003 { dtd_element_list *next;
04004 
04005   for( ; l; l=next)
04006     { next = l->next;
04007 
04008       free(l);
04009     }
04010 }
04011                                     
04016 static void
04017 free_attribute_list(dtd_attr_list *l)
04018 { dtd_attr_list *next;
04019                                                                             
04020   for(; l; l=next)
04021     { next = l->next;
04022                                                                             
04023       free_attribute(l->attribute);
04024       free(l);
04025     }
04026 }
04027 
04032 static void
04033 free_elements(dtd_element *e)
04034 { dtd_element *next;
04035  
04036   for( ; e; e=next)
04037     { next = e->next;
04038     
04039       if ( e->structure )
04040         free_element_definition(e->structure);
04041       free_attribute_list(e->attributes);
04042       
04043       sgml_free(e);
04044     }
04045 }
04046      
04051 static void
04052 free_model(dtd_model *m)
04053 { switch(m->type)
04054     { case MT_SEQ:
04055     case MT_AND:
04056     case MT_OR:
04057       { dtd_model *sub = m->content.group;
04058         dtd_model *next;
04059         
04060         for(; sub; sub = next)
04061           { next = sub->next;
04062           
04063             free_model(sub);
04064           }
04065       }
04066     default:
04067       ;
04068     }
04069   free(m);
04070 }
04071 
04078 static int                              /* <!DOCTYPE ...> */
04079 process_doctype(dtd_parser *p, const ichar *decl, const ichar *decl0)
04080 { dtd *dtd = p->dtd;
04081  
04082   dtd_symbol *id;
04083   const ichar *s;
04084   dtd_entity *et = NULL;
04085 
04086   const char *file=NULL;
04087 
04088   /*Extract the name from the declaration*/
04089   if ( !(s=itake_name(dtd, decl, &id)) )
04090     return gripe(ERC_SYNTAX_ERROR, "Name expected", decl);
04091   decl = s;
04092 
04093   /*Extract the system, public identitifier*/
04094   if ( (s=isee_identifier(dtd, decl, "system")) )
04095     { et = calloc(1, sizeof(*et));
04096       et->type = ET_SYSTEM;
04097       decl = s;
04098     } else if ( (s=isee_identifier(dtd, decl, "public")) )
04099     { et = calloc(1, sizeof(*et));
04100       et->type = ET_PUBLIC;
04101       decl = s;
04102     } else if ( isee_func(dtd, decl, CF_DSO) )
04103     goto local;
04104 
04105   if ( et )
04106     { et->name = id;
04107       et->catalog_location = 0;
04108       if ( !(s=process_entity_value_declaration(p, decl, et)) )
04109         return FALSE;
04110       decl = s;
04111     }
04112 
04113   
04114   /*Extract the doctype declaration*/
04115   if ( !dtd->doctype )                  /* i.e. anonymous DTD */
04116     { 
04117       dtd_parser *clone;
04118       dtd->doctype = istrdup(id->name); /* Fill it */
04119       if ( et )
04120         file = entity_file(dtd, et);
04121                 
04122       if ( !file )
04123         { 
04124           gripe(ERC_EXISTENCE, "DTD", dtd->doctype);
04125         } 
04126       else
04127         { 
04128           clone = clone_dtd_parser(p);
04129           /*Load the dtd from file ( even remotely) if specified*/
04130           if ( !load_dtd_from_file(clone, file) )
04131             gripe(ERC_EXISTENCE, "file", file);
04132                         
04133           free_dtd_parser(clone);
04134         }
04135     }
04136 
04137   if ( et )
04138     free_entity_list(et);
04139 
04140   /*Process the DTD declaration in [...] if present in the same file*/
04141  local:
04142   if ( (s=isee_func(dtd, decl, CF_DSO)) ) /* [...] */
04143     {
04144 
04145       int grouplevel = 1;
04146       data_mode oldmode  = p->dmode;
04147       dtdstate  oldstate = p->state;
04148       locbuf oldloc;
04149       const ichar *q;
04150       icharbuf *saved_ibuf = p->buffer;
04151       
04152       if(!dtd->doctype)
04153         dtd->doctype = istrdup(id->name);       /* Fill it */
04154       else
04155         return TRUE;
04156                         
04157                 
04158       push_location(p, &oldloc);
04159       /* try to find start-location. */
04160       /* fails if there is comment before */
04161       /* the []! */
04162       sgml_cplocation(&p->location, &p->startloc);
04163       inc_location(&p->location, '<');
04164 
04165       for(q=decl0; q < s; q++)
04166         inc_location(&p->location, *q);
04167       p->dmode = DM_DTD;
04168       p->state = S_PCDATA;
04169       p->buffer = new_icharbuf();
04170 
04171   
04172       for( ; *s; s++ )
04173         {
04174 
04175           if( isee_func(dtd, s, CF_LIT) || /* skip quoted strings */
04176               isee_func(dtd, s, CF_LITA) )
04177             { 
04178               ichar q = *s;
04179 
04180               putchar_dtd_parser(p, *s++);    /* pass open quote */
04181               for( ; *s && *s != q; s++ )
04182                 putchar_dtd_parser(p, *s);
04183               if ( *s == q )                  /* pass closing quote */
04184                 putchar_dtd_parser(p, *s);
04185               continue;
04186             }
04187 
04188           if ( isee_func(dtd, s, CF_DSO) )
04189             grouplevel++;
04190           else if ( isee_func(dtd, s, CF_DSC) && --grouplevel == 0 )
04191             break;
04192 
04193           putchar_dtd_parser(p, *s);
04194                                                                         
04195         }
04196 
04197       p->dtd->implicit = FALSE;
04198       p->state    = oldstate;
04199       p->dmode    = oldmode;
04200       free_icharbuf(p->buffer);
04201       p->buffer = saved_ibuf;
04202       pop_location(p, &oldloc);
04203 
04204     }
04205                                                                                
04206   p->enforce_outer_element = id;        /* make this the outer element */
04207   
04208   return TRUE;
04209 
04210 }
04211 
04215 static void
04216 free_entity_list(dtd_entity *e)
04217 { dtd_entity *next;
04218 
04219   for( ; e; e=next)
04220     { next = e->next;
04221 
04222       if ( e->value )   free(e->value);
04223       if ( e->extid )   free(e->extid);
04224       if ( e->exturl )  free(e->exturl);
04225       if ( e->baseurl ) free(e->baseurl);
04226 
04227       free(e);
04228     }
04229 }
04230 
04234 void
04235 free_dtd_parser(dtd_parser *p)
04236 { free_icharbuf(p->buffer);
04237   free_ocharbuf(p->cdata);
04238 
04239   free_dtd(p->dtd);
04240 
04241   free(p);
04242 }
04243 
04247 void
04248 free_dtd(dtd *dtd)
04249 { if ( --dtd->references == 0 )
04250     { if ( dtd->doctype )
04251         free(dtd->doctype);
04252   
04253       free_entity_list(dtd->entities);
04254       free_entity_list(dtd->pentities);
04255       free_notations(dtd->notations);
04256       free_shortrefs(dtd->shortrefs);
04257       free_elements(dtd->elements);
04258       free_symbol_table(dtd->symbols);
04259       free(dtd->charfunc);
04260       free(dtd->charclass);
04261       free(dtd->charmap);
04262       dtd->magic = 0;
04263   
04264       free(dtd);
04265     }
04266 }
04267 
04271 static void
04272 free_symbol_table(dtd_symbol_table *t)
04273 { int i;
04274 
04275   for(i=0; i<t->size; i++)
04276     { dtd_symbol *s, *next;
04277 
04278       for(s=t->entries[i]; s; s=next)
04279         { next = s->next;
04280 
04281           free((char *)s->name);
04282           free(s);
04283         }
04284     }
04285 
04286   free(t->entries);
04287   free(t);
04288 }
04289 
04294 static void
04295 free_notations(dtd_notation *n)
04296 { dtd_notation *next;
04297 
04298   for( ; n; n=next)
04299     { next = n->next;
04300 
04301       free(n->system);
04302       free(n->public);
04303 
04304       free(n);
04305     }
04306 }
04307 
04311 static void
04312 free_shortrefs(dtd_shortref *sr)
04313 { dtd_shortref *next;
04314 
04315   for( ; sr; sr=next)
04316     { next = sr->next;
04317       free_maps(sr->map);
04318       free(sr);
04319     }
04320 }
04321 
04327 void
04328 set_src_dtd_parser(dtd_parser *p, input_type type, const char *name)
04329 { p->location.type    = type;
04330   p->location.name    = name;
04331   p->location.line    = 1;
04332   p->location.linepos = 0;
04333   p->location.charpos = 0;
04334 }
04335 
04339 static void
04340 free_maps(dtd_map *map)
04341 { dtd_map *next;
04342 
04343   for( ; map; map=next)
04344     { next = map->next;
04345       if ( map->from )
04346         free(map->from);
04347       free(map);
04348     }
04349 }
04350 
04360 static int
04361 process_entity_declaration(dtd_parser *p, const ichar *decl)
04362 { dtd *dtd = p->dtd;
04363   const ichar *s;
04364   dtd_symbol *id;
04365   dtd_entity *e;
04366   int isparam;
04367   int isdef = FALSE;
04368 
04369 
04370   /* parameter entity */
04371   if ( (s=isee_func(dtd, decl, CF_PERO)) )
04372     { isparam = TRUE;
04373       decl = s;
04374     } else
04375     isparam = FALSE;
04376 
04377   /*Extract the entity name*/
04378   if ( !(s = itake_entity_name(dtd, decl, &id)) )
04379     { 
04380       if ( !(s = isee_identifier(dtd, decl, "#default")) )
04381         {       return gripe(ERC_SYNTAX_ERROR, "Name expected", decl);
04382         }
04383       id = dtd_add_symbol(dtd, "#DEFAULT");
04384       isdef = TRUE;
04385     }
04386 
04387   if ( isparam && find_pentity(dtd, id) ) {
04388     gripe(ERC_REDEFINED, "parameter entity", id);
04389     return TRUE;                        /* already defined parameter entity */
04390   }
04391   if ( id->entity ) {
04392     gripe(ERC_REDEFINED, "entity", id);
04393     return TRUE;                        /* already defined normal entity */
04394   }
04395          
04396   decl = iskip_layout(dtd, s);
04397   e =calloc(1, sizeof(*e));
04398   e->name = id;
04399 
04400   /*Extract the system/public identifier*/
04401   if ( (s = isee_identifier(dtd, decl, "system")) )
04402     { e->type = ET_SYSTEM;
04403       e->content = EC_SGML;
04404       decl = s;
04405     } 
04406   else if ( (s = isee_identifier(dtd, decl, "public")) )
04407     {
04408       e->type = ET_PUBLIC;
04409       e->content = EC_SGML;
04410       decl = s;
04411     } 
04412   else
04413     {
04414       e->type = ET_LITERAL;
04415       
04416       if ( !isparam )
04417         { if ( (s=isee_identifier(dtd, decl, "cdata")) )
04418             { decl = s;
04419               e->content = EC_CDATA;
04420             } else if ( (s=isee_identifier(dtd, decl, "sdata")) )
04421             { decl = s;
04422               e->content = EC_SDATA;
04423             } else if ( (s=isee_identifier(dtd, decl, "pi")) )
04424             { decl = s;
04425               e->content = EC_PI;
04426             } else if ( (s=isee_identifier(dtd, decl, "starttag")) )
04427             { decl = s;
04428               e->content = EC_STARTTAG;
04429             } else if ( (s=isee_identifier(dtd, decl, "endtag")) )
04430             { decl = s;
04431               e->content = EC_ENDTAG;
04432             } else
04433             e->content = EC_SGML;
04434         }
04435 
04436     }   
04437   /*Process the entity value declaration*/
04438   if ( (decl=process_entity_value_declaration(p, decl, e)) )
04439     {
04440       if ( e->type == ET_LITERAL )
04441         {switch(e->content)
04442             {
04443             case EC_STARTTAG:
04444               { ichar *buf =malloc((e->length + 3)*sizeof(ichar));
04445               
04446                 buf[0] = dtd->charfunc->func[CF_STAGO];
04447                 istrcpy(&buf[1], e->value);
04448                 buf[++e->length] = dtd->charfunc->func[CF_STAGC];
04449                 buf[++e->length] = 0;
04450                 
04451                 free(e->value);
04452                 e->value = buf;
04453                 e->content = EC_SGML;
04454                 
04455                 break;
04456               }
04457             case EC_ENDTAG:
04458               { ichar *buf = sgml_malloc((e->length + 4)*sizeof(ichar));
04459               
04460                 buf[0] = dtd->charfunc->func[CF_ETAGO1];
04461                 buf[1] = dtd->charfunc->func[CF_ETAGO2];
04462                 istrcpy(&buf[2], e->value);
04463                 e->length++;
04464                 buf[++e->length] = dtd->charfunc->func[CF_STAGC];
04465                 buf[++e->length] = 0;
04466                 
04467                 sgml_free(e->value);
04468                 e->value = buf;
04469                 e->content = EC_SGML;
04470                 
04471                 break;
04472               }
04473             default:
04474               break;
04475         
04476             }
04477         }
04478       else
04479         {
04480 
04481           if ( *decl )
04482             { dtd_symbol *nname;
04483             
04484               if ( (s=isee_identifier(dtd, decl, "cdata")) )
04485                 { decl = s;
04486                   e->content = EC_CDATA;
04487                 } else if ( (s=isee_identifier(dtd, decl, "sdata")) )
04488                 { decl = s;
04489                   e->content = EC_SDATA;
04490                 } else if ( (s=isee_identifier(dtd, decl, "ndata")) )
04491                 { decl = s;
04492                   e->content = EC_NDATA;
04493                 } else
04494                 {  return gripe(ERC_SYNTAX_ERROR, "Bad datatype declaration", decl);
04495                 }
04496               if ( (s=itake_name(dtd, decl, &nname)) ) /* what is this? */
04497                 { decl = s;
04498                 } else
04499                 { return gripe(ERC_SYNTAX_ERROR, "Bad notation declaration", decl);
04500                 }
04501             }
04502 
04503         }         
04504 
04505       if ( *decl )
04506         {
04507           return gripe(ERC_SYNTAX_ERROR, "Unexpected end of declaraction", decl);
04508         }
04509     }
04510  
04511   if ( isparam )
04512     { e->next = dtd->pentities;
04513       dtd->pentities = e;
04514     } else
04515     { e->name->entity = e;
04516       e->next = dtd->entities;
04517       dtd->entities = e;
04518     }
04519   
04520   if ( isdef )
04521     dtd->default_entity = e;
04522   
04523   return TRUE;
04524 
04525 }
04526 
04527 
04531 static ichar *
04532 baseurl(dtd_parser *p)
04533 { if ( p->location.type == IN_FILE && p->location.name )
04534     { return istrdup(p->location.name);
04535     }
04536  
04537   return NULL;
04538 }
04539 
04540 
04546 static const ichar *
04547 process_entity_value_declaration(dtd_parser *p,
04548                                  const ichar *decl, dtd_entity *e)
04549 { dtd *dtd = p->dtd;
04550   const ichar *s;
04551   
04552   if ( e->type == ET_SYSTEM )
04553     { 
04554       if ( (s=itake_url(dtd, decl, &e->exturl)) )
04555         { e->baseurl = baseurl(p);
04556           return s;
04557         }
04558       
04559       goto string_expected;
04560     } else
04561     { ichar buf[MAXSTRINGLEN];
04562       ichar val[MAXSTRINGLEN];
04563       
04564       if ( !(s = itake_string(dtd, decl, buf, sizeof(buf))) )
04565         goto string_expected;
04566       decl = s;
04567     
04568       expand_pentities(p, buf, val, sizeof(val));
04569       
04570       switch ( e->type )
04571         { case ET_PUBLIC:
04572             { e->extid = istrdup(val);
04573               if ( isee_func(dtd, decl, CF_LIT) ||
04574                    isee_func(dtd, decl, CF_LITA) )
04575                 { if ( (s=itake_url(dtd, decl, &e->exturl)) )
04576                     { e->baseurl = baseurl(p);
04577                       decl = s;
04578                     }
04579                 }
04580               return decl;
04581             }
04582         case ET_LITERAL:
04583           { e->value = istrdup(val);
04584             e->length = strlen(e->value);
04585             return decl;
04586           }
04587         default:
04588           assert(0);
04589           return NULL;
04590         }
04591     }
04592   
04593  string_expected:
04594   gripe(ERC_SYNTAX_ERROR, "String expected", decl);
04595   return NULL;
04596 }
04597 
04608 static const ichar *
04609 itake_url(dtd *dtd, const ichar *in, ichar **out)
04610 { ichar buf[MAXSTRINGLEN];
04611   const ichar *end;
04612   
04613   if ( (end=itake_string(dtd, in, buf, sizeof(buf))) )
04614     { *out = istrdup(buf);
04615     }
04616   
04617   return end;
04618 }
04619 
04625 static int
04626 expand_pentities(dtd_parser *p, const ichar *in, ichar *out, int len)
04627 { dtd *dtd = p->dtd;
04628   int pero = dtd->charfunc->func[CF_PERO]; /* % */
04629   int ero = dtd->charfunc->func[CF_ERO]; /* & */
04630   const ichar *s;
04631   
04632   while(*in)
04633     { if ( *in == pero )
04634         { dtd_symbol *id;
04635         
04636           if ( (s = itake_entity_name(dtd, in+1, &id)) )
04637             { dtd_entity *e = find_pentity(dtd, id);
04638               const ichar *eval;
04639               int l;
04640               
04641               in = s;
04642               if ( (s=isee_func(dtd, s, CF_ERC)) ) /* ; is not obligatory? */
04643                 in = s;
04644               
04645               if ( !e )
04646                 { return gripe(ERC_EXISTENCE, "parameter entity", id->name);
04647 
04648                 }                                                           
04649   
04650               if ( !(eval = entity_value(p, e, NULL)) )
04651                 return FALSE;
04652               
04653               if ( !expand_pentities(p, eval, out, len) )
04654                 return FALSE;
04655               l = strlen(out);                /* could be better */
04656               out += l;
04657               len -= l;
04658               
04659               continue;
04660             }
04661         }
04662     
04663       if ( --len <= 0 )
04664         { gripe(ERC_REPRESENTATION, "Declaration too long");
04665           return FALSE;
04666         }
04667       
04668       if ( *in == ero && in[1] == '#' )   /* &# */
04669         { int chr;
04670         
04671           if ( (s=isee_character_entity(dtd, in, &chr)) &&
04672                representable_char(p, chr) )
04673             { *out++ = chr;
04674               in = s;
04675               continue;
04676             }
04677         }
04678       
04679       *out++ = *in++;
04680     }
04681   
04682   *out = '\0';
04683   
04684   return TRUE;
04685 }
04686 
04691 static int
04692 representable_char(dtd_parser *p, int chr)
04693 { if ( chr < 0 )
04694     return FALSE;                       /* should not happen */
04695   if ( chr < 128 )                      /* basic ASCII set */
04696     return TRUE;
04697   if ( p->utf8_decode )
04698     return FALSE;
04699   if ( chr < OUTPUT_CHARSET_SIZE )
04700     return TRUE;
04701   return FALSE;
04702 }
04703 
04710 static const ichar *
04711 isee_character_entity(dtd *dtd, const ichar *in, int *chr)
04712 { const ichar *s;
04713                       
04714   if ( (s=isee_func(dtd, in, CF_ERO)) && *s == '#' )
04715     { ichar e[32];
04716       ichar *o = e;
04717       int v;
04718       
04719       *o++ = *s++;
04720       while(o < e+sizeof(e)-1 && HasClass(dtd, *s, CH_NAME))
04721         *o++ = *s++;
04722       if ( isee_func(dtd, s, CF_ERC))     /* skip ; */
04723         s++;
04724       
04725       *o = '\0';
04726       if ( (v=char_entity_value(e)) !=  FALSE )
04727         { *chr = v;
04728           return s;
04729         }
04730     }
04731   
04732   return NULL;
04733 }
04734 
04738 static int
04739 char_entity_value(const ichar *decl)
04740 { if ( *decl == '#' )
04741     { const ichar *s = decl+1;
04742       char *end;
04743       long v;
04744       
04745       /* do octal too? */
04746       if ( s[0] == 'x' || s[0] == 'X' )
04747         v = strtoul((char *)s+1, &end, 16);
04748       else
04749         v = strtoul((char *)s, &end, 10);
04750       
04751       if ( *end == '\0' )
04752         { return (int)v;
04753         } else if ( istreq(s, "RS") )
04754         { return '\n';
04755         } else if ( istreq(s, "RE") )
04756         { return '\r';
04757         } else if ( istreq(s, "TAB") )
04758         { return '\t';
04759         } else if ( istreq(s, "SPACE") )
04760         { return ' ';
04761         }
04762     }
04763    return FALSE;
04764 }
04765 
04766 static const ichar *
04767 entity_value(dtd_parser *p, dtd_entity *e, int *len)
04768 { const char *file;
04769  
04770   if ( !e->value && (file=entity_file(p->dtd, e)) )
04771     { int normalise = (e->content == EC_SGML || e->content == EC_CDATA);
04772     
04773       e->value = load_sgml_file_to_charp(file, normalise, &e->length);
04774     }
04775   
04776   if ( len )
04777     *len = e->length;
04778   
04779   return e->value;
04780 }
04781 
04787 static const char *
04788 entity_file(dtd *dtd, dtd_entity *e)
04789 { 
04790   char *file;   
04791   switch(e->type)
04792     { case ET_SYSTEM:
04793     case ET_PUBLIC:
04794       {            
04795         if( e->exturl)
04796           {
04797             file = e->exturl;
04798                     
04799             return file;
04800           }
04801         return NULL;
04802       }
04803     default:
04804       return NULL;
04805     }
04806 }
04807 
04814 static dtd_entity *
04815 find_pentity(dtd *dtd, dtd_symbol *id)
04816 { dtd_entity *e;
04817  
04818   for(e = dtd->pentities; e; e=e->next)
04819     { if ( e->name == id )
04820         return e;
04821     }
04822   
04823   return NULL;
04824 }
04825 
04826 
04837 static const ichar *
04838 isee_identifier(dtd *dtd, const ichar *in, char *id)
04839 { in = iskip_layout(dtd, in);
04840  
04841   /* match */
04842   while (*id && *id == tolower(*in) )
04843     id++, in++;
04844   if ( *id == 0 && !HasClass(dtd, *in, CH_NAME) )
04845     return iskip_layout(dtd, in);
04846   
04847   return NULL;
04848 }
04849 
04856 static const ichar *
04857 itake_entity_name(dtd *dtd, const ichar *in, dtd_symbol **id)
04858 { ichar buf[MAXSTRLEN];
04859   ichar *o = buf;
04860 
04861   in = iskip_layout(dtd, in);
04862 
04863   if ( !HasClass(dtd, *in, CH_NMSTART) )
04864     return NULL;
04865 
04866   if ( dtd->ent_case_sensitive )
04867     { while( HasClass(dtd, *in, CH_NAME) )
04868         *o++ = *in++;
04869     } else
04870     { while( HasClass(dtd, *in, CH_NAME) )
04871         *o++ = tolower(*in++);
04872     }
04873   *o++ = '\0';
04874   
04875   *id = dtd_add_symbol(dtd, buf);
04876 
04877   return in;
04878 }
04879 
04880 
04887 static int
04888 process_begin_element(dtd_parser *p, const ichar *decl)
04889 {
04890   dtd *dtd = p->dtd;
04891   dtd_symbol *id;
04892   const ichar *s;
04893 
04894   /*Extract the element name*/
04895   if ( (s=itake_name(dtd, decl, &id)) )
04896     {
04897       sgml_attribute atts[MAXSTRLEN];
04898       int natts=0;
04899       dtd_element *e = find_element(dtd, id);
04900       int empty = FALSE;
04901       int conref = FALSE;
04902         
04903       if ( !e->structure )
04904         { 
04905           dtd_edef *def;
04906           e->undefined = TRUE;
04907           def_element(dtd, id);
04908           def = e->structure;
04909           def->type = C_EMPTY;
04910         }
04911                 
04912       open_element(p, e, TRUE);
04913 
04914       decl = s;
04915         
04916       /*Process the attribute list*/
04917       if ( (s=process_attributes(p, e, decl, atts, &natts)) )
04918         decl=s;
04919         
04920 
04921       /*Handle dialect specific details*/
04922       if ( dtd->dialect != DL_SGML )
04923         { 
04924           if ( (s=isee_func(dtd, decl, CF_ETAGO2)) )
04925             { 
04926               empty = TRUE;                 
04927               decl = s;
04928             }
04929 #ifdef XMLNS
04930           if ( dtd->dialect == DL_XMLNS )
04931             update_xmlns(p, e, natts, atts);
04932 #endif
04933           if ( dtd->dialect != DL_SGML )
04934             update_space_mode(p, e, natts, atts);
04935         } 
04936       else
04937         { 
04938           int i;
04939           
04940           for(i=0; i<natts; i++)
04941             { 
04942               if ( atts[i].definition->def == AT_CONREF )
04943                 { 
04944                   empty = TRUE;
04945                   conref = TRUE;
04946                 }
04947             }
04948         }
04949 
04950       if( *decl)
04951         gripe(ERC_SYNTAX_ERROR, "Bad attribute list", decl);
04952       
04953       if ( !(p->flags & SGML_PARSER_NODEFS) )
04954         {
04955           natts = add_default_attributes(p, e, natts, atts);
04956         }
04957 
04958       if ( empty ||
04959            (dtd->dialect == DL_SGML &&
04960             e->structure &&
04961             e->structure->type == C_EMPTY &&
04962             !e->undefined) )
04963         p->empty_element = e;
04964       else
04965         p->empty_element = NULL;
04966 
04967       /*Invoke the handler function*/
04968       if ( p->on_begin_element )
04969         (*p->on_begin_element)(p, e, natts, atts);
04970 
04971       free_attribute_values(natts, atts);
04972                         
04973       if ( p->empty_element )
04974         { 
04975           p->empty_element = NULL;
04976                         
04977           close_element(p, e, conref);
04978           if ( conref )     /* might be S_CDATA due to declared content */
04979             p->cdata_state = p->state = S_PCDATA;
04980         }
04981       
04982       return TRUE;
04983     } 
04984   return gripe(ERC_SYNTAX_ERROR, "Bad open-element tag", decl);
04985 }
04986 
04993 void
04994 update_space_mode(dtd_parser *p, dtd_element *e,
04995                   int natts, sgml_attribute *atts)
04996 { for( ; natts-- > 0; atts++ )
04997     { const ichar *name = atts->definition->name->name;
04998     
04999       if ( istreq(name, "xml:space") && atts->definition->type == AT_CDATA )
05000         { dtd_space_mode m = istr_to_space_mode(atts->value.cdata);
05001         
05002           if ( m != SP_INHERIT )
05003             p->environments->space_mode = m;
05004           else
05005             gripe(ERC_EXISTENCE, "xml:space-mode", atts->value.cdata);
05006           return;
05007         }
05008     }
05009  
05010   if ( e->space_mode != SP_INHERIT )
05011     p->environments->space_mode = e->space_mode;
05012 }
05013 
05020 static dtd_space_mode
05021 istr_to_space_mode(const ichar *val)
05022 { if ( istreq(val, "default") )
05023     return SP_DEFAULT;
05024   if ( istreq(val, "preserve") )
05025     return SP_PRESERVE;
05026   if ( istreq(val, "sgml") )
05027     return SP_SGML;
05028   if ( istreq(val, "remove") )
05029     return SP_REMOVE;
05030   
05031   return SP_INHERIT;                    /* interpret as error */
05032 }
05033 
05039 static void
05040 allow_for(dtd_element *in, dtd_element *e)
05041 { dtd_edef *def = in->structure;
05042   dtd_model *g;
05043   
05044   if ( def->type == C_EMPTY )
05045     { def->type = C_PCDATA;
05046       def->content = calloc(1, sizeof(*def->content));
05047       def->content->type = MT_OR;
05048       def->content->cardinality = MC_REP;
05049     }
05050   assert(def->content->type == MT_OR);
05051   
05052   g = def->content->content.group;
05053   
05054   if ( e == CDATA_ELEMENT )
05055     { dtd_model *m;
05056 
05057       for(; g; g = g->next)
05058         { if ( g->type == MT_PCDATA )
05059             return;
05060         }
05061       m = calloc(1, sizeof(*m));
05062       m->type        = MT_PCDATA;
05063       m->cardinality = MC_ONE;            /* ignored */
05064       add_submodel(def->content, m);
05065     } else
05066     { dtd_model *m;
05067     
05068       for(; g; g = g->next)
05069         { if ( g->type == MT_ELEMENT && g->content.element == e )
05070             return;
05071         }
05072       m = calloc(1, sizeof(*m));
05073       m->type        = MT_ELEMENT;
05074       m->cardinality = MC_ONE;            /* ignored */
05075       m->content.element = e;
05076       add_submodel(def->content, m);
05077     }
05078 }
05079 
05085 static void
05086 set_element_properties(dtd_element *e, dtd_attr *a)
05087 {
05088   if ( istreq(a->name->name, "xml:space") )
05089     {
05090       switch(a->def)
05091         { 
05092         case AT_FIXED:
05093         case AT_DEFAULT:
05094           break;
05095         default:
05096           return;
05097         }
05098 
05099       switch (a->type )
05100         {       
05101         case AT_NAMEOF:
05102         case AT_NAME:
05103         case AT_NMTOKEN:
05104           e->space_mode = istr_to_space_mode(a->att_def.name->name);
05105           break;
05106         case AT_CDATA:
05107           e->space_mode = istr_to_space_mode((ichar *)a->att_def.cdata);
05108           break;
05109         default:
05110           break;
05111         }
05112     }
05113 }
05114 
05120 static dtd_parser *
05121 clone_dtd_parser(dtd_parser *p)
05122 { dtd_parser *clone =calloc(1, sizeof(*p));
05124   clone->dtd = p->dtd;
05125   clone->dtd->references++;
05126   clone->environments = NULL;
05127   clone->marked       = NULL;
05128   clone->etag         = NULL;
05129   clone->grouplevel   = 0;
05130   clone->state        = S_PCDATA;
05131   clone->mark_state   = MS_INCLUDE;
05132   clone->dmode        = DM_DTD;
05133   clone->buffer       = new_icharbuf();
05134   clone->cdata        = new_ocharbuf();
05135   return clone;
05136 }
05137 
05144 static int
05145 open_element(dtd_parser *p, dtd_element *e, int warn)
05146 {
05147 
05148   /*Create the root element if it is enforced*/
05149   if ( !p->environments && p->enforce_outer_element )
05150     {
05151       dtd_element *f = p->enforce_outer_element->element;
05152 
05153       if ( f && f != e )
05154         {
05155           if ( !f->structure ||
05156                !f->structure->omit_open )
05157             gripe(ERC_OMITTED_OPEN, f->name->name);
05158           WITH_CLASS(p, EV_OMITTED,
05159                      { open_element(p, f, TRUE);
05160                        if ( p->on_begin_element )
05161                          { 
05162                            sgml_attribute atts[MAXATTRIBUTES];
05163                            int natts = 0;
05164                            if ( !(p->flags & SGML_PARSER_NODEFS) )
05165                              natts = add_default_attributes(p, f, natts, atts);
05166                            (*p->on_begin_element)(p, f, natts, atts);
05167                          }
05168                      });
05169         }
05170 
05171 
05172     }
05173   if ( !p->environments && !p->dtd->doctype && e != CDATA_ELEMENT )
05174     {
05175       const char *file;
05176                            
05177       if(FALSE)
05178         { 
05179           dtd_parser *clone = clone_dtd_parser(p);
05180           gripe(ERC_NO_DOCTYPE, e->name->name, file);
05181           if ( load_dtd_from_file(clone, file) )
05182             p->dtd->doctype = istrdup(e->name->name);
05183           else
05184             gripe(ERC_EXISTENCE, "file", file);
05185           free_dtd_parser(clone);
05186         }
05187 
05188     }
05189 
05190   /*Create environment for element and push it on a stack*/
05191   if(p->environments)
05192     {
05193       sgml_environment *env = p->environments;
05194       
05195       if ( env->element->undefined )
05196         { 
05197           allow_for(env->element, e);       /* <!ELEMENT x - - (model) +(y)> */
05198           push_element(p, e, FALSE);
05199           return TRUE;
05200         }
05201       if ( env->element->structure && 
05202            env->element->structure->type == C_ANY )
05203         { 
05204           if ( e != CDATA_ELEMENT && e->undefined )     
05205             gripe(ERC_EXISTENCE, "Element", e->name->name);
05206           push_element(p, e, FALSE);
05207           return TRUE;
05208         }
05209 
05210       switch(in_or_excluded(env, e))
05211         { 
05212         case IE_INCLUDED:
05213           push_element(p, e, FALSE);
05214           return TRUE;
05215         case IE_EXCLUDED:
05216           if ( warn )
05217             gripe(ERC_NOT_ALLOWED, e->name->name);
05218           /*FALLTHROUGH*/
05219 
05220         case IE_NORMAL:
05221           for(; env; env=env->parent)
05222             { dtd_state *new;
05223               if ( (new = make_dtd_transition(env->state, e)) )
05224                 { env->state = new;
05225                   pop_to(p, env, e);
05226                   push_element(p, e, FALSE);
05227                   return TRUE;
05228                 } else
05229                 { 
05230                   dtd_element *oe[MAXOMITTED]; /* omitted open */
05231                   int olen;
05232                   int i;
05233                   if ( (olen=find_omitted_path(env->state, e, oe)) != FALSE )
05234                     { 
05235                       pop_to(p, env, e);
05236                       WITH_CLASS(p, EV_OMITTED,
05237                                  for(i=0; i<olen; i++)
05238                                    { 
05239                                      env->state = make_dtd_transition(env->state, oe[i]);
05240                                      env = push_element(p, oe[i], TRUE);
05241                                    })
05242                         env->state = make_dtd_transition(env->state, e);
05243                       push_element(p, e, FALSE);
05244                       return TRUE;
05245                     }
05246                 }
05247               if ( !env->element->structure || !env->element->structure->omit_close )
05248                 break;
05249             }
05250         }
05251       if ( warn )
05252         { 
05253           if ( e == CDATA_ELEMENT )
05254             {
05255               gripe(ERC_VALIDATE, "#PCDATA not allowed here");
05256             }
05257           else if ( e->undefined )
05258             {
05259               gripe(ERC_EXISTENCE, "Element", e->name->name);
05260             }
05261           else
05262             {
05263               gripe(ERC_NOT_ALLOWED, e->name->name);
05264             }
05265         }
05266 
05267     }
05268   if ( warn )
05269     { 
05270       push_element(p, e, FALSE);
05271       return TRUE;
05272     } 
05273   else
05274     return FALSE;
05275 }
05276 
05282 int
05283 load_dtd_from_file(dtd_parser *p, const char *file)
05284 { 
05285   int rval;
05286   int n=0;
05287 
05288   char fname[MAXSTRLEN];
05289   char server[MAXSTRLEN];
05290   char *buf = NULL;
05291   
05292   data_mode   oldmode  = p->dmode;
05293   dtdstate    oldstate = p->state;
05294   locbuf      oldloc;
05295                                                                                 
05296   push_location(p, &oldloc);
05297   p->dmode = DM_DTD;
05298   p->state = S_PCDATA;
05299   empty_icharbuf(p->buffer);            /* dubious */
05300   set_src_dtd_parser(p, IN_FILE, file);
05301     
05302   /*File path is a url*/                            
05303   if( parse_url( file, server, fname) != FALSE)
05304     {
05305       if(get_file_www( server, fname, &buf) == FALSE){
05306         rval = FALSE;
05307       }
05308       else{
05309         n = strlen(buf);
05310         p->dtd->implicit = FALSE;
05311         rval = TRUE;
05312       }
05313     }
05314 
05315   /*Parse the downloaded dtd*/
05316   if ( buf )
05317     {   
05318       int chr,i;
05319 
05320       for( i=0;i<n;i++){
05321         chr=buf[i];
05322         putchar_dtd_parser(p, chr);
05323       }
05324         
05325                                                                        
05326       p->dtd->implicit = FALSE;
05327       rval = TRUE;
05328     } 
05329   else
05330     rval = FALSE;
05331                                                                         
05332   
05333   pop_location(p, &oldloc);
05334   p->dmode = oldmode;
05335   p->state = oldstate;
05336   
05337   return rval;
05338 }
05339 
05345 int
05346 is_absolute_path(const char *name)
05347 { if (isDirSep(name[0])
05348 #ifdef WIN_NT
05349       || (isalpha(toupper(name[0])) && name[1] == ':')
05350 #endif
05351       )
05352     return TRUE;
05353 
05354   return FALSE;
05355 }
05356 
05360 char *
05361 localpath(const char *ref, const char *name)
05362 { char *local;
05363 
05364   if (!ref || is_absolute_path(name))
05365     local = strdup(name);
05366   else
05367     { char buf[MAXPATHLEN];
05368 
05369       DirName(ref, buf);
05370       strcat(buf, DIRSEPSTR);
05371       strcat(buf, name);
05372 
05373       local = strdup(buf);
05374     }
05375 
05376   if (!local)
05377     sgml_nomem();
05378 
05379   return local;
05380 }
05381 
05386 static char *
05387 DirName(const char *f, char *dir)
05388 { const char *base, *p;
05389 
05390   for (base = p = f; *p; p++)
05391     { if (isDirSep(*p) && p[1] != EOS)
05392         base = p;
05393     }
05394   if (base == f)
05395     { if (isDirSep(*f))
05396         strcpy(dir, DIRSEPSTR);
05397       else
05398         strcpy(dir, ".");
05399     } else
05400     { strncpy(dir, f, base - f);
05401       dir[base - f] = EOS;
05402     }
05403 
05404   return dir;
05405 }
05406 
05407 static includetype
05408 in_or_excluded(sgml_environment *env, dtd_element *e)
05409 { 
05410   for(; env; env=env->parent)
05411     { if ( env->element->structure )
05412         { dtd_edef *def = env->element->structure;
05413           dtd_element_list *el;
05414           
05415           for(el=def->excluded; el; el=el->next)
05416             { if ( el->value == e )
05417                 return IE_EXCLUDED;
05418             }
05419           for(el=def->included; el; el=el->next)
05420             { if ( el->value == e )
05421                 return IE_INCLUDED;
05422             }
05423         }
05424     }
05425   
05426   return IE_NORMAL;
05427 }
05428 
05429 
05435 static __inline void
05436 _sgml_cplocation(dtd_srcloc *d, dtd_srcloc *loc)
05437 { d->type    = loc->type;
05438   d->name    = loc->name;
05439   d->line    = loc->line;
05440   d->linepos = loc->linepos;
05441   d->charpos = loc->charpos;
05442   /* but not the parent! */
05443 }
05444 
05451 static sgml_environment *
05452 push_element(dtd_parser *p, dtd_element *e, int callback)
05453 {
05454   if( e != CDATA_ELEMENT)
05455     {
05456       sgml_environment *env = calloc(1, sizeof(*env));
05457       emit_cdata(p, FALSE);
05458 
05459       env->element = e;
05460       env->state = make_state_engine(e);
05461 
05462       env->space_mode = (p->environments ? p->environments->space_mode: p->dtd->space_mode);
05463       env->parent = p->environments;
05464       p->environments = env;
05465       if ( p->dtd->shorttag )
05466         {
05467           env->saved_waiting_for_net = p->waiting_for_net;
05468           
05469           if ( p->event_class == EV_SHORTTAG )
05470             {
05471               p->waiting_for_net = TRUE;
05472               env->wants_net = TRUE;
05473             } 
05474           else
05475             { 
05476               env->wants_net = FALSE;
05477               if ( e->structure && e->structure->omit_close == FALSE )
05478                 p->waiting_for_net = FALSE;
05479             }
05480 
05481         } 
05482       if ( e->map )
05483         p->map = env->map = e->map;
05484       else if ( env->parent )
05485         p->map = env->map = env->parent->map;
05486 
05487       p->first = TRUE;
05488       if ( callback && p->on_begin_element )
05489         { 
05490           sgml_attribute atts[MAXATTRIBUTES];
05491           int natts = 0;
05492           
05493           if ( !(p->flags & SGML_PARSER_NODEFS) )
05494             natts = add_default_attributes(p, e, natts, atts);
05495                  
05496           (*p->on_begin_element)(p, e, natts, atts);
05497         }       
05498    
05499       if ( e->structure )
05500         { 
05501           if ( e->structure->type == C_CDATA || e->structure->type == C_RCDATA )
05502             { 
05503               p->state = (e->structure->type == C_CDATA ? S_CDATA : S_RCDATA);
05504               p->cdata_state = p->state;
05505               p->etag = e->name->name;
05506               p->etaglen = istrlen(p->etag);
05507               sgml_cplocation(&p->startcdata, &p->location);
05508             } 
05509           else
05510             p->cdata_state = S_PCDATA;
05511         }
05512     }
05513   return p->environments;
05514 
05515 }
05516 
05523 static void                             /* TBD: also handle startloc */
05524 push_location(dtd_parser *p, locbuf *save)
05525 { save->here  = p->location;
05526   save->start = p->startloc;
05527   p->location.parent = &save->here;
05528   p->startloc.parent = &save->start;
05529 }
05530 
05531 
05535 static int
05536 emit_cdata(dtd_parser *p, int last)
05537 {
05538   dtd *dtd = p->dtd;
05539   ichar *s, *data = p->cdata->data;
05540   locbuf locsafe;
05541         
05542   if ( p->cdata->size == 0 )
05543     {
05544       return TRUE;                        /* empty or done */
05545     }
05546 
05547   push_location(p, &locsafe);
05548   sgml_cplocation(&p->location, &p->startloc);  /* start of markup */
05549   sgml_cplocation(&p->startloc, &p->startcdata);        /* real start of CDATA */
05550 
05551   if ( p->environments )
05552     { 
05553       switch(p->environments->space_mode)
05554         {
05555         case SP_SGML:
05556         case SP_DEFAULT:
05557 
05558           if ( p->first )
05559             {
05560               if ( HasClass(dtd, *data, CH_RE) )
05561                 { 
05562                   inc_location(&p->startloc, *data);
05563                   data++;
05564                   p->cdata->size--;
05565                 }
05566               if ( HasClass(dtd, *data, CH_RS) )
05567                 { 
05568                   inc_location(&p->startloc, *data);
05569                   data++;
05570                   p->cdata->size--;
05571                 }
05572 
05573             }
05574           if( last)
05575             {
05576               ichar *e = data + p->cdata->size;
05577               if ( e > data && HasClass(dtd, e[-1], CH_RS) )
05578                 { 
05579                   dec_location(&p->location, e[-1]);
05580                   *--e = '\0';
05581                   p->cdata->size--;
05582                 }
05583               if ( e>data && HasClass(dtd, e[-1], CH_RE) )
05584                 { 
05585                   dec_location(&p->location, e[-1]);
05586                   *--e = '\0';
05587                   p->cdata->size--;
05588                 }
05589 
05590             }
05591 
05592           if ( p->environments->space_mode == SP_DEFAULT )
05593             { 
05594               ichar *o = data;
05595               for(s=data; *s; s++)
05596                 { 
05597                   if ( HasClass(dtd, *s, CH_BLANK) )
05598                     {
05599                       while(s[1] && HasClass(dtd, s[1], CH_BLANK))
05600                         s++;
05601                       *o++ = ' ';
05602                       continue;
05603                     }
05604                   *o++ = *s;
05605                 }
05606               *o = '\0';
05607               p->cdata->size = o-data;
05608             }
05609 
05610           break;
05611         case SP_REMOVE:
05612           { ichar *o = data;
05613             ichar *end = data;
05614             for(s=data; *s && HasClass(dtd, *s, CH_BLANK); )
05615               inc_location(&p->startloc, *s++);
05616             if ( *s )
05617               { 
05618                 for(; *s; s++)
05619                   { 
05620                     if ( HasClass(dtd, *s, CH_BLANK) )
05621                       {
05622                         while(s[1] && HasClass(dtd, s[1], CH_BLANK))
05623                           s++;
05624                         *o++ = ' ';
05625                         continue;
05626                       }
05627                     *o++ = *s;
05628                     end = o;
05629                   }
05630               }
05631             *end = '\0';
05632             p->cdata->size = end-data;
05633             break;
05634           }
05635 
05636         case SP_PRESERVE:
05637           break;
05638         case SP_INHERIT:
05639           assert(0);
05640           return FALSE;
05641         }
05642     }
05643   if ( p->cdata->size == 0 )
05644     {
05645       pop_location(p, &locsafe);
05646       return TRUE;
05647     }
05648   assert(p->cdata->size > 0);
05649 
05650         
05651   if ( !p->blank_cdata )
05652     {
05653       if ( p->cdata_must_be_empty )
05654         { 
05655           terminate_ocharbuf(p->cdata);
05656           gripe(ERC_NOT_ALLOWED_PCDATA, p->cdata->data);
05657         }
05658       if ( p->on_data )
05659         {
05660           (*p->on_data)(p, EC_CDATA, p->cdata->size, data);
05661                                                 
05662         }
05663     }
05664   else if(p->environments)
05665     {
05666       sgml_environment *env = p->environments;
05667       dtd_state *new;
05668       
05669       /* If an element is not in the DTD we must */
05670       /* assume mixed content and emit spaces */
05671       
05672       if ( (new=make_dtd_transition(env->state, CDATA_ELEMENT)) )
05673         { 
05674           env->state = new;
05675           if ( p->on_data )
05676             (*p->on_data)(p, EC_CDATA, p->cdata->size, data);
05677         } 
05678       else if ( env->element->undefined &&  p->environments->space_mode == SP_PRESERVE )
05679         { 
05680           if ( p->on_data )
05681             (*p->on_data)(p, EC_CDATA, p->cdata->size, data);
05682         }
05683 
05684     }
05685         
05686 
05687   pop_location(p, &locsafe);
05688   empty_cdata(p);
05689   return TRUE;
05690 
05691 }
05692 
05698 static void
05699 empty_cdata(dtd_parser *p)
05700 {
05701   if ( p->dmode == DM_DATA )
05702     { empty_ocharbuf(p->cdata);
05703       p->blank_cdata = TRUE;
05704       p->cdata_must_be_empty = FALSE;
05705     }
05706 }
05707 
05713 static void
05714 pop_location(dtd_parser *p, locbuf *saved)
05715 { p->location = saved->here;
05716   p->startloc = saved->start;
05717 }
05718 
05722 static void
05723 inc_location(dtd_srcloc *l, int chr)
05724 { if ( chr == '\n' )
05725     { l->linepos = 0;
05726       l->line++;
05727     }
05728  
05729   l->linepos++;
05730   l->charpos++;
05731 }
05732 
05736 static void
05737 dec_location(dtd_srcloc *l, int chr)
05738 { if ( chr == '\n' )
05739     { l->linepos = 2;                     /* not good! */
05740       l->line--;
05741     }
05742   l->linepos--;
05743   l->charpos--;
05744 }
05745 
05746 
05752 static dtd_element *
05753 def_element(dtd *dtd, dtd_symbol *id)
05754 { dtd_element *e = find_element(dtd, id);
05755  
05756   if ( !e->structure ) {
05757       e->structure = calloc(1, sizeof(*e->structure));
05758       e->structure->references = 1;
05759       e->structure->type = C_EMPTY;
05760   }
05761   return e;
05762 }
05763 
05769 static dtd_element *
05770 find_element(dtd *dtd, dtd_symbol *id)
05771 { dtd_element *e;
05772  
05773   if ( id->element )
05774     return id->element;                 /* must check */
05775   
05776   e = calloc(1, sizeof(*e));
05777   e->space_mode = SP_INHERIT;
05778   e->undefined = TRUE;
05779   e->name = id;
05780   id->element = e;
05781   
05782   e->next = dtd->elements;
05783   dtd->elements = e;
05784   
05785   return e;
05786 }
05787 
05793 static const ichar *
05794 itake_name(dtd *dtd, const ichar *in, dtd_symbol **id)
05795 {
05796   ichar buf[MAXSTRLEN];
05797   ichar *o = buf;
05798 
05799   in = iskip_layout(dtd, in);
05800 
05801   if ( !HasClass(dtd, *in, CH_NMSTART) )
05802     return NULL;
05803   if ( dtd->case_sensitive )
05804     { 
05805       while( HasClass(dtd, *in, CH_NAME) )
05806         *o++ = *in++;
05807     } 
05808   else
05809     { 
05810       while( HasClass(dtd, *in, CH_NAME) )
05811         *o++ = tolower(*in++);
05812     }
05813   *o++ = '\0';
05814   
05815   *id = dtd_add_symbol(dtd, buf);
05816         
05817   return iskip_layout(dtd, in);
05818 }
05819 
05825 static const ichar *
05826 iskip_layout(dtd *dtd, const ichar *in)
05827 {
05828   ichar cmt = dtd->charfunc->func[CF_CMT]; /* also skips comment */
05829   
05830                                                                               
05831   for( ; *in; in++ )
05832     { if ( HasClass(dtd, *in, CH_BLANK) )
05833         continue;
05834     
05835       if ( in[0] == cmt && in[1] == cmt )
05836         { in += 2;
05837         
05838           for( ; *in; in++ )
05839             { if ( in[0] == cmt && in[1] == cmt )
05840                 break;
05841             }
05842           in++;
05843           continue;
05844         }
05845                   
05846       return in;
05847     }
05848 
05849   return in;
05850 }
05851 
05858 dtd_symbol *
05859 dtd_add_symbol(dtd *dtd, const ichar *name)
05860 { dtd_symbol_table *t = dtd->symbols;
05861   int k = istrhash(name, t->size);
05862   dtd_symbol *s;
05863   
05864   for(s=t->entries[k]; s; s = s->next)
05865     { if ( istreq(s->name, name) )
05866         return s;
05867     }
05868   
05869   s = calloc(1, sizeof(*s));
05870   s->name = istrdup(name);
05871   s->next = t->entries[k];
05872   t->entries[k] = s;
05873  
05874   return s;
05875 }
05876 
05882 static const ichar *
05883 isee_func(dtd *dtd, const ichar *in, charfunc func)
05884 { if ( dtd->charfunc->func[func] == *in )
05885     return ++in;
05886                                                                                
05887   return NULL;
05888 }
05889 
05890 
05898 static const ichar * 
05899 process_attributes(dtd_parser *p, dtd_element *e, const ichar *decl,
05900                    sgml_attribute *atts, int *argc)
05901 {
05902 
05903   int attn = 0;
05904   dtd *dtd = p->dtd;
05905   
05906   decl = iskip_layout(dtd, decl);
05907 
05908   while(decl && *decl)
05909     {
05910         
05911       dtd_symbol *nm;
05912       const ichar *s;
05913       
05914       /*Process attribute name*/
05915       if ( (s=itake_nmtoken(dtd, decl, &nm)) )
05916         {
05917           decl = s;
05918           if ( (s=isee_func(dtd, decl, CF_VI)) ) /* name= */
05919             {
05920               dtd_attr *a;
05921                                 
05922               if ( !HasClass(dtd, nm->name[0], CH_NMSTART) )
05923                 gripe(ERC_SYNTAX_WARNING, "Illegal start of attribute name\n");
05924               decl = s;
05925 
05926               /*Look for attribute in symbol table*/
05927               if ( !(a=find_attribute(e, nm)) )
05928                 {
05929                   a = calloc(1, sizeof(*a));
05930                   a->name = nm;
05931                   a->type = AT_CDATA;
05932                   a->def  = AT_IMPLIED;
05933                   add_attribute(dtd, e, a);
05934 
05935                   if ( !e->undefined && !(dtd->dialect != DL_SGML && (istreq("xmlns", nm->name) || istrprefix("xmlns:", nm->name))) )
05936                     {   gripe(ERC_NO_ATTRIBUTE, e->name->name, nm->name);                               
05937                     }
05938                 }
05939               atts[attn].definition = a;
05940 
05941               if ( (decl=get_attribute_value(p, decl, atts+attn)) )
05942                 {
05943                   attn++;
05944                   continue;
05945                 }
05946         
05947             }
05948           else if ( e->structure )
05949             { 
05950               dtd_attr_list *al;              /* value shorthand */
05951               for(al=e->attributes; al; al=al->next)
05952                 { 
05953                   dtd_attr *a = al->attribute;
05954                   
05955                   if ( a->type == AT_NAMEOF || a->type == AT_NOTATION )
05956                     { 
05957                       dtd_name_list *nl;
05958                       
05959                       for(nl=a->typeex.nameof; nl; nl = nl->next)
05960                         { 
05961                           if ( nl->value == nm )
05962                             { 
05963                               if ( dtd->dialect != DL_SGML )
05964                                 {               
05965                                   gripe(ERC_SYNTAX_WARNING, "Value short-hand in XML mode", decl);
05966                                 }
05967                               atts[attn].definition   = a;
05968                               atts[attn].value.cdata  = NULL;
05969                               atts[attn].value.number = 0;
05970                               atts[attn].value.text   = istrdup(nm->name);
05971                               attn++;
05972                               goto next;
05973                             }
05974                         }
05975                     }
05976                 }
05977 
05978               gripe(ERC_NO_ATTRIBUTE_VALUE, e->name->name, nm->name);
05979               decl = s;
05980             } else
05981             {
05982               gripe(ERC_SYNTAX_ERROR, "Bad attribute", decl);
05983               decl = s;
05984             }
05985         } else
05986         { 
05987           *argc = attn;
05988           return decl;
05989         }
05990     next:
05991       ;
05992     }
05993   *argc = attn;
05994   return decl;
05995 }
05996 
05997 
06003 static int
06004 set_option_dtd( dtd *dtd, dtd_option option, char * set)
06005 {
06006   switch(option)
06007     {
06008     case OPT_SHORTTAG:
06009       if ( !strcmp( set, "FALSE") || !strcmp( set, "false"))
06010         {
06011           dtd->shorttag = FALSE;
06012         }
06013       else
06014         {
06015           dtd->shorttag = TRUE;
06016         }
06017       break;
06018     }
06019   return TRUE;
06020 }
06021 
06022 
06029 static int
06030 add_default_attributes(dtd_parser *p, dtd_element *e,
06031                        int natts, sgml_attribute *atts)
06032 {
06033 
06034   dtd_attr_list *al;
06035 
06036   if ( e == CDATA_ELEMENT )
06037     {  
06038       return natts;
06039     }
06040   
06041   for(al=e->attributes; al; al=al->next)
06042     { dtd_attr *a = al->attribute;
06043     
06044       switch(a->def)
06045         { case AT_REQUIRED:                 /* TBD: check if present */
06046         case AT_CURRENT:                  /* TBD: register in DTD and reuse */
06047         case AT_CONREF:
06048         case AT_IMPLIED:
06049           goto next;
06050         case AT_FIXED:
06051         case AT_DEFAULT:
06052           { int i;
06053             sgml_attribute *ap;
06054             
06055             for(i=0, ap=atts; i<natts; i++, ap++)
06056               { if ( ap->definition == a )
06057                   goto next;
06058               }
06059 
06060             ap->definition   = a;
06061             ap->value.cdata  = NULL;
06062             ap->value.text   = NULL;
06063             ap->value.number = 0;
06064             ap->flags        = SGML_AT_DEFAULT;
06065             
06066             switch(a->type)
06067               { case AT_CDATA:
06068                   ap->value.cdata = a->att_def.cdata;
06069                   break;
06070               case AT_NUMBER:
06071                 if ( p->dtd->number_mode == NU_TOKEN )
06072                   ap->value.text = (ichar *)a->att_def.name->name;
06073                 else
06074                   ap->value.number = a->att_def.number;
06075                 break;
06076               default:
06077                 if ( a->islist )
06078                   ap->value.text = a->att_def.list;
06079                 else
06080                   ap->value.text = (ichar *)a->att_def.name->name;
06081               }
06082 
06083             natts++;
06084           }
06085         }
06086     next:;
06087     }
06088   
06089   return natts;
06090 }
06091 
06099 static ichar const *
06100 get_attribute_value(dtd_parser *p, ichar const *decl, sgml_attribute *att)
06101 { 
06102   ichar tmp[MAXSTRINGLEN];
06103   ichar *buf = tmp;
06104   ochar cdata[MAXSTRINGLEN];
06105   ichar const *s;
06106   ichar *d;
06107   ichar c;
06108   dtd *dtd = p->dtd;
06109   ichar const *end;
06110        
06111 
06112   enum
06113     {
06114       DIG_FIRST = 8,              /* any token start with digit? */
06115       NAM_FIRST = 4,              /* any token start with non-digit name char? */
06116       NAM_LATER = 2,              /* any token have non-digit name char later? */
06117       ANY_OTHER = 1,              /* any token have illegal character? */
06118       YET_EMPTY = 0
06119     }token;
06120 
06121   token = YET_EMPTY;
06122   
06123   end = itake_string(dtd, decl, tmp, sizeof (tmp));
06124 
06125   if (end != NULL)
06126     {
06127       if (att->definition->type == AT_CDATA)
06128         {
06129           int hasent = FALSE, hasento = FALSE;
06130           ichar const ero = dtd->charfunc->func[CF_ERO];    /* & */
06131 
06132           /*Fix by Rohan*/
06133           /* The attribute contains an entity only if there is an opening &
06134              and a closing ;*/
06135           ichar const erc = dtd->charfunc->func[CF_ERC];
06136 
06137           ichar *q;
06138           
06139           for (d = q = tmp; *d; *q++ = *d++)
06140             { 
06141               if ( d[0] == CR && d[1] == LF )
06142                 d++;
06143               if (HasClass(dtd, *d, CH_BLANK))
06144                 { 
06145                   *d = ' ';             /* map all blanks to spaces */
06146                 } 
06147               else if (*d == ero)
06148                 { 
06149                   hasento = TRUE;        /* notice char/entity references */
06150                 }
06151               else if( hasento == TRUE && (*d == erc || *d == ero || *d == '@' || HasClass( dtd, *d, CH_WHITE) || HasClass( dtd, *d, CH_RE) || HasClass( dtd, *d, CH_RS) ))
06152                 {
06153                   hasent = TRUE;
06154                 }
06155               else if( hasento == TRUE && (*d != erc && *d != ero && *d != '@' && !HasClass( dtd, *d, CH_WHITE)  && !HasClass( dtd, *d, CH_NAME) && !HasClass( dtd, *d, CH_RE) && !HasClass( dtd, *d, CH_RS) ))
06156                 {
06157                   hasento = FALSE;
06158                   hasento = FALSE;
06159                 }
06160 
06161 #ifdef UTF8
06162               else if ( p->utf8_decode && ISUTF8_MB(*d) )
06163                 { 
06164                   hasent = TRUE;
06165                 }
06166 #endif
06167 
06168                 
06169             } 
06170           *q = '\0';
06171           if (hasent)
06172             {
06173               expand_entities(p, tmp, cdata, MAXSTRINGLEN);
06174               buf = (ichar *) cdata;
06175               hasent = hasento = FALSE;
06176             }
06177         }
06178       else
06179         {
06180           ichar *d;
06181           expand_entities(p, tmp, cdata, MAXSTRINGLEN);
06182           buf = (ichar *) cdata;
06183 
06184           /* canonicalise blanks */
06185           s = buf;
06186           while ((c = *s++) != '\0' && HasClass(dtd, c, CH_BLANK))
06187             { }
06188           d = buf;
06189           while (c != '\0')
06190             { 
06191               token |= HasClass(dtd, c, CH_DIGIT) ? DIG_FIRST : HasClass(dtd, c, CH_NAME) ? NAM_FIRST : /* oops! */ ANY_OTHER;
06192               if (d != buf)
06193                 *d++ = ' ';
06194 
06195               if (dtd->case_sensitive)
06196                 { 
06197                   *d++ = c;
06198                   while ((c = *s++) != '\0' && !HasClass(dtd, c, CH_BLANK))
06199                     { 
06200                       token |= HasClass(dtd, c, CH_DIGIT) ? 0: HasClass(dtd, c, CH_NAME) ? NAM_LATER : /* oops! */ ANY_OTHER;
06201                       *d++ = c;
06202                     }
06203                 } 
06204               else
06205                 { 
06206                   *d++ = tolower(c);
06207                   while ((c = *s++) != '\0' && !HasClass(dtd, c, CH_BLANK))
06208                     { 
06209                       token |= HasClass(dtd, c, CH_DIGIT) ? 0
06210                         : HasClass(dtd, c, CH_NAME) ? NAM_LATER : /* oops! */ ANY_OTHER;
06211                       *d++ = tolower(c);
06212                     }
06213                 }
06214               while (c != '\0' && HasClass(dtd, c, CH_BLANK))
06215                 c = *s++;
06216             }
06217           *d = '\0';
06218         }               
06219     }
06220   else
06221     {
06222       end = itake_unquoted(dtd, decl, tmp, sizeof tmp);
06223       if (end == NULL)
06224         return NULL;
06225 
06226       s = buf;
06227       c = *s++;
06228       if (c != '\0')
06229         { 
06230           token |= HasClass(dtd, c, CH_DIGIT) ? DIG_FIRST
06231             : HasClass(dtd, c, CH_NAME) ? NAM_FIRST : /* oops! */ ANY_OTHER;
06232           while ((c = *s++) != 0)
06233             { 
06234               token |= HasClass(dtd, c, CH_DIGIT) ? 0 : HasClass(dtd, c, CH_NAME) ? NAM_LATER : /* oops! */ ANY_OTHER;
06235             }
06236         }
06237       if ( token == YET_EMPTY || (token & ANY_OTHER) != 0)
06238         gripe(ERC_SYNTAX_WARNING, "Attribute value requires quotes", buf);
06239 
06240       if (!dtd->case_sensitive && att->definition->type != AT_CDATA)
06241         istrlower(buf);
06242     }
06243 
06244   att->value.cdata = NULL;
06245   att->value.text = NULL;
06246   att->value.number = 0;
06247   att->flags = 0;
06248 
06249   switch (att->definition->type)
06250     {
06251 
06252     case AT_NUMBER:             /* number */
06253       if (token != DIG_FIRST)
06254         { 
06255           gripe(ERC_SYNTAX_WARNING, "NUMBER expected", decl);
06256         } 
06257       else if (dtd->number_mode == NU_INTEGER)
06258         { 
06259           (void) istrtol(buf, &att->value.number);
06260         }
06261       else
06262         { 
06263           att->value.text = istrdup(buf);
06264         }
06265       return end;
06266     case AT_CDATA:              /* CDATA attribute */
06267       att->value.cdata = ostrdup((ochar *) buf);
06268       return end;
06269     case AT_ID:         /* identifier */
06270     case AT_IDREF:              /* identifier reference */
06271     case AT_NAME:               /* name token */
06272     case AT_NOTATION:           /* notation-name */
06273       if (token == YET_EMPTY || (token & (DIG_FIRST | ANY_OTHER)) != 0)
06274         gripe(ERC_SYNTAX_WARNING, "NAME expected", decl);
06275       break;
06276     case AT_NAMEOF:             /* one of these names */
06277     case AT_NMTOKEN:            /* name-token */
06278       if (token == YET_EMPTY || (token & ANY_OTHER) != 0)
06279         gripe(ERC_SYNTAX_WARNING, "NMTOKEN expected", decl);
06280       if ( att->definition->type == AT_NAMEOF )
06281         { 
06282           dtd_name_list *nl;
06283 
06284           for(nl=att->definition->typeex.nameof; nl; nl = nl->next)
06285             { 
06286               if ( istreq(nl->value->name, buf) )
06287                 goto passed;
06288             }
06289           gripe(ERC_SYNTAX_WARNING, "unexpected value", decl);
06290         }
06291       break;
06292     case AT_NUTOKEN:            /* number token */
06293       if ((token & (NAM_FIRST | ANY_OTHER)) != 0)
06294         gripe(ERC_SYNTAX_WARNING, "NUTOKEN expected", decl);
06295       break;
06296     case AT_ENTITY:             /* entity-name */
06297       if (token == YET_EMPTY || (token & (DIG_FIRST | ANY_OTHER)) != 0)
06298         gripe(ERC_SYNTAX_WARNING, "entity NAME expected", decl);
06299       break;
06300     case AT_NAMES:              /* list of names */
06301     case AT_IDREFS:             /* list of identifier references */
06302       if (token == YET_EMPTY || (token & (DIG_FIRST | ANY_OTHER)) != 0)
06303         gripe(ERC_SYNTAX_WARNING, "NAMES expected", decl);
06304       break;
06305     case AT_ENTITIES:           /* entity-name list */
06306       if (token == YET_EMPTY || (token & (DIG_FIRST | ANY_OTHER)) != 0)
06307         gripe(ERC_SYNTAX_WARNING, "entity NAMES expected", decl);
06308       break;
06309     case AT_NMTOKENS:           /* name-token list */
06310       if (token == YET_EMPTY || (token & ANY_OTHER) != 0)
06311         gripe(ERC_SYNTAX_WARNING, "NMTOKENS expected", decl);
06312       break;
06313     case AT_NUMBERS:            /* number list */
06314       if (token != DIG_FIRST)
06315         gripe(ERC_SYNTAX_WARNING, "NUMBERS expected", decl);
06316       break;
06317     case AT_NUTOKENS:
06318       if ((token & (NAM_FIRST | ANY_OTHER)) != 0)
06319         gripe(ERC_SYNTAX_WARNING, "NUTOKENS expected", decl);
06320       break;
06321     default:
06322       assert(0);
06323       return NULL;
06324 
06325     }
06326  passed:
06327   att->value.text = istrdup(buf);       /* TBD: more validation */
06328         
06329   return end;
06330 }
06331 
06337 static ichar const *
06338 itake_unquoted(dtd *dtd, ichar const *in, ichar *out, int len)
06339 { ichar const end2 = dtd->charfunc->func[CF_ETAGO2];    /* / */
06340   ichar c;
06341 
06342   /* skip leading layout.  Do NOT skip comments! --x-- is a value! */
06343   while (c = *in, HasClass(dtd, c, CH_BLANK))
06344     in++;
06345 
06346   /* copy the attribute to out[] */
06347   while ( !HasClass(dtd, c, CH_BLANK) &&
06348           c != '\0' )
06349     { if ( c == end2 && (dtd->shorttag ||
06350                          (in[1] == '\0' && dtd->dialect != DL_SGML)) )
06351         break;
06352 
06353       if ( --len > 0 )
06354         *out++ = c;
06355       else if ( len == 0 )
06356         gripe(ERC_REPRESENTATION, "Attribute too long");
06357       c = *++in;
06358     }
06359   *out = '\0';
06360 
06361   /* skip trailing layout.  While it is kind to skip comments here,
06362      it is technically wrong to do so.  Tags may not contain comments.
06363   */
06364 
06365   return iskip_layout(dtd, in);
06366 }
06367 
06374 static int
06375 expand_entities(dtd_parser *p, const ichar *in, ochar *out, int len)
06376 { const ichar *s;
06377   dtd *dtd = p->dtd;
06378   int ero = dtd->charfunc->func[CF_ERO]; /* & */
06379   const ochar *map = dtd->charmap->map;
06380 
06381   while(*in)
06382     { 
06383       if ( *in == ero )
06384         { 
06385           const ichar *estart = in;             /* for recovery */
06386           int chr;
06387 
06388           if ( (s=isee_character_entity(dtd, in, &chr)) )
06389             { 
06390               if ( chr <= 0 || chr >= OUTPUT_CHARSET_SIZE )
06391                 gripe(ERC_REPRESENTATION, "character");
06392               if ( --len <= 0 )
06393                 {
06394                   return gripe(ERC_REPRESENTATION, "CDATA string too long");
06395                 }
06396               *out++ = chr;
06397               in = s;
06398               continue;
06399             }
06400           if ( HasClass(dtd, in[1], CH_NMSTART) )
06401             { 
06402               dtd_symbol *id;
06403               dtd_entity *e;
06404               const ichar *eval;
06405               int l;
06406         
06407               in = itake_name(dtd, in+1, &id);
06408               if ( isee_func(dtd, in, CF_ERC) || *in == '\n' )
06409                 in++;
06410   
06411               if ( !(e = id->entity) && !(e=dtd->default_entity) )
06412                 { 
06413                   gripe(ERC_EXISTENCE, "entity", id->name);
06414                   in = estart;
06415                   goto recover;
06416                 }
06417   
06418               if ( !(eval = entity_value(p, e, NULL)) )
06419                 { 
06420                   gripe(ERC_NO_VALUE, e->name->name);
06421                   in = estart;
06422                   goto recover;
06423 
06424                 }
06425               in = estart;
06426               goto recover;
06427                         
06428               if ( !expand_entities(p, eval, out, len) )
06429                 return FALSE;
06430               l = ostrlen(out);         /* could be better */
06431               out += l;
06432               len -= l;
06433 
06434               continue;
06435             }
06436         }
06437 
06438     recover:
06439       if ( --len <= 0 )
06440         return gripe(ERC_REPRESENTATION, "CDATA string too long");
06441 
06442 #ifdef UTF8
06443       if ( p->utf8_decode && ISUTF8_MB(*in) )
06444         { 
06445           int chr;
06446 
06447           in = __utf8_get_char(in, &chr);
06448           if ( chr >= OUTPUT_CHARSET_SIZE )
06449             gripe(ERC_REPRESENTATION, "character");
06450           *out++ = chr;
06451         }
06452 #endif
06453       *out++ = map[*in++];
06454     }
06455 
06456   *out = 0;
06457 
06458   return TRUE;
06459 }
06460 
06466 static const ichar *
06467 itake_string(dtd *dtd, const ichar *in, ichar *out, int len)
06468 {
06469   in = iskip_layout(dtd, in);
06470   
06471   if ( isee_func(dtd, in, CF_LIT) || isee_func(dtd, in, CF_LITA) )
06472     { 
06473       ichar q = *in++;
06474       
06475       while( *in && *in != q )
06476         {
06477           *out++ = *in++;
06478           if ( --len == 0 )
06479             { 
06480               gripe(ERC_SYNTAX_ERROR, "String too long");
06481               return NULL;
06482             }
06483         }
06484       if ( *in )
06485         { 
06486           *out = '\0';
06487           return iskip_layout(dtd, ++in);
06488         }
06489     }
06490   return NULL;
06491 }
06492 
06498 static dtd_attr *
06499 find_attribute(dtd_element *e, dtd_symbol *name)
06500 { dtd_attr_list *a;
06501  
06502   for(a=e->attributes; a; a=a->next)
06503     { if ( a->attribute->name == name )
06504         return a->attribute;
06505     }
06506   
06507   return NULL;
06508 }
06509 
#ifdef UTF8
/*
 * process_utf8() - start decoding a UTF-8 multibyte sequence.
 *
 * chr is the lead byte.  Starting at bit 0x20, each further set bit in the
 * lead byte adds one to the number of bytes still expected (initially 1)
 * and narrows the mask of payload bits.  The parser is switched to S_UTF8
 * with the payload of the lead byte stored in utf8_char, the expected byte
 * count in utf8_left, and the interrupted state kept in utf8_saved_state.
 */
static void
process_utf8(dtd_parser *p, int chr)
{
  int bytes = 1;
  int mask  = 0x20;

  while ( chr & mask )
    {
      bytes++;
      mask >>= 1;
    }
  mask--;                               /* 0x20 --> 0x1f */

  p->utf8_saved_state = p->state;               /* state to return to */
  p->state = S_UTF8;
  p->utf8_char = chr & mask;
  p->utf8_left = bytes;
}
#endif

Generated on Wed Jul 26 13:30:45 2006 for XSB by  doxygen 1.4.5