00001
00002
00003
00004
00005
00006
00007
00008 #define DTD_IMPLEMENTATION 1
00009
00010 #include "xsb_config.h"
00011 #include "dtd.h"
00012 #include "parser.h"
00013 #include "model.h"
00014 #include "sgmldefs.h"
00015 #include <assert.h>
00016 #include <ctype.h>
00017 #include <errno.h>
00018 #include "utf8.h"
00019 #include "utf8.c"
00020 #include <stdarg.h>
00021 #include <ctype.h>
00022 #include <string.h>
00023 #include <stdlib.h>
00024 #include "util.h"
00025
00026
00027
00028 #include "error_term.h"
00029 #include "cinterf.h"
00030
00031
00032 #define MAXSTRLEN 256
00033
00034 static dtd_parser *current_parser;
00035
/* Test whether character `chr` carries any of the class bits in `mask`
 * according to the character-class table reachable from `dtd`.
 *
 * Expands to the masked bits themselves, so the result is non-zero when at
 * least one requested bit is set (it is not normalised to 0/1).
 * Every parameter is parenthesised so that expression arguments such as
 * `dtds + 1` expand correctly.
 */
#define HasClass(dtd, chr, mask) \
  ((dtd)->charclass->class[(chr)] & (mask))
00038
00039
/* Run statement(s) `g` with the file-scope `current_parser` set to `p`.
 *
 * NOTE(review): `_old` is initialised from `p`, not from the previous value
 * of `current_parser`, so after the macro `current_parser` still refers to
 * `p`.  This is deliberately left unchanged because code outside the macro
 * may depend on it -- confirm before "fixing".
 *
 * The expansion is a bare compound statement; `p` is evaluated twice, and a
 * `return`/`goto` inside `g` skips the trailing assignment.
 */
#define WITH_PARSER(p, g) \
  { dtd_parser *_old = (p); \
    current_parser = (p); \
    g; \
    current_parser = _old; \
  }

/* Run statement(s) `g` with `p->event_class` temporarily set to `c`, then
 * put the previous event class back.
 *
 * `p` is evaluated several times; a `return`/`goto` inside `g` skips the
 * restore.  Parameters are parenthesised so expression arguments (e.g.
 * `&parser`) expand correctly.
 */
#define WITH_CLASS(p, c, g) \
  { sgml_event_class _oc = (p)->event_class; \
    (p)->event_class = (c); \
    g; \
    (p)->event_class = _oc; \
  }
00052
00053
00054 typedef struct locbuf
00055 { dtd_srcloc start;
00056 dtd_srcloc here;
00057 } locbuf;
00058
00059 static void
00060 empty_cdata(dtd_parser *p);
00061
00062 extern int
00063 parse_url( const char * url, char * server, char *fname);
00064
00065 extern int
00066 get_file_www(char *server, char* fname, char ** buf);
00067
00068 static int
00069 match_shortref(dtd_parser *p);
00070
00071 static int
00072 prepare_cdata(dtd_parser *p);
00073
00074 static int
00075 process_declaration(dtd_parser *p, const ichar *decl);
00076
00077 static int
00078 process_begin_element(dtd_parser *p, const ichar *decl);
00079
00080 static const ichar *
00081 itake_name(dtd *dtd, const ichar *in, dtd_symbol **id);
00082
00083 static const ichar *
00084 iskip_layout(dtd *dtd, const ichar *in);
00085
00086 static dtd_element *
00087 find_element(dtd *dtd, dtd_symbol *id);
00088
00089 static dtd_element *
00090 def_element(dtd *dtd, dtd_symbol *id);
00091
00092 static int
00093 open_element(dtd_parser *p, dtd_element *e, int warn);
00094
00095 static sgml_environment *
00096 push_element(dtd_parser *p, dtd_element *e, int callback);
00097
00098 void
00099 sgml_cplocation(dtd_srcloc *d, dtd_srcloc *loc);
00100
00101 static const ichar *
00102 isee_func(dtd *dtd, const ichar *in, charfunc func);
00103
00104 static const ichar *
00105 process_attributes(dtd_parser *p, dtd_element *e, const ichar *decl,
00106 sgml_attribute *atts, int *argc);
00107
00108 static void
00109 allow_for(dtd_element *in, dtd_element *e);
00110
00111 static ichar const *
00112 get_attribute_value(dtd_parser *p, ichar const *decl, sgml_attribute *att);
00113
00114 static const ichar *
00115 itake_nmtoken(dtd *dtd, const ichar *in, dtd_symbol **id);
00116
00117 static dtd_attr *
00118 find_attribute(dtd_element *e, dtd_symbol *name);
00119
00120 static int
00121 add_default_attributes(dtd_parser *p, dtd_element *e,
00122 int natts, sgml_attribute *atts);
00123
00124 static void
00125 set_element_properties(dtd_element *e, dtd_attr *a);
00126
00127 static void
00128 free_attribute_values(int argc, sgml_attribute *argv);
00129
00130 static void
00131 free_attribute(dtd_attr *a);
00132
00133 static const ichar *
00134 itake_string(dtd *dtd, const ichar *in, ichar *out, int len);
00135
00136 static void
00137 add_submodel(dtd_model *m, dtd_model *sub);
00138
00139 static void
00140 free_environment(sgml_environment *env);
00141
00142 static void
00143 validate_completeness(sgml_environment *env);
00144
00145 static int
00146 emit_cdata(dtd_parser *p, int last);
00147
00148 static int
00149 complete(sgml_environment *env);
00150
00151 static void
00152 push_location(dtd_parser *p, locbuf *save);
00153
00154 static void
00155 pop_location(dtd_parser *p, locbuf *saved);
00156
00157 static void
00158 inc_location(dtd_srcloc *l, int chr);
00159
00160 static void
00161 dec_location(dtd_srcloc *l, int chr);
00162
00163 static __inline void
00164 _sgml_cplocation(dtd_srcloc *d, dtd_srcloc *loc);
00165
00166
00167 void
00168 sgml_cplocation(dtd_srcloc *d, dtd_srcloc *loc);
00169
00170 #define sgml_cplocation(d,s) _sgml_cplocation(d, s)
00171
00172 static int
00173 close_element(dtd_parser *p, dtd_element *e, int conref);
00174
00175 static int
00176 process_entity(dtd_parser *p, const ichar *name);
00177
00178 static int
00179 process_entity_declaration(dtd_parser *p, const ichar *decl);
00180
00181 static dtd_symbol *
00182 dtd_find_entity_symbol(dtd *dtd, const ichar *name);
00183
00184 static const ichar *
00185 isee_identifier(dtd *dtd, const ichar *in, char *id);
00186
00187 static const ichar *
00188 itake_entity_name(dtd *dtd, const ichar *in, dtd_symbol **id);
00189
00190
00191 static ichar *
00192 baseurl(dtd_parser *p);
00193
00194 static dtd_entity *
00195 find_pentity(dtd *dtd, dtd_symbol *id);
00196
00197 void
00198 set_src_dtd_parser(dtd_parser *p, input_type type, const char *name);
00199
00200 static const ichar *
00201 process_entity_value_declaration(dtd_parser *p,
00202 const ichar *decl, dtd_entity *e);
00203
00204 static dtd_symbol_table *
00205 new_symbol_table();
00206
00207 static int
00208 expand_pentities(dtd_parser *p, const ichar *in, ichar *out, int len);
00209
00210 static const ichar *
00211 entity_value(dtd_parser *p, dtd_entity *e, int *len);
00212
00213
00214 static const ichar *
00215 isee_character_entity(dtd *dtd, const ichar *in, int *chr);
00216
00217 static int
00218 char_entity_value(const ichar *decl);
00219
00220
00221 int
00222 sgml_process_file(dtd_parser *p, const char *file, unsigned flags);
00223
00224 static int
00225 process_cdata(dtd_parser *p, int last);
00226
00227 int
00228 end_document_dtd_parser_(dtd_parser *p);
00229
00230 int
00231 end_document_dtd_parser(dtd_parser *p);
00232 int
00233 sgml_process_stream(dtd_parser *p, char * buf, unsigned flags, int source_len);
00234
00235 static const ichar *
00236 itake_url(dtd *dtd, const ichar *in, ichar **out);
00237
00238 static int
00239 pop_to(dtd_parser *p, sgml_environment *to, dtd_element *e0);
00240
00241 static const char *
00242 entity_file(dtd *dtd, dtd_entity *e);
00243
00244 static int
00245 representable_char(dtd_parser *p, int chr);
00246
00247 static int
00248 process_net(dtd_parser *p);
00249
00250 static void
00251 update_space_mode(dtd_parser *p, dtd_element *e, int natts, sgml_attribute *atts);
00252
00253 static dtd_space_mode
00254 istr_to_space_mode(const ichar *val);
00255
00256
00257 static int
00258 process_element_declaraction(dtd_parser *p, const ichar *decl);
00259
00260 static int
00261 process_doctype(dtd_parser *p, const ichar *decl, const ichar *decl0);
00262
00263 static const ichar *
00264 itake_el_or_model_element_list(dtd *dtd, const ichar *decl, dtd_symbol **names,
00265 int *n);
00266
00267
00268 static dtd_model *
00269 make_model(dtd *dtd, const ichar *decl, const ichar **end);
00270
00271
00272 static void
00273 free_elements(dtd_element *e);
00274
00275 static void
00276 free_element_definition(dtd_edef *def);
00277
00278 static void
00279 free_model(dtd_model *m);
00280
00281 static void
00282 free_element_list(dtd_element_list *l);
00283
00284
00285 static void
00286 for_elements_in_model(dtd_model *m,
00287 void (*f)(dtd_element *e, void *closure),
00288 void *closure);
00289
00290
00291 static void
00292 add_list_element(dtd_element *e, void *closure);
00293
00294 static const ichar *
00295 process_model(dtd *dtd, dtd_edef *e, const ichar *decl);
00296
00297
00298 static const ichar *
00299 itake_namegroup(dtd *dtd, charfunc sep, const ichar *decl,
00300 dtd_symbol **names, int *n);
00301
00302
00303 static void
00304 add_element_list(dtd_element_list **l, dtd_element *e);
00305
00306
00307 static void
00308 free_attribute_list(dtd_attr_list *l);
00309
00310
00311 static void
00312 process_marked_section(dtd_parser *p);
00313
00314 static void
00315 free_name_list(dtd_name_list *nl);
00316
00317
00318 static void
00319 recover_parser(dtd_parser *p);
00320
00321 static int
00322 close_current_element(dtd_parser *p);
00323
00324
00325 static dtd_parser *
00326 clone_dtd_parser(dtd_parser *p);
00327
00328 void
00329 reset_document_dtd_parser(dtd_parser *p);
00330
00331
00332 static const ichar *
00333 itake_nmtoken_chars(dtd *dtd, const ichar *in, ichar *out, int len);
00334
00335 static int
00336 process_attlist_declaraction(dtd_parser *p, const ichar *decl);
00337
00338 static int
00339 process_pi(dtd_parser *p, const ichar *decl);
00340
00341 static int
00342 match_map(dtd *dtd, dtd_map *map, int len, ichar *data);
00343
/* Result type of in_or_excluded(): tells whether an element is neither
 * included nor excluded, explicitly included, or explicitly excluded. */
typedef enum {
  IE_NORMAL = 0,
  IE_INCLUDED,
  IE_EXCLUDED
} includetype;
00349
00350
00351
00352 static void
00353 add_name_list(dtd_name_list **nl, dtd_symbol *s);
00354
00355 static includetype
00356 in_or_excluded(sgml_environment *env, dtd_element *e);
00357
00358 static void
00359 pop_marked_section(dtd_parser *p);
00360
00361
00362 static const ichar *
00363 isee_ngsep(dtd *dtd, const ichar *decl, charfunc *sep);
00364
00365 static const ichar *
00366 itake_nutoken(dtd *dtd, const ichar *in, dtd_symbol **id);
00367
00368
00369 static const ichar *
00370 itake_number(dtd *dtd, const ichar *in, dtd_attr *at);
00371
00372 static void
00373 add_verbatim_cdata(dtd_parser *p, int chr);
00374
00375 static void
00376 set_encoding(dtd_parser *p, const ichar *enc);
00377
00378 static void
00379 init_decoding(dtd_parser *p);
00380
00381 static int
00382 process_notation_declaration(dtd_parser *p, const ichar *decl);
00383
00384 static dtd_notation *
00385 find_notation(dtd *dtd, dtd_symbol *name);
00386
00387 static const ichar *
00388 itake_dubbed_string(dtd *dtd, const ichar *in, ichar **out);
00389
00390 static int
00391 process_end_element(dtd_parser *p, const ichar *decl);
00392
00393 static void
00394 add_notation(dtd *dtd, dtd_notation *not);
00395
00396
00397 static int
00398 process_chars(dtd_parser *p, input_type in, const ichar *name, const ichar *s);
00399
00400 typedef struct
00401 { dtd_symbol **list;
00402 int size;
00403 } namelist;
00404
00405 static int
00406 process_include(dtd_parser *p, const ichar *entity_name);
00407
00408
00409 static dtd_shortref *
00410 def_shortref(dtd_parser *p, dtd_symbol *name);
00411
00412 static int
00413 process_shortref_declaration(dtd_parser *p, const ichar *decl);
00414
00415 static const ichar *
00416 shortref_add_map(dtd *dtd, const ichar *decl, dtd_shortref *sr);
00417
00418
00419 static void
00420 compile_map(dtd *dtd, dtd_shortref *sr);
00421
00422 static int
00423 process_usemap_declaration(dtd_parser *p, const ichar *decl);
00424
00425 static dtd_shortref *
00426 find_map(dtd *dtd, dtd_symbol *name);
00427
00428 static void
00429 set_map_element(dtd_element *e, void *closure);
00430
00431 static int
00432 expand_entities(dtd_parser *p, const ichar *in, ochar *out, int len);
00433
00434 static ichar const *
00435 itake_unquoted(dtd *dtd, ichar const *in, ichar *out, int len);
00436
00437 void
00438 free_dtd_parser(dtd_parser *p);
00439
00440 void
00441 free_dtd(dtd *dtd);
00442
00443 static void
00444 free_entity_list(dtd_entity *e);
00445
00446 static void
00447 free_notations(dtd_notation *n);
00448
00449 static void
00450 free_shortrefs(dtd_shortref *sr);
00451
00452 static void
00453 free_maps(dtd_map *map);
00454
00455
00456 static void
00457 free_symbol_table(dtd_symbol_table *t);
00458
00459 int
00460 is_absolute_path(const char *name);
00461
00462 #ifdef UTF8
00463 static void
00464 process_utf8(dtd_parser *p, int chr);
00465 #endif
00466
00467 char *
00468 localpath(const char *ref, const char *name);
00469
00470 static char *
00471 DirName(const char *f, char *dir);
00472
00473 static char *
00474 format_location(char *s, dtd_srcloc *l);
00475
00476 static void
00477 format_message(dtd_error *e);
00478
00479 int
00480 gripe(dtd_error_id e, ...);
00481
00482
00483 static int
00484 set_option_dtd(dtd *dtd, dtd_option option, char * set);
00485
/* Path-separator helpers.  WIN_NT builds accept both '/' and '\\' as a
 * directory separator and use "\\" when composing paths; every other
 * build uses '/' only. */
#if defined(WIN_NT)
#define isDirSep(c) ((c) == '/' || (c) == '\\')
#define DIRSEPSTR "\\"
#else
#define isDirSep(c) ((c) == '/')
#define DIRSEPSTR "/"
#endif


/* End-of-string character, unless a header already supplied one. */
#if !defined(EOS)
#define EOS '\0'
#endif

/* Boolean constants, defined as a pair only when TRUE is not yet known. */
#if !defined(TRUE)
#define TRUE 1
#define FALSE 0
#endif
00503
00509 void
00510 set_mode_dtd_parser(dtd_parser *p, data_mode m)
00511 { p->dmode = m;
00512 p->state = S_PCDATA;
00513 p->blank_cdata = TRUE;
00514 }
00515
/* Entity declarations installed by set_dialect_dtd() for the XML dialects;
 * each string is handed to process_entity_declaration().  The list is
 * NULL-terminated.
 *
 * The replacement text is written as a numeric character reference rather
 * than the literal character:
 *   - a literal `"` cannot appear inside a `"`-delimited literal (and the
 *     previous spelling of the `quot` entry was not even a valid C string);
 *   - literal `<` or `&` in replacement text would be read as markup again
 *     when the entity is expanded.
 * These are the same code points the XML 1.0 predefined entities denote.
 */
static char *xml_entities[] =
{ "lt CDATA \"&#60;\"",     /* < */
  "gt CDATA \"&#62;\"",     /* > */
  "amp CDATA \"&#38;\"",    /* & */
  "apos CDATA \"&#39;\"",   /* ' */
  "quot CDATA \"&#34;\"",   /* " */
  NULL
};
00527
/* Non-zero when the two NUL-terminated strings compare equal. */
#define streq(s1, s2) (!strcmp(s1, s2))
00529
00535 static __inline void
00536 setlocation(dtd_srcloc *d, dtd_srcloc *loc, int line, int lpos)
00537 { d->line = line;
00538 d->linepos = lpos;
00539 d->charpos = loc->charpos - 1;
00540 d->type = loc->type;
00541 d->name = loc->name;
00542 }
00543
00544
00550 static dtd_symbol_table *
00551 new_symbol_table()
00552 {
00553 dtd_symbol_table *t = sgml_calloc(1, sizeof(*t));
00554 t->size = SYMBOLHASHSIZE;
00555 t->entries = calloc(t->size, sizeof(dtd_symbol*));
00556
00557 return t;
00558 }
00559
00566 int
00567 set_dialect_dtd(dtd *dtd, dtd_dialect dialect)
00568 {
00569 dtd->dialect = dialect;
00570
00571 switch(dialect)
00572 {
00573 case DL_SGML:
00574 {
00575 dtd->case_sensitive = FALSE;
00576 dtd->space_mode = SP_SGML;
00577 dtd->shorttag = TRUE;
00578 break;
00579 }
00580 case DL_XML:
00581 case DL_XMLNS:
00582 {
00583 char **el;
00584 dtd_parser p;
00585
00586 dtd->case_sensitive = TRUE;
00587 dtd->encoding = ENC_UTF8;
00588 dtd->space_mode = SP_PRESERVE;
00589 dtd->shorttag = FALSE;
00590
00591 memset(&p, 0, sizeof(p));
00592 p.dtd = dtd;
00593
00594 for(el = xml_entities; *el; el++)
00595 {
00596 process_entity_declaration(&p, *el);
00597 }
00598
00599 break;
00600 }
00601 }
00602 return TRUE;
00603 }
00604
00611 static void
00612 add_cdata(dtd_parser *p, int chr)
00613 {
00614 if ( p->mark_state == MS_INCLUDE )
00615 {
00616 ocharbuf *buf = p->cdata;
00617
00618 if ( p->blank_cdata == TRUE && !HasClass(p->dtd, chr, CH_BLANK))
00619 {
00620
00621 p->cdata_must_be_empty = !open_element(p, CDATA_ELEMENT, FALSE);
00622 p->blank_cdata = FALSE;
00623
00624 }
00625
00626 if ( chr == '\n' )
00627 {
00628 int sz;
00629
00630 if ( (sz=buf->size) == 0 || buf->data[sz-1] != CR )
00631 add_cdata(p, CR);
00632 }
00633
00634 add_ocharbuf(buf, chr);
00635
00636 if ( p->map && p->map->ends[chr] && match_shortref(p) )
00637 {
00638 return;
00639 }
00640
00641 if ( chr == '\n' )
00642 {
00643 int sz;
00644 if ( (sz=buf->size) > 1 &&
00645 buf->data[sz-1] == LF &&
00646 buf->data[sz-2] == CR )
00647 {
00648 buf->data[sz-2] = LF;
00649 buf->size--;
00650 }
00651
00652 }
00653
00654 }
00655 }
00656
00660 static int
00661 match_map(dtd *dtd, dtd_map *map, int len, ichar *data)
00662 { ichar *e = data+len-1;
00663 ichar *m = map->from+map->len-1;
00664
00665 while( m >= map->from )
00666 { if ( e < data )
00667 return 0;
00668
00669 if ( *m == *e )
00670 { m--;
00671 e--;
00672 continue;
00673 }
00674 if ( *m == CHR_DBLANK )
00675 { if ( e>data && HasClass(dtd, *e, CH_WHITE) )
00676 e--;
00677 else
00678 return FALSE;
00679 goto blank;
00680 }
00681 if ( *m == CHR_BLANK )
00682 { blank:
00683 while( e>data && HasClass(dtd, *e, CH_WHITE) )
00684 e--;
00685 m--;
00686 continue;
00687 }
00688 return 0;
00689 }
00690
00691 return data+len-1-e;
00692 }
00693
00694
00695
00696 static int
00697 match_shortref(dtd_parser *p)
00698 {
00699 dtd_map *map;
00700
00701 for(map = p->map->map; map; map = map->next)
00702 {
00703 int len;
00704
00705 if ( (len=match_map(p->dtd, map,
00706 p->cdata->size, (ichar *)p->cdata->data)) )
00707 { p->cdata->size -= len;
00708
00709 if ( p->cdata_must_be_empty )
00710 {
00711 int blank = TRUE;
00712 const ichar *s;
00713 int i;
00714
00715 for(s = p->cdata->data, i=0; i++ < p->cdata->size; s++)
00716 {
00717 if ( !HasClass(p->dtd, *s, CH_BLANK) )
00718 { blank = FALSE;
00719 break;
00720 }
00721 }
00722
00723 p->blank_cdata = blank;
00724 }
00725 WITH_CLASS(p, EV_SHORTREF,
00726 {
00727 sgml_cplocation(&p->startloc, &p->location);
00728 p->startloc.charpos -= len;
00729 p->startloc.linepos -= len;
00730 if ( p->startloc.linepos < 0 )
00731 { p->startloc.line--;
00732 p->startloc.linepos = 0;
00733 }
00734 process_entity(p, map->to->name);
00735 })
00736 return TRUE;
00737 }
00738 }
00739
00740 return FALSE;
00741
00742 }
00743
00747 static void
00748 pop_marked_section(dtd_parser *p)
00749 { dtd_marked *m = p->marked;
00750
00751 if ( m )
00752 { p->marked = m->parent;
00753 free(m);
00754 p->mark_state = (p->marked ? p->marked->type : MS_INCLUDE);
00755 }
00756 }
00757
00758
00759 static int
00760 complete(sgml_environment *env)
00761 { if ( env->element->structure &&
00762 !env->element->undefined &&
00763 env->element->structure->type != C_ANY )
00764 { dtd_edef *def = env->element->structure;
00765
00766 if ( !same_state(def->final_state, env->state) )
00767 return FALSE;
00768 }
00769
00770 return TRUE;
00771 }
00772
00785 static void
00786 process_marked_section(dtd_parser *p)
00787 { ichar buf[MAXSTRLEN];
00788 dtd *dtd = p->dtd;
00789 const ichar *decl = p->buffer->data;
00790 const ichar *s;
00791
00792 if ( (decl=isee_func(dtd, decl, CF_MDO2)) &&
00793 (decl=isee_func(dtd, decl, CF_DSO)) &&
00794 expand_pentities(p, decl, buf, sizeof(buf)) )
00795 { dtd_symbol *kwd;
00796
00797 decl = buf;
00798
00799 if ( (s=itake_name(dtd, decl, &kwd)) &&
00800 isee_func(dtd, s, CF_DSO) )
00801 { dtd_marked *m = sgml_calloc(1, sizeof(*m));
00802
00803 m->keyword = kwd;
00804 m->parent = p->marked;
00805 p->marked = m;
00806
00807 if ( istrcaseeq(kwd->name, "IGNORE") )
00808 m->type = MS_IGNORE;
00809 else if ( istrcaseeq(kwd->name, "INCLUDE") )
00810 m->type = MS_INCLUDE;
00811 else if ( istrcaseeq(kwd->name, "TEMP") )
00812 m->type = MS_INCLUDE;
00813 else if ( istrcaseeq(kwd->name, "CDATA") )
00814 m->type = MS_CDATA;
00815 else if ( istrcaseeq(kwd->name, "RCDATA") )
00816 m->type = MS_RCDATA;
00817 else
00818 m->type = MS_INCLUDE;
00819 empty_icharbuf(p->buffer);
00820 if ( m->type == MS_CDATA )
00821 p->state = S_MSCDATA;
00822 else
00823 p->state = S_PCDATA;
00824 if ( p->mark_state != MS_IGNORE )
00825 p->mark_state = m->type;
00826 }
00827 } else
00828 { decl = p->buffer->data;
00829
00830 if ( (decl=isee_func(dtd, decl, CF_MDO2)) &&
00831 !isee_func(dtd, decl, CF_DSO) )
00832 { p->state = S_GROUP;
00833 p->grouplevel = 1;
00834 }
00835 }
00836 }
00837
00838
00839
00849 static int
00850 process_net(dtd_parser *p)
00851 { sgml_environment *env;
00852
00853 prepare_cdata(p);
00854 for(env = p->environments; env; env=env->parent)
00855 { if ( env->wants_net )
00856 { sgml_environment *parent;
00857
00858 pop_to(p, env, NULL);
00859 validate_completeness(env);
00860 parent = env->parent;
00861
00862 emit_cdata(p, TRUE);
00863 p->first = FALSE;
00864
00865 if ( p->on_end_element )
00866 { WITH_CLASS(p, EV_SHORTTAG,
00867 (*p->on_end_element)(p, env->element));
00868 }
00869
00870 free_environment(env);
00871 p->environments = parent;
00872 p->map = (parent ? parent->map : NULL);
00873
00874 return TRUE;
00875 }
00876 }
00877
00878 return FALSE;
00879 }
00880
00881
00888 static void
00889 recover_parser(dtd_parser *p)
00890 { const ichar *s;
00891 dtd *dtd = p->dtd;
00892
00893 terminate_icharbuf(p->buffer);
00894 add_cdata(p, dtd->charmap->map[p->saved]);
00895 for(s=p->buffer->data; *s; s++)
00896 add_cdata(p, dtd->charmap->map[*s]);
00897 p->state = S_PCDATA;
00898 }
00899
00910 static int
00911 process_pi(dtd_parser *p, const ichar *decl)
00912 { const ichar *s;
00913 dtd *dtd = p->dtd;
00914
00915 if ( (s=isee_identifier(dtd, decl, "xml")) )
00916 { decl = s;
00917
00918
00919 while(*decl)
00920 { dtd_symbol *nm;
00921
00922
00923 if ( (s=itake_name(dtd, decl, &nm)) &&
00924 (s=isee_func(dtd, s, CF_VI)) )
00925 { ichar buf[MAXSTRINGLEN];
00926 const ichar *end;
00927
00928
00929 if ( !(end=itake_string(dtd, s, buf, sizeof(buf))) )
00930 end=itake_nmtoken_chars(dtd, s, buf, sizeof(buf));
00931
00932 if ( end )
00933 { decl = end;
00934
00935 if ( istrcaseeq(nm->name, "encoding") )
00936 set_encoding(p, buf);
00937
00938 continue;
00939 }
00940 }
00941 gripe(ERC_SYNTAX_ERROR, "Illegal XML parameter", decl);
00942 break;
00943 }
00944
00945 switch(dtd->dialect)
00946 {
00947 case DL_SGML:
00948 set_dialect_dtd(dtd, DL_XML);
00949 break;
00950 case DL_XML:
00951 case DL_XMLNS:
00952 break;
00953 }
00954 return TRUE;
00955 }
00956 if ( p->on_pi )
00957 (*p->on_pi)(p, decl);
00958
00959 return FALSE;
00960 }
00961
00962
00968 static void
00969 set_encoding(dtd_parser *p, const ichar *enc)
00970 {
00971 dtd *dtd = p->dtd;
00972
00973 if ( istrcaseeq(enc, "iso-8859-1") )
00974 { dtd->encoding = ENC_ISO_LATIN1;
00975 } else if ( istrcaseeq(enc, "utf-8") )
00976 { dtd->encoding = ENC_UTF8;
00977 } else
00978 gripe(ERC_EXISTENCE, "character encoding", enc);
00979
00980 init_decoding(p);
00981 }
00982
00987 static void
00988 init_decoding(dtd_parser *p)
00989 {
00990 #ifdef UTF8
00991 int decode;
00992 dtd *dtd = p->dtd;
00993
00994 if ( dtd->encoding == ENC_UTF8 &&
00995 p->encoding == ENC_ISO_LATIN1 )
00996 decode = TRUE;
00997 else
00998 decode = FALSE;
00999
01000 if ( p->utf8_decode != decode )
01001 {
01002
01003
01004
01005 p->utf8_decode = decode;
01006 }
01007 #endif
01008 }
01009
01010
01016 void
01017 reset_document_dtd_parser(dtd_parser *p)
01018 {
01019
01020 if ( p->environments )
01021 {
01022 sgml_environment *env, *parent;
01023
01024 for(env = p->environments; env; env=parent)
01025 {
01026 parent = env->parent;
01027
01028 free_environment(env);
01029 }
01030
01031 p->environments = NULL;
01032 }
01033 while(p->marked)
01034 pop_marked_section(p);
01035
01036
01037 empty_icharbuf(p->buffer);
01038 empty_ocharbuf(p->cdata);
01039
01040
01041 p->mark_state = MS_INCLUDE;
01042 p->state = S_PCDATA;
01043 p->grouplevel = 0;
01044 p->blank_cdata = TRUE;
01045 p->event_class = EV_EXPLICIT;
01046 p->dmode = DM_DATA;
01047
01048
01049 }
01050
01057 static void
01058 add_verbatim_cdata(dtd_parser *p, int chr)
01059 { if ( p->mark_state != MS_IGNORE )
01060 { ocharbuf *buf = p->cdata;
01061
01062 if ( p->blank_cdata == TRUE && !HasClass(p->dtd, chr, CH_BLANK) )
01063 { p->cdata_must_be_empty = !open_element(p, CDATA_ELEMENT, FALSE);
01064 p->blank_cdata = FALSE;
01065 }
01066
01067 if ( chr == '\n' && buf->size > 0 && buf->data[buf->size-1] == '\r' )
01068 buf->size--;
01069
01070 add_ocharbuf(buf, chr);
01071 }
01072 }
01073
01080 void
01081 putchar_dtd_parser(dtd_parser *p, int chr)
01082 {
01083 dtd *dtd = p->dtd;
01084 const ichar *f = dtd->charfunc->func;
01085 int line = p->location.line;
01086 int lpos = p->location.linepos;
01087
01088
01089
01090 if ( f[CF_RS] == chr )
01091 {
01092 p->location.line++;
01093 p->location.linepos = 0;
01094 } else
01095 {
01096 if ( f[CF_RE] == chr )
01097 p->location.linepos = 0;
01098 else
01099 p->location.linepos++;
01100 }
01101
01102 p->location.charpos++;
01103 chr &= 0x000000ff;
01104 reprocess:
01105
01106
01107 switch(p->state)
01108 {
01109 case S_PCDATA:
01110 {
01111 if ( f[CF_MDO1] == chr )
01112 {
01113 setlocation(&p->startloc, &p->location, line, lpos);
01114 p->state = S_DECL0;
01115 empty_icharbuf(p->buffer);
01116 return;
01117 }
01118
01119 if ( p->dmode == DM_DTD )
01120 {
01121 if ( f[CF_PERO] == chr )
01122 {
01123 setlocation(&p->startloc, &p->location, line, lpos);
01124 p->state = S_PENT;
01125 return;
01126 }
01127 }
01128 else
01129 {
01130 if ( f[CF_ERO] == chr )
01131 {
01132 p->state = S_ENT0;
01133 return;
01134 }
01135 }
01136
01137 if ( p->marked && f[CF_DSC] == chr )
01138 {
01139 empty_icharbuf(p->buffer);
01140 p->state = S_EMSC1;
01141 p->saved = chr;
01142 return;
01143 }
01144
01145
01146 if ( p->waiting_for_net && f[CF_ETAGO2] == chr )
01147 {
01148 setlocation(&p->startloc, &p->location, line, lpos);
01149 WITH_PARSER( p, process_net(p));
01150 return;
01151 }
01152
01153 #ifdef UTF8
01154 if ( p->utf8_decode && ISUTF8_MB(chr) )
01155 {
01156 process_utf8(p, chr);
01157 return;
01158 }
01159 #endif
01160
01161 if ( p->cdata->size == 0 )
01162 setlocation(&p->startcdata, &p->location, line, lpos);
01163 add_cdata(p, dtd->charmap->map[chr]);
01164 return;
01165 }
01166 case S_ECDATA2:
01167 {
01168 if ( f[CF_MDC] == chr &&
01169 p->etaglen == p->buffer->size &&
01170 istrncaseeq(p->buffer->data, p->etag, p->etaglen) )
01171 {
01172 p->cdata->size -= p->etaglen+2;
01173 terminate_ocharbuf(p->cdata);
01174 terminate_icharbuf(p->buffer);
01175 if ( p->mark_state == MS_INCLUDE )
01176 {
01177 WITH_PARSER(p,
01178 process_cdata(p, TRUE);
01179 process_end_element(p, p->buffer->data));
01180 empty_cdata(p);
01181 }
01182 empty_icharbuf(p->buffer);
01183 p->cdata_state = p->state = S_PCDATA;
01184 }
01185 else
01186 {
01187 add_verbatim_cdata(p, dtd->charmap->map[chr]);
01188 if ( p->etaglen < p->buffer->size || !HasClass(dtd, chr, CH_NAME))
01189 {
01190 empty_icharbuf(p->buffer);
01191 p->state = p->cdata_state;
01192 }
01193 else
01194 add_icharbuf(p->buffer, chr);
01195 }
01196 return;
01197 }
01198
01199 case S_ECDATA1:
01200 {
01201 add_verbatim_cdata(p, dtd->charmap->map[chr]);
01202 if ( f[CF_ETAGO2] == chr )
01203 {
01204 empty_icharbuf(p->buffer);
01205 p->state = S_ECDATA2;
01206 }
01207 else if ( f[CF_ETAGO1] != chr )
01208 p->state = p->cdata_state;
01209 return;
01210 }
01211
01212 case S_RCDATA:
01213 {
01214 if ( f[CF_ERO] == chr )
01215 {
01216 setlocation(&p->startloc, &p->location, line, lpos);
01217 p->state = S_ENT0;
01218 return;
01219 }
01220
01221 }
01222
01223 case S_CDATA:
01224 {
01225 add_verbatim_cdata(p, dtd->charmap->map[chr]);
01226
01227 if ( f[CF_MDO1] == chr )
01228 {
01229 setlocation(&p->startloc, &p->location, line, lpos);
01230 p->state = S_ECDATA1;
01231 }
01232
01233
01234 if ( p->waiting_for_net && f[CF_ETAGO2] == chr )
01235 {
01236 setlocation(&p->startloc, &p->location, line, lpos);
01237 p->cdata->size--;
01238 terminate_ocharbuf(p->cdata);
01239 terminate_icharbuf(p->buffer);
01240 if ( p->mark_state == MS_INCLUDE )
01241 {
01242 WITH_PARSER(p,
01243 process_cdata(p, TRUE);
01244 process_net(p));
01245 empty_cdata(p);
01246 }
01247 empty_icharbuf(p->buffer);
01248 p->cdata_state = p->state = S_PCDATA;
01249 }
01250
01251 return;
01252 }
01253 case S_PENT:
01254 {
01255
01256 if ( f[CF_ERC] == chr )
01257 {
01258 p->state = S_PCDATA;
01259 terminate_icharbuf(p->buffer);
01260 if ( p->mark_state == MS_INCLUDE )
01261 {
01262 WITH_PARSER(p, process_include(p, p->buffer->data));
01263 }
01264 empty_icharbuf(p->buffer);
01265 return;
01266 }
01267 if ( HasClass(dtd, chr, CH_NAME) )
01268 {
01269 add_icharbuf(p->buffer, chr);
01270 return;
01271 }
01272
01273 terminate_icharbuf(p->buffer);
01274 gripe(ERC_SYNTAX_ERROR, "Illegal parameter entity", p->buffer->data);
01275 break;
01276 }
01277
01278 case S_DECL0:
01279 {
01280 if ( f[CF_ETAGO2] == chr )
01281 {
01282 add_icharbuf(p->buffer, chr);
01283 p->state = S_DECL;
01284 }
01285 else if ( HasClass(dtd, chr, CH_NAME) )
01286 {
01287 add_icharbuf(p->buffer, chr);
01288 p->state = S_DECL;
01289 }
01290 else if ( f[CF_MDO2] == chr )
01291 {
01292 p->state = S_MDECL0;
01293 }
01294 else if ( f[CF_PRO2] == chr )
01295 {
01296 p->state = S_PI;
01297 }
01298 else
01299 {
01300 add_cdata(p, f[CF_MDO1]);
01301 add_cdata(p, chr);
01302 p->state = S_PCDATA;
01303 }
01304
01305 return;
01306 break;
01307 }
01308 case S_PI:
01309 {
01310 add_icharbuf(p->buffer, chr);
01311 if ( f[CF_PRO2] == chr )
01312 p->state = S_PI2;
01313 if ( f[CF_PRC] == chr )
01314 goto pi;
01315 return;
01316 }
01317 case S_PI2:
01318 {
01319 if ( f[CF_PRC] == chr )
01320 {
01321 pi:
01322 process_cdata(p, FALSE);
01323 p->state = S_PCDATA;
01324 p->buffer->size--;
01325 terminate_icharbuf(p->buffer);
01326 if ( p->mark_state == MS_INCLUDE )
01327 {
01328 WITH_PARSER(p, process_pi(p, p->buffer->data));
01329 }
01330 empty_icharbuf(p->buffer);
01331 return;
01332 }
01333 add_icharbuf(p->buffer, chr);
01334 p->state = S_PI;
01335 return;
01336 }
01337
01338 case S_MDECL0:
01339 {
01340 if ( f[CF_CMT] == chr )
01341 {
01342 p->state = S_CMTO;
01343 return;
01344 }
01345 add_icharbuf(p->buffer, f[CF_MDO2]);
01346 add_icharbuf(p->buffer, chr);
01347 p->state = S_DECL;
01348 return;
01349 }
01350
01351 case S_DECL:
01352 {
01353 if(f[CF_MDC] == chr)
01354 {
01355 prepare_cdata( p);
01356 p->state = S_PCDATA;
01357 terminate_icharbuf( p->buffer);
01358
01359 if ( p->mark_state == MS_INCLUDE )
01360 {
01361 WITH_PARSER(p, process_declaration(p, p->buffer->data));
01362 }
01363 empty_icharbuf( p->buffer);
01364 return;
01365 }
01366
01367 if ( dtd->shorttag && f[CF_ETAGO2] == chr && p->buffer->size > 0 )
01368 {
01369 prepare_cdata(p);
01370 p->state = S_PCDATA;
01371 terminate_icharbuf(p->buffer);
01372 if ( p->mark_state == MS_INCLUDE )
01373 { WITH_CLASS(p, EV_SHORTTAG,
01374 WITH_PARSER(p, process_declaration(p, p->buffer->data)));
01375 }
01376 empty_icharbuf(p->buffer);
01377 p->waiting_for_net = TRUE;
01378 return;
01379 }
01380
01381 add_icharbuf(p->buffer, chr);
01382
01383 if ( f[CF_LIT] == chr )
01384 {
01385 p->state = S_STRING;
01386 p->saved = chr;
01387 p->lit_saved_state = S_DECL;
01388 }
01389 else if ( f[CF_LITA] == chr )
01390 {
01391 p->state = S_STRING;
01392 p->saved = chr;
01393 p->lit_saved_state = S_DECL;
01394 return;
01395 }
01396
01397 else if ( f[CF_DSO] == chr )
01398 {
01399 terminate_icharbuf(p->buffer);
01400
01401 process_marked_section(p);
01402 }
01403
01404 else if ( f[CF_CMT] == chr &&
01405 p->buffer->data[0] == f[CF_MDO2] )
01406 {
01407 p->state = S_DECLCMT0;
01408 }
01409 break;
01410
01411
01412 }
01413 case S_ENT0:
01414 {
01415 if ( chr == '#' || HasClass(dtd, chr, CH_NAME) )
01416 {
01417 empty_icharbuf(p->buffer);
01418 add_icharbuf(p->buffer, chr);
01419 p->state = S_ENT;
01420 }
01421 else
01422 {
01423 add_cdata(p, f[CF_ERO]);
01424 p->state = p->cdata_state;
01425 goto reprocess;
01426 }
01427
01428 return;
01429 }
01430 case S_ENT:
01431 {
01432
01433 if ( HasClass(dtd, chr, CH_NAME) )
01434 {
01435 add_icharbuf(p->buffer, chr);
01436 return;
01437 }
01438 terminate_icharbuf(p->buffer);
01439 p->state = p->cdata_state;
01440
01441
01442
01443
01444 if( (f[CF_ERC] != chr) && (chr != '@') && !HasClass( dtd, chr, CH_WHITE) && !HasClass( dtd, chr, CH_RE) && !HasClass( dtd, chr, CH_RS) && (f[CF_ERO] != chr))
01445 {
01446 int i = 0;
01447
01448 add_ocharbuf( p->cdata, '&');
01449 for( i = 0; i < p->buffer->size; i++)
01450 {
01451 add_cdata(p, dtd->charmap->map[p->buffer->data[i]]);
01452 }
01453 goto reprocess;
01454 }
01455
01456 if ( p->mark_state == MS_INCLUDE )
01457 {
01458 WITH_PARSER(p, process_entity(p, p->buffer->data));
01459 }
01460
01461 empty_icharbuf(p->buffer);
01462
01463 if ( chr == CR ){
01464 p->state = S_ENTCR;
01465 break;
01466 }
01467 else if ( f[CF_ERC] != chr && chr != '\n' ) {
01468 goto reprocess;
01469 }
01470
01471 break;
01472 }
01473
01474 case S_ENTCR:
01475 {
01476 p->state = p->cdata_state;
01477 if ( chr != LF )
01478 goto reprocess;
01479 break;
01480 }
01481
01482 case S_DECLCMT0:
01483 {
01484 if ( f[CF_CMT] == chr )
01485 {
01486 p->buffer->size--;
01487 p->state = S_DECLCMT;
01488 }
01489 else
01490 {
01491 add_icharbuf(p->buffer, chr);
01492 p->state = S_DECL;
01493 }
01494 break;
01495 }
01496
01497 case S_DECLCMT:
01498 {
01499 if ( f[CF_CMT] == chr )
01500 p->state = S_DECLCMTE0;
01501 break;
01502 }
01503 case S_DECLCMTE0:
01504 {
01505 if ( f[CF_CMT] == chr )
01506 p->state = S_DECL;
01507 else
01508 p->state = S_DECLCMT;
01509 break;
01510 }
01511
01512 case S_CMTO:
01513 {
01514 if ( f[CF_CMT] == chr )
01515 {
01516 p->state = S_CMT;
01517 return;
01518 }
01519 else
01520 {
01521 add_cdata(p, f[CF_MDO1]);
01522 add_cdata(p, f[CF_MDO2]);
01523 add_cdata(p, f[CF_CMT]);
01524 add_cdata(p, chr);
01525 p->state = S_PCDATA;
01526 return;
01527 }
01528 }
01529 case S_CMT:
01530 {
01531 if ( f[CF_CMT] == chr )
01532 p->state = S_CMTE0;
01533 break;
01534 }
01535 case S_CMTE0:
01536 {
01537 if ( f[CF_CMT] == chr )
01538 p->state = S_CMTE1;
01539 else
01540 p->state = S_CMT;
01541 break;
01542 }
01543 case S_CMTE1:
01544 {
01545 if ( f[CF_MDC] == chr )
01546 {
01547 if ( p->on_decl )
01548 (*p->on_decl)(p, "");
01549 p->state = S_PCDATA;
01550
01551 }
01552 else
01553 p->state = S_CMT;
01554 break;
01555 }
01556
01557 case S_EMSC1:
01558 {
01559 if ( f[CF_DSC] == chr )
01560 {
01561 p->state = S_EMSC2;
01562 return;
01563 }
01564 else
01565 {
01566 add_icharbuf(p->buffer, chr);
01567 recover_parser(p);
01568 return;
01569 }
01570 }
01571
01572 case S_EMSC2:
01573 {
01574 if ( f[CF_MDC] == chr )
01575 {
01576 pop_marked_section(p);
01577 p->state = S_PCDATA;
01578 return;
01579 } else
01580 {
01581 add_icharbuf(p->buffer, chr);
01582 recover_parser(p);
01583 return;
01584 }
01585 }
01586
01587
01588 case S_GROUP:
01589 {
01590 add_icharbuf(p->buffer, chr);
01591 if ( f[CF_DSO] == chr )
01592 {
01593 p->grouplevel++;
01594 }
01595 else if ( f[CF_DSC] == chr )
01596 {
01597 if ( --p->grouplevel == 0 )
01598 p->state = S_DECL;
01599 }
01600 else if ( f[CF_LIT] == chr )
01601 {
01602 p->state = S_STRING;
01603 p->saved = chr;
01604 p->lit_saved_state = S_GROUP;
01605 }
01606 else if ( f[CF_LITA] == chr )
01607 {
01608 p->state = S_STRING;
01609 p->saved = chr;
01610 p->lit_saved_state = S_GROUP;
01611 return;
01612 }
01613 break;
01614 }
01615 case S_STRING:
01616 {
01617 add_icharbuf(p->buffer, chr);
01618 if ( chr == p->saved )
01619 p->state = p->lit_saved_state;
01620 break;
01621 }
01622
01623 case S_MSCDATA:
01624 {
01625 add_verbatim_cdata(p, dtd->charmap->map[chr]);
01626 if ( f[CF_DSC] == chr )
01627 p->state = S_EMSCDATA1;
01628 return;
01629 }
01630 case S_EMSCDATA1:
01631 {
01632 add_verbatim_cdata(p, dtd->charmap->map[chr]);
01633 if ( f[CF_DSC] == chr )
01634 p->state = S_EMSCDATA2;
01635 else
01636 p->state = S_MSCDATA;
01637 return;
01638 }
01639 case S_EMSCDATA2:
01640 {
01641 add_verbatim_cdata(p, dtd->charmap->map[chr]);
01642 if ( f[CF_MDC] == chr )
01643 {
01644 p->cdata->size -= 3;
01645 pop_marked_section(p);
01646 p->state = S_PCDATA;
01647 }
01648 else if ( f[CF_DSC] != chr )
01649 p->state = S_MSCDATA;
01650 return;
01651 }
01652
01653 #ifdef UTF8
01654 case S_UTF8:
01655 if ( (chr & 0xc0) != 0x80 )
01656 gripe(ERC_SYNTAX_ERROR, "Bad UTF-8 sequence", "");
01657 p->utf8_char <<= 6;
01658 p->utf8_char |= (chr & ~0xc0);
01659 if ( --p->utf8_left == 0 )
01660 {
01661 if ( p->utf8_char >= OUTPUT_CHARSET_SIZE &&
01662 p->mark_state == MS_INCLUDE )
01663 {
01664 if ( p->on_entity )
01665 {
01666 process_cdata(p, FALSE);
01667 (*p->on_entity)(p, NULL, p->utf8_char);
01668 goto utf8_done;
01669 }
01670 else
01671 gripe(ERC_REPRESENTATION, "character");
01672 }
01673 add_cdata(p, p->utf8_char);
01674 utf8_done:
01675 p->state = p->utf8_saved_state;
01676 }
01677
01678 break;
01679 #endif
01680
01681 }
01682 }
01683
01692 int
01693 gripe(dtd_error_id e, ...)
01694 { va_list args;
01695 char buf[1024];
01696 dtd_error error;
01697 int dtdmode = FALSE;
01698
01699 va_start(args, e);
01700
01701 memset(&error, 0, sizeof(error));
01702 error.minor = e;
01703
01704
01705 if ( current_parser )
01706 {
01707 error.location = ¤t_parser->location;
01708 if ( current_parser->dmode == DM_DTD )
01709 dtdmode = TRUE;
01710 }
01711 else
01712 {
01713 error.location = NULL;
01714 }
01715
01716 switch(e)
01717 {
01718
01719
01720 case ERC_REPRESENTATION:
01721 case ERC_RESOURCE:
01722 error.severity = ERS_ERROR;
01723 error.argv[0] = va_arg(args, char *);
01724 break;
01725
01726 case ERC_LIMIT:
01727 error.severity = ERS_WARNING;
01728 error.argv[0] = va_arg(args, char *);
01729 break;
01730
01731 case ERC_SYNTAX_ERROR:
01732 case ERC_SYNTAX_WARNING:
01733 { char *m = va_arg(args, char *);
01734 const char *s = va_arg(args, const char *);
01735
01736 if ( s && *s )
01737 { sprintf(buf, "%s, found \"%s\"", m, str_summary(s, 25));
01738 error.argv[0] = buf;
01739 } else
01740 error.argv[0] = m;
01741
01742 error.severity = (e == ERC_SYNTAX_WARNING ? ERS_WARNING : ERS_ERROR);
01743 e = ERC_SYNTAX_ERROR;
01744 break;
01745 }
01746
01747 case ERC_DOMAIN:
01748 { const char *expected = va_arg(args, const char *);
01749 const char *found = str_summary(va_arg(args, const char *), 25);
01750
01751 sprintf(buf, "Expected type %s, found \"%s\"", expected, found);
01752 error.argv[0] = buf;
01753 error.severity = ERS_ERROR;
01754 e = (dtdmode ? ERC_SYNTAX_ERROR : ERC_VALIDATE);
01755 break;
01756 }
01757
01758 case ERC_REDEFINED:
01759 { dtd_symbol *name;
01760 error.argv[0] = va_arg(args, char *);
01761 name = va_arg(args, dtd_symbol *);
01762 error.argv[1] = (char *)name->name;
01763 error.severity = ERS_STYLE;
01764 break;
01765 }
01766
01767 case ERC_EXISTENCE:
01768 { error.argv[0] = va_arg(args, char *);
01769 error.argv[1] = va_arg(args, char *);
01770 error.severity = ERS_ERROR;
01771 break;
01772 }
01773
01774 case ERC_VALIDATE:
01775 { error.argv[0] = va_arg(args, char *);
01776 error.severity = ERS_WARNING;
01777 break;
01778 }
01779
01780 case ERC_OMITTED_CLOSE:
01781 { const char *element = va_arg(args, const char *);
01782
01783 sprintf(buf, "Inserted omitted end-tag for \"%s\"", element);
01784 error.argv[0] = buf;
01785 error.severity = ERS_WARNING;
01786 e = ERC_VALIDATE;
01787 break;
01788 }
01789
01790 case ERC_OMITTED_OPEN:
01791 { const char *element = va_arg(args, const char *);
01792
01793 sprintf(buf, "Inserted omitted start-tag for \"%s\"", element);
01794 error.argv[0] = buf;
01795 error.severity = ERS_WARNING;
01796 e = ERC_VALIDATE;
01797 break;
01798 }
01799
01800 case ERC_NOT_OPEN:
01801 { const char *element = va_arg(args, const char *);
01802
01803 sprintf(buf, "Ignored end-tag for \"%s\" which is not open", element);
01804 error.argv[0] = buf;
01805 error.severity = ERS_WARNING;
01806 e = ERC_VALIDATE;
01807 break;
01808 }
01809
01810 case ERC_NOT_ALLOWED:
01811 { const char *element = va_arg(args, const char *);
01812
01813 sprintf(buf, "Element \"%s\" not allowed here", element);
01814 error.argv[0] = buf;
01815 error.severity = ERS_WARNING;
01816 e = ERC_VALIDATE;
01817 break;
01818 }
01819
01820 case ERC_NOT_ALLOWED_PCDATA:
01821 { char *text = va_arg(args, char *);
01822 text[ strlen(text) - 1] = '\0';
01823 sprintf(buf, "#PCDATA (\"%s\") not allowed here", str_summary(text,25));
01824 error.argv[0] = buf;
01825 error.severity = ERS_WARNING;
01826 e = ERC_VALIDATE;
01827 break;
01828 }
01829
01830 case ERC_NO_ATTRIBUTE:
01831 { const char *elem = va_arg(args, char *);
01832 const char *attr = va_arg(args, char *);
01833
01834 sprintf(buf, "Element \"%s\" has no attribute \"%s\"", elem, attr);
01835 error.argv[0] = buf;
01836 error.severity = ERS_WARNING;
01837
01838 e = ERC_VALIDATE;
01839 break;
01840 }
01841
01842 case ERC_NO_ATTRIBUTE_VALUE:
01843 { const char *elem = va_arg(args, char *);
01844 const char *value = va_arg(args, char *);
01845
01846 sprintf(buf, "Element \"%s\" has no attribute with value \"%s\"",
01847 elem, value);
01848 error.argv[0] = buf;
01849 error.severity = ERS_WARNING;
01850
01851 e = ERC_VALIDATE;
01852 break;
01853 }
01854
01855 case ERC_NO_VALUE:
01856 { error.argv[0] = "entity value";
01857 error.argv[1] = va_arg(args, char *);
01858
01859 error.severity = ERS_ERROR;
01860 e = ERC_EXISTENCE;
01861 break;
01862 }
01863
01864 case ERC_NO_DOCTYPE:
01865 { const char *doctype = va_arg(args, char *);
01866 const char *file = va_arg(args, char *);
01867
01868 sprintf(buf, "No <!DOCTYPE ...>, assuming \"%s\" from DTD file \"%s\"",
01869 doctype, file);
01870 error.argv[0] = buf;
01871 error.severity = ERS_WARNING;
01872
01873 e = ERC_VALIDATE;
01874 break;
01875 }
01876
01877 }
01878 error.id = e;
01879 format_message(&error);
01880
01881
01882 if ( current_parser && current_parser->on_error )
01883 (*current_parser->on_error)(current_parser, &error);
01884
01885 else
01886 {
01887
01888 prolog_term av0, av1, av2;
01889
01890 av0 = p2p_new();
01891 if(error.severity == ERS_ERROR)
01892 {
01893 c2p_functor("error",1,av0);
01894 }
01895 else if(error.severity == ERS_WARNING)
01896 {
01897 c2p_functor("warning",1,av0);
01898 }
01899 else
01900 {
01901 return FALSE;
01902 }
01903 av1 = p2p_arg(av0, 1);
01904 c2p_functor("sgml", 1, av1);
01905 av2 = p2p_arg( av1, 1);
01906 c2p_functor( "miscellaneous", 1 , av2);
01907 c2p_string( error.message, p2p_arg(av2,1));
01908
01909 if(error.severity == ERS_WARNING)
01910 {
01911 av1 = global_warning_term;
01912 while( is_list( av1))
01913 {
01914 av2 = p2p_cdr(av1);
01915 av1 = av2;
01916 }
01917 c2p_list(av1);
01918 p2p_unify( p2p_car(av1), av0);
01919 }
01920 else if(error.severity == ERS_ERROR)
01921 {
01922 av1 = global_error_term;
01923 p2p_unify( av1, av0);
01924 }
01925 else
01926 {
01927 return FALSE;
01928 }
01929 }
01930 va_end(args);
01931
01932 return FALSE;
01933 }
01934
01941 static char *
01942 format_location(char *s, dtd_srcloc *l)
01943 { int first = TRUE;
01944
01945 if ( !l || l->type == IN_NONE )
01946 return s;
01947
01948 for( ; l && l->type != IN_NONE;
01949 l = l->parent, first = FALSE )
01950 { if ( !first )
01951 { sprintf(s, " (from ");
01952 s += strlen(s);
01953 }
01954 switch(l->type)
01955 { case IN_NONE:
01956 assert(0);
01957 case IN_FILE:
01958 sprintf(s, "%s:%d:%d", l->name, l->line, l->linepos);
01959 break;
01960 case IN_ENTITY:
01961 sprintf(s, "&%s;%d:%d", l->name, l->line, l->linepos);
01962 break;
01963 }
01964 s += strlen(s);
01965 if ( !first )
01966 { *s++ = ')';
01967 }
01968 }
01969
01970 *s++ = ':';
01971 *s++ = ' ';
01972
01973 return s;
01974 }
01975
01982 static void
01983 format_message(dtd_error *e)
01984 { char buf[1024];
01985 char *s;
01986 int prefix_len;
01987
01988 switch(e->severity)
01989 { case ERS_ERROR:
01990 strcpy(buf, "Error: ");
01991 break;
01992 case ERS_WARNING:
01993 strcpy(buf, "Warning: ");
01994 break;
01995 default:
01996 buf[0] = '\0';
01997 }
01998 s = buf+strlen(buf);
01999
02000 s = format_location(s, e->location);
02001
02002
02003 prefix_len = s-buf;
02004
02005 switch(e->id)
02006 { case ERC_REPRESENTATION:
02007 sprintf(s, "Cannot represent due to %s", e->argv[0]);
02008 break;
02009 case ERC_RESOURCE:
02010 sprintf(s, "Insufficient %s resources", e->argv[0]);
02011 break;
02012 case ERC_LIMIT:
02013 sprintf(s, "%s limit exceeded", e->argv[0]);
02014 break;
02015 case ERC_VALIDATE:
02016 sprintf(s, "%s", e->argv[0]);
02017 break;
02018 case ERC_SYNTAX_ERROR:
02019 sprintf(s, "Syntax error: %s", e->argv[0]);
02020 break;
02021 case ERC_EXISTENCE:
02022 sprintf(s, "%s \"%s\" does not exist", e->argv[0], e->argv[1]);
02023 break;
02024 case ERC_REDEFINED:
02025 sprintf(s, "Redefined %s \"%s\"", e->argv[0], e->argv[1]);
02026 break;
02027 default:
02028 ;
02029 }
02030
02031 e->message = buf;
02032 e->plain_message = e->message + prefix_len;
02033 }
02034
02040 static int
02041 process_entity(dtd_parser *p, const ichar *name)
02042 {
02043
02044 if ( name[0] == '#' )
02045 {
02046 int v = char_entity_value(name);
02047
02048
02049 if ( v == FALSE )
02050 {
02051 return gripe(ERC_SYNTAX_ERROR, "Bad character entity", name);
02052 }
02053
02054 if ( v >= OUTPUT_CHARSET_SIZE )
02055 {
02056
02057 if ( p->on_entity )
02058 {
02059 process_cdata(p, FALSE);
02060 (*p->on_entity)(p, NULL, v);
02061 }
02062 else
02063 {
02064 return gripe(ERC_REPRESENTATION, "character");
02065 }
02066 }
02067 else
02068 add_ocharbuf(p->cdata, v);
02069 }
02070 else
02071 {
02072 dtd_symbol *id;
02073 dtd_entity *e;
02074 dtd *dtd = p->dtd;
02075 int len;
02076 const ichar *text;
02077 const ichar *s;
02078 int chr;
02079 const char *file;
02080
02081
02082 if ( !(id=dtd_find_entity_symbol(dtd, name)) ||
02083 !(e=id->entity) )
02084 {
02085 if ( dtd->default_entity )
02086 e = dtd->default_entity;
02087 else
02088 {
02089 return gripe(ERC_EXISTENCE, "entity", name);
02090 }
02091 }
02092 if ( !e->value &&
02093 e->content == EC_SGML &&
02094 (file=entity_file(p->dtd, e)) )
02095 {
02096 empty_icharbuf(p->buffer);
02097
02098 return sgml_process_file(p, file, SGML_SUB_DOCUMENT);
02099 }
02100
02101 if ( !(text = entity_value(p, e, &len)) )
02102 return gripe(ERC_NO_VALUE, e->name->name);
02103
02104
02105 switch ( e->content )
02106 {
02107 case EC_SGML:
02108 case EC_CDATA:
02109
02110 if ( (s=isee_character_entity(dtd, text, &chr)) && *s == '\0' )
02111 {
02112 if ( p->blank_cdata == TRUE && !HasClass(dtd, chr, CH_BLANK) )
02113 { p->cdata_must_be_empty = !open_element(p, CDATA_ELEMENT, FALSE);
02114 p->blank_cdata = FALSE;
02115 }
02116
02117 if ( chr > 0 && chr < OUTPUT_CHARSET_SIZE )
02118 { add_ocharbuf(p->cdata, chr);
02119 return TRUE;
02120 } else
02121 {
02122 if ( p->on_entity )
02123 { process_cdata(p, FALSE);
02124 (*p->on_entity)(p, e, chr);
02125 } else
02126 return gripe(ERC_REPRESENTATION, "character");
02127 }
02128 break;
02129 }
02130 if ( e->content == EC_SGML )
02131 { locbuf oldloc;
02132
02133 push_location(p, &oldloc);
02134 set_src_dtd_parser(p, IN_ENTITY, e->name->name);
02135 empty_icharbuf(p->buffer);
02136 for(s=text; *s; s++)
02137 putchar_dtd_parser(p, *s);
02138 pop_location(p, &oldloc);
02139 }
02140 else if ( *text )
02141 { const ochar *o;
02142
02143 if ( p->blank_cdata == TRUE )
02144 {
02145 p->cdata_must_be_empty = !open_element(p, CDATA_ELEMENT, FALSE);
02146 p->blank_cdata = FALSE;
02147 }
02148
02149 for(o=(const ochar *)text; *o; o++)
02150 add_ocharbuf(p->cdata, *o);
02151 }
02152 break;
02153 case EC_SDATA:
02154 case EC_NDATA:
02155 process_cdata(p, FALSE);
02156 if ( p->on_data )
02157 (*p->on_data)(p, e->content, len, text);
02158 break;
02159 case EC_PI:
02160 process_cdata(p, FALSE);
02161 if ( p->on_pi )
02162 (*p->on_pi)(p, text);
02163 case EC_STARTTAG:
02164 #if 0
02165 prepare_cdata(p);
02166 process_begin_element(p, text);
02167 #endif
02168 break;
02169 case EC_ENDTAG:
02170 #if 0
02171 prepare_cdata(p);
02172 process_end_element(p, text);
02173 #endif
02174 break;
02175 }
02176
02177 return TRUE;
02178 }
02179
02180 return TRUE;
02181 }
02182
02187 int
02188 sgml_process_file(dtd_parser *p, const char *file, unsigned flags)
02189 { int rval;
02190 locbuf oldloc;
02191
02192 char fname[MAXSTRLEN];
02193 char server[MAXSTRLEN];
02194
02195 char * buf = NULL;
02196
02197 int n= 0;
02198
02199 push_location(p, &oldloc);
02200 set_src_dtd_parser(p, IN_FILE, file);
02201 if ( !(flags & SGML_SUB_DOCUMENT) )
02202 set_mode_dtd_parser(p, DM_DATA);
02203
02204
02205 if( parse_url( file, server, fname) != FALSE)
02206 {
02207
02208 if( get_file_www( server, fname, &buf) == FALSE){
02209 rval = FALSE;
02210 }
02211 else{
02212 n = strlen( buf);
02213 }
02214 }
02215
02216 if ( buf )
02217 {
02218 rval = sgml_process_stream(p, buf, flags, n);
02219 }
02220 else
02221 rval = FALSE;
02222
02223 pop_location(p, &oldloc);
02224
02225 return rval;
02226 }
02227
02228
02232 int
02233 sgml_process_stream(dtd_parser *p, char *buf, unsigned flags, int source_len)
02234 { int p0, p1, i=0 ;
02235
02236 if ( (p0 = buf[i]) == EOF )
02237 return TRUE;
02238 i++;
02239 if ( (p1 = buf[i]) == EOF )
02240 { putchar_dtd_parser(p, p0);
02241 return end_document_dtd_parser(p);
02242 }
02243 i++;
02244 for( ; i<=source_len ; i++)
02245 { int p2 = buf[i];
02246
02247 if ( p2 == EOF || p2 == '\0')
02248 { putchar_dtd_parser(p, p0);
02249 if ( p1 != LF )
02250 putchar_dtd_parser(p, p1);
02251 else if ( p0 != CR )
02252 putchar_dtd_parser(p, CR);
02253
02254 if ( flags & SGML_SUB_DOCUMENT )
02255 return TRUE;
02256 else
02257 return end_document_dtd_parser(p);
02258 }
02259
02260 putchar_dtd_parser(p, p0);
02261 p0 = p1;
02262 p1 = p2;
02263 }
02264 return TRUE;
02265 }
02266
02271 int
02272 end_document_dtd_parser(dtd_parser *p)
02273 { int rval;
02274
02275 WITH_PARSER(p, rval = end_document_dtd_parser_(p));
02276
02277 return rval;
02278 }
02279
02280
02281
02291 int
02292 end_document_dtd_parser_(dtd_parser *p)
02293 { int rval;
02294
02295 switch(p->state)
02296 { case S_RCDATA:
02297 case S_CDATA:
02298 case S_PCDATA:
02299 rval = TRUE;
02300 break;
02301 case S_CMT:
02302 case S_CMTE0:
02303 case S_CMTE1:
02304 case S_DECLCMT0:
02305 case S_DECLCMT:
02306 case S_DECLCMTE0:
02307 rval = gripe(ERC_SYNTAX_ERROR, "Unexpected end-of-file in comment", "");
02308 break;
02309 case S_ECDATA1:
02310 case S_ECDATA2:
02311 case S_EMSC1:
02312 case S_EMSC2:
02313 case S_DECL0:
02314 case S_DECL:
02315 case S_MDECL0:
02316 case S_STRING:
02317 case S_CMTO:
02318 case S_GROUP:
02319 case S_PENT:
02320 case S_ENT:
02321 case S_ENT0:
02322 rval = gripe(ERC_SYNTAX_ERROR, "Unexpected end-of-file", "");
02323 break;
02324 #ifdef UTF8
02325 case S_UTF8:
02326 rval = gripe(ERC_SYNTAX_ERROR,"Unexpected end-of-file in UTF-8 sequence", "");
02327 break;
02328 #endif
02329 case S_MSCDATA:
02330 case S_EMSCDATA1:
02331 case S_EMSCDATA2:
02332 rval = gripe(ERC_SYNTAX_ERROR, "Unexpected end-of-file in CDATA marked section", "");
02333 break;
02334 case S_PI:
02335 case S_PI2:
02336 rval = gripe(ERC_SYNTAX_ERROR,"Unexpected end-of-file in processing instruction", "");
02337 break;
02338 default:
02339 rval = gripe(ERC_SYNTAX_ERROR, "Unexpected end-of-file in ???");
02340 break;
02341 }
02342 if ( p->dmode == DM_DATA )
02343 { sgml_environment *env;
02344 process_cdata(p, TRUE);
02345 if ( (env=p->environments) )
02346 { dtd_element *e;
02347 while(env->parent)
02348 env = env->parent;
02349 pop_to(p, env, CDATA_ELEMENT);
02350 e = env->element;
02351 if ( e->structure && !e->structure->omit_close )
02352 {
02353 gripe(ERC_OMITTED_CLOSE, e->name->name);
02354 }
02355
02356 close_element(p, e, FALSE);
02357 }
02358 }
02359 return rval;
02360 }
02361
02370 static int
02371 pop_to(dtd_parser *p, sgml_environment *to, dtd_element *e0)
02372 { sgml_environment *env, *parent;
02373 for(env = p->environments; env != to; env=parent)
02374 { dtd_element *e = env->element;
02375 validate_completeness(env);
02376 parent = env->parent;
02377 if ( e->structure && !e->structure->omit_close )
02378 {
02379 gripe(ERC_OMITTED_CLOSE, e->name->name);
02380 }
02381 if ( e0 != CDATA_ELEMENT )
02382 emit_cdata(p, TRUE);
02383 p->first = FALSE;
02384 p->environments = env;
02385 if ( p->dtd->shorttag )
02386 p->waiting_for_net = env->saved_waiting_for_net;
02387
02388 WITH_CLASS(p, EV_OMITTED,
02389 if ( p->on_end_element )
02390 (*p->on_end_element)(p, e));
02391 free_environment(env);
02392 }
02393 p->environments = to;
02394 p->map = to->map;
02395 return TRUE;
02396 }
02397
02398
02399
02400 static int
02401 process_cdata(dtd_parser *p, int last)
02402 {
02403 prepare_cdata(p);
02404 return emit_cdata(p, last);
02405 }
02406
02413 static dtd_symbol *
02414 dtd_find_entity_symbol(dtd *dtd, const ichar *name)
02415 { dtd_symbol_table *t = dtd->symbols;
02416 if ( dtd->ent_case_sensitive )
02417 { int k = istrhash(name, t->size);
02418 dtd_symbol *s;
02419
02420
02421 for(s=t->entries[k]; s; s = s->next)
02422 {
02423 if ( istreq(s->name, name) )
02424 {
02425 return s;
02426 }
02427 }
02428 } else
02429 { int k = istrcasehash(name, t->size);
02430 dtd_symbol *s;
02431
02432 for(s=t->entries[k]; s; s = s->next)
02433 { if ( istrcaseeq(s->name, name) )
02434 {
02435
02436 return s;
02437 }
02438 }
02439 }
02440
02441 return NULL;
02442 }
02443
02451 static void
02452 add_attribute(dtd *dtd, dtd_element *e, dtd_attr *a)
02453 {
02454 dtd_attr_list **l;
02455 dtd_attr_list *n;
02456 for(l = &e->attributes; *l; l = &(*l)->next)
02457 {
02458 if ( (*l)->attribute->name == a->name )
02459 {
02460 char temp[30];
02461 sprintf(temp, "Attribute %s redefined\n", a->name->name);
02462 gripe(ERC_VALIDATE, temp);
02463 (a->references)++;
02464 free_attribute(a);
02465 return;
02466 }
02467 }
02468
02469 n = calloc(1, sizeof(*n));
02470 n->attribute = a;
02471 a->references++;
02472 *l = n;
02473 set_element_properties(e, a);
02474 }
02475
02480 static void
02481 free_attribute_values(int argc, sgml_attribute *argv)
02482 { int i;
02483
02484 for(i=0; i<argc; i++, argv++)
02485 {
02486 if ( (argv->flags & SGML_AT_DEFAULT) )
02487 continue;
02488 if ( argv->value.cdata )
02489 free(argv->value.cdata);
02490 if ( argv->value.text )
02491 free(argv->value.text);
02492 }
02493 }
02494
02499 static void
02500 add_list_element(dtd_element *e, void *closure)
02501 { namelist *nl = closure;
02502
02503 nl->list[nl->size++] = e->name;
02504 }
02505
02506 static void
02507 add_submodel(dtd_model *m, dtd_model *sub)
02508 { dtd_model **d;
02509 for( d = &m->content.group; *d; d = &(*d)->next )
02510 ;
02511 *d = sub;
02512 }
02513
02514
02515 static void
02516 free_name_list(dtd_name_list *nl)
02517 { dtd_name_list *next;
02518
02519 for( ; nl; nl=next)
02520 { next = nl->next;
02521
02522 free(nl);
02523 }
02524 }
02525
02526
02527 static void
02528 free_attribute(dtd_attr *a)
02529 {
02530 if ( --a->references == 0 )
02531 {
02532 switch(a->type)
02533 {
02534 case AT_NAMEOF:
02535 case AT_NOTATION:
02536 free_name_list(a->typeex.nameof);
02537 default:
02538 ;
02539 }
02540 switch(a->def)
02541 {
02542 case AT_DEFAULT:
02543 {
02544 if ( a->type == AT_CDATA )
02545 free(a->att_def.cdata);
02546 else if ( a->islist )
02547 free(a->att_def.list);
02548 }
02549 default:
02550 ;
02551 }
02552
02553 free(a);
02554
02555 }
02556
02557 }
02558
02565 static int
02566 prepare_cdata(dtd_parser *p)
02567 {
02568
02569 if ( p->cdata->size == 0 )
02570 return TRUE;
02571
02572 terminate_ocharbuf(p->cdata);
02573
02574 if ( p->mark_state == MS_INCLUDE )
02575 {
02576 dtd *dtd = p->dtd;
02577 dtd_element *e;
02578
02579
02580
02581 if ( p->environments )
02582 {
02583 e = p->environments->element;
02584
02585 if ( e->structure && e->structure->type == C_EMPTY && !e->undefined )
02586 {
02587
02588 close_element(p, e, FALSE);
02589 }
02590 }
02591
02592
02593 if ( p->blank_cdata == TRUE )
02594 {
02595 int blank = TRUE;
02596 const ichar *s;
02597
02598 for(s = p->cdata->data; *s; s++)
02599 {
02600 if ( !HasClass(dtd, *s, CH_BLANK) )
02601 {
02602 blank = FALSE;
02603 break;
02604 }
02605 }
02606
02607 p->blank_cdata = blank;
02608 if ( !blank )
02609 {
02610 if ( p->dmode == DM_DTD )
02611 gripe(ERC_SYNTAX_ERROR, "CDATA in DTD", p->cdata->data);
02612 else
02613 {
02614
02615 open_element(p, CDATA_ELEMENT, TRUE);
02616 }
02617 }
02618 }
02619 }
02620
02621 return TRUE;
02622 }
02623
02630 static int
02631 close_element(dtd_parser *p, dtd_element *e, int conref)
02632 {
02633 sgml_environment *env;
02634
02635 for(env = p->environments; env; env=env->parent)
02636 {
02637
02638 if ( env->element == e )
02639 {
02640 sgml_environment *parent;
02641
02642 for(env = p->environments; ; env=parent)
02643 {
02644 dtd_element *ce = env->element;
02645
02646
02647 if ( !(conref && env == p->environments) )
02648 validate_completeness(env);
02649
02650 parent = env->parent;
02651
02652 p->first = FALSE;
02653
02654
02655 if ( p->on_end_element )
02656 (*p->on_end_element)(p, env->element);
02657
02658 free_environment(env);
02659
02660 p->environments = parent;
02661
02662 if ( ce == e )
02663 {
02664 p->map = (parent ? parent->map : NULL);
02665 return TRUE;
02666 }
02667 else
02668 {
02669 if ( ce->structure && !ce->structure->omit_close )
02670 gripe(ERC_OMITTED_CLOSE, ce->name->name);
02671 }
02672
02673 }
02674
02675 }
02676 }
02677 return gripe(ERC_NOT_OPEN, e->name->name);
02678 }
02679
02684 static void
02685 validate_completeness(sgml_environment *env)
02686 { if ( !complete(env) )
02687 {
02688 char buf[MAXSTRLEN];
02689 sprintf(buf, "Incomplete <%s> element", env->element->name->name);
02690
02691 gripe(ERC_VALIDATE, buf);
02692 }
02693 }
02694
02700 static void
02701 free_environment(sgml_environment *env)
02702 {
02703 #ifdef XMLNS
02704 if ( env->xmlns )
02705 xmlns_free(env);
02706 #endif
02707 free(env);
02708 }
02709
02716 static int
02717 process_end_element(dtd_parser *p, const ichar *decl)
02718 {
02719 dtd *dtd = p->dtd;
02720 dtd_symbol *id;
02721 const ichar *s;
02722 char temp[30];
02723
02724 emit_cdata( p, TRUE);
02725
02726
02727 if ( (s=itake_name(dtd, decl, &id)) && *s == '\0' )
02728 return close_element(p, find_element(dtd, id), FALSE);
02729
02730
02731 if ( p->dtd->shorttag && *decl == '\0' )
02732 return close_current_element(p);
02733
02734 sprintf( temp, "Bad close-element tag %s\n", decl);
02735 return gripe(ERC_SYNTAX_ERROR, "Bad close-element tag %s\n", decl);
02736 }
02737
02738
02744 static int
02745 close_current_element(dtd_parser *p)
02746 { if ( p->environments )
02747 { dtd_element *e = p->environments->element;
02748 emit_cdata(p, TRUE);
02749 return close_element(p, e, FALSE);
02750 }
02751 return gripe(ERC_SYNTAX_ERROR, "No element to close", "");
02752 }
02753
02760 static int
02761 process_declaration(dtd_parser *p, const ichar *decl)
02762 {
02763 const ichar *s;
02764 dtd *dtd = p->dtd;
02765
02766
02767
02768 if ( p->dmode != DM_DTD )
02769 {
02770 if ( HasClass(dtd, *decl, CH_NAME) )
02771 {
02772 return process_begin_element(p, decl);
02773 }
02774 else if ( (s=isee_func(dtd,decl,CF_ETAGO2)) )
02775 {
02776 process_end_element(p, s);
02777 return TRUE;
02778 }
02779 }
02780
02781 if ( (s=isee_func(dtd, decl, CF_MDO2)) )
02782 {
02783 decl = s;
02784
02785 if ( p->on_decl )
02786 (*p->on_decl)(p, decl);
02787
02788 if ( (s = isee_identifier(dtd, decl, "entity")) ){
02789 process_entity_declaration(p, s);
02790 }
02791 else if ( (s = isee_identifier(dtd, decl, "element")) )
02792 {
02793 process_element_declaraction(p, s);
02794 }
02795 else if ( (s = isee_identifier(dtd, decl, "attlist")) )
02796 {
02797 process_attlist_declaraction(p, s);
02798 }
02799 else if ( (s = isee_identifier(dtd, decl, "notation")) )
02800 {
02801 process_notation_declaration(p, s);
02802 }
02803 else if ( (s = isee_identifier(dtd, decl, "shortref")) )
02804 {
02805 process_shortref_declaration(p, s);
02806 }
02807 else if ( (s = isee_identifier(dtd, decl, "usemap")) )
02808 {
02809 process_usemap_declaration(p, s);
02810 }
02811 else if ( (s = isee_identifier(dtd, decl, "doctype")) )
02812 {
02813 if ( p->dmode != DM_DTD )
02814 process_doctype(p, s, decl-1);
02815 }
02816 else
02817 {
02818 s = iskip_layout(dtd, decl);
02819 if ( *s )
02820 gripe(ERC_SYNTAX_ERROR, "Invalid declaration", s);
02821 }
02822
02823 return TRUE;
02824
02825 }
02826
02827 return gripe(ERC_SYNTAX_ERROR, "Invalid declaration", decl);
02828 }
02829
02830
02836 static int
02837 process_usemap_declaration(dtd_parser *p, const ichar *decl)
02838 { dtd *dtd = p->dtd;
02839 ichar buf[MAXDECL];
02840 dtd_symbol *name;
02841 const ichar *s;
02842 dtd_symbol *ename;
02843 dtd_element *e;
02844 dtd_shortref *map;
02845
02846
02847 if ( !expand_pentities(p, decl, buf, sizeof(buf)) )
02848 return FALSE;
02849 decl = buf;
02850
02851 if ( !(s=itake_name(dtd, decl, &name)) )
02852 { if ( (s=isee_identifier(dtd, decl, "#empty")) )
02853 name = NULL;
02854 else
02855 return gripe(ERC_SYNTAX_ERROR, "map-name expected", decl);
02856 }
02857 decl = s;
02858
02859 if ( !(map = find_map(dtd, name)) )
02860 map = def_shortref(p, name);
02861
02862
02863 if ( isee_func(dtd, decl, CF_GRPO) )
02864 { dtd_model *model;
02865 if ( (model = make_model(dtd, decl, &s)) )
02866 { for_elements_in_model(model, set_map_element, map);
02867 free_model(model);
02868 decl = s;
02869 } else
02870 return FALSE;
02871 } else if ( (s=itake_name(dtd, decl, &ename)) )
02872 { e = find_element(dtd, ename);
02873 e->map = map;
02874 decl = s;
02875 } else if ( p->environments )
02876 {
02877 if ( !map->defined )
02878 { gripe(ERC_EXISTENCE, "map", name->name);
02879 }
02880
02881 p->environments->map = map;
02882 p->map = p->environments->map;
02883 } else
02884 return gripe(ERC_SYNTAX_ERROR, "element-name expected", decl);
02885
02886 if ( *decl )
02887 {
02888 return gripe(ERC_SYNTAX_ERROR, "Unparsed", decl);
02889 }
02890
02891 return TRUE;
02892 }
02893
02894 static void
02895 set_map_element(dtd_element *e, void *closure)
02896 { e->map = closure;
02897 }
02898
02899
02900
02901 static dtd_shortref *
02902 find_map(dtd *dtd, dtd_symbol *name)
02903 { dtd_shortref *sr;
02904
02905 if ( !name )
02906 { static dtd_shortref *empty;
02907
02908 if ( !empty )
02909 { empty = sgml_calloc(1, sizeof(*empty));
02910 empty->name = dtd_add_symbol(dtd, "#EMPTY");
02911 empty->defined = TRUE;
02912 }
02913
02914 return empty;
02915 }
02916 for( sr = dtd->shortrefs; sr; sr = sr->next )
02917 { if ( sr->name == name )
02918 { if ( !sr->defined )
02919 break;
02920
02921 return sr;
02922 }
02923 }
02924
02925 return NULL;
02926 }
02927
02933 static int
02934 process_shortref_declaration(dtd_parser *p, const ichar *decl)
02935 { dtd *dtd = p->dtd;
02936 ichar buf[MAXDECL];
02937 dtd_shortref *sr;
02938 dtd_symbol *name;
02939 const ichar *s;
02940
02941 if ( !expand_pentities(p, decl, buf, sizeof(buf)) )
02942 return FALSE;
02943 decl = buf;
02944
02945 if ( !(s=itake_name(dtd, decl, &name)) )
02946 return gripe(ERC_SYNTAX_ERROR, "Name expected", decl);
02947 decl = s;
02948
02949 sr = def_shortref(p, name);
02950 if ( sr->defined )
02951 { gripe(ERC_REDEFINED, "shortref", name);
02952 return TRUE;
02953 }
02954 sr->defined = TRUE;
02955
02956 while( *(decl = iskip_layout(dtd, decl)) != '\0'
02957 && (s=shortref_add_map(dtd, decl, sr)) )
02958 decl = s;
02959 compile_map(dtd, sr);
02960
02961 if ( *decl )
02962 {
02963 return gripe(ERC_SYNTAX_ERROR, "Map expected", decl);
02964 }
02965
02966 return TRUE;
02967 }
02968
02977 static void
02978 compile_map(dtd *dtd, dtd_shortref *sr)
02979 { dtd_map *map;
02980
02981 for(map = sr->map; map; map = map->next)
02982 {
02983 ichar last = map->from[map->len-1];
02984
02985 switch( last )
02986 {
02987 case CHR_BLANK:
02988 case CHR_DBLANK:
02989 { int i;
02990 for( i=0; i< ICHARSET_SIZE; i++)
02991 { if ( HasClass(dtd, i, CH_BLANK) )
02992 sr->ends[i] = TRUE;
02993 }
02994 }
02995
02996 default:
02997 sr->ends[last] = TRUE;
02998 }
02999 }
03000 }
03001
03005 static const ichar *
03006 shortref_add_map(dtd *dtd, const ichar *decl, dtd_shortref *sr)
03007 { ichar buf[MAXSTRINGLEN];
03008 ichar from[MAXMAPLEN];
03009 ichar *f = from;
03010 dtd_symbol *to;
03011 const ichar *s;
03012 const ichar *end;
03013 dtd_map **p;
03014 dtd_map *m;
03015
03016 if ( !(s=itake_string(dtd, decl, buf, sizeof(buf))) )
03017 { gripe(ERC_SYNTAX_ERROR, "map-string expected", decl);
03018 return NULL;
03019 }
03020 decl = s;
03021 if ( !(s=itake_entity_name(dtd, decl, &to)) )
03022 { gripe(ERC_SYNTAX_ERROR, "map-to name expected", decl);
03023 return NULL;
03024 }
03025 end = s;
03026
03027 for(decl=buf; *decl;)
03028 { if ( *decl == 'B' )
03029 { if ( decl[1] == 'B' )
03030 { *f++ = CHR_DBLANK;
03031 decl += 2;
03032 continue;
03033 }
03034 *f++ = CHR_BLANK;
03035 decl++;
03036 } else
03037 *f++ = *decl++;
03038 }
03039 *f = 0;
03040 for(p=&sr->map; *p; p = &(*p)->next)
03041 ;
03042
03043 m = calloc(1, sizeof(*m));
03044 m->from = istrdup(from);
03045 m->len = istrlen(from);
03046 m->to = to;
03047
03048 *p = m;
03049
03050 return end;
03051 }
03052
03053 static dtd_shortref *
03054 def_shortref(dtd_parser *p, dtd_symbol *name)
03055 { dtd *dtd = p->dtd;
03056 dtd_shortref *sr, **pr;
03057
03058 for(pr=&dtd->shortrefs; *pr; pr = &(*pr)->next)
03059 { dtd_shortref *r = *pr;
03060
03061 if ( r->name == name )
03062 return r;
03063 }
03064
03065 sr = calloc(1, sizeof(*sr));
03066 sr->name = name;
03067 *pr = sr;
03068
03069 return sr;
03070 }
03071
03072
03073 static const ichar *
03074 itake_dubbed_string(dtd *dtd, const ichar *in, ichar **out)
03075 { ichar buf[MAXSTRINGLEN];
03076 const ichar *end;
03077
03078 if ( (end=itake_string(dtd, in, buf, sizeof(buf))) )
03079 *out = istrdup(buf);
03080
03081 return end;
03082 }
03083
03089 static int
03090 process_notation_declaration(dtd_parser *p, const ichar *decl)
03091 { dtd *dtd = p->dtd;
03092 dtd_symbol *nname;
03093 const ichar *s;
03094 ichar *system = NULL, *public = NULL;
03095 dtd_notation *not;
03096
03097 if ( !(s=itake_name(dtd, decl, &nname)) )
03098 return gripe(ERC_SYNTAX_ERROR, "Notation name expected", decl);
03099 decl = s;
03100
03101
03102 if ( find_notation(dtd, nname) )
03103 {
03104 gripe(ERC_REDEFINED, "notation", nname);
03105 return TRUE;
03106 }
03107
03108 if ( (s=isee_identifier(dtd, decl, "system")) )
03109 { ;
03110 } else if ( (s=isee_identifier(dtd, decl, "public")) )
03111 { decl = s;
03112 if ( !(s=itake_dubbed_string(dtd, decl, &public)) )
03113 {
03114 return gripe(ERC_SYNTAX_ERROR, "Public identifier expected", decl);
03115 }
03116 } else
03117 {
03118 return gripe(ERC_SYNTAX_ERROR, "SYSTEM or PUBLIC expected", decl);
03119 }
03120
03121 decl = s;
03122 if ( (s=itake_dubbed_string(dtd, decl, &system)) )
03123 decl = s;
03124
03125 if ( *decl )
03126 return gripe(ERC_SYNTAX_ERROR, "Unexpected end of declaraction", decl);
03127
03128 not = sgml_calloc(1, sizeof(*not));
03129 not->name = nname;
03130 not->system = system;
03131 not->public = public;
03132 not->next = NULL;
03133 add_notation(dtd, not);
03134
03135 return TRUE;
03136 }
03137
03143 static void
03144 add_notation(dtd *dtd, dtd_notation *not)
03145 { dtd_notation *n;
03146
03147 for(n=dtd->notations; n; n = n->next)
03148 { if ( !n->next )
03149 { n->next = not;
03150 break;
03151 }
03152 }
03153 }
03154
03160 static int
03161 process_include(dtd_parser *p, const ichar *entity_name)
03162 { dtd_symbol *id;
03163 dtd_entity *pe;
03164 dtd *dtd = p->dtd;
03165
03166
03167
03168 if ( (id=dtd_find_entity_symbol(dtd, entity_name)) &&
03169 (pe=find_pentity(p->dtd, id)) )
03170 {
03171 const char *file;
03172
03173
03174 if ( (file = entity_file(dtd, pe)) )
03175 {
03176
03177 return sgml_process_file(p, file, SGML_SUB_DOCUMENT);
03178 }
03179 else
03180 {
03181
03182 const ichar *text = entity_value(p, pe, NULL);
03183
03184 if ( !text )
03185 return gripe(ERC_NO_VALUE, pe->name->name);
03186 return process_chars(p, IN_ENTITY, entity_name, text);
03187 }
03188 }
03189 return gripe(ERC_EXISTENCE, "parameter entity", entity_name);
03190 }
03191
03196 static int
03197 process_chars(dtd_parser *p, input_type in, const ichar *name, const ichar *s)
03198 { locbuf old;
03199
03200 push_location(p, &old);
03201 set_src_dtd_parser(p, in, (char *)name);
03202 empty_icharbuf(p->buffer);
03203 for(; *s; s++)
03204 putchar_dtd_parser(p, *s);
03205 pop_location(p, &old);
03206
03207 return TRUE;
03208 }
03209
03215 static dtd_notation *
03216 find_notation(dtd *dtd, dtd_symbol *name)
03217 { dtd_notation *n;
03218
03219 for(n=dtd->notations; n; n = n->next)
03220 { if ( n->name == name )
03221 return n;
03222 }
03223
03224 return NULL;
03225 }
03226
03232 static int
03233 process_attlist_declaraction(dtd_parser *p, const ichar *decl)
03234 {
03235 dtd *dtd = p->dtd;
03236 dtd_symbol *eid[MAXATTELEM];
03237 int i, en;
03238 ichar buf[MAXDECL];
03239 const ichar *s;
03240
03241
03242
03243 if ( !expand_pentities(p, decl, buf, sizeof(buf)) )
03244 return FALSE;
03245 decl = iskip_layout(dtd, buf);
03246
03247 if ( !(decl=itake_el_or_model_element_list(dtd, decl, eid, &en)) )
03248 return FALSE;
03249
03250 while(*decl)
03251 {
03252 dtd_attr *at = calloc(1, sizeof(*at));
03253
03254
03255 if ( !(s = itake_name(dtd, decl, &at->name)) )
03256 return gripe(ERC_SYNTAX_ERROR, "Name expected", decl);
03257 decl = s;
03258
03259 if ( (s=isee_func(dtd, decl, CF_GRPO)) )
03260 {
03261 charfunc ngs = CF_NG;
03262 at->type = AT_NAMEOF;
03263 decl=s;
03264
03265 for(;;)
03266 {
03267 dtd_symbol *nm;
03268
03269 if ( !(s = itake_nmtoken(dtd, decl, &nm)) )
03270 return gripe(ERC_SYNTAX_ERROR, "Name expected", decl);
03271 decl = s;
03272 add_name_list(&at->typeex.nameof, nm);
03273 if ( (s=isee_ngsep(dtd, decl, &ngs)) )
03274 {
03275 decl = s;
03276 continue;
03277 }
03278 if ( (s = isee_func(dtd, decl, CF_GRPC)) )
03279 {
03280 decl=s;
03281 decl = iskip_layout(dtd, decl);
03282 break;
03283 }
03284 return gripe(ERC_SYNTAX_ERROR, "Illegal name-group", decl);
03285 }
03286
03287 }
03288
03289 else if ( (s=isee_identifier(dtd, decl, "cdata")) )
03290 {
03291 decl = s;
03292 at->type = AT_CDATA;
03293 } else if ( (s=isee_identifier(dtd, decl, "entity")) )
03294 {
03295 decl = s;
03296 at->type = AT_ENTITY;
03297 } else if ( (s=isee_identifier(dtd, decl, "entities")) )
03298 {
03299 decl = s;
03300 at->type = AT_ENTITIES;
03301 at->islist = TRUE;
03302 } else if ( (s=isee_identifier(dtd, decl, "id")) )
03303 {
03304 decl = s;
03305 at->type = AT_ID;
03306 } else if ( (s=isee_identifier(dtd, decl, "idref")) )
03307 {
03308 decl = s;
03309 at->type = AT_IDREF;
03310 } else if ( (s=isee_identifier(dtd, decl, "idrefs")) )
03311 {
03312 decl = s;
03313 at->type = AT_IDREFS;
03314 at->islist = TRUE;
03315 } else if ( (s=isee_identifier(dtd, decl, "name")) )
03316 {
03317 decl = s;
03318 at->type = AT_NAME;
03319 } else if ( (s=isee_identifier(dtd, decl, "names")) )
03320 {
03321 decl = s;
03322 at->type = AT_NAMES;
03323 at->islist = TRUE;
03324 } else if ( (s=isee_identifier(dtd, decl, "nmtoken")) )
03325 {
03326 decl = s;
03327 at->type = AT_NMTOKEN;
03328 } else if ( (s=isee_identifier(dtd, decl, "nmtokens")) )
03329 {
03330 decl = s;
03331 at->type = AT_NMTOKENS;
03332 at->islist = TRUE;
03333 } else if ( (s=isee_identifier(dtd, decl, "number")) )
03334 {
03335 decl = s;
03336 at->type = AT_NUMBER;
03337 } else if ( (s=isee_identifier(dtd, decl, "numbers")) )
03338 {
03339 decl = s;
03340 at->type = AT_NUMBERS;
03341 at->islist = TRUE;
03342 } else if ( (s=isee_identifier(dtd, decl, "nutoken")) )
03343 {
03344 decl = s;
03345 at->type = AT_NUTOKEN;
03346 } else if ( (s=isee_identifier(dtd, decl, "nutokens")) )
03347 {
03348 decl = s;
03349 at->type = AT_NUTOKENS;
03350 at->islist = TRUE;
03351 } else if ( (s=isee_identifier(dtd, decl, "notation")) )
03352 {
03353 dtd_symbol *ng[MAXNAMEGROUP];
03354 int ns;
03355 at->type = AT_NOTATION;
03356 decl=s;
03357 if ( (s=itake_namegroup(dtd, CF_OR, decl, ng, &ns)) )
03358 {
03359 decl = s;
03360 for(i=0; i<ns; i++)
03361 add_name_list(&at->typeex.nameof, ng[i]);
03362 } else
03363 {
03364 return gripe(ERC_SYNTAX_ERROR, "name-group expected", decl);
03365 }
03366 } else
03367 {
03368 return gripe(ERC_SYNTAX_ERROR, "Attribute-type expected", decl);
03369 }
03370
03371
03372 if ( (s=isee_identifier(dtd, decl, "#fixed")) )
03373 {
03374 decl = s;
03375 at->def = AT_FIXED;
03376 } else if ( (s=isee_identifier(dtd, decl, "#required")) )
03377 {
03378 decl = s;
03379 at->def = AT_REQUIRED;
03380 } else if ( (s=isee_identifier(dtd, decl, "#current")) )
03381 {
03382 decl = s;
03383 at->def = AT_CURRENT;
03384 } else if ( (s=isee_identifier(dtd, decl, "#conref")) )
03385 {
03386 decl = s;
03387 at->def = AT_CONREF;
03388 } else if ( (s=isee_identifier(dtd, decl, "#implied")) )
03389 {
03390 decl = s;
03391 at->def = AT_IMPLIED;
03392 } else
03393 at->def = AT_DEFAULT;
03394
03395 if ( at->def == AT_DEFAULT || at->def == AT_FIXED )
03396 {
03397 ichar buf[MAXSTRINGLEN];
03398 const ichar *end;
03399 if ( !(end=itake_string(dtd, decl, buf, sizeof(buf))) )
03400 end=itake_nmtoken_chars(dtd, decl, buf, sizeof(buf));
03401 if ( !end )
03402 return gripe(ERC_SYNTAX_ERROR, "Bad attribute default", decl);
03403
03404 switch(at->type)
03405 {
03406 case AT_CDATA:
03407 {
03408 at->att_def.cdata = istrdup(buf);
03409 break;
03410 }
03411 case AT_ENTITY:
03412 case AT_NOTATION:
03413 case AT_NAME:
03414 {
03415 if ( !(s=itake_name(dtd, buf, &at->att_def.name)) || *s )
03416 {
03417 return gripe(ERC_DOMAIN, "name", decl);
03418 }
03419 break;
03420 }
03421 case AT_NMTOKEN:
03422 case AT_NAMEOF:
03423 {
03424 if ( !(s=itake_nmtoken(dtd, buf, &at->att_def.name)) || *s )
03425 return gripe(ERC_DOMAIN, "nmtoken", decl);
03426 break;
03427 }
03428 case AT_NUTOKEN:
03429 {
03430 if ( !(s=itake_nutoken(dtd, buf, &at->att_def.name)) || *s )
03431 return gripe(ERC_DOMAIN, "nutoken", decl);
03432 break;
03433 }
03434 case AT_NUMBER:
03435 {
03436 if ( !(s=itake_number(dtd, buf, at)) || *s )
03437 return gripe(ERC_DOMAIN, "number", decl);
03438 break;
03439 }
03440 case AT_NAMES:
03441 case AT_ENTITIES:
03442 case AT_IDREFS:
03443 case AT_NMTOKENS:
03444 case AT_NUMBERS:
03445 case AT_NUTOKENS:
03446 { at->att_def.list = istrdup(buf);
03447 break;
03448 }
03449
03450 default:
03451 {
03452 return gripe(ERC_REPRESENTATION, "No default for type");
03453 }
03454 }
03455 decl = end;
03456 }
03457
03458
03459 for(i=0; i<en; i++)
03460 {
03461 dtd_element *e = def_element(dtd, eid[i]);
03462
03463 add_attribute(dtd, e, at);
03464 }
03465
03466
03467 }
03468 return TRUE;
03469 }
03470
03477 static const ichar *
03478 itake_nutoken(dtd *dtd, const ichar *in, dtd_symbol **id)
03479 { ichar buf[MAXNMLEN];
03480 ichar *o = buf;
03481
03482 in = iskip_layout(dtd, in);
03483 if ( !HasClass(dtd, *in, CH_DIGIT) )
03484 return NULL;
03485 if ( dtd->case_sensitive )
03486 { while( HasClass(dtd, *in, CH_NAME) )
03487 *o++ = *in++;
03488 } else
03489 { while( HasClass(dtd, *in, CH_NAME) )
03490 *o++ = tolower(*in++);
03491 }
03492 *o = '\0';
03493 if ( o - buf > 8 )
03494 gripe(ERC_LIMIT, "nutoken length");
03495
03496 *id = dtd_add_symbol(dtd, buf);
03497
03498 return iskip_layout(dtd, in);
03499 }
03500
03501
03508 static const ichar *
03509 itake_number(dtd *dtd, const ichar *in, dtd_attr *at)
03510 { in = iskip_layout(dtd, in);
03511
03512 switch(dtd->number_mode)
03513 { case NU_TOKEN:
03514 { ichar buf[MAXNMLEN];
03515 ichar *o = buf;
03516
03517 while( HasClass(dtd, *in, CH_DIGIT) )
03518 *o++ = *in++;
03519 if ( o == buf )
03520 return NULL;
03521 *o = '\0';
03522 at->att_def.name = dtd_add_symbol(dtd, buf);
03523
03524 return iskip_layout(dtd, (const ichar *)in);
03525 }
03526 case NU_INTEGER:
03527 { char *end;
03528
03529 at->att_def.number = strtol((const char *)in, &end, 10);
03530 if ( end > (char *)in && errno != ERANGE )
03531 return iskip_layout(dtd, (const ichar *)end);
03532 }
03533 }
03534
03535 return NULL;
03536 }
03537
03543 static const ichar *
03544 itake_nmtoken_chars(dtd *dtd, const ichar *in, ichar *out, int len)
03545 { in = iskip_layout(dtd, in);
03546 if ( !HasClass(dtd, *in, CH_NAME) )
03547 return NULL;
03548 while( HasClass(dtd, *in, CH_NAME) )
03549 { if ( --len <= 0 )
03550 gripe(ERC_REPRESENTATION, "Name token too long");
03551 *out++ = (dtd->case_sensitive ? *in++ : tolower(*in++));
03552 }
03553 *out++ = '\0';
03554
03555 return iskip_layout(dtd, in);
03556 }
03557
03558
03568 static const ichar *
03569 isee_ngsep(dtd *dtd, const ichar *decl, charfunc *sep)
03570 { const ichar *s;
03571
03572 if ( (s=isee_func(dtd, decl, *sep)) )
03573 return iskip_layout(dtd, s);
03574 if ( *sep == CF_NG )
03575 { static const charfunc ng[] = { CF_SEQ, CF_OR, CF_AND };
03576 int n;
03577
03578 for(n=0; n<3; n++)
03579 { if ( (s=isee_func(dtd, decl, ng[n])) )
03580 { *sep = ng[n];
03581 return iskip_layout(dtd, s);
03582 }
03583 }
03584 }
03585
03586 return NULL;
03587 }
03588
03589
03594 static void
03595 add_name_list(dtd_name_list **nl, dtd_symbol *s)
03596 { dtd_name_list *n = sgml_calloc(1, sizeof(*n));
03597
03598 n->value = s;
03599
03600 for( ; *nl; nl = &(*nl)->next )
03601 ;
03602
03603 *nl = n;
03604 }
03605
03613 static const ichar *
03614 itake_nmtoken(dtd *dtd, const ichar *in, dtd_symbol **id)
03615 { ichar buf[MAXNMLEN];
03616 ichar *o = buf;
03617
03618 in = iskip_layout(dtd, in);
03619 if ( !HasClass(dtd, *in, CH_NAME) )
03620 return NULL;
03621 if ( dtd->case_sensitive )
03622 { while( HasClass(dtd, *in, CH_NAME) )
03623 *o++ = *in++;
03624 } else
03625 { while( HasClass(dtd, *in, CH_NAME) )
03626 *o++ = tolower(*in++);
03627 }
03628 *o = '\0';
03629
03630 *id = dtd_add_symbol(dtd, buf);
03631
03632 return iskip_layout(dtd, in);
03633 }
03634
03641 static int
03642 process_element_declaraction(dtd_parser *p, const ichar *decl)
03643 { dtd *dtd = p->dtd;
03644 ichar buf[MAXDECL];
03645 const ichar *s;
03646 dtd_symbol *eid[MAXATTELEM];
03647 dtd_edef *def;
03648 int en;
03649 int i;
03650
03651 if ( !expand_pentities(p, decl, buf, sizeof(buf)) )
03652 return FALSE;
03653 decl = buf;
03654
03655
03656 if ( !(s=itake_el_or_model_element_list(dtd, decl, eid, &en)) )
03657 return gripe(ERC_SYNTAX_ERROR, "Name or name-group expected", decl);
03658 decl = s;
03659 if ( en == 0 )
03660 return TRUE;
03661
03662 def = calloc(1, sizeof(*def));
03663
03664 for(i=0; i<en; i++)
03665 { find_element(dtd, eid[i]);
03666 eid[i]->element->structure = def;
03667 eid[i]->element->undefined = FALSE;
03668 }
03669 def->references = en;
03670
03671 if ( (s = isee_identifier(dtd, decl, "-")) )
03672 { def->omit_close = FALSE;
03673 goto seeclose;
03674 } else if ( (s = isee_identifier(dtd, decl, "o")) )
03675 { def->omit_open = TRUE;
03676
03677 seeclose:
03678 decl = s;
03679 if ( (s = isee_identifier(dtd, decl, "-")) )
03680 { def->omit_close = FALSE;
03681 } else if ( (s = isee_identifier(dtd, decl, "o")) )
03682 { for(i=0; i<en; i++)
03683 def->omit_close = TRUE;
03684 } else
03685 return gripe(ERC_SYNTAX_ERROR, "Bad omit-tag declaration", decl);
03686 decl = s;
03687 }
03688
03689 if ( !(decl=process_model(dtd, def, decl)) )
03690 return FALSE;
03691
03692 if ( decl[0] == '-' || decl[0] == '+' )
03693 { dtd_symbol *ng[MAXNAMEGROUP];
03694 int ns;
03695 dtd_element_list **l;
03696
03697 if ( decl[0] == '-' )
03698 l = &def->excluded;
03699 else
03700 l = &def->included;
03701
03702 decl++;
03703 if ( (s=itake_namegroup(dtd, CF_OR, decl, ng, &ns)) )
03704 { int i;
03705
03706 decl = s;
03707
03708 for(i=0; i<ns; i++)
03709 add_element_list(l, find_element(dtd, ng[i]));
03710 } else
03711 { return gripe(ERC_SYNTAX_ERROR, "Name group expected", decl);
03712 }
03713 }
03714
03715 if (*decl)
03716 {
03717 return gripe(ERC_SYNTAX_ERROR, "Unexpected end of declaration", decl);
03718 }
03719
03720 return TRUE;
03721
03722 }
03723
03728 static const ichar *
03729 process_model(dtd *dtd, dtd_edef *e, const ichar *decl)
03730 { const ichar *s;
03731
03732 decl = iskip_layout(dtd, decl);
03733 if ( (s = isee_identifier(dtd, decl, "empty")) )
03734 { e->type = C_EMPTY;
03735 return s;
03736 }
03737 if ( (s = isee_identifier(dtd, decl, "cdata")) )
03738 { e->type = C_CDATA;
03739 return s;
03740 }
03741 if ( (s = isee_identifier(dtd, decl, "rcdata")) )
03742 { e->type = C_RCDATA;
03743 return s;
03744 }
03745 if ( (s = isee_identifier(dtd, decl, "any")) )
03746 { e->type = C_ANY;
03747 return s;
03748 }
03749
03750 e->type = C_PCDATA;
03751 if ( !(e->content = make_model(dtd, decl, &decl)) )
03752 return FALSE;
03753
03754 return decl;
03755 }
03756
03764 static const ichar *
03765 itake_namegroup(dtd *dtd, charfunc sep, const ichar *decl,
03766 dtd_symbol **names, int *n)
03767 { const ichar *s;
03768 int en = 0;
03769
03770 if ( (s=isee_func(dtd, decl, CF_GRPO)) )
03771 { for(;;)
03772 { if ( !(decl=itake_name(dtd, s, &names[en++])) )
03773 { gripe(ERC_SYNTAX_ERROR, "Name expected", s);
03774 return NULL;
03775 }
03776 if ( (s=isee_func(dtd, decl, sep)) )
03777 { decl = iskip_layout(dtd, s);
03778 continue;
03779 }
03780 if ( (s=isee_func(dtd, decl, CF_GRPC)) )
03781 { *n = en;
03782 decl = s;
03783 return iskip_layout(dtd, decl);
03784 }
03785
03786 gripe(ERC_SYNTAX_ERROR, "Bad name-group", decl);
03787 return NULL;
03788 }
03789 }
03790
03791 return NULL;
03792 }
03793
03799 static void
03800 add_element_list(dtd_element_list **l, dtd_element *e)
03801 { dtd_element_list *n = sgml_calloc(1, sizeof(*n));
03802
03803 n->value = e;
03804
03805 for( ; *l; l = &(*l)->next )
03806 ;
03807 *l = n;
03808 }
03809
03814 static const ichar *
03815 itake_el_or_model_element_list(dtd *dtd, const ichar *decl, dtd_symbol **names,
03816 int *n)
03817 { const ichar *s;
03818
03819 if ( isee_func(dtd, decl, CF_GRPO) )
03820 { dtd_model *model;
03821
03822 if ( (model = make_model(dtd, decl, &s)) )
03823 { namelist nl;
03824
03825 nl.list = names;
03826 nl.size = 0;
03827 for_elements_in_model(model, add_list_element, &nl);
03828 free_model(model);
03829
03830 *n = nl.size;
03831 return s;
03832 } else
03833 return NULL;
03834 } else
03835 { if ( !(s = itake_name(dtd, decl, &names[0])) )
03836 { gripe(ERC_SYNTAX_ERROR, "Name expected", decl);
03837 return NULL;
03838 }
03839 *n = 1;
03840 return s;
03841 }
03842 }
03843
03851 static void
03852 for_elements_in_model(dtd_model *m,
03853 void (*f)(dtd_element *e, void *closure),
03854 void *closure)
03855 { switch(m->type)
03856 { case MT_SEQ:
03857 case MT_AND:
03858 case MT_OR:
03859 { dtd_model *sub = m->content.group;
03860
03861 for(; sub; sub = sub->next)
03862 for_elements_in_model(sub, f, closure);
03863 break;
03864 }
03865 case MT_ELEMENT:
03866 (*f)(m->content.element, closure);
03867 break;
03868 default:
03869 ;
03870 }
03871 }
03872
03873
03878 static dtd_model *
03879 make_model(dtd *dtd, const ichar *decl, const ichar **end)
03880 { const ichar *s;
03881 dtd_model *m = calloc(1, sizeof(*m));
03882 dtd_symbol *id;
03883
03884 decl = iskip_layout(dtd, decl);
03885
03886 if ( (s=isee_identifier(dtd, decl, "#pcdata")) )
03887 { m->type = MT_PCDATA;
03888 m->cardinality = MC_ONE;
03889 *end = s;
03890 return m;
03891 }
03892
03893 if ( (s=itake_name(dtd, decl, &id)) )
03894 { m->type = MT_ELEMENT;
03895 m->content.element = find_element(dtd, id);
03896 decl = s;
03897 } else
03898 { if ( !(s=isee_func(dtd, decl, CF_GRPO)) )
03899 { gripe(ERC_SYNTAX_ERROR, "Name group expected", decl);
03900 free_model(m);
03901 return NULL;
03902 }
03903 decl = s;
03904 for(;;)
03905 { dtd_model *sub;
03906 modeltype mt;
03907
03908 if ( !(sub = make_model(dtd, decl, &s)) )
03909 return NULL;
03910 decl = s;
03911 add_submodel(m, sub);
03912
03913 if ( (s = isee_func(dtd, decl, CF_OR)) )
03914 { decl = s;
03915 mt = MT_OR;
03916 } else if ( (s = isee_func(dtd, decl, CF_SEQ)) )
03917 { decl = s;
03918 mt = MT_SEQ;
03919 } else if ( (s = isee_func(dtd, decl, CF_AND)) )
03920 { decl = s;
03921 mt = MT_AND;
03922 } else if ( (s = isee_func(dtd, decl, CF_GRPC)) )
03923 { decl = s;
03924 break;
03925 } else
03926 { gripe(ERC_SYNTAX_ERROR, "Connector ('|', ',' or '&') expected", decl);
03927 free_model(m);
03928 return NULL;
03929 }
03930 decl = iskip_layout(dtd, decl);
03931
03932 if ( m->type != mt )
03933 { if ( !m->type )
03934 m->type = mt;
03935 else
03936 { gripe(ERC_SYNTAX_ERROR, "Different connector types in model", decl);
03937 free_model(m);
03938 return NULL;
03939 }
03940 }
03941 }
03942 }
03943 if ( (s = isee_func(dtd, decl, CF_OPT)) )
03944 { decl = s;
03945 m->cardinality = MC_OPT;
03946 } else if ( (s=isee_func(dtd, decl, CF_REP)) )
03947 { decl = s;
03948 m->cardinality = MC_REP;
03949 } else if ( (s=isee_func(dtd, decl, CF_PLUS)) )
03950 {
03951 if ( isee_func(dtd, iskip_layout(dtd, s), CF_GRPO) == NULL )
03952 { decl = s;
03953 m->cardinality = MC_PLUS;
03954 }
03955 } else
03956 m->cardinality = MC_ONE;
03957 if ( m->type == MT_UNDEF )
03958 { dtd_model *sub = m->content.group;
03959 modelcard card;
03960
03961 assert(!sub->next);
03962 if ( sub->cardinality == MC_ONE )
03963 card = m->cardinality;
03964 else if ( m->cardinality == MC_ONE )
03965 card = sub->cardinality;
03966 else
03967 { m->type = MT_OR;
03968 goto out;
03969 }
03970
03971 *m = *sub;
03972 m->cardinality = card;
03973 free(sub);
03974 }
03975 out:
03976 *end = iskip_layout(dtd, decl);
03977 return m;
03978 }
03979
03980
03984 static void
03985 free_element_definition(dtd_edef *def)
03986 { if ( --def->references == 0 )
03987 { if ( def->content )
03988 free_model(def->content);
03989 free_element_list(def->included);
03990 free_element_list(def->excluded);
03991 free_state_engine(def->initial_state);
03992
03993 free(def);
03994 }
03995 }
03996
04001 static void
04002 free_element_list(dtd_element_list *l)
04003 { dtd_element_list *next;
04004
04005 for( ; l; l=next)
04006 { next = l->next;
04007
04008 free(l);
04009 }
04010 }
04011
04016 static void
04017 free_attribute_list(dtd_attr_list *l)
04018 { dtd_attr_list *next;
04019
04020 for(; l; l=next)
04021 { next = l->next;
04022
04023 free_attribute(l->attribute);
04024 free(l);
04025 }
04026 }
04027
04032 static void
04033 free_elements(dtd_element *e)
04034 { dtd_element *next;
04035
04036 for( ; e; e=next)
04037 { next = e->next;
04038
04039 if ( e->structure )
04040 free_element_definition(e->structure);
04041 free_attribute_list(e->attributes);
04042
04043 sgml_free(e);
04044 }
04045 }
04046
04051 static void
04052 free_model(dtd_model *m)
04053 { switch(m->type)
04054 { case MT_SEQ:
04055 case MT_AND:
04056 case MT_OR:
04057 { dtd_model *sub = m->content.group;
04058 dtd_model *next;
04059
04060 for(; sub; sub = next)
04061 { next = sub->next;
04062
04063 free_model(sub);
04064 }
04065 }
04066 default:
04067 ;
04068 }
04069 free(m);
04070 }
04071
04078 static int
04079 process_doctype(dtd_parser *p, const ichar *decl, const ichar *decl0)
04080 { dtd *dtd = p->dtd;
04081
04082 dtd_symbol *id;
04083 const ichar *s;
04084 dtd_entity *et = NULL;
04085
04086 const char *file=NULL;
04087
04088
04089 if ( !(s=itake_name(dtd, decl, &id)) )
04090 return gripe(ERC_SYNTAX_ERROR, "Name expected", decl);
04091 decl = s;
04092
04093
04094 if ( (s=isee_identifier(dtd, decl, "system")) )
04095 { et = calloc(1, sizeof(*et));
04096 et->type = ET_SYSTEM;
04097 decl = s;
04098 } else if ( (s=isee_identifier(dtd, decl, "public")) )
04099 { et = calloc(1, sizeof(*et));
04100 et->type = ET_PUBLIC;
04101 decl = s;
04102 } else if ( isee_func(dtd, decl, CF_DSO) )
04103 goto local;
04104
04105 if ( et )
04106 { et->name = id;
04107 et->catalog_location = 0;
04108 if ( !(s=process_entity_value_declaration(p, decl, et)) )
04109 return FALSE;
04110 decl = s;
04111 }
04112
04113
04114
04115 if ( !dtd->doctype )
04116 {
04117 dtd_parser *clone;
04118 dtd->doctype = istrdup(id->name);
04119 if ( et )
04120 file = entity_file(dtd, et);
04121
04122 if ( !file )
04123 {
04124 gripe(ERC_EXISTENCE, "DTD", dtd->doctype);
04125 }
04126 else
04127 {
04128 clone = clone_dtd_parser(p);
04129
04130 if ( !load_dtd_from_file(clone, file) )
04131 gripe(ERC_EXISTENCE, "file", file);
04132
04133 free_dtd_parser(clone);
04134 }
04135 }
04136
04137 if ( et )
04138 free_entity_list(et);
04139
04140
04141 local:
04142 if ( (s=isee_func(dtd, decl, CF_DSO)) )
04143 {
04144
04145 int grouplevel = 1;
04146 data_mode oldmode = p->dmode;
04147 dtdstate oldstate = p->state;
04148 locbuf oldloc;
04149 const ichar *q;
04150 icharbuf *saved_ibuf = p->buffer;
04151
04152 if(!dtd->doctype)
04153 dtd->doctype = istrdup(id->name);
04154 else
04155 return TRUE;
04156
04157
04158 push_location(p, &oldloc);
04159
04160
04161
04162 sgml_cplocation(&p->location, &p->startloc);
04163 inc_location(&p->location, '<');
04164
04165 for(q=decl0; q < s; q++)
04166 inc_location(&p->location, *q);
04167 p->dmode = DM_DTD;
04168 p->state = S_PCDATA;
04169 p->buffer = new_icharbuf();
04170
04171
04172 for( ; *s; s++ )
04173 {
04174
04175 if( isee_func(dtd, s, CF_LIT) ||
04176 isee_func(dtd, s, CF_LITA) )
04177 {
04178 ichar q = *s;
04179
04180 putchar_dtd_parser(p, *s++);
04181 for( ; *s && *s != q; s++ )
04182 putchar_dtd_parser(p, *s);
04183 if ( *s == q )
04184 putchar_dtd_parser(p, *s);
04185 continue;
04186 }
04187
04188 if ( isee_func(dtd, s, CF_DSO) )
04189 grouplevel++;
04190 else if ( isee_func(dtd, s, CF_DSC) && --grouplevel == 0 )
04191 break;
04192
04193 putchar_dtd_parser(p, *s);
04194
04195 }
04196
04197 p->dtd->implicit = FALSE;
04198 p->state = oldstate;
04199 p->dmode = oldmode;
04200 free_icharbuf(p->buffer);
04201 p->buffer = saved_ibuf;
04202 pop_location(p, &oldloc);
04203
04204 }
04205
04206 p->enforce_outer_element = id;
04207
04208 return TRUE;
04209
04210 }
04211
04215 static void
04216 free_entity_list(dtd_entity *e)
04217 { dtd_entity *next;
04218
04219 for( ; e; e=next)
04220 { next = e->next;
04221
04222 if ( e->value ) free(e->value);
04223 if ( e->extid ) free(e->extid);
04224 if ( e->exturl ) free(e->exturl);
04225 if ( e->baseurl ) free(e->baseurl);
04226
04227 free(e);
04228 }
04229 }
04230
04234 void
04235 free_dtd_parser(dtd_parser *p)
04236 { free_icharbuf(p->buffer);
04237 free_ocharbuf(p->cdata);
04238
04239 free_dtd(p->dtd);
04240
04241 free(p);
04242 }
04243
04247 void
04248 free_dtd(dtd *dtd)
04249 { if ( --dtd->references == 0 )
04250 { if ( dtd->doctype )
04251 free(dtd->doctype);
04252
04253 free_entity_list(dtd->entities);
04254 free_entity_list(dtd->pentities);
04255 free_notations(dtd->notations);
04256 free_shortrefs(dtd->shortrefs);
04257 free_elements(dtd->elements);
04258 free_symbol_table(dtd->symbols);
04259 free(dtd->charfunc);
04260 free(dtd->charclass);
04261 free(dtd->charmap);
04262 dtd->magic = 0;
04263
04264 free(dtd);
04265 }
04266 }
04267
04271 static void
04272 free_symbol_table(dtd_symbol_table *t)
04273 { int i;
04274
04275 for(i=0; i<t->size; i++)
04276 { dtd_symbol *s, *next;
04277
04278 for(s=t->entries[i]; s; s=next)
04279 { next = s->next;
04280
04281 free((char *)s->name);
04282 free(s);
04283 }
04284 }
04285
04286 free(t->entries);
04287 free(t);
04288 }
04289
04294 static void
04295 free_notations(dtd_notation *n)
04296 { dtd_notation *next;
04297
04298 for( ; n; n=next)
04299 { next = n->next;
04300
04301 free(n->system);
04302 free(n->public);
04303
04304 free(n);
04305 }
04306 }
04307
04311 static void
04312 free_shortrefs(dtd_shortref *sr)
04313 { dtd_shortref *next;
04314
04315 for( ; sr; sr=next)
04316 { next = sr->next;
04317 free_maps(sr->map);
04318 free(sr);
04319 }
04320 }
04321
04327 void
04328 set_src_dtd_parser(dtd_parser *p, input_type type, const char *name)
04329 { p->location.type = type;
04330 p->location.name = name;
04331 p->location.line = 1;
04332 p->location.linepos = 0;
04333 p->location.charpos = 0;
04334 }
04335
04339 static void
04340 free_maps(dtd_map *map)
04341 { dtd_map *next;
04342
04343 for( ; map; map=next)
04344 { next = map->next;
04345 if ( map->from )
04346 free(map->from);
04347 free(map);
04348 }
04349 }
04350
04360 static int
04361 process_entity_declaration(dtd_parser *p, const ichar *decl)
04362 { dtd *dtd = p->dtd;
04363 const ichar *s;
04364 dtd_symbol *id;
04365 dtd_entity *e;
04366 int isparam;
04367 int isdef = FALSE;
04368
04369
04370
04371 if ( (s=isee_func(dtd, decl, CF_PERO)) )
04372 { isparam = TRUE;
04373 decl = s;
04374 } else
04375 isparam = FALSE;
04376
04377
04378 if ( !(s = itake_entity_name(dtd, decl, &id)) )
04379 {
04380 if ( !(s = isee_identifier(dtd, decl, "#default")) )
04381 { return gripe(ERC_SYNTAX_ERROR, "Name expected", decl);
04382 }
04383 id = dtd_add_symbol(dtd, "#DEFAULT");
04384 isdef = TRUE;
04385 }
04386
04387 if ( isparam && find_pentity(dtd, id) ) {
04388 gripe(ERC_REDEFINED, "parameter entity", id);
04389 return TRUE;
04390 }
04391 if ( id->entity ) {
04392 gripe(ERC_REDEFINED, "entity", id);
04393 return TRUE;
04394 }
04395
04396 decl = iskip_layout(dtd, s);
04397 e =calloc(1, sizeof(*e));
04398 e->name = id;
04399
04400
04401 if ( (s = isee_identifier(dtd, decl, "system")) )
04402 { e->type = ET_SYSTEM;
04403 e->content = EC_SGML;
04404 decl = s;
04405 }
04406 else if ( (s = isee_identifier(dtd, decl, "public")) )
04407 {
04408 e->type = ET_PUBLIC;
04409 e->content = EC_SGML;
04410 decl = s;
04411 }
04412 else
04413 {
04414 e->type = ET_LITERAL;
04415
04416 if ( !isparam )
04417 { if ( (s=isee_identifier(dtd, decl, "cdata")) )
04418 { decl = s;
04419 e->content = EC_CDATA;
04420 } else if ( (s=isee_identifier(dtd, decl, "sdata")) )
04421 { decl = s;
04422 e->content = EC_SDATA;
04423 } else if ( (s=isee_identifier(dtd, decl, "pi")) )
04424 { decl = s;
04425 e->content = EC_PI;
04426 } else if ( (s=isee_identifier(dtd, decl, "starttag")) )
04427 { decl = s;
04428 e->content = EC_STARTTAG;
04429 } else if ( (s=isee_identifier(dtd, decl, "endtag")) )
04430 { decl = s;
04431 e->content = EC_ENDTAG;
04432 } else
04433 e->content = EC_SGML;
04434 }
04435
04436 }
04437
04438 if ( (decl=process_entity_value_declaration(p, decl, e)) )
04439 {
04440 if ( e->type == ET_LITERAL )
04441 {switch(e->content)
04442 {
04443 case EC_STARTTAG:
04444 { ichar *buf =malloc((e->length + 3)*sizeof(ichar));
04445
04446 buf[0] = dtd->charfunc->func[CF_STAGO];
04447 istrcpy(&buf[1], e->value);
04448 buf[++e->length] = dtd->charfunc->func[CF_STAGC];
04449 buf[++e->length] = 0;
04450
04451 free(e->value);
04452 e->value = buf;
04453 e->content = EC_SGML;
04454
04455 break;
04456 }
04457 case EC_ENDTAG:
04458 { ichar *buf = sgml_malloc((e->length + 4)*sizeof(ichar));
04459
04460 buf[0] = dtd->charfunc->func[CF_ETAGO1];
04461 buf[1] = dtd->charfunc->func[CF_ETAGO2];
04462 istrcpy(&buf[2], e->value);
04463 e->length++;
04464 buf[++e->length] = dtd->charfunc->func[CF_STAGC];
04465 buf[++e->length] = 0;
04466
04467 sgml_free(e->value);
04468 e->value = buf;
04469 e->content = EC_SGML;
04470
04471 break;
04472 }
04473 default:
04474 break;
04475
04476 }
04477 }
04478 else
04479 {
04480
04481 if ( *decl )
04482 { dtd_symbol *nname;
04483
04484 if ( (s=isee_identifier(dtd, decl, "cdata")) )
04485 { decl = s;
04486 e->content = EC_CDATA;
04487 } else if ( (s=isee_identifier(dtd, decl, "sdata")) )
04488 { decl = s;
04489 e->content = EC_SDATA;
04490 } else if ( (s=isee_identifier(dtd, decl, "ndata")) )
04491 { decl = s;
04492 e->content = EC_NDATA;
04493 } else
04494 { return gripe(ERC_SYNTAX_ERROR, "Bad datatype declaration", decl);
04495 }
04496 if ( (s=itake_name(dtd, decl, &nname)) )
04497 { decl = s;
04498 } else
04499 { return gripe(ERC_SYNTAX_ERROR, "Bad notation declaration", decl);
04500 }
04501 }
04502
04503 }
04504
04505 if ( *decl )
04506 {
04507 return gripe(ERC_SYNTAX_ERROR, "Unexpected end of declaraction", decl);
04508 }
04509 }
04510
04511 if ( isparam )
04512 { e->next = dtd->pentities;
04513 dtd->pentities = e;
04514 } else
04515 { e->name->entity = e;
04516 e->next = dtd->entities;
04517 dtd->entities = e;
04518 }
04519
04520 if ( isdef )
04521 dtd->default_entity = e;
04522
04523 return TRUE;
04524
04525 }
04526
04527
04531 static ichar *
04532 baseurl(dtd_parser *p)
04533 { if ( p->location.type == IN_FILE && p->location.name )
04534 { return istrdup(p->location.name);
04535 }
04536
04537 return NULL;
04538 }
04539
04540
04546 static const ichar *
04547 process_entity_value_declaration(dtd_parser *p,
04548 const ichar *decl, dtd_entity *e)
04549 { dtd *dtd = p->dtd;
04550 const ichar *s;
04551
04552 if ( e->type == ET_SYSTEM )
04553 {
04554 if ( (s=itake_url(dtd, decl, &e->exturl)) )
04555 { e->baseurl = baseurl(p);
04556 return s;
04557 }
04558
04559 goto string_expected;
04560 } else
04561 { ichar buf[MAXSTRINGLEN];
04562 ichar val[MAXSTRINGLEN];
04563
04564 if ( !(s = itake_string(dtd, decl, buf, sizeof(buf))) )
04565 goto string_expected;
04566 decl = s;
04567
04568 expand_pentities(p, buf, val, sizeof(val));
04569
04570 switch ( e->type )
04571 { case ET_PUBLIC:
04572 { e->extid = istrdup(val);
04573 if ( isee_func(dtd, decl, CF_LIT) ||
04574 isee_func(dtd, decl, CF_LITA) )
04575 { if ( (s=itake_url(dtd, decl, &e->exturl)) )
04576 { e->baseurl = baseurl(p);
04577 decl = s;
04578 }
04579 }
04580 return decl;
04581 }
04582 case ET_LITERAL:
04583 { e->value = istrdup(val);
04584 e->length = strlen(e->value);
04585 return decl;
04586 }
04587 default:
04588 assert(0);
04589 return NULL;
04590 }
04591 }
04592
04593 string_expected:
04594 gripe(ERC_SYNTAX_ERROR, "String expected", decl);
04595 return NULL;
04596 }
04597
04608 static const ichar *
04609 itake_url(dtd *dtd, const ichar *in, ichar **out)
04610 { ichar buf[MAXSTRINGLEN];
04611 const ichar *end;
04612
04613 if ( (end=itake_string(dtd, in, buf, sizeof(buf))) )
04614 { *out = istrdup(buf);
04615 }
04616
04617 return end;
04618 }
04619
04625 static int
04626 expand_pentities(dtd_parser *p, const ichar *in, ichar *out, int len)
04627 { dtd *dtd = p->dtd;
04628 int pero = dtd->charfunc->func[CF_PERO];
04629 int ero = dtd->charfunc->func[CF_ERO];
04630 const ichar *s;
04631
04632 while(*in)
04633 { if ( *in == pero )
04634 { dtd_symbol *id;
04635
04636 if ( (s = itake_entity_name(dtd, in+1, &id)) )
04637 { dtd_entity *e = find_pentity(dtd, id);
04638 const ichar *eval;
04639 int l;
04640
04641 in = s;
04642 if ( (s=isee_func(dtd, s, CF_ERC)) )
04643 in = s;
04644
04645 if ( !e )
04646 { return gripe(ERC_EXISTENCE, "parameter entity", id->name);
04647
04648 }
04649
04650 if ( !(eval = entity_value(p, e, NULL)) )
04651 return FALSE;
04652
04653 if ( !expand_pentities(p, eval, out, len) )
04654 return FALSE;
04655 l = strlen(out);
04656 out += l;
04657 len -= l;
04658
04659 continue;
04660 }
04661 }
04662
04663 if ( --len <= 0 )
04664 { gripe(ERC_REPRESENTATION, "Declaration too long");
04665 return FALSE;
04666 }
04667
04668 if ( *in == ero && in[1] == '#' )
04669 { int chr;
04670
04671 if ( (s=isee_character_entity(dtd, in, &chr)) &&
04672 representable_char(p, chr) )
04673 { *out++ = chr;
04674 in = s;
04675 continue;
04676 }
04677 }
04678
04679 *out++ = *in++;
04680 }
04681
04682 *out = '\0';
04683
04684 return TRUE;
04685 }
04686
04691 static int
04692 representable_char(dtd_parser *p, int chr)
04693 { if ( chr < 0 )
04694 return FALSE;
04695 if ( chr < 128 )
04696 return TRUE;
04697 if ( p->utf8_decode )
04698 return FALSE;
04699 if ( chr < OUTPUT_CHARSET_SIZE )
04700 return TRUE;
04701 return FALSE;
04702 }
04703
04710 static const ichar *
04711 isee_character_entity(dtd *dtd, const ichar *in, int *chr)
04712 { const ichar *s;
04713
04714 if ( (s=isee_func(dtd, in, CF_ERO)) && *s == '#' )
04715 { ichar e[32];
04716 ichar *o = e;
04717 int v;
04718
04719 *o++ = *s++;
04720 while(o < e+sizeof(e)-1 && HasClass(dtd, *s, CH_NAME))
04721 *o++ = *s++;
04722 if ( isee_func(dtd, s, CF_ERC))
04723 s++;
04724
04725 *o = '\0';
04726 if ( (v=char_entity_value(e)) != FALSE )
04727 { *chr = v;
04728 return s;
04729 }
04730 }
04731
04732 return NULL;
04733 }
04734
04738 static int
04739 char_entity_value(const ichar *decl)
04740 { if ( *decl == '#' )
04741 { const ichar *s = decl+1;
04742 char *end;
04743 long v;
04744
04745
04746 if ( s[0] == 'x' || s[0] == 'X' )
04747 v = strtoul((char *)s+1, &end, 16);
04748 else
04749 v = strtoul((char *)s, &end, 10);
04750
04751 if ( *end == '\0' )
04752 { return (int)v;
04753 } else if ( istreq(s, "RS") )
04754 { return '\n';
04755 } else if ( istreq(s, "RE") )
04756 { return '\r';
04757 } else if ( istreq(s, "TAB") )
04758 { return '\t';
04759 } else if ( istreq(s, "SPACE") )
04760 { return ' ';
04761 }
04762 }
04763 return FALSE;
04764 }
04765
04766 static const ichar *
04767 entity_value(dtd_parser *p, dtd_entity *e, int *len)
04768 { const char *file;
04769
04770 if ( !e->value && (file=entity_file(p->dtd, e)) )
04771 { int normalise = (e->content == EC_SGML || e->content == EC_CDATA);
04772
04773 e->value = load_sgml_file_to_charp(file, normalise, &e->length);
04774 }
04775
04776 if ( len )
04777 *len = e->length;
04778
04779 return e->value;
04780 }
04781
04787 static const char *
04788 entity_file(dtd *dtd, dtd_entity *e)
04789 {
04790 char *file;
04791 switch(e->type)
04792 { case ET_SYSTEM:
04793 case ET_PUBLIC:
04794 {
04795 if( e->exturl)
04796 {
04797 file = e->exturl;
04798
04799 return file;
04800 }
04801 return NULL;
04802 }
04803 default:
04804 return NULL;
04805 }
04806 }
04807
04814 static dtd_entity *
04815 find_pentity(dtd *dtd, dtd_symbol *id)
04816 { dtd_entity *e;
04817
04818 for(e = dtd->pentities; e; e=e->next)
04819 { if ( e->name == id )
04820 return e;
04821 }
04822
04823 return NULL;
04824 }
04825
04826
04837 static const ichar *
04838 isee_identifier(dtd *dtd, const ichar *in, char *id)
04839 { in = iskip_layout(dtd, in);
04840
04841
04842 while (*id && *id == tolower(*in) )
04843 id++, in++;
04844 if ( *id == 0 && !HasClass(dtd, *in, CH_NAME) )
04845 return iskip_layout(dtd, in);
04846
04847 return NULL;
04848 }
04849
04856 static const ichar *
04857 itake_entity_name(dtd *dtd, const ichar *in, dtd_symbol **id)
04858 { ichar buf[MAXSTRLEN];
04859 ichar *o = buf;
04860
04861 in = iskip_layout(dtd, in);
04862
04863 if ( !HasClass(dtd, *in, CH_NMSTART) )
04864 return NULL;
04865
04866 if ( dtd->ent_case_sensitive )
04867 { while( HasClass(dtd, *in, CH_NAME) )
04868 *o++ = *in++;
04869 } else
04870 { while( HasClass(dtd, *in, CH_NAME) )
04871 *o++ = tolower(*in++);
04872 }
04873 *o++ = '\0';
04874
04875 *id = dtd_add_symbol(dtd, buf);
04876
04877 return in;
04878 }
04879
04880
/*
 * process_begin_element() -- handle the content of an open tag.
 *
 * `decl` holds the element name followed by the attribute list.  The
 * function opens the element in the parser's environment stack, parses
 * the attributes, adds defaults (unless SGML_PARSER_NODEFS is set), calls
 * the on_begin_element callback and finally closes the element again if
 * it is empty.
 *
 * Returns TRUE when an element name could be read; otherwise the result
 * of reporting a syntax error through gripe().
 */
static int
process_begin_element(dtd_parser *p, const ichar *decl)
{
  dtd *dtd = p->dtd;
  dtd_symbol *id;
  const ichar *s;


  if ( (s=itake_name(dtd, decl, &id)) )
  {
    sgml_attribute atts[MAXSTRLEN];	/* NOTE(review): other callers size this
					   array with MAXATTRIBUTES */
    int natts=0;
    dtd_element *e = find_element(dtd, id);
    int empty = FALSE;			/* element has no content */
    int conref = FALSE;			/* emptiness is due to a CONREF attribute */

    /* Element without a definition: mark it undefined and give it an
       (initially empty) structure so content can be added on the fly. */
    if ( !e->structure )
    {
      dtd_edef *def;
      e->undefined = TRUE;
      def_element(dtd, id);
      def = e->structure;
      def->type = C_EMPTY;
    }

    open_element(p, e, TRUE);

    decl = s;


    /* on failure `decl` keeps pointing at the start of the attribute list */
    if ( (s=process_attributes(p, e, decl, atts, &natts)) )
      decl=s;



    if ( dtd->dialect != DL_SGML )
    {
      /* non-SGML dialects: a CF_ETAGO2 delimiter at the end of the tag
	 marks an empty element */
      if ( (s=isee_func(dtd, decl, CF_ETAGO2)) )
      {
	empty = TRUE;
	decl = s;
      }
#ifdef XMLNS
      if ( dtd->dialect == DL_XMLNS )
	update_xmlns(p, e, natts, atts);
#endif
      /* always true here; the enclosing test already established it */
      if ( dtd->dialect != DL_SGML )
	update_space_mode(p, e, natts, atts);
    }
    else
    {
      int i;

      /* SGML: a supplied attribute declared CONREF makes the element empty */
      for(i=0; i<natts; i++)
      {
	if ( atts[i].definition->def == AT_CONREF )
	{
	  empty = TRUE;
	  conref = TRUE;
	}
      }
    }

    /* anything left over could not be parsed as an attribute */
    if( *decl)
      gripe(ERC_SYNTAX_ERROR, "Bad attribute list", decl);

    if ( !(p->flags & SGML_PARSER_NODEFS) )
    {
      natts = add_default_attributes(p, e, natts, atts);
    }

    /* In SGML a *declared* element of type EMPTY is empty as well. */
    if ( empty ||
	 (dtd->dialect == DL_SGML &&
	  e->structure &&
	  e->structure->type == C_EMPTY &&
	  !e->undefined) )
      p->empty_element = e;
    else
      p->empty_element = NULL;


    if ( p->on_begin_element )
      (*p->on_begin_element)(p, e, natts, atts);

    free_attribute_values(natts, atts);

    /* empty element: close it immediately */
    if ( p->empty_element )
    {
      p->empty_element = NULL;

      close_element(p, e, conref);
      if ( conref )
	p->cdata_state = p->state = S_PCDATA;
    }

    return TRUE;
  }
  return gripe(ERC_SYNTAX_ERROR, "Bad open-element tag", decl);
}
04986
04993 void
04994 update_space_mode(dtd_parser *p, dtd_element *e,
04995 int natts, sgml_attribute *atts)
04996 { for( ; natts-- > 0; atts++ )
04997 { const ichar *name = atts->definition->name->name;
04998
04999 if ( istreq(name, "xml:space") && atts->definition->type == AT_CDATA )
05000 { dtd_space_mode m = istr_to_space_mode(atts->value.cdata);
05001
05002 if ( m != SP_INHERIT )
05003 p->environments->space_mode = m;
05004 else
05005 gripe(ERC_EXISTENCE, "xml:space-mode", atts->value.cdata);
05006 return;
05007 }
05008 }
05009
05010 if ( e->space_mode != SP_INHERIT )
05011 p->environments->space_mode = e->space_mode;
05012 }
05013
05020 static dtd_space_mode
05021 istr_to_space_mode(const ichar *val)
05022 { if ( istreq(val, "default") )
05023 return SP_DEFAULT;
05024 if ( istreq(val, "preserve") )
05025 return SP_PRESERVE;
05026 if ( istreq(val, "sgml") )
05027 return SP_SGML;
05028 if ( istreq(val, "remove") )
05029 return SP_REMOVE;
05030
05031 return SP_INHERIT;
05032 }
05033
05039 static void
05040 allow_for(dtd_element *in, dtd_element *e)
05041 { dtd_edef *def = in->structure;
05042 dtd_model *g;
05043
05044 if ( def->type == C_EMPTY )
05045 { def->type = C_PCDATA;
05046 def->content = calloc(1, sizeof(*def->content));
05047 def->content->type = MT_OR;
05048 def->content->cardinality = MC_REP;
05049 }
05050 assert(def->content->type == MT_OR);
05051
05052 g = def->content->content.group;
05053
05054 if ( e == CDATA_ELEMENT )
05055 { dtd_model *m;
05056
05057 for(; g; g = g->next)
05058 { if ( g->type == MT_PCDATA )
05059 return;
05060 }
05061 m = calloc(1, sizeof(*m));
05062 m->type = MT_PCDATA;
05063 m->cardinality = MC_ONE;
05064 add_submodel(def->content, m);
05065 } else
05066 { dtd_model *m;
05067
05068 for(; g; g = g->next)
05069 { if ( g->type == MT_ELEMENT && g->content.element == e )
05070 return;
05071 }
05072 m = calloc(1, sizeof(*m));
05073 m->type = MT_ELEMENT;
05074 m->cardinality = MC_ONE;
05075 m->content.element = e;
05076 add_submodel(def->content, m);
05077 }
05078 }
05079
05085 static void
05086 set_element_properties(dtd_element *e, dtd_attr *a)
05087 {
05088 if ( istreq(a->name->name, "xml:space") )
05089 {
05090 switch(a->def)
05091 {
05092 case AT_FIXED:
05093 case AT_DEFAULT:
05094 break;
05095 default:
05096 return;
05097 }
05098
05099 switch (a->type )
05100 {
05101 case AT_NAMEOF:
05102 case AT_NAME:
05103 case AT_NMTOKEN:
05104 e->space_mode = istr_to_space_mode(a->att_def.name->name);
05105 break;
05106 case AT_CDATA:
05107 e->space_mode = istr_to_space_mode((ichar *)a->att_def.cdata);
05108 break;
05109 default:
05110 break;
05111 }
05112 }
05113 }
05114
05120 static dtd_parser *
05121 clone_dtd_parser(dtd_parser *p)
05122 { dtd_parser *clone =calloc(1, sizeof(*p));
05124 clone->dtd = p->dtd;
05125 clone->dtd->references++;
05126 clone->environments = NULL;
05127 clone->marked = NULL;
05128 clone->etag = NULL;
05129 clone->grouplevel = 0;
05130 clone->state = S_PCDATA;
05131 clone->mark_state = MS_INCLUDE;
05132 clone->dmode = DM_DTD;
05133 clone->buffer = new_icharbuf();
05134 clone->cdata = new_ocharbuf();
05135 return clone;
05136 }
05137
/*
 * open_element() -- open element `e` (or CDATA_ELEMENT for character
 * data) in the current context, inserting omitted open tags and closing
 * elements with omissible close tags where the content model requires
 * that.
 *
 * `warn` controls what happens when no legal position is found: if
 * true, an error is reported and the element is pushed anyway; if false
 * nothing is reported or pushed and FALSE is returned.
 *
 * Returns TRUE when the element has been opened, FALSE otherwise (only
 * possible with warn == FALSE).
 */
static int
open_element(dtd_parser *p, dtd_element *e, int warn)
{


  /* Document level with an enforced outer element that differs from `e`:
     open that element first as an omitted tag. */
  if ( !p->environments && p->enforce_outer_element )
  {
    dtd_element *f = p->enforce_outer_element->element;

    if ( f && f != e )
    {
      if ( !f->structure ||
	   !f->structure->omit_open )
	gripe(ERC_OMITTED_OPEN, f->name->name);
      WITH_CLASS(p, EV_OMITTED,
		 { open_element(p, f, TRUE);
		   if ( p->on_begin_element )
		   {
		     sgml_attribute atts[MAXATTRIBUTES];
		     int natts = 0;
		     if ( !(p->flags & SGML_PARSER_NODEFS) )
		       natts = add_default_attributes(p, f, natts, atts);
		     (*p->on_begin_element)(p, f, natts, atts);
		   }
		 });
    }


  }
  if ( !p->environments && !p->dtd->doctype && e != CDATA_ELEMENT )
  {
    const char *file;

    /* Disabled: loading a DTD for a document without DOCTYPE.  The block
       is dead code; note that `file` is never assigned. */
    if(FALSE)
    {
      dtd_parser *clone = clone_dtd_parser(p);
      gripe(ERC_NO_DOCTYPE, e->name->name, file);
      if ( load_dtd_from_file(clone, file) )
	p->dtd->doctype = istrdup(e->name->name);
      else
	gripe(ERC_EXISTENCE, "file", file);
      free_dtd_parser(clone);
    }

  }


  if(p->environments)
  {
    sgml_environment *env = p->environments;

    /* Parent is undefined: extend its content model to accept `e`. */
    if ( env->element->undefined )
    {
      allow_for(env->element, e);
      push_element(p, e, FALSE);
      return TRUE;
    }
    /* Parent accepts ANY content. */
    if ( env->element->structure &&
	 env->element->structure->type == C_ANY )
    {
      if ( e != CDATA_ELEMENT && e->undefined )
	gripe(ERC_EXISTENCE, "Element", e->name->name);
      push_element(p, e, FALSE);
      return TRUE;
    }

    switch(in_or_excluded(env, e))
    {
      case IE_INCLUDED:
	push_element(p, e, FALSE);
	return TRUE;
      case IE_EXCLUDED:
	if ( warn )
	  gripe(ERC_NOT_ALLOWED, e->name->name);

	/* FALLTHROUGH: after the warning, treat it like a normal element */
      case IE_NORMAL:
	/* Walk up the open elements looking for one whose state machine
	   accepts `e`, either directly or through omitted open tags. */
	for(; env; env=env->parent)
	{ dtd_state *new;
	  if ( (new = make_dtd_transition(env->state, e)) )
	  { env->state = new;
	    pop_to(p, env, e);
	    push_element(p, e, FALSE);
	    return TRUE;
	  } else
	  {
	    dtd_element *oe[MAXOMITTED];
	    int olen;
	    int i;
	    if ( (olen=find_omitted_path(env->state, e, oe)) != FALSE )
	    {
	      pop_to(p, env, e);
	      /* open the omitted elements, reporting them as EV_OMITTED */
	      WITH_CLASS(p, EV_OMITTED,
			 for(i=0; i<olen; i++)
			 {
			   env->state = make_dtd_transition(env->state, oe[i]);
			   env = push_element(p, oe[i], TRUE);
			 })
	      env->state = make_dtd_transition(env->state, e);
	      push_element(p, e, FALSE);
	      return TRUE;
	    }
	  }
	  /* only continue upwards if this element may be closed implicitly */
	  if ( !env->element->structure || !env->element->structure->omit_close )
	    break;
	}
    }
    /* no legal position was found */
    if ( warn )
    {
      if ( e == CDATA_ELEMENT )
      {
	gripe(ERC_VALIDATE, "#PCDATA not allowed here");
      }
      else if ( e->undefined )
      {
	gripe(ERC_EXISTENCE, "Element", e->name->name);
      }
      else
      {
	gripe(ERC_NOT_ALLOWED, e->name->name);
      }
    }

  }
  /* Reached at document level or after a failed search: with `warn` the
     element is opened regardless. */
  if ( warn )
  {
    push_element(p, e, FALSE);
    return TRUE;
  }
  else
    return FALSE;
}
05276
05282 int
05283 load_dtd_from_file(dtd_parser *p, const char *file)
05284 {
05285 int rval;
05286 int n=0;
05287
05288 char fname[MAXSTRLEN];
05289 char server[MAXSTRLEN];
05290 char *buf = NULL;
05291
05292 data_mode oldmode = p->dmode;
05293 dtdstate oldstate = p->state;
05294 locbuf oldloc;
05295
05296 push_location(p, &oldloc);
05297 p->dmode = DM_DTD;
05298 p->state = S_PCDATA;
05299 empty_icharbuf(p->buffer);
05300 set_src_dtd_parser(p, IN_FILE, file);
05301
05302
05303 if( parse_url( file, server, fname) != FALSE)
05304 {
05305 if(get_file_www( server, fname, &buf) == FALSE){
05306 rval = FALSE;
05307 }
05308 else{
05309 n = strlen(buf);
05310 p->dtd->implicit = FALSE;
05311 rval = TRUE;
05312 }
05313 }
05314
05315
05316 if ( buf )
05317 {
05318 int chr,i;
05319
05320 for( i=0;i<n;i++){
05321 chr=buf[i];
05322 putchar_dtd_parser(p, chr);
05323 }
05324
05325
05326 p->dtd->implicit = FALSE;
05327 rval = TRUE;
05328 }
05329 else
05330 rval = FALSE;
05331
05332
05333 pop_location(p, &oldloc);
05334 p->dmode = oldmode;
05335 p->state = oldstate;
05336
05337 return rval;
05338 }
05339
05345 int
05346 is_absolute_path(const char *name)
05347 { if (isDirSep(name[0])
05348 #ifdef WIN_NT
05349 || (isalpha(toupper(name[0])) && name[1] == ':')
05350 #endif
05351 )
05352 return TRUE;
05353
05354 return FALSE;
05355 }
05356
05360 char *
05361 localpath(const char *ref, const char *name)
05362 { char *local;
05363
05364 if (!ref || is_absolute_path(name))
05365 local = strdup(name);
05366 else
05367 { char buf[MAXPATHLEN];
05368
05369 DirName(ref, buf);
05370 strcat(buf, DIRSEPSTR);
05371 strcat(buf, name);
05372
05373 local = strdup(buf);
05374 }
05375
05376 if (!local)
05377 sgml_nomem();
05378
05379 return local;
05380 }
05381
05386 static char *
05387 DirName(const char *f, char *dir)
05388 { const char *base, *p;
05389
05390 for (base = p = f; *p; p++)
05391 { if (isDirSep(*p) && p[1] != EOS)
05392 base = p;
05393 }
05394 if (base == f)
05395 { if (isDirSep(*f))
05396 strcpy(dir, DIRSEPSTR);
05397 else
05398 strcpy(dir, ".");
05399 } else
05400 { strncpy(dir, f, base - f);
05401 dir[base - f] = EOS;
05402 }
05403
05404 return dir;
05405 }
05406
05407 static includetype
05408 in_or_excluded(sgml_environment *env, dtd_element *e)
05409 {
05410 for(; env; env=env->parent)
05411 { if ( env->element->structure )
05412 { dtd_edef *def = env->element->structure;
05413 dtd_element_list *el;
05414
05415 for(el=def->excluded; el; el=el->next)
05416 { if ( el->value == e )
05417 return IE_EXCLUDED;
05418 }
05419 for(el=def->included; el; el=el->next)
05420 { if ( el->value == e )
05421 return IE_INCLUDED;
05422 }
05423 }
05424 }
05425
05426 return IE_NORMAL;
05427 }
05428
05429
05435 static __inline void
05436 _sgml_cplocation(dtd_srcloc *d, dtd_srcloc *loc)
05437 { d->type = loc->type;
05438 d->name = loc->name;
05439 d->line = loc->line;
05440 d->linepos = loc->linepos;
05441 d->charpos = loc->charpos;
05442
05443 }
05444
05451 static sgml_environment *
05452 push_element(dtd_parser *p, dtd_element *e, int callback)
05453 {
05454 if( e != CDATA_ELEMENT)
05455 {
05456 sgml_environment *env = calloc(1, sizeof(*env));
05457 emit_cdata(p, FALSE);
05458
05459 env->element = e;
05460 env->state = make_state_engine(e);
05461
05462 env->space_mode = (p->environments ? p->environments->space_mode: p->dtd->space_mode);
05463 env->parent = p->environments;
05464 p->environments = env;
05465 if ( p->dtd->shorttag )
05466 {
05467 env->saved_waiting_for_net = p->waiting_for_net;
05468
05469 if ( p->event_class == EV_SHORTTAG )
05470 {
05471 p->waiting_for_net = TRUE;
05472 env->wants_net = TRUE;
05473 }
05474 else
05475 {
05476 env->wants_net = FALSE;
05477 if ( e->structure && e->structure->omit_close == FALSE )
05478 p->waiting_for_net = FALSE;
05479 }
05480
05481 }
05482 if ( e->map )
05483 p->map = env->map = e->map;
05484 else if ( env->parent )
05485 p->map = env->map = env->parent->map;
05486
05487 p->first = TRUE;
05488 if ( callback && p->on_begin_element )
05489 {
05490 sgml_attribute atts[MAXATTRIBUTES];
05491 int natts = 0;
05492
05493 if ( !(p->flags & SGML_PARSER_NODEFS) )
05494 natts = add_default_attributes(p, e, natts, atts);
05495
05496 (*p->on_begin_element)(p, e, natts, atts);
05497 }
05498
05499 if ( e->structure )
05500 {
05501 if ( e->structure->type == C_CDATA || e->structure->type == C_RCDATA )
05502 {
05503 p->state = (e->structure->type == C_CDATA ? S_CDATA : S_RCDATA);
05504 p->cdata_state = p->state;
05505 p->etag = e->name->name;
05506 p->etaglen = istrlen(p->etag);
05507 sgml_cplocation(&p->startcdata, &p->location);
05508 }
05509 else
05510 p->cdata_state = S_PCDATA;
05511 }
05512 }
05513 return p->environments;
05514
05515 }
05516
05523 static void
05524 push_location(dtd_parser *p, locbuf *save)
05525 { save->here = p->location;
05526 save->start = p->startloc;
05527 p->location.parent = &save->here;
05528 p->startloc.parent = &save->start;
05529 }
05530
05531
/*
 * emit_cdata() -- pass the character data collected in p->cdata to the
 * on_data callback.
 *
 * Before the data is handed over, white space is edited in place
 * according to the space mode of the innermost environment.  `last`
 * indicates that the data is the last in its element, which enables
 * removal of a trailing record boundary in the SGML/default modes.
 *
 * During the call the parser's location is temporarily set to the start
 * of the data; it is restored before returning.
 *
 * Returns TRUE, except for the (asserted impossible) SP_INHERIT mode,
 * which returns FALSE.
 */
static int
emit_cdata(dtd_parser *p, int last)
{
  dtd *dtd = p->dtd;
  ichar *s, *data = p->cdata->data;
  locbuf locsafe;

  if ( p->cdata->size == 0 )
  {
    return TRUE;
  }

  /* report the data at the location where it started */
  push_location(p, &locsafe);
  sgml_cplocation(&p->location, &p->startloc);
  sgml_cplocation(&p->startloc, &p->startcdata);

  if ( p->environments )
  {
    switch(p->environments->space_mode)
    {
      case SP_SGML:
      case SP_DEFAULT:

	/* first data in the element: drop a leading RE and/or RS */
	if ( p->first )
	{
	  if ( HasClass(dtd, *data, CH_RE) )
	  {
	    inc_location(&p->startloc, *data);
	    data++;
	    p->cdata->size--;
	  }
	  if ( HasClass(dtd, *data, CH_RS) )
	  {
	    inc_location(&p->startloc, *data);
	    data++;
	    p->cdata->size--;
	  }

	}
	/* last data in the element: drop a trailing RS and/or RE */
	if( last)
	{
	  ichar *e = data + p->cdata->size;
	  if ( e > data && HasClass(dtd, e[-1], CH_RS) )
	  {
	    dec_location(&p->location, e[-1]);
	    *--e = '\0';
	    p->cdata->size--;
	  }
	  if ( e>data && HasClass(dtd, e[-1], CH_RE) )
	  {
	    dec_location(&p->location, e[-1]);
	    *--e = '\0';
	    p->cdata->size--;
	  }

	}

	/* default mode additionally collapses each run of blanks into a
	   single space */
	if ( p->environments->space_mode == SP_DEFAULT )
	{
	  ichar *o = data;
	  for(s=data; *s; s++)
	  {
	    if ( HasClass(dtd, *s, CH_BLANK) )
	    {
	      while(s[1] && HasClass(dtd, s[1], CH_BLANK))
		s++;
	      *o++ = ' ';
	      continue;
	    }
	    *o++ = *s;
	  }
	  *o = '\0';
	  p->cdata->size = o-data;
	}

	break;
      case SP_REMOVE:
	/* strip leading and trailing blanks and collapse inner runs */
      { ichar *o = data;
	ichar *end = data;		/* just past the last non-blank written */
	for(s=data; *s && HasClass(dtd, *s, CH_BLANK); )
	  inc_location(&p->startloc, *s++);
	if ( *s )
	{
	  for(; *s; s++)
	  {
	    if ( HasClass(dtd, *s, CH_BLANK) )
	    {
	      while(s[1] && HasClass(dtd, s[1], CH_BLANK))
		s++;
	      *o++ = ' ';
	      continue;
	    }
	    *o++ = *s;
	    end = o;
	  }
	}
	*end = '\0';
	p->cdata->size = end-data;
	break;
      }

      case SP_PRESERVE:
	break;
      case SP_INHERIT:
	/* environments always carry a resolved mode */
	assert(0);
	return FALSE;
    }
  }
  /* nothing left after white-space handling */
  if ( p->cdata->size == 0 )
  {
    pop_location(p, &locsafe);
    return TRUE;
  }
  assert(p->cdata->size > 0);


  if ( !p->blank_cdata )
  {
    /* the data contains non-blank characters */
    if ( p->cdata_must_be_empty )
    {
      terminate_ocharbuf(p->cdata);
      gripe(ERC_NOT_ALLOWED_PCDATA, p->cdata->data);
    }
    if ( p->on_data )
    {
      (*p->on_data)(p, EC_CDATA, p->cdata->size, data);

    }
  }
  else if(p->environments)
  {
    sgml_environment *env = p->environments;
    dtd_state *new;




    /* blank data is only passed on where the content model accepts
       character data, or in an undefined element that preserves space */
    if ( (new=make_dtd_transition(env->state, CDATA_ELEMENT)) )
    {
      env->state = new;
      if ( p->on_data )
	(*p->on_data)(p, EC_CDATA, p->cdata->size, data);
    }
    else if ( env->element->undefined && p->environments->space_mode == SP_PRESERVE )
    {
      if ( p->on_data )
	(*p->on_data)(p, EC_CDATA, p->cdata->size, data);
    }

  }


  pop_location(p, &locsafe);
  empty_cdata(p);
  return TRUE;

}
05692
05698 static void
05699 empty_cdata(dtd_parser *p)
05700 {
05701 if ( p->dmode == DM_DATA )
05702 { empty_ocharbuf(p->cdata);
05703 p->blank_cdata = TRUE;
05704 p->cdata_must_be_empty = FALSE;
05705 }
05706 }
05707
05713 static void
05714 pop_location(dtd_parser *p, locbuf *saved)
05715 { p->location = saved->here;
05716 p->startloc = saved->start;
05717 }
05718
05722 static void
05723 inc_location(dtd_srcloc *l, int chr)
05724 { if ( chr == '\n' )
05725 { l->linepos = 0;
05726 l->line++;
05727 }
05728
05729 l->linepos++;
05730 l->charpos++;
05731 }
05732
05736 static void
05737 dec_location(dtd_srcloc *l, int chr)
05738 { if ( chr == '\n' )
05739 { l->linepos = 2;
05740 l->line--;
05741 }
05742 l->linepos--;
05743 l->charpos--;
05744 }
05745
05746
05752 static dtd_element *
05753 def_element(dtd *dtd, dtd_symbol *id)
05754 { dtd_element *e = find_element(dtd, id);
05755
05756 if ( !e->structure ) {
05757 e->structure = calloc(1, sizeof(*e->structure));
05758 e->structure->references = 1;
05759 e->structure->type = C_EMPTY;
05760 }
05761 return e;
05762 }
05763
05769 static dtd_element *
05770 find_element(dtd *dtd, dtd_symbol *id)
05771 { dtd_element *e;
05772
05773 if ( id->element )
05774 return id->element;
05775
05776 e = calloc(1, sizeof(*e));
05777 e->space_mode = SP_INHERIT;
05778 e->undefined = TRUE;
05779 e->name = id;
05780 id->element = e;
05781
05782 e->next = dtd->elements;
05783 dtd->elements = e;
05784
05785 return e;
05786 }
05787
05793 static const ichar *
05794 itake_name(dtd *dtd, const ichar *in, dtd_symbol **id)
05795 {
05796 ichar buf[MAXSTRLEN];
05797 ichar *o = buf;
05798
05799 in = iskip_layout(dtd, in);
05800
05801 if ( !HasClass(dtd, *in, CH_NMSTART) )
05802 return NULL;
05803 if ( dtd->case_sensitive )
05804 {
05805 while( HasClass(dtd, *in, CH_NAME) )
05806 *o++ = *in++;
05807 }
05808 else
05809 {
05810 while( HasClass(dtd, *in, CH_NAME) )
05811 *o++ = tolower(*in++);
05812 }
05813 *o++ = '\0';
05814
05815 *id = dtd_add_symbol(dtd, buf);
05816
05817 return iskip_layout(dtd, in);
05818 }
05819
05825 static const ichar *
05826 iskip_layout(dtd *dtd, const ichar *in)
05827 {
05828 ichar cmt = dtd->charfunc->func[CF_CMT];
05829
05830
05831 for( ; *in; in++ )
05832 { if ( HasClass(dtd, *in, CH_BLANK) )
05833 continue;
05834
05835 if ( in[0] == cmt && in[1] == cmt )
05836 { in += 2;
05837
05838 for( ; *in; in++ )
05839 { if ( in[0] == cmt && in[1] == cmt )
05840 break;
05841 }
05842 in++;
05843 continue;
05844 }
05845
05846 return in;
05847 }
05848
05849 return in;
05850 }
05851
05858 dtd_symbol *
05859 dtd_add_symbol(dtd *dtd, const ichar *name)
05860 { dtd_symbol_table *t = dtd->symbols;
05861 int k = istrhash(name, t->size);
05862 dtd_symbol *s;
05863
05864 for(s=t->entries[k]; s; s = s->next)
05865 { if ( istreq(s->name, name) )
05866 return s;
05867 }
05868
05869 s = calloc(1, sizeof(*s));
05870 s->name = istrdup(name);
05871 s->next = t->entries[k];
05872 t->entries[k] = s;
05873
05874 return s;
05875 }
05876
05882 static const ichar *
05883 isee_func(dtd *dtd, const ichar *in, charfunc func)
05884 { if ( dtd->charfunc->func[func] == *in )
05885 return ++in;
05886
05887 return NULL;
05888 }
05889
05890
/*
 * process_attributes() -- parse the attribute list of an open tag.
 *
 * `decl` points at the text following the element name.  Each attribute
 * found is stored in `atts`; the number of attributes is stored in
 * *argc.  Two forms are handled:
 *
 *   name = value    attributes not declared for `e` are added on the fly
 *                   as implied CDATA attributes (with a warning unless
 *                   the element is undefined or, in non-SGML dialects,
 *                   the attribute is an xmlns declaration)
 *   value           short-hand for an attribute of `e` declared as a
 *                   name group (or notation) that contains `value`
 *
 * Returns the position where parsing stopped, or NULL after an error
 * that makes it impossible to continue.
 * NOTE(review): nothing limits the number of entries written to `atts`;
 * the caller must provide a sufficiently large array.
 */
static const ichar *
process_attributes(dtd_parser *p, dtd_element *e, const ichar *decl,
		   sgml_attribute *atts, int *argc)
{

  int attn = 0;
  dtd *dtd = p->dtd;

  decl = iskip_layout(dtd, decl);

  while(decl && *decl)
  {

    dtd_symbol *nm;
    const ichar *s;


    if ( (s=itake_nmtoken(dtd, decl, &nm)) )
    {
      decl = s;
      if ( (s=isee_func(dtd, decl, CF_VI)) )
      {
	/* name = value */
	dtd_attr *a;

	if ( !HasClass(dtd, nm->name[0], CH_NMSTART) )
	  gripe(ERC_SYNTAX_WARNING, "Illegal start of attribute name\n");
	decl = s;


	/* undeclared attribute: declare it as implied CDATA */
	if ( !(a=find_attribute(e, nm)) )
	{
	  a = calloc(1, sizeof(*a));
	  a->name = nm;
	  a->type = AT_CDATA;
	  a->def = AT_IMPLIED;
	  add_attribute(dtd, e, a);

	  if ( !e->undefined && !(dtd->dialect != DL_SGML && (istreq("xmlns", nm->name) || istrprefix("xmlns:", nm->name))) )
	  { gripe(ERC_NO_ATTRIBUTE, e->name->name, nm->name);
	  }
	}
	atts[attn].definition = a;

	/* a NULL result leaves decl == NULL, which ends the loop */
	if ( (decl=get_attribute_value(p, decl, atts+attn)) )
	{
	  attn++;
	  continue;
	}

      }
      else if ( e->structure )
      {
	/* value short-hand: look for a name-group attribute that has
	   this token as one of its values */
	dtd_attr_list *al;
	for(al=e->attributes; al; al=al->next)
	{
	  dtd_attr *a = al->attribute;

	  if ( a->type == AT_NAMEOF || a->type == AT_NOTATION )
	  {
	    dtd_name_list *nl;

	    for(nl=a->typeex.nameof; nl; nl = nl->next)
	    {
	      if ( nl->value == nm )
	      {
		if ( dtd->dialect != DL_SGML )
		{
		  gripe(ERC_SYNTAX_WARNING, "Value short-hand in XML mode", decl);
		}
		atts[attn].definition = a;
		atts[attn].value.cdata = NULL;
		atts[attn].value.number = 0;
		atts[attn].value.text = istrdup(nm->name);
		attn++;
		goto next;
	      }
	    }
	  }
	}

	/* `s` is NULL here (isee_func() failed), so this stops the loop */
	gripe(ERC_NO_ATTRIBUTE_VALUE, e->name->name, nm->name);
	decl = s;
      } else
      {
	/* `s` is NULL here as well */
	gripe(ERC_SYNTAX_ERROR, "Bad attribute", decl);
	decl = s;
      }
    } else
    {
      /* not a name token: end of the attribute list */
      *argc = attn;
      return decl;
    }
  next:
    ;
  }
  *argc = attn;
  return decl;
}
05996
05997
06003 static int
06004 set_option_dtd( dtd *dtd, dtd_option option, char * set)
06005 {
06006 switch(option)
06007 {
06008 case OPT_SHORTTAG:
06009 if ( !strcmp( set, "FALSE") || !strcmp( set, "false"))
06010 {
06011 dtd->shorttag = FALSE;
06012 }
06013 else
06014 {
06015 dtd->shorttag = TRUE;
06016 }
06017 break;
06018 }
06019 return TRUE;
06020 }
06021
06022
06029 static int
06030 add_default_attributes(dtd_parser *p, dtd_element *e,
06031 int natts, sgml_attribute *atts)
06032 {
06033
06034 dtd_attr_list *al;
06035
06036 if ( e == CDATA_ELEMENT )
06037 {
06038 return natts;
06039 }
06040
06041 for(al=e->attributes; al; al=al->next)
06042 { dtd_attr *a = al->attribute;
06043
06044 switch(a->def)
06045 { case AT_REQUIRED:
06046 case AT_CURRENT:
06047 case AT_CONREF:
06048 case AT_IMPLIED:
06049 goto next;
06050 case AT_FIXED:
06051 case AT_DEFAULT:
06052 { int i;
06053 sgml_attribute *ap;
06054
06055 for(i=0, ap=atts; i<natts; i++, ap++)
06056 { if ( ap->definition == a )
06057 goto next;
06058 }
06059
06060 ap->definition = a;
06061 ap->value.cdata = NULL;
06062 ap->value.text = NULL;
06063 ap->value.number = 0;
06064 ap->flags = SGML_AT_DEFAULT;
06065
06066 switch(a->type)
06067 { case AT_CDATA:
06068 ap->value.cdata = a->att_def.cdata;
06069 break;
06070 case AT_NUMBER:
06071 if ( p->dtd->number_mode == NU_TOKEN )
06072 ap->value.text = (ichar *)a->att_def.name->name;
06073 else
06074 ap->value.number = a->att_def.number;
06075 break;
06076 default:
06077 if ( a->islist )
06078 ap->value.text = a->att_def.list;
06079 else
06080 ap->value.text = (ichar *)a->att_def.name->name;
06081 }
06082
06083 natts++;
06084 }
06085 }
06086 next:;
06087 }
06088
06089 return natts;
06090 }
06091
06099 static ichar const *
06100 get_attribute_value(dtd_parser *p, ichar const *decl, sgml_attribute *att)
06101 {
06102 ichar tmp[MAXSTRINGLEN];
06103 ichar *buf = tmp;
06104 ochar cdata[MAXSTRINGLEN];
06105 ichar const *s;
06106 ichar *d;
06107 ichar c;
06108 dtd *dtd = p->dtd;
06109 ichar const *end;
06110
06111
06112 enum
06113 {
06114 DIG_FIRST = 8,
06115 NAM_FIRST = 4,
06116 NAM_LATER = 2,
06117 ANY_OTHER = 1,
06118 YET_EMPTY = 0
06119 }token;
06120
06121 token = YET_EMPTY;
06122
06123 end = itake_string(dtd, decl, tmp, sizeof (tmp));
06124
06125 if (end != NULL)
06126 {
06127 if (att->definition->type == AT_CDATA)
06128 {
06129 int hasent = FALSE, hasento = FALSE;
06130 ichar const ero = dtd->charfunc->func[CF_ERO];
06131
06132
06133
06134
06135 ichar const erc = dtd->charfunc->func[CF_ERC];
06136
06137 ichar *q;
06138
06139 for (d = q = tmp; *d; *q++ = *d++)
06140 {
06141 if ( d[0] == CR && d[1] == LF )
06142 d++;
06143 if (HasClass(dtd, *d, CH_BLANK))
06144 {
06145 *d = ' ';
06146 }
06147 else if (*d == ero)
06148 {
06149 hasento = TRUE;
06150 }
06151 else if( hasento == TRUE && (*d == erc || *d == ero || *d == '@' || HasClass( dtd, *d, CH_WHITE) || HasClass( dtd, *d, CH_RE) || HasClass( dtd, *d, CH_RS) ))
06152 {
06153 hasent = TRUE;
06154 }
06155 else if( hasento == TRUE && (*d != erc && *d != ero && *d != '@' && !HasClass( dtd, *d, CH_WHITE) && !HasClass( dtd, *d, CH_NAME) && !HasClass( dtd, *d, CH_RE) && !HasClass( dtd, *d, CH_RS) ))
06156 {
06157 hasento = FALSE;
06158 hasento = FALSE;
06159 }
06160
06161 #ifdef UTF8
06162 else if ( p->utf8_decode && ISUTF8_MB(*d) )
06163 {
06164 hasent = TRUE;
06165 }
06166 #endif
06167
06168
06169 }
06170 *q = '\0';
06171 if (hasent)
06172 {
06173 expand_entities(p, tmp, cdata, MAXSTRINGLEN);
06174 buf = (ichar *) cdata;
06175 hasent = hasento = FALSE;
06176 }
06177 }
06178 else
06179 {
06180 ichar *d;
06181 expand_entities(p, tmp, cdata, MAXSTRINGLEN);
06182 buf = (ichar *) cdata;
06183
06184
06185 s = buf;
06186 while ((c = *s++) != '\0' && HasClass(dtd, c, CH_BLANK))
06187 { }
06188 d = buf;
06189 while (c != '\0')
06190 {
06191 token |= HasClass(dtd, c, CH_DIGIT) ? DIG_FIRST : HasClass(dtd, c, CH_NAME) ? NAM_FIRST : ANY_OTHER;
06192 if (d != buf)
06193 *d++ = ' ';
06194
06195 if (dtd->case_sensitive)
06196 {
06197 *d++ = c;
06198 while ((c = *s++) != '\0' && !HasClass(dtd, c, CH_BLANK))
06199 {
06200 token |= HasClass(dtd, c, CH_DIGIT) ? 0: HasClass(dtd, c, CH_NAME) ? NAM_LATER : ANY_OTHER;
06201 *d++ = c;
06202 }
06203 }
06204 else
06205 {
06206 *d++ = tolower(c);
06207 while ((c = *s++) != '\0' && !HasClass(dtd, c, CH_BLANK))
06208 {
06209 token |= HasClass(dtd, c, CH_DIGIT) ? 0
06210 : HasClass(dtd, c, CH_NAME) ? NAM_LATER : ANY_OTHER;
06211 *d++ = tolower(c);
06212 }
06213 }
06214 while (c != '\0' && HasClass(dtd, c, CH_BLANK))
06215 c = *s++;
06216 }
06217 *d = '\0';
06218 }
06219 }
06220 else
06221 {
06222 end = itake_unquoted(dtd, decl, tmp, sizeof tmp);
06223 if (end == NULL)
06224 return NULL;
06225
06226 s = buf;
06227 c = *s++;
06228 if (c != '\0')
06229 {
06230 token |= HasClass(dtd, c, CH_DIGIT) ? DIG_FIRST
06231 : HasClass(dtd, c, CH_NAME) ? NAM_FIRST : ANY_OTHER;
06232 while ((c = *s++) != 0)
06233 {
06234 token |= HasClass(dtd, c, CH_DIGIT) ? 0 : HasClass(dtd, c, CH_NAME) ? NAM_LATER : ANY_OTHER;
06235 }
06236 }
06237 if ( token == YET_EMPTY || (token & ANY_OTHER) != 0)
06238 gripe(ERC_SYNTAX_WARNING, "Attribute value requires quotes", buf);
06239
06240 if (!dtd->case_sensitive && att->definition->type != AT_CDATA)
06241 istrlower(buf);
06242 }
06243
06244 att->value.cdata = NULL;
06245 att->value.text = NULL;
06246 att->value.number = 0;
06247 att->flags = 0;
06248
06249 switch (att->definition->type)
06250 {
06251
06252 case AT_NUMBER:
06253 if (token != DIG_FIRST)
06254 {
06255 gripe(ERC_SYNTAX_WARNING, "NUMBER expected", decl);
06256 }
06257 else if (dtd->number_mode == NU_INTEGER)
06258 {
06259 (void) istrtol(buf, &att->value.number);
06260 }
06261 else
06262 {
06263 att->value.text = istrdup(buf);
06264 }
06265 return end;
06266 case AT_CDATA:
06267 att->value.cdata = ostrdup((ochar *) buf);
06268 return end;
06269 case AT_ID:
06270 case AT_IDREF:
06271 case AT_NAME:
06272 case AT_NOTATION:
06273 if (token == YET_EMPTY || (token & (DIG_FIRST | ANY_OTHER)) != 0)
06274 gripe(ERC_SYNTAX_WARNING, "NAME expected", decl);
06275 break;
06276 case AT_NAMEOF:
06277 case AT_NMTOKEN:
06278 if (token == YET_EMPTY || (token & ANY_OTHER) != 0)
06279 gripe(ERC_SYNTAX_WARNING, "NMTOKEN expected", decl);
06280 if ( att->definition->type == AT_NAMEOF )
06281 {
06282 dtd_name_list *nl;
06283
06284 for(nl=att->definition->typeex.nameof; nl; nl = nl->next)
06285 {
06286 if ( istreq(nl->value->name, buf) )
06287 goto passed;
06288 }
06289 gripe(ERC_SYNTAX_WARNING, "unexpected value", decl);
06290 }
06291 break;
06292 case AT_NUTOKEN:
06293 if ((token & (NAM_FIRST | ANY_OTHER)) != 0)
06294 gripe(ERC_SYNTAX_WARNING, "NUTOKEN expected", decl);
06295 break;
06296 case AT_ENTITY:
06297 if (token == YET_EMPTY || (token & (DIG_FIRST | ANY_OTHER)) != 0)
06298 gripe(ERC_SYNTAX_WARNING, "entity NAME expected", decl);
06299 break;
06300 case AT_NAMES:
06301 case AT_IDREFS:
06302 if (token == YET_EMPTY || (token & (DIG_FIRST | ANY_OTHER)) != 0)
06303 gripe(ERC_SYNTAX_WARNING, "NAMES expected", decl);
06304 break;
06305 case AT_ENTITIES:
06306 if (token == YET_EMPTY || (token & (DIG_FIRST | ANY_OTHER)) != 0)
06307 gripe(ERC_SYNTAX_WARNING, "entity NAMES expected", decl);
06308 break;
06309 case AT_NMTOKENS:
06310 if (token == YET_EMPTY || (token & ANY_OTHER) != 0)
06311 gripe(ERC_SYNTAX_WARNING, "NMTOKENS expected", decl);
06312 break;
06313 case AT_NUMBERS:
06314 if (token != DIG_FIRST)
06315 gripe(ERC_SYNTAX_WARNING, "NUMBERS expected", decl);
06316 break;
06317 case AT_NUTOKENS:
06318 if ((token & (NAM_FIRST | ANY_OTHER)) != 0)
06319 gripe(ERC_SYNTAX_WARNING, "NUTOKENS expected", decl);
06320 break;
06321 default:
06322 assert(0);
06323 return NULL;
06324
06325 }
06326 passed:
06327 att->value.text = istrdup(buf);
06328
06329 return end;
06330 }
06331
06337 static ichar const *
06338 itake_unquoted(dtd *dtd, ichar const *in, ichar *out, int len)
06339 { ichar const end2 = dtd->charfunc->func[CF_ETAGO2];
06340 ichar c;
06341
06342
06343 while (c = *in, HasClass(dtd, c, CH_BLANK))
06344 in++;
06345
06346
06347 while ( !HasClass(dtd, c, CH_BLANK) &&
06348 c != '\0' )
06349 { if ( c == end2 && (dtd->shorttag ||
06350 (in[1] == '\0' && dtd->dialect != DL_SGML)) )
06351 break;
06352
06353 if ( --len > 0 )
06354 *out++ = c;
06355 else if ( len == 0 )
06356 gripe(ERC_REPRESENTATION, "Attribute too long");
06357 c = *++in;
06358 }
06359 *out = '\0';
06360
06361
06362
06363
06364
06365 return iskip_layout(dtd, in);
06366 }
06367
06374 static int
06375 expand_entities(dtd_parser *p, const ichar *in, ochar *out, int len)
06376 { const ichar *s;
06377 dtd *dtd = p->dtd;
06378 int ero = dtd->charfunc->func[CF_ERO];
06379 const ochar *map = dtd->charmap->map;
06380
06381 while(*in)
06382 {
06383 if ( *in == ero )
06384 {
06385 const ichar *estart = in;
06386 int chr;
06387
06388 if ( (s=isee_character_entity(dtd, in, &chr)) )
06389 {
06390 if ( chr <= 0 || chr >= OUTPUT_CHARSET_SIZE )
06391 gripe(ERC_REPRESENTATION, "character");
06392 if ( --len <= 0 )
06393 {
06394 return gripe(ERC_REPRESENTATION, "CDATA string too long");
06395 }
06396 *out++ = chr;
06397 in = s;
06398 continue;
06399 }
06400 if ( HasClass(dtd, in[1], CH_NMSTART) )
06401 {
06402 dtd_symbol *id;
06403 dtd_entity *e;
06404 const ichar *eval;
06405 int l;
06406
06407 in = itake_name(dtd, in+1, &id);
06408 if ( isee_func(dtd, in, CF_ERC) || *in == '\n' )
06409 in++;
06410
06411 if ( !(e = id->entity) && !(e=dtd->default_entity) )
06412 {
06413 gripe(ERC_EXISTENCE, "entity", id->name);
06414 in = estart;
06415 goto recover;
06416 }
06417
06418 if ( !(eval = entity_value(p, e, NULL)) )
06419 {
06420 gripe(ERC_NO_VALUE, e->name->name);
06421 in = estart;
06422 goto recover;
06423
06424 }
06425 in = estart;
06426 goto recover;
06427
06428 if ( !expand_entities(p, eval, out, len) )
06429 return FALSE;
06430 l = ostrlen(out);
06431 out += l;
06432 len -= l;
06433
06434 continue;
06435 }
06436 }
06437
06438 recover:
06439 if ( --len <= 0 )
06440 return gripe(ERC_REPRESENTATION, "CDATA string too long");
06441
06442 #ifdef UTF8
06443 if ( p->utf8_decode && ISUTF8_MB(*in) )
06444 {
06445 int chr;
06446
06447 in = __utf8_get_char(in, &chr);
06448 if ( chr >= OUTPUT_CHARSET_SIZE )
06449 gripe(ERC_REPRESENTATION, "character");
06450 *out++ = chr;
06451 }
06452 #endif
06453 *out++ = map[*in++];
06454 }
06455
06456 *out = 0;
06457
06458 return TRUE;
06459 }
06460
06466 static const ichar *
06467 itake_string(dtd *dtd, const ichar *in, ichar *out, int len)
06468 {
06469 in = iskip_layout(dtd, in);
06470
06471 if ( isee_func(dtd, in, CF_LIT) || isee_func(dtd, in, CF_LITA) )
06472 {
06473 ichar q = *in++;
06474
06475 while( *in && *in != q )
06476 {
06477 *out++ = *in++;
06478 if ( --len == 0 )
06479 {
06480 gripe(ERC_SYNTAX_ERROR, "String too long");
06481 return NULL;
06482 }
06483 }
06484 if ( *in )
06485 {
06486 *out = '\0';
06487 return iskip_layout(dtd, ++in);
06488 }
06489 }
06490 return NULL;
06491 }
06492
06498 static dtd_attr *
06499 find_attribute(dtd_element *e, dtd_symbol *name)
06500 { dtd_attr_list *a;
06501
06502 for(a=e->attributes; a; a=a->next)
06503 { if ( a->attribute->name == name )
06504 return a->attribute;
06505 }
06506
06507 return NULL;
06508 }
06509
#ifdef UTF8
/*
 * process_utf8() -- switch the parser into the S_UTF8 state.
 *
 * `chr` is presumably the lead byte of a multi-byte UTF-8 sequence
 * (confirm against the caller).  Starting at bit 0x20, every consecutive
 * set bit adds one to the number of bytes still expected; the bits below
 * the first clear bit are kept as the initial value of the character.
 * The current state is saved in p->utf8_saved_state before it is replaced.
 */
static void
process_utf8(dtd_parser *p, int chr)
{
  int mask = 0x20;
  int bytes = 1;

  while (chr & mask)
  {
    bytes++;
    mask >>= 1;
  }

  p->utf8_saved_state = p->state;
  p->state = S_UTF8;
  p->utf8_char = chr & (mask - 1);	/* payload bits below the marker bit */
  p->utf8_left = bytes;
}
#endif