00001
00002
00003
00004
00005
00006
00007 #ifndef DTD_H_INCLUDED
00008 #define DTD_H_INCLUDED
00009 #include "sgmldefs.h"
00010
/*
 * Character-class bit flags.  Each primitive class occupies one bit so
 * that classes can be OR-ed together and tested with a single mask.
 */
#define CH_WHITE     0x0001
#define CH_LCLETTER  0x0002
#define CH_UCLETTER  0x0004
#define CH_CNMSTRT   0x0008   /* extra class that is part of CH_NMSTART */
#define CH_CNM       0x0010   /* extra class that is part of CH_NAME */
#define CH_DIGIT     0x0020
#define CH_RE        0x0040
#define CH_RS        0x0080

/* Composite masks built from the primitive bits above. */
#define CH_LETTER    (CH_LCLETTER | CH_UCLETTER)
#define CH_NMSTART   (CH_LETTER | CH_CNMSTRT)
#define CH_NAME      (CH_NMSTART | CH_DIGIT | CH_CNM)
#define CH_BLANK     (CH_WHITE | CH_RE | CH_RS)
00024
/* Blank-related flag bits (single-bit values, so they can be combined). */
#define CHR_BLANK   0x1
#define CHR_DBLANK  0x2

/* Magic number; NOTE(review): presumably stored in dtd.magic to tag a
 * valid dtd object -- confirm against the implementation. */
#define SGML_DTD_MAGIC  0x7364573
00029
/*
 * Character-function identifiers.  The values are consecutive from 0 and
 * CF_ENDTABLE is the number of real entries, so it can be used as a table
 * size.  NOTE(review): the names look like the SGML delimiter roles
 * (STAGO, ETAGO, ...); confirm the mapping in the implementation.
 */
typedef enum {
    CF_STAGO = 0,
    CF_STAGC,
    CF_ETAGO1,
    CF_ETAGO2,
    CF_VI,
    CF_NS,
    CF_LIT,
    CF_LITA,
    CF_PERO,
    CF_ERO,
    CF_ERC,
    CF_MDO1,
    CF_MDO2,
    CF_MDC,
    CF_PRO1,
    CF_PRO2,
    CF_PRC,
    CF_GRPO,
    CF_GRPC,
    CF_SEQ,
    CF_AND,
    CF_OR,
    CF_OPT,
    CF_PLUS,
    CF_DSO,
    CF_DSC,
    CF_REP,
    CF_RS,
    CF_RE,
    CF_CMT,
    CF_NG,
    CF_ENDTABLE            /* sentinel: count of the entries above */
} charfunc;
00064
/* Character encodings selectable for a DTD (see dtd.encoding). */
typedef enum {
    ENC_ISO_LATIN1,
    ENC_UTF8
} dtd_char_encoding;
00069
/* Declared content kind of an element definition (see dtd_edef.type). */
typedef enum {
    C_CDATA,
    C_PCDATA,
    C_RCDATA,
    C_EMPTY,
    C_ANY
} contenttype;
00077
/* Cardinality of a content-model node (see dtd_model.cardinality). */
typedef enum {
    MC_ONE,
    MC_OPT,
    MC_REP,
    MC_PLUS
} modelcard;
00084
/* Kind of a content-model node (see dtd_model.type).  MT_UNDEF is 0, so
 * a zero-initialised node has an undefined type. */
typedef enum {
    MT_UNDEF = 0,
    MT_PCDATA,
    MT_ELEMENT,
    MT_SEQ,
    MT_AND,
    MT_OR
} modeltype;
00093
/* Declared value type of an attribute (see dtd_attr.type). */
typedef enum {
    AT_CDATA,
    AT_ENTITY,
    AT_ENTITIES,
    AT_ID,
    AT_IDREF,
    AT_IDREFS,
    AT_NAME,
    AT_NAMES,
    AT_NAMEOF,
    AT_NMTOKEN,
    AT_NMTOKENS,
    AT_NOTATION,
    AT_NUMBER,
    AT_NUMBERS,
    AT_NUTOKEN,
    AT_NUTOKENS
} attrtype;
00112
/* Default-value declaration of an attribute (see dtd_attr.def). */
typedef enum {
    AT_FIXED,
    AT_REQUIRED,
    AT_CURRENT,
    AT_CONREF,
    AT_IMPLIED,
    AT_DEFAULT
} attrdef;
00121
00122
/* How an entity or notation is identified (used by dtd_entity.type and
 * dtd_notation.type). */
typedef enum {
    ET_SYSTEM,
    ET_PUBLIC,
    ET_LITERAL
} entity_type;
00128
00129
/* Kind of data held by an entity (see dtd_entity.content). */
typedef enum {
    EC_SGML,
    EC_STARTTAG,
    EC_ENDTAG,
    EC_CDATA,
    EC_SDATA,
    EC_NDATA,
    EC_PI
} data_type;
00139
00140
/* Markup dialect a DTD is handled as (see dtd.dialect). */
typedef enum {
    DL_SGML,
    DL_XML,
    DL_XMLNS
} dtd_dialect;
00146
00147
/* Options accepted by set_option_dtd(); only one is defined here. */
typedef enum {
    OPT_SHORTTAG
} dtd_option;
00151
00152
/* White-space handling modes (used by dtd.space_mode and
 * dtd_element.space_mode).  SP_PRESERVE is 0, so it is what a
 * zero-initialised field holds. */
typedef enum {
    SP_PRESERVE = 0,
    SP_DEFAULT,
    SP_REMOVE,
    SP_SGML,
    SP_INHERIT
} dtd_space_mode;
00160
00161
/* Treatment of number-typed attributes (see dtd.number_mode). */
typedef enum {
    NU_TOKEN,
    NU_INTEGER
} dtd_number_mode;
00166
00167
00168
00169
00170
00171
/* Translation units that define DTD_IMPLEMENTATION before including this
 * header also get DTD_MINOR_ERRORS defined to 1; all others do not. */
#if defined(DTD_IMPLEMENTATION)
#define DTD_MINOR_ERRORS 1
#endif
00175
/* Severity attached to a reported problem (see dtd_error.severity). */
typedef enum {
    ERS_WARNING,
    ERS_ERROR,
    ERS_STYLE
} dtd_error_severity;
00181
00182
/*
 * Error identifiers (used for both dtd_error.id and dtd_error.minor).
 * Values are consecutive from 0 in the order listed; do not reorder.
 *
 * The last enumerator deliberately has no trailing comma: a trailing
 * comma in an enumerator list is only valid from C99 on, and every other
 * enum in this header avoids it.
 */
typedef enum {
    ERC_REPRESENTATION,
    ERC_RESOURCE,
    ERC_LIMIT,
    ERC_VALIDATE,
    ERC_SYNTAX_ERROR,
    ERC_EXISTENCE,
    ERC_REDEFINED,
    ERC_SYNTAX_WARNING,
    ERC_DOMAIN,
    ERC_OMITTED_CLOSE,
    ERC_OMITTED_OPEN,
    ERC_NOT_OPEN,
    ERC_NOT_ALLOWED,
    ERC_NOT_ALLOWED_PCDATA,
    ERC_NO_ATTRIBUTE,
    ERC_NO_ATTRIBUTE_VALUE,
    ERC_NO_VALUE,
    ERC_NO_DOCTYPE
} dtd_error_id;
00221
00222
/* Kind of input a source location refers to (see dtd_srcloc.type). */
typedef enum {
    IN_NONE,
    IN_FILE,
    IN_ENTITY
} input_type;
00228
00229
00230 typedef struct _dtd_srcloc
00231 { input_type type;
00232 const char *name;
00233 int line;
00234 int linepos;
00235 long charpos;
00236 struct _dtd_srcloc *parent;
00237 } dtd_srcloc;
00238
00239
00240 typedef struct _dtd_error
00241 { dtd_error_id id;
00242 dtd_error_id minor;
00243 dtd_error_severity severity;
00244 dtd_srcloc *location;
00245 char *plain_message;
00246 char *message;
00247
00248 char *argv[2];
00249 } dtd_error;
00250
00251
00252
00253
00254
00255
00256 typedef struct _dtd_symbol
00257 { ichar *name;
00258 struct _dtd_symbol *next;
00259 struct _dtd_element *element;
00260 struct _dtd_entity *entity;
00261 } dtd_symbol;
00262
00263
00264 typedef struct _dtd_symbol_table
00265 { int size;
00266 dtd_symbol **entries;
00267 } dtd_symbol_table;
00268
00269
00270 typedef struct _dtd_entity
00271 { dtd_symbol *name;
00272 entity_type type;
00273 data_type content;
00274 int catalog_location;
00275 int length;
00276 ichar *value;
00277 ichar *extid;
00278 ichar *exturl;
00279 ichar *baseurl;
00280 struct _dtd_entity *next;
00281 } dtd_entity;
00282
00283
00284 typedef struct _dtd_notation
00285 { dtd_symbol *name;
00286 entity_type type;
00287 ichar *public;
00288 ichar *system;
00289 struct _dtd_notation *next;
00290 } dtd_notation;
00291
00292
/* Singly linked list cell holding a pointer to an element. */
typedef struct _dtd_element_list {
    struct _dtd_element      *value;   /* element referenced by this cell */
    struct _dtd_element_list *next;    /* following cell, or null at the end */
} dtd_element_list;
00297
00298
00299 typedef struct _dtd_name_list
00300 { dtd_symbol *value;
00301 struct _dtd_name_list *next;
00302 } dtd_name_list;
00303
00304
00305 typedef struct _dtd_attr
00306 { dtd_symbol *name;
00307 attrtype type;
00308 attrdef def;
00309 int islist;
00310 union
00311 { dtd_name_list *nameof;
00312 } typeex;
00313 union
00314 { ochar *cdata;
00315 ichar *list;
00316 dtd_symbol *name;
00317 long number;
00318 } att_def;
00319 int references;
00320 } dtd_attr;
00321
00322
00323 typedef struct _dtd_attr_list
00324 { dtd_attr *attribute;
00325 struct _dtd_attr_list *next;
00326 } dtd_attr_list;
00327
00328
00329 typedef struct _dtd_model
00330 { modeltype type;
00331 modelcard cardinality;
00332
00333 union
00334 { struct _dtd_model *group;
00335 struct _dtd_element *element;
00336 } content;
00337 struct _dtd_model *next;
00338 } dtd_model;
00339
00340
00341 typedef struct _dtd_edef
00342 { contenttype type;
00343 int omit_open;
00344 int omit_close;
00345 dtd_model *content;
00346 dtd_element_list *included;
00347 dtd_element_list *excluded;
00348 struct _dtd_state *initial_state;
00349 struct _dtd_state *final_state;
00350 int references;
00351 } dtd_edef;
00352
00353
00354 typedef struct _dtd_map
00355 { ichar *from;
00356 int len;
00357 dtd_symbol *to;
00358 struct _dtd_map *next;
00359 } dtd_map;
00360
00361
00362 typedef struct _dtd_shortref
00363 { dtd_symbol *name;
00364 dtd_map *map;
00365 char ends[ICHARSET_SIZE];
00366 int defined;
00367 struct _dtd_shortref *next;
00368 } dtd_shortref;
00369
00370
00371 typedef struct _dtd_element
00372 { dtd_symbol *name;
00373 dtd_edef *structure;
00374 dtd_attr_list *attributes;
00375 dtd_space_mode space_mode;
00376 dtd_shortref *map;
00377 int undefined;
00378 struct _dtd_element *next;
00379 } dtd_element;
00380
00381
00382 typedef struct _dtd_charmap
00383 { ochar map[INPUT_CHARSET_SIZE];
00384 } dtd_charmap;
00385
00386
00387 typedef struct _dtd_charclass
00388 { unsigned char class[INPUT_CHARSET_SIZE];
00389 } dtd_charclass;
00390
00391
00392 typedef struct _dtd_charfunc
00393 { ichar func[(int)CF_ENDTABLE];
00394 } dtd_charfunc;
00395
00396
00397 typedef struct _dtd
00398 { int magic;
00399 int implicit;
00400 dtd_dialect dialect;
00401 int case_sensitive;
00402 int ent_case_sensitive;
00403 ichar *doctype;
00404 dtd_symbol_table *symbols;
00405 dtd_entity *pentities;
00406 dtd_entity *entities;
00407 dtd_entity *default_entity;
00408 dtd_notation *notations;
00409 dtd_shortref *shortrefs;
00410 dtd_element *elements;
00411 dtd_charfunc *charfunc;
00412 dtd_charclass *charclass;
00413 dtd_charmap *charmap;
00414 dtd_char_encoding encoding;
00415 dtd_space_mode space_mode;
00416 dtd_number_mode number_mode;
00417 int shorttag;
00418 int references;
00419 } dtd;
00420
00421 extern dtd_charfunc *new_charfunc(void);
00422 extern dtd_charclass *new_charclass(void);
00423 extern dtd_charmap *new_charmap(void);
00424
00425 extern dtd_symbol* dtd_find_symbol(dtd *dtd, const ichar *name);
00426 extern dtd_symbol* dtd_add_symbol(dtd *dtd, const ichar *name);
00427
00428
00429
00430
00431
00432
00433 #include "parser.h"
00434
00435 dtd * file_to_dtd(const char *file, const char *doctype,
00436 dtd_dialect dialect);
00437 int sgml_process_file(dtd_parser *p,
00438 const char *file, unsigned flags);
00439
00440 dtd_parser * new_dtd_parser(dtd *dtd);
00441 void free_dtd_parser(dtd_parser *p);
00442
00443 void free_dtd(dtd *dtd);
00444 int load_dtd_from_file(dtd_parser *p, const char *file);
00445 dtd * new_dtd(const ichar *doctype);
00446 int set_dialect_dtd(dtd *dtd, dtd_dialect dialect);
00447 static int set_option_dtd(dtd *dtd, dtd_option option, char * set);
00448
00449 void putchar_dtd_parser(dtd_parser *p, int chr);
00450 int begin_document_dtd_parser(dtd_parser *p);
00451 int end_document_dtd_parser(dtd_parser *p);
00452 void reset_document_dtd_parser(dtd_parser *p);
00453 void set_src_dtd_parser(dtd_parser *p,
00454 input_type in, const char *file);
00455 void set_mode_dtd_parser(dtd_parser *p, data_mode mode);
00456 void sgml_cplocation(dtd_srcloc *dst, dtd_srcloc *src);
00457
00458 #endif
00459
00460