dtd.h

00001 /*****************************************************************************
00002  *                           dtd.h
00003  * This file defines constants, macros and enumerations useful in parsing
00004  *
00005  ****************************************************************************/
00006 
00007 #ifndef DTD_H_INCLUDED
00008 #define DTD_H_INCLUDED
00009 #include "sgmldefs.h"
00010 
/*
 * Character-class bits.  Every basic CH_* constant occupies a bit of its
 * own, so several classes can be OR-ed together into one mask.
 */
#define CH_WHITE        (1 << 0)
#define CH_LCLETTER     (1 << 1)
#define CH_UCLETTER     (1 << 2)
#define CH_CNMSTRT      (1 << 3)        /* character may start a name */
#define CH_CNM          (1 << 4)        /* character may occur in a name */
#define CH_DIGIT        (1 << 5)
#define CH_RE           (1 << 6)
#define CH_RS           (1 << 7)

/* Composite classes assembled from the bits above */
#define CH_LETTER       (CH_LCLETTER | CH_UCLETTER)
#define CH_NMSTART      (CH_LETTER | CH_CNMSTRT)
#define CH_NAME         (CH_NMSTART | CH_CNM | CH_DIGIT)
#define CH_BLANK        (CH_WHITE | CH_RE | CH_RS)

/* SHORTREF markers */
#define CHR_BLANK       0x01            /* SHORTREF 'B' */
#define CHR_DBLANK      0x02            /* SHORTREF 'BB' */

/* Value for the `magic' member of a dtd */
#define SGML_DTD_MAGIC  0x7364573
00029 
/*
 * charfunc: identifies the function a character fulfils.  The trailing
 * comment on each entry shows the character associated with it.  Delimiters
 * of two characters are split into a ...1 and a ...2 entry (e.g. MDO is
 * "<!").  CF_ENDTABLE is not a function itself; it yields the number of
 * entries.
 */
typedef enum {
    CF_STAGO    = 0,    /* < */
    CF_STAGC    = 1,    /* > */
    CF_ETAGO1   = 2,    /* < */
    CF_ETAGO2   = 3,    /* / */
    CF_VI       = 4,    /* = */
    CF_NS       = 5,    /* : (XMLNS) */
    CF_LIT      = 6,    /* " */
    CF_LITA     = 7,    /* ' */
    CF_PERO     = 8,    /* % */
    CF_ERO      = 9,    /* & */
    CF_ERC      = 10,   /* ; */
    CF_MDO1     = 11,   /* < */
    CF_MDO2     = 12,   /* ! (MDO=<!) */
    CF_MDC      = 13,   /* > */
    CF_PRO1     = 14,   /* < */
    CF_PRO2     = 15,   /* ? (PRO=<?) */
    CF_PRC      = 16,   /* > */
    CF_GRPO     = 17,   /* ( */
    CF_GRPC     = 18,   /* ) */
    CF_SEQ      = 19,   /* , */
    CF_AND      = 20,   /* & */
    CF_OR       = 21,   /* | */
    CF_OPT      = 22,   /* ? */
    CF_PLUS     = 23,   /* + */
    CF_DSO      = 24,   /* [ */
    CF_DSC      = 25,   /* ] */
    CF_REP      = 26,   /* * */
    CF_RS       = 27,   /* \n */
    CF_RE       = 28,   /* \r */
    CF_CMT      = 29,   /* - */
    CF_NG       = 30,   /* , or & or | */
    CF_ENDTABLE = 31    /* number of entries; used to size tables */
} charfunc;
00064 
/* Character encodings of a document */
typedef enum {
    ENC_ISO_LATIN1 = 0, /* ISO-Latin-1 */
    ENC_UTF8       = 1  /* multi-byte UTF-8 encoding */
} dtd_char_encoding;
00069 
/* Content type of an element */
typedef enum {
    C_CDATA  = 0,       /* pure cdata */
    C_PCDATA = 1,       /* parsed character data */
    C_RCDATA = 2,       /* pure cdata + entities */
    C_EMPTY  = 3,       /* empty element */
    C_ANY    = 4        /* element may contain anything */
} contenttype;
00077 
/* Cardinality of a content-model node */
typedef enum {
    MC_ONE  = 0,        /* exactly once */
    MC_OPT  = 1,        /* optional (?) */
    MC_REP  = 2,        /* any number of times (*) */
    MC_PLUS = 3         /* one or more times (+) */
} modelcard;
00084 
/* Kind of a content-model node */
typedef enum {
    MT_UNDEF   = 0,     /* undefined */
    MT_PCDATA  = 1,     /* contains PCDATA */
    MT_ELEMENT = 2,     /* refers to an element */
    MT_SEQ     = 3,     /* sequence (,) */
    MT_AND     = 4,     /* any order (&) */
    MT_OR      = 5      /* disjunction (|) */
} modeltype;
00093 
/* Declared type of an attribute */
typedef enum {
    AT_CDATA    = 0,    /* CDATA attribute */
    AT_ENTITY   = 1,    /* entity-name */
    AT_ENTITIES = 2,    /* entity-name list */
    AT_ID       = 3,    /* identifier */
    AT_IDREF    = 4,    /* identifier reference */
    AT_IDREFS   = 5,    /* list of identifier references */
    AT_NAME     = 6,    /* name token */
    AT_NAMES    = 7,    /* list of names */
    AT_NAMEOF   = 8,    /* one of these names */
    AT_NMTOKEN  = 9,    /* name-token */
    AT_NMTOKENS = 10,   /* name-token list */
    AT_NOTATION = 11,   /* notation-name */
    AT_NUMBER   = 12,   /* number */
    AT_NUMBERS  = 13,   /* number list */
    AT_NUTOKEN  = 14,   /* number token */
    AT_NUTOKENS = 15    /* number token list */
} attrtype;
00112 
/* How the default of an attribute is specified */
typedef enum {
    AT_FIXED    = 0,    /* fixed value */
    AT_REQUIRED = 1,    /* required attribute */
    AT_CURRENT  = 2,    /* most recent value */
    AT_CONREF   = 3,    /* cross-reference */
    AT_IMPLIED  = 4,    /* implied attribute */
    AT_DEFAULT  = 5     /* has a default */
} attrdef;
00121 
00122 
/* Where the value of an entity comes from */
typedef enum {
    ET_SYSTEM  = 0,     /* system (file) entity */
    ET_PUBLIC  = 1,     /* public (external) entity */
    ET_LITERAL = 2      /* literal text */
} entity_type;
00128 
00129 
/* Kind of data an entity holds */
typedef enum {
    EC_SGML     = 0,    /* SGML data */
    EC_STARTTAG = 1,    /* SGML start-tag */
    EC_ENDTAG   = 2,    /* SGML end-tag */
    EC_CDATA    = 3,    /* CDATA entity */
    EC_SDATA    = 4,    /* SDATA entity */
    EC_NDATA    = 5,    /* non-SGML data */
    EC_PI       = 6     /* processing instruction */
} data_type;
00139 
00140 
/* Markup dialect */
typedef enum {
    DL_SGML  = 0,       /* use SGML */
    DL_XML   = 1,       /* use XML */
    DL_XMLNS = 2        /* use XML + namespaces */
} dtd_dialect;
00146 
00147 
/* Options of a dtd (see the option argument of set_option_dtd()) */
typedef enum {
    OPT_SHORTTAG = 0    /* do/don't accept shorttag */
} dtd_option;
00151 
00152 
/* White-space handling modes */
typedef enum {
    SP_PRESERVE = 0,    /* preserve all white-space */
    SP_DEFAULT  = 1,    /* default space handling */
    SP_REMOVE   = 2,    /* remove all blank CDATA elements */
    SP_SGML     = 3,    /* compliant SGML mode */
    SP_INHERIT  = 4     /* DTD: inherit from environment */
} dtd_space_mode;
00160 
00161 
/* Treatment of numbers */
typedef enum {
    NU_TOKEN   = 0,     /* treat numbers as tokens */
    NU_INTEGER = 1      /* convert to integer */
} dtd_number_mode;
00166 
00167 
00168                  /*******************************
00169                  *            ERRORS            *
00170                  *******************************/
00171 
/* DTD_MINOR_ERRORS is only set when DTD_IMPLEMENTATION is defined by the includer */
#if defined(DTD_IMPLEMENTATION)
#  define DTD_MINOR_ERRORS 1
#endif
00175 
/* Severity attached to a reported error */
typedef enum {
    ERS_WARNING = 0,    /* probably correct result */
    ERS_ERROR   = 1,    /* probably incorrect result */
    ERS_STYLE   = 2     /* dubious/bad style; correct result */
} dtd_error_severity;
00181 
00182 
/*
 * dtd_error_id: identifiers of the errors that can be reported.
 *
 * The "args:" note on each entry reproduces the argument list given for
 * that code (presumably what is passed along as context arguments, cf.
 * dtd_error.argv -- confirm against the error-reporting code).
 *
 * The last enumerator deliberately has no trailing comma: a comma there is
 * only valid from C99 on and is rejected by strict C90 compilers.
 */
typedef enum
{ ERC_REPRESENTATION,                   /* Internal limit; args: id */
  ERC_RESOURCE,                         /* External limit; args: id */
  ERC_LIMIT,                            /* Exceeded SGML limit; args: id */
  ERC_VALIDATE,                         /* DTD validation; args: Message */
  ERC_SYNTAX_ERROR,                     /* Syntax error; args: Message, found */
  ERC_EXISTENCE,                        /* Existence error; args: Type, name */
  ERC_REDEFINED,                        /* Redefined object; args: Type, name */
  ERC_SYNTAX_WARNING,                   /* Syntax warning (i.e. fixed); */
                                        /* args: Message, found */
  ERC_DOMAIN,                           /* Relative to declared type; */
                                        /* args: Type, found */
  ERC_OMITTED_CLOSE,                    /* args: Element */
  ERC_OMITTED_OPEN,                     /* args: Element */
  ERC_NOT_OPEN,                         /* args: Element */
  ERC_NOT_ALLOWED,                      /* args: Element */
  ERC_NOT_ALLOWED_PCDATA,               /* args: Text */
  ERC_NO_ATTRIBUTE,                     /* args: Element, Attribute */
  ERC_NO_ATTRIBUTE_VALUE,               /* args: Element, Value */
  ERC_NO_VALUE,                         /* args: Entity */
  ERC_NO_DOCTYPE                        /* args: Implicit, file */
} dtd_error_id;
00221 
00222 
/* Origin of the input being read */
typedef enum {
    IN_NONE   = 0,      /* unspecified input */
    IN_FILE   = 1,      /* input from file */
    IN_ENTITY = 2       /* input from entity */
} input_type;
00228 
00229 
00230 typedef struct _dtd_srcloc
00231 { input_type  type;                     /* type of input */
00232   const char *name;                     /* name of the file */
00233   int         line;                     /* 1-based Line no */
00234   int         linepos;                  /* 1-based char  */
00235   long        charpos;                  /* 0-based file char  */
00236   struct _dtd_srcloc *parent;           /* parent location */
00237 } dtd_srcloc;
00238 
00239 
00240 typedef struct _dtd_error
00241 { dtd_error_id id;                      /* ERC_* identifier */
00242   dtd_error_id minor;                   /* Minor code */
00243   dtd_error_severity severity;          /* ERS_* severity */
00244   dtd_srcloc *location;                 /* location of the error */
00245   char *plain_message;                  /* Clean message */
00246   char *message;                        /* complete message */
00247                                         /* (Warning: file:line: <plain>) */
00248   char *argv[2];                        /* context arguments */
00249 } dtd_error;
00250 
00251 
00252                  /*******************************
00253                  *           DTD TYPES          *
00254                  *******************************/
00255 
00256 typedef struct _dtd_symbol
00257 { ichar *name;                  /* name of the atom */
00258   struct _dtd_symbol *next;             /* next in atom list */
00259   struct _dtd_element *element;         /* connected element (if any) */
00260   struct _dtd_entity  *entity;          /* connected entity (if any) */
00261 } dtd_symbol;
00262 
00263 
00264 typedef struct _dtd_symbol_table
00265 { int           size;                   /* Allocated size */
00266   dtd_symbol  **entries;                /* Entries */
00267 } dtd_symbol_table;
00268 
00269 
00270 typedef struct _dtd_entity
00271 { dtd_symbol *name;                     /* its name */
00272   entity_type type;                     /* ET_* */
00273   data_type content;                    /* EC_* */
00274   int catalog_location;                 /* what catalog to use for lookup */
00275   int length;                           /* size of literal value */
00276   ichar *value;                         /* literal value */
00277   ichar *extid;                         /* external identifier */
00278   ichar *exturl;                        /* url to fetch from */
00279   ichar *baseurl;                       /* base url for exturl */
00280   struct _dtd_entity *next;             /* list-link */
00281 } dtd_entity;
00282 
00283 
00284 typedef struct _dtd_notation
00285 { dtd_symbol *name;                     /* name of the notation */
00286   entity_type type;                     /* ET_{PUBLIC|SYSTEM} */
00287   ichar *public;                        /* public id */
00288   ichar *system;                        /* file with info */
00289   struct _dtd_notation *next;           /* list-link */
00290 } dtd_notation;
00291 
00292 
/* Singly linked list of elements */
typedef struct _dtd_element_list {
    struct _dtd_element      *value;    /* the element */
    struct _dtd_element_list *next;     /* next in list */
} dtd_element_list;
00297 
00298 
00299 typedef struct _dtd_name_list
00300 { dtd_symbol    *value;
00301   struct _dtd_name_list *next;
00302 } dtd_name_list;
00303 
00304 
00305 typedef struct _dtd_attr
00306 { dtd_symbol  *name;                    /* name of attribute */
00307   attrtype type;                        /* type (AT_*) */
00308   attrdef  def;                         /* AT_REQUIRED/AT_IMPLIED */
00309   int islist;                           /* attribute is a list */
00310   union
00311   { dtd_name_list *nameof;              /* (name1|name2|...) */
00312   } typeex;
00313   union
00314   { ochar *cdata;                       /* default for CDATA */
00315     ichar *list;                        /* text for list-data */
00316     dtd_symbol *name;                   /* AT_NAME or AT_NAMEOF */
00317     long number;                        /* AT_NUMBER */
00318   } att_def;
00319   int references;                       /* reference count */
00320 } dtd_attr;
00321 
00322 
00323 typedef struct _dtd_attr_list
00324 { dtd_attr      *attribute;
00325   struct _dtd_attr_list *next;
00326 } dtd_attr_list;
00327 
00328 
00329 typedef struct _dtd_model
00330 { modeltype type;                       /* MT_* */
00331   modelcard cardinality;                /* MC_* */
00332 
00333   union
00334   { struct _dtd_model *group;           /* ,/|/& group */
00335     struct _dtd_element *element;       /* element */
00336   } content;
00337   struct _dtd_model *next;              /* next in list (for groups) */
00338 } dtd_model;
00339 
00340 
00341 typedef struct _dtd_edef
00342 { contenttype   type;                   /* EMPTY, MIXED, ... */
00343   int           omit_open;              /* allow omitted open tag? */
00344   int           omit_close;             /* allow omitted close tag? */
00345   dtd_model     *content;               /* the content model */
00346   dtd_element_list *included;           /* +(namegroup) */
00347   dtd_element_list *excluded;           /* -(namegroup) */
00348   struct _dtd_state *initial_state;     /* Initial state in state engine */
00349   struct _dtd_state *final_state;       /* Final state in state engine */
00350   int           references;             /* #elements using this def */
00351 } dtd_edef;
00352 
00353 
00354 typedef struct _dtd_map
00355 { ichar        *from;                   /* mapped text */
00356   int           len;                    /* length of mapped text */
00357   dtd_symbol   *to;                     /* name of symbol mapped onto */
00358   struct _dtd_map *next;                /* next in shortref map */
00359 } dtd_map;
00360 
00361 
00362 typedef struct _dtd_shortref
00363 { dtd_symbol    *name;                  /* name of SHORTREF map */
00364   dtd_map       *map;                   /* implemented map */
00365   char          ends[ICHARSET_SIZE];    /* ending-characters in map */
00366   int           defined;                /* has been defined */
00367   struct _dtd_shortref *next;           /* next declared shortref */
00368 } dtd_shortref;
00369 
00370 
00371 typedef struct _dtd_element
00372 { dtd_symbol    *name;                  /* its name */
00373   dtd_edef      *structure;             /* content structure of the element */
00374   dtd_attr_list *attributes;            /* defined attributes */
00375   dtd_space_mode space_mode;            /* How to handle white-space (SP_*) */
00376   dtd_shortref  *map;                   /* SHORTREF map */
00377   int           undefined;              /* Only implicitely defined */
00378   struct _dtd_element *next;            /* in DTD'e element list */
00379 } dtd_element;
00380 
00381 
00382 typedef struct _dtd_charmap
00383 { ochar         map[INPUT_CHARSET_SIZE]; /* ichar --> ochar */
00384 } dtd_charmap;
00385 
00386 
00387 typedef struct _dtd_charclass
00388 { unsigned char class[INPUT_CHARSET_SIZE]; /* ichar --> class-mask */
00389 } dtd_charclass;
00390 
00391 
00392 typedef struct _dtd_charfunc
00393 { ichar func[(int)CF_ENDTABLE];         /* CF_ --> ichar */
00394 } dtd_charfunc;
00395 
00396 
00397 typedef struct _dtd
00398 { int                   magic;          /* SGML_DTD_MAGIC */
00399   int                   implicit;       /* There is no DTD */
00400   dtd_dialect           dialect;        /* DL_* */
00401   int                   case_sensitive; /* Tags are case-sensitive */
00402   int                   ent_case_sensitive; /* Entities are case-sensitive */
00403   ichar                *doctype;        /* defined document type */
00404   dtd_symbol_table     *symbols;        /* symbol-table */
00405   dtd_entity           *pentities;      /* defined parameter entities */
00406   dtd_entity           *entities;       /* defined entities */
00407   dtd_entity           *default_entity; /* default-entity (if any) */
00408   dtd_notation         *notations;      /* Declared notations */
00409   dtd_shortref         *shortrefs;      /* SHORTREF declarations */
00410   dtd_element          *elements;       /* defined elements */
00411   dtd_charfunc         *charfunc;       /* CF_ --> ichar */
00412   dtd_charclass        *charclass;      /* ichar -> CH_-mask */
00413   dtd_charmap          *charmap;        /* ichar ->ochar */
00414   dtd_char_encoding     encoding;       /* document encoding */
00415   dtd_space_mode        space_mode;     /* Default for handling white-space */
00416   dtd_number_mode       number_mode;    /* How to treat number attributes */
00417   int                   shorttag;       /* support SHORTTAG */
00418   int                   references;     /* destruction reference count */
00419 } dtd;
00420 
00421 extern dtd_charfunc *new_charfunc(void);   /* default classification */
00422 extern dtd_charclass *new_charclass(void); /* default classification */
00423 extern dtd_charmap  *new_charmap(void);    /* identity map */
00424 
00425 extern dtd_symbol*      dtd_find_symbol(dtd *dtd, const ichar *name);
00426 extern dtd_symbol*      dtd_add_symbol(dtd *dtd, const ichar *name);
00427 
00428 
00429                  /*******************************
00430                  *             PUBLIC           *
00431                  *******************************/
00432 
00433 #include "parser.h"
00434 
00435 dtd *           file_to_dtd(const char *file, const char *doctype,
00436                             dtd_dialect dialect);
00437 int             sgml_process_file(dtd_parser *p,
00438                                   const char *file, unsigned flags);
00439 /*int           sgml_process_stream(dtd_parser *p, FILE *in, unsigned flags);*/
00440 dtd_parser *    new_dtd_parser(dtd *dtd);
00441 void            free_dtd_parser(dtd_parser *p);
00442 
00443 void            free_dtd(dtd *dtd);
00444 int             load_dtd_from_file(dtd_parser *p, const char *file);
00445 dtd *           new_dtd(const ichar *doctype);
00446 int             set_dialect_dtd(dtd *dtd, dtd_dialect dialect);
00447 static int              set_option_dtd(dtd *dtd, dtd_option option, char * set);
00448 
00449 void            putchar_dtd_parser(dtd_parser *p, int chr);
00450 int             begin_document_dtd_parser(dtd_parser *p);
00451 int             end_document_dtd_parser(dtd_parser *p);
00452 void            reset_document_dtd_parser(dtd_parser *p);
00453 void            set_src_dtd_parser(dtd_parser *p,
00454                                    input_type in, const char *file);
00455 void            set_mode_dtd_parser(dtd_parser *p, data_mode mode);
00456 void            sgml_cplocation(dtd_srcloc *dst, dtd_srcloc *src);
00457 
00458 #endif /*DTD_H_INCLUDED*/
00459 
00460 

Generated on Wed Jul 26 13:30:45 2006 for XSB by  doxygen 1.4.5