parser.h

00001 /*  $Id: parser.h,v 1.2 2005/03/07 20:31:23 kifer Exp $
00002 
00003     Part of SWI-Prolog
00004 
00005     Author:        Jan Wielemaker
00006     E-mail:        jan@swi.psy.uva.nl
00007     WWW:           http://www.swi-prolog.org
00008     Copyright (C): 1985-2002, University of Amsterdam
00009 
00010     This library is free software; you can redistribute it and/or
00011     modify it under the terms of the GNU Lesser General Public
00012     License as published by the Free Software Foundation; either
00013     version 2.1 of the License, or (at your option) any later version.
00014 
00015     This library is distributed in the hope that it will be useful,
00016     but WITHOUT ANY WARRANTY; without even the implied warranty of
00017     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018     Lesser General Public License for more details.
00019 
00020     You should have received a copy of the GNU Lesser General Public
00021     License along with this library; if not, write to the Free Software
00022     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00023 */
00024 
00025 #ifndef SGML_PARSER_H_INCLUDED
00026 #define SGML_PARSER_H_INCLUDED
00027 #include "util.h"
00028 
00029                  /*******************************
00030                  *            CALL-BACK         *
00031                  *******************************/
00032 
00033                                         /* sgml_attribute->flags (bit values) */
00034 #define SGML_AT_DEFAULT         0x1     /* NOTE(review): presumably marks a defaulted value -- confirm */
00035 
00036 typedef struct _sgml_attribute          /* type pointed to by argv of sgml_begin_element_f */
00037 { struct                                /* so we can free members */
00038   { ochar *cdata;                       /* CDATA value */
00039     ichar *text;                        /* other textual value */
00040     long   number;                      /* numeric value */
00041   } value;
00042   dtd_attr *definition;                 /* DTD definition */
00043   unsigned flags;                       /* additional flags (SGML_AT_*) */
00044 } sgml_attribute;
00045 
00046 typedef struct _dtd_parser *dtd_parser_p;       /* pointer to struct _dtd_parser, defined later in this header */
00047 
00048 typedef int (*sgml_begin_element_f)(dtd_parser_p parser,
00049                                     dtd_element *e,
00050                                     int argc,
00051                                     sgml_attribute *argv);     /* type of dtd_parser.on_begin_element */
00052 typedef int (*sgml_end_element_f)(dtd_parser_p parser,
00053                                   dtd_element *e);     /* type of dtd_parser.on_end_element */
00054 typedef int (*sgml_data_f)(dtd_parser_p parser,
00055                            data_type type, int len, const ochar *text); /* type of dtd_parser.on_data */
00056 typedef int (*sgml_entity_f)(dtd_parser_p parser,
00057                              dtd_entity *entity,
00058                              int chr); /* type of dtd_parser.on_entity */
00059 typedef int (*sgml_pi_f)(dtd_parser_p parser, const ichar *pi);        /* type of dtd_parser.on_pi */
00060 typedef int (*sgml_error_f)(dtd_parser_p parser,
00061                             dtd_error *error); /* type of dtd_parser.on_error */
00062 typedef int (*sgml_decl_f)(dtd_parser_p parser, const ichar *decl);    /* type of dtd_parser.on_decl */
00063 #ifdef XMLNS
00064 typedef int (*xmlns_f)(dtd_parser_p parser,
00065                        dtd_symbol *ns, dtd_symbol *url);       /* type of dtd_parser.on_xmlns; only with XMLNS */
00066 #endif
00067 
00068 
00069                  /*******************************
00070                  *       PARSER AND STATES      *
00071                  *******************************/
00072 
00073 #define SGML_PARSER_MAGIC       (0x834ab663)    /* value documented for dtd_parser.magic */
00074 
00075 typedef enum                            /* parser states; type of dtd_parser.state and its saved-state fields */
00076 { S_PCDATA,                             /* between declarations */
00077 #ifdef UTF8
00078   S_UTF8,                               /* Loading UTF-8 character; present only if UTF8 is defined, which shifts the values below */
00079 #endif
00080   S_CDATA,                              /* non-parsed data */
00081   S_RCDATA,                             /* CDATA+entities */
00082   S_MSCDATA,                            /* <![CDATA[...]]> */
00083   S_EMSCDATA1,                          /* Seen ] in S_MSCDATA */
00084   S_EMSCDATA2,                          /* Seen ]] in S_MSCDATA */
00085   S_ECDATA1,                            /* Seen < in CDATA */
00086   S_ECDATA2,                            /* Seen </ in CDATA */
00087   S_EMSC1,                              /* Seen ] in marked section */
00088   S_EMSC2,                              /* Seen ]] in marked section */
00089   S_PI,                                 /* Seen <? */
00090   S_PI2,                                /* Seen <?...? */
00091   S_DECL0,                              /* Seen < */
00092   S_DECL,                               /* inside a declaration */
00093   S_MDECL0,                             /* Seen <! */
00094   S_STRING,                             /* inside a "string" or 'string' */
00095   S_DECLCMT0,                           /* Seen <...- */
00096   S_DECLCMT,                            /* Seen <...-- */
00097   S_DECLCMTE0,                          /* Seen <...--..- */
00098   S_CMTO,                               /* Seen <!- */
00099   S_CMT,                                /* Seen <!--... */
00100   S_CMTE0,                              /* Seen <!--...- */
00101   S_CMTE1,                              /* Seen <!--...-- */
00102   S_GROUP,                              /* inside [...] */
00103   S_PENT,                               /* Seen % */
00104   S_ENT0,                               /* Seen & */
00105   S_ENT,                                /* Seen &(#|\w) */
00106   S_ENTCR                               /* Seen &entity<CR> */
00107 } dtdstate;
00108 
00109 
00110 typedef enum                            /* declaration kinds: DTD declaration, begin-tag or end-tag */
00111 { DCL_DTD,                              /* DTD Declaration */
00112   DCL_BEGIN,                            /* begin-tag */
00113   DCL_END                               /* end-tag */
00114 } dcl_type;
00115 
00116 
00117 typedef enum                            /* marked-section processing type; used by dtd_marked.type and dtd_parser.mark_state */
00118 { MS_IGNORE,                            /* ignore this data */
00119   MS_INCLUDE,                           /* process normally */
00120   MS_CDATA,                             /* pass literally */
00121   MS_RCDATA                             /* replace entities */
00122 } marktype;
00123 
00124 
00125 typedef enum                            /* event classes; type of dtd_parser.event_class */
00126 { EV_EXPLICIT,                          /* Explicit event */
00127   EV_OMITTED,                           /* Omitted tag event */
00128   EV_SHORTTAG,                          /* SHORTTAG event: <tag/value/ */
00129   EV_SHORTREF                           /* SHORTREF event */
00130 } sgml_event_class;
00131 
00132 
00133 typedef struct _dtd_marked              /* entry of the marked section stack (dtd_parser.marked) */
00134 { dtd_symbol *keyword;                  /* keyword of the marked section */
00135   marktype      type;                   /* processing type (MS_*) */
00136   struct _dtd_marked *parent;           /* parent marked section */
00137 } dtd_marked;
00138 
00139 
00140 typedef enum                            /* character handling mode; type of dtd_parser.dmode */
00141 { DM_DTD,                               /* DTD mode: no data allowed (?) */
00142   DM_DATA                               /* Environment has only elements */
00143 } data_mode;
00144 
00145 
00146 typedef struct _sgml_environment        /* one open environment; chained via parent, head in dtd_parser.environments */
00147 { dtd_element *element;                 /* element that opened the env */
00148   struct _dtd_state *state;             /* State we are in */
00149 #ifdef XMLNS
00150   struct _xmlns *xmlns;                 /* XML namespace (member exists only with XMLNS) */
00151   struct _xmlns *thisns;                /* Name space of element (member exists only with XMLNS) */
00152 #endif
00153 #ifdef XMLBASE
00154   ochar *uri_base;                      /* xml:base handling (member exists only with XMLBASE) */
00155 #endif
00156   dtd_space_mode space_mode;            /* How to handle blanks */
00157   dtd_shortref *map;                    /* SHORTREF map */
00158   struct _sgml_environment *parent;     /* Parent environment */
00159   int   wants_net;                      /* I want a net */
00160   int   saved_waiting_for_net;          /* saved value of waiting for net */
00161 } sgml_environment;
00162 
00163                                         /* parser->flags (bit values) */
00164 #define SGML_PARSER_NODEFS       0x01   /* don't handle default atts */
00165 #define SGML_PARSER_QUALIFY_ATTS 0x02   /* qualify attributes in XML mode */
00166 
00167 typedef struct _dtd_parser              /* parser state followed by the client call-back slots */
00168 { unsigned long magic;                  /* SGML_PARSER_MAGIC */
00169   dtd     *dtd;                         /* DTD we are building */
00170   dtdstate state;                       /* current state */
00171   dtdstate cdata_state;                 /* S_CDATA/S_RCDATA */
00172   dtd_marked *marked;                   /* marked section stack */
00173   marktype mark_state;                  /* processing mode */
00174   dtd_element *empty_element;           /* empty of <tag/> seen */
00175   sgml_environment *environments;       /* Open environments */
00176   data_mode dmode;                      /* How to handle characters */
00177   int      first;                       /* Just seen <tag> */
00178   int      waiting_for_net;             /* waiting for / in <shorttag/mode/ */
00179   icharbuf *buffer;                     /* buffer for temp data */
00180   ocharbuf *cdata;                      /* collected character data */
00181   int      blank_cdata;                 /* CDATA is all blank */
00182   int      cdata_must_be_empty;         /* Only shortrefs allowed here */
00183   const ichar *etag;                    /* name of end-tag in CDATA */
00184   int      etaglen;                     /* length of end-tag */
00185   int      grouplevel;                  /* [..] level in declaration */
00186   int      saved;                       /* saved character */
00187   dtdstate lit_saved_state;             /* literal saved-state */
00188   dtd_char_encoding encoding;           /* CDATA output character-set */
00189   dtd_shortref *map;                    /* SHORTREF map */
00190 #ifdef UTF8
00191   int      utf8_decode;                 /* decode UTF-8 sequences? */
00192   int      utf8_char;                   /* building character */
00193   int      utf8_left;                   /* bytes left */
00194   dtdstate utf8_saved_state;            /* state from which we come */
00195 #endif
00196   dtd_srcloc    location;               /* Current location */
00197   dtd_srcloc    startloc;               /* Start of last markup */
00198   dtd_srcloc    startcdata;             /* Start of last cdata */
00199   dtd_symbol   *enforce_outer_element;  /* Outer element to look for */
00200   sgml_event_class event_class;         /* EV_* */
00201 
00202   void *closure;                        /* client handle */
00203   sgml_begin_element_f  on_begin_element; /* start an element */
00204   sgml_end_element_f    on_end_element; /* end an element */
00205   sgml_data_f           on_data;        /* process cdata */
00206   sgml_entity_f         on_entity;      /* unprocessed entity */
00207   sgml_pi_f             on_pi;          /* processing instruction */
00208   sgml_error_f          on_error;       /* handle error */
00209   sgml_decl_f           on_decl;        /* handle declarations */
00210 #ifdef XMLNS
00211   xmlns_f               on_xmlns;       /* handle new namespace */
00212 #endif
00213   unsigned              flags;          /* misc flags (SGML_PARSER_NODEFS, SGML_PARSER_QUALIFY_ATTS) */
00214 } dtd_parser;
00215 
00216 
00217 #ifdef XMLNS
00218 #include "xmlns.h"
00219 #endif
00220 
00221 extern int              gripe(dtd_error_id e,  ...);    /* variadic; NOTE(review): presumably reports error e, extra arguments depend on e -- confirm */
00222 
00223 #define SGML_SUB_DOCUMENT       0x1     /* NOTE(review): flag value; its consumer is not visible in this header */
00224 
00225 #endif /*SGML_PARSER_H_INCLUDED*/
00226 

Generated on Wed Jul 26 13:30:45 2006 for XSB by  doxygen 1.4.5