model.c

00001 /******************************************************************************
00002  *                              model.c
00003  * This file contains functions which implement the finite state engine which 
00004  * is used to validate the xml document 
00005  *
00006  ****************************************************************************/
00007 
00008 #include <stdio.h>
00009 #include <stdlib.h>
00010 #include <assert.h>
00011 #include "dtd.h"
00012 #include "model.h"
00013 #include "error_term.h"
00014 
00015 #define MAX_VISITED 256
00016 #define MAX_ALLOWED 64
00017 
00018 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
00019 This module implements a finite state  engine for validating the content
00020 model of elements. A state machine  is   the  only feasible approach for
00021 realising an event-driven SGML parser.
00022 
00023 The public functions are:
00024 
00025 dtd_state *new_dtd_state(void)
00026     Create an anonymous new state.  Normally an element creates two of
00027     these for it ->initial_state and ->final_state attributes.
00028 
00029 dtd_state *make_state_engine(dtd_element *e)
00030     Associate a state engine to this element and return the initial
00031     state of the engine.  If the element has an engine, simply return
00032     the initial state.
00033 
00034 dtd_state *make_dtd_transition(dtd_state *here, dtd_element *e)
00035     Given the current state, see whether we can accept e and return
00036     the resulting state.  If no transition is possible return NULL.
00037 
00038 int same_state(dtd_state *final, dtd_state *here)
00039     See whether two states are the same, or the final state can be
00040     reached only traversing equivalence links.
00041 
00042 The A&B&... model
00043 
00044 Models of the type a&b&c are hard   to translate, as the resulting state
00045 machine is of size order N! In practice   only  a little of this will be
00046 used however and we `fix' this problem using a `lazy state-engine', that
00047 expands to the next level  only  after   reaching  some  level.  See the
00048 function state_transitions(). The design takes more lazy generation into
00049 consideration.
00050 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
00051 
00052 typedef struct _state_transition
00053 { dtd_element        *element;          /* element on transition */
00054   dtd_state          *state;            /* state to go to */
00055   struct _state_transition *next;       /* next possible transition */
00056 } transition;
00057 
00058 typedef struct _dtd_model_list          /* list (set) of models */
00059 { dtd_model *model;
00060   struct _dtd_model_list *next;
00061 } dtd_model_list;
00062 
00063 typedef enum
00064 { EX_AND                                /* expand (a&b&...) */
00065 } expand_type;
00066 
00067 typedef struct _state_expander
00068 { dtd_state            *target;         /* Target state to expand to */
00069   expand_type           type;           /* EX_* */
00070   union
00071   { struct
00072     { dtd_model_list *set;              /* Models we should still see */
00073     } and;                              /* Expand (a&b&...) */
00074   } kind;
00075 } expander;
00076 
00077 typedef struct _visited
00078 { int   size;                           /* set-size */
00079   dtd_state *states[MAX_VISITED];       /* The set */
00080 } visited;
00081 
00082 
00083 static void     translate_model(dtd_model *m, dtd_state *from, dtd_state *to);
00084 static transition *state_transitions(dtd_state *state);
00085 
00086 static int
00087 visit(dtd_state *state, visited *visited)
00088 { int i;
00089 
00090   for(i=0; i<visited->size; i++)
00091   { if ( visited->states[i] == state )
00092       return FALSE;
00093   }
00094       
00095   if ( visited->size >= MAX_VISITED )
00096   { fprintf(stderr, "Reached MAX_VISITED!\n");
00097     return FALSE;
00098   }
00099 
00100   visited->states[visited->size++] = state;
00101 
00102   return TRUE;
00103 }
00104 
00105 
00106 static dtd_state *
00107 do_make_dtd_transition(dtd_state *here, dtd_element *e, visited *visited)
00108 { transition *tset = state_transitions(here);
00109   transition *t;
00110 
00111   for(t=tset; t; t=t->next)
00112   { if ( t->element == e )
00113       return t->state;
00114   }
00115 
00116   for(t=tset; t; t=t->next)
00117   { if ( t->element == NULL && visit(t->state, visited) )
00118     { dtd_state *new;
00119 
00120       if ( (new=do_make_dtd_transition(t->state, e, visited)) )
00121         return new;
00122     }
00123   }
00124 
00125   return NULL;
00126 }
00127 
00128 dtd_state *
00129 make_dtd_transition(dtd_state *here, dtd_element *e)
00130 { visited visited;
00131   visited.size = 0;
00132 
00133   if ( !here )                          /* from nowhere to nowhere */
00134     return NULL;
00135 
00136   return do_make_dtd_transition(here, e, &visited);
00137 }
00138 
00139 
00140 static int
00141 find_same_state(dtd_state *final, dtd_state *here, visited *visited)
00142 { transition *t;
00143 
00144   if ( final == here )
00145     return TRUE;
00146 
00147   for(t=state_transitions(here); t; t=t->next)
00148   { if ( t->element == NULL && visit(t->state, visited) )
00149     { if ( find_same_state(final, t->state, visited) )
00150         return TRUE;
00151     }
00152   }
00153 
00154   return FALSE;
00155 }
00156 
00157 
00158 int
00159 same_state(dtd_state *final, dtd_state *here)
00160 { visited visited;
00161   visited.size = 0;
00162 
00163   return find_same_state(final, here, &visited);
00164 }
00165 
00166 
00167 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
00168 state_allows_for(dtd_state *state, dtd_element **allow, int *n)
00169     See what elements are allowed if we are in this state.  This is
00170     currently not used, but might prove handly for error messages or
00171     syntax-directed editors.
00172 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
00173 
00174 static void
00175 do_state_allows_for(dtd_state *here, dtd_element **allow, int *n,
00176                     visited *visited)
00177 { transition *t;
00178 
00179   for(t=state_transitions(here); t; t=t->next)
00180   { int i;
00181 
00182     if ( t->element == NULL )
00183     { if ( visit(t->state, visited) )
00184         do_state_allows_for(t->state, allow, n, visited);
00185     } else
00186     { for(i=0; i<*n; i++)
00187       { if ( allow[i] == t->element )
00188           goto next;
00189       }
00190       allow[(*n)++] = t->element;
00191     }
00192   next:
00193     ;
00194   }
00195 }
00196 
00197 
00198 void
00199 state_allows_for(dtd_state *state, dtd_element **allow, int *n)
00200 { visited visited;
00201   visited.size = 0;
00202 
00203   *n = 0;
00204   if ( state )
00205     do_state_allows_for(state, allow, n, &visited);
00206 }
00207 
00208 
00209 static int
00210 do_find_omitted_path(dtd_state *state, dtd_element *e,
00211                      dtd_element **path, int *pl,
00212                      visited *visited)
00213 { transition *tset = state_transitions(state);
00214   transition *t;
00215   int pathlen = *pl;
00216 
00217   for(t=tset; t; t=t->next)
00218   { if ( t->element == e )
00219       return TRUE;
00220 
00221     if ( t->element &&
00222          t->element != CDATA_ELEMENT &&
00223          t->element->structure &&
00224          t->element->structure->omit_open &&
00225          visit(t->state, visited) )
00226     { dtd_state *initial = make_state_engine(t->element);
00227 
00228       path[pathlen] = t->element;
00229       *pl = pathlen+1;
00230       if ( do_find_omitted_path(initial, e, path, pl, visited) )
00231         return TRUE;
00232       *pl = pathlen;
00233     }
00234   }
00235 
00236   for(t=tset; t; t=t->next)
00237   { if ( !t->element &&
00238          visit(t->state, visited) )
00239     { if ( do_find_omitted_path(t->state, e, path, pl, visited) )
00240         return TRUE;
00241     }
00242   }
00243 
00244   return FALSE;
00245 }
00246 
00247 
00248 int 
00249 find_omitted_path(dtd_state *state, dtd_element *e, dtd_element **path)
00250 { int pl = 0;
00251   visited visited;
00252   visited.size = 0;
00253 
00254   if ( state && do_find_omitted_path(state, e, path, &pl, &visited) )
00255     return pl;
00256 
00257   return FALSE;
00258 }
00259 
00260 
00261 dtd_state *
00262 new_dtd_state()
00263 { dtd_state *s = sgml_calloc(1, sizeof(*s));
00264 
00265   return s;
00266 }
00267 
00268 
00269 static void
00270 mylink(dtd_state *from, dtd_state *to, dtd_element *e)
00271 { transition *t = sgml_calloc(1, sizeof(*t));
00272 
00273   t->state = to;
00274   t->element = e;
00275   t->next = from->transitions;
00276   from->transitions = t;
00277 }
00278 
00279 
00280                  /*******************************
00281                  *            EXPANSION         *
00282                  *******************************/
00283 
00284 static void
00285 add_model_list(dtd_model_list **list, dtd_model *m)
00286 { dtd_model_list *l = sgml_calloc(1, sizeof(*l));
00287 
00288   l->model = m;
00289 
00290   for( ; *list; list = &(*list)->next)
00291     ;
00292   *list = l;
00293 }
00294 
00295 
00296 static transition *
00297 state_transitions(dtd_state *state)
00298 { if ( !state->transitions && state->expander )
00299   { expander *ex = state->expander;
00300     
00301     switch(ex->type)
00302     { case EX_AND:
00303       { dtd_model_list *left = ex->kind.and.set;
00304 
00305         if ( !left )                    /* empty AND (should not happen) */
00306         { mylink(state, ex->target, NULL); 
00307         } else if ( !left->next )       /* only one left */
00308         { translate_model(left->model, state, ex->target);
00309         } else
00310         { for( ; left; left = left->next )
00311           { dtd_state *tmp = new_dtd_state();
00312             expander *nex = sgml_calloc(1, sizeof(*nex));
00313             dtd_model_list *l;
00314 
00315             translate_model(left->model, state, tmp);
00316             tmp->expander = nex;
00317             nex->target = ex->target;
00318             nex->type = EX_AND;
00319             for(l=ex->kind.and.set; l; l=l->next)
00320             { if ( l != left )
00321                 add_model_list(&nex->kind.and.set, l->model);
00322             }
00323           }
00324         }
00325       }
00326     }
00327   }
00328 
00329   return state->transitions;
00330 }
00331 
00332 
00333                  /*******************************
00334                  *         TRANSLATION          *
00335                  *******************************/
00336 
00337 
00338 static void
00339 translate_one(dtd_model *m, dtd_state *from, dtd_state *to)
00340 { switch(m->type)
00341   { case MT_ELEMENT:
00342     { dtd_element *e = m->content.element;
00343 
00344       mylink(from, to, e);
00345       return;
00346     }
00347     case MT_SEQ:                        /* a,b,... */
00348     { dtd_model *sub;
00349 
00350       for( sub = m->content.group; sub->next; sub = sub->next )
00351       { dtd_state *tmp = new_dtd_state();
00352         translate_model(sub, from, tmp);
00353         from = tmp;
00354       }
00355       translate_model(sub, from, to);
00356       return;
00357     }
00358     case MT_AND:                        /* a&b&... */
00359     { expander *ex = sgml_calloc(1, sizeof(*ex));
00360       dtd_model *sub;
00361 
00362       ex->target = to;
00363       ex->type   = EX_AND;
00364       
00365       for( sub = m->content.group; sub; sub = sub->next )
00366         add_model_list(&ex->kind.and.set, sub);
00367 
00368       from->expander = ex;
00369       return;
00370     }
00371     case MT_OR:                         /* a|b|... */
00372     { dtd_model *sub;
00373 
00374       for( sub = m->content.group; sub; sub = sub->next )
00375         translate_model(sub, from, to);
00376       return;
00377     }
00378     case MT_PCDATA:
00379     case MT_UNDEF:
00380       assert(0);
00381   }
00382 
00383 }
00384 
00385 
00386 static void
00387 translate_model(dtd_model *m, dtd_state *from, dtd_state *to)
00388 { if ( m->type == MT_PCDATA )
00389   { mylink(from, from, CDATA_ELEMENT);
00390     mylink(from, to, NULL);
00391     return;
00392   }
00393 
00394   switch(m->cardinality)
00395   { case MC_OPT:                        /* ? */
00396       mylink(from, to, NULL);
00397     /*FALLTHROUGH*/
00398     case MC_ONE:
00399       translate_one(m, from, to);
00400       return;
00401     case MC_REP:                        /* * */
00402       translate_one(m, from, from);
00403       mylink(from, to, NULL);
00404       return;
00405     case MC_PLUS:                       /* + */
00406       translate_one(m, from, to);
00407       translate_one(m, to, to);
00408       return;
00409   }
00410 }
00411 
00412 
00413 dtd_state *
00414 make_state_engine(dtd_element *e)
00415 { 
00416   if ( e->structure )
00417   { dtd_edef *def = e->structure;
00418 
00419     if ( !def->initial_state )
00420     { if ( def->content )
00421       { def->initial_state = new_dtd_state();
00422         def->final_state   = new_dtd_state();
00423     
00424         translate_model(def->content, def->initial_state, def->final_state);
00425       } else if ( def->type == C_CDATA || def->type == C_RCDATA )
00426       { def->initial_state = new_dtd_state();
00427         def->final_state   = new_dtd_state();
00428 
00429         mylink(def->initial_state, def->initial_state, CDATA_ELEMENT);
00430         mylink(def->initial_state, def->final_state, NULL);
00431       } else
00432         return NULL;
00433     }
00434 
00435     return def->initial_state;
00436   }
00437   
00438   return NULL;
00439 }
00440 
00441 
00442                  /*******************************
00443                  *             FREE             *
00444                  *******************************/
00445 
00446 static void do_free_state_engine(dtd_state *state, visited *visited);
00447 
00448 static void
00449 free_model_list(dtd_model_list *l)
00450 { dtd_model_list *next;
00451 
00452   for( ; l; l=next)
00453   { next = l->next;
00454 
00455     sgml_free(l);
00456   }
00457 }
00458 
00459 
00460 static void
00461 free_expander(expander *e, visited *visited)
00462 { if ( visit(e->target, visited) )
00463     do_free_state_engine(e->target, visited);
00464 
00465   switch(e->type)
00466   { case EX_AND:
00467       free_model_list(e->kind.and.set);
00468     default:
00469       ;
00470   }
00471 
00472   sgml_free(e);
00473 }
00474 
00475 
00476 static void
00477 do_free_state_engine(dtd_state *state, visited *visited)
00478 { transition *t, *next;
00479   
00480   for(t=state->transitions; t; t=next)
00481   { next = t->next;
00482 
00483     if ( visit(t->state, visited) )
00484       do_free_state_engine(t->state, visited);
00485 
00486     sgml_free(t);
00487   }
00488 
00489   if ( state->expander )
00490     free_expander(state->expander, visited);
00491 
00492   sgml_free(state);
00493 }
00494 
00495 
00496 void
00497 free_state_engine(dtd_state *state)
00498 { if ( state )
00499   { visited visited;
00500     visited.size = 0;
00501 
00502     visit(state, &visited);
00503     do_free_state_engine(state, &visited);
00504   }
00505 }
00506 
00507 
00508 

Generated on Wed Jul 26 13:30:45 2006 for XSB by  doxygen 1.4.5