catalog.c

00001 /*  $Id: catalog.c,v 1.3 2005/03/07 20:31:24 kifer Exp $
00002 
00003     Part of SWI-Prolog
00004 
00005     Author:        Jan Wielemaker and Richard O'Keefe
00006     E-mail:        jan@swi.psy.uva.nl
00007     WWW:           http://www.swi-prolog.org
00008     Copyright (C): 1985-2002, University of Amsterdam
00009 
00010     This library is free software; you can redistribute it and/or
00011     modify it under the terms of the GNU Lesser General Public
00012     License as published by the Free Software Foundation; either
00013     version 2.1 of the License, or (at your option) any later version.
00014 
00015     This library is distributed in the hope that it will be useful,
00016     but WITHOUT ANY WARRANTY; without even the implied warranty of
00017     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
00018     Lesser General Public License for more details.
00019 
00020     You should have received a copy of the GNU Lesser General Public
00021     License along with this library; if not, write to the Free Software
00022     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00023 */
00024 
00025 #include "xsb_config.h"
00026 #include "util.h"
00027 #include "catalog.h"
00028 #include <stdio.h>
00029 #include <ctype.h>
00030 #include <string.h>
00031 #include <stdlib.h>
00032 #define DTD_MINOR_ERRORS 1
00033 #include <dtd.h>                        /* error codes */
00034 
/* Fallback limits and constants; each one is only defined here when no
   earlier header has already supplied it.
*/
#if !defined(MAXPATHLEN)
#define MAXPATHLEN 1024
#endif

#if !defined(MAXLINE)
#define MAXLINE 1024
#endif

#if !defined(EOS)
#define EOS '\0'
#endif

#if !defined(TRUE)
#define TRUE 1
#define FALSE 0
#endif

//#define streq(s1, s2) strcmp(s1, s2) ==       0

/* Read the character p points at as an unsigned char, which makes the
   value safe to pass to the <ctype.h> classification functions.
*/
#define uc(p) (*(unsigned char const *)(p))
00051 
/* One catalogue entry, kept in a singly linked list. */
struct catalogue_item
{ struct catalogue_item *next;          /* following entry, 0 at the end */
  int kind;                             /* CAT_xxx code, possibly plus override */
  char const *target;                   /* identifier or name to match */
  char const *replacement;              /* file name returned on a match */
};

typedef struct catalogue_item *catalogue_item_ptr;

/* Head and tail of the list of default entries that find_in_catalogue()
   synthesizes when no catalogue file provides a match.
*/
static catalogue_item_ptr first_item = 0;
static catalogue_item_ptr last_item = 0;
00061 
/* A registered catalogue file.  Its entries are parsed lazily: `loaded`
   is set by find_in_catalogue() after load_one_catalogue() has run.
*/
typedef struct _catalog_file
{ char *file;                           /* file name as registered */
  struct _catalog_file *next;           /* next registered file */
  int loaded;                           /* did we parse this file? */
  struct catalogue_item *first_item;    /* first entry read from the file */
  struct catalogue_item *last_item;     /* last entry read from the file */
} catalog_file;

/* Head of the list of registered catalogue files. */
static catalog_file *catalog = 0;
00071 
/* Directory separator handling: isDirSep(c) tests whether c separates
   path components, DIRSEPSTR is the separator used when building paths.
   Under WIN_NT both '/' and '\\' are accepted as separators.
*/
#ifndef WIN_NT
#define isDirSep(c) ((c) == '/')
#define DIRSEPSTR "/"
#else
#define isDirSep(c) ((c) == '/' || (c) == '\\')
#define DIRSEPSTR "\\"
#endif
00079 
00080 static char *
00081 DirName(const char *f, char *dir)
00082 { const char *base, *p;
00083 
00084   for (base = p = f; *p; p++)
00085   { if (isDirSep(*p) && p[1] != EOS)
00086       base = p;
00087   }
00088   if (base == f)
00089   { if (isDirSep(*f))
00090       strcpy(dir, DIRSEPSTR);
00091     else
00092       strcpy(dir, ".");
00093   } else
00094   { strncpy(dir, f, base - f);
00095     dir[base - f] = EOS;
00096   }
00097 
00098   return dir;
00099 }
00100 
00101 
00102 int
00103 is_absolute_path(const char *name)
00104 { if (isDirSep(name[0])
00105 #ifdef WIN_NT
00106       || (isalpha(uc(name)) && name[1] == ':')
00107 #endif
00108     )
00109     return TRUE;
00110 
00111   return FALSE;
00112 }
00113 
00114 char *
00115 localpath(const char *ref, const char *name)
00116 { char *local;
00117 
00118   if (!ref || is_absolute_path(name))
00119     local = strdup(name);
00120   else
00121   { char buf[MAXPATHLEN];
00122 
00123     DirName(ref, buf);
00124     strcat(buf, DIRSEPSTR);
00125     strcat(buf, name);
00126 
00127     local = strdup(buf);
00128   }
00129 
00130   if (!local)
00131     sgml_nomem();
00132 
00133   return local;
00134 }
00135 
00136 
00137 int
00138 register_catalog_file(const char *file, catalog_location where)
00139 { catalog_file **f = &catalog;
00140   catalog_file *cf;
00141 
00142   for (; *f; f = &(*f)->next)
00143   { cf = *f;
00144 
00145     if (streq(cf->file, file))
00146       return TRUE;              /* existing, move? */
00147   }
00148 
00149   cf = sgml_malloc(sizeof(*cf));
00150   memset(cf, 0, sizeof(*cf));
00151   cf->file = strdup(file);
00152   if (!cf->file)
00153     sgml_nomem();
00154 
00155   if (where == CTL_END)
00156   { cf->next = NULL;
00157     *f = cf;
00158   } else
00159   { cf->next = catalog;
00160     catalog = cf;
00161   }
00162 
00163   return TRUE;
00164 }
00165 
00166 
00167 static void
00168 init_catalog()
00169 { static int done = FALSE;
00170 
00171   if (!done)
00172   { if (!catalog)
00173     { char *path = getenv("SGML_CATALOG_FILES");
00174 
00175       if (!path)
00176         return;
00177 
00178       while (*path)
00179       { char buf[MAXPATHLEN];
00180         char *s;
00181 
00182         if ((s = strchr(path, ':')))
00183         { strncpy(buf, path, s - path);
00184           buf[s - path] = '\0';
00185           path = s + 1;
00186         } else
00187         { register_catalog_file(path, CTL_START);
00188           return;
00189         }
00190 
00191         register_catalog_file(buf, CTL_START);
00192       }
00193     }
00194   }
00195 }
00196 
00197 
00198                  /*******************************
00199                  *     CATALOG FILE PARSING     *
00200                  *******************************/
00201 
00202 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
00203 The code from here to the  end  of   this  file  was  written by Richard
00204 O'Keefe and modified by Jan Wielemaker to fit   in  with the rest of the
00205 parser.
00206 - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
00207 
00208 #include <ctype.h>
00209 #include <stdio.h>
00210 #include <stdlib.h>
00211 #include <string.h>
00212 
00213 /*  OVERRIDE YES/NO
00214     sets a boolean flag initialised to NO.
00215     The value of this flag is stored as part of each entry.
00216     (PUBLIC|DOCTYPE|ENTITY)&YES will match whether a system identifier
00217     was provided in the source document or not;
00218     (PUBLIC|DOCTYPE|ENTITY)&NO will only match if a system identifier
00219     was not provided.
00220 */
00221 
00222 /*  catalogue =
00223     (   PUBLIC  pubid filename
00224     |   SYSTEM  sysid filename
00225     |   DOCTYPE name  filename
00226     |   ENTITY  name  filename
00227     |   OVERRIDE YES
00228     |   OVERRIDE NO
00229     |   BASE          filename
00230     |   junk
00231     )*
00232 */
00233 
00234 
00235 /*  Keywords are matched ignoring case.  */
00236 
/*  Return 1 if strings `a` and `b` are equal when alphabetic case is
    ignored and 0 otherwise.  Characters are compared as unsigned char
    values mapped through tolower().
*/
static int
ci_streql(char const *a, char const *b)
{ unsigned char const *lhs = (unsigned char const *) a;
  unsigned char const *rhs = (unsigned char const *) b;

  while (tolower(*lhs) == tolower(*rhs))
  { if (*lhs == '\0')                   /* both strings ended together */
      return 1;
    lhs++;
    rhs++;
  }

  return 0;
}
00250 
00251 /*  Names may be matched heeding case in XML.  */
00252 
/*  Return 1 if strings `a` and `b` are identical, heeding alphabetic
    case, and 0 otherwise.
*/
static int
cs_streql(char const *a, char const *b)
{ if (strcmp(a, b) != 0)
    return 0;

  return 1;
}
00257 
00258 /*  Any other word or any quoted string is reported as CAT_OTHER.
00259     When we are not looking for the beginning of an entry, the only
00260     positive outcome is CAT_OTHER.
00261 */
00262 
00263 static int
00264 scan_overflow(size_t buflen)
00265 { gripe(ERC_REPRESENTATION, "token length");
00266 
00267   return EOF;
00268 }
00269 
00270 static int
00271 scan(FILE * src, char *buffer, size_t buflen, int kw_expected)
00272 { int c, q;
00273   char *p = buffer, *e = p + buflen - 1;
00274 
00275   for (;;)
00276   { c = getc(src);
00277     if (c <= ' ')
00278     { if (c < 0)
00279         return EOF;
00280       continue;
00281     }
00282     if (c == '-')
00283     { c = getc(src);
00284       if (c != '-')
00285       { *p++ = '-';
00286         break;
00287       }
00288       for (;;)
00289       { c = getc(src);
00290         if (c < 0)
00291           return EOF;
00292         if (c == '-')
00293         { c = getc(src);
00294           if (c < 0)
00295             return EOF;
00296           if (c == '-')
00297             break;
00298         }
00299       }
00300       continue;
00301     }
00302     if (c == '"' || c == '\'')
00303     { q = c;
00304       for (;;)
00305       { c = getc(src);
00306         if (c < 0)
00307           return EOF;
00308         if (c == q)
00309         { *p = '\0';
00310           return CAT_OTHER;
00311         }
00312         if (p == e)
00313           return scan_overflow(buflen);
00314         *p++ = c;
00315       }
00316     }
00317     break;
00318   }
00319   /*  We reach here if there is an unquoted token.   */
00320   /*  Don't try "PUBLIC--well/sortof--'foo' 'bar'"   */
00321   /*  because hyphens are allowed in unquoted words  */
00322   /*  and so are slashes and a bunch of other stuff. */
00323   /*  To keep this code simple, an unquoted token    */
00324   /*  ends at EOF, ', ", or layout.                  */
00325   while (c > ' ' && c != '"' && c != '\'')
00326   { if (p == e)
00327       return scan_overflow(buflen);
00328     *p++ = c;
00329     c = getc(src);
00330   }
00331   *p = '\0';
00332   if (kw_expected)
00333   { if (ci_streql(buffer, "public"))
00334       return CAT_PUBLIC;
00335     if (ci_streql(buffer, "system"))
00336       return CAT_SYSTEM;
00337     if (ci_streql(buffer, "entity"))
00338       return CAT_ENTITY;
00339     if (ci_streql(buffer, "doctype"))
00340       return CAT_DOCTYPE;
00341     if (ci_streql(buffer, "override"))
00342       return CAT_OVERRIDE;
00343     if (ci_streql(buffer, "base"))
00344       return CAT_BASE;
00345   }
00346   return CAT_OTHER;
00347 }
00348 
00349 /*  The strings can represent names (taken verbatim),
00350     system identifiers (ditto), or public identifiers (squished).
00351     We need to squish, and we need to copy.  When it comes to
00352     squishing, we don't need to worry about Unicode spaces,
00353     because public identifiers aren't allowed to have any characters
00354     that aren't in ASCII.
00355 */
00356 
/*  Normalise the public identifier `pubid` in place: leading and
    trailing layout (characters <= ' ') is removed and every inner run
    of layout is replaced by a single space.
*/
static void
squish(char *pubid)
{ unsigned char const *in = (unsigned char const *) pubid;
  unsigned char *const start = (unsigned char *) pubid;
  unsigned char *out = start;
  int gap = 0;                          /* layout seen since the last kept char */

  for (; *in != '\0'; in++)
  { if (*in <= ' ')
    { gap = 1;
      continue;
    }
    /* Emit the separating space lazily, so that neither leading nor
       trailing layout ever reaches the output.  `out` is strictly
       behind `in` whenever a gap is pending, so nothing unread is
       overwritten.
    */
    if (gap && out != start)
      *out++ = ' ';
    gap = 0;
    *out++ = *in;
  }

  *out = '\0';
}
00377 
00378 /*  We represent a catalogue internally by a list of
00379     (CAT_xxx, string, string)
00380     triples.
00381 */
00382 
00383 static void
00384 load_one_catalogue(catalog_file * file)
00385 { FILE *src = fopen(file->file, "r");
00386   char buffer[2 * FILENAME_MAX];
00387   char base[2 * FILENAME_MAX];
00388   char *p;
00389   int t;
00390   catalogue_item_ptr this_item;
00391   int override = 0;
00392 
00393   if ( src == 0 )
00394   { gripe(ERC_NO_CATALOGUE, file->file);
00395     return;
00396   }
00397 
00398   (void) strcpy(base, file->file);
00399   p = base + strlen(base);
00400   while (p != base && !isDirSep(p[-1]))
00401     p--;
00402 
00403   for (;;)
00404   { t = scan(src, buffer, sizeof buffer, 1);
00405     switch (t)
00406     { case CAT_BASE:
00407         if (scan(src, buffer, sizeof buffer, 0) == EOF)
00408           break;
00409         (void) strcpy(base, buffer);
00410         p = base + strlen(base);
00411         if (p != base && !isDirSep(p[-1]))
00412           *p++ = '/';
00413         continue;
00414       case CAT_OVERRIDE:
00415         if (scan(src, buffer, sizeof buffer, 0) == EOF)
00416           break;
00417         override = tolower(buffer[0]) == 'y' ? CAT_OVERRIDE : 0;
00418         continue;
00419       case CAT_PUBLIC:
00420       case CAT_SYSTEM:
00421       case CAT_ENTITY:
00422       case CAT_DOCTYPE:
00423         this_item = sgml_malloc(sizeof *this_item);
00424         if (scan(src, buffer, sizeof buffer, 0) == EOF)
00425           break;
00426         if (t == CAT_PUBLIC)
00427           squish(buffer);
00428         this_item->next = 0;
00429         this_item->kind = t == CAT_SYSTEM ? t : t + override;
00430         this_item->target = istrdup(buffer);
00431 
00432         if (scan(src, buffer, sizeof buffer, 0) == EOF)
00433           break;
00434 
00435         if (is_absolute_path(buffer) || p == base)
00436         { this_item->replacement = istrdup(buffer);
00437         } else
00438         { (void) strcpy(p, buffer);
00439           this_item->replacement = istrdup(base);
00440         }
00441 
00442         if (file->first_item == 0)
00443         { file->first_item = this_item;
00444         } else
00445         { file->last_item->next = this_item;
00446         }
00447 
00448         file->last_item = this_item;
00449         continue;
00450       case EOF:
00451         break;
00452       default:
00453         continue;
00454     }
00455     break;
00456   }
00457 }
00458 
00459 
00460 /*  To look up a DTD:
00461     f = find_in_catalogue(CAT_DOCTYPE, name, pubid, sysid, ci);
00462     If it cannot otherwise be found and name is not null,
00463     ${name}.dtd will be returned.
00464 
00465     To look up a parameter entity:
00466     f = find_in_catalogue(CAT_PENTITY, name, pubid, sysid, ci);
00467     The name may begin with a % but need not; if it doesn't    
00468     a % will be prefixed for the search.
00469     If it cannot otherwise be found ${name}.pen will be returned.
00470 
00471     To look up an ordinary entity:
00472     f = find_in_catalogue(CAT_ENTITY, name, pubid, sysid, ci);
00473     If the name begins with a % this is just like a CAT_PENTITY search.
00474     If it cannot otherwise be found ${name}.ent will be returned.
00475 
00476     The full catalogue format allows for NOTATION (which we still need
00477     for XML), SGMLDECL, DTDDECL, and LINKTYPE.  At the moment, only
00478     notation is plausible.  To handle such things,
00479     f = find_in_catalogue(CAT_OTHER, name, pubid, sysid, ci);
00480     If it cannot be found, NULL is returned.
00481 
00482     The name, pubid, and sysid may each be NULL.   It doesn't really
00483     make sense for them all to be NULL.
00484 
00485     For SGML, name matching (DOCTYPE, ENTITY) should normally ignore
00486     alphabetic case.  Pass ci=1 to make this happen.  For XML, name
00487     matching must heed alphabetic case.  Pass ci=0 to make that happen.
00488 
00489     A CAT_DOCTYPE, CAT_ENTITY, or CAT_PENTITY search doesn't really make
00490     sense without a name, so if the name should happen to be 0, the search
00491     kind is converted to CAT_OTHER.
00492 */
00493 
00494 char const *
00495 find_in_catalogue(int kind,
00496                   char const *name,
00497                   char const *pubid, char const *sysid, int ci)
00498 { char penname[FILENAME_MAX];
00499   catalogue_item_ptr item;
00500   char const *result;
00501   catalog_file *catfile;
00502 
00503   init_catalog();
00504 
00505   if (name == 0)
00506   { kind = CAT_OTHER;
00507   } else
00508   { switch (kind)
00509     { case CAT_OTHER:
00510       case CAT_DOCTYPE:
00511         break;
00512       case CAT_PENTITY:
00513         if (name[0] != '%')
00514         { penname[0] = '%';
00515           (void) strcpy(penname + 1, name);
00516           name = penname;
00517         }
00518         break;
00519       case CAT_ENTITY:
00520         if (name[0] == '%')
00521         { kind = CAT_PENTITY;
00522         }
00523         break;
00524       default:
00525         return 0;
00526     }
00527   }
00528 
00529   result = 0;
00530   for (catfile = catalog;; catfile = catfile->next)
00531   { if (catfile)
00532     { if (!catfile->loaded)
00533       { load_one_catalogue(catfile);
00534         catfile->loaded = TRUE;
00535       }
00536       item = catfile->first_item;
00537     } else
00538       item = first_item;
00539 
00540     for (; item != 0; item = item->next)
00541     { switch (item->kind)
00542       { case CAT_PUBLIC:
00543           if (sysid != 0)
00544             break;
00545         /*FALLTHROUGH*/
00546         case OVR_PUBLIC:
00547           if (pubid != 0 && result == 0 && cs_streql(pubid, item->target))
00548             result = item->replacement;
00549           break;
00550         case CAT_SYSTEM:
00551           if (sysid != 0 && cs_streql(sysid, item->target))
00552             return item->replacement;
00553           break;
00554         case CAT_DOCTYPE:
00555           if (sysid != 0)
00556             break;
00557         /*FALLTHROUGH*/
00558         case OVR_DOCTYPE:
00559           if (name != 0 && kind == CAT_DOCTYPE && result == 0
00560               && (ci ? ci_streql : cs_streql) (name, item->target))
00561             result = item->replacement;
00562           break;
00563         case CAT_ENTITY:
00564           if (sysid != 0)
00565             break;
00566          /*FALLTHROUGH*/ case OVR_ENTITY:
00567           if (name != 0 && kind >= CAT_ENTITY && result == 0
00568               && (ci ? ci_streql : cs_streql) (name, item->target))
00569             result = item->replacement;
00570           break;
00571         default:
00572           break;
00573       }
00574     }
00575 
00576     if (!catfile)
00577       break;
00578   }
00579   if ( result != 0 )
00580     return result;
00581   if ( sysid != 0 )
00582     return sysid;
00583   if ( kind == CAT_OTHER || kind == CAT_DOCTYPE )
00584     return 0;
00585 
00586   if ( strlen(name)+4+1 > sizeof(penname) )
00587   { gripe(ERC_REPRESENTATION, "entity name");
00588     return NULL;
00589   }
00590 
00591   item = sgml_malloc(sizeof *item);
00592   item->next = 0;
00593   item->kind = kind;
00594   item->target = istrdup(name);
00595 
00596   switch (kind)
00597   { case CAT_DOCTYPE:
00598       (void) sprintf(penname, "%s.dtd", name);
00599       break;
00600     case CAT_PENTITY:
00601       item->kind = CAT_ENTITY;
00602       (void) sprintf(penname, "%s.pen", name + 1);
00603       break;
00604     case CAT_ENTITY:
00605       (void) sprintf(penname, "%s.ent", name);
00606       break;
00607     default:
00608       abort();
00609   }
00610 
00611   item->replacement = istrdup(penname);
00612   if (first_item == 0)
00613   { first_item = item;
00614   } else
00615   { last_item->next = item;
00616   }
00617   last_item = item;
00618 
00619   return item->replacement;
00620 }

Generated on Wed Jul 26 13:30:45 2006 for XSB by  doxygen 1.4.5