xpathparser.c

/*****************************************************************************
 *                       xpathparser.c
 * This is the main file. It provides the bridge between XSB and libxml2
 * (an XML and XPath processing library) and contains the wrappers around
 * the libxml2 library.
 *
 ***************************************************************************/
00008 
00009 #include "xsb_config.h"
00010 #ifdef WIN_NT
00011 #define XSB_DLL
00012 #endif
00013 
00014 #include <assert.h>
00015 #include "cinterf.h"
00016 #include <libxml/xpathInternals.h>
00017 #include <stdio.h>
00018 #include <string.h>
00019 #include <stdarg.h>
00020 #include "fetch_file.c"
00021 
00022 
00023 #define MY_ENCODING "ISO-8859-1"
00024 
00025 #define MAXSTRLEN 256
00026 #define MAXSTRINGLEN 32000
00027 
/*
 * Error categories understood by xpath_error().  The comment next to each
 * enumerator lists the extra (variadic) arguments xpath_error() reads for
 * that category, in order.
 */
typedef enum {
  /* int err; for EACCES also (const char *file, const char *action),
   * for ENOENT also (const char *file); ENOMEM takes nothing more. */
  ERR_ERRNO,

  /* const char *expected, prolog_term actual */
  ERR_TYPE,

  /* const char *expected, prolog_term actual */
  ERR_DOMAIN,

  /* const char *type, prolog_term obj */
  ERR_EXISTENCE,

  /* prolog_term goal */
  ERR_FAIL,

  /* const char *limit, long max */
  ERR_LIMIT,

  /* const char *id, const char *fmt, ... */
  ERR_MISC
} plerrorid;
00042 
00043 int     xpath_error(plerrorid, ...);
00044 
00045 
00046 
00047 prolog_term xpath_error_term;
00048 
00049 int execute_xpath_expression(const char* xmlsource, const xmlChar* xpathExpr, const xmlChar* nsList, prolog_term output_term, char flag);
00050 int register_namespaces(xmlXPathContextPtr xpathCtx, const xmlChar* nsList) ;
00051 void print_xpath_nodes(xmlNodeSetPtr nodes, FILE* output);
00052 
00053 
00060 DllExport int call_conv allocate_xpath_error_term__()
00061 {
00062   xpath_error_term = reg_term(1);
00063   return TRUE;
00064 }
00065 
00073 DllExport int call_conv parse_xpath__()
00074 {
00075   /*Temporary prolog terms to handle the input*/
00076   prolog_term source_term, xpath_expr_term, output_term, tmp_term, ns_term;
00077   char *source = NULL, *tmpsource = NULL, *xpath_expr = NULL, *tmp = NULL, flag = 0, server[MAXSTRLEN], fname[MAXSTRLEN], *namespace = NULL;
00078   int ret = 0,  n=0;
00079 
00080   /*Initialize the xpath parser*/  
00081   xmlInitParser();
00082   
00083   output_term = reg_term(3);
00084   source_term = reg_term(1);
00085   ns_term = reg_term(4);
00086   
00087   /*Parse the xml source term*/
00088   if( is_functor( source_term)){
00089 
00090     tmp = p2c_functor( source_term);
00091 
00092     tmp_term = p2p_arg( source_term, 1);
00093     /*Source is a file*/ 
00094     if( !strcmp( tmp, "file")){
00095       source = p2c_string( tmp_term);
00096       flag = 1;
00097     }
00098     /*Source is a string*/
00099     else if( !strcmp( tmp, "string")){
00100       source = p2c_string( tmp_term);
00101       flag = 0;
00102     }
00103     /*Source is a url*/
00104     else if( !strcmp( tmp, "url")){
00105 
00106       tmpsource = p2c_string( tmp_term);
00107       source = malloc( strlen( tmpsource));
00108       strcpy( source, tmpsource);
00109       if( parse_url( source, server, fname) != FALSE){
00110 
00111         /*Source is a url is of the form file:// */
00112         if( !strcmp( server, "file")){
00113           strcpy( source, fname);
00114           flag = 1;
00115         }
00116         else{
00117           n = 0;
00118           /*Fetch file from remote location*/
00119           if( get_file_www( server, fname, &source) == FALSE){
00120             return xpath_error( ERR_DOMAIN, "url", tmp_term);
00121           }
00122           else{
00123             n = strlen( source);
00124           }
00125         }
00126       }
00127       else{
00128         return xpath_error( ERR_DOMAIN, "url", tmp_term);
00129       }
00130     }
00131   }
00132 
00133   /*Extract the xpath expression from prolog input*/
00134   xpath_expr_term = reg_term(2);
00135   if( is_nil( xpath_expr_term)){
00136     return xpath_error( ERR_DOMAIN, "xpath expression", xpath_expr_term);
00137   }
00138   xpath_expr = p2c_string( xpath_expr_term);
00139   if( !xpath_expr){
00140     return xpath_error( ERR_DOMAIN, "xpath expression", xpath_expr_term);
00141   }
00142 
00143   /* Takes care of the bug in libxml2. Converts the '/' input expression 
00144    * to '/\*'
00145    */
00146   if(!strcmp( xpath_expr, "/"))
00147     {
00148       free(xpath_expr);
00149       xpath_expr = malloc( 3);
00150       strcpy( xpath_expr, "/*");
00151     }                
00152 
00153   /*Extract the namespace prefix list from the prolog input*/
00154   ns_term = reg_term(4);
00155   if( is_string( ns_term))
00156     {
00157       namespace = p2c_string( ns_term);
00158     }
00159   /*This is the function which evaluates the xpath expression on xml input*/
00160   ret = execute_xpath_expression( source, xpath_expr, namespace, output_term, flag);
00161   if( ret == FALSE){
00162     return xpath_error(ERR_MISC, "xpath", "Unable to parse the xpath expression");
00163   }
00164   xmlCleanupParser();
00165   return TRUE;
00166 }
00167 
00168 
00175 int 
00176 execute_xpath_expression(const char * xmlsource, const xmlChar* xpathExpr, const xmlChar* nsList, prolog_term output_term, char flag) {
00177   xmlDocPtr doc;
00178   xmlXPathContextPtr xpathCtx=NULL; 
00179   xmlXPathObjectPtr xpathObj=NULL;
00180   xmlBufferPtr *buf=NULL;
00181   int size=0, i=0, j=0, bufsize = 0;
00182 
00183   char *output_buffer=NULL, *ptr=NULL;
00184     
00185   /* Load XML document */
00186   if( flag == 1){
00187     doc = xmlParseFile(xmlsource);
00188     if( doc == NULL){
00189       return FALSE;
00190     }
00191   }
00192   else{
00193     doc = xmlParseMemory(xmlsource, strlen( xmlsource ));
00194     if( doc == NULL){
00195       return FALSE;
00196     }
00197   }
00198 
00199   /* Create xpath evaluation context */
00200   xpathCtx = xmlXPathNewContext(doc);
00201   if(xpathCtx == NULL) {
00202     xmlFreeDoc(doc); 
00203     return FALSE;
00204   }
00205     
00206   /* Register namespaces from list (if any) */
00207   if((nsList != NULL) && (register_namespaces(xpathCtx, nsList) < 0)) {
00208     xmlXPathFreeContext(xpathCtx); 
00209     xmlFreeDoc(doc); 
00210     return FALSE;
00211   }
00212 
00213   /* Evaluate xpath expression */
00214   xpathObj = xmlXPathEvalExpression(xpathExpr, xpathCtx);
00215   if(xpathObj == NULL) {
00216     xmlXPathFreeContext(xpathCtx); 
00217     xmlFreeDoc(doc); 
00218     return FALSE;
00219   }
00220    
00221   size = xpathObj->nodesetval->nodeNr;
00222 
00223   buf = malloc( size * sizeof( xmlBufferPtr));
00224   if(!buf){
00225     return FALSE;
00226   }
00227   /*Store the resultant xml in buffer*/
00228   xmlSetBufferAllocationScheme( XML_BUFFER_ALLOC_EXACT);
00229     
00230   for( i = 0; i < size; i++){
00231     buf[i]=xmlBufferCreate();
00232     xmlNodeDump( buf[i], doc, xpathObj->nodesetval->nodeTab[i],0,0);
00233     bufsize+=strlen(buf[i]->content); 
00234   }
00235 
00236   output_buffer = malloc( bufsize);
00237   if(!output_buffer){
00238     return FALSE;
00239   }
00240 
00241   ptr = output_buffer;
00242   for(j=0;j<i;j++){
00243     strcpy( ptr, buf[j]->content);
00244     ptr+=strlen(buf[j]->content);
00245   }
00246   *ptr='\0';
00247 
00248   /*Store the resultant xml in output term*/
00249   if( is_var( output_term))
00250     {
00251       c2p_string( output_buffer, output_term); 
00252     }
00253   else
00254     {
00255       return FALSE;
00256     }
00257   
00258   /* Cleanup */
00259   free(output_buffer);
00260   for(j=0;j<size;j++){
00261     xmlBufferFree(buf[j]);
00262   }
00263   free(buf);
00264   xmlXPathFreeObject(xpathObj);
00265   xmlXPathFreeContext(xpathCtx); 
00266   xmlFreeDoc(doc); 
00267     
00268   return TRUE;
00269 }
00270 
00277 int 
00278 register_namespaces(xmlXPathContextPtr xpathCtx, const xmlChar* nsList) {
00279   xmlChar* nsListDup;
00280   xmlChar* prefix;
00281   xmlChar* href;
00282   xmlChar* next;
00283     
00284   assert(xpathCtx);
00285   assert(nsList);
00286         
00287   nsListDup = xmlStrdup(nsList);
00288   if(nsListDup == NULL) {
00289     return FALSE;       
00290   }
00291     
00292   next = nsListDup; 
00293   while(next != NULL) {
00294     /* skip spaces */
00295     while((*next) == ' ') next++;
00296     if((*next) == '\0') break;
00297 
00298     /* find prefix */
00299     prefix = next;
00300     next = (xmlChar*)xmlStrchr(next, '=');
00301     if(next == NULL) {
00302       xmlFree(nsListDup);
00303       return FALSE;     
00304     }
00305     *(next++) = '\0';   
00306 
00307     /* find href */
00308     href = next;
00309     next = (xmlChar*)xmlStrchr(next, ' ');
00310     if(next != NULL) {
00311       *(next++) = '\0'; 
00312     }
00313 
00314     /* do register namespace */
00315     if(xmlXPathRegisterNs(xpathCtx, prefix, href) != 0) {
00316       xmlFree(nsListDup);
00317       return FALSE;     
00318     }
00319   }
00320    xmlFree(nsListDup);
00321   return TRUE;
00322 }
00323 
00329 int
00330 xpath_error(plerrorid id, ...)
00331 { prolog_term except = p2p_new();
00332   prolog_term formal = p2p_new();
00333   prolog_term swi = p2p_new();
00334   prolog_term tmp1 = p2p_new();
00335   prolog_term tmp;
00336 
00337   va_list args;
00338   char msgbuf[1024];
00339   char *msg = NULL;
00340 
00341   va_start(args, id);
00342   /*Create the error term based on the type of error*/
00343   switch(id)
00344     { case ERR_ERRNO:
00345         { int err = va_arg(args, int);
00346       
00347           msg = strerror(err);
00348 
00349           switch(err)
00350             { 
00351               /*Not enough memory*/
00352             case ENOMEM:
00353           
00354               c2p_functor("xpath", 1, tmp1);    
00355               tmp = p2p_arg( tmp1, 1);
00356               c2p_functor( "resource_error", 1, tmp);
00357               
00358               c2p_string( "no_memory", p2p_arg( tmp, 1));
00359               p2p_unify( tmp1, formal); 
00360               break;
00361               /*Permission denied error*/
00362             case EACCES:
00363               { 
00364                 const char *file = va_arg(args,   const char *);
00365                 const char *action = va_arg(args, const char *);
00366 
00367                 c2p_functor("xpath", 1, tmp1);
00368                 tmp = p2p_arg( tmp1, 1);
00369 
00370                 c2p_functor( "permission_error", 3, tmp);
00371                 c2p_string( (char*)action, p2p_arg(tmp, 1));
00372                 c2p_string( "file", p2p_arg(tmp, 2));
00373                 c2p_string ( (char*)file, p2p_arg(tmp, 3));
00374 
00375                 p2p_unify( tmp1, formal);
00376                 break;
00377               }
00378               /*Entity not found*/
00379             case ENOENT:
00380               { 
00381                 const char *file = va_arg(args, const char *);
00382                 c2p_functor("xpath", 1, tmp1);
00383                 tmp = p2p_arg( tmp1, 1);
00384 
00385                 c2p_functor( "permission_error", 2, tmp);
00386                           
00387                 c2p_string( "file", p2p_arg(tmp, 1));
00388                 c2p_string ( (char*)file, p2p_arg(tmp, 2));
00389 
00390                 p2p_unify( tmp1, formal); 
00391 
00392                 break;
00393               }
00394               /*Defaults to system error*/
00395             default:
00396               {
00397                 c2p_functor("xpath", 1, tmp1);
00398                 tmp = p2p_arg( tmp1, 1);
00399 
00400                 c2p_string("system_error", tmp);
00401                 p2p_unify( tmp1, formal);
00402                 break;
00403               }
00404             }
00405           break;
00406         }
00407     case ERR_TYPE:
00408       { 
00409         /*Type error*/
00410         const char *expected = va_arg(args, const char*);
00411         prolog_term actual        = va_arg(args, prolog_term);
00412 
00413 
00414         c2p_functor("xpath", 1, tmp1);
00415         tmp = p2p_arg( tmp1, 1);
00416 
00417         if( is_attv( actual) && 
00418             strcmp(expected, "variable") != 0 )
00419           {
00420             c2p_string( "instantiation_error", tmp);
00421             p2p_unify( tmp1, formal);
00422           }
00423         else
00424           {
00425 
00426             c2p_functor( "type_error", 2, tmp);
00427             c2p_string( (char*)expected, p2p_arg(tmp, 1));
00428             p2p_unify ( actual, p2p_arg(tmp, 2));
00429             
00430             p2p_unify( tmp1, formal);
00431           }
00432         break;
00433       } 
00434     case ERR_DOMAIN:
00435       { 
00436         /*Functor domain error*/
00437         const char *expected = va_arg(args, const char*);
00438         prolog_term actual        = va_arg(args, prolog_term);
00439 
00440         c2p_functor("xpath", 1, tmp1);
00441         tmp = p2p_arg( tmp1, 1);
00442         
00443         if( is_attv( actual) && strcmp(expected, "variable") != 0 )
00444           {
00445             c2p_string( "instantiation_error", tmp);
00446             p2p_unify( tmp1, formal);
00447           }
00448         else
00449           {
00450             c2p_functor( "domain_error", 2, tmp);
00451             c2p_string( (char*)expected, p2p_arg(tmp, 1));
00452             p2p_unify( actual, p2p_arg(tmp, 2));
00453             p2p_unify( tmp1, formal);
00454           }     
00455         break;
00456       }
00457     case ERR_EXISTENCE:
00458       { 
00459         /*Resource not in existence error*/
00460         const char *type = va_arg(args, const char *);
00461         prolog_term obj  = va_arg(args, prolog_term);
00462 
00463         c2p_functor("xpath", 1, tmp1);
00464         tmp = p2p_arg( tmp1, 1);
00465 
00466         c2p_functor( "existence_error", 2, tmp);
00467         
00468         c2p_string( (char*)type, p2p_arg(tmp, 1));
00469         p2p_unify ( obj, p2p_arg(tmp, 2));
00470         
00471         p2p_unify( tmp1, formal);
00472         break;
00473       }
00474     case ERR_FAIL:
00475       {
00476         /*Goal fail error*/ 
00477         prolog_term goal  = va_arg(args, prolog_term);
00478 
00479         c2p_functor("xpath", 1, tmp1);
00480         tmp = p2p_arg( tmp1, 1);
00481 
00482         c2p_functor( "goal_failed", 1, tmp);
00483 
00484         p2p_unify( p2p_arg( tmp,1), goal);      
00485       
00486         p2p_unify( tmp1, formal);
00487         break;
00488       }
00489     case ERR_LIMIT:
00490       { 
00491         /*Limit exceeded error*/
00492         const char *limit = va_arg(args, const char *);
00493         long maxval  = va_arg(args, long);
00494 
00495         c2p_functor("xpath", 1, tmp1);
00496         tmp = p2p_arg( tmp1, 1);
00497         
00498         c2p_functor( "limit_exceeded", 2, tmp);
00499         c2p_string( (char*)limit, p2p_arg( tmp,1));
00500         c2p_int( maxval, p2p_arg( tmp, 2));
00501 
00502         
00503         p2p_unify( tmp1, formal);
00504         break;
00505       }
00506     case ERR_MISC:
00507       { 
00508         /*Miscellaneous error*/
00509         const char *id = va_arg(args, const char *);
00510       
00511         const char *fmt = va_arg(args, const char *);
00512 
00513         vsprintf(msgbuf, fmt, args);
00514         msg = msgbuf;
00515 
00516         c2p_functor("xpath", 1, tmp1);
00517         tmp = p2p_arg( tmp1, 1);
00518 
00519         
00520         c2p_functor( "miscellaneous", 1, tmp);
00521         c2p_string( (char*)id, p2p_arg( tmp, 1));
00522         p2p_unify( tmp1, formal);
00523         break; 
00524       }
00525     default:
00526       assert(0);
00527     }
00528 
00529   va_end(args);
00530 
00531   if ( msg )
00532     { 
00533       prolog_term msgterm  = p2p_new();
00534 
00535       if ( msg )
00536         { 
00537           c2p_string( msg, msgterm);
00538         }
00539 
00540       tmp = p2p_new();
00541 
00542       c2p_functor( "xpath_context", 1, tmp);
00543       p2p_unify( p2p_arg( tmp, 1), msgterm);    
00544       p2p_unify( tmp, swi);
00545     }
00546   /*Unify the created term with the error term*/
00547   tmp = p2p_new();
00548   c2p_functor( "xpath_error", 2, tmp);
00549   p2p_unify( p2p_arg( tmp, 1), formal);
00550   p2p_unify( p2p_arg( tmp, 2), swi);
00551   p2p_unify( tmp, except);
00552 
00553   return  p2p_unify( xpath_error_term, except);
00554 }
00555 
00556 

Generated on Wed Jul 26 13:30:47 2006 for XSB by  doxygen 1.4.5