xsbpattern.c

00001 /* File:      xsbpattern.c -- XSB-side interface to match() and substitute()
00002 ** Author(s): Jin Yu
00003 ** Contact:   xsb-contact@cs.sunysb.edu
00004 ** 
00005 ** Copyright (C) The Research Foundation of SUNY, 1998
00006 ** 
00007 ** XSB is free software; you can redistribute it and/or modify it under the
00008 ** terms of the GNU Library General Public License as published by the Free
00009 ** Software Foundation; either version 2 of the License, or (at your option)
00010 ** any later version.
00011 ** 
00012 ** XSB is distributed in the hope that it will be useful, but WITHOUT ANY
00013 ** WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00014 ** FOR A PARTICULAR PURPOSE.  See the GNU Library General Public License for
00015 ** more details.
00016 ** 
00017 ** You should have received a copy of the GNU Library General Public License
00018 ** along with XSB; if not, write to the Free Software Foundation,
00019 ** Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
00020 **
00021 ** $Id: xsbpattern.c,v 1.10 2003/04/15 17:16:24 kostis Exp $
00022 ** 
00023 */
00024 
00025 
00026 /*----------------------------------------------------------------------------
00027        try_match__() -- find the match pattern,
00028        next_match__() -- find the next match pattern,
00029        do_bulk_match__() -- find the global match patterns,
00030        perl_substitute__() -- substitute the string with expected pattern,
00031        load_perl__() -- load the perl interpretor,
00032        unload_perl__() -- release the perl interpretor object,
00033        get_match_resultC__() -- get the perl pattern match C function,
00034        get_bulk_match_result__() --  get Perl global pattern match results.
00035        
00036 ----------------------------------------------------------------------------*/
00037 
00038 
00039 #include "interface.h"
00040 #include "perlpattern.c"          /* pattern match basic functions */   
00041 
00042 void build_sub_match_spec( void );
00043 int is_global_pattern( char *);
00044 int global_pattern_mode = FALSE;
00045 
00046 extern void xsb_abort(char *, ...);
00047 
00048 #define xsb_warn(warning)       fprintf(stderr, "++Warning: %s\n", warning)
00049 
00050 
00051 /*----------------------------------------------------------------------------
00052 try_match__()
00053 The pattern matching function which includes loading perl interpreter and 
00054 trying the perl pattern matching.
00055 arguments: 
00056   input: char* string,    -- input text
00057          char* pattern    --  match pattern
00058   output:if no match found, return FAILURE (0).
00059 ----------------------------------------------------------------------------*/
00060 int try_match__( void )
00061 {
00062   SV *text;        /* the storage for the string in embedded Perl */
00063   SV *string_buff; /* the storage for the string in embedded Perl */
00064   int was_match;   /* number of the matches */
00065   char *string = ptoc_string(1),
00066     *pattern = ptoc_string(2);
00067 
00068   /* first load the perl interpreter, if unloaded */
00069   if (perlObjectStatus == UNLOADED) load_perl__();
00070 
00071   text = newSV(0);
00072   string_buff = newSV(0);
00073   sv_setpv(text, string);  /* store the string in the SV */
00074     
00075   was_match = match(text, pattern );
00076   
00077   global_pattern_mode = is_global_pattern(pattern);
00078   
00079   SvREFCNT_dec(string_buff);
00080   SvREFCNT_dec(text);
00081   
00082   return(was_match);
00083 }
00084 
00085 
00086 /*----------------------------------------------------------------------------
00087 next_match__()
00088 The pattern match function which repeats pattern match after 
00089 the pattern match of the function try_match__().
00090 If there is no calling of function try_match__() before, give warning! 
00091    output: if no match found, return FAILURE.
00092 ----------------------------------------------------------------------------*/
00093 int next_match__( void )
00094 {
00095   int was_match;        /* return code */
00096 
00097    if ( matchPattern == NULL ) { /*didn't try_match__ before*/
00098      xsb_warn("call try_match/2 first!");
00099      was_match = FAILURE;
00100    }
00101    else /*do next match*/
00102      was_match = match_again( );
00103 
00104    if (global_pattern_mode)
00105      return(was_match);
00106    /* always fail, if Perl pattern is not global */
00107    return FAILURE;
00108 }
00109 
00110 /*----------------------------------------------------------------------------
00111 do_bulk_match__()
00112 The pattern match function which includes loading perl interpreter and 
00113 doing the global perl pattern match, and storing the results in the global 
00114 array of bulkMatchList.
00115 argument: 
00116   input: char* string                -- input text
00117          char* pattern               --  match pattern
00118   output: int* num_match             --  the number of the matches       
00119 ----------------------------------------------------------------------------*/
00120 int do_bulk_match__( void )
00121 {
00122   AV *match_list;           /* AV storage of matches list*/
00123   SV *text;                 /* storage for the embedded perl cmd */
00124   SV *string_buff;          /* storage for the embedded perl cmd */
00125   int num_match;            /* the number of the matches */
00126   int i;
00127  
00128   /* first load the perl interpreter, if unloaded */
00129   if (perlObjectStatus == UNLOADED) load_perl__();
00130 
00131   text = newSV(0);
00132   string_buff = newSV(0);
00133   sv_setpv(text, ptoc_string(1));  /*put the string into an SV */
00134  
00135   /*------------------------------------------------------------------------
00136     free the old match list space and allocate new space for current match list
00137     -----------------------------------------------------------------------*/
00138   for ( i=0; i<preBulkMatchNumber; i++ ) 
00139     free(bulkMatchList[i]);
00140   if (bulkMatchList != NULL ) free(bulkMatchList);
00141   bulkMatchList = NULL;   
00142 
00143   /*------------------------------------------------------------------------
00144     do bulk match
00145     ----------------------------------------------------------------------*/
00146   num_match = all_matches(text, ptoc_string(2),&match_list);
00147     
00148   /* allocate the space to store the matches */
00149   if ( num_match != 0 ) {
00150     preBulkMatchNumber = num_match; /* reset the pre bulk match number */
00151     bulkMatchList = (char **)malloc(num_match*sizeof(char *)); 
00152     if ( bulkMatchList == NULL ) 
00153       xsb_abort("Cannot alocate memory to store the results for bulk match");
00154   }
00155 
00156   /*get the matches from the AV */
00157   for ( i=0;i<num_match;i++ ) {
00158     string_buff = av_shift(match_list);
00159     bulkMatchList[i] = (char *)malloc( strlen(SvPV(string_buff,PL_na))+1 ); 
00160     strcpy((char *)bulkMatchList[i], SvPV(string_buff,PL_na) );   
00161   } 
00162 
00163   SvREFCNT_dec(string_buff); /* release space*/
00164   SvREFCNT_dec(text);
00165   
00166   ctop_int(3, num_match);           /*return the number of matches*/
00167   return SUCCESS;
00168 }
00169 
00170 /*----------------------------------------------------------------------------
00171 perl_substitute__()
00172 The pattern substitution function which includes loading perl interpreter 
00173 and doing the pattern substitution, then returning the replaced string.
00174 arguments: 
00175   input: char* string, input text
00176          char* pattern, match pattern
00177   output:char* string, output text
00178 ----------------------------------------------------------------------------*/
00179 int perl_substitute__( void )
00180 {
00181   SV *text;    /* Perl representation for the string to be 
00182                   modified by substitution */ 
00183   char *subst_cmd = ptoc_string(2);
00184   int i;                
00185   
00186   /* first load the perl interpreter, if unloaded */
00187   if (perlObjectStatus == UNLOADED) load_perl__();
00188   
00189   text = newSV(0);
00190   sv_setpv(text, ptoc_string(1));  /* put the string to the SV */
00191      
00192   if( !substitute(&text, subst_cmd) )
00193     return(FAILURE);
00194   
00195   global_pattern_mode = is_global_pattern(subst_cmd);
00196 
00197   if (substituteString != NULL ) free(substituteString);
00198 
00199   substituteString = malloc(strlen(SvPV(text,PL_na))+1);
00200   strcpy(substituteString,SvPV(text,PL_na));
00201   
00202   SvREFCNT_dec(text);  /*release space*/
00203   
00204   ctop_string(3, string_find(substituteString,1));  /*return changed text*/
00205   return SUCCESS;
00206 }
00207 
00208 /*----------------------------------------------------------------------------
00209 load_perl__():
00210 The function to implement the interface of C and Perl, load the perl 
00211 interpreter and initial the global variables. Then the Perl function
00212 is ready to run.
00213 ----------------------------------------------------------------------------*/
00214 
00215 int load_perl__( void ) 
00216 {
00217   char *embedding[] = {"","-e","0"};  /* perl interpreter config params */
00218   int i;
00219 
00220   /* check if the perl interpreter is loaded already*/
00221   if ( perlObjectStatus == LOADED ) return SUCCESS;
00222 
00223   /*------------------------------------------------------------------------
00224     initial the global variables
00225     ----------------------------------------------------------------------*/
00226   for ( i=0; i<MAX_TOTAL_MATCH; i++ ) 
00227     matchResults[i] = NULL;  
00228   preBulkMatchNumber = 0;
00229   bulkMatchList = NULL;
00230   matchPattern = NULL;
00231   substituteString = NULL;
00232   build_sub_match_spec();    /*build the submatch arguments string constant*/  
00233 
00234   my_perl = perl_alloc();
00235   perl_construct( my_perl );
00236   perl_parse( my_perl, NULL, 3, embedding, (char **)NULL );
00237   perl_run(my_perl);
00238 
00239   perlObjectStatus = LOADED;
00240 
00241   return (SUCCESS); 
00242 }
00243 
00244 /*---------------------------------------------------------------------------
00245 unload_perl__():
00246 The function to release the Perl interpreter, and deallocat the memory
00247 ---------------------------------------------------------------------------*/
00248 
00249 int unload_perl__( void )
00250 {
00251   int i;
00252 
00253   PL_perl_destruct_level = 1;
00254   perl_destruct( my_perl );
00255   perl_free( my_perl );
00256 
00257   /*-------------------------------------------------------------------------
00258    free all the space allocated for perl match functions
00259   -------------------------------------------------------------------------*/
00260   for ( i=0; i<preBulkMatchNumber; i++ ) 
00261     free(bulkMatchList[i]);
00262   if (bulkMatchList != NULL ) free(bulkMatchList);
00263   if (matchPattern != NULL ) free(matchPattern);
00264   if (substituteString != NULL ) free(substituteString);
00265   free(subMatchSpec);
00266 
00267   perlObjectStatus = UNLOADED;
00268 
00269   return SUCCESS;
00270 }  
00271 
00272 /*----------------------------------------------------------------------------
00273 get_bulk_match_result__(order, argumentValue):
00274 The function to get the values of all the matches.
00275 input: is the position of the argument;
00276 output: is the string of match result.
00277 ----------------------------------------------------------------------------*/
00278 
00279 int get_bulk_match_result__( void ) {
00280 
00281   if (perlObjectStatus == UNLOADED ) {
00282     load_perl__();
00283     return(FAILURE);
00284   }
00285 
00286   if ( bulkMatchList[ptoc_int(1)] == NULL )
00287     return FAILURE;        /*no match*/
00288   else{
00289     int match_seq_number= ptoc_int(1);
00290     int match_array_sz= ptoc_int(3);
00291     if (match_seq_number < match_array_sz) {
00292       /* c2p_string( bulkMatchList[match_seq_number], reg_term(2)); */
00293       ctop_string(2, (char *)string_find(bulkMatchList[match_seq_number],1));
00294       return SUCCESS;
00295     }
00296     else return FAILURE;
00297   }
00298 }
00299 
00300 
00301 /*----------------------------------------------------------------------------
00302 get_match_resultC__(matchCode, matchResult):
00303 Get the value of the submatch string $1, $2, ... from 
00304 the global string array of matchResults
00305 
00306 input: is the match code. Match codes correspond to Perl match variables as
00307 follows: 
00308              -1 -- $&
00309              -2 -- $`
00310              -3 -- $'
00311              -4 -- $+ 
00312               1 -- $1
00313               2 -- $2
00314               3 -- $3
00315               ....
00316               9 -- $9
00317               .... MAX_SUB_MATCH
00318 
00319 output: the string of match result.
00320 
00321 The results of the matches (the values of Perl vars) are in consecutive
00322 cells of the matchResults array as follows:
00323      cell#  Perl var
00324        0 -- $&
00325        1 -- $`
00326        2 -- $'
00327        3 -- $+
00328        4 -- $1
00329        5 -- $2
00330         .......
00331 
00332 ----------------------------------------------------------------------------*/
00333 
00334 int get_match_resultC__( void ) {
00335 
00336   int order; 
00337  
00338   int submatch_number=ptoc_int(1);
00339   
00340   /*--------------------------------------------------------------------------
00341     Convert from Prolog-side convention for refering to submatches to
00342     the corresponding  array index numbers in match result storage.
00343   --------------------------------------------------------------------------*/
00344   switch (submatch_number) {
00345   case MATCH:     /*MATCH = -1*/
00346     order = 0;    /* actual position in the memory */
00347     break;
00348   case PREMATCH:  /*PREMATCH = -2*/
00349     order = 1;
00350     break; 
00351   case POSTMATCH:  /*POSTMATCH = -3*/
00352     order = 2;
00353     break;
00354   case LAST_PAREN_MATCH:  /*LAST_PAREN_MATCH = -4*/
00355     order = 3;
00356     break;
00357   default:
00358     if ( submatch_number > MAX_SUB_MATCH ) {
00359       char message[120];
00360       sprintf(message,
00361               "Specified submatch number %d exceeds the limit: %d\n",
00362               submatch_number, MAX_SUB_MATCH);
00363       xsb_warn(message);
00364       order = -99;
00365     }
00366     else order = submatch_number+3;  /* actual position in the memory */
00367     break;
00368   }
00369 
00370   if (order == -99) return(FAILURE);
00371 
00372   if ( matchPattern == NULL ) { /*didn't try_match before*/
00373      xsb_warn("Call try_match/2 first!");
00374      return(FAILURE);
00375    } else if ( !strcmp(matchResults[order],"") || matchResults[order] == NULL )
00376      return(FAILURE);           /*no match found, return FAILURE */
00377   else {
00378     c2p_string( matchResults[order], reg_term(2));
00379     return(SUCCESS);
00380   }
00381 }
00382 
00383 /*----------------------------------------------------------------------------
00384 void build_sub_match_string( void )
00385 This function is used to build the submatch arguments list string, 
00386 "($&,$`,$',$+,$1,$2,$3.....,$MAX_SUB_MATCH)"
00387 here the value of MAX_SUB_MATCH is defined in the include file
00388 ----------------------------------------------------------------------------*/
00389 
00390 void build_sub_match_spec( void ) {
00391 
00392   int i,j,k;          /*counter flags*/
00393   int spaceSize;      /*memory space size for the submatch string*/
00394 
00395   /*get the size of the submatch string, the size of string $1, $2 etc., is 2,
00396     when the digit is bigger than 10, the size of string $10, $11 etc., is 3,
00397     so whenever the digit increases by 10, the size of the string $digit will
00398     increase by 1. Following code is to calculate the size of string $1,$2...*/
00399   j = 1;
00400   k = 10;
00401   spaceSize=0;
00402   for (i=1;i<=MAX_SUB_MATCH;i++) {
00403     if ( i%k==0 ) {
00404       j++;
00405       k*=10;
00406     }
00407     spaceSize += 2+j;    /* the size of ",$" is 2 */
00408   }
00409   spaceSize+=(sizeof(FIXEDSUBMATCHSPEC)+1);
00410 
00411 
00412   /*build the submatch string*/
00413   subMatchSpec=(char *)malloc(spaceSize);
00414   strcpy(subMatchSpec, FIXEDSUBMATCHSPEC); /*build the fixed part $&,$`,$',$+*/
00415 
00416   /* add string $1, $2 etc., to the end of the string */
00417   for (i=1; i<=MAX_SUB_MATCH;i++)
00418     sprintf(&(subMatchSpec[strlen(subMatchSpec)]), ",$%d\0", i);
00419             /*add one of $1, $2 etc., to the string each time by order*/
00420   strcat(subMatchSpec, ")");
00421 
00422   return;
00423   
00424 }
00425 
00426 
00427 /* Check if the Perl pattern is global, i.e., contains the `g' modifier.
00428 ** This is needed so that next_match will know that it has to fail immediately,
00429 ** if no `g' has been specified.
00430 */
00431 int is_global_pattern(char *pattern) {
00432   int len = strlen(pattern), i = len-1;
00433 
00434   /* skip other Perl pattern modifiers and spaces */
00435   while ( (i > 0) &&
00436           ( *(pattern+i) == ' ' || *(pattern+i) == '\t'
00437             || *(pattern+i) == 'o' || *(pattern+i) == 'i' ))
00438     i--;
00439 
00440   if (*(pattern+i) == 'g')
00441     return TRUE;
00442   return FALSE;
00443 
00444 }

Generated on Wed Jul 26 13:30:45 2006 for XSB by  doxygen 1.4.5