00001 /* File: xsbpattern.c -- XSB-side interface to match() and substitute() 00002 ** Author(s): Jin Yu 00003 ** Contact: xsb-contact@cs.sunysb.edu 00004 ** 00005 ** Copyright (C) The Research Foundation of SUNY, 1998 00006 ** 00007 ** XSB is free software; you can redistribute it and/or modify it under the 00008 ** terms of the GNU Library General Public License as published by the Free 00009 ** Software Foundation; either version 2 of the License, or (at your option) 00010 ** any later version. 00011 ** 00012 ** XSB is distributed in the hope that it will be useful, but WITHOUT ANY 00013 ** WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 00014 ** FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for 00015 ** more details. 00016 ** 00017 ** You should have received a copy of the GNU Library General Public License 00018 ** along with XSB; if not, write to the Free Software Foundation, 00019 ** Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 00020 ** 00021 ** $Id: xsbpattern.c,v 1.10 2003/04/15 17:16:24 kostis Exp $ 00022 ** 00023 */ 00024 00025 00026 /*---------------------------------------------------------------------------- 00027 try_match__() -- find the match pattern, 00028 next_match__() -- find the next match pattern, 00029 do_bulk_match__() -- find the global match patterns, 00030 perl_substitute__() -- substitute the string with expected pattern, 00031 load_perl__() -- load the perl interpretor, 00032 unload_perl__() -- release the perl interpretor object, 00033 get_match_resultC__() -- get the perl pattern match C function, 00034 get_bulk_match_result__() -- get Perl global pattern match results. 00035 00036 ----------------------------------------------------------------------------*/ 00037 00038 00039 #include "interface.h" 00040 #include "perlpattern.c" /* pattern match basic functions */ 00041 00042 void build_sub_match_spec( void ); 00043 int is_global_pattern( char *); 00044 int global_pattern_mode = FALSE; 00045 00046 extern void xsb_abort(char *, ...); 00047 00048 #define xsb_warn(warning) fprintf(stderr, "++Warning: %s\n", warning) 00049 00050 00051 /*---------------------------------------------------------------------------- 00052 try_match__() 00053 The pattern matching function which includes loading perl interpreter and 00054 trying the perl pattern matching. 00055 arguments: 00056 input: char* string, -- input text 00057 char* pattern -- match pattern 00058 output:if no match found, return FAILURE (0). 00059 ----------------------------------------------------------------------------*/ 00060 int try_match__( void ) 00061 { 00062 SV *text; /* the storage for the string in embedded Perl */ 00063 SV *string_buff; /* the storage for the string in embedded Perl */ 00064 int was_match; /* number of the matches */ 00065 char *string = ptoc_string(1), 00066 *pattern = ptoc_string(2); 00067 00068 /* first load the perl interpreter, if unloaded */ 00069 if (perlObjectStatus == UNLOADED) load_perl__(); 00070 00071 text = newSV(0); 00072 string_buff = newSV(0); 00073 sv_setpv(text, string); /* store the string in the SV */ 00074 00075 was_match = match(text, pattern ); 00076 00077 global_pattern_mode = is_global_pattern(pattern); 00078 00079 SvREFCNT_dec(string_buff); 00080 SvREFCNT_dec(text); 00081 00082 return(was_match); 00083 } 00084 00085 00086 /*---------------------------------------------------------------------------- 00087 next_match__() 00088 The pattern match function which repeats pattern match after 00089 the pattern match of the function try_match__(). 00090 If there is no calling of function try_match__() before, give warning! 00091 output: if no match found, return FAILURE. 00092 ----------------------------------------------------------------------------*/ 00093 int next_match__( void ) 00094 { 00095 int was_match; /* return code */ 00096 00097 if ( matchPattern == NULL ) { /*didn't try_match__ before*/ 00098 xsb_warn("call try_match/2 first!"); 00099 was_match = FAILURE; 00100 } 00101 else /*do next match*/ 00102 was_match = match_again( ); 00103 00104 if (global_pattern_mode) 00105 return(was_match); 00106 /* always fail, if Perl pattern is not global */ 00107 return FAILURE; 00108 } 00109 00110 /*---------------------------------------------------------------------------- 00111 do_bulk_match__() 00112 The pattern match function which includes loading perl interpreter and 00113 doing the global perl pattern match, and storing the results in the global 00114 array of bulkMatchList. 00115 argument: 00116 input: char* string -- input text 00117 char* pattern -- match pattern 00118 output: int* num_match -- the number of the matches 00119 ----------------------------------------------------------------------------*/ 00120 int do_bulk_match__( void ) 00121 { 00122 AV *match_list; /* AV storage of matches list*/ 00123 SV *text; /* storage for the embedded perl cmd */ 00124 SV *string_buff; /* storage for the embedded perl cmd */ 00125 int num_match; /* the number of the matches */ 00126 int i; 00127 00128 /* first load the perl interpreter, if unloaded */ 00129 if (perlObjectStatus == UNLOADED) load_perl__(); 00130 00131 text = newSV(0); 00132 string_buff = newSV(0); 00133 sv_setpv(text, ptoc_string(1)); /*put the string into an SV */ 00134 00135 /*------------------------------------------------------------------------ 00136 free the old match list space and allocate new space for current match list 00137 -----------------------------------------------------------------------*/ 00138 for ( i=0; i<preBulkMatchNumber; i++ ) 00139 free(bulkMatchList[i]); 00140 if (bulkMatchList != NULL ) free(bulkMatchList); 00141 bulkMatchList = NULL; 00142 00143 /*------------------------------------------------------------------------ 00144 do bulk match 00145 ----------------------------------------------------------------------*/ 00146 num_match = all_matches(text, ptoc_string(2),&match_list); 00147 00148 /* allocate the space to store the matches */ 00149 if ( num_match != 0 ) { 00150 preBulkMatchNumber = num_match; /* reset the pre bulk match number */ 00151 bulkMatchList = (char **)malloc(num_match*sizeof(char *)); 00152 if ( bulkMatchList == NULL ) 00153 xsb_abort("Cannot alocate memory to store the results for bulk match"); 00154 } 00155 00156 /*get the matches from the AV */ 00157 for ( i=0;i<num_match;i++ ) { 00158 string_buff = av_shift(match_list); 00159 bulkMatchList[i] = (char *)malloc( strlen(SvPV(string_buff,PL_na))+1 ); 00160 strcpy((char *)bulkMatchList[i], SvPV(string_buff,PL_na) ); 00161 } 00162 00163 SvREFCNT_dec(string_buff); /* release space*/ 00164 SvREFCNT_dec(text); 00165 00166 ctop_int(3, num_match); /*return the number of matches*/ 00167 return SUCCESS; 00168 } 00169 00170 /*---------------------------------------------------------------------------- 00171 perl_substitute__() 00172 The pattern substitution function which includes loading perl interpreter 00173 and doing the pattern substitution, then returning the replaced string. 00174 arguments: 00175 input: char* string, input text 00176 char* pattern, match pattern 00177 output:char* string, output text 00178 ----------------------------------------------------------------------------*/ 00179 int perl_substitute__( void ) 00180 { 00181 SV *text; /* Perl representation for the string to be 00182 modified by substitution */ 00183 char *subst_cmd = ptoc_string(2); 00184 int i; 00185 00186 /* first load the perl interpreter, if unloaded */ 00187 if (perlObjectStatus == UNLOADED) load_perl__(); 00188 00189 text = newSV(0); 00190 sv_setpv(text, ptoc_string(1)); /* put the string to the SV */ 00191 00192 if( !substitute(&text, subst_cmd) ) 00193 return(FAILURE); 00194 00195 global_pattern_mode = is_global_pattern(subst_cmd); 00196 00197 if (substituteString != NULL ) free(substituteString); 00198 00199 substituteString = malloc(strlen(SvPV(text,PL_na))+1); 00200 strcpy(substituteString,SvPV(text,PL_na)); 00201 00202 SvREFCNT_dec(text); /*release space*/ 00203 00204 ctop_string(3, string_find(substituteString,1)); /*return changed text*/ 00205 return SUCCESS; 00206 } 00207 00208 /*---------------------------------------------------------------------------- 00209 load_perl__(): 00210 The function to implement the interface of C and Perl, load the perl 00211 interpreter and initial the global variables. Then the Perl function 00212 is ready to run. 00213 ----------------------------------------------------------------------------*/ 00214 00215 int load_perl__( void ) 00216 { 00217 char *embedding[] = {"","-e","0"}; /* perl interpreter config params */ 00218 int i; 00219 00220 /* check if the perl interpreter is loaded already*/ 00221 if ( perlObjectStatus == LOADED ) return SUCCESS; 00222 00223 /*------------------------------------------------------------------------ 00224 initial the global variables 00225 ----------------------------------------------------------------------*/ 00226 for ( i=0; i<MAX_TOTAL_MATCH; i++ ) 00227 matchResults[i] = NULL; 00228 preBulkMatchNumber = 0; 00229 bulkMatchList = NULL; 00230 matchPattern = NULL; 00231 substituteString = NULL; 00232 build_sub_match_spec(); /*build the submatch arguments string constant*/ 00233 00234 my_perl = perl_alloc(); 00235 perl_construct( my_perl ); 00236 perl_parse( my_perl, NULL, 3, embedding, (char **)NULL ); 00237 perl_run(my_perl); 00238 00239 perlObjectStatus = LOADED; 00240 00241 return (SUCCESS); 00242 } 00243 00244 /*--------------------------------------------------------------------------- 00245 unload_perl__(): 00246 The function to release the Perl interpreter, and deallocat the memory 00247 ---------------------------------------------------------------------------*/ 00248 00249 int unload_perl__( void ) 00250 { 00251 int i; 00252 00253 PL_perl_destruct_level = 1; 00254 perl_destruct( my_perl ); 00255 perl_free( my_perl ); 00256 00257 /*------------------------------------------------------------------------- 00258 free all the space allocated for perl match functions 00259 -------------------------------------------------------------------------*/ 00260 for ( i=0; i<preBulkMatchNumber; i++ ) 00261 free(bulkMatchList[i]); 00262 if (bulkMatchList != NULL ) free(bulkMatchList); 00263 if (matchPattern != NULL ) free(matchPattern); 00264 if (substituteString != NULL ) free(substituteString); 00265 free(subMatchSpec); 00266 00267 perlObjectStatus = UNLOADED; 00268 00269 return SUCCESS; 00270 } 00271 00272 /*---------------------------------------------------------------------------- 00273 get_bulk_match_result__(order, argumentValue): 00274 The function to get the values of all the matches. 00275 input: is the position of the argument; 00276 output: is the string of match result. 00277 ----------------------------------------------------------------------------*/ 00278 00279 int get_bulk_match_result__( void ) { 00280 00281 if (perlObjectStatus == UNLOADED ) { 00282 load_perl__(); 00283 return(FAILURE); 00284 } 00285 00286 if ( bulkMatchList[ptoc_int(1)] == NULL ) 00287 return FAILURE; /*no match*/ 00288 else{ 00289 int match_seq_number= ptoc_int(1); 00290 int match_array_sz= ptoc_int(3); 00291 if (match_seq_number < match_array_sz) { 00292 /* c2p_string( bulkMatchList[match_seq_number], reg_term(2)); */ 00293 ctop_string(2, (char *)string_find(bulkMatchList[match_seq_number],1)); 00294 return SUCCESS; 00295 } 00296 else return FAILURE; 00297 } 00298 } 00299 00300 00301 /*---------------------------------------------------------------------------- 00302 get_match_resultC__(matchCode, matchResult): 00303 Get the value of the submatch string $1, $2, ... from 00304 the global string array of matchResults 00305 00306 input: is the match code. Match codes correspond to Perl match variables as 00307 follows: 00308 -1 -- $& 00309 -2 -- $` 00310 -3 -- $' 00311 -4 -- $+ 00312 1 -- $1 00313 2 -- $2 00314 3 -- $3 00315 .... 00316 9 -- $9 00317 .... MAX_SUB_MATCH 00318 00319 output: the string of match result. 00320 00321 The results of the matches (the values of Perl vars) are in consecutive 00322 cells of the matchResults array as follows: 00323 cell# Perl var 00324 0 -- $& 00325 1 -- $` 00326 2 -- $' 00327 3 -- $+ 00328 4 -- $1 00329 5 -- $2 00330 ....... 00331 00332 ----------------------------------------------------------------------------*/ 00333 00334 int get_match_resultC__( void ) { 00335 00336 int order; 00337 00338 int submatch_number=ptoc_int(1); 00339 00340 /*-------------------------------------------------------------------------- 00341 Convert from Prolog-side convention for refering to submatches to 00342 the corresponding array index numbers in match result storage. 00343 --------------------------------------------------------------------------*/ 00344 switch (submatch_number) { 00345 case MATCH: /*MATCH = -1*/ 00346 order = 0; /* actual position in the memory */ 00347 break; 00348 case PREMATCH: /*PREMATCH = -2*/ 00349 order = 1; 00350 break; 00351 case POSTMATCH: /*POSTMATCH = -3*/ 00352 order = 2; 00353 break; 00354 case LAST_PAREN_MATCH: /*LAST_PAREN_MATCH = -4*/ 00355 order = 3; 00356 break; 00357 default: 00358 if ( submatch_number > MAX_SUB_MATCH ) { 00359 char message[120]; 00360 sprintf(message, 00361 "Specified submatch number %d exceeds the limit: %d\n", 00362 submatch_number, MAX_SUB_MATCH); 00363 xsb_warn(message); 00364 order = -99; 00365 } 00366 else order = submatch_number+3; /* actual position in the memory */ 00367 break; 00368 } 00369 00370 if (order == -99) return(FAILURE); 00371 00372 if ( matchPattern == NULL ) { /*didn't try_match before*/ 00373 xsb_warn("Call try_match/2 first!"); 00374 return(FAILURE); 00375 } else if ( !strcmp(matchResults[order],"") || matchResults[order] == NULL ) 00376 return(FAILURE); /*no match found, return FAILURE */ 00377 else { 00378 c2p_string( matchResults[order], reg_term(2)); 00379 return(SUCCESS); 00380 } 00381 } 00382 00383 /*---------------------------------------------------------------------------- 00384 void build_sub_match_string( void ) 00385 This function is used to build the submatch arguments list string, 00386 "($&,$`,$',$+,$1,$2,$3.....,$MAX_SUB_MATCH)" 00387 here the value of MAX_SUB_MATCH is defined in the include file 00388 ----------------------------------------------------------------------------*/ 00389 00390 void build_sub_match_spec( void ) { 00391 00392 int i,j,k; /*counter flags*/ 00393 int spaceSize; /*memory space size for the submatch string*/ 00394 00395 /*get the size of the submatch string, the size of string $1, $2 etc., is 2, 00396 when the digit is bigger than 10, the size of string $10, $11 etc., is 3, 00397 so whenever the digit increases by 10, the size of the string $digit will 00398 increase by 1. Following code is to calculate the size of string $1,$2...*/ 00399 j = 1; 00400 k = 10; 00401 spaceSize=0; 00402 for (i=1;i<=MAX_SUB_MATCH;i++) { 00403 if ( i%k==0 ) { 00404 j++; 00405 k*=10; 00406 } 00407 spaceSize += 2+j; /* the size of ",$" is 2 */ 00408 } 00409 spaceSize+=(sizeof(FIXEDSUBMATCHSPEC)+1); 00410 00411 00412 /*build the submatch string*/ 00413 subMatchSpec=(char *)malloc(spaceSize); 00414 strcpy(subMatchSpec, FIXEDSUBMATCHSPEC); /*build the fixed part $&,$`,$',$+*/ 00415 00416 /* add string $1, $2 etc., to the end of the string */ 00417 for (i=1; i<=MAX_SUB_MATCH;i++) 00418 sprintf(&(subMatchSpec[strlen(subMatchSpec)]), ",$%d\0", i); 00419 /*add one of $1, $2 etc., to the string each time by order*/ 00420 strcat(subMatchSpec, ")"); 00421 00422 return; 00423 00424 } 00425 00426 00427 /* Check if the Perl pattern is global, i.e., contains the `g' modifier. 00428 ** This is needed so that next_match will know that it has to fail immediately, 00429 ** if no `g' has been specified. 00430 */ 00431 int is_global_pattern(char *pattern) { 00432 int len = strlen(pattern), i = len-1; 00433 00434 /* skip other Perl pattern modifiers and spaces */ 00435 while ( (i > 0) && 00436 ( *(pattern+i) == ' ' || *(pattern+i) == '\t' 00437 || *(pattern+i) == 'o' || *(pattern+i) == 'i' )) 00438 i--; 00439 00440 if (*(pattern+i) == 'g') 00441 return TRUE; 00442 return FALSE; 00443 00444 }