00001 /* File: perlpattern.c -- interface to Perl's match() and substitute() 00002 ** Author(s): Jin Yu 00003 ** Contact: xsb-contact@cs.sunysb.edu 00004 ** 00005 ** Copyright (C) The Research Foundation of SUNY, 1998 00006 ** 00007 ** XSB is free software; you can redistribute it and/or modify it under the 00008 ** terms of the GNU Library General Public License as published by the Free 00009 ** Software Foundation; either version 2 of the License, or (at your option) 00010 ** any later version. 00011 ** 00012 ** XSB is distributed in the hope that it will be useful, but WITHOUT ANY 00013 ** WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 00014 ** FOR A PARTICULAR PURPOSE. See the GNU Library General Public License for 00015 ** more details. 00016 ** 00017 ** You should have received a copy of the GNU Library General Public License 00018 ** along with XSB; if not, write to the Free Software Foundation, 00019 ** Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. 00020 ** 00021 ** $Id: perlpattern.c,v 1.5 2004/10/04 13:38:08 dwarren Exp $ 00022 ** 00023 */ 00024 00025 00026 00027 /*---------------------------------------------------------------------------- 00028 int match(SV *string, char *pattern) 00029 Try to find the match pattern in the given string. 00030 If found, save the submatch strings in the global string array "matchResults". 00031 00032 input: char *string: text string 00033 char *pattern: match pattern 00034 output: if match, return SUCCESS(1), otherwise return FAILURE(0). 00035 00036 ----------------------------------------------------------------------------*/ 00037 00038 int match(SV *string, char *pattern) 00039 { 00040 00041 SV *command = newSV(0), *retval; /*allocate space for SV data*/ 00042 SV *buffer = newSV(0); 00043 SV *string_buff = newSV(0); 00044 AV *matchArray; /* AV storage for submatch lists */ 00045 int number,i; 00046 int returnCode = FAILURE; /*return code*/ 00047 00048 /*------------------------------------------------------------------------- 00049 allocate the space for the match pattern string, and initial it 00050 -----------------------------------------------------------------------*/ 00051 if ( matchPattern!=NULL )free(matchPattern); 00052 matchPattern = malloc(strlen(pattern)+1); 00053 strcpy(matchPattern, pattern); 00054 00055 /*------------------------------------------------------------------------- 00056 initialize the seaching string 00057 -----------------------------------------------------------------------*/ 00058 /* sv_setpvf(command, "$__text__='%s'", SvPV(string,PL_na)); */ 00059 sv_setpvf(command, "$__text__= <<'ENDj9yq6QC43b'; chop($__text__);\n%s\nENDj9yq6QC43b\n", SvPV(string,PL_na)); 00060 my_perl_eval_sv(command, TRUE); /* execute the perl command */ 00061 00062 /*------------------------------------------------------------------------- 00063 do the perl pattern matching 00064 ------------------------------------------------------------------------*/ 00065 /* use an unlikely variable name __match__ to avoid name clashes*/ 00066 sv_setpvf(command, 00067 "$__text__=~%s; @__match__=%s", 00068 matchPattern, subMatchSpec); 00069 00070 retval=my_perl_eval_sv(command, TRUE); 00071 if (retval == NULL ) return returnCode; 00072 00073 00074 /*------------------------------------------------------------------------- 00075 get submatch from @__match__ 00076 -----------------------------------------------------------------------*/ 00077 matchArray = perl_get_av("__match__", FALSE); 00078 /* +1 because av_len gives the last index, and indices start with 0 */ 00079 number = av_len(matchArray) + 1; 00080 /* MAX_TOTAL_MATCH is also the size of subMatchSpec, so they must be equal */ 00081 if ( number != MAX_TOTAL_MATCH ) { 00082 return returnCode; 00083 } 00084 00085 for ( i=0;i<number;i++ ) { 00086 string_buff = av_shift(matchArray); 00087 if (matchResults[i] != NULL) free(matchResults[i]); 00088 matchResults[i] = (char *)malloc( strlen(SvPV(string_buff,PL_na))+1 ); 00089 strcpy((char *)matchResults[i], SvPV(string_buff,PL_na) ); 00090 } 00091 00092 if ( strcmp(matchResults[0],"") ) returnCode=SUCCESS; 00093 /*if $& != "", match is found*/ 00094 00095 SvREFCNT_dec(command); /* free space for the SV data */ 00096 SvREFCNT_dec(buffer); 00097 SvREFCNT_dec(string_buff); 00098 00099 return returnCode; 00100 } 00101 00102 /*---------------------------------------------------------------------------- 00103 int matchAgain( void ) 00104 Try to find the next occurrence of the match, 00105 where the string and pattern are same as those of last try_match__ function. 00106 00107 return value: if match, return SUCCESS; 00108 otherwise, FAILURE. 00109 ----------------------------------------------------------------------------*/ 00110 00111 int match_again( void ) 00112 { 00113 00114 SV *command = newSV(0), *retval; /*allocate space for SV data*/ 00115 SV *buffer = newSV(0); 00116 SV *string_buff = newSV(0); 00117 AV *matchArray; /*AV storage for the submatch list*/ 00118 int number,i; 00119 int returnCode=FAILURE; /*return code*/ 00120 00121 /*------------------------------------------------------------------------- 00122 do the perl pattern matching 00123 ------------------------------------------------------------------------*/ 00124 sv_setpvf(command, "$__text__=~%s; @__match__=%s", 00125 matchPattern,subMatchSpec); 00126 00127 retval=my_perl_eval_sv(command, TRUE); 00128 if (retval == NULL ) return returnCode; 00129 00130 /*------------------------------------------------------------------------- 00131 get submatches from @__match__ 00132 -----------------------------------------------------------------------*/ 00133 matchArray = perl_get_av("__match__", FALSE); 00134 /* +1 because av_len gives the last index, and indices start with 0 */ 00135 number = av_len(matchArray) + 1; 00136 if ( number != MAX_TOTAL_MATCH ) { 00137 return returnCode; 00138 } 00139 00140 for ( i=0;i<number;i++ ) { 00141 string_buff = av_shift(matchArray); 00142 if (matchResults[i] != NULL) free(matchResults[i]); 00143 matchResults[i] = (char *)malloc( strlen(SvPV(string_buff,PL_na))+1 ); 00144 strcpy((char *)matchResults[i], SvPV(string_buff,PL_na) ); 00145 } 00146 00147 if ( strcmp(matchResults[0],"") ) returnCode=SUCCESS; 00148 /* if $&!="", match is found*/ 00149 00150 SvREFCNT_dec(command); /* free space for the SV data */ 00151 SvREFCNT_dec(buffer); 00152 SvREFCNT_dec(string_buff); 00153 00154 return returnCode; 00155 } 00156 00157 /*---------------------------------------------------------------------------- 00158 int substitute(SV **string, char *pattern) 00159 Try to find the match pattern in the string, and substitute it by 00160 the expected pattern, the input string is ressigned with the substituted 00161 string. And return the number of the substituted substrings. 00162 00163 input: SV **string: the pointer to the text string, it is ressigned when 00164 returning. 00165 char *pattern: the pattern string. 00166 output: SV **string: the pointer to the text string, it is ressigned when 00167 returning. 00168 return value: number of the substituted substrings. 00169 ----------------------------------------------------------------------------*/ 00170 00171 int substitute(SV **string, char *pattern) 00172 { 00173 SV *command = newSV(0), *retval; /*allocate space for SV data */ 00174 00175 /* sv_setpvf(command, "$__string__ = '%s'; ($__string__ =~ %s)",*/ 00176 sv_setpvf(command, "$__string__ = <<'ENDj9yq6QC43b'; chop($__string__);\n%s\nENDj9yq6QC43b\n ($__string__ =~ %s)", 00177 SvPV(*string,PL_na), pattern); 00178 00179 retval = my_perl_eval_sv(command, TRUE); 00180 if (retval == 0 ) return FAILURE; 00181 00182 SvREFCNT_dec(command); /* release the space */ 00183 00184 *string = perl_get_sv("__string__", FALSE); 00185 return SvIV(retval); 00186 } 00187 00188 /*---------------------------------------------------------------------------- 00189 int all_matches(SV *string, char *pattern, AV **match_list) 00190 Try to find the global pattern match in the input string. 00191 Store all matches in an array, then put the contents of the array 00192 into the AV storage match_list. 00193 00194 input: char *string: the text string 00195 char *pattern: the match pattern 00196 output: AV **match_list: pointer to the AV storage for all matches 00197 return value: the number of the matches. 00198 00199 ----------------------------------------------------------------------------*/ 00200 00201 int all_matches(SV *string, char *pattern, AV **match_list) 00202 { 00203 SV *command = newSV(0), *retval; /*allocate space for SV data */ 00204 I32 num_matches; 00205 00206 /* sv_setpvf(command, "my $string = '%s'; @__array__ = ($string =~ %s)",*/ 00207 sv_setpvf(command, "my $string = <<'ENDj9yq6QC43b'; chop($string);\n%s\nENDj9yq6QC43b\n @__array__ = ($string =~ %s)", 00208 SvPV(string,PL_na), pattern); 00209 00210 retval=my_perl_eval_sv(command, TRUE); 00211 if (retval == 0 ) return FAILURE; 00212 00213 SvREFCNT_dec(command); /*release the space */ 00214 00215 *match_list = perl_get_av("__array__", FALSE); 00216 num_matches = av_len(*match_list) + 1; 00217 /* +1 because av_len gives the last index, and indices start with 0 */ 00218 00219 return num_matches; 00220 00221 } 00222 00223 /*---------------------------------------------------------------------------- 00224 my_perl_eval_sv() 00225 the function to interpret the input perl command and execute it 00226 input: SV* sv: the SV data containing the perl command string 00227 I32 croak_on_error: error handler flag 00228 return value: perl command executing results. 00229 ---------------------------------------------------------------------------*/ 00230 00231 SV* my_perl_eval_sv(SV *sv, I32 croak_on_error) 00232 { 00233 dSP; 00234 SV* retval; 00235 00236 PUSHMARK(sp); /*push in*/ 00237 perl_eval_sv(sv, G_SCALAR); /*interpret the perl command*/ 00238 00239 SPAGAIN; 00240 retval = POPs; 00241 PUTBACK; /*pop out*/ 00242 00243 if (croak_on_error && SvTRUE(GvSV(PL_errgv))) { 00244 printf("Warning: syntax error in the pattern expression!\n"); 00245 /* 00246 printf("The entire Perl expression was: \n\t %s\n", SvPV(sv)); 00247 */ 00248 return FAILURE; 00249 } 00250 00251 return retval; 00252 }