perlpattern.c

00001 /* File:      perlpattern.c -- interface to Perl's match() and substitute()
00002 ** Author(s): Jin Yu
00003 ** Contact:   xsb-contact@cs.sunysb.edu
00004 ** 
00005 ** Copyright (C) The Research Foundation of SUNY, 1998
00006 ** 
00007 ** XSB is free software; you can redistribute it and/or modify it under the
00008 ** terms of the GNU Library General Public License as published by the Free
00009 ** Software Foundation; either version 2 of the License, or (at your option)
00010 ** any later version.
00011 ** 
00012 ** XSB is distributed in the hope that it will be useful, but WITHOUT ANY
00013 ** WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00014 ** FOR A PARTICULAR PURPOSE.  See the GNU Library General Public License for
00015 ** more details.
00016 ** 
00017 ** You should have received a copy of the GNU Library General Public License
00018 ** along with XSB; if not, write to the Free Software Foundation,
00019 ** Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
00020 **
00021 ** $Id: perlpattern.c,v 1.5 2004/10/04 13:38:08 dwarren Exp $
00022 ** 
00023 */
00024 
00025 
00026  
00027 /*----------------------------------------------------------------------------
00028 int match(SV *string, char *pattern)
00029 Try to find the match pattern in the given string. 
00030 If found, save the submatch strings in the global string array "matchResults".
00031 
00032      input: char *string: text string
00033             char *pattern: match pattern
00034      output: if match, return SUCCESS(1), otherwise return FAILURE(0).
00035 
00036 ----------------------------------------------------------------------------*/
00037 
00038 int match(SV *string, char *pattern)
00039 {
00040 
00041   SV *command = newSV(0), *retval;  /*allocate space for SV data*/
00042   SV *buffer = newSV(0);
00043   SV *string_buff = newSV(0);
00044   AV *matchArray;                   /* AV storage for submatch lists */
00045   int number,i;
00046   int returnCode = FAILURE;         /*return code*/
00047 
00048   /*-------------------------------------------------------------------------
00049     allocate the space for the match pattern string, and initial it
00050     -----------------------------------------------------------------------*/
00051   if ( matchPattern!=NULL )free(matchPattern);
00052   matchPattern = malloc(strlen(pattern)+1);
00053   strcpy(matchPattern, pattern);  
00054 
00055   /*-------------------------------------------------------------------------
00056     initialize the seaching string 
00057     -----------------------------------------------------------------------*/
00058   /* sv_setpvf(command, "$__text__='%s'", SvPV(string,PL_na)); */
00059   sv_setpvf(command, "$__text__= <<'ENDj9yq6QC43b'; chop($__text__);\n%s\nENDj9yq6QC43b\n", SvPV(string,PL_na)); 
00060   my_perl_eval_sv(command, TRUE);     /* execute the perl command */
00061   
00062   /*-------------------------------------------------------------------------
00063     do the perl pattern matching
00064     ------------------------------------------------------------------------*/
00065   /* use an unlikely variable name __match__ to avoid name clashes*/
00066   sv_setpvf(command,
00067             "$__text__=~%s; @__match__=%s",
00068             matchPattern, subMatchSpec);
00069  
00070   retval=my_perl_eval_sv(command, TRUE);
00071   if (retval == NULL ) return returnCode;
00072 
00073 
00074   /*-------------------------------------------------------------------------
00075     get submatch from @__match__
00076     -----------------------------------------------------------------------*/
00077   matchArray = perl_get_av("__match__", FALSE);
00078   /* +1 because av_len gives the last index, and indices start with 0 */
00079   number = av_len(matchArray) + 1; 
00080   /* MAX_TOTAL_MATCH is also the size of subMatchSpec, so they must be equal */
00081   if ( number != MAX_TOTAL_MATCH ) {
00082     return returnCode;
00083   }
00084 
00085   for ( i=0;i<number;i++ ) {
00086     string_buff = av_shift(matchArray);
00087     if (matchResults[i] != NULL) free(matchResults[i]);
00088     matchResults[i] = (char *)malloc( strlen(SvPV(string_buff,PL_na))+1 ); 
00089     strcpy((char *)matchResults[i], SvPV(string_buff,PL_na) );   
00090   } 
00091 
00092   if ( strcmp(matchResults[0],"") ) returnCode=SUCCESS; 
00093                               /*if $& != "", match is found*/
00094 
00095   SvREFCNT_dec(command);      /* free space for the SV data */   
00096   SvREFCNT_dec(buffer);
00097   SvREFCNT_dec(string_buff);
00098 
00099   return returnCode;
00100 }
00101 
00102 /*----------------------------------------------------------------------------
00103 int matchAgain( void )
00104 Try to find the next occurrence of the match, 
00105 where the string and pattern are same as those of last try_match__ function.
00106 
00107      return value: if match, return SUCCESS;
00108                    otherwise, FAILURE.
00109 ----------------------------------------------------------------------------*/
00110 
00111 int match_again( void )
00112 {
00113 
00114   SV *command = newSV(0), *retval; /*allocate space for SV data*/
00115   SV *buffer = newSV(0);
00116   SV *string_buff = newSV(0);
00117   AV *matchArray;                  /*AV storage for the submatch list*/
00118   int number,i;
00119   int returnCode=FAILURE;                /*return code*/  
00120     
00121   /*-------------------------------------------------------------------------
00122     do the perl pattern matching
00123    ------------------------------------------------------------------------*/
00124   sv_setpvf(command, "$__text__=~%s; @__match__=%s",
00125             matchPattern,subMatchSpec);
00126 
00127   retval=my_perl_eval_sv(command, TRUE);
00128   if (retval == NULL ) return returnCode;
00129 
00130   /*-------------------------------------------------------------------------
00131     get submatches from @__match__
00132     -----------------------------------------------------------------------*/
00133   matchArray = perl_get_av("__match__", FALSE);
00134   /* +1 because av_len gives the last index, and indices start with 0 */
00135   number = av_len(matchArray) + 1;
00136   if ( number != MAX_TOTAL_MATCH ) {
00137     return returnCode;
00138   }
00139 
00140   for ( i=0;i<number;i++ ) {
00141     string_buff = av_shift(matchArray);
00142     if (matchResults[i] != NULL) free(matchResults[i]);
00143     matchResults[i] = (char *)malloc( strlen(SvPV(string_buff,PL_na))+1 ); 
00144     strcpy((char *)matchResults[i], SvPV(string_buff,PL_na) );   
00145   } 
00146 
00147   if ( strcmp(matchResults[0],"") ) returnCode=SUCCESS; 
00148                               /* if $&!="", match is found*/
00149 
00150   SvREFCNT_dec(command);      /* free space for the SV data */  
00151   SvREFCNT_dec(buffer);
00152   SvREFCNT_dec(string_buff);
00153 
00154   return returnCode;
00155 } 
00156 
00157 /*----------------------------------------------------------------------------
00158 int substitute(SV **string, char *pattern)
00159 Try to find the match pattern in the string, and substitute it by 
00160 the expected pattern, the input string is ressigned with the substituted 
00161 string. And return the number of the substituted substrings.
00162 
00163      input: SV **string: the pointer to the text string, it is ressigned when
00164                          returning.
00165             char *pattern: the pattern string.
00166      output: SV **string: the pointer to the text string, it is ressigned when
00167                           returning.  
00168      return value: number of the substituted substrings.
00169 ----------------------------------------------------------------------------*/
00170 
00171 int substitute(SV **string, char *pattern)
00172 {
00173   SV *command = newSV(0), *retval; /*allocate space for SV data */
00174   
00175   /*  sv_setpvf(command, "$__string__ = '%s'; ($__string__ =~ %s)",*/
00176   sv_setpvf(command, "$__string__ = <<'ENDj9yq6QC43b'; chop($__string__);\n%s\nENDj9yq6QC43b\n ($__string__ =~ %s)",
00177             SvPV(*string,PL_na), pattern);
00178 
00179   retval = my_perl_eval_sv(command, TRUE);
00180   if (retval == 0 ) return FAILURE;
00181 
00182   SvREFCNT_dec(command);           /* release the space */
00183 
00184   *string = perl_get_sv("__string__", FALSE);
00185   return SvIV(retval);
00186 }
00187 
00188 /*----------------------------------------------------------------------------
00189 int all_matches(SV *string, char *pattern, AV **match_list)
00190 Try to find the global pattern match in the input string.
00191 Store all matches in an array, then put the contents of the array 
00192 into the AV storage match_list.
00193 
00194      input: char *string: the text string
00195             char *pattern: the match pattern
00196      output: AV **match_list: pointer to the AV storage for all matches
00197      return value: the number of the matches.
00198    
00199 ----------------------------------------------------------------------------*/
00200 
00201 int all_matches(SV *string, char *pattern, AV **match_list)
00202 {
00203   SV *command = newSV(0), *retval; /*allocate space for SV data */
00204   I32 num_matches;
00205 
00206   /*  sv_setpvf(command, "my $string = '%s'; @__array__ = ($string =~ %s)",*/
00207   sv_setpvf(command, "my $string = <<'ENDj9yq6QC43b'; chop($string);\n%s\nENDj9yq6QC43b\n @__array__ = ($string =~ %s)",
00208             SvPV(string,PL_na), pattern);
00209 
00210   retval=my_perl_eval_sv(command, TRUE);
00211   if (retval == 0 ) return FAILURE;
00212 
00213   SvREFCNT_dec(command);  /*release the space */
00214 
00215   *match_list = perl_get_av("__array__", FALSE);
00216   num_matches = av_len(*match_list) + 1;
00217        /* +1 because av_len gives the last index, and indices start with 0 */
00218   
00219   return num_matches;
00220 
00221 }
00222 
00223 /*----------------------------------------------------------------------------
00224 my_perl_eval_sv()
00225 the function to interpret the input perl command and execute it
00226       input: SV* sv: the SV data containing the perl command string
00227              I32 croak_on_error: error handler flag
00228       return value: perl command executing results.
00229 ---------------------------------------------------------------------------*/ 
00230 
00231 SV* my_perl_eval_sv(SV *sv, I32 croak_on_error)
00232 {
00233     dSP;
00234     SV* retval;
00235 
00236     PUSHMARK(sp);      /*push in*/
00237     perl_eval_sv(sv, G_SCALAR); /*interpret the perl command*/
00238 
00239     SPAGAIN;
00240     retval = POPs;
00241     PUTBACK;           /*pop out*/
00242 
00243     if (croak_on_error && SvTRUE(GvSV(PL_errgv))) {
00244       printf("Warning: syntax error in the pattern expression!\n");
00245       /*
00246         printf("The entire Perl expression was: \n\t %s\n", SvPV(sv));
00247        */
00248       return FAILURE;
00249     }
00250 
00251     return retval;
00252 }

Generated on Wed Jul 26 13:30:45 2006 for XSB by  doxygen 1.4.5