00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #include "xsb_config.h"
00026 #include "xsb_debug.h"
00027
00028 #include <stdio.h>
00029 #include <stdlib.h>
00030 #include <string.h>
00031
00032 #include "auxlry.h"
00033 #include "cell_xsb.h"
00034 #include "error_xsb.h"
00035 #include "psc_xsb.h"
00036 #include "hash_xsb.h"
00037 #include "tries.h"
00038 #include "choice.h"
00039 #include "deref.h"
00040 #include "memory_xsb.h"
00041 #include "heap_xsb.h"
00042 #include "register.h"
00043 #include "flags_xsb.h"
00044 #include "binding.h"
00045 #include "builtin.h"
00046 #include "cinterf.h"
00047
00048 #include "sp_unify_xsb_i.h"
00049 #include "string_xsb.h"
00050
00051 extern char *p_charlist_to_c_string(CTXTdeclc prolog_term term, VarString *outstring,
00052 char *in_func, char *where);
00053 extern void c_string_to_p_charlist(char *name, prolog_term list,
00054 int regs_to_protect, char *in_func, char *where);
00055
00056 static Cell term, term2, term3;
00057
00058 static XSB_StrDefine(input_buffer);
00059 static XSB_StrDefine(subst_buf);
00060 static XSB_StrDefine(output_buffer);
00061
00062
00063 #include "ptoc_tag_xsb_i.h"
00064
00065
00066 xsbBool str_cat(CTXTdecl)
00067 {
00068 char *str1, *str2, *tmpstr;
00069 int tmpstr_len;
00070
00071 term = ptoc_tag(CTXTc 1);
00072 term2 = ptoc_tag(CTXTc 2);
00073 if (isatom(term) && isatom(term2)) {
00074 str1 = string_val(term);
00075 str2 = string_val(term2);
00076 tmpstr_len = strlen(str1) + strlen(str2) + 1;
00077
00078 tmpstr = (char *)mem_alloc(tmpstr_len,LEAK_SPACE);
00079 strcpy(tmpstr, str1);
00080 strcat(tmpstr, str2);
00081 str1 = string_find(tmpstr, 1);
00082 mem_dealloc(tmpstr,tmpstr_len,LEAK_SPACE);
00083 return atom_unify(CTXTc makestring(str1), ptoc_tag(CTXTc 3));
00084 } else return FALSE;
00085 }
00086
00087
00088
00089
00090
00091
00092
00093
00094
00095
00096
00097
00098
00099
00100
00101
00102
00103
00104
00105
00106
00107
00108
00109
00110
00111 xsbBool str_match(CTXTdecl)
00112 {
00113 static char *subptr, *stringptr, *direction, *matchptr;
00114 static int substr_beg, substr_end;
00115 int reverse=TRUE;
00116 int beg_bos_offset=TRUE;
00117 int end_bos_offset=TRUE;
00118 int str_len, sub_len;
00119 Cell beg_offset_term, end_offset_term;
00120
00121 term = ptoc_tag(CTXTc 1);
00122 term2 = ptoc_tag(CTXTc 2);
00123 term3 = ptoc_tag(CTXTc 3);
00124 beg_offset_term = ptoc_tag(CTXTc 4);
00125 end_offset_term = ptoc_tag(CTXTc 5);
00126 if (!isatom(term) || !isatom(term2) || !isatom(term3)) {
00127 xsb_abort("STR_MATCH: Arguments 1,2,3 must be bound to strings");
00128 }
00129 subptr = string_val(term);
00130 stringptr = string_val(term2);
00131 direction = string_val(term3);
00132
00133 if (*direction == 'f')
00134 reverse=FALSE;
00135 else if (*direction != 'r')
00136 xsb_abort("STR_MATCH: Argument 3 must be bound to forward/reverse");
00137
00138 str_len=strlen(stringptr);
00139 sub_len=strlen(subptr);
00140
00141 if (isinteger(beg_offset_term)|isboxedinteger(beg_offset_term)) {
00142 if (int_val(beg_offset_term) < 0) {
00143 beg_bos_offset = FALSE;
00144 }
00145 }
00146 if (isinteger(end_offset_term)|isboxedinteger(end_offset_term)) {
00147 if (int_val(end_offset_term) < 0) {
00148 end_bos_offset = FALSE;
00149 }
00150 }
00151
00152 if (reverse)
00153 matchptr = xsb_strrstr(stringptr, subptr);
00154 else
00155 matchptr = strstr(stringptr, subptr);
00156
00157 if (matchptr == NULL) return FALSE;
00158
00159 substr_beg = (beg_bos_offset?
00160 matchptr - stringptr : -(str_len - (matchptr - stringptr))
00161 );
00162 substr_end = (end_bos_offset?
00163 (matchptr - stringptr) + sub_len
00164 : -(str_len + 1 - (matchptr - stringptr) - sub_len)
00165 );
00166
00167 return
00168 (p2p_unify(CTXTc beg_offset_term, makeint(substr_beg))
00169 && p2p_unify(CTXTc end_offset_term, makeint(substr_end)));
00170 }
00171
00172
00173
00174
00175
00176
00177
00178
00179
00180
00181
00182
00183
00184 xsbBool substring(CTXTdecl)
00185 {
00186
00187
00188 prolog_term input_term, output_term;
00189 prolog_term beg_offset_term, end_offset_term;
00190 char *input_string=NULL;
00191 int beg_offset=0, end_offset=0, input_len=0, substring_len=0;
00192 int conversion_required=FALSE;
00193
00194 XSB_StrSet(&output_buffer,"");
00195
00196 input_term = reg_term(CTXTc 1);
00197 if (isatom(input_term))
00198 input_string = string_val(input_term);
00199 else if (islist(input_term)) {
00200 input_string = p_charlist_to_c_string(CTXTc input_term, &input_buffer,
00201 "SUBSTRING", "input string");
00202 conversion_required = TRUE;
00203 } else
00204 xsb_abort("[SUBSTRING] Arg 1 (the input string) must be an atom or a character list");
00205
00206 input_len = strlen(input_string);
00207
00208
00209 beg_offset_term = reg_term(CTXTc 2);
00210 if (! (isinteger(beg_offset_term)|isboxedinteger(beg_offset_term)))
00211 xsb_abort("[SUBSTRING] Arg 2 (the beginning offset) must be an integer");
00212 beg_offset = int_val(beg_offset_term);
00213 if (beg_offset < 0)
00214 beg_offset = 0;
00215 else if (beg_offset > input_len)
00216 beg_offset = input_len;
00217
00218
00219 end_offset_term = reg_term(CTXTc 3);
00220 if (isref(end_offset_term))
00221 end_offset = input_len;
00222 else if (! (isinteger(end_offset_term)|isboxedinteger(end_offset_term)))
00223 xsb_abort("[SUBSTRING] Arg 3 (the end offset) must be integer or _");
00224 else end_offset = int_val(end_offset_term);
00225
00226 if (end_offset < 0)
00227 end_offset = input_len + 1 + end_offset;
00228 else if (end_offset > input_len)
00229 end_offset = input_len;
00230 else if (end_offset < beg_offset)
00231 end_offset = beg_offset;
00232
00233 output_term = reg_term(CTXTc 4);
00234 if (! isref(output_term))
00235 xsb_abort("[SUBSTRING] Arg 4 (the output string) must be an unbound variable");
00236
00237
00238 substring_len = end_offset-beg_offset;
00239 XSB_StrAppendBlk(&output_buffer, input_string+beg_offset, substring_len);
00240 XSB_StrNullTerminate(&output_buffer);
00241
00242
00243 if (conversion_required)
00244 c_string_to_p_charlist(output_buffer.string, output_term,
00245 4, "SUBSTRING", "Arg 4");
00246 else
00247 c2p_string(CTXTc output_buffer.string, output_term);
00248
00249 return(TRUE);
00250 }
00251
00252
00253
00254
00255
00256
00257
00258
00259
00260
00261
00262
00263 xsbBool string_substitute(CTXTdecl)
00264 {
00265
00266
00267 prolog_term input_term, output_term;
00268 prolog_term subst_reg_term, subst_spec_list_term, subst_spec_list_term1;
00269 prolog_term subst_str_term=(prolog_term)0,
00270 subst_str_list_term, subst_str_list_term1;
00271 char *input_string=NULL;
00272 char *subst_string=NULL;
00273 prolog_term beg_term, end_term;
00274 int beg_offset=0, end_offset=0, input_len;
00275 int last_pos = 0;
00276
00277
00278 int conversion_required=FALSE;
00279
00280 XSB_StrSet(&output_buffer,"");
00281
00282 input_term = reg_term(CTXTc 1);
00283 if (isatom(input_term))
00284 input_string = string_val(input_term);
00285 else if (islist(input_term)) {
00286 input_string = p_charlist_to_c_string(CTXTc input_term, &input_buffer,
00287 "STRING_SUBSTITUTE", "input string");
00288 conversion_required = TRUE;
00289 } else
00290 xsb_abort("[STRING_SUBSTITUTE] Arg 1 (the input string) must be an atom or a character list");
00291
00292 input_len = strlen(input_string);
00293
00294
00295 subst_spec_list_term = reg_term(CTXTc 2);
00296 if (!islist(subst_spec_list_term) && !isnil(subst_spec_list_term))
00297 xsb_abort("[STRING_SUBSTITUTE] Arg 2 must be a list [s(B1,E1),s(B2,E2),...]");
00298
00299
00300 subst_str_list_term = reg_term(CTXTc 3);
00301 if (! islist(subst_str_list_term))
00302 xsb_abort("[STRING_SUBSTITUTE] Arg 3 must be a list of strings");
00303
00304 output_term = reg_term(CTXTc 4);
00305 if (! isref(output_term))
00306 xsb_abort("[STRING_SUBSTITUTE] Arg 4 (the output) must be an unbound variable");
00307
00308 subst_spec_list_term1 = subst_spec_list_term;
00309 subst_str_list_term1 = subst_str_list_term;
00310
00311 if (isnil(subst_spec_list_term1)) {
00312 XSB_StrSet(&output_buffer, input_string);
00313 goto EXIT;
00314 }
00315 if (isnil(subst_str_list_term1))
00316 xsb_abort("[STRING_SUBSTITUTE] Arg 3 must not be an empty list");
00317
00318 do {
00319 subst_reg_term = p2p_car(subst_spec_list_term1);
00320 subst_spec_list_term1 = p2p_cdr(subst_spec_list_term1);
00321
00322 if (!isnil(subst_str_list_term1)) {
00323 subst_str_term = p2p_car(subst_str_list_term1);
00324 subst_str_list_term1 = p2p_cdr(subst_str_list_term1);
00325
00326 if (isatom(subst_str_term)) {
00327 subst_string = string_val(subst_str_term);
00328 } else if (islist(subst_str_term)) {
00329 subst_string = p_charlist_to_c_string(CTXTc subst_str_term, &subst_buf,
00330 "STRING_SUBSTITUTE",
00331 "substitution string");
00332 } else
00333 xsb_abort("[STRING_SUBSTITUTE] Arg 3 must be a list of strings");
00334 }
00335
00336 beg_term = p2p_arg(subst_reg_term,1);
00337 end_term = p2p_arg(subst_reg_term,2);
00338
00339 if (!(isinteger(beg_term)|isboxedinteger(beg_term)) ||
00340 !(isinteger(end_term)|isboxedinteger(end_term)))
00341 xsb_abort("[STRING_SUBSTITUTE] Non-integer in Arg 2");
00342 else{
00343 beg_offset = int_val(beg_term);
00344 end_offset = int_val(end_term);
00345 }
00346
00347 if (end_offset < 0)
00348 end_offset = input_len;
00349 if ((end_offset < beg_offset) || (beg_offset < last_pos))
00350 xsb_abort("[STRING_SUBSTITUTE] Substitution regions in Arg 2 not sorted");
00351
00352
00353 XSB_StrAppendBlk(&output_buffer,input_string+last_pos,beg_offset-last_pos);
00354 XSB_StrAppend(&output_buffer, subst_string);
00355
00356 last_pos = end_offset;
00357
00358 } while (!isnil(subst_spec_list_term1));
00359
00360 XSB_StrAppend(&output_buffer, input_string+end_offset);
00361
00362 EXIT:
00363
00364 if (conversion_required)
00365 c_string_to_p_charlist(output_buffer.string, output_term,
00366 4, "STRING_SUBSTITUTE", "Arg 4");
00367 else
00368 c2p_string(CTXTc output_buffer.string, output_term);
00369
00370 return(TRUE);
00371 }
00372
00373
00374
00375
00376
00377
00378
00379 char *xsb_strrstr(char *str, char *pat)
00380 {
00381 size_t len, patlen;
00382 const char *p;
00383
00384 len = strlen(str);
00385 patlen = strlen(pat);
00386
00387 if (patlen > len)
00388 return NULL;
00389 for (p = str + (len - patlen); p >= str; --p)
00390 if (*p == *pat && strncmp(p, pat, patlen) == 0)
00391 return (char *) p;
00392 return NULL;
00393 }