hash_xsb.c

00001 /* File:      hash_xsb.c
00002 ** Author(s): Ernie Johnson
00003 ** Contact:   xsb-contact@cs.sunysb.edu
00004 ** 
00005 ** Copyright (C) The Research Foundation of SUNY, 1986, 1993-1998
00006 ** 
00007 ** XSB is free software; you can redistribute it and/or modify it under the
00008 ** terms of the GNU Library General Public License as published by the Free
00009 ** Software Foundation; either version 2 of the License, or (at your option)
00010 ** any later version.
00011 ** 
00012 ** XSB is distributed in the hope that it will be useful, but WITHOUT ANY
00013 ** WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
00014 ** FOR A PARTICULAR PURPOSE.  See the GNU Library General Public License for
00015 ** more details.
00016 ** 
00017 ** You should have received a copy of the GNU Library General Public License
00018 ** along with XSB; if not, write to the Free Software Foundation,
00019 ** Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
00020 **
00021 ** $Id: hash_xsb.c,v 1.15 2005/12/22 23:33:56 tswift Exp $
00022 ** 
00023 */
00024 
00025 
00026 #include "xsb_config.h"
00027 #include "xsb_debug.h"
00028 
00029 #include <string.h>
00030 #include <stdio.h>
00031 #include <stdlib.h>
00032 
00033 #include "auxlry.h"
00034 #include "cell_xsb.h"
00035 #include "hash_xsb.h"
00036 #include "psc_xsb.h"
00037 #include "flags_xsb.h"
00038 #include "memory_xsb.h"
00039 #include "thread_xsb.h"
00040 
00041 /*
00042  *  The "Atom Table" is divided into two structures, based on the type
00043  *  of the information to be interned.  Symbolic constants, structures,
00044  *  and predicates which are assigned Psc records are stored in the
00045  *  "symbol table", while simple strings are stored in the "string table."
00046  *  Both subtables are maintained as hash tables.
00047  */
00048 
00049 Hash_Table symbol_table = {2053, 0, NULL};
00050 Hash_Table string_table = {16381, 0, NULL};
00051 
00052 
00053 /*
00054  *  Prime numbers which are close to powers of 2.  Used for choosing
00055  *  the next size for a hash table.
00056  */
00057 
00058 #define NUM_OF_PRIMES 16
00059 static unsigned int primes[NUM_OF_PRIMES] =
00060     {127, 257, 509, 1021, 2053, 4099, 8191, 16381, 32771, 65537, 131071,
00061        262147, 524287, 1048573, 2097143, 4194301};
00062 
00063 
00064 /*
00065  *  Return the next prime number greater than the number received.
00066  *  If no such prime number can be found, compute an approximate one.
00067  */
00068 
00069 unsigned long next_prime(unsigned long some_int) {
00070 
00071   byte i;
00072 
00073   i = 0;
00074   while ( (some_int >= primes[i]) && (i < NUM_OF_PRIMES) )
00075     i++;
00076 
00077   if (i < NUM_OF_PRIMES)
00078     return (primes[i]);
00079   else    /* Ran out of prime numbers */
00080     return (2 * some_int - 1);
00081 }
00082 
00083 
00084 /*
00085  *  Hash object on first 25 characters of its name, plus its arity.
00086  *  Return a bucket number in the range [0 .. (hash_table_size - 1)].
00087  */
00088 
00089 unsigned long hash(char *obj_name, byte arity, unsigned long hash_table_size) {
00090 
00091   unsigned long hashval, temp;
00092   int i, j, k;
00093 
00094   hashval = 0;
00095   if (*obj_name != '\0')
00096     for (k=0; k<10; k++) {
00097       for (i = 4; i >= 0; i--) {
00098         temp = 0;
00099         for (j = 0; j < 5; j++) {
00100           temp = (temp << i) + *obj_name;
00101           obj_name++;
00102           if (*obj_name == '\0') {
00103             hashval = hashval + temp;
00104             goto Done;
00105           }
00106         }
00107         hashval = hashval + temp;
00108       }
00109     }
00110  Done:
00111   return ((hashval + arity) MOD hash_table_size);
00112 }
00113 
00114 /* unsigned long hash(char *obj_name, byte arity, unsigned long hash_table_size) { */
00115 
00116 /*   unsigned long hashval, temp; */
00117 /*   int i, j; */
00118 
00119 /*   hashval = 0; */
00120 /*   if (*obj_name != '\0') */
00121 /*     for (i = 4; i >= 0; i--) { */
00122 /*       temp = 0; */
00123 /*       for (j = 0; j < 5; j++) { */
00124 /*      temp = (temp << i) + *obj_name; */
00125 /*      obj_name++; */
00126 /*      if (*obj_name == '\0') { */
00127 /*        hashval = hashval + temp; */
00128 /*        goto Done; */
00129 /*      } */
00130 /*       } */
00131 /*       hashval = hashval + temp; */
00132 /*     } */
00133 /*  Done: */
00134 /*   return ((hashval + arity) MOD hash_table_size); */
00135 /* } */
00136 
00137 
00138 /*
00139  *  Increase the size of the Symbol Table and rehash each entry.
00140  */
00141 
00142 void expand_symbol_table() {
00143 
00144   Pair *new_table, *bucket_ptr, cur_pair, next_pair;
00145   Psc cur_psc;
00146   unsigned long index, new_size, new_index;
00147 
00148   new_size = next_prime(symbol_table.size);
00149   new_table = (Pair *)mem_calloc(new_size, sizeof(void *),ATOM_SPACE);
00150 
00151   for (bucket_ptr = (Pair *)symbol_table.table, index = 0;
00152        index < symbol_table.size;  bucket_ptr++, index++)
00153 
00154     for (cur_pair = *bucket_ptr; cur_pair != NULL; cur_pair = next_pair) {
00155       next_pair = pair_next(cur_pair);
00156       cur_psc = pair_psc(cur_pair);
00157       new_index = hash(get_name(cur_psc), get_arity(cur_psc), new_size);
00158       pair_next(cur_pair) = new_table[new_index];
00159       new_table[new_index] = cur_pair;
00160     }
00161 
00162   mem_dealloc((void *)symbol_table.table,symbol_table.size,ATOM_SPACE);
00163   symbol_table.size = new_size;
00164   symbol_table.table = (void **)new_table;
00165   /*printf("expanded atom table to: %d\n",new_size);*/
00166 }
00167 
00168 
00169 /*
00170  *  Increase the size of the String Table and rehash each entry.
00171  *                      
00172  *  String Table entries have the form:
00173  *           +--------------------------+
00174  *           | Ptr_to_Next | String ... |
00175  *           +--------------------------+
00176  */
00177 
00178 void expand_string_table() {
00179 
00180   void **new_table, **bucket_ptr, **cur_entry, **next_entry;
00181   char *string;
00182   unsigned long index, new_size, new_index;
00183 
00184   new_size = next_prime(string_table.size);
00185   new_table = (void **)mem_calloc(new_size, sizeof(void *),STRING_SPACE);
00186 
00187   for (bucket_ptr = string_table.table, index = 0;
00188        index < string_table.size;
00189        bucket_ptr++, index++)
00190     for (cur_entry = (void **)*bucket_ptr;
00191          cur_entry != NULL;
00192          cur_entry = next_entry) {
00193       next_entry = (void **)*cur_entry;
00194       string = (char *)(cur_entry + 1);
00195       new_index = hash(string, 0, new_size);
00196       *cur_entry = new_table[new_index];
00197       new_table[new_index] = (void *)cur_entry;
00198     }
00199 
00200   mem_dealloc((void *)string_table.table,string_table.size,STRING_SPACE);
00201   string_table.size = new_size;
00202   string_table.table = new_table;
00203   //  printf("expanded string table to: %ld\n",new_size);
00204 }
00205 
00206 
00207 /*
00208  *  Collect and report statistics on the symbol and string tables:
00209  *   - number of used and unused buckets
00210  *   - first and last buckets containing objects
00211  *   - buckets which are most full
00212  */
00213 
00214 void symbol_table_stats(CTXTdecl) {
00215 
00216   unsigned long   i, symbols, bucket_contains, used_buckets, unused_buckets,
00217                   fullest_bucket_size, fullest_bucket_num, last_index;
00218   int first_index;
00219   Pair pair_ptr;
00220 
00221   SYS_MUTEX_LOCK( MUTEX_SYMBOL ) ;
00222 
00223   symbols = used_buckets = unused_buckets = last_index = 0;
00224   fullest_bucket_size = fullest_bucket_num = 0;
00225   first_index = -1;
00226   
00227   for (i = 0; i < symbol_table.size; i++) {
00228     if (symbol_table.table[i] != NULL) {
00229       if (first_index == -1)
00230         first_index = i;
00231       last_index = i;
00232       used_buckets++;
00233       bucket_contains = 0;
00234       for (pair_ptr = (Pair)symbol_table.table[i];   pair_ptr != NULL;
00235            pair_ptr = pair_next(pair_ptr)) {
00236         symbols++;
00237         bucket_contains++;
00238       }
00239       if (bucket_contains > fullest_bucket_size) {
00240         fullest_bucket_size = bucket_contains;
00241         fullest_bucket_num = i;
00242       }
00243     }
00244     else
00245       unused_buckets++;
00246   }
00247   printf("\nSymbol table statistics:");
00248   printf("\n------------------------\n");
00249   printf("Table Size:\t%lu\n", symbol_table.size);
00250   printf("Total Symbols:\t%lu\n", symbols);
00251   if (symbols != symbol_table.contains)
00252     printf("Symbol count incorrect in 'symbol_table': %lu\n",
00253            symbol_table.contains);
00254   printf("\tused buckets:\t%lu  (range: [%d, %lu])\n", used_buckets,
00255          first_index, last_index);
00256   printf("\tunused buckets:\t%lu\n", unused_buckets);
00257   printf("\tmaximum bucket size:\t%lu  (#: %lu)\n", fullest_bucket_size, 
00258          fullest_bucket_num);
00259 
00260  SYS_MUTEX_UNLOCK( MUTEX_SYMBOL ) ;
00261 
00262 }  
00263 
00264 
00265 void string_table_stats(CTXTdecl) {
00266 
00267   unsigned long   i, strings, bucket_contains, used_buckets, unused_buckets,
00268                   fullest_bucket_size, fullest_bucket_num, last_index;
00269   int first_index;
00270   void *ptr;
00271 
00272  SYS_MUTEX_LOCK( MUTEX_STRING ) ;
00273 
00274   strings = used_buckets = unused_buckets = last_index = 0;
00275   fullest_bucket_size = fullest_bucket_num = 0;
00276   first_index = -1;
00277   
00278   for (i = 0; i < string_table.size; i++) {
00279     if (string_table.table[i] != NULL) {
00280       if (first_index == -1)
00281         first_index = i;
00282       last_index = i;
00283       used_buckets++;
00284       bucket_contains = 0;
00285       for (ptr = string_table.table[i];  ptr != NULL;  ptr = *(void **)ptr) {
00286         strings++;
00287         bucket_contains++;
00288       }
00289       if (bucket_contains > fullest_bucket_size) {
00290         fullest_bucket_size = bucket_contains;
00291         fullest_bucket_num = i;
00292       }
00293     }
00294     else
00295       unused_buckets++;
00296   }
00297   printf("\nString table statistics:");
00298   printf("\n------------------------\n");
00299   printf("Table Size:\t%lu\n", string_table.size);
00300   printf("Total Strings:\t%lu\n", strings);
00301   if (strings != string_table.contains)
00302     printf("String count incorrect in 'string_table': %lu\n",
00303            string_table.contains);
00304   printf("\tused buckets:\t%lu  (range: [%d, %lu])\n", used_buckets,
00305          first_index, last_index);
00306   printf("\tunused buckets:\t%lu\n", unused_buckets);
00307   printf("\tmaximum bucket size:\t%lu  (#: %lu)\n", fullest_bucket_size, 
00308          fullest_bucket_num);
00309 
00310  SYS_MUTEX_UNLOCK( MUTEX_STRING ) ;
00311 
00312 }
00313 
00314 void free_unused_strings() {
00315   unsigned long i;
00316   void *ptr, *prevptr;
00317   int used = 0, unused = 0;
00318 
00319   for (i = 0; i < string_table.size; i++) {
00320     if (string_table.table[i] != NULL) {
00321       prevptr = &string_table.table[i];
00322       ptr = *(void **)prevptr;
00323       while (ptr != NULL) {
00324         if ((*(Integer *)ptr) & 1) {
00325           used++;
00326           (*(Integer *)ptr) &= ~1;
00327           prevptr = ptr;
00328           ptr = *(void **)ptr;
00329         } else {
00330           unused++;
00331           //      printf("unused: '%s'\n",(char *)ptr+4);
00332           *(void **)prevptr = *(void **)ptr;
00333           mem_dealloc(ptr,strlen(((char *)ptr)+4)+sizeof(void *)+1,STRING_SPACE);
00334           //      *(((char *)ptr)+4) = '?';
00335           ptr = *(void **)prevptr;
00336           string_table.contains--;
00337           //      ptr = *(void **)ptr;  // replace with above when have marked all
00338         }
00339       }
00340     }
00341   }
00342   //  if (unused > 0) printf("%d",unused);
00343   //  printf("string_table scanned. used=%d, unused=%d\n",used,unused);
00344 }
00345 
00346 
00347   

Generated on Wed Jul 26 13:30:41 2006 for XSB by  doxygen 1.4.5