new implementation for string hashing, with chaining.

This commit is contained in:
Roberto Ierusalimschy 1999-10-11 14:13:42 -02:00
parent cca78b5c71
commit c5fee7615e
7 changed files with 117 additions and 127 deletions

10
lapi.c
View File

@ -1,5 +1,5 @@
/* /*
** $Id: lapi.c,v 1.51 1999/10/04 17:51:04 roberto Exp roberto $ ** $Id: lapi.c,v 1.52 1999/10/07 19:04:30 roberto Exp roberto $
** Lua API ** Lua API
** See Copyright Notice in lua.h ** See Copyright Notice in lua.h
*/ */
@ -393,11 +393,11 @@ TaggedString *luaA_nextvar (TaggedString *g) {
g = L->rootglobal; /* first variable */ g = L->rootglobal; /* first variable */
else { else {
/* check whether name is in global var list */ /* check whether name is in global var list */
luaL_arg_check(g != g->next, 1, "variable name expected"); luaL_arg_check(g != g->nextglobal, 1, "variable name expected");
g = g->next; /* get next */ g = g->nextglobal; /* get next */
} }
while (g && g->u.s.globalval.ttype == LUA_T_NIL) /* skip globals with nil */ while (g && g->u.s.globalval.ttype == LUA_T_NIL) /* skip globals with nil */
g = g->next; g = g->nextglobal;
if (g) { if (g) {
ttype(L->stack.top) = LUA_T_STRING; tsvalue(L->stack.top) = g; ttype(L->stack.top) = LUA_T_STRING; tsvalue(L->stack.top) = g;
incr_top; incr_top;
@ -579,7 +579,7 @@ const char *lua_getobjname (lua_Object o, const char **name) {
/* try to find a name for given function */ /* try to find a name for given function */
TaggedString *g; TaggedString *g;
set_normalized(L->stack.top, Address(o)); /* to be accessed by "checkfunc" */ set_normalized(L->stack.top, Address(o)); /* to be accessed by "checkfunc" */
for (g=L->rootglobal; g; g=g->next) { for (g=L->rootglobal; g; g=g->nextglobal) {
if (checkfunc(&g->u.s.globalval)) { if (checkfunc(&g->u.s.globalval)) {
*name = g->str; *name = g->str;
return "global"; return "global";

View File

@ -1,5 +1,5 @@
/* /*
** $Id: lbuiltin.c,v 1.64 1999/10/04 17:51:04 roberto Exp roberto $ ** $Id: lbuiltin.c,v 1.65 1999/10/07 19:04:30 roberto Exp roberto $
** Built-in functions ** Built-in functions
** See Copyright Notice in lua.h ** See Copyright Notice in lua.h
*/ */
@ -447,7 +447,7 @@ static void luaB_foreachvar (void) {
TObject f; /* see comment in 'foreachi' */ TObject f; /* see comment in 'foreachi' */
f = *luaA_Address(luaL_functionarg(1)); f = *luaA_Address(luaL_functionarg(1));
luaD_checkstack(4); /* for extra var name, f, var name, and globalval */ luaD_checkstack(4); /* for extra var name, f, var name, and globalval */
for (s = L->rootglobal; s; s = s->next) { for (s = L->rootglobal; s; s = s->nextglobal) {
if (s->u.s.globalval.ttype != LUA_T_NIL) { if (s->u.s.globalval.ttype != LUA_T_NIL) {
pushtagstring(s); /* keep (extra) s on stack to avoid GC */ pushtagstring(s); /* keep (extra) s on stack to avoid GC */
*(L->stack.top++) = f; *(L->stack.top++) = f;
@ -606,6 +606,12 @@ static void mem_query (void) {
} }
static void hash_query (void) {
const TObject *o = luaA_Address(luaL_nonnullarg(1));
lua_pushnumber(luaH_hashindex(o));
}
static void query_strings (void) { static void query_strings (void) {
int h = luaL_check_int(1) - 1; int h = luaL_check_int(1) - 1;
int s = luaL_opt_int(2, 0) - 1; int s = luaL_opt_int(2, 0) - 1;
@ -617,10 +623,10 @@ static void query_strings (void) {
} }
else { else {
TaggedString *ts = L->string_root[h].hash[s]; TaggedString *ts = L->string_root[h].hash[s];
if (ts == NULL) lua_pushstring("<NIL>"); for (ts = L->string_root[h].hash[s]; ts; ts = ts->nexthash) {
else if (ts == &luaS_EMPTY) lua_pushstring("<EMPTY>"); if (ts->constindex == -1) lua_pushstring("<USERDATA>");
else if (ts->constindex == -1) lua_pushstring("<USERDATA>"); else lua_pushstring(ts->str);
else lua_pushstring(ts->str); }
} }
} }
@ -707,9 +713,10 @@ static void testC (void) {
static const struct luaL_reg builtin_funcs[] = { static const struct luaL_reg builtin_funcs[] = {
#ifdef DEBUG #ifdef DEBUG
{"extra", extra_services}, {"extra", extra_services},
{"hash", hash_query},
{"querystr", query_strings},
{"testC", testC}, {"testC", testC},
{"totalmem", mem_query}, {"totalmem", mem_query},
{"querystr", query_strings},
#endif #endif
{"_ALERT", luaB_alert}, {"_ALERT", luaB_alert},
{"_ERRORMESSAGE", error_message}, {"_ERRORMESSAGE", error_message},

38
lgc.c
View File

@ -1,5 +1,5 @@
/* /*
** $Id: lgc.c,v 1.26 1999/09/27 18:00:25 roberto Exp roberto $ ** $Id: lgc.c,v 1.27 1999/10/04 17:51:04 roberto Exp roberto $
** Garbage Collector ** Garbage Collector
** See Copyright Notice in lua.h ** See Copyright Notice in lua.h
*/ */
@ -64,7 +64,7 @@ static void hashmark (Hash *h) {
static void globalmark (void) { static void globalmark (void) {
TaggedString *g; TaggedString *g;
for (g=L->rootglobal; g; g=g->next) { for (g=L->rootglobal; g; g=g->nextglobal) {
LUA_ASSERT(g->constindex >= 0, "userdata in global list"); LUA_ASSERT(g->constindex >= 0, "userdata in global list");
if (g->u.s.globalval.ttype != LUA_T_NIL) { if (g->u.s.globalval.ttype != LUA_T_NIL) {
markobject(&g->u.s.globalval); markobject(&g->u.s.globalval);
@ -161,8 +161,8 @@ static void clear_global_list (void) {
TaggedString **p = &L->rootglobal; TaggedString **p = &L->rootglobal;
TaggedString *next; TaggedString *next;
while ((next = *p) != NULL) { while ((next = *p) != NULL) {
if (next->marked) p = &next->next; if (next->marked) p = &next->nextglobal;
else *p = next->next; else *p = next->nextglobal;
} }
} }
@ -177,22 +177,28 @@ static void collectstring (int limit) {
int i; int i;
ttype(&o) = LUA_T_USERDATA; ttype(&o) = LUA_T_USERDATA;
clear_global_list(); clear_global_list();
for (i=0; i<NUM_HASHS; i++) { for (i=0; i<NUM_HASHS; i++) { /* for each hash table */
stringtable *tb = &L->string_root[i]; stringtable *tb = &L->string_root[i];
int j; int j;
for (j=0; j<tb->size; j++) { for (j=0; j<tb->size; j++) { /* for each list */
TaggedString *t = tb->hash[j]; TaggedString **p = &tb->hash[j];
if (t == NULL) continue; TaggedString *next;
if (t->marked < limit) { while ((next = *p) != NULL) {
if (t->constindex == -1) { /* is userdata? */ if (next->marked >= limit) {
tsvalue(&o) = t; if (next->marked < FIXMARK) /* does not change FIXMARKs */
luaD_gcIM(&o); next->marked = 0;
p = &next->nexthash;
}
else { /* collect */
if (next->constindex == -1) { /* is userdata? */
tsvalue(&o) = next;
luaD_gcIM(&o);
}
*p = next->nexthash;
luaS_free(next);
tb->nuse--;
} }
luaS_free(t);
tb->hash[j] = &luaS_EMPTY;
} }
else if (t->marked == 1)
t->marked = 0;
} }
} }
} }

8
llex.c
View File

@ -1,5 +1,5 @@
/* /*
** $Id: llex.c,v 1.39 1999/09/06 13:55:09 roberto Exp roberto $ ** $Id: llex.c,v 1.40 1999/10/04 17:51:04 roberto Exp roberto $
** Lexical Analyzer ** Lexical Analyzer
** See Copyright Notice in lua.h ** See Copyright Notice in lua.h
*/ */
@ -37,7 +37,7 @@ void luaX_init (void) {
int i; int i;
for (i=0; i<(sizeof(reserved)/sizeof(reserved[0])); i++) { for (i=0; i<(sizeof(reserved)/sizeof(reserved[0])); i++) {
TaggedString *ts = luaS_new(reserved[i]); TaggedString *ts = luaS_new(reserved[i]);
ts->marked = FIRST_RESERVED+i; /* reserved word (always > 255) */ ts->marked = RESERVEDMARK+i; /* reserved word */
} }
} }
@ -426,8 +426,8 @@ int luaX_lex (LexState *LS) {
} while (isalnum(LS->current) || LS->current == '_'); } while (isalnum(LS->current) || LS->current == '_');
save('\0'); save('\0');
ts = luaS_new(L->Mbuffer+L->Mbuffbase); ts = luaS_new(L->Mbuffer+L->Mbuffbase);
if (ts->marked >= FIRST_RESERVED) if (ts->marked >= RESERVEDMARK) /* reserved word? */
return ts->marked; /* reserved word */ return ts->marked-RESERVEDMARK+FIRST_RESERVED;
LS->seminfo.ts = ts; LS->seminfo.ts = ts;
return NAME; return NAME;
} }

View File

@ -1,5 +1,5 @@
/* /*
** $Id: lobject.h,v 1.30 1999/09/06 20:34:18 roberto Exp roberto $ ** $Id: lobject.h,v 1.31 1999/10/04 17:51:04 roberto Exp roberto $
** Type definitions for Lua objects ** Type definitions for Lua objects
** See Copyright Notice in lua.h ** See Copyright Notice in lua.h
*/ */
@ -14,13 +14,15 @@
#ifdef DEBUG #ifdef DEBUG
#include "lauxlib.h" #ifdef NDEBUG
#define LUA_INTERNALERROR(s) \ #undef NDEBUG
luaL_verror("INTERNAL ERROR - %s [%s:%d]",(s),__FILE__,__LINE__) #endif
#define LUA_ASSERT(c,s) { if (!(c)) LUA_INTERNALERROR(s); } #include <assert.h>
#define LUA_INTERNALERROR(s) assert(0)
#define LUA_ASSERT(c,s) assert(c)
#else #else
#define LUA_INTERNALERROR(s) /* empty */ #define LUA_INTERNALERROR(s) /* empty */
#define LUA_ASSERT(c,s) /* empty */ #define LUA_ASSERT(c,s) /* empty */
#endif #endif
@ -90,8 +92,8 @@ typedef struct TObject {
*/ */
typedef struct TaggedString { typedef struct TaggedString {
struct TaggedString *next; struct TaggedString *nexthash; /* chain hash table */
int marked; struct TaggedString *nextglobal; /* chain global variables */
unsigned long hash; unsigned long hash;
int constindex; /* hint to reuse constants (= -1 if this is a userdata) */ int constindex; /* hint to reuse constants (= -1 if this is a userdata) */
union { union {
@ -104,6 +106,7 @@ typedef struct TaggedString {
void *v; /* if this is a userdata, here is its value */ void *v; /* if this is a userdata, here is its value */
} d; } d;
} u; } u;
unsigned char marked;
char str[1]; /* \0 byte already reserved */ char str[1]; /* \0 byte already reserved */
} TaggedString; } TaggedString;

132
lstring.c
View File

@ -1,5 +1,5 @@
/* /*
** $Id: lstring.c,v 1.21 1999/09/28 12:27:06 roberto Exp roberto $ ** $Id: lstring.c,v 1.22 1999/10/04 17:51:04 roberto Exp roberto $
** String table (keeps all strings handled by Lua) ** String table (keeps all strings handled by Lua)
** See Copyright Notice in lua.h ** See Copyright Notice in lua.h
*/ */
@ -19,11 +19,6 @@
TaggedString luaS_EMPTY = {NULL, MAX_INT, 0L, 0,
{{{LUA_T_NIL, {NULL}}, 0L}}, {0}};
/* /*
** to avoid hash tables with size = 0 (cannot hash with size=0), all ** to avoid hash tables with size = 0 (cannot hash with size=0), all
** hash tables are initialized with this `array'. Elements are never ** hash tables are initialized with this `array'. Elements are never
@ -48,6 +43,7 @@ void luaS_init (void) {
void luaS_freeall (void) { void luaS_freeall (void) {
int i; int i;
for (i=0; i<NUM_HASHS; i++) { for (i=0; i<NUM_HASHS; i++) {
LUA_ASSERT(L->string_root[i].nuse==0, "non-empty string table");
if (L->string_root[i].hash != init_hash) if (L->string_root[i].hash != init_hash)
luaM_free(L->string_root[i].hash); luaM_free(L->string_root[i].hash);
} }
@ -56,43 +52,27 @@ void luaS_freeall (void) {
static unsigned long hash_s (const char *s, long l) { static unsigned long hash_s (const char *s, long l) {
unsigned long h = 0; /* seed */ unsigned long h = l; /* seed */
while (l--) while (l--)
h = h ^ ((h<<5)+(h>>2)+(unsigned char)*(s++)); h = h ^ ((h<<5)+(h>>2)+(unsigned char)*(s++));
return h; return h;
} }
static int newsize (const stringtable *tb) {
int realuse = 0;
int i;
/* count how many entries are really in use */
for (i=0; i<tb->size; i++) {
if (tb->hash[i] != NULL && tb->hash[i] != &luaS_EMPTY)
realuse++;
}
return luaO_redimension(realuse*2);
}
static void grow (stringtable *tb) { static void grow (stringtable *tb) {
int ns = newsize(tb); int ns = luaO_redimension(tb->size*2); /* new size */
TaggedString **newhash = luaM_newvector(ns, TaggedString *); TaggedString **newhash = luaM_newvector(ns, TaggedString *);
int i; int i;
for (i=0; i<ns; i++) for (i=0; i<ns; i++) newhash[i] = NULL;
newhash[i] = NULL;
/* rehash */ /* rehash */
tb->nuse = 0;
for (i=0; i<tb->size; i++) { for (i=0; i<tb->size; i++) {
if (tb->hash[i] != NULL && tb->hash[i] != &luaS_EMPTY) { TaggedString *p = tb->hash[i];
unsigned long h = tb->hash[i]->hash; while (p) { /* for each node in the list */
int h1 = h%ns; TaggedString *next = p->nexthash; /* save next */
while (newhash[h1]) { int h = p->hash%ns; /* new position */
h1 += (h&(ns-2)) + 1; /* double hashing */ p->nexthash = newhash[h]; /* chain it in new position */
if (h1 >= ns) h1 -= ns; newhash[h] = p;
} p = next;
newhash[h1] = tb->hash[i];
tb->nuse++;
} }
} }
luaM_free(tb->hash); luaM_free(tb->hash);
@ -101,91 +81,79 @@ static void grow (stringtable *tb) {
} }
static TaggedString *newone (long l, unsigned long h) {
TaggedString *ts = (TaggedString *)luaM_malloc(
sizeof(TaggedString)+l*sizeof(char));
ts->marked = 0;
ts->nexthash = NULL;
ts->nextglobal = ts; /* signal it is not in global list */
ts->hash = h;
return ts;
}
static TaggedString *newone_s (const char *str, long l, unsigned long h) { static TaggedString *newone_s (const char *str, long l, unsigned long h) {
TaggedString *ts = (TaggedString *)luaM_malloc(sizeof(TaggedString)+l); TaggedString *ts = newone(l, h);
memcpy(ts->str, str, l); memcpy(ts->str, str, l);
ts->str[l] = 0; /* ending 0 */ ts->str[l] = 0; /* ending 0 */
ts->u.s.globalval.ttype = LUA_T_NIL; /* initialize global value */ ts->u.s.globalval.ttype = LUA_T_NIL; /* initialize global value */
ts->u.s.len = l; ts->u.s.len = l;
ts->constindex = 0; ts->constindex = 0;
L->nblocks += gcsizestring(l); L->nblocks += gcsizestring(l);
ts->marked = 0;
ts->next = ts; /* signal it is in no list */
ts->hash = h;
return ts; return ts;
} }
static TaggedString *newone_u (void *buff, int tag, unsigned long h) { static TaggedString *newone_u (void *buff, int tag, unsigned long h) {
TaggedString *ts = luaM_new(TaggedString); TaggedString *ts = newone(0, h);
ts->u.d.v = buff; ts->u.d.v = buff;
ts->u.d.tag = (tag == LUA_ANYTAG) ? 0 : tag; ts->u.d.tag = (tag == LUA_ANYTAG) ? 0 : tag;
ts->constindex = -1; /* tag -> this is a userdata */ ts->constindex = -1; /* tag -> this is a userdata */
L->nblocks++; L->nblocks++;
ts->marked = 0;
ts->next = ts; /* signal it is in no list */
ts->hash = h;
return ts; return ts;
} }
static void newentry (stringtable *tb, TaggedString *ts, int h1) { static void newentry (stringtable *tb, TaggedString *ts, int h) {
tb->nuse++; tb->nuse++;
if ((long)tb->nuse*3 < (long)tb->size*2) /* still have room? */ if (tb->nuse >= tb->size) { /* no more room? */
tb->hash[h1] = ts;
else { /* must grow */
if (tb->hash == init_hash) { /* cannot change init_hash */ if (tb->hash == init_hash) { /* cannot change init_hash */
LUA_ASSERT(h1==0, "`init_hash' has size 1"); LUA_ASSERT(h==0, "`init_hash' has size 1");
tb->hash = luaM_newvector(1, TaggedString *); /* so, `clone' it */ tb->hash = luaM_newvector(1, TaggedString *); /* so, `clone' it */
tb->hash[0] = NULL;
} }
tb->hash[h1] = ts;
grow(tb); grow(tb);
h = ts->hash%tb->size; /* new hash position */
} }
ts->nexthash = tb->hash[h]; /* chain new entry */
tb->hash[h] = ts;
} }
static TaggedString *insert_s (const char *str, long l, stringtable *tb) { static TaggedString *insert_s (const char *str, long l,
stringtable *tb, unsigned long h) {
int h1 = h%tb->size;
TaggedString *ts; TaggedString *ts;
unsigned long h = hash_s(str, l); for (ts = tb->hash[h1]; ts; ts = ts->nexthash)
int size = tb->size; if (ts->u.s.len == l && (memcmp(str, ts->str, l) == 0))
int j = -1; /* last empty place found (or -1) */
int h1 = h%size;
while ((ts = tb->hash[h1]) != NULL) {
if (ts == &luaS_EMPTY)
j = h1;
else if (ts->u.s.len == l && (memcmp(str, ts->str, l) == 0))
return ts; return ts;
h1 += (h&(size-2)) + 1; /* double hashing */
if (h1 >= size) h1 -= size;
}
/* not found */ /* not found */
ts = newone_s(str, l, h); /* create new entry */ ts = newone_s(str, l, h); /* create new entry */
if (j != -1) /* is there an EMPTY space? */ newentry(tb, ts, h1); /* insert it on table */
tb->hash[j] = ts; /* use it for new element */
else
newentry(tb, ts, h1); /* no EMPTY places; must use a virgin one */
return ts; return ts;
} }
static TaggedString *insert_u (void *buff, int tag, stringtable *tb) { static TaggedString *insert_u (void *buff, int tag, stringtable *tb) {
TaggedString *ts;
unsigned long h = (unsigned long)buff; unsigned long h = (unsigned long)buff;
int size = tb->size; int h1 = h%tb->size;
int j = -1; TaggedString *ts;
int h1 = h%size; for (ts = tb->hash[h1]; ts; ts = ts->nexthash)
while ((ts = tb->hash[h1]) != NULL) { if ((tag == ts->u.d.tag || tag == LUA_ANYTAG) && buff == ts->u.d.v)
if (ts == &luaS_EMPTY)
j = h1;
else if ((tag == ts->u.d.tag || tag == LUA_ANYTAG) && buff == ts->u.d.v)
return ts; return ts;
h1 += (h&(size-2)) + 1; /* not found */
if (h1 >= size) h1 -= size;
}
ts = newone_u(buff, tag, h); ts = newone_u(buff, tag, h);
if (j != -1) newentry(tb, ts, h1);
tb->hash[j] = ts;
else
newentry(tb, ts, h1);
return ts; return ts;
} }
@ -196,8 +164,8 @@ TaggedString *luaS_createudata (void *udata, int tag) {
} }
TaggedString *luaS_newlstr (const char *str, long l) { TaggedString *luaS_newlstr (const char *str, long l) {
int t = (l==0) ? 0 : ((int)((unsigned char)str[0]+l))%NUM_HASHSTR; unsigned long h = hash_s(str, l);
return insert_s(str, l, &L->string_root[t]); return insert_s(str, l, &L->string_root[h%NUM_HASHSTR], h);
} }
TaggedString *luaS_new (const char *str) { TaggedString *luaS_new (const char *str) {
@ -206,7 +174,7 @@ TaggedString *luaS_new (const char *str) {
TaggedString *luaS_newfixedstring (const char *str) { TaggedString *luaS_newfixedstring (const char *str) {
TaggedString *ts = luaS_new(str); TaggedString *ts = luaS_new(str);
if (ts->marked == 0) ts->marked = 2; /* avoid GC */ if (ts->marked == 0) ts->marked = FIXMARK; /* avoid GC */
return ts; return ts;
} }
@ -219,8 +187,8 @@ void luaS_free (TaggedString *t) {
void luaS_rawsetglobal (TaggedString *ts, const TObject *newval) { void luaS_rawsetglobal (TaggedString *ts, const TObject *newval) {
ts->u.s.globalval = *newval; ts->u.s.globalval = *newval;
if (ts->next == ts) { /* is not in list? */ if (ts->nextglobal == ts) { /* is not in list? */
ts->next = L->rootglobal; ts->nextglobal = L->rootglobal;
L->rootglobal = ts; L->rootglobal = ts;
} }
} }

View File

@ -1,5 +1,5 @@
/* /*
** $Id: lstring.h,v 1.8 1999/08/16 20:52:00 roberto Exp roberto $ ** $Id: lstring.h,v 1.9 1999/10/04 17:51:04 roberto Exp roberto $
** String table (keep all strings handled by Lua) ** String table (keep all strings handled by Lua)
** See Copyright Notice in lua.h ** See Copyright Notice in lua.h
*/ */
@ -11,12 +11,18 @@
#include "lobject.h" #include "lobject.h"
#define NUM_HASHSTR 31 #define NUM_HASHSTR 31 /* a prime not in array `dimensions' */
#define NUM_HASHUDATA 31 #define NUM_HASHUDATA 31 /* idem */
#define NUM_HASHS (NUM_HASHSTR+NUM_HASHUDATA) #define NUM_HASHS (NUM_HASHSTR+NUM_HASHUDATA)
extern TaggedString luaS_EMPTY; /*
** any taggedstring with mark>=FIXMARK is never collected.
** Marks>=RESERVEDMARK are used to identify reserved words.
*/
#define FIXMARK 2
#define RESERVEDMARK 3
void luaS_init (void); void luaS_init (void);
TaggedString *luaS_createudata (void *udata, int tag); TaggedString *luaS_createudata (void *udata, int tag);