From c5fee7615e979e3a39af44614f82938519dedb68 Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Mon, 11 Oct 1999 14:13:42 -0200 Subject: [PATCH] new implementation for string hashing, with chaining. --- lapi.c | 10 ++-- lbuiltin.c | 21 ++++++--- lgc.c | 38 ++++++++------- llex.c | 8 ++-- lobject.h | 21 +++++---- lstring.c | 132 ++++++++++++++++++++--------------------------------- lstring.h | 14 ++++-- 7 files changed, 117 insertions(+), 127 deletions(-) diff --git a/lapi.c b/lapi.c index 9048d5af..5c0646ec 100644 --- a/lapi.c +++ b/lapi.c @@ -1,5 +1,5 @@ /* -** $Id: lapi.c,v 1.51 1999/10/04 17:51:04 roberto Exp roberto $ +** $Id: lapi.c,v 1.52 1999/10/07 19:04:30 roberto Exp roberto $ ** Lua API ** See Copyright Notice in lua.h */ @@ -393,11 +393,11 @@ TaggedString *luaA_nextvar (TaggedString *g) { g = L->rootglobal; /* first variable */ else { /* check whether name is in global var list */ - luaL_arg_check(g != g->next, 1, "variable name expected"); - g = g->next; /* get next */ + luaL_arg_check(g != g->nextglobal, 1, "variable name expected"); + g = g->nextglobal; /* get next */ } while (g && g->u.s.globalval.ttype == LUA_T_NIL) /* skip globals with nil */ - g = g->next; + g = g->nextglobal; if (g) { ttype(L->stack.top) = LUA_T_STRING; tsvalue(L->stack.top) = g; incr_top; @@ -579,7 +579,7 @@ const char *lua_getobjname (lua_Object o, const char **name) { /* try to find a name for given function */ TaggedString *g; set_normalized(L->stack.top, Address(o)); /* to be accessed by "checkfunc" */ - for (g=L->rootglobal; g; g=g->next) { + for (g=L->rootglobal; g; g=g->nextglobal) { if (checkfunc(&g->u.s.globalval)) { *name = g->str; return "global"; diff --git a/lbuiltin.c b/lbuiltin.c index 2e5f1fb1..f3fbce43 100644 --- a/lbuiltin.c +++ b/lbuiltin.c @@ -1,5 +1,5 @@ /* -** $Id: lbuiltin.c,v 1.64 1999/10/04 17:51:04 roberto Exp roberto $ +** $Id: lbuiltin.c,v 1.65 1999/10/07 19:04:30 roberto Exp roberto $ ** Built-in functions ** See Copyright Notice in lua.h */ @@ -447,7 +447,7 @@ static void luaB_foreachvar (void) { TObject f; /* see comment in 'foreachi' */ f = *luaA_Address(luaL_functionarg(1)); luaD_checkstack(4); /* for extra var name, f, var name, and globalval */ - for (s = L->rootglobal; s; s = s->next) { + for (s = L->rootglobal; s; s = s->nextglobal) { if (s->u.s.globalval.ttype != LUA_T_NIL) { pushtagstring(s); /* keep (extra) s on stack to avoid GC */ *(L->stack.top++) = f; @@ -606,6 +606,12 @@ static void mem_query (void) { } +static void hash_query (void) { + const TObject *o = luaA_Address(luaL_nonnullarg(1)); + lua_pushnumber(luaH_hashindex(o)); +} + + static void query_strings (void) { int h = luaL_check_int(1) - 1; int s = luaL_opt_int(2, 0) - 1; @@ -617,10 +623,10 @@ static void query_strings (void) { } else { TaggedString *ts = L->string_root[h].hash[s]; - if (ts == NULL) lua_pushstring(""); - else if (ts == &luaS_EMPTY) lua_pushstring(""); - else if (ts->constindex == -1) lua_pushstring(""); - else lua_pushstring(ts->str); + for (ts = L->string_root[h].hash[s]; ts; ts = ts->nexthash) { + if (ts->constindex == -1) lua_pushstring(""); + else lua_pushstring(ts->str); + } } } @@ -707,9 +713,10 @@ static void testC (void) { static const struct luaL_reg builtin_funcs[] = { #ifdef DEBUG {"extra", extra_services}, + {"hash", hash_query}, + {"querystr", query_strings}, {"testC", testC}, {"totalmem", mem_query}, - {"querystr", query_strings}, #endif {"_ALERT", luaB_alert}, {"_ERRORMESSAGE", error_message}, diff --git a/lgc.c b/lgc.c index e78f0fdb..350cd65e 100644 --- a/lgc.c +++ b/lgc.c @@ -1,5 +1,5 @@ /* -** $Id: lgc.c,v 1.26 1999/09/27 18:00:25 roberto Exp roberto $ +** $Id: lgc.c,v 1.27 1999/10/04 17:51:04 roberto Exp roberto $ ** Garbage Collector ** See Copyright Notice in lua.h */ @@ -64,7 +64,7 @@ static void hashmark (Hash *h) { static void globalmark (void) { TaggedString *g; - for (g=L->rootglobal; g; g=g->next) { + for (g=L->rootglobal; g; g=g->nextglobal) { LUA_ASSERT(g->constindex >= 0, "userdata in global list"); if (g->u.s.globalval.ttype != LUA_T_NIL) { markobject(&g->u.s.globalval); @@ -161,8 +161,8 @@ static void clear_global_list (void) { TaggedString **p = &L->rootglobal; TaggedString *next; while ((next = *p) != NULL) { - if (next->marked) p = &next->next; - else *p = next->next; + if (next->marked) p = &next->nextglobal; + else *p = next->nextglobal; } } @@ -177,22 +177,28 @@ static void collectstring (int limit) { int i; ttype(&o) = LUA_T_USERDATA; clear_global_list(); - for (i=0; istring_root[i]; int j; - for (j=0; jsize; j++) { - TaggedString *t = tb->hash[j]; - if (t == NULL) continue; - if (t->marked < limit) { - if (t->constindex == -1) { /* is userdata? */ - tsvalue(&o) = t; - luaD_gcIM(&o); + for (j=0; jsize; j++) { /* for each list */ + TaggedString **p = &tb->hash[j]; + TaggedString *next; + while ((next = *p) != NULL) { + if (next->marked >= limit) { + if (next->marked < FIXMARK) /* does not change FIXMARKs */ + next->marked = 0; + p = &next->nexthash; + } + else { /* collect */ + if (next->constindex == -1) { /* is userdata? */ + tsvalue(&o) = next; + luaD_gcIM(&o); + } + *p = next->nexthash; + luaS_free(next); + tb->nuse--; } - luaS_free(t); - tb->hash[j] = &luaS_EMPTY; } - else if (t->marked == 1) - t->marked = 0; } } } diff --git a/llex.c b/llex.c index 54213e9c..f0bcc514 100644 --- a/llex.c +++ b/llex.c @@ -1,5 +1,5 @@ /* -** $Id: llex.c,v 1.39 1999/09/06 13:55:09 roberto Exp roberto $ +** $Id: llex.c,v 1.40 1999/10/04 17:51:04 roberto Exp roberto $ ** Lexical Analyzer ** See Copyright Notice in lua.h */ @@ -37,7 +37,7 @@ void luaX_init (void) { int i; for (i=0; i<(sizeof(reserved)/sizeof(reserved[0])); i++) { TaggedString *ts = luaS_new(reserved[i]); - ts->marked = FIRST_RESERVED+i; /* reserved word (always > 255) */ + ts->marked = RESERVEDMARK+i; /* reserved word */ } } @@ -426,8 +426,8 @@ int luaX_lex (LexState *LS) { } while (isalnum(LS->current) || LS->current == '_'); save('\0'); ts = luaS_new(L->Mbuffer+L->Mbuffbase); - if (ts->marked >= FIRST_RESERVED) - return ts->marked; /* reserved word */ + if (ts->marked >= RESERVEDMARK) /* reserved word? */ + return ts->marked-RESERVEDMARK+FIRST_RESERVED; LS->seminfo.ts = ts; return NAME; } diff --git a/lobject.h b/lobject.h index 99239909..d69f7c5a 100644 --- a/lobject.h +++ b/lobject.h @@ -1,5 +1,5 @@ /* -** $Id: lobject.h,v 1.30 1999/09/06 20:34:18 roberto Exp roberto $ +** $Id: lobject.h,v 1.31 1999/10/04 17:51:04 roberto Exp roberto $ ** Type definitions for Lua objects ** See Copyright Notice in lua.h */ @@ -14,13 +14,15 @@ #ifdef DEBUG -#include "lauxlib.h" -#define LUA_INTERNALERROR(s) \ - luaL_verror("INTERNAL ERROR - %s [%s:%d]",(s),__FILE__,__LINE__) -#define LUA_ASSERT(c,s) { if (!(c)) LUA_INTERNALERROR(s); } +#ifdef NDEBUG +#undef NDEBUG +#endif +#include +#define LUA_INTERNALERROR(s) assert(0) +#define LUA_ASSERT(c,s) assert(c) #else -#define LUA_INTERNALERROR(s) /* empty */ -#define LUA_ASSERT(c,s) /* empty */ +#define LUA_INTERNALERROR(s) /* empty */ +#define LUA_ASSERT(c,s) /* empty */ #endif @@ -90,8 +92,8 @@ typedef struct TObject { */ typedef struct TaggedString { - struct TaggedString *next; - int marked; + struct TaggedString *nexthash; /* chain hash table */ + struct TaggedString *nextglobal; /* chain global variables */ unsigned long hash; int constindex; /* hint to reuse constants (= -1 if this is a userdata) */ union { @@ -104,6 +106,7 @@ typedef struct TaggedString { void *v; /* if this is a userdata, here is its value */ } d; } u; + unsigned char marked; char str[1]; /* \0 byte already reserved */ } TaggedString; diff --git a/lstring.c b/lstring.c index 633fbcd4..f45f2e32 100644 --- a/lstring.c +++ b/lstring.c @@ -1,5 +1,5 @@ /* -** $Id: lstring.c,v 1.21 1999/09/28 12:27:06 roberto Exp roberto $ +** $Id: lstring.c,v 1.22 1999/10/04 17:51:04 roberto Exp roberto $ ** String table (keeps all strings handled by Lua) ** See Copyright Notice in lua.h */ @@ -19,11 +19,6 @@ -TaggedString luaS_EMPTY = {NULL, MAX_INT, 0L, 0, - {{{LUA_T_NIL, {NULL}}, 0L}}, {0}}; - - - /* ** to avoid hash tables with size = 0 (cannot hash with size=0), all ** hash tables are initialized with this `array'. Elements are never @@ -48,6 +43,7 @@ void luaS_init (void) { void luaS_freeall (void) { int i; for (i=0; istring_root[i].nuse==0, "non-empty string table"); if (L->string_root[i].hash != init_hash) luaM_free(L->string_root[i].hash); } @@ -56,43 +52,27 @@ void luaS_freeall (void) { static unsigned long hash_s (const char *s, long l) { - unsigned long h = 0; /* seed */ + unsigned long h = l; /* seed */ while (l--) h = h ^ ((h<<5)+(h>>2)+(unsigned char)*(s++)); return h; } -static int newsize (const stringtable *tb) { - int realuse = 0; - int i; - /* count how many entries are really in use */ - for (i=0; isize; i++) { - if (tb->hash[i] != NULL && tb->hash[i] != &luaS_EMPTY) - realuse++; - } - return luaO_redimension(realuse*2); -} - - static void grow (stringtable *tb) { - int ns = newsize(tb); + int ns = luaO_redimension(tb->size*2); /* new size */ TaggedString **newhash = luaM_newvector(ns, TaggedString *); int i; - for (i=0; inuse = 0; for (i=0; isize; i++) { - if (tb->hash[i] != NULL && tb->hash[i] != &luaS_EMPTY) { - unsigned long h = tb->hash[i]->hash; - int h1 = h%ns; - while (newhash[h1]) { - h1 += (h&(ns-2)) + 1; /* double hashing */ - if (h1 >= ns) h1 -= ns; - } - newhash[h1] = tb->hash[i]; - tb->nuse++; + TaggedString *p = tb->hash[i]; + while (p) { /* for each node in the list */ + TaggedString *next = p->nexthash; /* save next */ + int h = p->hash%ns; /* new position */ + p->nexthash = newhash[h]; /* chain it in new position */ + newhash[h] = p; + p = next; } } luaM_free(tb->hash); @@ -101,91 +81,79 @@ static void grow (stringtable *tb) { } +static TaggedString *newone (long l, unsigned long h) { + TaggedString *ts = (TaggedString *)luaM_malloc( + sizeof(TaggedString)+l*sizeof(char)); + ts->marked = 0; + ts->nexthash = NULL; + ts->nextglobal = ts; /* signal it is not in global list */ + ts->hash = h; + return ts; +} + + static TaggedString *newone_s (const char *str, long l, unsigned long h) { - TaggedString *ts = (TaggedString *)luaM_malloc(sizeof(TaggedString)+l); + TaggedString *ts = newone(l, h); memcpy(ts->str, str, l); ts->str[l] = 0; /* ending 0 */ ts->u.s.globalval.ttype = LUA_T_NIL; /* initialize global value */ ts->u.s.len = l; ts->constindex = 0; L->nblocks += gcsizestring(l); - ts->marked = 0; - ts->next = ts; /* signal it is in no list */ - ts->hash = h; return ts; } + static TaggedString *newone_u (void *buff, int tag, unsigned long h) { - TaggedString *ts = luaM_new(TaggedString); + TaggedString *ts = newone(0, h); ts->u.d.v = buff; ts->u.d.tag = (tag == LUA_ANYTAG) ? 0 : tag; ts->constindex = -1; /* tag -> this is a userdata */ L->nblocks++; - ts->marked = 0; - ts->next = ts; /* signal it is in no list */ - ts->hash = h; return ts; } -static void newentry (stringtable *tb, TaggedString *ts, int h1) { +static void newentry (stringtable *tb, TaggedString *ts, int h) { tb->nuse++; - if ((long)tb->nuse*3 < (long)tb->size*2) /* still have room? */ - tb->hash[h1] = ts; - else { /* must grow */ + if (tb->nuse >= tb->size) { /* no more room? */ if (tb->hash == init_hash) { /* cannot change init_hash */ - LUA_ASSERT(h1==0, "`init_hash' has size 1"); + LUA_ASSERT(h==0, "`init_hash' has size 1"); tb->hash = luaM_newvector(1, TaggedString *); /* so, `clone' it */ + tb->hash[0] = NULL; } - tb->hash[h1] = ts; grow(tb); + h = ts->hash%tb->size; /* new hash position */ } + ts->nexthash = tb->hash[h]; /* chain new entry */ + tb->hash[h] = ts; } -static TaggedString *insert_s (const char *str, long l, stringtable *tb) { +static TaggedString *insert_s (const char *str, long l, + stringtable *tb, unsigned long h) { + int h1 = h%tb->size; TaggedString *ts; - unsigned long h = hash_s(str, l); - int size = tb->size; - int j = -1; /* last empty place found (or -1) */ - int h1 = h%size; - while ((ts = tb->hash[h1]) != NULL) { - if (ts == &luaS_EMPTY) - j = h1; - else if (ts->u.s.len == l && (memcmp(str, ts->str, l) == 0)) + for (ts = tb->hash[h1]; ts; ts = ts->nexthash) + if (ts->u.s.len == l && (memcmp(str, ts->str, l) == 0)) return ts; - h1 += (h&(size-2)) + 1; /* double hashing */ - if (h1 >= size) h1 -= size; - } /* not found */ ts = newone_s(str, l, h); /* create new entry */ - if (j != -1) /* is there an EMPTY space? */ - tb->hash[j] = ts; /* use it for new element */ - else - newentry(tb, ts, h1); /* no EMPTY places; must use a virgin one */ + newentry(tb, ts, h1); /* insert it on table */ return ts; } static TaggedString *insert_u (void *buff, int tag, stringtable *tb) { - TaggedString *ts; unsigned long h = (unsigned long)buff; - int size = tb->size; - int j = -1; - int h1 = h%size; - while ((ts = tb->hash[h1]) != NULL) { - if (ts == &luaS_EMPTY) - j = h1; - else if ((tag == ts->u.d.tag || tag == LUA_ANYTAG) && buff == ts->u.d.v) + int h1 = h%tb->size; + TaggedString *ts; + for (ts = tb->hash[h1]; ts; ts = ts->nexthash) + if ((tag == ts->u.d.tag || tag == LUA_ANYTAG) && buff == ts->u.d.v) return ts; - h1 += (h&(size-2)) + 1; - if (h1 >= size) h1 -= size; - } + /* not found */ ts = newone_u(buff, tag, h); - if (j != -1) - tb->hash[j] = ts; - else - newentry(tb, ts, h1); + newentry(tb, ts, h1); return ts; } @@ -196,8 +164,8 @@ TaggedString *luaS_createudata (void *udata, int tag) { } TaggedString *luaS_newlstr (const char *str, long l) { - int t = (l==0) ? 0 : ((int)((unsigned char)str[0]+l))%NUM_HASHSTR; - return insert_s(str, l, &L->string_root[t]); + unsigned long h = hash_s(str, l); + return insert_s(str, l, &L->string_root[h%NUM_HASHSTR], h); } TaggedString *luaS_new (const char *str) { @@ -206,7 +174,7 @@ TaggedString *luaS_new (const char *str) { TaggedString *luaS_newfixedstring (const char *str) { TaggedString *ts = luaS_new(str); - if (ts->marked == 0) ts->marked = 2; /* avoid GC */ + if (ts->marked == 0) ts->marked = FIXMARK; /* avoid GC */ return ts; } @@ -219,8 +187,8 @@ void luaS_free (TaggedString *t) { void luaS_rawsetglobal (TaggedString *ts, const TObject *newval) { ts->u.s.globalval = *newval; - if (ts->next == ts) { /* is not in list? */ - ts->next = L->rootglobal; + if (ts->nextglobal == ts) { /* is not in list? */ + ts->nextglobal = L->rootglobal; L->rootglobal = ts; } } diff --git a/lstring.h b/lstring.h index 7fc50810..c1323808 100644 --- a/lstring.h +++ b/lstring.h @@ -1,5 +1,5 @@ /* -** $Id: lstring.h,v 1.8 1999/08/16 20:52:00 roberto Exp roberto $ +** $Id: lstring.h,v 1.9 1999/10/04 17:51:04 roberto Exp roberto $ ** String table (keep all strings handled by Lua) ** See Copyright Notice in lua.h */ @@ -11,12 +11,18 @@ #include "lobject.h" -#define NUM_HASHSTR 31 -#define NUM_HASHUDATA 31 +#define NUM_HASHSTR 31 /* a prime not in array `dimensions' */ +#define NUM_HASHUDATA 31 /* idem */ #define NUM_HASHS (NUM_HASHSTR+NUM_HASHUDATA) -extern TaggedString luaS_EMPTY; +/* +** any taggedstring with mark>=FIXMARK is never collected. +** Marks>=RESERVEDMARK are used to identify reserved words. +*/ +#define FIXMARK 2 +#define RESERVEDMARK 3 + void luaS_init (void); TaggedString *luaS_createudata (void *udata, int tag);