From 75ac0d217266dba48a887df96d37398140e22b9e Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Tue, 16 Sep 1997 16:25:59 -0300 Subject: [PATCH] Standard library for strings and pattern-matching --- strlib.c => lstrlib.c | 275 ++++++++++++++++++++---------------------- strlib.h | 13 -- 2 files changed, 134 insertions(+), 154 deletions(-) rename strlib.c => lstrlib.c (80%) delete mode 100644 strlib.h diff --git a/strlib.c b/lstrlib.c similarity index 80% rename from strlib.c rename to lstrlib.c index 6d16043a..eba282de 100644 --- a/strlib.c +++ b/lstrlib.c @@ -1,17 +1,17 @@ /* -** strlib.c -** String library to LUA +** $Id: lstrlib.c,v 1.1 1997/08/14 19:47:57 roberto Exp roberto $ +** Standard library for strings and pattern-matching +** See Copyright Notice in lua.h */ -char *rcs_strlib="$Id: strlib.c,v 1.46 1997/06/19 18:49:40 roberto Exp roberto $"; -#include +#include #include #include -#include +#include +#include "lauxlib.h" #include "lua.h" -#include "auxlib.h" #include "lualib.h" @@ -37,12 +37,14 @@ static char *strbuffer (unsigned long size) return lbuffer.b; } + static char *openspace (unsigned long size) { char *buff = strbuffer(lbuffer.size+size); return buff+lbuffer.size; } + char *luaI_addchar (int c) { if (lbuffer.size >= lbuffer.max) @@ -51,6 +53,7 @@ char *luaI_addchar (int c) return lbuffer.b; } + void luaI_emptybuff (void) { lbuffer.size = 0; /* prepare for next string */ @@ -64,23 +67,19 @@ static void addnchar (char *s, int n) lbuffer.size += n; } + static void addstr (char *s) { addnchar(s, strlen(s)); } -/* -** Return the string length -*/ static void str_len (void) { lua_pushnumber(strlen(luaL_check_string(1))); } -/* -** Return the substring of a string -*/ + static void str_sub (void) { char *s = luaL_check_string(1); @@ -97,9 +96,7 @@ static void str_sub (void) else lua_pushstring(""); } -/* -** Convert a string to lower case. -*/ + static void str_lower (void) { char *s; @@ -109,9 +106,7 @@ static void str_lower (void) lua_pushstring(luaI_addchar(0)); } -/* -** Convert a string to upper case. -*/ + static void str_upper (void) { char *s; @@ -131,9 +126,7 @@ static void str_rep (void) lua_pushstring(luaI_addchar(0)); } -/* -** get ascii value of a character in a string -*/ + static void str_ascii (void) { char *s = luaL_check_string(1); @@ -143,74 +136,12 @@ static void str_ascii (void) } -/* pattern matching */ -#define ESC '%' -#define SPECIALS "^$*?.([%-" - -static char *bracket_end (char *p) -{ - return (*p == 0) ? NULL : strchr((*p=='^') ? p+2 : p+1, ']'); -} - -char *luaL_item_end (char *p) -{ - switch (*p++) { - case '\0': return p-1; - case ESC: - if (*p == 0) luaL_verror("incorrect pattern (ends with `%c')", ESC); - return p+1; - case '[': { - char *end = bracket_end(p); - if (end == NULL) lua_error("incorrect pattern (missing `]')"); - return end+1; - } - default: - return p; - } -} - -static int matchclass (int c, int cl) -{ - int res; - switch (tolower((unsigned char)cl)) { - case 'a' : res = isalpha((unsigned char)c); break; - case 'c' : res = iscntrl((unsigned char)c); break; - case 'd' : res = isdigit((unsigned char)c); break; - case 'l' : res = islower((unsigned char)c); break; - case 'p' : res = ispunct((unsigned char)c); break; - case 's' : res = isspace((unsigned char)c); break; - case 'u' : res = isupper((unsigned char)c); break; - case 'w' : res = isalnum((unsigned char)c); break; - default: return (cl == c); - } - return (islower((unsigned char)cl) ? res : !res); -} - -int luaL_singlematch (int c, char *p) -{ - if (c == 0) return 0; - switch (*p) { - case '.': return 1; - case ESC: return matchclass(c, *(p+1)); - case '[': { - char *end = bracket_end(p+1); - int sig = *(p+1) == '^' ? (p++, 0) : 1; - while (++p < end) { - if (*p == ESC) { - if (((p+1) < end) && matchclass(c, *++p)) return sig; - } - else if ((*(p+1) == '-') && (p+2 < end)) { - p+=2; - if (*(p-2) <= c && c <= *p) return sig; - } - else if (*p == c) return sig; - } - return !sig; - } - default: return (*p == c); - } -} +/* +** ======================================================= +** PATTERN MATCHING +** ======================================================= +*/ #define MAX_CAPT 9 @@ -222,6 +153,10 @@ static struct { static int num_captures; /* only valid after a sucessful call to match */ +#define ESC '%' +#define SPECIALS "^$*?.([%-" + + static void push_captures (void) { int i; @@ -235,6 +170,7 @@ static void push_captures (void) } } + static int check_cap (int l, int level) { l -= '1'; @@ -243,6 +179,7 @@ static int check_cap (int l, int level) return l; } + static int capture_to_close (int level) { for (level--; level>=0; level--) @@ -251,6 +188,71 @@ static int capture_to_close (int level) return 0; /* to avoid warnings */ } + +static char *bracket_end (char *p) +{ + return (*p == 0) ? NULL : strchr((*p=='^') ? p+2 : p+1, ']'); +} + + +static int matchclass (int c, int cl) +{ + int res; + if (c == 0) return 0; + switch (tolower((unsigned char)cl)) { + case 'a' : res = isalpha((unsigned char)c); break; + case 'c' : res = iscntrl((unsigned char)c); break; + case 'd' : res = isdigit((unsigned char)c); break; + case 'l' : res = islower((unsigned char)c); break; + case 'p' : res = ispunct((unsigned char)c); break; + case 's' : res = isspace((unsigned char)c); break; + case 'u' : res = isupper((unsigned char)c); break; + case 'w' : res = isalnum((unsigned char)c); break; + default: return (cl == c); + } + return (islower((unsigned char)cl) ? res : !res); +} + + +int luaI_singlematch (int c, char *p, char **ep) +{ + switch (*p) { + case '\0': + *ep = p; + return 0; + case '.': + *ep = p+1; + return (c != 0); + case ESC: + if (*(++p) == '\0') + luaL_verror("incorrect pattern (ends with `%c')", ESC); + *ep = p+1; + return matchclass(c, *p); + case '[': { + char *end = bracket_end(p+1); + int sig = *(p+1) == '^' ? (p++, 0) : 1; + if (end == NULL) lua_error("incorrect pattern (missing `]')"); + *ep = end+1; + if (c == 0) return 0; + while (++p < end) { + if (*p == ESC) { + if (((p+1) < end) && matchclass(c, *++p)) return sig; + } + else if ((*(p+1) == '-') && (p+2 < end)) { + p+=2; + if (*(p-2) <= c && c <= *p) return sig; + } + else if (*p == c) return sig; + } + return !sig; + } + default: + *ep = p+1; + return (*p == c); + } +} + + static char *matchbalance (char *s, int b, int e) { if (*s != b) return NULL; @@ -266,6 +268,31 @@ static char *matchbalance (char *s, int b, int e) return NULL; /* string ends out of balance */ } + +static char *matchitem (char *s, char *p, int level, char **ep) +{ + if (*p == ESC) { + p++; + if (isdigit((unsigned char)*p)) { /* capture */ + int l = check_cap(*p, level); + *ep = p+1; + if (strncmp(capture[l].init, s, capture[l].len) == 0) + return s+capture[l].len; + else return NULL; + } + else if (*p == 'b') { /* balanced string */ + p++; + if (*p == 0 || *(p+1) == 0) + lua_error("bad balanced pattern specification"); + *ep = p+2; + return matchbalance(s, *p, *(p+1)); + } + else p--; /* and go through */ + } + return (luaI_singlematch(*s, p, ep) ? s+1 : NULL); +} + + static char *match (char *s, char *p, int level) { init: /* using goto's to optimize tail recursion */ @@ -283,38 +310,19 @@ static char *match (char *s, char *p, int level) capture[l].len = -1; /* undo capture */ return res; } - case ESC: - if (isdigit((unsigned char)(*(p+1)))) { /* capture */ - int l = check_cap(*(p+1), level); - if (strncmp(capture[l].init, s, capture[l].len) == 0) { - /* return match(p+2, s+capture[l].len, level); */ - p+=2; s+=capture[l].len; goto init; - } - else return NULL; - } - else if (*(p+1) == 'b') { /* balanced string */ - if (*(p+2) == 0 || *(p+3) == 0) - lua_error("bad balanced pattern specification"); - s = matchbalance(s, *(p+2), *(p+3)); - if (s == NULL) return NULL; - else { /* return match(p+4, s, level); */ - p+=4; goto init; - } - } - else goto dflt; case '\0': case '$': /* (possibly) end of pattern */ if (*p == 0 || (*(p+1) == 0 && *s == 0)) { num_captures = level; return s; } - else goto dflt; - default: dflt: { /* it is a pattern item */ - int m = luaL_singlematch(*s, p); - char *ep = luaL_item_end(p); /* get what is next */ + /* else go through */ + default: { /* it is a pattern item */ + char *ep; /* get what is next */ + char *s1 = matchitem(s, p, level, &ep); switch (*ep) { case '*': { /* repetition */ char *res; - if (m && (res = match(s+1, p, level))) + if (s1 && (res = match(s1, p, level))) return res; p=ep+1; goto init; /* else return match(s, ep+1, level); */ } @@ -322,34 +330,35 @@ static char *match (char *s, char *p, int level) char *res; if ((res = match(s, ep+1, level)) != 0) return res; - else if (m) { - s++; - goto init; /* return match(s+1, p, level); */ + else if (s1) { + s = s1; + goto init; /* return match(s1, p, level); */ } else return NULL; } case '?': { /* optional */ char *res; - if (m && (res = match(s+1, ep+1, level))) + if (s1 && (res = match(s1, ep+1, level))) return res; p=ep+1; goto init; /* else return match(s, ep+1, level); */ } default: - if (m) { s++; p=ep; goto init; } /* return match(s+1, ep, level); */ + if (s1) { s=s1; p=ep; goto init; } /* return match(s1, ep, level); */ else return NULL; } } } } + static void str_find (void) { char *s = luaL_check_string(1); char *p = luaL_check_string(2); long init = (long)luaL_opt_number(3, 1) - 1; luaL_arg_check(0 <= init && init <= strlen(s), 3, "out of range"); - if (lua_getparam(4) != LUA_NOOBJECT || + if (lua_getparam(4) != LUA_NOOBJECT || strpbrk(p, SPECIALS) == NULL) { /* no special caracters? */ char *s2 = strstr(s+init, p); if (s2) { @@ -372,7 +381,8 @@ static void str_find (void) } } -static void add_s (lua_Object newp, lua_Object table, int n) + +static void add_s (lua_Object newp) { if (lua_isstring(newp)) { char *news = lua_getstring(newp); @@ -390,12 +400,8 @@ static void add_s (lua_Object newp, lua_Object table, int n) struct lbuff oldbuff; int status; lua_beginblock(); - if (lua_istable(table)) { - lua_pushobject(table); - lua_pushnumber(n); - } push_captures(); - /* function may use lbuffer, so save it and create a new one */ + /* function may use lbuffer, so save it and create a luaM_new one */ oldbuff = lbuffer; lbuffer.b = NULL; lbuffer.max = lbuffer.size = 0; status = lua_callfunction(newp); @@ -411,13 +417,13 @@ static void add_s (lua_Object newp, lua_Object table, int n) else luaL_arg_check(0, 3, NULL); } + static void str_gsub (void) { char *src = luaL_check_string(1); char *p = luaL_check_string(2); lua_Object newp = lua_getparam(3); - lua_Object table = lua_getparam(4); - int max_s = (int)luaL_opt_number(lua_istable(table)?5:4, strlen(src)+1); + int max_s = (int)luaL_opt_number(4, strlen(src)+1); int anchor = (*p == '^') ? (p++, 1) : 0; int n = 0; luaI_emptybuff(); @@ -425,7 +431,7 @@ static void str_gsub (void) char *e = match(src, p, 0); if (e) { n++; - add_s(newp, table, n); + add_s(newp); } if (e && e>src) /* non empty match? */ src = e; /* skip it */ @@ -439,18 +445,6 @@ static void str_gsub (void) lua_pushnumber(n); /* number of substitutions */ } -static void str_set (void) -{ - char *item = luaL_check_string(1); - int i; - luaL_arg_check(*luaL_item_end(item) == 0, 1, "wrong format"); - luaI_emptybuff(); - for (i=1; i<256; i++) /* 0 cannot be part of a set */ - if (luaL_singlematch(i, item)) - luaI_addchar(i); - lua_pushstring(luaI_addchar(0)); -} - void luaI_addquoted (char *s) { @@ -505,7 +499,7 @@ static void str_format (void) case 'u': case 'x': case 'X': sprintf(buff, form, (int)luaL_check_number(arg)); break; - case 'e': case 'E': case 'f': case 'g': + case 'e': case 'E': case 'f': case 'g': case 'G': sprintf(buff, form, luaL_check_number(arg)); break; default: /* also treat cases 'pnLlh' */ @@ -521,7 +515,6 @@ static void str_format (void) static struct luaL_reg strlib[] = { {"strlen", str_len}, {"strsub", str_sub}, -{"strset", str_set}, {"strlower", str_lower}, {"strupper", str_upper}, {"strrep", str_rep}, diff --git a/strlib.h b/strlib.h deleted file mode 100644 index 3e650be5..00000000 --- a/strlib.h +++ /dev/null @@ -1,13 +0,0 @@ -/* -** String library to LUA -** TeCGraf - PUC-Rio -** $Id: $ -*/ - - -#ifndef strlib_h - -void strlib_open (void); - -#endif -