From 144afa4d476c8a510ea93f02dd19dc20ce5e6c16 Mon Sep 17 00:00:00 2001
From: Roberto Ierusalimschy
Date: Thu, 3 Apr 2014 10:45:09 -0300
Subject: [PATCH] several changes in 'utf8.offset'

---
 lutf8lib.c | 49 +++++++++++++++++++++++++++----------------------
 1 file changed, 27 insertions(+), 22 deletions(-)

diff --git a/lutf8lib.c b/lutf8lib.c
index 9c88f00a..f7eb676f 100644
--- a/lutf8lib.c
+++ b/lutf8lib.c
@@ -1,5 +1,5 @@
 /*
-** $Id: lutf8lib.c,v 1.5 2014/04/01 14:39:55 roberto Exp roberto $
+** $Id: lutf8lib.c,v 1.6 2014/04/02 17:01:22 roberto Exp roberto $
 ** Standard library for UTF-8 manipulation
 ** See Copyright Notice in lua.h
 */
@@ -150,41 +150,46 @@ static int utfchar (lua_State *L) {


 /*
-** offset(s, n, [i]) -> index where n-th character *after*
+** offset(s, n, [i]) -> index where n-th character counting from
 ** position 'i' starts; 0 means character at 'i'.
 */
 static int byteoffset (lua_State *L) {
   size_t len;
   const char *s = luaL_checklstring(L, 1, &len);
   int n = luaL_checkint(L, 2);
-  lua_Integer posi = u_posrelat(luaL_optinteger(L, 3, 1), len) - 1;
-  luaL_argcheck(L, 0 <= posi && posi <= (lua_Integer)len, 3,
+  lua_Integer posi = (n >= 0) ? 1 : len + 1;
+  posi = u_posrelat(luaL_optinteger(L, 3, posi), len);
+  luaL_argcheck(L, 1 <= posi && --posi <= (lua_Integer)len, 3,
                    "position out of range");
   if (n == 0) {
     /* find beginning of current byte sequence */
     while (posi > 0 && iscont(s + posi)) posi--;
   }
-  else if (n < 0) {
-    while (n < 0 && posi > 0) { /* move back */
-      do { /* find beginning of previous character */
-        posi--;
-      } while (posi > 0 && iscont(s + posi));
-      n++;
-    }
-  }
   else {
-    n--; /* do not move for 1st character */
-    while (n > 0 && posi < (lua_Integer)len) {
-      do { /* find beginning of next character */
-        posi++;
-      } while (iscont(s + posi)); /* ('\0' is not continuation) */
-      n--;
-    }
+    if (iscont(s + posi))
+      luaL_error(L, "initial position is a continuation byte");
+    if (n < 0) {
+      while (n < 0 && posi > 0) { /* move back */
+        do { /* find beginning of previous character */
+          posi--;
+        } while (posi > 0 && iscont(s + posi));
+        n++;
+      }
+    }
+    else {
+      n--; /* do not move for 1st character */
+      while (n > 0 && posi < (lua_Integer)len) {
+        do { /* find beginning of next character */
+          posi++;
+        } while (iscont(s + posi)); /* (cannot pass final '\0') */
+        n--;
+      }
+    }
   }
-  if (n == 0)
+  if (n == 0) /* did it find given character? */
     lua_pushinteger(L, posi + 1);
-  else
-    lua_pushnil(L); /* no such position */
+  else /* no such character */
+    lua_pushnil(L);
   return 1;
 }
