From f4211a5ea4e235ccfa8b8dfa46031c23e9e839e2 Mon Sep 17 00:00:00 2001 From: Roberto Ierusalimschy Date: Thu, 17 Aug 2023 10:42:56 -0300 Subject: [PATCH] More control over encoding of test files The few UTF-8 test files are commented as such, and there is only one non UTF-8 test file (to test non UTF-8 sources). --- testes/db.lua | 6 ++--- testes/files.lua | 8 +++---- testes/pm.lua | 56 +++++++++++++++++++++++++++++----------------- testes/sort.lua | 2 +- testes/strings.lua | 3 +++ testes/utf8.lua | 2 ++ 6 files changed, 49 insertions(+), 28 deletions(-) diff --git a/testes/db.lua b/testes/db.lua index 67b58934..d3758c41 100644 --- a/testes/db.lua +++ b/testes/db.lua @@ -345,7 +345,7 @@ function f(a,b) local _, y = debug.getlocal(1, 2) assert(x == a and y == b) assert(debug.setlocal(2, 3, "pera") == "AA".."AA") - assert(debug.setlocal(2, 4, "maçã") == "B") + assert(debug.setlocal(2, 4, "manga") == "B") x = debug.getinfo(2) assert(x.func == g and x.what == "Lua" and x.name == 'g' and x.nups == 2 and string.find(x.source, "^@.*db%.lua$")) @@ -373,9 +373,9 @@ function g (...) local arg = {...} do local a,b,c; a=math.sin(40); end local feijao - local AAAA,B = "xuxu", "mamão" + local AAAA,B = "xuxu", "abacate" f(AAAA,B) - assert(AAAA == "pera" and B == "maçã") + assert(AAAA == "pera" and B == "manga") do local B = 13 local x,y = debug.getlocal(1,5) diff --git a/testes/files.lua b/testes/files.lua index be00bf3f..1476006e 100644 --- a/testes/files.lua +++ b/testes/files.lua @@ -92,8 +92,8 @@ assert(io.output():seek("end") == string.len("alo joao")) assert(io.output():seek("set") == 0) -assert(io.write('"álo"', "{a}\n", "second line\n", "third line \n")) -assert(io.write('çfourth_line')) +assert(io.write('"alo"', "{a}\n", "second line\n", "third line \n")) +assert(io.write('Xfourth_line')) io.output(io.stdout) collectgarbage() -- file should be closed by GC assert(io.input() == io.stdin and rawequal(io.output(), io.stdout)) @@ -300,14 +300,14 @@ do -- test error returns end checkerr("invalid format", io.read, "x") assert(io.read(0) == "") -- not eof -assert(io.read(5, 'l') == '"álo"') +assert(io.read(5, 'l') == '"alo"') assert(io.read(0) == "") assert(io.read() == "second line") local x = io.input():seek() assert(io.read() == "third line ") assert(io.input():seek("set", x)) assert(io.read('L') == "third line \n") -assert(io.read(1) == "ç") +assert(io.read(1) == "X") assert(io.read(string.len"fourth_line") == "fourth_line") assert(io.input():seek("cur", -string.len"fourth_line")) assert(io.read() == "fourth_line") diff --git a/testes/pm.lua b/testes/pm.lua index 795596d4..44454dff 100644 --- a/testes/pm.lua +++ b/testes/pm.lua @@ -1,6 +1,9 @@ -- $Id: testes/pm.lua $ -- See Copyright Notice in file all.lua +-- UTF-8 file + + print('testing pattern matching') local function checkerror (msg, f, ...) @@ -50,6 +53,19 @@ assert(f('aLo_ALO', '%a*') == 'aLo') assert(f(" \n\r*&\n\r xuxu \n\n", "%g%g%g+") == "xuxu") + +-- Adapt a pattern to UTF-8 +local function PU (p) + -- break '?' into each individual byte of a character + p = string.gsub(p, "(" .. utf8.charpattern .. ")%?", function (c) + return string.gsub(c, ".", "%0?") + end) + -- change '.' to utf-8 character patterns + p = string.gsub(p, "%.", utf8.charpattern) + return p +end + + assert(f('aaab', 'a*') == 'aaa'); assert(f('aaa', '^.*$') == 'aaa'); assert(f('aaa', 'b*') == ''); @@ -73,16 +89,16 @@ assert(f('aaa', '^.-$') == 'aaa') assert(f('aabaaabaaabaaaba', 'b.*b') == 'baaabaaabaaab') assert(f('aabaaabaaabaaaba', 'b.-b') == 'baaab') assert(f('alo xo', '.o$') == 'xo') -assert(f(' \n isto é assim', '%S%S*') == 'isto') -assert(f(' \n isto é assim', '%S*$') == 'assim') -assert(f(' \n isto é assim', '[a-z]*$') == 'assim') +assert(f(' \n isto é assim', '%S%S*') == 'isto') +assert(f(' \n isto é assim', '%S*$') == 'assim') +assert(f(' \n isto é assim', '[a-z]*$') == 'assim') assert(f('um caracter ? extra', '[^%sa-z]') == '?') assert(f('', 'a?') == '') -assert(f('á', 'á?') == 'á') -assert(f('ábl', 'á?b?l?') == 'ábl') -assert(f(' ábl', 'á?b?l?') == '') +assert(f('á', PU'á?') == 'á') +assert(f('ábl', PU'á?b?l?') == 'ábl') +assert(f(' ábl', PU'á?b?l?') == '') assert(f('aa', '^aa?a?a') == 'aa') -assert(f(']]]áb', '[^]]') == 'á') +assert(f(']]]áb', '[^]]+') == 'áb') assert(f("0alo alo", "%x*") == "0a") assert(f("alo alo", "%C+") == "alo alo") print('+') @@ -136,28 +152,28 @@ assert(string.match("alo xyzK", "(%w+)K") == "xyz") assert(string.match("254 K", "(%d*)K") == "") assert(string.match("alo ", "(%w*)$") == "") assert(not string.match("alo ", "(%w+)$")) -assert(string.find("(álo)", "%(á") == 1) -local a, b, c, d, e = string.match("âlo alo", "^(((.).).* (%w*))$") -assert(a == 'âlo alo' and b == 'âl' and c == 'â' and d == 'alo' and e == nil) +assert(string.find("(álo)", "%(á") == 1) +local a, b, c, d, e = string.match("âlo alo", PU"^(((.).). (%w*))$") +assert(a == 'âlo alo' and b == 'âl' and c == 'â' and d == 'alo' and e == nil) a, b, c, d = string.match('0123456789', '(.+(.?)())') assert(a == '0123456789' and b == '' and c == 11 and d == nil) print('+') -assert(string.gsub('ülo ülo', 'ü', 'x') == 'xlo xlo') -assert(string.gsub('alo úlo ', ' +$', '') == 'alo úlo') -- trim +assert(string.gsub('ülo ülo', 'ü', 'x') == 'xlo xlo') +assert(string.gsub('alo úlo ', ' +$', '') == 'alo úlo') -- trim assert(string.gsub(' alo alo ', '^%s*(.-)%s*$', '%1') == 'alo alo') -- double trim assert(string.gsub('alo alo \n 123\n ', '%s+', ' ') == 'alo alo 123 ') -local t = "abç d" -a, b = string.gsub(t, '(.)', '%1@') -assert('@'..a == string.gsub(t, '', '@') and b == 5) -a, b = string.gsub('abçd', '(.)', '%0@', 2) -assert(a == 'a@b@çd' and b == 2) +local t = "abç d" +a, b = string.gsub(t, PU'(.)', '%1@') +assert(a == "a@b@ç@ @d@" and b == 5) +a, b = string.gsub('abçd', PU'(.)', '%0@', 2) +assert(a == 'a@b@çd' and b == 2) assert(string.gsub('alo alo', '()[al]', '%1') == '12o 56o') assert(string.gsub("abc=xyz", "(%w*)(%p)(%w+)", "%3%2%1-%0") == "xyz=abc-abc=xyz") assert(string.gsub("abc", "%w", "%1%0") == "aabbcc") assert(string.gsub("abc", "%w+", "%0%1") == "abcabc") -assert(string.gsub('áéí', '$', '\0óú') == 'áéí\0óú') +assert(string.gsub('áéí', '$', '\0óú') == 'áéí\0óú') assert(string.gsub('', '^', 'r') == 'r') assert(string.gsub('', '$', 'r') == 'r') print('+') @@ -188,8 +204,8 @@ do end function f(a,b) return string.gsub(a,'.',b) end -assert(string.gsub("trocar tudo em |teste|b| é |beleza|al|", "|([^|]*)|([^|]*)|", f) == - "trocar tudo em bbbbb é alalalalalal") +assert(string.gsub("trocar tudo em |teste|b| é |beleza|al|", "|([^|]*)|([^|]*)|", f) == + "trocar tudo em bbbbb é alalalalalal") local function dostring (s) return load(s, "")() or "" end assert(string.gsub("alo $a='x'$ novamente $return a$", diff --git a/testes/sort.lua b/testes/sort.lua index 52919b8c..40bb2d8a 100644 --- a/testes/sort.lua +++ b/testes/sort.lua @@ -289,7 +289,7 @@ timesort(a, limit, function(x,y) return nil end, "equal") for i,v in pairs(a) do assert(v == false) end -AA = {"álo", "\0first :-)", "alo", "then this one", "45", "and a new"} +AA = {"\xE1lo", "\0first :-)", "alo", "then this one", "45", "and a new"} table.sort(AA) check(AA) diff --git a/testes/strings.lua b/testes/strings.lua index b033c6ab..90983edd 100644 --- a/testes/strings.lua +++ b/testes/strings.lua @@ -1,6 +1,9 @@ -- $Id: testes/strings.lua $ -- See Copyright Notice in file all.lua +-- ISO Latin encoding + + print('testing strings and string library') local maxi = math.maxinteger diff --git a/testes/utf8.lua b/testes/utf8.lua index c5a9dd3f..efadbd5c 100644 --- a/testes/utf8.lua +++ b/testes/utf8.lua @@ -1,6 +1,8 @@ -- $Id: testes/utf8.lua $ -- See Copyright Notice in file all.lua +-- UTF-8 file + print "testing UTF-8 library" local utf8 = require'utf8'