From 18a0bed48264e5fb2db07974dd912f7b5e940f65 Mon Sep 17 00:00:00 2001 From: equa Date: Sun, 15 Jan 2023 11:58:00 -0600 Subject: organize! --- TODO | 5 + dictgen/config.embedded.lua | 18 + dictgen/config.main.lua | 18 + dictgen/dkjson.lua | 714 +++++++++++++++++++++++++++++++ dictgen/steno.lua | 251 +++++++++++ dictgen/words.lua | 127 ++++++ plover_toki_pona/dictionaries/dkjson.lua | 714 ------------------------------- plover_toki_pona/dictionaries/steno.lua | 256 ----------- plover_toki_pona/dictionaries/words.lua | 126 ------ 9 files changed, 1133 insertions(+), 1096 deletions(-) create mode 100644 TODO create mode 100644 dictgen/config.embedded.lua create mode 100644 dictgen/config.main.lua create mode 100644 dictgen/dkjson.lua create mode 100644 dictgen/steno.lua create mode 100644 dictgen/words.lua delete mode 100644 plover_toki_pona/dictionaries/dkjson.lua delete mode 100644 plover_toki_pona/dictionaries/steno.lua delete mode 100644 plover_toki_pona/dictionaries/words.lua diff --git a/TODO b/TODO new file mode 100644 index 0000000..6d44083 --- /dev/null +++ b/TODO @@ -0,0 +1,5 @@ +make fingerspelling syllable-based, maybe +add punctuation and commands to dictionary +documentation!!! +add qwerty keyboard layout +make the gemini PR layout more "forgiving" (making both rows of keys do the same thing?) 
diff --git a/dictgen/config.embedded.lua b/dictgen/config.embedded.lua new file mode 100644 index 0000000..8222951 --- /dev/null +++ b/dictgen/config.embedded.lua @@ -0,0 +1,18 @@ +return { + words = require("words"), + convert_to_english = true, + extra_briefs = { + ["KI/KHE/SHE/SNA/SHA/KA/LU"] = "kijetesantakalu", + ["HR"] = "{^.}", + ["HWNR"] = "{^\n^}", + S = "sina", + HS = "tawa", + P = "pi", + HP = "wile", + K = "kama", + HK = "jan", + L = "li", + M = "mi", + HM = "ni", + } +} diff --git a/dictgen/config.main.lua b/dictgen/config.main.lua new file mode 100644 index 0000000..640b973 --- /dev/null +++ b/dictgen/config.main.lua @@ -0,0 +1,18 @@ +return { + words = require("words"), + convert_to_english = false, + extra_briefs = { + ["KI/KHE/SHE/SNA/SHA/KA/LU"] = "kijetesantakalu", + ["HR"] = "{^.}", + ["HWNR"] = "{^\n^}", + S = "sina", + HS = "tawa", + P = "pi", + HP = "wile", + K = "kama", + HK = "jan", + L = "li", + M = "mi", + HM = "ni", + } +} diff --git a/dictgen/dkjson.lua b/dictgen/dkjson.lua new file mode 100644 index 0000000..fa50b9f --- /dev/null +++ b/dictgen/dkjson.lua @@ -0,0 +1,714 @@ +-- Module options: +local always_try_using_lpeg = true +local register_global_module_table = false +local global_module_name = 'json' + +--[==[ + +David Kolf's JSON module for Lua 5.1/5.2 + +Version 2.5 + + +For the documentation see the corresponding readme.txt or visit +. + +You can contact the author by sending an e-mail to 'david' at the +domain 'dkolf.de'. 
+ + +Copyright (C) 2010-2013 David Heiko Kolf + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +--]==] + +-- global dependencies: +local pairs, type, tostring, tonumber, getmetatable, setmetatable, rawset = + pairs, type, tostring, tonumber, getmetatable, setmetatable, rawset +local error, require, pcall, select = error, require, pcall, select +local floor, huge = math.floor, math.huge +local strrep, gsub, strsub, strbyte, strchar, strfind, strlen, strformat = + string.rep, string.gsub, string.sub, string.byte, string.char, + string.find, string.len, string.format +local strmatch = string.match +local concat = table.concat + +local json = { version = "dkjson 2.5" } + +if register_global_module_table then + _G[global_module_name] = json +end + +local _ENV = nil -- blocking globals in Lua 5.2 + +pcall (function() + -- Enable access to blocked metatables. + -- Don't worry, this module doesn't change anything in them. 
+ local debmeta = require "debug".getmetatable + if debmeta then getmetatable = debmeta end +end) + +json.null = setmetatable ({}, { + __tojson = function () return "null" end +}) + +local function isarray (tbl) + local max, n, arraylen = 0, 0, 0 + for k,v in pairs (tbl) do + if k == 'n' and type(v) == 'number' then + arraylen = v + if v > max then + max = v + end + else + if type(k) ~= 'number' or k < 1 or floor(k) ~= k then + return false + end + if k > max then + max = k + end + n = n + 1 + end + end + if max > 10 and max > arraylen and max > n * 2 then + return false -- don't create an array with too many holes + end + return true, max +end + +local escapecodes = { + ["\""] = "\\\"", ["\\"] = "\\\\", ["\b"] = "\\b", ["\f"] = "\\f", + ["\n"] = "\\n", ["\r"] = "\\r", ["\t"] = "\\t" +} + +local function escapeutf8 (uchar) + local value = escapecodes[uchar] + if value then + return value + end + local a, b, c, d = strbyte (uchar, 1, 4) + a, b, c, d = a or 0, b or 0, c or 0, d or 0 + if a <= 0x7f then + value = a + elseif 0xc0 <= a and a <= 0xdf and b >= 0x80 then + value = (a - 0xc0) * 0x40 + b - 0x80 + elseif 0xe0 <= a and a <= 0xef and b >= 0x80 and c >= 0x80 then + value = ((a - 0xe0) * 0x40 + b - 0x80) * 0x40 + c - 0x80 + elseif 0xf0 <= a and a <= 0xf7 and b >= 0x80 and c >= 0x80 and d >= 0x80 then + value = (((a - 0xf0) * 0x40 + b - 0x80) * 0x40 + c - 0x80) * 0x40 + d - 0x80 + else + return "" + end + if value <= 0xffff then + return strformat ("\\u%.4x", value) + elseif value <= 0x10ffff then + -- encode as UTF-16 surrogate pair + value = value - 0x10000 + local highsur, lowsur = 0xD800 + floor (value/0x400), 0xDC00 + (value % 0x400) + return strformat ("\\u%.4x\\u%.4x", highsur, lowsur) + else + return "" + end +end + +local function fsub (str, pattern, repl) + -- gsub always builds a new string in a buffer, even when no match + -- exists. First using find should be more efficient when most strings + -- don't contain the pattern. 
+ if strfind (str, pattern) then + return gsub (str, pattern, repl) + else + return str + end +end + +local function quotestring (value) + -- based on the regexp "escapable" in https://github.com/douglascrockford/JSON-js + value = fsub (value, "[%z\1-\31\"\\\127]", escapeutf8) + if strfind (value, "[\194\216\220\225\226\239]") then + value = fsub (value, "\194[\128-\159\173]", escapeutf8) + value = fsub (value, "\216[\128-\132]", escapeutf8) + value = fsub (value, "\220\143", escapeutf8) + value = fsub (value, "\225\158[\180\181]", escapeutf8) + value = fsub (value, "\226\128[\140-\143\168-\175]", escapeutf8) + value = fsub (value, "\226\129[\160-\175]", escapeutf8) + value = fsub (value, "\239\187\191", escapeutf8) + value = fsub (value, "\239\191[\176-\191]", escapeutf8) + end + return "\"" .. value .. "\"" +end +json.quotestring = quotestring + +local function replace(str, o, n) + local i, j = strfind (str, o, 1, true) + if i then + return strsub(str, 1, i-1) .. n .. strsub(str, j+1, -1) + else + return str + end +end + +-- locale independent num2str and str2num functions +local decpoint, numfilter + +local function updatedecpoint () + decpoint = strmatch(tostring(0.5), "([^05+])") + -- build a filter that can be used to remove group separators + numfilter = "[^0-9%-%+eE" .. gsub(decpoint, "[%^%$%(%)%%%.%[%]%*%+%-%?]", "%%%0") .. 
"]+" +end + +updatedecpoint() + +local function num2str (num) + return replace(fsub(tostring(num), numfilter, ""), decpoint, ".") +end + +local function str2num (str) + local num = tonumber(replace(str, ".", decpoint)) + if not num then + updatedecpoint() + num = tonumber(replace(str, ".", decpoint)) + end + return num +end + +local function addnewline2 (level, buffer, buflen) + buffer[buflen+1] = "\n" + buffer[buflen+2] = strrep (" ", level) + buflen = buflen + 2 + return buflen +end + +function json.addnewline (state) + if state.indent then + state.bufferlen = addnewline2 (state.level or 0, + state.buffer, state.bufferlen or #(state.buffer)) + end +end + +local encode2 -- forward declaration + +local function addpair (key, value, prev, indent, level, buffer, buflen, tables, globalorder, state) + local kt = type (key) + if kt ~= 'string' and kt ~= 'number' then + return nil, "type '" .. kt .. "' is not supported as a key by JSON." + end + if prev then + buflen = buflen + 1 + buffer[buflen] = "," + end + if indent then + buflen = addnewline2 (level, buffer, buflen) + end + buffer[buflen+1] = quotestring (key) + buffer[buflen+2] = ":" + return encode2 (value, indent, level, buffer, buflen + 2, tables, globalorder, state) +end + +local function appendcustom(res, buffer, state) + local buflen = state.bufferlen + if type (res) == 'string' then + buflen = buflen + 1 + buffer[buflen] = res + end + return buflen +end + +local function exception(reason, value, state, buffer, buflen, defaultmessage) + defaultmessage = defaultmessage or reason + local handler = state.exception + if not handler then + return nil, defaultmessage + else + state.bufferlen = buflen + local ret, msg = handler (reason, value, state, defaultmessage) + if not ret then return nil, msg or defaultmessage end + return appendcustom(ret, buffer, state) + end +end + +function json.encodeexception(reason, value, state, defaultmessage) + return quotestring("<" .. defaultmessage .. 
">") +end + +encode2 = function (value, indent, level, buffer, buflen, tables, globalorder, state) + local valtype = type (value) + local valmeta = getmetatable (value) + valmeta = type (valmeta) == 'table' and valmeta -- only tables + local valtojson = valmeta and valmeta.__tojson + if valtojson then + if tables[value] then + return exception('reference cycle', value, state, buffer, buflen) + end + tables[value] = true + state.bufferlen = buflen + local ret, msg = valtojson (value, state) + if not ret then return exception('custom encoder failed', value, state, buffer, buflen, msg) end + tables[value] = nil + buflen = appendcustom(ret, buffer, state) + elseif value == nil then + buflen = buflen + 1 + buffer[buflen] = "null" + elseif valtype == 'number' then + local s + if value ~= value or value >= huge or -value >= huge then + -- This is the behaviour of the original JSON implementation. + s = "null" + else + s = num2str (value) + end + buflen = buflen + 1 + buffer[buflen] = s + elseif valtype == 'boolean' then + buflen = buflen + 1 + buffer[buflen] = value and "true" or "false" + elseif valtype == 'string' then + buflen = buflen + 1 + buffer[buflen] = quotestring (value) + elseif valtype == 'table' then + if tables[value] then + return exception('reference cycle', value, state, buffer, buflen) + end + tables[value] = true + level = level + 1 + local isa, n = isarray (value) + if n == 0 and valmeta and valmeta.__jsontype == 'object' then + isa = false + end + local msg + if isa then -- JSON array + buflen = buflen + 1 + buffer[buflen] = "[" + for i = 1, n do + buflen, msg = encode2 (value[i], indent, level, buffer, buflen, tables, globalorder, state) + if not buflen then return nil, msg end + if i < n then + buflen = buflen + 1 + buffer[buflen] = "," + end + end + buflen = buflen + 1 + buffer[buflen] = "]" + else -- JSON object + local prev = false + buflen = buflen + 1 + buffer[buflen] = "{" + local order = valmeta and valmeta.__jsonorder or globalorder + if 
order then + local used = {} + n = #order + for i = 1, n do + local k = order[i] + local v = value[k] + if v then + used[k] = true + buflen, msg = addpair (k, v, prev, indent, level, buffer, buflen, tables, globalorder, state) + prev = true -- add a seperator before the next element + end + end + for k,v in pairs (value) do + if not used[k] then + buflen, msg = addpair (k, v, prev, indent, level, buffer, buflen, tables, globalorder, state) + if not buflen then return nil, msg end + prev = true -- add a seperator before the next element + end + end + else -- unordered + for k,v in pairs (value) do + buflen, msg = addpair (k, v, prev, indent, level, buffer, buflen, tables, globalorder, state) + if not buflen then return nil, msg end + prev = true -- add a seperator before the next element + end + end + if indent then + buflen = addnewline2 (level - 1, buffer, buflen) + end + buflen = buflen + 1 + buffer[buflen] = "}" + end + tables[value] = nil + else + return exception ('unsupported type', value, state, buffer, buflen, + "type '" .. valtype .. "' is not supported by JSON.") + end + return buflen +end + +function json.encode (value, state) + state = state or {} + local oldbuffer = state.buffer + local buffer = oldbuffer or {} + state.buffer = buffer + updatedecpoint() + local ret, msg = encode2 (value, state.indent, state.level or 0, + buffer, state.bufferlen or 0, state.tables or {}, state.keyorder, state) + if not ret then + error (msg, 2) + elseif oldbuffer == buffer then + state.bufferlen = ret + return true + else + state.bufferlen = nil + state.buffer = nil + return concat (buffer) + end +end + +local function loc (str, where) + local line, pos, linepos = 1, 1, 0 + while true do + pos = strfind (str, "\n", pos, true) + if pos and pos < where then + line = line + 1 + linepos = pos + pos = pos + 1 + else + break + end + end + return "line " .. line .. ", column " .. 
(where - linepos) +end + +local function unterminated (str, what, where) + return nil, strlen (str) + 1, "unterminated " .. what .. " at " .. loc (str, where) +end + +local function scanwhite (str, pos) + while true do + pos = strfind (str, "%S", pos) + if not pos then return nil end + local sub2 = strsub (str, pos, pos + 1) + if sub2 == "\239\187" and strsub (str, pos + 2, pos + 2) == "\191" then + -- UTF-8 Byte Order Mark + pos = pos + 3 + elseif sub2 == "//" then + pos = strfind (str, "[\n\r]", pos + 2) + if not pos then return nil end + elseif sub2 == "/*" then + pos = strfind (str, "*/", pos + 2) + if not pos then return nil end + pos = pos + 2 + else + return pos + end + end +end + +local escapechars = { + ["\""] = "\"", ["\\"] = "\\", ["/"] = "/", ["b"] = "\b", ["f"] = "\f", + ["n"] = "\n", ["r"] = "\r", ["t"] = "\t" +} + +local function unichar (value) + if value < 0 then + return nil + elseif value <= 0x007f then + return strchar (value) + elseif value <= 0x07ff then + return strchar (0xc0 + floor(value/0x40), + 0x80 + (floor(value) % 0x40)) + elseif value <= 0xffff then + return strchar (0xe0 + floor(value/0x1000), + 0x80 + (floor(value/0x40) % 0x40), + 0x80 + (floor(value) % 0x40)) + elseif value <= 0x10ffff then + return strchar (0xf0 + floor(value/0x40000), + 0x80 + (floor(value/0x1000) % 0x40), + 0x80 + (floor(value/0x40) % 0x40), + 0x80 + (floor(value) % 0x40)) + else + return nil + end +end + +local function scanstring (str, pos) + local lastpos = pos + 1 + local buffer, n = {}, 0 + while true do + local nextpos = strfind (str, "[\"\\]", lastpos) + if not nextpos then + return unterminated (str, "string", pos) + end + if nextpos > lastpos then + n = n + 1 + buffer[n] = strsub (str, lastpos, nextpos - 1) + end + if strsub (str, nextpos, nextpos) == "\"" then + lastpos = nextpos + 1 + break + else + local escchar = strsub (str, nextpos + 1, nextpos + 1) + local value + if escchar == "u" then + value = tonumber (strsub (str, nextpos + 2, nextpos + 5), 
16) + if value then + local value2 + if 0xD800 <= value and value <= 0xDBff then + -- we have the high surrogate of UTF-16. Check if there is a + -- low surrogate escaped nearby to combine them. + if strsub (str, nextpos + 6, nextpos + 7) == "\\u" then + value2 = tonumber (strsub (str, nextpos + 8, nextpos + 11), 16) + if value2 and 0xDC00 <= value2 and value2 <= 0xDFFF then + value = (value - 0xD800) * 0x400 + (value2 - 0xDC00) + 0x10000 + else + value2 = nil -- in case it was out of range for a low surrogate + end + end + end + value = value and unichar (value) + if value then + if value2 then + lastpos = nextpos + 12 + else + lastpos = nextpos + 6 + end + end + end + end + if not value then + value = escapechars[escchar] or escchar + lastpos = nextpos + 2 + end + n = n + 1 + buffer[n] = value + end + end + if n == 1 then + return buffer[1], lastpos + elseif n > 1 then + return concat (buffer), lastpos + else + return "", lastpos + end +end + +local scanvalue -- forward declaration + +local function scantable (what, closechar, str, startpos, nullval, objectmeta, arraymeta) + local len = strlen (str) + local tbl, n = {}, 0 + local pos = startpos + 1 + if what == 'object' then + setmetatable (tbl, objectmeta) + else + setmetatable (tbl, arraymeta) + end + while true do + pos = scanwhite (str, pos) + if not pos then return unterminated (str, what, startpos) end + local char = strsub (str, pos, pos) + if char == closechar then + return tbl, pos + 1 + end + local val1, err + val1, pos, err = scanvalue (str, pos, nullval, objectmeta, arraymeta) + if err then return nil, pos, err end + pos = scanwhite (str, pos) + if not pos then return unterminated (str, what, startpos) end + char = strsub (str, pos, pos) + if char == ":" then + if val1 == nil then + return nil, pos, "cannot use nil as table index (at " .. loc (str, pos) .. 
")" + end + pos = scanwhite (str, pos + 1) + if not pos then return unterminated (str, what, startpos) end + local val2 + val2, pos, err = scanvalue (str, pos, nullval, objectmeta, arraymeta) + if err then return nil, pos, err end + tbl[val1] = val2 + pos = scanwhite (str, pos) + if not pos then return unterminated (str, what, startpos) end + char = strsub (str, pos, pos) + else + n = n + 1 + tbl[n] = val1 + end + if char == "," then + pos = pos + 1 + end + end +end + +scanvalue = function (str, pos, nullval, objectmeta, arraymeta) + pos = pos or 1 + pos = scanwhite (str, pos) + if not pos then + return nil, strlen (str) + 1, "no valid JSON value (reached the end)" + end + local char = strsub (str, pos, pos) + if char == "{" then + return scantable ('object', "}", str, pos, nullval, objectmeta, arraymeta) + elseif char == "[" then + return scantable ('array', "]", str, pos, nullval, objectmeta, arraymeta) + elseif char == "\"" then + return scanstring (str, pos) + else + local pstart, pend = strfind (str, "^%-?[%d%.]+[eE]?[%+%-]?%d*", pos) + if pstart then + local number = str2num (strsub (str, pstart, pend)) + if number then + return number, pend + 1 + end + end + pstart, pend = strfind (str, "^%a%w*", pos) + if pstart then + local name = strsub (str, pstart, pend) + if name == "true" then + return true, pend + 1 + elseif name == "false" then + return false, pend + 1 + elseif name == "null" then + return nullval, pend + 1 + end + end + return nil, pos, "no valid JSON value at " .. loc (str, pos) + end +end + +local function optionalmetatables(...) + if select("#", ...) > 0 then + return ... + else + return {__jsontype = 'object'}, {__jsontype = 'array'} + end +end + +function json.decode (str, pos, nullval, ...) + local objectmeta, arraymeta = optionalmetatables(...) 
+ return scanvalue (str, pos, nullval, objectmeta, arraymeta) +end + +function json.use_lpeg () + local g = require ("lpeg") + + if g.version() == "0.11" then + error "due to a bug in LPeg 0.11, it cannot be used for JSON matching" + end + + local pegmatch = g.match + local P, S, R = g.P, g.S, g.R + + local function ErrorCall (str, pos, msg, state) + if not state.msg then + state.msg = msg .. " at " .. loc (str, pos) + state.pos = pos + end + return false + end + + local function Err (msg) + return g.Cmt (g.Cc (msg) * g.Carg (2), ErrorCall) + end + + local SingleLineComment = P"//" * (1 - S"\n\r")^0 + local MultiLineComment = P"/*" * (1 - P"*/")^0 * P"*/" + local Space = (S" \n\r\t" + P"\239\187\191" + SingleLineComment + MultiLineComment)^0 + + local PlainChar = 1 - S"\"\\\n\r" + local EscapeSequence = (P"\\" * g.C (S"\"\\/bfnrt" + Err "unsupported escape sequence")) / escapechars + local HexDigit = R("09", "af", "AF") + local function UTF16Surrogate (match, pos, high, low) + high, low = tonumber (high, 16), tonumber (low, 16) + if 0xD800 <= high and high <= 0xDBff and 0xDC00 <= low and low <= 0xDFFF then + return true, unichar ((high - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000) + else + return false + end + end + local function UTF16BMP (hex) + return unichar (tonumber (hex, 16)) + end + local U16Sequence = (P"\\u" * g.C (HexDigit * HexDigit * HexDigit * HexDigit)) + local UnicodeEscape = g.Cmt (U16Sequence * U16Sequence, UTF16Surrogate) + U16Sequence/UTF16BMP + local Char = UnicodeEscape + EscapeSequence + PlainChar + local String = P"\"" * g.Cs (Char ^ 0) * (P"\"" + Err "unterminated string") + local Integer = P"-"^(-1) * (P"0" + (R"19" * R"09"^0)) + local Fractal = P"." 
* R"09"^0 + local Exponent = (S"eE") * (S"+-")^(-1) * R"09"^1 + local Number = (Integer * Fractal^(-1) * Exponent^(-1))/str2num + local Constant = P"true" * g.Cc (true) + P"false" * g.Cc (false) + P"null" * g.Carg (1) + local SimpleValue = Number + String + Constant + local ArrayContent, ObjectContent + + -- The functions parsearray and parseobject parse only a single value/pair + -- at a time and store them directly to avoid hitting the LPeg limits. + local function parsearray (str, pos, nullval, state) + local obj, cont + local npos + local t, nt = {}, 0 + repeat + obj, cont, npos = pegmatch (ArrayContent, str, pos, nullval, state) + if not npos then break end + pos = npos + nt = nt + 1 + t[nt] = obj + until cont == 'last' + return pos, setmetatable (t, state.arraymeta) + end + + local function parseobject (str, pos, nullval, state) + local obj, key, cont + local npos + local t = {} + repeat + key, obj, cont, npos = pegmatch (ObjectContent, str, pos, nullval, state) + if not npos then break end + pos = npos + t[key] = obj + until cont == 'last' + return pos, setmetatable (t, state.objectmeta) + end + + local Array = P"[" * g.Cmt (g.Carg(1) * g.Carg(2), parsearray) * Space * (P"]" + Err "']' expected") + local Object = P"{" * g.Cmt (g.Carg(1) * g.Carg(2), parseobject) * Space * (P"}" + Err "'}' expected") + local Value = Space * (Array + Object + SimpleValue) + local ExpectedValue = Value + Space * Err "value expected" + ArrayContent = Value * Space * (P"," * g.Cc'cont' + g.Cc'last') * g.Cp() + local Pair = g.Cg (Space * String * Space * (P":" + Err "colon expected") * ExpectedValue) + ObjectContent = Pair * Space * (P"," * g.Cc'cont' + g.Cc'last') * g.Cp() + local DecodeValue = ExpectedValue * g.Cp () + + function json.decode (str, pos, nullval, ...) + local state = {} + state.objectmeta, state.arraymeta = optionalmetatables(...) 
+ local obj, retpos = pegmatch (DecodeValue, str, pos, nullval, state) + if state.msg then + return nil, state.pos, state.msg + else + return obj, retpos + end + end + + -- use this function only once: + json.use_lpeg = function () return json end + + json.using_lpeg = true + + return json -- so you can get the module using json = require "dkjson".use_lpeg() +end + +if always_try_using_lpeg then + pcall (json.use_lpeg) +end + +return json + diff --git a/dictgen/steno.lua b/dictgen/steno.lua new file mode 100644 index 0000000..95040d6 --- /dev/null +++ b/dictgen/steno.lua @@ -0,0 +1,251 @@ +-- toki pona steno generator +-- TODO: +-- - make fingerspelling syllable-based MAYBE +-- - punctuation? + +local dkjson = require("dkjson") + +local function warn(str) + io.stderr:write(str .. "\n") +end + +local function nop() end + +local tp_order = { "S", "P", "K", "L", "M", "H", "W", "N", "R", "A", "I", "E", "O", "U" } + +-- takes a table of chords - each chord is just a table where any key defined is a key pressed in the chord +local function write_tp_outline(outline) + local out = {} + for i, chord in ipairs(outline) do + if i > 1 then table.insert(out, "/") end + + for _, k in ipairs(tp_order) do + if chord[k] then table.insert(out, k) end + end + end + + return table.concat(out) +end + +local function read_tp_outline(str) + local out = {} + + for chord in string.gmatch(str, "[^/]+") do + local map = {} + for key in string.gmatch(chord, ".") do map[key] = true end + table.insert(out, map) + end + + return out +end + +assert(write_tp_outline({{S = true, L = true}}) == "SL") +assert(write_tp_outline({{S = true, P = true, O = true}, {N = true, I = true}}) == "SPO/NI") + +local function merge_outline(outline) + local out = {} + + for i, chord in ipairs(outline) do + for k in pairs(chord) do out[k] = true end + end + + return { out } +end + +assert(write_tp_outline(merge_outline({{S = true, P = true, O = true}, {N = true, I = true}})) == "SPNIO") + +local steno_order = { "S-", 
"T-", "K-", "P-", "W-", "H-", "R-", "A-", "O-", "*", "-E", "-U", "-F", "-R", "-P", "-B", "-L", "-G", "-T", "-S", "-D", "-Z" } + +-- convert a traditional-style steno chord to a string +local function write_chord(chord) + local out = {} + + local needs_dash = true + + for _, key in ipairs(steno_order) do + if chord[key] then + local letter = string.gsub(key, "-", "") + if string.match("AOEU*", letter) then needs_dash = false end + + if string.match(key, "^-") and needs_dash then + table.insert(out, "-") + needs_dash = false + end + + table.insert(out, letter) + end + end + + return table.concat(out) +end + +local function write_outline(outline) + local out = {} + + for _, chord in ipairs(outline) do + table.insert(out, write_chord(chord)) + end + + return table.concat(out, "/") +end + +assert(write_outline({{["P-"] = 1, ["O-"] = 1, ["-T"] = 1}}) == "POT") +assert(write_outline({{["-E"] = 1}}) == "E") +assert(write_outline({{["T-"] = 1, ["-T"] = 1}}) == "T-T") +assert(write_outline({{["T-"] = 1}, {["T-"] = 1}}) == "T/T") + +local tp_steno_map = { + S = "S-", P = "T-", K = "P-", L = "H-", M = "*", + H = "A-", W = "O-", N = "-E", R = "-U", + A = "-F", I = "-P", E = "-L", O = "-T", U = "-D", +} + +local function chord_to_steno(chord) + local out = {} + for key in pairs(chord) do + out[tp_steno_map[key]] = true + end + return out +end + +local function outline_to_steno(outline) + local out = {} + for _, chord in ipairs(outline) do + table.insert(out, chord_to_steno(chord)) + end + return out +end + +assert(write_outline(outline_to_steno({{K = 1, E = 1}, {P = 1, E = 1}, {K = 1, E = 1, N = 1}})) == "P-L/T-L/PEL") + +-- ok now we're actually generating the maps for tp words + +-- returns iterator of syllables +local function split_word(word) + local word = string.gsub(word, "(n?)([^aeiou][aeiou])", "%1-%2") + return string.gmatch(word, "[^-]+") +end + +local letter_map = { + s = "S", t = "SH", p = "P", w = "PH", + k = "K", j = "KH", l = "L", m = "M", + n = "MH", a = "A", e 
= "E", i = "I", + o = "O", u = "U", +} + +local function convert_syllable(s) + local out = {} + if string.match(s, "n$") then + out.N = true + s = string.gsub(s, "n$", "") + end + + for i in string.gmatch(s, ".") do + for k in string.gmatch(letter_map[i], ".") do out[k] = true end + end + + return out +end + +local function word_outline(word) + local out = {} + + for s in split_word(word) do + table.insert(out, convert_syllable(s)) + end + + return out +end + +local function assert_tp(outline, expected) + assert(write_tp_outline(outline) == expected, expected .. " " .. write_tp_outline(outline)) +end + +assert_tp(word_outline("ken"), "KNE") +assert_tp(word_outline("kepeken"), "KE/PE/KNE") +assert_tp(word_outline("linja"), "LNI/KHA") + +-- word briefs are single-chord versions of a word's outline, with W added when a modifier is requested +local function word_brief(word, modifier) + local out = word_outline(word) + + -- we don't wanna take space with single-syllable briefs + if #out == 1 then return nil end + + if modifier then table.insert(out, {W = true}) end + return merge_outline(out) +end + +assert_tp(word_brief("linja"), "KLHNAI") +assert_tp(word_brief("lili", true), "LWI") + +-- +do + if not arg[1] then + warn("usage: steno.lua [CONFIG]") + os.exit(1) + end + + local config = assert(dofile(arg[1])) + assert(config.words) + if not config.extra_briefs then config.extra_briefs = {} end + local dictionary = {} + + -- returns whether insertion was successful + -- you can pass a nil outline to make this a noöp + local function add_word(outline, word, force) + -- it's more useful to just pretend the insertion was successful + if not outline then return true end + -- we can change this for keymaps later! 
+ local steno + if config.convert_to_english then + steno = write_outline(outline_to_steno(outline)) + else + steno = write_tp_outline(outline) + end + + if not force and dictionary[steno] then + nop(string.format( + "duplicate: %s = %s, %s", + steno, dictionary[steno], word + )) + return false + else + dictionary[steno] = word + return true + end + end + + for _, word in ipairs(config.words) do + add_word(word_outline(word), word, true) + if not add_word(word_brief(word, false), word, false) then + add_word(word_brief(word, true), word, false) + end + + local partial = "" + for syl in split_word(word) do + partial = partial .. syl + if syl == word then break end + add_word(word_outline(partial), partial, false) + end + end + + for k, v in pairs(config.extra_briefs) do + -- the irony of reparsing and unparsing this is not lost on me + add_word(read_tp_outline(k), v, true) + end + + for _, upper in ipairs({ false, true }) do + for letter, keys in pairs(letter_map) do + if upper then letter = string.upper(letter) end + letter = "{&" .. letter .. "}" + keys = keys .. "W" .. 
(upper and "R" or "") + + add_word(read_tp_outline(keys), letter, true) + end + end + + print(dkjson.encode(dictionary)) +end + +return { write_tp_outline = write_tp_outline, word_outline = word_outline } diff --git a/dictgen/words.lua b/dictgen/words.lua new file mode 100644 index 0000000..b83b4ad --- /dev/null +++ b/dictgen/words.lua @@ -0,0 +1,127 @@ +-- in roughly frequency order +return { + "mi", + "li", + "e", + "pona", + "toki", + "ni", + "a", + "la", + "ala", + "lon", + "sina", + "jan", + "tawa", + "sona", + "seme", + "pi", + "tenpo", + "ona", + "mute", + "taso", + "wile", + "o", + "pilin", + "kama", + "ken", + "ike", + "lili", + "tan", + "nimi", + "pali", + "ma", + "musi", + "sitelen", + "kepeken", + "tomo", + "ali", + "lukin", + "oko", + "jo", + "kin", + "pini", + "ilo", + "anu", + "ante", + "lape", + "moku", + "sama", + "suli", + "kalama", + "suno", + "telo", + "kulupu", + "nasa", + "sin", + "lipu", + "pana", + "pakala", + "ijo", + "soweli", + "tu", + "nasin", + "lawa", + "en", + "wawa", + "weka", + "wan", + "mu", + "awen", + "nanpa", + "olin", + "suwi", + "kon", + "seli", + "sewi", + "kute", + "mama", + "sike", + "moli", + "pimeja", + "lete", + "kasi", + "luka", + "sijelo", + "uta", + "poka", + "kala", + "jaki", + "insa", + "utala", + "mani", + "linja", + "open", + "len", + "waso", + "pan", + "ko", + "esun", + "kili", + "meli", + "tonsi", + "pipi", + "supa", + "kiwen", + "poki", + "palisa", + "kule", + "laso", + "noka", + "namako", + "loje", + "walo", + "unpa", + "anpa", + "mun", + "nena", + "mije", + "akesi", + "alasa", + "sinpin", + "selo", + "jelo", + "monsi", + "lupa", + "pu" +} diff --git a/plover_toki_pona/dictionaries/dkjson.lua b/plover_toki_pona/dictionaries/dkjson.lua deleted file mode 100644 index fa50b9f..0000000 --- a/plover_toki_pona/dictionaries/dkjson.lua +++ /dev/null @@ -1,714 +0,0 @@ --- Module options: -local always_try_using_lpeg = true -local register_global_module_table = false -local global_module_name = 'json' - ---[==[ - -David 
Kolf's JSON module for Lua 5.1/5.2 - -Version 2.5 - - -For the documentation see the corresponding readme.txt or visit -. - -You can contact the author by sending an e-mail to 'david' at the -domain 'dkolf.de'. - - -Copyright (C) 2010-2013 David Heiko Kolf - -Permission is hereby granted, free of charge, to any person obtaining -a copy of this software and associated documentation files (the -"Software"), to deal in the Software without restriction, including -without limitation the rights to use, copy, modify, merge, publish, -distribute, sublicense, and/or sell copies of the Software, and to -permit persons to whom the Software is furnished to do so, subject to -the following conditions: - -The above copyright notice and this permission notice shall be -included in all copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, -EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF -MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND -NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS -BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN -ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN -CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. 
- ---]==] - --- global dependencies: -local pairs, type, tostring, tonumber, getmetatable, setmetatable, rawset = - pairs, type, tostring, tonumber, getmetatable, setmetatable, rawset -local error, require, pcall, select = error, require, pcall, select -local floor, huge = math.floor, math.huge -local strrep, gsub, strsub, strbyte, strchar, strfind, strlen, strformat = - string.rep, string.gsub, string.sub, string.byte, string.char, - string.find, string.len, string.format -local strmatch = string.match -local concat = table.concat - -local json = { version = "dkjson 2.5" } - -if register_global_module_table then - _G[global_module_name] = json -end - -local _ENV = nil -- blocking globals in Lua 5.2 - -pcall (function() - -- Enable access to blocked metatables. - -- Don't worry, this module doesn't change anything in them. - local debmeta = require "debug".getmetatable - if debmeta then getmetatable = debmeta end -end) - -json.null = setmetatable ({}, { - __tojson = function () return "null" end -}) - -local function isarray (tbl) - local max, n, arraylen = 0, 0, 0 - for k,v in pairs (tbl) do - if k == 'n' and type(v) == 'number' then - arraylen = v - if v > max then - max = v - end - else - if type(k) ~= 'number' or k < 1 or floor(k) ~= k then - return false - end - if k > max then - max = k - end - n = n + 1 - end - end - if max > 10 and max > arraylen and max > n * 2 then - return false -- don't create an array with too many holes - end - return true, max -end - -local escapecodes = { - ["\""] = "\\\"", ["\\"] = "\\\\", ["\b"] = "\\b", ["\f"] = "\\f", - ["\n"] = "\\n", ["\r"] = "\\r", ["\t"] = "\\t" -} - -local function escapeutf8 (uchar) - local value = escapecodes[uchar] - if value then - return value - end - local a, b, c, d = strbyte (uchar, 1, 4) - a, b, c, d = a or 0, b or 0, c or 0, d or 0 - if a <= 0x7f then - value = a - elseif 0xc0 <= a and a <= 0xdf and b >= 0x80 then - value = (a - 0xc0) * 0x40 + b - 0x80 - elseif 0xe0 <= a and a <= 0xef and b >= 
0x80 and c >= 0x80 then - value = ((a - 0xe0) * 0x40 + b - 0x80) * 0x40 + c - 0x80 - elseif 0xf0 <= a and a <= 0xf7 and b >= 0x80 and c >= 0x80 and d >= 0x80 then - value = (((a - 0xf0) * 0x40 + b - 0x80) * 0x40 + c - 0x80) * 0x40 + d - 0x80 - else - return "" - end - if value <= 0xffff then - return strformat ("\\u%.4x", value) - elseif value <= 0x10ffff then - -- encode as UTF-16 surrogate pair - value = value - 0x10000 - local highsur, lowsur = 0xD800 + floor (value/0x400), 0xDC00 + (value % 0x400) - return strformat ("\\u%.4x\\u%.4x", highsur, lowsur) - else - return "" - end -end - -local function fsub (str, pattern, repl) - -- gsub always builds a new string in a buffer, even when no match - -- exists. First using find should be more efficient when most strings - -- don't contain the pattern. - if strfind (str, pattern) then - return gsub (str, pattern, repl) - else - return str - end -end - -local function quotestring (value) - -- based on the regexp "escapable" in https://github.com/douglascrockford/JSON-js - value = fsub (value, "[%z\1-\31\"\\\127]", escapeutf8) - if strfind (value, "[\194\216\220\225\226\239]") then - value = fsub (value, "\194[\128-\159\173]", escapeutf8) - value = fsub (value, "\216[\128-\132]", escapeutf8) - value = fsub (value, "\220\143", escapeutf8) - value = fsub (value, "\225\158[\180\181]", escapeutf8) - value = fsub (value, "\226\128[\140-\143\168-\175]", escapeutf8) - value = fsub (value, "\226\129[\160-\175]", escapeutf8) - value = fsub (value, "\239\187\191", escapeutf8) - value = fsub (value, "\239\191[\176-\191]", escapeutf8) - end - return "\"" .. value .. "\"" -end -json.quotestring = quotestring - -local function replace(str, o, n) - local i, j = strfind (str, o, 1, true) - if i then - return strsub(str, 1, i-1) .. n .. 
strsub(str, j+1, -1) - else - return str - end -end - --- locale independent num2str and str2num functions -local decpoint, numfilter - -local function updatedecpoint () - decpoint = strmatch(tostring(0.5), "([^05+])") - -- build a filter that can be used to remove group separators - numfilter = "[^0-9%-%+eE" .. gsub(decpoint, "[%^%$%(%)%%%.%[%]%*%+%-%?]", "%%%0") .. "]+" -end - -updatedecpoint() - -local function num2str (num) - return replace(fsub(tostring(num), numfilter, ""), decpoint, ".") -end - -local function str2num (str) - local num = tonumber(replace(str, ".", decpoint)) - if not num then - updatedecpoint() - num = tonumber(replace(str, ".", decpoint)) - end - return num -end - -local function addnewline2 (level, buffer, buflen) - buffer[buflen+1] = "\n" - buffer[buflen+2] = strrep (" ", level) - buflen = buflen + 2 - return buflen -end - -function json.addnewline (state) - if state.indent then - state.bufferlen = addnewline2 (state.level or 0, - state.buffer, state.bufferlen or #(state.buffer)) - end -end - -local encode2 -- forward declaration - -local function addpair (key, value, prev, indent, level, buffer, buflen, tables, globalorder, state) - local kt = type (key) - if kt ~= 'string' and kt ~= 'number' then - return nil, "type '" .. kt .. "' is not supported as a key by JSON." 
- end - if prev then - buflen = buflen + 1 - buffer[buflen] = "," - end - if indent then - buflen = addnewline2 (level, buffer, buflen) - end - buffer[buflen+1] = quotestring (key) - buffer[buflen+2] = ":" - return encode2 (value, indent, level, buffer, buflen + 2, tables, globalorder, state) -end - -local function appendcustom(res, buffer, state) - local buflen = state.bufferlen - if type (res) == 'string' then - buflen = buflen + 1 - buffer[buflen] = res - end - return buflen -end - -local function exception(reason, value, state, buffer, buflen, defaultmessage) - defaultmessage = defaultmessage or reason - local handler = state.exception - if not handler then - return nil, defaultmessage - else - state.bufferlen = buflen - local ret, msg = handler (reason, value, state, defaultmessage) - if not ret then return nil, msg or defaultmessage end - return appendcustom(ret, buffer, state) - end -end - -function json.encodeexception(reason, value, state, defaultmessage) - return quotestring("<" .. defaultmessage .. ">") -end - -encode2 = function (value, indent, level, buffer, buflen, tables, globalorder, state) - local valtype = type (value) - local valmeta = getmetatable (value) - valmeta = type (valmeta) == 'table' and valmeta -- only tables - local valtojson = valmeta and valmeta.__tojson - if valtojson then - if tables[value] then - return exception('reference cycle', value, state, buffer, buflen) - end - tables[value] = true - state.bufferlen = buflen - local ret, msg = valtojson (value, state) - if not ret then return exception('custom encoder failed', value, state, buffer, buflen, msg) end - tables[value] = nil - buflen = appendcustom(ret, buffer, state) - elseif value == nil then - buflen = buflen + 1 - buffer[buflen] = "null" - elseif valtype == 'number' then - local s - if value ~= value or value >= huge or -value >= huge then - -- This is the behaviour of the original JSON implementation. 
- s = "null" - else - s = num2str (value) - end - buflen = buflen + 1 - buffer[buflen] = s - elseif valtype == 'boolean' then - buflen = buflen + 1 - buffer[buflen] = value and "true" or "false" - elseif valtype == 'string' then - buflen = buflen + 1 - buffer[buflen] = quotestring (value) - elseif valtype == 'table' then - if tables[value] then - return exception('reference cycle', value, state, buffer, buflen) - end - tables[value] = true - level = level + 1 - local isa, n = isarray (value) - if n == 0 and valmeta and valmeta.__jsontype == 'object' then - isa = false - end - local msg - if isa then -- JSON array - buflen = buflen + 1 - buffer[buflen] = "[" - for i = 1, n do - buflen, msg = encode2 (value[i], indent, level, buffer, buflen, tables, globalorder, state) - if not buflen then return nil, msg end - if i < n then - buflen = buflen + 1 - buffer[buflen] = "," - end - end - buflen = buflen + 1 - buffer[buflen] = "]" - else -- JSON object - local prev = false - buflen = buflen + 1 - buffer[buflen] = "{" - local order = valmeta and valmeta.__jsonorder or globalorder - if order then - local used = {} - n = #order - for i = 1, n do - local k = order[i] - local v = value[k] - if v then - used[k] = true - buflen, msg = addpair (k, v, prev, indent, level, buffer, buflen, tables, globalorder, state) - prev = true -- add a seperator before the next element - end - end - for k,v in pairs (value) do - if not used[k] then - buflen, msg = addpair (k, v, prev, indent, level, buffer, buflen, tables, globalorder, state) - if not buflen then return nil, msg end - prev = true -- add a seperator before the next element - end - end - else -- unordered - for k,v in pairs (value) do - buflen, msg = addpair (k, v, prev, indent, level, buffer, buflen, tables, globalorder, state) - if not buflen then return nil, msg end - prev = true -- add a seperator before the next element - end - end - if indent then - buflen = addnewline2 (level - 1, buffer, buflen) - end - buflen = buflen + 1 
- buffer[buflen] = "}" - end - tables[value] = nil - else - return exception ('unsupported type', value, state, buffer, buflen, - "type '" .. valtype .. "' is not supported by JSON.") - end - return buflen -end - -function json.encode (value, state) - state = state or {} - local oldbuffer = state.buffer - local buffer = oldbuffer or {} - state.buffer = buffer - updatedecpoint() - local ret, msg = encode2 (value, state.indent, state.level or 0, - buffer, state.bufferlen or 0, state.tables or {}, state.keyorder, state) - if not ret then - error (msg, 2) - elseif oldbuffer == buffer then - state.bufferlen = ret - return true - else - state.bufferlen = nil - state.buffer = nil - return concat (buffer) - end -end - -local function loc (str, where) - local line, pos, linepos = 1, 1, 0 - while true do - pos = strfind (str, "\n", pos, true) - if pos and pos < where then - line = line + 1 - linepos = pos - pos = pos + 1 - else - break - end - end - return "line " .. line .. ", column " .. (where - linepos) -end - -local function unterminated (str, what, where) - return nil, strlen (str) + 1, "unterminated " .. what .. " at " .. 
loc (str, where) -end - -local function scanwhite (str, pos) - while true do - pos = strfind (str, "%S", pos) - if not pos then return nil end - local sub2 = strsub (str, pos, pos + 1) - if sub2 == "\239\187" and strsub (str, pos + 2, pos + 2) == "\191" then - -- UTF-8 Byte Order Mark - pos = pos + 3 - elseif sub2 == "//" then - pos = strfind (str, "[\n\r]", pos + 2) - if not pos then return nil end - elseif sub2 == "/*" then - pos = strfind (str, "*/", pos + 2) - if not pos then return nil end - pos = pos + 2 - else - return pos - end - end -end - -local escapechars = { - ["\""] = "\"", ["\\"] = "\\", ["/"] = "/", ["b"] = "\b", ["f"] = "\f", - ["n"] = "\n", ["r"] = "\r", ["t"] = "\t" -} - -local function unichar (value) - if value < 0 then - return nil - elseif value <= 0x007f then - return strchar (value) - elseif value <= 0x07ff then - return strchar (0xc0 + floor(value/0x40), - 0x80 + (floor(value) % 0x40)) - elseif value <= 0xffff then - return strchar (0xe0 + floor(value/0x1000), - 0x80 + (floor(value/0x40) % 0x40), - 0x80 + (floor(value) % 0x40)) - elseif value <= 0x10ffff then - return strchar (0xf0 + floor(value/0x40000), - 0x80 + (floor(value/0x1000) % 0x40), - 0x80 + (floor(value/0x40) % 0x40), - 0x80 + (floor(value) % 0x40)) - else - return nil - end -end - -local function scanstring (str, pos) - local lastpos = pos + 1 - local buffer, n = {}, 0 - while true do - local nextpos = strfind (str, "[\"\\]", lastpos) - if not nextpos then - return unterminated (str, "string", pos) - end - if nextpos > lastpos then - n = n + 1 - buffer[n] = strsub (str, lastpos, nextpos - 1) - end - if strsub (str, nextpos, nextpos) == "\"" then - lastpos = nextpos + 1 - break - else - local escchar = strsub (str, nextpos + 1, nextpos + 1) - local value - if escchar == "u" then - value = tonumber (strsub (str, nextpos + 2, nextpos + 5), 16) - if value then - local value2 - if 0xD800 <= value and value <= 0xDBff then - -- we have the high surrogate of UTF-16. 
Check if there is a - -- low surrogate escaped nearby to combine them. - if strsub (str, nextpos + 6, nextpos + 7) == "\\u" then - value2 = tonumber (strsub (str, nextpos + 8, nextpos + 11), 16) - if value2 and 0xDC00 <= value2 and value2 <= 0xDFFF then - value = (value - 0xD800) * 0x400 + (value2 - 0xDC00) + 0x10000 - else - value2 = nil -- in case it was out of range for a low surrogate - end - end - end - value = value and unichar (value) - if value then - if value2 then - lastpos = nextpos + 12 - else - lastpos = nextpos + 6 - end - end - end - end - if not value then - value = escapechars[escchar] or escchar - lastpos = nextpos + 2 - end - n = n + 1 - buffer[n] = value - end - end - if n == 1 then - return buffer[1], lastpos - elseif n > 1 then - return concat (buffer), lastpos - else - return "", lastpos - end -end - -local scanvalue -- forward declaration - -local function scantable (what, closechar, str, startpos, nullval, objectmeta, arraymeta) - local len = strlen (str) - local tbl, n = {}, 0 - local pos = startpos + 1 - if what == 'object' then - setmetatable (tbl, objectmeta) - else - setmetatable (tbl, arraymeta) - end - while true do - pos = scanwhite (str, pos) - if not pos then return unterminated (str, what, startpos) end - local char = strsub (str, pos, pos) - if char == closechar then - return tbl, pos + 1 - end - local val1, err - val1, pos, err = scanvalue (str, pos, nullval, objectmeta, arraymeta) - if err then return nil, pos, err end - pos = scanwhite (str, pos) - if not pos then return unterminated (str, what, startpos) end - char = strsub (str, pos, pos) - if char == ":" then - if val1 == nil then - return nil, pos, "cannot use nil as table index (at " .. loc (str, pos) .. 
")" - end - pos = scanwhite (str, pos + 1) - if not pos then return unterminated (str, what, startpos) end - local val2 - val2, pos, err = scanvalue (str, pos, nullval, objectmeta, arraymeta) - if err then return nil, pos, err end - tbl[val1] = val2 - pos = scanwhite (str, pos) - if not pos then return unterminated (str, what, startpos) end - char = strsub (str, pos, pos) - else - n = n + 1 - tbl[n] = val1 - end - if char == "," then - pos = pos + 1 - end - end -end - -scanvalue = function (str, pos, nullval, objectmeta, arraymeta) - pos = pos or 1 - pos = scanwhite (str, pos) - if not pos then - return nil, strlen (str) + 1, "no valid JSON value (reached the end)" - end - local char = strsub (str, pos, pos) - if char == "{" then - return scantable ('object', "}", str, pos, nullval, objectmeta, arraymeta) - elseif char == "[" then - return scantable ('array', "]", str, pos, nullval, objectmeta, arraymeta) - elseif char == "\"" then - return scanstring (str, pos) - else - local pstart, pend = strfind (str, "^%-?[%d%.]+[eE]?[%+%-]?%d*", pos) - if pstart then - local number = str2num (strsub (str, pstart, pend)) - if number then - return number, pend + 1 - end - end - pstart, pend = strfind (str, "^%a%w*", pos) - if pstart then - local name = strsub (str, pstart, pend) - if name == "true" then - return true, pend + 1 - elseif name == "false" then - return false, pend + 1 - elseif name == "null" then - return nullval, pend + 1 - end - end - return nil, pos, "no valid JSON value at " .. loc (str, pos) - end -end - -local function optionalmetatables(...) - if select("#", ...) > 0 then - return ... - else - return {__jsontype = 'object'}, {__jsontype = 'array'} - end -end - -function json.decode (str, pos, nullval, ...) - local objectmeta, arraymeta = optionalmetatables(...) 
- return scanvalue (str, pos, nullval, objectmeta, arraymeta) -end - -function json.use_lpeg () - local g = require ("lpeg") - - if g.version() == "0.11" then - error "due to a bug in LPeg 0.11, it cannot be used for JSON matching" - end - - local pegmatch = g.match - local P, S, R = g.P, g.S, g.R - - local function ErrorCall (str, pos, msg, state) - if not state.msg then - state.msg = msg .. " at " .. loc (str, pos) - state.pos = pos - end - return false - end - - local function Err (msg) - return g.Cmt (g.Cc (msg) * g.Carg (2), ErrorCall) - end - - local SingleLineComment = P"//" * (1 - S"\n\r")^0 - local MultiLineComment = P"/*" * (1 - P"*/")^0 * P"*/" - local Space = (S" \n\r\t" + P"\239\187\191" + SingleLineComment + MultiLineComment)^0 - - local PlainChar = 1 - S"\"\\\n\r" - local EscapeSequence = (P"\\" * g.C (S"\"\\/bfnrt" + Err "unsupported escape sequence")) / escapechars - local HexDigit = R("09", "af", "AF") - local function UTF16Surrogate (match, pos, high, low) - high, low = tonumber (high, 16), tonumber (low, 16) - if 0xD800 <= high and high <= 0xDBff and 0xDC00 <= low and low <= 0xDFFF then - return true, unichar ((high - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000) - else - return false - end - end - local function UTF16BMP (hex) - return unichar (tonumber (hex, 16)) - end - local U16Sequence = (P"\\u" * g.C (HexDigit * HexDigit * HexDigit * HexDigit)) - local UnicodeEscape = g.Cmt (U16Sequence * U16Sequence, UTF16Surrogate) + U16Sequence/UTF16BMP - local Char = UnicodeEscape + EscapeSequence + PlainChar - local String = P"\"" * g.Cs (Char ^ 0) * (P"\"" + Err "unterminated string") - local Integer = P"-"^(-1) * (P"0" + (R"19" * R"09"^0)) - local Fractal = P"." 
* R"09"^0 - local Exponent = (S"eE") * (S"+-")^(-1) * R"09"^1 - local Number = (Integer * Fractal^(-1) * Exponent^(-1))/str2num - local Constant = P"true" * g.Cc (true) + P"false" * g.Cc (false) + P"null" * g.Carg (1) - local SimpleValue = Number + String + Constant - local ArrayContent, ObjectContent - - -- The functions parsearray and parseobject parse only a single value/pair - -- at a time and store them directly to avoid hitting the LPeg limits. - local function parsearray (str, pos, nullval, state) - local obj, cont - local npos - local t, nt = {}, 0 - repeat - obj, cont, npos = pegmatch (ArrayContent, str, pos, nullval, state) - if not npos then break end - pos = npos - nt = nt + 1 - t[nt] = obj - until cont == 'last' - return pos, setmetatable (t, state.arraymeta) - end - - local function parseobject (str, pos, nullval, state) - local obj, key, cont - local npos - local t = {} - repeat - key, obj, cont, npos = pegmatch (ObjectContent, str, pos, nullval, state) - if not npos then break end - pos = npos - t[key] = obj - until cont == 'last' - return pos, setmetatable (t, state.objectmeta) - end - - local Array = P"[" * g.Cmt (g.Carg(1) * g.Carg(2), parsearray) * Space * (P"]" + Err "']' expected") - local Object = P"{" * g.Cmt (g.Carg(1) * g.Carg(2), parseobject) * Space * (P"}" + Err "'}' expected") - local Value = Space * (Array + Object + SimpleValue) - local ExpectedValue = Value + Space * Err "value expected" - ArrayContent = Value * Space * (P"," * g.Cc'cont' + g.Cc'last') * g.Cp() - local Pair = g.Cg (Space * String * Space * (P":" + Err "colon expected") * ExpectedValue) - ObjectContent = Pair * Space * (P"," * g.Cc'cont' + g.Cc'last') * g.Cp() - local DecodeValue = ExpectedValue * g.Cp () - - function json.decode (str, pos, nullval, ...) - local state = {} - state.objectmeta, state.arraymeta = optionalmetatables(...) 
- local obj, retpos = pegmatch (DecodeValue, str, pos, nullval, state) - if state.msg then - return nil, state.pos, state.msg - else - return obj, retpos - end - end - - -- use this function only once: - json.use_lpeg = function () return json end - - json.using_lpeg = true - - return json -- so you can get the module using json = require "dkjson".use_lpeg() -end - -if always_try_using_lpeg then - pcall (json.use_lpeg) -end - -return json - diff --git a/plover_toki_pona/dictionaries/steno.lua b/plover_toki_pona/dictionaries/steno.lua deleted file mode 100644 index 7a32c10..0000000 --- a/plover_toki_pona/dictionaries/steno.lua +++ /dev/null @@ -1,256 +0,0 @@ --- toki pona steno generator --- TODO: --- - make fingerspelling syllable-based MAYBE --- - punctuation? - -local dkjson = require("dkjson") -local words = require("words") - -local function warn(str) - io.stderr:write(str .. "\n") -end - -local function nop() end - -local tp_order = { "S", "P", "K", "L", "M", "H", "W", "N", "R", "A", "I", "E", "O", "U" } - --- takes a table of chords - each chord is just a table where any key defined is a key pressedi n the chord -local function write_tp_outline(outline) - local out = {} - for i, chord in ipairs(outline) do - if i > 1 then table.insert(out, "/") end - - for _, k in ipairs(tp_order) do - if chord[k] then table.insert(out, k) end - end - end - - return table.concat(out) -end - -local function read_tp_outline(str) - local out = {} - - for chord in string.gmatch(str, "[^/]+") do - local map = {} - for key in string.gmatch(chord, ".") do map[key] = true end - table.insert(out, map) - end - - return out -end - -assert(write_tp_outline({{S = true, L = true}}) == "SL") -assert(write_tp_outline({{S = true, P = true, O = true}, {N = true, I = true}}) == "SPO/NI") - -local function merge_outline(outline) - local out = {} - - for i, chord in ipairs(outline) do - for k in pairs(chord) do out[k] = true end - end - - return { out } -end - 
-assert(write_tp_outline(merge_outline({{S = true, P = true, O = true}, {N = true, I = true}})) == "SPNIO") - -local steno_order = { "S-", "T-", "K-", "P-", "W-", "H-", "R-", "A-", "O-", "*", "-E", "-U", "-F", "-R", "-P", "-B", "-L", "-G", "-T", "-S", "-D", "-Z" } - --- convert a traditional-style steno chord to a string -local function write_chord(chord) - local out = {} - - local needs_dash = true - - for _, key in ipairs(steno_order) do - if chord[key] then - local letter = string.gsub(key, "-", "") - if string.match("AOEU*", letter) then needs_dash = false end - - if string.match(key, "^-") and needs_dash then - table.insert(out, "-") - needs_dash = false - end - - table.insert(out, letter) - end - end - - return table.concat(out) -end - -local function write_outline(outline) - local out = {} - - for _, chord in ipairs(outline) do - table.insert(out, write_chord(chord)) - end - - return table.concat(out, "/") -end - -assert(write_outline({{["P-"] = 1, ["O-"] = 1, ["-T"] = 1}}) == "POT") -assert(write_outline({{["-E"] = 1}}) == "E") -assert(write_outline({{["T-"] = 1, ["-T"] = 1}}) == "T-T") -assert(write_outline({{["T-"] = 1}, {["T-"] = 1}}) == "T/T") - -local tp_steno_map = { - S = "S-", P = "T-", K = "P-", L = "H-", M = "*", - H = "A-", W = "O-", N = "-E", R = "-U", - A = "-F", I = "-P", E = "-L", O = "-T", U = "-D", -} - -local function chord_to_steno(chord) - local out = {} - for key in pairs(chord) do - out[tp_steno_map[key]] = true - end - return out -end - -local function outline_to_steno(outline) - local out = {} - for _, chord in ipairs(outline) do - table.insert(out, chord_to_steno(chord)) - end - return out -end - -assert(write_outline(outline_to_steno({{K = 1, E = 1}, {P = 1, E = 1}, {K = 1, E = 1, N = 1}})) == "P-L/T-L/PEL") - --- ok now we're actually generating the maps for tp words - --- returns iterator of syllables -local function split_word(word) - local word = string.gsub(word, "(n?)([^aeiou][aeiou])", "%1-%2") - return string.gmatch(word, 
"[^-]+") -end - -local letter_map = { - s = "S", t = "SH", p = "P", w = "PH", - k = "K", j = "KH", l = "L", m = "M", - n = "MH", a = "A", e = "E", i = "I", - o = "O", u = "U", -} - -local function convert_syllable(s) - local out = {} - if string.match(s, "n$") then - out.N = true - s = string.gsub(s, "n$", "") - end - - for i in string.gmatch(s, ".") do - for k in string.gmatch(letter_map[i], ".") do out[k] = true end - end - - return out -end - -local function word_outline(word) - local out = {} - - for s in split_word(word) do - table.insert(out, convert_syllable(s)) - end - - return out -end - -local function assert_tp(outline, expected) - assert(write_tp_outline(outline) == expected, expected .. " " .. write_tp_outline(outline)) -end - -assert_tp(word_outline("ken"), "KNE") -assert_tp(word_outline("kepeken"), "KE/PE/KNE") -assert_tp(word_outline("linja"), "LNI/KHA") - --- word briefs are single-chord verisons with W added -local function word_brief(word, modifier) - local out = word_outline(word) - - -- we don't wanna take space with single-syllable briefs - if #out == 1 then return nil end - - if modifier then table.insert(out, {W = true}) end - return merge_outline(out) -end - -assert_tp(word_brief("linja"), "KLHNAI") -assert_tp(word_brief("lili", true), "LWI") - --- -do - local dictionary = {} - - -- returns whether insertion was successful - -- you can pass a nil outline to make this a noöp - local function add_word(outline, word, force) - -- it's more useful to just pretend the insertion was successful - if not outline then return true end - -- we can change this for keymaps later! 
- local steno = write_outline(outline_to_steno(outline)) - local steno = write_tp_outline(outline) - - if not force and dictionary[steno] then - nop(string.format( - "duplicate: %s = %s, %s", - steno, dictionary[steno], word - )) - return false - else - dictionary[steno] = word - return true - end - end - - for _, word in ipairs(words) do - add_word(word_outline(word), word, true) - if not add_word(word_brief(word, false), word, false) then - add_word(word_brief(word, true), word, false) - end - - local partial = "" - for syl in split_word(word) do - partial = partial .. syl - if syl == word then break end - add_word(word_outline(partial), partial, false) - end - end - - local extra_briefs = { - ["KI/KHE/SHE/SNA/SHA/KA/LU"] = "kijetesantakalu", - ["HR"] = "{^.}", - ["HWNR"] = "{^\n^}", - W = "=undo", - S = "sina", - HS = "tawa", - P = "pi", - HP = "wile", - K = "kama", - HK = "jan", - L = "li", - M = "mi", - HM = "ni", - } - - for k, v in pairs(extra_briefs) do - -- the irony of reparsing and unparsing this is not lost on me - add_word(read_tp_outline(k), v, true) - end - - for _, upper in ipairs({ false, true }) do - for letter, keys in pairs(letter_map) do - if upper then letter = string.upper(letter) end - letter = "{&" .. letter .. "}" - keys = keys .. "W" .. 
(upper and "R" or "") - - add_word(read_tp_outline(keys), letter, true) - end - end - - print(dkjson.encode(dictionary)) -end - -return { write_tp_outline = write_tp_outline, word_outline = word_outline } diff --git a/plover_toki_pona/dictionaries/words.lua b/plover_toki_pona/dictionaries/words.lua deleted file mode 100644 index 5abbcd4..0000000 --- a/plover_toki_pona/dictionaries/words.lua +++ /dev/null @@ -1,126 +0,0 @@ -return { - "mi", - "li", - "e", - "pona", - "toki", - "ni", - "a", - "la", - "ala", - "lon", - "sina", - "jan", - "tawa", - "sona", - "seme", - "pi", - "tenpo", - "ona", - "mute", - "taso", - "wile", - "o", - "pilin", - "kama", - "ken", - "ike", - "lili", - "tan", - "nimi", - "pali", - "ma", - "musi", - "sitelen", - "kepeken", - "tomo", - "ali", - "lukin", - "oko", - "jo", - "kin", - "pini", - "ilo", - "anu", - "ante", - "lape", - "moku", - "sama", - "suli", - "kalama", - "suno", - "telo", - "kulupu", - "nasa", - "sin", - "lipu", - "pana", - "pakala", - "ijo", - "soweli", - "tu", - "nasin", - "lawa", - "en", - "wawa", - "weka", - "wan", - "mu", - "awen", - "nanpa", - "olin", - "suwi", - "kon", - "seli", - "sewi", - "kute", - "mama", - "sike", - "moli", - "pimeja", - "lete", - "kasi", - "luka", - "sijelo", - "uta", - "poka", - "kala", - "jaki", - "insa", - "utala", - "mani", - "linja", - "open", - "len", - "waso", - "pan", - "ko", - "esun", - "kili", - "meli", - "tonsi", - "pipi", - "supa", - "kiwen", - "poki", - "palisa", - "kule", - "laso", - "noka", - "namako", - "loje", - "walo", - "unpa", - "anpa", - "mun", - "nena", - "mije", - "akesi", - "alasa", - "sinpin", - "selo", - "jelo", - "monsi", - "lupa", - "pu" -} -- cgit 1.3.0-6-gf8a5