From 18a0bed48264e5fb2db07974dd912f7b5e940f65 Mon Sep 17 00:00:00 2001 From: equa Date: Sun, 15 Jan 2023 11:58:00 -0600 Subject: organize! --- dictgen/config.embedded.lua | 18 ++ dictgen/config.main.lua | 18 ++ dictgen/dkjson.lua | 714 ++++++++++++++++++++++++++++++++++++++++++++ dictgen/steno.lua | 251 ++++++++++++++++ dictgen/words.lua | 127 ++++++++ 5 files changed, 1128 insertions(+) create mode 100644 dictgen/config.embedded.lua create mode 100644 dictgen/config.main.lua create mode 100644 dictgen/dkjson.lua create mode 100644 dictgen/steno.lua create mode 100644 dictgen/words.lua (limited to 'dictgen') diff --git a/dictgen/config.embedded.lua b/dictgen/config.embedded.lua new file mode 100644 index 0000000..8222951 --- /dev/null +++ b/dictgen/config.embedded.lua @@ -0,0 +1,18 @@ +return { + words = require("words"), + convert_to_english = true, + extra_briefs = { + ["KI/KHE/SHE/SNA/SHA/KA/LU"] = "kijetesantakalu", + ["HR"] = "{^.}", + ["HWNR"] = "{^\n^}", + S = "sina", + HS = "tawa", + P = "pi", + HP = "wile", + K = "kama", + HK = "jan", + L = "li", + M = "mi", + HM = "ni", + } +} diff --git a/dictgen/config.main.lua b/dictgen/config.main.lua new file mode 100644 index 0000000..640b973 --- /dev/null +++ b/dictgen/config.main.lua @@ -0,0 +1,18 @@ +return { + words = require("words"), + convert_to_english = false, + extra_briefs = { + ["KI/KHE/SHE/SNA/SHA/KA/LU"] = "kijetesantakalu", + ["HR"] = "{^.}", + ["HWNR"] = "{^\n^}", + S = "sina", + HS = "tawa", + P = "pi", + HP = "wile", + K = "kama", + HK = "jan", + L = "li", + M = "mi", + HM = "ni", + } +} diff --git a/dictgen/dkjson.lua b/dictgen/dkjson.lua new file mode 100644 index 0000000..fa50b9f --- /dev/null +++ b/dictgen/dkjson.lua @@ -0,0 +1,714 @@ +-- Module options: +local always_try_using_lpeg = true +local register_global_module_table = false +local global_module_name = 'json' + +--[==[ + +David Kolf's JSON module for Lua 5.1/5.2 + +Version 2.5 + + +For the documentation see the corresponding readme.txt or visit +. + +You can contact the author by sending an e-mail to 'david' at the +domain 'dkolf.de'. + + +Copyright (C) 2010-2013 David Heiko Kolf + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS +BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN +ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. + +--]==] + +-- global dependencies: +local pairs, type, tostring, tonumber, getmetatable, setmetatable, rawset = + pairs, type, tostring, tonumber, getmetatable, setmetatable, rawset +local error, require, pcall, select = error, require, pcall, select +local floor, huge = math.floor, math.huge +local strrep, gsub, strsub, strbyte, strchar, strfind, strlen, strformat = + string.rep, string.gsub, string.sub, string.byte, string.char, + string.find, string.len, string.format +local strmatch = string.match +local concat = table.concat + +local json = { version = "dkjson 2.5" } + +if register_global_module_table then + _G[global_module_name] = json +end + +local _ENV = nil -- blocking globals in Lua 5.2 + +pcall (function() + -- Enable access to blocked metatables. + -- Don't worry, this module doesn't change anything in them. + local debmeta = require "debug".getmetatable + if debmeta then getmetatable = debmeta end +end) + +json.null = setmetatable ({}, { + __tojson = function () return "null" end +}) + +local function isarray (tbl) + local max, n, arraylen = 0, 0, 0 + for k,v in pairs (tbl) do + if k == 'n' and type(v) == 'number' then + arraylen = v + if v > max then + max = v + end + else + if type(k) ~= 'number' or k < 1 or floor(k) ~= k then + return false + end + if k > max then + max = k + end + n = n + 1 + end + end + if max > 10 and max > arraylen and max > n * 2 then + return false -- don't create an array with too many holes + end + return true, max +end + +local escapecodes = { + ["\""] = "\\\"", ["\\"] = "\\\\", ["\b"] = "\\b", ["\f"] = "\\f", + ["\n"] = "\\n", ["\r"] = "\\r", ["\t"] = "\\t" +} + +local function escapeutf8 (uchar) + local value = escapecodes[uchar] + if value then + return value + end + local a, b, c, d = strbyte (uchar, 1, 4) + a, b, c, d = a or 0, b or 0, c or 0, d or 0 + if a <= 0x7f then + value = a + elseif 0xc0 <= a and a <= 0xdf and b >= 0x80 then + value = (a - 0xc0) * 0x40 + b - 0x80 + elseif 0xe0 <= a and a <= 0xef and b >= 0x80 and c >= 0x80 then + value = ((a - 0xe0) * 0x40 + b - 0x80) * 0x40 + c - 0x80 + elseif 0xf0 <= a and a <= 0xf7 and b >= 0x80 and c >= 0x80 and d >= 0x80 then + value = (((a - 0xf0) * 0x40 + b - 0x80) * 0x40 + c - 0x80) * 0x40 + d - 0x80 + else + return "" + end + if value <= 0xffff then + return strformat ("\\u%.4x", value) + elseif value <= 0x10ffff then + -- encode as UTF-16 surrogate pair + value = value - 0x10000 + local highsur, lowsur = 0xD800 + floor (value/0x400), 0xDC00 + (value % 0x400) + return strformat ("\\u%.4x\\u%.4x", highsur, lowsur) + else + return "" + end +end + +local function fsub (str, pattern, repl) + -- gsub always builds a new string in a buffer, even when no match + -- exists. First using find should be more efficient when most strings + -- don't contain the pattern. + if strfind (str, pattern) then + return gsub (str, pattern, repl) + else + return str + end +end + +local function quotestring (value) + -- based on the regexp "escapable" in https://github.com/douglascrockford/JSON-js + value = fsub (value, "[%z\1-\31\"\\\127]", escapeutf8) + if strfind (value, "[\194\216\220\225\226\239]") then + value = fsub (value, "\194[\128-\159\173]", escapeutf8) + value = fsub (value, "\216[\128-\132]", escapeutf8) + value = fsub (value, "\220\143", escapeutf8) + value = fsub (value, "\225\158[\180\181]", escapeutf8) + value = fsub (value, "\226\128[\140-\143\168-\175]", escapeutf8) + value = fsub (value, "\226\129[\160-\175]", escapeutf8) + value = fsub (value, "\239\187\191", escapeutf8) + value = fsub (value, "\239\191[\176-\191]", escapeutf8) + end + return "\"" .. value .. "\"" +end +json.quotestring = quotestring + +local function replace(str, o, n) + local i, j = strfind (str, o, 1, true) + if i then + return strsub(str, 1, i-1) .. n .. strsub(str, j+1, -1) + else + return str + end +end + +-- locale independent num2str and str2num functions +local decpoint, numfilter + +local function updatedecpoint () + decpoint = strmatch(tostring(0.5), "([^05+])") + -- build a filter that can be used to remove group separators + numfilter = "[^0-9%-%+eE" .. gsub(decpoint, "[%^%$%(%)%%%.%[%]%*%+%-%?]", "%%%0") .. "]+" +end + +updatedecpoint() + +local function num2str (num) + return replace(fsub(tostring(num), numfilter, ""), decpoint, ".") +end + +local function str2num (str) + local num = tonumber(replace(str, ".", decpoint)) + if not num then + updatedecpoint() + num = tonumber(replace(str, ".", decpoint)) + end + return num +end + +local function addnewline2 (level, buffer, buflen) + buffer[buflen+1] = "\n" + buffer[buflen+2] = strrep (" ", level) + buflen = buflen + 2 + return buflen +end + +function json.addnewline (state) + if state.indent then + state.bufferlen = addnewline2 (state.level or 0, + state.buffer, state.bufferlen or #(state.buffer)) + end +end + +local encode2 -- forward declaration + +local function addpair (key, value, prev, indent, level, buffer, buflen, tables, globalorder, state) + local kt = type (key) + if kt ~= 'string' and kt ~= 'number' then + return nil, "type '" .. kt .. "' is not supported as a key by JSON." + end + if prev then + buflen = buflen + 1 + buffer[buflen] = "," + end + if indent then + buflen = addnewline2 (level, buffer, buflen) + end + buffer[buflen+1] = quotestring (key) + buffer[buflen+2] = ":" + return encode2 (value, indent, level, buffer, buflen + 2, tables, globalorder, state) +end + +local function appendcustom(res, buffer, state) + local buflen = state.bufferlen + if type (res) == 'string' then + buflen = buflen + 1 + buffer[buflen] = res + end + return buflen +end + +local function exception(reason, value, state, buffer, buflen, defaultmessage) + defaultmessage = defaultmessage or reason + local handler = state.exception + if not handler then + return nil, defaultmessage + else + state.bufferlen = buflen + local ret, msg = handler (reason, value, state, defaultmessage) + if not ret then return nil, msg or defaultmessage end + return appendcustom(ret, buffer, state) + end +end + +function json.encodeexception(reason, value, state, defaultmessage) + return quotestring("<" .. defaultmessage .. ">") +end + +encode2 = function (value, indent, level, buffer, buflen, tables, globalorder, state) + local valtype = type (value) + local valmeta = getmetatable (value) + valmeta = type (valmeta) == 'table' and valmeta -- only tables + local valtojson = valmeta and valmeta.__tojson + if valtojson then + if tables[value] then + return exception('reference cycle', value, state, buffer, buflen) + end + tables[value] = true + state.bufferlen = buflen + local ret, msg = valtojson (value, state) + if not ret then return exception('custom encoder failed', value, state, buffer, buflen, msg) end + tables[value] = nil + buflen = appendcustom(ret, buffer, state) + elseif value == nil then + buflen = buflen + 1 + buffer[buflen] = "null" + elseif valtype == 'number' then + local s + if value ~= value or value >= huge or -value >= huge then + -- This is the behaviour of the original JSON implementation. + s = "null" + else + s = num2str (value) + end + buflen = buflen + 1 + buffer[buflen] = s + elseif valtype == 'boolean' then + buflen = buflen + 1 + buffer[buflen] = value and "true" or "false" + elseif valtype == 'string' then + buflen = buflen + 1 + buffer[buflen] = quotestring (value) + elseif valtype == 'table' then + if tables[value] then + return exception('reference cycle', value, state, buffer, buflen) + end + tables[value] = true + level = level + 1 + local isa, n = isarray (value) + if n == 0 and valmeta and valmeta.__jsontype == 'object' then + isa = false + end + local msg + if isa then -- JSON array + buflen = buflen + 1 + buffer[buflen] = "[" + for i = 1, n do + buflen, msg = encode2 (value[i], indent, level, buffer, buflen, tables, globalorder, state) + if not buflen then return nil, msg end + if i < n then + buflen = buflen + 1 + buffer[buflen] = "," + end + end + buflen = buflen + 1 + buffer[buflen] = "]" + else -- JSON object + local prev = false + buflen = buflen + 1 + buffer[buflen] = "{" + local order = valmeta and valmeta.__jsonorder or globalorder + if order then + local used = {} + n = #order + for i = 1, n do + local k = order[i] + local v = value[k] + if v then + used[k] = true + buflen, msg = addpair (k, v, prev, indent, level, buffer, buflen, tables, globalorder, state) + prev = true -- add a seperator before the next element + end + end + for k,v in pairs (value) do + if not used[k] then + buflen, msg = addpair (k, v, prev, indent, level, buffer, buflen, tables, globalorder, state) + if not buflen then return nil, msg end + prev = true -- add a seperator before the next element + end + end + else -- unordered + for k,v in pairs (value) do + buflen, msg = addpair (k, v, prev, indent, level, buffer, buflen, tables, globalorder, state) + if not buflen then return nil, msg end + prev = true -- add a seperator before the next element + end + end + if indent then + buflen = addnewline2 (level - 1, buffer, buflen) + end + buflen = buflen + 1 + buffer[buflen] = "}" + end + tables[value] = nil + else + return exception ('unsupported type', value, state, buffer, buflen, + "type '" .. valtype .. "' is not supported by JSON.") + end + return buflen +end + +function json.encode (value, state) + state = state or {} + local oldbuffer = state.buffer + local buffer = oldbuffer or {} + state.buffer = buffer + updatedecpoint() + local ret, msg = encode2 (value, state.indent, state.level or 0, + buffer, state.bufferlen or 0, state.tables or {}, state.keyorder, state) + if not ret then + error (msg, 2) + elseif oldbuffer == buffer then + state.bufferlen = ret + return true + else + state.bufferlen = nil + state.buffer = nil + return concat (buffer) + end +end + +local function loc (str, where) + local line, pos, linepos = 1, 1, 0 + while true do + pos = strfind (str, "\n", pos, true) + if pos and pos < where then + line = line + 1 + linepos = pos + pos = pos + 1 + else + break + end + end + return "line " .. line .. ", column " .. (where - linepos) +end + +local function unterminated (str, what, where) + return nil, strlen (str) + 1, "unterminated " .. what .. " at " .. loc (str, where) +end + +local function scanwhite (str, pos) + while true do + pos = strfind (str, "%S", pos) + if not pos then return nil end + local sub2 = strsub (str, pos, pos + 1) + if sub2 == "\239\187" and strsub (str, pos + 2, pos + 2) == "\191" then + -- UTF-8 Byte Order Mark + pos = pos + 3 + elseif sub2 == "//" then + pos = strfind (str, "[\n\r]", pos + 2) + if not pos then return nil end + elseif sub2 == "/*" then + pos = strfind (str, "*/", pos + 2) + if not pos then return nil end + pos = pos + 2 + else + return pos + end + end +end + +local escapechars = { + ["\""] = "\"", ["\\"] = "\\", ["/"] = "/", ["b"] = "\b", ["f"] = "\f", + ["n"] = "\n", ["r"] = "\r", ["t"] = "\t" +} + +local function unichar (value) + if value < 0 then + return nil + elseif value <= 0x007f then + return strchar (value) + elseif value <= 0x07ff then + return strchar (0xc0 + floor(value/0x40), + 0x80 + (floor(value) % 0x40)) + elseif value <= 0xffff then + return strchar (0xe0 + floor(value/0x1000), + 0x80 + (floor(value/0x40) % 0x40), + 0x80 + (floor(value) % 0x40)) + elseif value <= 0x10ffff then + return strchar (0xf0 + floor(value/0x40000), + 0x80 + (floor(value/0x1000) % 0x40), + 0x80 + (floor(value/0x40) % 0x40), + 0x80 + (floor(value) % 0x40)) + else + return nil + end +end + +local function scanstring (str, pos) + local lastpos = pos + 1 + local buffer, n = {}, 0 + while true do + local nextpos = strfind (str, "[\"\\]", lastpos) + if not nextpos then + return unterminated (str, "string", pos) + end + if nextpos > lastpos then + n = n + 1 + buffer[n] = strsub (str, lastpos, nextpos - 1) + end + if strsub (str, nextpos, nextpos) == "\"" then + lastpos = nextpos + 1 + break + else + local escchar = strsub (str, nextpos + 1, nextpos + 1) + local value + if escchar == "u" then + value = tonumber (strsub (str, nextpos + 2, nextpos + 5), 16) + if value then + local value2 + if 0xD800 <= value and value <= 0xDBff then + -- we have the high surrogate of UTF-16. Check if there is a + -- low surrogate escaped nearby to combine them. + if strsub (str, nextpos + 6, nextpos + 7) == "\\u" then + value2 = tonumber (strsub (str, nextpos + 8, nextpos + 11), 16) + if value2 and 0xDC00 <= value2 and value2 <= 0xDFFF then + value = (value - 0xD800) * 0x400 + (value2 - 0xDC00) + 0x10000 + else + value2 = nil -- in case it was out of range for a low surrogate + end + end + end + value = value and unichar (value) + if value then + if value2 then + lastpos = nextpos + 12 + else + lastpos = nextpos + 6 + end + end + end + end + if not value then + value = escapechars[escchar] or escchar + lastpos = nextpos + 2 + end + n = n + 1 + buffer[n] = value + end + end + if n == 1 then + return buffer[1], lastpos + elseif n > 1 then + return concat (buffer), lastpos + else + return "", lastpos + end +end + +local scanvalue -- forward declaration + +local function scantable (what, closechar, str, startpos, nullval, objectmeta, arraymeta) + local len = strlen (str) + local tbl, n = {}, 0 + local pos = startpos + 1 + if what == 'object' then + setmetatable (tbl, objectmeta) + else + setmetatable (tbl, arraymeta) + end + while true do + pos = scanwhite (str, pos) + if not pos then return unterminated (str, what, startpos) end + local char = strsub (str, pos, pos) + if char == closechar then + return tbl, pos + 1 + end + local val1, err + val1, pos, err = scanvalue (str, pos, nullval, objectmeta, arraymeta) + if err then return nil, pos, err end + pos = scanwhite (str, pos) + if not pos then return unterminated (str, what, startpos) end + char = strsub (str, pos, pos) + if char == ":" then + if val1 == nil then + return nil, pos, "cannot use nil as table index (at " .. loc (str, pos) .. ")" + end + pos = scanwhite (str, pos + 1) + if not pos then return unterminated (str, what, startpos) end + local val2 + val2, pos, err = scanvalue (str, pos, nullval, objectmeta, arraymeta) + if err then return nil, pos, err end + tbl[val1] = val2 + pos = scanwhite (str, pos) + if not pos then return unterminated (str, what, startpos) end + char = strsub (str, pos, pos) + else + n = n + 1 + tbl[n] = val1 + end + if char == "," then + pos = pos + 1 + end + end +end + +scanvalue = function (str, pos, nullval, objectmeta, arraymeta) + pos = pos or 1 + pos = scanwhite (str, pos) + if not pos then + return nil, strlen (str) + 1, "no valid JSON value (reached the end)" + end + local char = strsub (str, pos, pos) + if char == "{" then + return scantable ('object', "}", str, pos, nullval, objectmeta, arraymeta) + elseif char == "[" then + return scantable ('array', "]", str, pos, nullval, objectmeta, arraymeta) + elseif char == "\"" then + return scanstring (str, pos) + else + local pstart, pend = strfind (str, "^%-?[%d%.]+[eE]?[%+%-]?%d*", pos) + if pstart then + local number = str2num (strsub (str, pstart, pend)) + if number then + return number, pend + 1 + end + end + pstart, pend = strfind (str, "^%a%w*", pos) + if pstart then + local name = strsub (str, pstart, pend) + if name == "true" then + return true, pend + 1 + elseif name == "false" then + return false, pend + 1 + elseif name == "null" then + return nullval, pend + 1 + end + end + return nil, pos, "no valid JSON value at " .. loc (str, pos) + end +end + +local function optionalmetatables(...) + if select("#", ...) > 0 then + return ... + else + return {__jsontype = 'object'}, {__jsontype = 'array'} + end +end + +function json.decode (str, pos, nullval, ...) + local objectmeta, arraymeta = optionalmetatables(...) + return scanvalue (str, pos, nullval, objectmeta, arraymeta) +end + +function json.use_lpeg () + local g = require ("lpeg") + + if g.version() == "0.11" then + error "due to a bug in LPeg 0.11, it cannot be used for JSON matching" + end + + local pegmatch = g.match + local P, S, R = g.P, g.S, g.R + + local function ErrorCall (str, pos, msg, state) + if not state.msg then + state.msg = msg .. " at " .. loc (str, pos) + state.pos = pos + end + return false + end + + local function Err (msg) + return g.Cmt (g.Cc (msg) * g.Carg (2), ErrorCall) + end + + local SingleLineComment = P"//" * (1 - S"\n\r")^0 + local MultiLineComment = P"/*" * (1 - P"*/")^0 * P"*/" + local Space = (S" \n\r\t" + P"\239\187\191" + SingleLineComment + MultiLineComment)^0 + + local PlainChar = 1 - S"\"\\\n\r" + local EscapeSequence = (P"\\" * g.C (S"\"\\/bfnrt" + Err "unsupported escape sequence")) / escapechars + local HexDigit = R("09", "af", "AF") + local function UTF16Surrogate (match, pos, high, low) + high, low = tonumber (high, 16), tonumber (low, 16) + if 0xD800 <= high and high <= 0xDBff and 0xDC00 <= low and low <= 0xDFFF then + return true, unichar ((high - 0xD800) * 0x400 + (low - 0xDC00) + 0x10000) + else + return false + end + end + local function UTF16BMP (hex) + return unichar (tonumber (hex, 16)) + end + local U16Sequence = (P"\\u" * g.C (HexDigit * HexDigit * HexDigit * HexDigit)) + local UnicodeEscape = g.Cmt (U16Sequence * U16Sequence, UTF16Surrogate) + U16Sequence/UTF16BMP + local Char = UnicodeEscape + EscapeSequence + PlainChar + local String = P"\"" * g.Cs (Char ^ 0) * (P"\"" + Err "unterminated string") + local Integer = P"-"^(-1) * (P"0" + (R"19" * R"09"^0)) + local Fractal = P"." * R"09"^0 + local Exponent = (S"eE") * (S"+-")^(-1) * R"09"^1 + local Number = (Integer * Fractal^(-1) * Exponent^(-1))/str2num + local Constant = P"true" * g.Cc (true) + P"false" * g.Cc (false) + P"null" * g.Carg (1) + local SimpleValue = Number + String + Constant + local ArrayContent, ObjectContent + + -- The functions parsearray and parseobject parse only a single value/pair + -- at a time and store them directly to avoid hitting the LPeg limits. + local function parsearray (str, pos, nullval, state) + local obj, cont + local npos + local t, nt = {}, 0 + repeat + obj, cont, npos = pegmatch (ArrayContent, str, pos, nullval, state) + if not npos then break end + pos = npos + nt = nt + 1 + t[nt] = obj + until cont == 'last' + return pos, setmetatable (t, state.arraymeta) + end + + local function parseobject (str, pos, nullval, state) + local obj, key, cont + local npos + local t = {} + repeat + key, obj, cont, npos = pegmatch (ObjectContent, str, pos, nullval, state) + if not npos then break end + pos = npos + t[key] = obj + until cont == 'last' + return pos, setmetatable (t, state.objectmeta) + end + + local Array = P"[" * g.Cmt (g.Carg(1) * g.Carg(2), parsearray) * Space * (P"]" + Err "']' expected") + local Object = P"{" * g.Cmt (g.Carg(1) * g.Carg(2), parseobject) * Space * (P"}" + Err "'}' expected") + local Value = Space * (Array + Object + SimpleValue) + local ExpectedValue = Value + Space * Err "value expected" + ArrayContent = Value * Space * (P"," * g.Cc'cont' + g.Cc'last') * g.Cp() + local Pair = g.Cg (Space * String * Space * (P":" + Err "colon expected") * ExpectedValue) + ObjectContent = Pair * Space * (P"," * g.Cc'cont' + g.Cc'last') * g.Cp() + local DecodeValue = ExpectedValue * g.Cp () + + function json.decode (str, pos, nullval, ...) + local state = {} + state.objectmeta, state.arraymeta = optionalmetatables(...) + local obj, retpos = pegmatch (DecodeValue, str, pos, nullval, state) + if state.msg then + return nil, state.pos, state.msg + else + return obj, retpos + end + end + + -- use this function only once: + json.use_lpeg = function () return json end + + json.using_lpeg = true + + return json -- so you can get the module using json = require "dkjson".use_lpeg() +end + +if always_try_using_lpeg then + pcall (json.use_lpeg) +end + +return json + diff --git a/dictgen/steno.lua b/dictgen/steno.lua new file mode 100644 index 0000000..95040d6 --- /dev/null +++ b/dictgen/steno.lua @@ -0,0 +1,251 @@ +-- toki pona steno generator +-- TODO: +-- - make fingerspelling syllable-based MAYBE +-- - punctuation? + +local dkjson = require("dkjson") + +local function warn(str) + io.stderr:write(str .. "\n") +end + +local function nop() end + +local tp_order = { "S", "P", "K", "L", "M", "H", "W", "N", "R", "A", "I", "E", "O", "U" } + +-- takes a table of chords - each chord is just a table where any key defined is a key pressedi n the chord +local function write_tp_outline(outline) + local out = {} + for i, chord in ipairs(outline) do + if i > 1 then table.insert(out, "/") end + + for _, k in ipairs(tp_order) do + if chord[k] then table.insert(out, k) end + end + end + + return table.concat(out) +end + +local function read_tp_outline(str) + local out = {} + + for chord in string.gmatch(str, "[^/]+") do + local map = {} + for key in string.gmatch(chord, ".") do map[key] = true end + table.insert(out, map) + end + + return out +end + +assert(write_tp_outline({{S = true, L = true}}) == "SL") +assert(write_tp_outline({{S = true, P = true, O = true}, {N = true, I = true}}) == "SPO/NI") + +local function merge_outline(outline) + local out = {} + + for i, chord in ipairs(outline) do + for k in pairs(chord) do out[k] = true end + end + + return { out } +end + +assert(write_tp_outline(merge_outline({{S = true, P = true, O = true}, {N = true, I = true}})) == "SPNIO") + +local steno_order = { "S-", "T-", "K-", "P-", "W-", "H-", "R-", "A-", "O-", "*", "-E", "-U", "-F", "-R", "-P", "-B", "-L", "-G", "-T", "-S", "-D", "-Z" } + +-- convert a traditional-style steno chord to a string +local function write_chord(chord) + local out = {} + + local needs_dash = true + + for _, key in ipairs(steno_order) do + if chord[key] then + local letter = string.gsub(key, "-", "") + if string.match("AOEU*", letter) then needs_dash = false end + + if string.match(key, "^-") and needs_dash then + table.insert(out, "-") + needs_dash = false + end + + table.insert(out, letter) + end + end + + return table.concat(out) +end + +local function write_outline(outline) + local out = {} + + for _, chord in ipairs(outline) do + table.insert(out, write_chord(chord)) + end + + return table.concat(out, "/") +end + +assert(write_outline({{["P-"] = 1, ["O-"] = 1, ["-T"] = 1}}) == "POT") +assert(write_outline({{["-E"] = 1}}) == "E") +assert(write_outline({{["T-"] = 1, ["-T"] = 1}}) == "T-T") +assert(write_outline({{["T-"] = 1}, {["T-"] = 1}}) == "T/T") + +local tp_steno_map = { + S = "S-", P = "T-", K = "P-", L = "H-", M = "*", + H = "A-", W = "O-", N = "-E", R = "-U", + A = "-F", I = "-P", E = "-L", O = "-T", U = "-D", +} + +local function chord_to_steno(chord) + local out = {} + for key in pairs(chord) do + out[tp_steno_map[key]] = true + end + return out +end + +local function outline_to_steno(outline) + local out = {} + for _, chord in ipairs(outline) do + table.insert(out, chord_to_steno(chord)) + end + return out +end + +assert(write_outline(outline_to_steno({{K = 1, E = 1}, {P = 1, E = 1}, {K = 1, E = 1, N = 1}})) == "P-L/T-L/PEL") + +-- ok now we're actually generating the maps for tp words + +-- returns iterator of syllables +local function split_word(word) + local word = string.gsub(word, "(n?)([^aeiou][aeiou])", "%1-%2") + return string.gmatch(word, "[^-]+") +end + +local letter_map = { + s = "S", t = "SH", p = "P", w = "PH", + k = "K", j = "KH", l = "L", m = "M", + n = "MH", a = "A", e = "E", i = "I", + o = "O", u = "U", +} + +local function convert_syllable(s) + local out = {} + if string.match(s, "n$") then + out.N = true + s = string.gsub(s, "n$", "") + end + + for i in string.gmatch(s, ".") do + for k in string.gmatch(letter_map[i], ".") do out[k] = true end + end + + return out +end + +local function word_outline(word) + local out = {} + + for s in split_word(word) do + table.insert(out, convert_syllable(s)) + end + + return out +end + +local function assert_tp(outline, expected) + assert(write_tp_outline(outline) == expected, expected .. " " .. write_tp_outline(outline)) +end + +assert_tp(word_outline("ken"), "KNE") +assert_tp(word_outline("kepeken"), "KE/PE/KNE") +assert_tp(word_outline("linja"), "LNI/KHA") + +-- word briefs are single-chord verisons with W added +local function word_brief(word, modifier) + local out = word_outline(word) + + -- we don't wanna take space with single-syllable briefs + if #out == 1 then return nil end + + if modifier then table.insert(out, {W = true}) end + return merge_outline(out) +end + +assert_tp(word_brief("linja"), "KLHNAI") +assert_tp(word_brief("lili", true), "LWI") + +-- +do + if not arg[1] then + warn("usage: steno.lua [CONFIG]") + os.exit(1) + end + + local config = assert(dofile(arg[1])) + assert(config.words) + if not config.extra_briefs then config.extra_briefs = {} end + local dictionary = {} + + -- returns whether insertion was successful + -- you can pass a nil outline to make this a noöp + local function add_word(outline, word, force) + -- it's more useful to just pretend the insertion was successful + if not outline then return true end + -- we can change this for keymaps later! + local steno + if config.convert_to_english then + steno = write_outline(outline_to_steno(outline)) + else + steno = write_tp_outline(outline) + end + + if not force and dictionary[steno] then + nop(string.format( + "duplicate: %s = %s, %s", + steno, dictionary[steno], word + )) + return false + else + dictionary[steno] = word + return true + end + end + + for _, word in ipairs(config.words) do + add_word(word_outline(word), word, true) + if not add_word(word_brief(word, false), word, false) then + add_word(word_brief(word, true), word, false) + end + + local partial = "" + for syl in split_word(word) do + partial = partial .. syl + if syl == word then break end + add_word(word_outline(partial), partial, false) + end + end + + for k, v in pairs(config.extra_briefs) do + -- the irony of reparsing and unparsing this is not lost on me + add_word(read_tp_outline(k), v, true) + end + + for _, upper in ipairs({ false, true }) do + for letter, keys in pairs(letter_map) do + if upper then letter = string.upper(letter) end + letter = "{&" .. letter .. "}" + keys = keys .. "W" .. (upper and "R" or "") + + add_word(read_tp_outline(keys), letter, true) + end + end + + print(dkjson.encode(dictionary)) +end + +return { write_tp_outline = write_tp_outline, word_outline = word_outline } diff --git a/dictgen/words.lua b/dictgen/words.lua new file mode 100644 index 0000000..b83b4ad --- /dev/null +++ b/dictgen/words.lua @@ -0,0 +1,127 @@ +-- in roughly frequency order +return { + "mi", + "li", + "e", + "pona", + "toki", + "ni", + "a", + "la", + "ala", + "lon", + "sina", + "jan", + "tawa", + "sona", + "seme", + "pi", + "tenpo", + "ona", + "mute", + "taso", + "wile", + "o", + "pilin", + "kama", + "ken", + "ike", + "lili", + "tan", + "nimi", + "pali", + "ma", + "musi", + "sitelen", + "kepeken", + "tomo", + "ali", + "lukin", + "oko", + "jo", + "kin", + "pini", + "ilo", + "anu", + "ante", + "lape", + "moku", + "sama", + "suli", + "kalama", + "suno", + "telo", + "kulupu", + "nasa", + "sin", + "lipu", + "pana", + "pakala", + "ijo", + "soweli", + "tu", + "nasin", + "lawa", + "en", + "wawa", + "weka", + "wan", + "mu", + "awen", + "nanpa", + "olin", + "suwi", + "kon", + "seli", + "sewi", + "kute", + "mama", + "sike", + "moli", + "pimeja", + "lete", + "kasi", + "luka", + "sijelo", + "uta", + "poka", + "kala", + "jaki", + "insa", + "utala", + "mani", + "linja", + "open", + "len", + "waso", + "pan", + "ko", + "esun", + "kili", + "meli", + "tonsi", + "pipi", + "supa", + "kiwen", + "poki", + "palisa", + "kule", + "laso", + "noka", + "namako", + "loje", + "walo", + "unpa", + "anpa", + "mun", + "nena", + "mije", + "akesi", + "alasa", + "sinpin", + "selo", + "jelo", + "monsi", + "lupa", + "pu" +} -- cgit 1.3.0-6-gf8a5