summary refs log tree commit diff
path: root/dictgen
diff options
context:
space:
mode:
authorequa <equaa@protonmail.com>2023-01-15 11:58:00 -0600
committerequa <equaa@protonmail.com>2023-01-15 11:58:00 -0600
commit18a0bed48264e5fb2db07974dd912f7b5e940f65 (patch)
tree1be5b1f8d93e7433b9093d7a687052ac74527684 /dictgen
parent08793cfa2ccdae4c4091e1b977f9a45c174e8d49 (diff)
organize!
Diffstat (limited to 'dictgen')
-rw-r--r--dictgen/config.embedded.lua18
-rw-r--r--dictgen/config.main.lua18
-rw-r--r--dictgen/dkjson.lua714
-rw-r--r--dictgen/steno.lua251
-rw-r--r--dictgen/words.lua127
5 files changed, 1128 insertions, 0 deletions
diff --git a/dictgen/config.embedded.lua b/dictgen/config.embedded.lua
new file mode 100644
index 0000000..8222951
--- /dev/null
+++ b/dictgen/config.embedded.lua
@@ -0,0 +1,18 @@
+-- dictgen configuration; steno.lua loads this file and reads the fields below
+return {
+	-- list of words to generate outlines and briefs for
+	words = require("words"),
+	-- true: steno.lua converts every chord through its tp_steno_map and writes
+	-- it with the "S-"/"-T" style key names instead of the toki pona key letters
+	convert_to_english = true,
+	-- hand-written entries (outline string -> translation); steno.lua adds them
+	-- after the generated word entries and lets them overwrite those
+	extra_briefs = {
+		["KI/KHE/SHE/SNA/SHA/KA/LU"] = "kijetesantakalu",
+		["HR"] = "{^.}",
+		["HWNR"] = "{^\n^}",
+		["S"] = "sina",
+		["HS"] = "tawa",
+		["P"] = "pi",
+		["HP"] = "wile",
+		["K"] = "kama",
+		["HK"] = "jan",
+		["L"] = "li",
+		["M"] = "mi",
+		["HM"] = "ni",
+	}
+}
diff --git a/dictgen/config.main.lua b/dictgen/config.main.lua
new file mode 100644
index 0000000..640b973
--- /dev/null
+++ b/dictgen/config.main.lua
@@ -0,0 +1,18 @@
+-- dictgen configuration; steno.lua loads this file and reads the fields below
+return {
+	-- list of words to generate outlines and briefs for
+	words = require("words"),
+	-- false: steno.lua writes outlines with the toki pona key letters
+	-- (write_tp_outline) rather than converting them via tp_steno_map
+	convert_to_english = false,
+	-- hand-written entries (outline string -> translation); steno.lua adds them
+	-- after the generated word entries and lets them overwrite those
+	extra_briefs = {
+		["KI/KHE/SHE/SNA/SHA/KA/LU"] = "kijetesantakalu",
+		["HR"] = "{^.}",
+		["HWNR"] = "{^\n^}",
+		["S"] = "sina",
+		["HS"] = "tawa",
+		["P"] = "pi",
+		["HP"] = "wile",
+		["K"] = "kama",
+		["HK"] = "jan",
+		["L"] = "li",
+		["M"] = "mi",
+		["HM"] = "ni",
+	}
+}
diff --git a/dictgen/dkjson.lua b/dictgen/dkjson.lua
new file mode 100644
index 0000000..fa50b9f
--- /dev/null
+++ b/dictgen/dkjson.lua
@@ -0,0 +1,714 @@
+-- Module options:
+local always_try_using_lpeg = true
+local register_global_module_table = false
+local global_module_name = 'json'
+
+--[==[
+
+David Kolf's JSON module for Lua 5.1/5.2
+
+Version 2.5
+
+
+For the documentation see the corresponding readme.txt or visit
+<http://dkolf.de/src/dkjson-lua.fsl/>.
+
+You can contact the author by sending an e-mail to 'david' at the
+domain 'dkolf.de'.
+
+
+Copyright (C) 2010-2013 David Heiko Kolf
+
+Permission is hereby granted, free of charge, to any person obtaining
+a copy of this software and associated documentation files (the
+"Software"), to deal in the Software without restriction, including
+without limitation the rights to use, copy, modify, merge, publish,
+distribute, sublicense, and/or sell copies of the Software, and to
+permit persons to whom the Software is furnished to do so, subject to
+the following conditions:
+
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
+
+--]==]
+
+-- global dependencies:
+local pairs, type, tostring, tonumber, getmetatable, setmetatable, rawset =
+      pairs, type, tostring, tonumber, getmetatable, setmetatable, rawset
+local error, require, pcall, select = error, require, pcall, select
+local floor, huge = math.floor, math.huge
+local strrep, gsub, strsub, strbyte, strchar, strfind, strlen, strformat =
+      string.rep, string.gsub, string.sub, string.byte, string.char,
+      string.find, string.len, string.format
+local strmatch = string.match
+local concat = table.concat
+
+local json = { version = "dkjson 2.5" }
+
+if register_global_module_table then
+  _G[global_module_name] = json
+end
+
+local _ENV = nil -- blocking globals in Lua 5.2
+
+pcall (function()
+  -- Enable access to blocked metatables.
+  -- Don't worry, this module doesn't change anything in them.
+  local debmeta = require "debug".getmetatable
+  if debmeta then getmetatable = debmeta end
+end)
+
+json.null = setmetatable ({}, {
+  __tojson = function () return "null" end
+})
+
+local function isarray (tbl)
+  local max, n, arraylen = 0, 0, 0
+  for k,v in pairs (tbl) do
+    if k == 'n' and type(v) == 'number' then
+      arraylen = v
+      if v > max then
+        max = v
+      end
+    else
+      if type(k) ~= 'number' or k < 1 or floor(k) ~= k then
+        return false
+      end
+      if k > max then
+        max = k
+      end
+      n = n + 1
+    end
+  end
+  if max > 10 and max > arraylen and max > n * 2 then
+    return false -- don't create an array with too many holes
+  end
+  return true, max
+end
+
+local escapecodes = {
+  ["\""] = "\\\"", ["\\"] = "\\\\", ["\b"] = "\\b", ["\f"] = "\\f",
+  ["\n"] = "\\n",  ["\r"] = "\\r",  ["\t"] = "\\t"
+}
+
+local function escapeutf8 (uchar)
+  local value = escapecodes[uchar]
+  if value then
+    return value
+  end
+  local a, b, c, d = strbyte (uchar, 1, 4)
+  a, b, c, d = a or 0, b or 0, c or 0, d or 0
+  if a <= 0x7f then
+    value = a
+  elseif 0xc0 <= a and a <= 0xdf and b >= 0x80 then
+    value = (a - 0xc0) * 0x40 + b - 0x80
+  elseif 0xe0 <= a and a <= 0xef and b >= 0x80 and c >= 0x80 then
+    value = ((a - 0xe0) * 0x40 + b - 0x80) * 0x40 + c - 0x80
+  elseif 0xf0 <= a and a <= 0xf7 and b >= 0x80 and c >= 0x80 and d >= 0x80 then
+    value = (((a - 0xf0) * 0x40 + b - 0x80) * 0x40 + c - 0x80) * 0x40 + d - 0x80
+  else
+    return ""
+  end
+  if value <= 0xffff then
+    return strformat ("\\u%.4x", value)
+  elseif value <= 0x10ffff then
+    -- encode as UTF-16 surrogate pair
+    value = value - 0x10000
+    local highsur, lowsur = 0xD800 + floor (value/0x400), 0xDC00 + (value % 0x400)
+    return strformat ("\\u%.4x\\u%.4x", highsur, lowsur)
+  else
+    return ""
+  end
+end
+
+-- gsub wrapper that skips the substitution when `pattern` does not occur.
+-- gsub always assembles a new string, so probing with find first is cheaper
+-- for the common case of strings that contain nothing to replace.
+-- Returns `str` unchanged on no match, otherwise the results of gsub.
+local function fsub (str, pattern, repl)
+  if not strfind (str, pattern) then
+    return str
+  end
+  return gsub (str, pattern, repl)
+end
+
+local function quotestring (value)
+  -- based on the regexp "escapable" in https://github.com/douglascrockford/JSON-js
+  value = fsub (value, "[%z\1-\31\"\\\127]", escapeutf8)
+  if strfind (value, "[\194\216\220\225\226\239]") then
+    value = fsub (value, "\194[\128-\159\173]", escapeutf8)
+    value = fsub (value, "\216[\128-\132]", escapeutf8)
+    value = fsub (value, "\220\143", escapeutf8)
+    value = fsub (value, "\225\158[\180\181]", escapeutf8)
+    value = fsub (value, "\226\128[\140-\143\168-\175]", escapeutf8)
+    value = fsub (value, "\226\129[\160-\175]", escapeutf8)
+    value = fsub (value, "\239\187\191", escapeutf8)
+    value = fsub (value, "\239\191[\176-\191]", escapeutf8)
+  end
+  return "\"" .. value .. "\""
+end
+json.quotestring = quotestring
+
+-- Replace the first plain-text (non-pattern) occurrence of `o` in `str`
+-- with `n`; returns `str` untouched when `o` does not occur.
+local function replace(str, o, n)
+  local first, last = strfind (str, o, 1, true)
+  if not first then
+    return str
+  end
+  local head = strsub(str, 1, first-1)
+  local tail = strsub(str, last+1, -1)
+  return head .. n .. tail
+end
+
+-- locale independent num2str and str2num functions
+local decpoint, numfilter
+
+local function updatedecpoint ()
+  decpoint = strmatch(tostring(0.5), "([^05+])")
+  -- build a filter that can be used to remove group separators
+  numfilter = "[^0-9%-%+eE" .. gsub(decpoint, "[%^%$%(%)%%%.%[%]%*%+%-%?]", "%%%0") .. "]+"
+end
+
+updatedecpoint()
+
+local function num2str (num)
+  return replace(fsub(tostring(num), numfilter, ""), decpoint, ".")
+end
+
+local function str2num (str)
+  local num = tonumber(replace(str, ".", decpoint))
+  if not num then
+    updatedecpoint()
+    num = tonumber(replace(str, ".", decpoint))
+  end
+  return num
+end
+
+local function addnewline2 (level, buffer, buflen)
+  buffer[buflen+1] = "\n"
+  buffer[buflen+2] = strrep ("  ", level)
+  buflen = buflen + 2
+  return buflen
+end
+
+function json.addnewline (state)
+  if state.indent then
+    state.bufferlen = addnewline2 (state.level or 0,
+                           state.buffer, state.bufferlen or #(state.buffer))
+  end
+end
+
+local encode2 -- forward declaration
+
+local function addpair (key, value, prev, indent, level, buffer, buflen, tables, globalorder, state)
+  local kt = type (key)
+  if kt ~= 'string' and kt ~= 'number' then
+    return nil, "type '" .. kt .. "' is not supported as a key by JSON."
+  end
+  if prev then
+    buflen = buflen + 1
+    buffer[buflen] = ","
+  end
+  if indent then
+    buflen = addnewline2 (level, buffer, buflen)
+  end
+  buffer[buflen+1] = quotestring (key)
+  buffer[buflen+2] = ":"
+  return encode2 (value, indent, level, buffer, buflen + 2, tables, globalorder, state)
+end
+
+local function appendcustom(res, buffer, state)
+  local buflen = state.bufferlen
+  if type (res) == 'string' then
+    buflen = buflen + 1
+    buffer[buflen] = res
+  end
+  return buflen
+end
+
+local function exception(reason, value, state, buffer, buflen, defaultmessage)
+  defaultmessage = defaultmessage or reason
+  local handler = state.exception
+  if not handler then
+    return nil, defaultmessage
+  else
+    state.bufferlen = buflen
+    local ret, msg = handler (reason, value, state, defaultmessage)
+    if not ret then return nil, msg or defaultmessage end
+    return appendcustom(ret, buffer, state)
+  end
+end
+
+function json.encodeexception(reason, value, state, defaultmessage)
+  return quotestring("<" .. defaultmessage .. ">")
+end
+
+-- Recursive worker behind json.encode: appends the JSON text for `value` to
+-- `buffer` (an array of string pieces) after slot `buflen`.
+--   indent      - truthy to put each object member on its own indented line
+--   level       - current nesting depth, used for the indentation width
+--   tables      - set of tables currently being encoded (cycle detection)
+--   globalorder - optional array of keys giving the member order of objects
+--   state       - the caller's state table (exception handler, bufferlen)
+-- Returns the new buffer length, or nil plus an error message on failure.
+encode2 = function (value, indent, level, buffer, buflen, tables, globalorder, state)
+  local valtype = type (value)
+  local valmeta = getmetatable (value)
+  valmeta = type (valmeta) == 'table' and valmeta -- only tables
+  local valtojson = valmeta and valmeta.__tojson
+  if valtojson then
+    -- the value brings its own encoder; mark it so a recursive call on the
+    -- same value is reported as a cycle
+    if tables[value] then
+      return exception('reference cycle', value, state, buffer, buflen)
+    end
+    tables[value] = true
+    state.bufferlen = buflen
+    local ret, msg = valtojson (value, state)
+    if not ret then return exception('custom encoder failed', value, state, buffer, buflen, msg) end
+    tables[value] = nil
+    buflen = appendcustom(ret, buffer, state)
+  elseif value == nil then
+    buflen = buflen + 1
+    buffer[buflen] = "null"
+  elseif valtype == 'number' then
+    local s
+    if value ~= value or value >= huge or -value >= huge then
+      -- NaN and +/-infinity have no JSON representation.
+      -- This is the behaviour of the original JSON implementation.
+      s = "null"
+    else
+      s = num2str (value)
+    end
+    buflen = buflen + 1
+    buffer[buflen] = s
+  elseif valtype == 'boolean' then
+    buflen = buflen + 1
+    buffer[buflen] = value and "true" or "false"
+  elseif valtype == 'string' then
+    buflen = buflen + 1
+    buffer[buflen] = quotestring (value)
+  elseif valtype == 'table' then
+    if tables[value] then
+      return exception('reference cycle', value, state, buffer, buflen)
+    end
+    tables[value] = true
+    level = level + 1
+    local isa, n = isarray (value)
+    -- an empty table is written as an object only when its metatable asks for it
+    if n == 0 and valmeta and valmeta.__jsontype == 'object' then
+      isa = false
+    end
+    local msg
+    if isa then -- JSON array
+      buflen = buflen + 1
+      buffer[buflen] = "["
+      for i = 1, n do
+        buflen, msg = encode2 (value[i], indent, level, buffer, buflen, tables, globalorder, state)
+        if not buflen then return nil, msg end
+        if i < n then
+          buflen = buflen + 1
+          buffer[buflen] = ","
+        end
+      end
+      buflen = buflen + 1
+      buffer[buflen] = "]"
+    else -- JSON object
+      local prev = false
+      buflen = buflen + 1
+      buffer[buflen] = "{"
+      local order = valmeta and valmeta.__jsonorder or globalorder
+      if order then
+        -- first the keys named in the order list, then whatever is left
+        local used = {}
+        n = #order
+        for i = 1, n do
+          local k = order[i]
+          local v = value[k]
+          -- compare against nil so that members holding `false` keep their
+          -- place in the requested order
+          if v ~= nil then
+            used[k] = true
+            buflen, msg = addpair (k, v, prev, indent, level, buffer, buflen, tables, globalorder, state)
+            -- stop on the first failure; continuing would do arithmetic on nil
+            if not buflen then return nil, msg end
+            prev = true -- add a separator before the next element
+          end
+        end
+        for k,v in pairs (value) do
+          if not used[k] then
+            buflen, msg = addpair (k, v, prev, indent, level, buffer, buflen, tables, globalorder, state)
+            if not buflen then return nil, msg end
+            prev = true -- add a separator before the next element
+          end
+        end
+      else -- unordered
+        for k,v in pairs (value) do
+          buflen, msg = addpair (k, v, prev, indent, level, buffer, buflen, tables, globalorder, state)
+          if not buflen then return nil, msg end
+          prev = true -- add a separator before the next element
+        end
+      end
+      if indent then
+        buflen = addnewline2 (level - 1, buffer, buflen)
+      end
+      buflen = buflen + 1
+      buffer[buflen] = "}"
+    end
+    tables[value] = nil
+  else
+    return exception ('unsupported type', value, state, buffer, buflen,
+      "type '" .. valtype .. "' is not supported by JSON.")
+  end
+  return buflen
+end
+
+function json.encode (value, state)
+  state = state or {}
+  local oldbuffer = state.buffer
+  local buffer = oldbuffer or {}
+  state.buffer = buffer
+  updatedecpoint()
+  local ret, msg = encode2 (value, state.indent, state.level or 0,
+                   buffer, state.bufferlen or 0, state.tables or {}, state.keyorder, state)
+  if not ret then
+    error (msg, 2)
+  elseif oldbuffer == buffer then
+    state.bufferlen = ret
+    return true
+  else
+    state.bufferlen = nil
+    state.buffer = nil
+    return concat (buffer)
+  end
+end
+
+-- Describe byte offset `where` of `str` as "line L, column C" (both 1-based)
+-- for use in error messages. Only "\n" characters located before `where`
+-- count as line breaks.
+local function loc (str, where)
+  local line, linestart = 1, 0
+  local newline = strfind (str, "\n", 1, true)
+  while newline and newline < where do
+    line = line + 1
+    linestart = newline
+    newline = strfind (str, "\n", newline + 1, true)
+  end
+  return "line " .. line .. ", column " .. (where - linestart)
+end
+
+local function unterminated (str, what, where)
+  return nil, strlen (str) + 1, "unterminated " .. what .. " at " .. loc (str, where)
+end
+
+local function scanwhite (str, pos)
+  while true do
+    pos = strfind (str, "%S", pos)
+    if not pos then return nil end
+    local sub2 = strsub (str, pos, pos + 1)
+    if sub2 == "\239\187" and strsub (str, pos + 2, pos + 2) == "\191" then
+      -- UTF-8 Byte Order Mark
+      pos = pos + 3
+    elseif sub2 == "//" then
+      pos = strfind (str, "[\n\r]", pos + 2)
+      if not pos then return nil end
+    elseif sub2 == "/*" then
+      pos = strfind (str, "*/", pos + 2)
+      if not pos then return nil end
+      pos = pos + 2
+    else
+      return pos
+    end
+  end
+end
+
+local escapechars = {
+  ["\""] = "\"", ["\\"] = "\\", ["/"] = "/", ["b"] = "\b", ["f"] = "\f",
+  ["n"] = "\n", ["r"] = "\r", ["t"] = "\t"
+}
+
+local function unichar (value)
+  if value < 0 then
+    return nil
+  elseif value <= 0x007f then
+    return strchar (value)
+  elseif value <= 0x07ff then
+    return strchar (0xc0 + floor(value/0x40),
+                    0x80 + (floor(value) % 0x40))
+  elseif value <= 0xffff then
+    return strchar (0xe0 + floor(value/0x1000),
+                    0x80 + (floor(value/0x40) % 0x40),
+                    0x80 + (floor(value) % 0x40))
+  elseif value <= 0x10ffff then
+    return strchar (0xf0 + floor(value/0x40000),
+                    0x80 + (floor(value/0x1000) % 0x40),
+                    0x80 + (floor(value/0x40) % 0x40),
+                    0x80 + (floor(value) % 0x40))
+  else
+    return nil
+  end
+end
+
+local function scanstring (str, pos)
+  local lastpos = pos + 1
+  local buffer, n = {}, 0
+  while true do
+    local nextpos = strfind (str, "[\"\\]", lastpos)
+    if not nextpos then
+      return unterminated (str, "string", pos)
+    end
+    if nextpos > lastpos then
+      n = n + 1
+      buffer[n] = strsub (str, lastpos, nextpos - 1)
+    end
+    if strsub (str, nextpos, nextpos) == "\"" then
+      lastpos = nextpos + 1
+      break
+    else
+      local escchar = strsub (str, nextpos + 1, nextpos + 1)
+      local value
+      if escchar == "u" then
+        value = tonumber (strsub (str, nextpos + 2, nextpos + 5), 16)
+        if value then
+          local value2
+          if 0xD800 <= value and value <= 0xDBff then
+            -- we have the high surrogate of UTF-16. Check if there is a
+            -- low surrogate escaped nearby to combine them.
+            if strsub (str, nextpos + 6, nextpos + 7) == "\\u" then
+              value2 = tonumber (strsub (str, nextpos + 8, nextpos + 11), 16)
+              if value2 and 0xDC00 <= value2 and value2 <= 0xDFFF then
+                value = (value - 0xD800)  * 0x400 + (value2 - 0xDC00) + 0x10000
+              else
+                value2 = nil -- in case it was out of range for a low surrogate
+              end
+            end
+          end
+          value = value and unichar (value)
+          if value then
+            if value2 then
+              lastpos = nextpos + 12
+            else
+              lastpos = nextpos + 6
+            end
+          end
+        end
+      end
+      if not value then
+        value = escapechars[escchar] or escchar
+        lastpos = nextpos + 2
+      end
+      n = n + 1
+      buffer[n] = value
+    end
+  end
+  if n == 1 then
+    return buffer[1], lastpos
+  elseif n > 1 then
+    return concat (buffer), lastpos
+  else
+    return "", lastpos
+  end
+end
+
+local scanvalue -- forward declaration
+
+local function scantable (what, closechar, str, startpos, nullval, objectmeta, arraymeta)
+  local len = strlen (str)
+  local tbl, n = {}, 0
+  local pos = startpos + 1
+  if what == 'object' then
+    setmetatable (tbl, objectmeta)
+  else
+    setmetatable (tbl, arraymeta)
+  end
+  while true do
+    pos = scanwhite (str, pos)
+    if not pos then return unterminated (str, what, startpos) end
+    local char = strsub (str, pos, pos)
+    if char == closechar then
+      return tbl, pos + 1
+    end
+    local val1, err
+    val1, pos, err = scanvalue (str, pos, nullval, objectmeta, arraymeta)
+    if err then return nil, pos, err end
+    pos = scanwhite (str, pos)
+    if not pos then return unterminated (str, what, startpos) end
+    char = strsub (str, pos, pos)
+    if char == ":" then
+      if val1 == nil then
+        return nil, pos, "cannot use nil as table index (at " .. loc (str, pos) .. ")"
+      end
+      pos = scanwhite (str, pos + 1)
+      if not pos then return unterminated (str, what, startpos) end
+      local val2
+      val2, pos, err = scanvalue (str, pos, nullval, objectmeta, arraymeta)
+      if err then return nil, pos, err end
+      tbl[val1] = val2
+      pos = scanwhite (str, pos)
+      if not pos then return unterminated (str, what, startpos) end
+      char = strsub (str, pos, pos)
+    else
+      n = n + 1
+      tbl[n] = val1
+    end
+    if char == "," then
+      pos = pos + 1
+    end
+  end
+end
+
+scanvalue = function (str, pos, nullval, objectmeta, arraymeta)
+  pos = pos or 1
+  pos = scanwhite (str, pos)
+  if not pos then
+    return nil, strlen (str) + 1, "no valid JSON value (reached the end)"
+  end
+  local char = strsub (str, pos, pos)
+  if char == "{" then
+    return scantable ('object', "}", str, pos, nullval, objectmeta, arraymeta)
+  elseif char == "[" then
+    return scantable ('array', "]", str, pos, nullval, objectmeta, arraymeta)
+  elseif char == "\"" then
+    return scanstring (str, pos)
+  else
+    local pstart, pend = strfind (str, "^%-?[%d%.]+[eE]?[%+%-]?%d*", pos)
+    if pstart then
+      local number = str2num (strsub (str, pstart, pend))
+      if number then
+        return number, pend + 1
+      end
+    end
+    pstart, pend = strfind (str, "^%a%w*", pos)
+    if pstart then
+      local name = strsub (str, pstart, pend)
+      if name == "true" then
+        return true, pend + 1
+      elseif name == "false" then
+        return false, pend + 1
+      elseif name == "null" then
+        return nullval, pend + 1
+      end
+    end
+    return nil, pos, "no valid JSON value at " .. loc (str, pos)
+  end
+end
+
+local function optionalmetatables(...)
+  if select("#", ...) > 0 then
+    return ...
+  else
+    return {__jsontype = 'object'}, {__jsontype = 'array'}
+  end
+end
+
+function json.decode (str, pos, nullval, ...)
+  local objectmeta, arraymeta = optionalmetatables(...)
+  return scanvalue (str, pos, nullval, objectmeta, arraymeta)
+end
+
+function json.use_lpeg ()
+  local g = require ("lpeg")
+
+  if g.version() == "0.11" then
+    error "due to a bug in LPeg 0.11, it cannot be used for JSON matching"
+  end
+
+  local pegmatch = g.match
+  local P, S, R = g.P, g.S, g.R
+
+  local function ErrorCall (str, pos, msg, state)
+    if not state.msg then
+      state.msg = msg .. " at " .. loc (str, pos)
+      state.pos = pos
+    end
+    return false
+  end
+
+  local function Err (msg)
+    return g.Cmt (g.Cc (msg) * g.Carg (2), ErrorCall)
+  end
+
+  local SingleLineComment = P"//" * (1 - S"\n\r")^0
+  local MultiLineComment = P"/*" * (1 - P"*/")^0 * P"*/"
+  local Space = (S" \n\r\t" + P"\239\187\191" + SingleLineComment + MultiLineComment)^0
+
+  local PlainChar = 1 - S"\"\\\n\r"
+  local EscapeSequence = (P"\\" * g.C (S"\"\\/bfnrt" + Err "unsupported escape sequence")) / escapechars
+  local HexDigit = R("09", "af", "AF")
+  local function UTF16Surrogate (match, pos, high, low)
+    high, low = tonumber (high, 16), tonumber (low, 16)
+    if 0xD800 <= high and high <= 0xDBff and 0xDC00 <= low and low <= 0xDFFF then
+      return true, unichar ((high - 0xD800)  * 0x400 + (low - 0xDC00) + 0x10000)
+    else
+      return false
+    end
+  end
+  local function UTF16BMP (hex)
+    return unichar (tonumber (hex, 16))
+  end
+  local U16Sequence = (P"\\u" * g.C (HexDigit * HexDigit * HexDigit * HexDigit))
+  local UnicodeEscape = g.Cmt (U16Sequence * U16Sequence, UTF16Surrogate) + U16Sequence/UTF16BMP
+  local Char = UnicodeEscape + EscapeSequence + PlainChar
+  local String = P"\"" * g.Cs (Char ^ 0) * (P"\"" + Err "unterminated string")
+  local Integer = P"-"^(-1) * (P"0" + (R"19" * R"09"^0))
+  local Fractal = P"." * R"09"^0
+  local Exponent = (S"eE") * (S"+-")^(-1) * R"09"^1
+  local Number = (Integer * Fractal^(-1) * Exponent^(-1))/str2num
+  local Constant = P"true" * g.Cc (true) + P"false" * g.Cc (false) + P"null" * g.Carg (1)
+  local SimpleValue = Number + String + Constant
+  local ArrayContent, ObjectContent
+
+  -- The functions parsearray and parseobject parse only a single value/pair
+  -- at a time and store them directly to avoid hitting the LPeg limits.
+  local function parsearray (str, pos, nullval, state)
+    local obj, cont
+    local npos
+    local t, nt = {}, 0
+    repeat
+      obj, cont, npos = pegmatch (ArrayContent, str, pos, nullval, state)
+      if not npos then break end
+      pos = npos
+      nt = nt + 1
+      t[nt] = obj
+    until cont == 'last'
+    return pos, setmetatable (t, state.arraymeta)
+  end
+
+  local function parseobject (str, pos, nullval, state)
+    local obj, key, cont
+    local npos
+    local t = {}
+    repeat
+      key, obj, cont, npos = pegmatch (ObjectContent, str, pos, nullval, state)
+      if not npos then break end
+      pos = npos
+      t[key] = obj
+    until cont == 'last'
+    return pos, setmetatable (t, state.objectmeta)
+  end
+
+  local Array = P"[" * g.Cmt (g.Carg(1) * g.Carg(2), parsearray) * Space * (P"]" + Err "']' expected")
+  local Object = P"{" * g.Cmt (g.Carg(1) * g.Carg(2), parseobject) * Space * (P"}" + Err "'}' expected")
+  local Value = Space * (Array + Object + SimpleValue)
+  local ExpectedValue = Value + Space * Err "value expected"
+  ArrayContent = Value * Space * (P"," * g.Cc'cont' + g.Cc'last') * g.Cp()
+  local Pair = g.Cg (Space * String * Space * (P":" + Err "colon expected") * ExpectedValue)
+  ObjectContent = Pair * Space * (P"," * g.Cc'cont' + g.Cc'last') * g.Cp()
+  local DecodeValue = ExpectedValue * g.Cp ()
+
+  function json.decode (str, pos, nullval, ...)
+    local state = {}
+    state.objectmeta, state.arraymeta = optionalmetatables(...)
+    local obj, retpos = pegmatch (DecodeValue, str, pos, nullval, state)
+    if state.msg then
+      return nil, state.pos, state.msg
+    else
+      return obj, retpos
+    end
+  end
+
+  -- use this function only once:
+  json.use_lpeg = function () return json end
+
+  json.using_lpeg = true
+
+  return json -- so you can get the module using json = require "dkjson".use_lpeg()
+end
+
+if always_try_using_lpeg then
+  pcall (json.use_lpeg)
+end
+
+return json
+
diff --git a/dictgen/steno.lua b/dictgen/steno.lua
new file mode 100644
index 0000000..95040d6
--- /dev/null
+++ b/dictgen/steno.lua
@@ -0,0 +1,251 @@
+-- toki pona steno generator
+-- TODO:
+-- - make fingerspelling syllable-based MAYBE
+-- - punctuation?
+
+local dkjson = require("dkjson")
+
+local function warn(str)
+	io.stderr:write(str .. "\n")
+end
+
+local function nop() end
+
+-- the order in which the keys of one chord are written out
+local tp_order = { "S", "P", "K", "L", "M", "H", "W", "N", "R", "A", "I", "E", "O", "U" }
+
+-- Turns an outline into its string form.
+-- An outline is a sequence of chords; a chord is a table in which every
+-- truthy key is a key pressed in that chord. Keys are emitted in tp_order
+-- and chords are joined with "/".
+local function write_tp_outline(outline)
+	local strokes = {}
+	for index, chord in ipairs(outline) do
+		local pressed = {}
+		for _, key in ipairs(tp_order) do
+			if chord[key] then pressed[#pressed + 1] = key end
+		end
+		strokes[index] = table.concat(pressed)
+	end
+
+	return table.concat(strokes, "/")
+end
+
+-- Parses an outline string such as "SPO/NI" into a sequence of chords.
+-- Strokes are separated by "/"; every character of a stroke becomes a
+-- key set to true in that stroke's chord table.
+local function read_tp_outline(str)
+	local outline = {}
+
+	for stroke in string.gmatch(str, "[^/]+") do
+		local chord = {}
+		for pos = 1, #stroke do
+			chord[string.sub(stroke, pos, pos)] = true
+		end
+		outline[#outline + 1] = chord
+	end
+
+	return outline
+end
+
+assert(write_tp_outline({{S = true, L = true}}) == "SL")
+assert(write_tp_outline({{S = true, P = true, O = true}, {N = true, I = true}}) == "SPO/NI")
+
+-- Collapses a multi-chord outline into a one-chord outline whose single
+-- chord has every key that appears in any of the input chords.
+local function merge_outline(outline)
+	local merged = {}
+
+	for _, chord in ipairs(outline) do
+		for key in pairs(chord) do
+			merged[key] = true
+		end
+	end
+
+	return { merged }
+end
+
+assert(write_tp_outline(merge_outline({{S = true, P = true, O = true}, {N = true, I = true}})) == "SPNIO")
+
+-- the order in which the keys of a traditional-style chord are written;
+-- a trailing dash marks a left-side key, a leading dash a right-side key
+local steno_order = { "S-", "T-", "K-", "P-", "W-", "H-", "R-", "A-", "O-", "*", "-E", "-U", "-F", "-R", "-P", "-B", "-L", "-G", "-T", "-S", "-D", "-Z" }
+
+-- Converts a traditional-style steno chord to a string.
+-- `chord` is a table whose truthy keys are entries of steno_order. Keys are
+-- written in steno_order with their dashes removed. A single "-" is put in
+-- front of the first right-side key unless one of A, O, E, U or * has already
+-- been written (so {T-, -T} gives "T-T" while {P-, O-, -T} gives "POT").
+local function write_chord(chord)
+	local out = {}
+
+	local needs_dash = true
+
+	for _, key in ipairs(steno_order) do
+		if chord[key] then
+			-- "-" is a pattern quantifier, so escape it to strip literal dashes
+			local letter = string.gsub(key, "%-", "")
+			-- plain-text search: `letter` may be "*", which is magic in patterns
+			if string.find("AOEU*", letter, 1, true) then needs_dash = false end
+
+			if string.sub(key, 1, 1) == "-" and needs_dash then
+				out[#out + 1] = "-"
+				needs_dash = false
+			end
+
+			out[#out + 1] = letter
+		end
+	end
+
+	return table.concat(out)
+end
+
+-- Writes a traditional-style outline: each chord is rendered with
+-- write_chord and the resulting strokes are joined with "/".
+local function write_outline(outline)
+	local strokes = {}
+
+	for index, chord in ipairs(outline) do
+		strokes[index] = write_chord(chord)
+	end
+
+	return table.concat(strokes, "/")
+end
+
+assert(write_outline({{["P-"] = 1, ["O-"] = 1, ["-T"] = 1}}) == "POT")
+assert(write_outline({{["-E"] = 1}}) == "E")
+assert(write_outline({{["T-"] = 1, ["-T"] = 1}}) == "T-T")
+assert(write_outline({{["T-"] = 1}, {["T-"] = 1}}) == "T/T")
+
+-- which traditional-style key each toki pona layout key is written as
+local tp_steno_map = {
+	S = "S-", P = "T-", K = "P-", L = "H-", M = "*",
+	H = "A-", W = "O-", N = "-E", R = "-U",
+	A = "-F", I = "-P", E = "-L", O = "-T", U = "-D",
+}
+
+-- Translates one toki pona chord into a traditional-style chord by mapping
+-- every key through tp_steno_map.
+-- Raises an error naming the offending key when the chord contains a key
+-- that has no mapping (for instance a typo in a hand-written brief), instead
+-- of failing with an opaque "table index is nil".
+local function chord_to_steno(chord)
+	local out = {}
+	for key in pairs(chord) do
+		local steno_key = tp_steno_map[key]
+		if steno_key == nil then
+			error(string.format("unknown toki pona steno key %q", tostring(key)), 2)
+		end
+		out[steno_key] = true
+	end
+	return out
+end
+
+-- Translates a whole toki pona outline chord by chord (see chord_to_steno),
+-- keeping the chords in their original order.
+local function outline_to_steno(outline)
+	local converted = {}
+	for index, chord in ipairs(outline) do
+		converted[index] = chord_to_steno(chord)
+	end
+	return converted
+end
+
+assert(write_outline(outline_to_steno({{K = 1, E = 1}, {P = 1, E = 1}, {K = 1, E = 1, N = 1}})) == "P-L/T-L/PEL")
+
+-- ok now we're actually generating the maps for tp words
+
+-- Returns an iterator over the syllables of `word`.
+-- A "-" is inserted in front of every consonant+vowel pair (an "n" directly
+-- before the pair stays with the preceding syllable), then the word is split
+-- on the dashes.
+local function split_word(word)
+	local hyphenated = string.gsub(word, "(n?)([^aeiou][aeiou])", "%1-%2")
+	return string.gmatch(hyphenated, "[^-]+")
+end
+
+-- the keys pressed for each letter; a letter may need more than one key
+local letter_map = {
+	s = "S", t = "SH", p = "P", w = "PH",
+	k = "K", j = "KH", l = "L", m = "M",
+	n = "MH", a = "A", e = "E", i = "I",
+	o = "O", u = "U",
+}
+
+-- Converts one syllable into a chord (a table with every pressed key set
+-- to true).
+-- A trailing "n" is written with the N key; every other letter contributes
+-- the keys listed for it in letter_map.
+-- Raises an error naming the letter when the syllable contains a character
+-- that letter_map does not cover (e.g. an upper-case letter).
+local function convert_syllable(s)
+	local out = {}
+	if string.match(s, "n$") then
+		out.N = true
+		s = string.sub(s, 1, -2)
+	end
+
+	for letter in string.gmatch(s, ".") do
+		local keys = letter_map[letter]
+		if not keys then
+			error(string.format("cannot convert syllable: no steno keys for letter %q", letter), 2)
+		end
+		for k in string.gmatch(keys, ".") do out[k] = true end
+	end
+
+	return out
+end
+
+-- Builds the full outline of a word: one chord per syllable, in order.
+local function word_outline(word)
+	local chords = {}
+
+	for syllable in split_word(word) do
+		chords[#chords + 1] = convert_syllable(syllable)
+	end
+
+	return chords
+end
+
+-- Self-test helper: fails unless `outline` is written as `expected`; the
+-- failure message is the expected string followed by the actual one.
+local function assert_tp(outline, expected)
+	local actual = write_tp_outline(outline)
+	assert(actual == expected, expected .. " " .. actual)
+end
+
+assert_tp(word_outline("ken"), "KNE")
+assert_tp(word_outline("kepeken"), "KE/PE/KNE")
+assert_tp(word_outline("linja"), "LNI/KHA")
+
+-- Word briefs are single-chord versions of a word's outline: all syllable
+-- chords merged into one. A truthy `modifier` also adds the W key.
+-- Returns nil for single-syllable words.
+local function word_brief(word, modifier)
+	local syllables = word_outline(word)
+
+	-- a one-syllable word is already a single chord; a brief would only
+	-- take up space
+	if #syllables == 1 then return nil end
+
+	if modifier then syllables[#syllables + 1] = { W = true } end
+	return merge_outline(syllables)
+end
+
+assert_tp(word_brief("linja"), "KLHNAI")
+assert_tp(word_brief("lili", true), "LWI")
+
+--
+-- Script entry point: loads the config file named by the first command-line
+-- argument, builds a dictionary that maps outline strings to their
+-- translations, and prints that dictionary to stdout as JSON.
+do
+	if not arg[1] then
+		warn("usage: steno.lua [CONFIG]")
+		os.exit(1)
+	end
+
+	-- the config file must return a table with at least a `words` list
+	local config = assert(dofile(arg[1]))
+		assert(config.words)
+	if not config.extra_briefs then config.extra_briefs = {} end
+	-- outline string -> translation
+	local dictionary = {}
+
+	-- returns whether insertion was successful
+	-- you can pass a nil outline to make this a no-op
+	-- `force` overwrites an entry that already uses the same outline string
+	local function add_word(outline, word, force)
+		-- it's more useful to just pretend the insertion was successful
+		if not outline then return true end
+		-- we can change this for keymaps later!
+		local steno
+		if config.convert_to_english then
+			steno = write_outline(outline_to_steno(outline))
+		else
+			steno = write_tp_outline(outline)
+		end
+
+		if not force and dictionary[steno] then
+			-- the duplicate report is built but discarded (nop does nothing);
+			-- pass it to warn instead to see collisions on stderr
+			nop(string.format(
+				"duplicate: %s = %s, %s",
+				steno, dictionary[steno], word
+			))
+			return false
+		else
+			dictionary[steno] = word
+			return true
+		end
+	end
+
+	for _, word in ipairs(config.words) do
+		-- the syllable-by-syllable outline always wins its slot
+		add_word(word_outline(word), word, true)
+		-- try the plain brief first; if that outline is taken, fall back to
+		-- the brief with the W modifier (which may be taken as well)
+		if not add_word(word_brief(word, false), word, false) then
+			add_word(word_brief(word, true), word, false)
+		end
+
+		-- also register every leading run of syllables, without overwriting
+		-- anything that is already in the dictionary
+		local partial = ""
+		for syl in split_word(word) do
+			partial = partial .. syl
+			-- the first syllable being the whole word means the word has only
+			-- one syllable, so there are no prefixes to add
+			if syl == word then break end
+			add_word(word_outline(partial), partial, false)
+		end
+	end
+
+	-- hand-written briefs from the config overwrite generated entries
+	for k, v in pairs(config.extra_briefs) do
+		-- the irony of reparsing and unparsing this is not lost on me
+		add_word(read_tp_outline(k), v, true)
+	end
+
+	-- single-letter entries: the letter's keys plus W, and additionally R
+	-- for the upper-case variant
+	for _, upper in ipairs({ false, true }) do
+		for letter, keys in pairs(letter_map) do
+			if upper then letter = string.upper(letter) end
+			-- NOTE(review): "{&x}" looks like Plover's glue syntax used for
+			-- fingerspelling - confirm against the target dictionary format
+			letter = "{&" .. letter .. "}"
+			keys = keys .. "W" .. (upper and "R" or "")
+
+			add_word(read_tp_outline(keys), letter, true)
+		end
+	end
+
+	print(dkjson.encode(dictionary))
+end
+
+return { write_tp_outline = write_tp_outline, word_outline = word_outline }
diff --git a/dictgen/words.lua b/dictgen/words.lua
new file mode 100644
index 0000000..b83b4ad
--- /dev/null
+++ b/dictgen/words.lua
@@ -0,0 +1,127 @@
+-- in roughly frequency order
+return {
+	"mi",
+	"li",
+	"e",
+	"pona",
+	"toki",
+	"ni",
+	"a",
+	"la",
+	"ala",
+	"lon",
+	"sina",
+	"jan",
+	"tawa",
+	"sona",
+	"seme",
+	"pi",
+	"tenpo",
+	"ona",
+	"mute",
+	"taso",
+	"wile",
+	"o",
+	"pilin",
+	"kama",
+	"ken",
+	"ike",
+	"lili",
+	"tan",
+	"nimi",
+	"pali",
+	"ma",
+	"musi",
+	"sitelen",
+	"kepeken",
+	"tomo",
+	"ali",
+	"lukin",
+	"oko",
+	"jo",
+	"kin",
+	"pini",
+	"ilo",
+	"anu",
+	"ante",
+	"lape",
+	"moku",
+	"sama",
+	"suli",
+	"kalama",
+	"suno",
+	"telo",
+	"kulupu",
+	"nasa",
+	"sin",
+	"lipu",
+	"pana",
+	"pakala",
+	"ijo",
+	"soweli",
+	"tu",
+	"nasin",
+	"lawa",
+	"en",
+	"wawa",
+	"weka",
+	"wan",
+	"mu",
+	"awen",
+	"nanpa",
+	"olin",
+	"suwi",
+	"kon",
+	"seli",
+	"sewi",
+	"kute",
+	"mama",
+	"sike",
+	"moli",
+	"pimeja",
+	"lete",
+	"kasi",
+	"luka",
+	"sijelo",
+	"uta",
+	"poka",
+	"kala",
+	"jaki",
+	"insa",
+	"utala",
+	"mani",
+	"linja",
+	"open",
+	"len",
+	"waso",
+	"pan",
+	"ko",
+	"esun",
+	"kili",
+	"meli",
+	"tonsi",
+	"pipi",
+	"supa",
+	"kiwen",
+	"poki",
+	"palisa",
+	"kule",
+	"laso",
+	"noka",
+	"namako",
+	"loje",
+	"walo",
+	"unpa",
+	"anpa",
+	"mun",
+	"nena",
+	"mije",
+	"akesi",
+	"alasa",
+	"sinpin",
+	"selo",
+	"jelo",
+	"monsi",
+	"lupa",
+	"pu"
+}