diff options
Diffstat (limited to 'dictgen/steno.lua')
-rw-r--r-- | dictgen/steno.lua | 251 |
1 files changed, 251 insertions, 0 deletions
-- dictgen/steno.lua
-- toki pona steno generator
-- TODO:
-- - make fingerspelling syllable-based MAYBE
-- - punctuation?

local dkjson = require("dkjson")

--- Write one line to stderr.
-- @param str message text (a newline is appended)
local function warn(str)
  io.stderr:write(str .. "\n")
end

--- Accept any arguments and do nothing.
-- Used below to discard duplicate reports while keeping the call site intact.
local function nop() end

-- Order in which toki pona keys are written inside one chord.
local tp_order = { "S", "P", "K", "L", "M", "H", "W", "N", "R", "A", "I", "E", "O", "U" }

--- Serialize a toki pona outline.
-- An outline is a sequence of chords; each chord is a table in which every
-- key that is defined is a key pressed in that chord.
-- Keys are emitted in `tp_order`; chords are joined with "/". Keys that are
-- not listed in `tp_order` are not written.
-- @param outline sequence of chord tables
-- @return string such as "SPO/NI"
local function write_tp_outline(outline)
  local out = {}

  for i, chord in ipairs(outline) do
    if i > 1 then out[#out + 1] = "/" end

    for _, k in ipairs(tp_order) do
      if chord[k] then out[#out + 1] = k end
    end
  end

  return table.concat(out)
end

--- Parse a "/"-separated outline string into a sequence of chord tables.
-- Every single character of a chord becomes a key set to true.
-- @param str outline text such as "SPO/NI"
-- @return sequence of chord tables
local function read_tp_outline(str)
  local out = {}

  for chord in string.gmatch(str, "[^/]+") do
    local map = {}
    for key in string.gmatch(chord, ".") do map[key] = true end
    out[#out + 1] = map
  end

  return out
end

assert(write_tp_outline({{S = true, L = true}}) == "SL")
assert(write_tp_outline({{S = true, P = true, O = true}, {N = true, I = true}}) == "SPO/NI")

--- Collapse all chords of an outline into one chord holding the union of keys.
-- @param outline sequence of chord tables
-- @return a one-element outline
local function merge_outline(outline)
  local merged = {}

  for _, chord in ipairs(outline) do
    for k in pairs(chord) do merged[k] = true end
  end

  return { merged }
end

assert(write_tp_outline(merge_outline({{S = true, P = true, O = true}, {N = true, I = true}})) == "SPNIO")

-- Order in which traditional steno keys are written inside one chord.
-- A trailing dash marks a left-hand key, a leading dash a right-hand key.
local steno_order = { "S-", "T-", "K-", "P-", "W-", "H-", "R-", "A-", "O-", "*", "-E", "-U", "-F", "-R", "-P", "-B", "-L", "-G", "-T", "-S", "-D", "-Z" }

--- Convert a traditional-style steno chord to a string.
-- Keys are written in `steno_order` without their dash. A single "-" is
-- inserted before the first right-hand key unless one of A, O, E, U or *
-- has already been written.
-- @param chord table keyed by entries of `steno_order`
-- @return string such as "POT" or "T-T"
local function write_chord(chord)
  local out = {}
  local needs_dash = true

  for _, key in ipairs(steno_order) do
    if chord[key] then
      -- parentheses drop gsub's second return value (the match count)
      local letter = (string.gsub(key, "%-", ""))

      -- plain find: `letter` may be "*", which must not be read as a pattern
      if string.find("AOEU*", letter, 1, true) then needs_dash = false end

      if needs_dash and string.sub(key, 1, 1) == "-" then
        out[#out + 1] = "-"
        needs_dash = false
      end

      out[#out + 1] = letter
    end
  end

  return table.concat(out)
end

--- Serialize a traditional steno outline: chords written by `write_chord`,
-- joined with "/".
-- @param outline sequence of steno chord tables
-- @return string such as "P-L/T-L/PEL"
local function write_outline(outline)
  local out = {}

  for _, chord in ipairs(outline) do
    out[#out + 1] = write_chord(chord)
  end

  return table.concat(out, "/")
end

assert(write_outline({{["P-"] = 1, ["O-"] = 1, ["-T"] = 1}}) == "POT")
assert(write_outline({{["-E"] = 1}}) == "E")
assert(write_outline({{["T-"] = 1, ["-T"] = 1}}) == "T-T")
assert(write_outline({{["T-"] = 1}, {["T-"] = 1}}) == "T/T")

-- toki pona key -> traditional steno key
local tp_steno_map = {
  S = "S-", P = "T-", K = "P-", L = "H-", M = "*",
  H = "A-", W = "O-", N = "-E", R = "-U",
  A = "-F", I = "-P", E = "-L", O = "-T", U = "-D",
}

--- Translate one toki pona chord into a traditional steno chord.
-- @param chord toki pona chord table
-- @return steno chord table
-- Raises an error naming the key when it has no entry in `tp_steno_map`.
local function chord_to_steno(chord)
  local out = {}

  for key in pairs(chord) do
    local steno_key = tp_steno_map[key]
    if not steno_key then
      error(string.format("no steno key for toki pona key %q", tostring(key)))
    end
    out[steno_key] = true
  end

  return out
end

--- Translate every chord of a toki pona outline with `chord_to_steno`.
-- @param outline sequence of toki pona chord tables
-- @return sequence of steno chord tables
local function outline_to_steno(outline)
  local out = {}

  for _, chord in ipairs(outline) do
    out[#out + 1] = chord_to_steno(chord)
  end

  return out
end

assert(write_outline(outline_to_steno({{K = 1, E = 1}, {P = 1, E = 1}, {K = 1, E = 1, N = 1}})) == "P-L/T-L/PEL")

-- ok now we're actually generating the maps for tp words

--- Split a word into syllables.
-- A "-" is inserted before every non-vowel + vowel pair; an "n" directly in
-- front of such a pair stays on the left of the dash. The result is then
-- split on "-".
-- @param word lowercase word
-- @return iterator over the syllables
local function split_word(word)
  local hyphenated = string.gsub(word, "(n?)([^aeiou][aeiou])", "%1-%2")
  return string.gmatch(hyphenated, "[^-]+")
end

-- letter -> toki pona keys pressed for that letter
local letter_map = {
  s = "S", t = "SH", p = "P", w = "PH",
  k = "K", j = "KH", l = "L", m = "M",
  n = "MH", a = "A", e = "E", i = "I",
  o = "O", u = "U",
}

--- Convert one syllable into a toki pona chord.
-- A trailing "n" becomes the N key; every remaining letter contributes the
-- keys listed for it in `letter_map`.
-- @param s syllable
-- @return chord table
-- Raises an error naming the letter when it has no entry in `letter_map`.
local function convert_syllable(s)
  local out = {}
  local body = s

  if string.match(body, "n$") then
    out.N = true
    body = string.gsub(body, "n$", "")
  end

  for letter in string.gmatch(body, ".") do
    local keys = letter_map[letter]
    if not keys then
      error(string.format("cannot convert letter %q in syllable %q", letter, s))
    end
    for k in string.gmatch(keys, ".") do out[k] = true end
  end

  return out
end

--- Build the outline of a word: one chord per syllable.
-- @param word lowercase word
-- @return sequence of chord tables
local function word_outline(word)
  local out = {}

  for s in split_word(word) do
    out[#out + 1] = convert_syllable(s)
  end

  return out
end

--- Assert that an outline serializes to the expected string.
local function assert_tp(outline, expected)
  local actual = write_tp_outline(outline)
  assert(actual == expected, expected .. " " .. actual)
end

assert_tp(word_outline("ken"), "KNE")
assert_tp(word_outline("kepeken"), "KE/PE/KNE")
assert_tp(word_outline("linja"), "LNI/KHA")

--- Build the single-chord brief of a word.
-- All syllable chords are merged into one; when `modifier` is truthy the W
-- key is added as well.
-- @param word lowercase word
-- @param modifier whether to add the W key
-- @return one-element outline, or nil for a single-syllable word
local function word_brief(word, modifier)
  local out = word_outline(word)

  -- we don't wanna take space with single-syllable briefs
  if #out == 1 then return nil end

  if modifier then out[#out + 1] = { W = true } end
  return merge_outline(out)
end

assert_tp(word_brief("linja"), "KLHNAI")
assert_tp(word_brief("lili", true), "LWI")

-- Generate the dictionary from the config file named on the command line and
-- print it as JSON on stdout.
do
  if not (arg and arg[1]) then
    warn("usage: steno.lua [CONFIG]")
    os.exit(1)
  end

  local config = assert(dofile(arg[1]))
  assert(config.words, "config must define `words`")
  if not config.extra_briefs then config.extra_briefs = {} end
  local dictionary = {}

  -- returns whether insertion was successful
  -- you can pass a nil outline to make this a no-op
  local function add_word(outline, word, force)
    -- it's more useful to just pretend the insertion was successful
    if not outline then return true end

    -- we can change this for keymaps later!
    local steno
    if config.convert_to_english then
      steno = write_outline(outline_to_steno(outline))
    else
      steno = write_tp_outline(outline)
    end

    if not force and dictionary[steno] then
      -- duplicates are dropped silently; the report is built but discarded
      nop(string.format(
        "duplicate: %s = %s, %s",
        steno, dictionary[steno], word
      ))
      return false
    end

    dictionary[steno] = word
    return true
  end

  for _, word in ipairs(config.words) do
    -- the full outline always wins
    add_word(word_outline(word), word, true)

    -- try the plain brief first, then the brief with the W modifier
    if not add_word(word_brief(word, false), word, false) then
      add_word(word_brief(word, true), word, false)
    end

    -- register every proper syllable prefix of the word, unless taken
    local partial = ""
    for syl in split_word(word) do
      partial = partial .. syl
      -- the complete word was already force-inserted above
      if partial == word then break end
      add_word(word_outline(partial), partial, false)
    end
  end

  for k, v in pairs(config.extra_briefs) do
    -- the irony of reparsing and unparsing this is not lost on me
    add_word(read_tp_outline(k), v, true)
  end

  -- fingerspelling: letter keys plus W, plus R for the upper-case variant
  for _, upper in ipairs({ false, true }) do
    for letter, keys in pairs(letter_map) do
      local shown = upper and string.upper(letter) or letter
      local entry = "{&" .. shown .. "}"
      local chord_keys = keys .. "W" .. (upper and "R" or "")

      add_word(read_tp_outline(chord_keys), entry, true)
    end
  end

  print(dkjson.encode(dictionary))
end

return { write_tp_outline = write_tp_outline, word_outline = word_outline }