summary refs log tree commit diff
path: root/dictgen/steno.lua
diff options
context:
space:
mode:
Diffstat (limited to 'dictgen/steno.lua')
-rw-r--r-- dictgen/steno.lua 251
1 files changed, 251 insertions, 0 deletions
diff --git a/dictgen/steno.lua b/dictgen/steno.lua
new file mode 100644
index 0000000..95040d6
--- /dev/null
+++ b/dictgen/steno.lua
@@ -0,0 +1,251 @@
+-- toki pona steno generator
+-- TODO:
+-- - make fingerspelling syllable-based MAYBE
+-- - punctuation?
+
+local dkjson = require("dkjson")
+
+-- write str followed by a newline to stderr
+local function warn(str)
+	local line = str .. "\n"
+	io.stderr:write(line)
+end
+
+-- does nothing and returns nothing; a sink for calls whose result is discarded
+local function nop()
+	return
+end
+
+-- order in which toki pona layout keys are written within a chord
+local tp_order = { "S", "P", "K", "L", "M", "H", "W", "N", "R", "A", "I", "E", "O", "U" }
+
+-- serialise an outline (a list of chords) to a string, chords separated by "/"
+-- each chord is a table in which any truthy key counts as a pressed key;
+-- keys are written in tp_order and keys not listed there are ignored
+local function write_tp_outline(outline)
+	local chords = {}
+	for i, chord in ipairs(outline) do
+		local pressed = {}
+		for _, key in ipairs(tp_order) do
+			if chord[key] then pressed[#pressed + 1] = key end
+		end
+		chords[i] = table.concat(pressed)
+	end
+	return table.concat(chords, "/")
+end
+
+-- parse a "/"-separated outline string into a list of chords
+-- every character of a segment becomes a key set to true in that chord;
+-- empty segments (leading, trailing or doubled "/") produce no chord
+local function read_tp_outline(str)
+	local outline = {}
+	for segment in string.gmatch(str, "[^/]+") do
+		local chord = {}
+		for pos = 1, #segment do
+			chord[string.sub(segment, pos, pos)] = true
+		end
+		outline[#outline + 1] = chord
+	end
+	return outline
+end
+
+-- self-checks: keys are written in tp_order and chords are joined with "/"
+do
+	local one_chord = { { S = true, L = true } }
+	local two_chords = { { S = true, P = true, O = true }, { N = true, I = true } }
+	assert(write_tp_outline(one_chord) == "SL")
+	assert(write_tp_outline(two_chords) == "SPO/NI")
+end
+
+-- collapse an outline into a single chord
+-- every key present in any chord of the outline is set to true in the result
+-- returns a new outline holding just that one merged chord
+local function merge_outline(outline)
+	local merged = {}
+	for _, chord in ipairs(outline) do
+		for key in pairs(chord) do
+			merged[key] = true
+		end
+	end
+	return { merged }
+end
+
+-- self-check: the merged chord is written in tp_order, not in input order
+do
+	local merged = merge_outline({ { S = true, P = true, O = true }, { N = true, I = true } })
+	assert(write_tp_outline(merged) == "SPNIO")
+end
+
+-- order in which the keys of an English-style steno chord are written
+local steno_order = { "S-", "T-", "K-", "P-", "W-", "H-", "R-", "A-", "O-", "*", "-E", "-U", "-F", "-R", "-P", "-B", "-L", "-G", "-T", "-S", "-D", "-Z" }
+
+-- letters after which a right-hand key needs no "-" separator
+local steno_center = { A = true, O = true, E = true, U = true, ["*"] = true }
+
+-- convert a traditional-style steno chord to a string
+-- chord: table whose truthy keys are names from steno_order ("S-", "*", "-T", ...)
+-- returns the letters of the pressed keys in steno_order; one "-" is inserted
+-- before the first right-hand key unless a vowel or "*" was already written
+-- (so T- plus -T gives "T-T", while P- O- -T gives "POT")
+local function write_chord(chord)
+	local out = {}
+	local needs_dash = true
+
+	for _, key in ipairs(steno_order) do
+		if chord[key] then
+			-- "-" is a magic character in Lua patterns, so escape it; the
+			-- parentheses drop gsub's second result (the substitution count)
+			local letter = (string.gsub(key, "%-", ""))
+
+			-- table lookup instead of using the letter as a pattern: "*"
+			-- is a pattern quantifier and must not be interpreted as one
+			if steno_center[letter] then needs_dash = false end
+
+			if needs_dash and string.sub(key, 1, 1) == "-" then
+				out[#out + 1] = "-"
+				needs_dash = false
+			end
+
+			out[#out + 1] = letter
+		end
+	end
+
+	return table.concat(out)
+end
+
+-- serialise a steno outline: each chord goes through write_chord and the
+-- results are joined with "/"
+local function write_outline(outline)
+	local parts = {}
+	for i, chord in ipairs(outline) do
+		parts[i] = write_chord(chord)
+	end
+	return table.concat(parts, "/")
+end
+
+-- self-checks: no dash after a vowel, a dash between two bare consonant
+-- sides, and "/" between chords
+assert(write_outline({ { ["P-"] = 1, ["O-"] = 1, ["-T"] = 1 } }) == "POT")
+assert(write_outline({ { ["-E"] = 1 } }) == "E")
+assert(write_outline({ { ["T-"] = 1, ["-T"] = 1 } }) == "T-T")
+assert(write_outline({ { ["T-"] = 1 }, { ["T-"] = 1 } }) == "T/T")
+
+-- which English-layout steno key each toki pona layout key is written as
+local tp_steno_map = {
+	S = "S-", P = "T-", K = "P-", L = "H-", M = "*",
+	H = "A-", W = "O-", N = "-E", R = "-U",
+	A = "-F", I = "-P", E = "-L", O = "-T", U = "-D",
+}
+
+-- translate one toki pona chord into the equivalent English-layout chord
+-- chord: table whose keys are toki pona key names (the keys of tp_steno_map)
+-- returns a new table keyed by steno key names, each set to true
+-- raises an error naming the key if it has no entry in tp_steno_map
+local function chord_to_steno(chord)
+	local out = {}
+	for key in pairs(chord) do
+		local steno_key = tp_steno_map[key]
+		if steno_key == nil then
+			-- without this check the assignment below would fail with the
+			-- unhelpful "table index is nil"
+			error(string.format("no steno key mapped for %q", tostring(key)))
+		end
+		out[steno_key] = true
+	end
+	return out
+end
+
+-- translate every chord of a toki pona outline with chord_to_steno,
+-- keeping the chords in order
+local function outline_to_steno(outline)
+	local converted = {}
+	for i, chord in ipairs(outline) do
+		converted[i] = chord_to_steno(chord)
+	end
+	return converted
+end
+
+-- self-check: "ke/pe/ken" written on the English layout
+assert(write_outline(outline_to_steno({ { K = 1, E = 1 }, { P = 1, E = 1 }, { K = 1, E = 1, N = 1 } })) == "P-L/T-L/PEL")
+
+-- ok now we're actually generating the maps for tp words
+
+-- returns an iterator over the syllables of word
+-- a "-" is inserted before every non-vowel + vowel pair (an "n" directly
+-- in front of the pair stays with the preceding syllable), then the marked
+-- string is split on "-"
+local function split_word(word)
+	local syllable_start = "(n?)([^aeiou][aeiou])"
+	local marked = string.gsub(word, syllable_start, "%1-%2")
+	return string.gmatch(marked, "[^-]+")
+end
+
+-- toki pona layout keys pressed for each letter; some letters take two keys
+local letter_map = {
+	s = "S", t = "SH", p = "P", w = "PH",
+	k = "K", j = "KH", l = "L", m = "M",
+	n = "MH", a = "A", e = "E", i = "I",
+	o = "O", u = "U",
+}
+
+-- build the chord for one syllable
+-- s: a syllable made of the lowercase letters that appear in letter_map
+-- a syllable-final "n" is written with the N key; every other letter
+-- contributes the keys listed for it in letter_map
+-- returns a table with each pressed key set to true
+-- raises an error naming the syllable and letter if a letter is not in letter_map
+local function convert_syllable(s)
+	local out = {}
+
+	-- gsub's second result tells us whether a final "n" was stripped
+	local body, finals = string.gsub(s, "n$", "")
+	if finals > 0 then out.N = true end
+
+	for letter in string.gmatch(body, ".") do
+		local keys = letter_map[letter]
+		if not keys then
+			-- otherwise gmatch below fails with "string expected, got nil"
+			error(string.format("cannot convert %q: no keys defined for letter %q", s, letter))
+		end
+		for key in string.gmatch(keys, ".") do out[key] = true end
+	end
+
+	return out
+end
+
+-- build the full outline of a word: one chord per syllable, in order
+local function word_outline(word)
+	local chords = {}
+	for syllable in split_word(word) do
+		chords[#chords + 1] = convert_syllable(syllable)
+	end
+	return chords
+end
+
+-- assert that outline serialises (via write_tp_outline) to expected;
+-- the failure message is the expected string, a space, then the actual one
+local function assert_tp(outline, expected)
+	local actual = write_tp_outline(outline)
+	assert(actual == expected, expected .. " " .. actual)
+end
+
+-- self-checks: final "n" uses the N key, and two-key letters (j = KH)
+assert_tp(word_outline("ken"), "KNE")
+assert_tp(word_outline("kepeken"), "KE/PE/KNE")
+assert_tp(word_outline("linja"), "LNI/KHA")
+
+-- word briefs are single-chord versions of a word's outline, with the W key
+-- added when modifier is truthy
+-- returns nil for one-syllable words, otherwise a one-chord outline
+local function word_brief(word, modifier)
+	local chords = word_outline(word)
+
+	-- we don't wanna take space with single-syllable briefs
+	if #chords == 1 then return nil end
+
+	if modifier then chords[#chords + 1] = { W = true } end
+	return merge_outline(chords)
+end
+
+-- self-checks: all syllables merged into one chord, and the W modifier
+assert_tp(word_brief("linja"), "KLHNAI")
+assert_tp(word_brief("lili", true), "LWI")
+
+-- entry point: load the config named on the command line, build the
+-- dictionary and print it to stdout as JSON
+do
+	if not arg[1] then
+		warn("usage: steno.lua [CONFIG]")
+		os.exit(1)
+	end
+
+	-- fields read from the config in this block:
+	--   words              list of words to generate entries for (required)
+	--   extra_briefs       map of tp outline string -> entry (optional)
+	--   convert_to_english truthy to write English-layout steno keys (optional)
+	local config = assert(dofile(arg[1]), "config file did not return a value")
+	assert(config.words, "config is missing the 'words' list")
+	config.extra_briefs = config.extra_briefs or {}
+
+	-- maps written outline string -> dictionary entry
+	local dictionary = {}
+
+	-- returns whether insertion was successful
+	-- you can pass a nil outline to make this a no-op
+	-- force: overwrite an existing entry instead of rejecting the duplicate
+	local function add_word(outline, word, force)
+		-- it's more useful to just pretend the insertion was successful
+		if not outline then return true end
+
+		-- we can change this for keymaps later!
+		local steno
+		if config.convert_to_english then
+			steno = write_outline(outline_to_steno(outline))
+		else
+			steno = write_tp_outline(outline)
+		end
+
+		if dictionary[steno] and not force then
+			-- the report is routed to nop, i.e. discarded; pass it to warn
+			-- instead to see which outlines collide
+			nop(string.format(
+				"duplicate: %s = %s, %s",
+				steno, dictionary[steno], word
+			))
+			return false
+		end
+
+		dictionary[steno] = word
+		return true
+	end
+
+	for _, word in ipairs(config.words) do
+		-- the full outline always takes its slot
+		add_word(word_outline(word), word, true)
+
+		-- try the plain brief first; fall back to the W-modified brief when
+		-- the plain one is already taken
+		if not add_word(word_brief(word, false), word, false) then
+			add_word(word_brief(word, true), word, false)
+		end
+
+		-- also add each prefix of the word, one syllable at a time
+		local partial = ""
+		for syl in split_word(word) do
+			partial = partial .. syl
+			-- the complete word was force-added above, so stop once the
+			-- prefix has grown into it
+			if partial == word then break end
+			add_word(word_outline(partial), partial, false)
+		end
+	end
+
+	for k, v in pairs(config.extra_briefs) do
+		-- the irony of reparsing and unparsing this is not lost on me
+		add_word(read_tp_outline(k), v, true)
+	end
+
+	-- fingerspelling: each letter's keys plus W, and additionally R for the
+	-- uppercase form; entries are written as "{&x}"
+	for _, upper in ipairs({ false, true }) do
+		for letter, keys in pairs(letter_map) do
+			-- fresh locals rather than reassigning the loop variables
+			local glyph = upper and string.upper(letter) or letter
+			local entry = "{&" .. glyph .. "}"
+			local stroke = keys .. "W" .. (upper and "R" or "")
+
+			add_word(read_tp_outline(stroke), entry, true)
+		end
+	end
+
+	print(dkjson.encode(dictionary))
+end
+
+-- values handed back to whoever loads this file
+return {
+	word_outline = word_outline,
+	write_tp_outline = write_tp_outline,
+}