Module:lt-pron

From Wiktionary, the free dictionary
Jump to navigation Jump to search
This module page is still in development.

This module is not to be directly used. It is used by Template:lt-IPA, see there for usage.

Testcases[edit]

6 of 20 tests failed. (refresh)

TextExpectedActual
test_pron:
Failedstovė́tistoːˈʋʲeː.tʲɪstoːˈʋʲêː.tʲɪ
Passedgaliàɡɐˈlʲɛɡɐˈlʲɛ
Passedgàliaˈɡɐ.lʲɛˈɡɐ.lʲɛ
Passedapniū̃ktiɐpʲˈnʲuːk.tʲɪɐpʲˈnʲuːk.tʲɪ
Passedpažìntipɐˈʑɪnʲ.tʲɪpɐˈʑɪnʲ.tʲɪ
Passedtamsùstɐmˈsʊstɐmˈsʊs
Failedabúojasɐˈbûə.jɐsɐˈbʊ̂ə.jɐs
Passedkóšėˈkôː.ɕeːˈkôː.ɕeː
Passedkõšėˈkoː.ɕeːˈkoː.ɕeː
Failedpriepʲrʲiəpʲrʲɪ
Passednórsˈnôːrsˈnôːrs
Failedgyvẽnimasɡʲiːˈʋʲɛː.nʲɪ.mɐsɡʲiːˈʋʲɛ.nʲɪ.mɐs
Failedkauburỹskɐʊ̯ˑ.bʊˈrʲiːskɐʊ̯.bʊˈrʲiːs
TextExpectedActual
test_respell:
Passedpãieškospãjieškospãjieškos
Passedievàjievàjievà
Passeddiẽmedisdʲiẽmʲedʲisdʲiẽmʲedʲis
Passedapkeliáujantiapʲkʲelʲiáujanʲtʲiapʲkʲelʲiáujanʲtʲi
Passedapniū̃ktiapʲnʲū̃ktʲiapʲnʲū̃ktʲi
Passedabchãząabçãząabçãzą
Failedbiŏlŏ̀gijabʲijɔlɔ̀gʲijabʲiɔlɔ̀gʲija

local export = {}

local U = require("Module:string/char")
local udecomp = mw.ustring.toNFD
local rsubn = mw.ustring.gsub
local sub = mw.ustring.sub
local rmatch = mw.ustring.match
local rfind = mw.ustring.find
local ugmatch = mw.ustring.gmatch
local ulen = mw.ustring.len
local ulower = mw.ustring.lower

-- version of rsubn() that discards all but the first return value
local function rsub(term, foo, bar, n)
	local retval = rsubn(term, foo, bar, n)
	return retval
end

-- unicode codepoints
local ACUTE = U(0x0301) -- acute =  ́
local GRAVE = U(0x0300) -- grave =  ̀
local CIRC = U(0x0302) -- circumflex =  ̂
local TILDE = U(0x0303) -- tilde =  ̃
local MACRON = U(0x0304) -- macron =  ̄
local BREVE = U(0x0306) -- breve =  ̆
local DOT = U(0x0307) -- dot above = ̇
local CARON = U(0x030C) -- caron =  ̌
local OGONEK = U(0x0328) -- ogonek =  ̨

-- character classes
local accents = ACUTE .. GRAVE .. CIRC .. TILDE .. MACRON .. BREVE
local diacritics = accents .. DOT .. CARON .. OGONEK
local vowels = "[aeiouy]"
local consonants = "[bcdfghklmnprstvzðþx]" -- does not include j

-- various substitutions
local subs = {
	glyphs = {
		["ch"] = "ç",
		["ts"] = "ć",
		["dz"] = "ð",
		["dz" .. CARON] = "þ",
		["o" .. BREVE] = "ɔ"
	},
	ipa_c = {
		["c" .. CARON] = "t͡ʃ",
		["c"] = "t͡s",
		["c" .. ACUTE] = "t͡s",
		["ç"] = "x",
		["þ"] = "d͡ʒ",
		["ð"] = "d͡z",
		["g"] = "ɡ",
		["h"] = "ɣ",
		["qu"] = "kʋ",
		["q"] = "k",
		["s" .. CARON] = "ʃ",
		["v"] = "ʋ",
		["z" .. CARON] = "ʒ"
	},
	ipa_v = {
		["a"] = "ɐ",
		["e"] = "ɛ",
		["i"] = "ɪ",
		["u"] = "ʊ"
	}
}

local lang = require("Module:languages").getByCode("lt")

function export.link(term)
	return require("Module:links").full_link{ term = term, lang = lang }
end

--[=[
Takes the orthographic representation to make it closer
to the phonological output by respelling and adding missing segments
]=]
function export.respell(term)
	-- decompose accents from term
	term = udecomp(term)

	-- replace digraph consonants with temporary placeholders
	term = rsub(term, ".[" .. CARON .. BREVE .. "hsz]?", subs.glyphs)

	-- add missing /j/: ievà > jievà
	if rfind(term, "^i[" .. ACUTE .. GRAVE .. "]?e" .. TILDE .. "?") then
		term = "j" .. term
	end
	-- pãieškos > pãjieškos
	term = rsub(
		term,
		"(" .. vowels .. "[" .. diacritics .. "]*)" .. "(i[" .. ACUTE .. GRAVE .. "]?e" .. TILDE .. "?)",
		"%1j%2"
	)

	-- show palatalisation
	term = rsub(term, "i([aou][" .. accents .. "]*)(.?)",
		function(vow, next_char)
			if next_char == "u" then
				return "i" .. vow .. next_char
			else
				return "ʲ" .. vow .. next_char
			end
		end
	)
	term = rsub(term, "(" .. consonants .. CARON .. "?)([iejy])", "%1ʲ%2")
	term = rsub(term, "(" .. consonants .. "+)(" .. consonants .. "ʲ)",
		function(cons, soft)
			local out = ""
			for c in ugmatch(cons, ".") do
				-- k does not become palatalised: krienas, apniūkti do not have kʲ
				if c == "k" then
					out = out .. c
				else
					out = out .. c .. "ʲ"
				end
			end
			return out .. soft
		end
	)

	return term
end

--[=[
Splits the term into its syllable boundaries
]=]
function export.syllabify(term, table)
	term = rsub(term,
		"([aeioɔuy" .. diacritics ..
		"]*[^aeioɔuy]-)([sz]?" .. CARON .. "?ʲ?[ptkbdðþgçćc]?" .. CARON .. "?ʲ?[lmnrvj]?ʲ?[aeioɔuy])",
		"%1.%2"
	)
	term = rsub(term, "^%.", "")
	term = rsub(term, "%.ʲ", "ʲ.")
	term = rsub(term, "%.([ptbdðþ]ʲ?)([mn])", "%1.%2")

	return table and ugmatch(term, ".") or term
end

--[=[
Render pronunciation
]=]
local function pron(term)
	-- replace consonants with their ipa equivalents
	term = rsub(term, ".[" .. CARON .. "u]?", subs.ipa_c)

	-- consonant allophones
	term = rsub(term, "ʃʲ", "ɕ")
	term = rsub(term, "ʒʲ", "ʑ")
	term = rsub(term, "l([^ʲ])", "ɫ%1")
	term = rsub(term, "n(ʲ?%.?[kɡ])", "ŋ%1")

	-- vowels
	term = rsub(term,
		"^([^%." .. ACUTE .. GRAVE .. TILDE .. "]-[" .. ACUTE .. GRAVE .. TILDE .. "])",
		"ˈ%1"
	)
	term = rsub(term,
		"%.([^%." .. ACUTE .. GRAVE .. TILDE .. "]-[" .. ACUTE .. GRAVE .. TILDE .. "])",
		"ˈ%1"
	)
	-- diphtongs
	term = rsub(term, "ia" .. ACUTE .. "u", "æ" .. CIRC .. "ʊ")
	term = rsub(term, "iau" .. TILDE .. "?", "ɛʊ")
	term = rsub(term, "au" .. TILDE .. "?", "ɐʊ̯")
	term = rsub(term, "a" .. ACUTE .. "i", "ɐ" .. CIRC .. "ɪ")
	term = rsub(term, "ai" .. TILDE .. "?", "ɐɪ")
	term = rsub(term, "e" .. ACUTE .. "i", "ɛ" .. CIRC .. "ɪ")
	term = rsub(term, "ei" .. TILDE .. "?", "ɛɪ")
	term = rsub(term, "a" .. ACUTE .. "u", "a" .. CIRC .. "ʊ")
	term = rsub(term, "au" .. TILDE .. "?", "ɒʊ")
	term = rsub(term, "e" .. ACUTE .. "u", "ɛ" .. CIRC .. "ʊ")
	term = rsub(term, "eu" .. TILDE .. "?", "ɛʊ")
	term = rsub(term, "i" .. ACUTE .. "e", "i" .. CIRC .. "ə")
	term = rsub(term, "ie" .. TILDE .. "?", "iə")
	term = rsub(term, "u" .. ACUTE .. "o", "u" .. CIRC .. "ə")
	term = rsub(term, "uo" .. TILDE .. "?", "uə")
	term = rsub(term, "u" .. ACUTE .. "i", "ʊ" .. CIRC .. "ɪ")
	term = rsub(term, "ui" .. TILDE .. "?", "ʊɪ")
	term = rsub(term, "o" .. ACUTE .. "u", "ɔ" .. CIRC .. "ɪ")
	term = rsub(term, "u" .. ACUTE .. "u", "ɔ" .. CIRC .. "ʊ")

	term = rsub(term, "a", "ɐ")
	term = rsub(term, "ɐ" .. ACUTE, "âː")
	term = rsub(term, "ɐ" .. TILDE, "aː")

	term = rsub(term, "e", "ɛ")
	term = rsub(term, "ɛ" .. DOT, "eː")
	term = rsub(term, "ɛ" .. OGONEK, "æː")

	term = rsub(term, "i", "ɪ")
	term = rsub(term, "ɪ" .. OGONEK, "iː")

	term = rsub(term, "y", "iː")

	term = rsub(term, "u", "ʊ")
	term = rsub(term, "ʊ" .. MACRON, "uː")
	term = rsub(term, "ʊ" .. OGONEK, "uː")

	term = rsub(term, "o" .. TILDE .. "?", "oː")
	term = rsub(term, "ʲaː", "ʲæː")
	term = rsub(term, "ʲɐ", "ʲɛ")

	term = rsub(term, "ː" .. ACUTE, CIRC .. "ː") -- acutes = stressed + circumflex tone
	term = rsub(term, TILDE, "") -- tilde = stressed + long
	term = rsub(term, GRAVE, "") -- grave = stressed + short

	term = rsub(term, "([ʲj])a(" .. OGONEK .. "?)", "%1e%2")
	return term
end

--[=[
	Returns the respelled term and its pronunciation
]=]
function export.test_respell(frame)
	local args = require("Module:parameters").process(frame:getParent().args, {[1] = {default = ""}})
	local syll = export.syllabify(export.respell(args[1]))

	return syll .. " → [" .. pron(syll) .. "]"
end

--[=[
Converts the term to IPA
]=]
function export.toIPA(text)
	return pron(ulower(export.syllabify(export.respell(text))))
end

--[=[
	Displays the IPA of the term
]=]
function export.show(frame)
	local params = {
		[1] = {default = mw.title.getCurrentTitle().text}
	}
	local args = require("Module:parameters").process(frame:getParent().args, params)

	return args[1]
end

return export