Module:User:Catonif/scn-IPA

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This is a private module sandbox of Catonif, for their own experimentation. Items in this module may be added and removed at Catonif's discretion; do not rely on this module's stability.


-- Export table; it is returned at the end of the module.
local p = {}

-- Language object for the code "scn"; handed to the formatting helpers below.
local lang = require("Module:languages").getByCode("scn")
-- Provides format_IPA and format_IPA_full, used when assembling the output.
local m_IPA = require("Module:IPA")
-- Provides format_qualifiers, used for the labels placed before transcriptions.
local m_a = require("Module:accent qualifier")

-- Lookup used to rewrite accented vowels (given as their two UTF-8 bytes)
-- into a plain vowel followed by a one-byte marker:
-- "`" for a grave accent, "^" for a circumflex.
local a = {
	["\195\160"] = "a`", ["\195\162"] = "a^", -- à â
	["\195\168"] = "e`", ["\195\170"] = "e^", -- è ê
	["\195\172"] = "i`", ["\195\174"] = "i^", -- ì î
	["\195\178"] = "o`", ["\195\180"] = "o^", -- ò ô
	["\195\185"] = "u`", ["\195\187"] = "u^", -- ù û
}

-- Converts one orthographic word into the module's intermediate notation.
--
-- Markers produced here (all single bytes, so later patterns stay simple):
--   "`"  follows the stressed vowel
--   "*"  gemination mark placed before the consonant it affects
--   "-"  syllable break
--   uppercase letters and "M" stand in for sounds that are turned into IPA
--   later by the transcription functions.
-- In the input, a "." between letters blocks the cluster reading that would
-- otherwise apply (e.g. "d.d", "g.n", "n.g", "s.C", "g.l", "s-.tr") and, after
-- i/u, marks hiatus.
--
-- @param str  a single word (no whitespace)
-- @return the word in intermediate notation
local function singleword(str)

	-- [technical] diacritics: accented vowels become vowel + marker via table `a`.
	-- accentCount is the number of pattern matches.
	-- NOTE(review): bytes inside the class that have no entry in `a` (e.g. \195\169)
	-- are left unchanged by gsub but still counted — confirm this is acceptable.
	local str, accentCount = str:gsub("\195[\160\162\168-\187]", a); -- [technical] diacritics
	-- NOTE: this `local str` shadows the parameter of the same name. That is
	-- legal Lua, and the original parameter is not needed after this line.

	str = str:gsub("%^", "") -- ignore circumflex for now
	-- TODO: [important] understand whether the circumflex is relevant for pronunciation or not

	:gsub("ç", "M") -- [technical] so that it occupies one byte

	:gsub("([^aeiou])%1", "*%1") -- [technical] geminated consonants: a doubled non-vowel becomes "*" + consonant

	:gsub("[nm]([^aeiou])", "n%1"); -- [technical] nasal coda: n/m before a non-vowel is written "n"

	-- put accents on plain words: with no accent found above, mark the
	-- second-to-last vowel (an "i" directly before the last vowel is skipped over)
	if accentCount < 1 then
		str = str:gsub("([aeiou])([^aeiou]*i?[aeiou][^aeiou]*)$", "%1`%2");
	end

	-- velars and palatal affricates: c/q/k and g are first uppercased, then
	-- lowercased again unless followed by i or e; "c" is then written "k" and
	-- an "h" after k/g is dropped
	str = str:gsub("[cqk]", "C"):gsub("g", "G")
	:gsub("([CG])([^ie])", function (a, b) return a:lower() .. b end)
	:gsub("c", "k"):gsub("([kg])h", "%1")

	-- temp

	-- nongeminated /ts/ in [[scàuzu]], etc.
	:gsub("(a`?u)z", "%1ts")
	-- TODO: [unimportant] agree on whether this should be in the template
	-- it could very well just not be here, and the input for (eg) scàuzu would need to be 'scàutsu'

	-- some spelling clusters
	:gsub("%*d", "*D"):gsub("d%.d", "*d") -- retroflex dd
	-- TODO: [unimportant] agree
	-- what spelling on the page title
	:gsub("gn", "N"):gsub("g%.n", "gn") -- palatal gn
	:gsub("ng", "Q"):gsub("n%.g", "ng") -- velar ng
	-- TODO: [important] agree
	-- theoretically an allophone of /ng/
	:gsub("sC", "S"):gsub("s%.C", "sC") -- fricative scj
	:gsub("gli", "Li"):gsub("g%.l", "gl") -- palatal glj
	-- TODO: [unimportant] agree
	-- it is only a regional allophone of ghj, but nothing wrong with supporting the spelling
	:gsub("zh", "Z") -- voiced dz
	-- TODO: [important] understand what this is
	-- marginal phoneme, orthographically indistinct from /ts/, the 'zh' would only appear as input for the template

	-- double (and initial) r is another phoneme
	:gsub("^r([aeiou])", "*R%1"):gsub("%*r", "*R")
	-- TODO: [important] agree on this

	-- always geminated consonants: after a vowel, and at the start of the word
	:gsub("([aeiou]`?)([bGzZNSLQ])", "%1*%2")
	:gsub("^([bGzZNSLQ])", "*%1")
	-- FIXME: [optimization] Lua patterns have no alternation, so ([aeiou`?]|^)
	-- cannot be written as one pattern; is there another way?

	-- hyphenation
	:gsub("([rnls%*])([^aeiou])", "%1-%2") -- syllable break between closed coda and consonant
	:gsub("^([^%-aeiou]*[aeiou])", "-%1") -- initial syllable break for pure clusters
	:gsub("([aeiou]`?)([^%-aeiou]*[aeiou])", "%1-%2") -- syllable break after open coda
	:gsub("([aeiou]`?)([^%-aeiou]*[aeiou])", "%1-%2") -- second pass, since gsub matches do not overlap (needed for scaramanzia)
	-- TODO: [important] agree: the kind of hyphenation I like is highly debatable

	-- semivowels
	:gsub("([iu])%-([aeiou])", function (a, b) return a:upper() .. b end) -- unstressed /i u/ before vowels become semivowels (I/U) and the break is removed
	:gsub("([iu]%-)%.([aeiou])", "%1%2") -- the dot indicates hiatus; it is dropped and the break kept
	:gsub("([GCSLM])I", "%1") -- yod is absorbed after G, C, S, L, M

	:gsub("s%-tr", "*-Hɽ"):gsub("s%-%.tr", "s-tr"); -- retroflex str; "s-.tr" keeps the plain cluster

	return str;

end

-- Placeholder letter -> IPA symbol. The transcription functions pass this
-- table to gsub, so a letter without an entry here is left untouched.
local uppercase = {
	T = "ʈ", D = "ɖ",
	r = "ɾ", R = "r",
	K = "c", A = "ɟ",
	M = "ç",
	N = "ɲ", L = "ʎ",
	S = "ʃ", H = "ʂ",
	I = "j", U = "w",
}

-- Applies sandhi adjustments to a phrase in intermediate notation
-- (words separated by single spaces, "*" = gemination mark, "-" = syllable
-- break, "`" = stress mark).
--
-- @param str  the phrase to adjust
-- @return the adjusted phrase, followed by the match count of the last
--         substitution (both values come straight from the final gsub)
local function refine(str)

	local phrase = str

	-- A gemination mark directly before a closed coda (r/l/n/s/* + break) is dropped.
	phrase = phrase:gsub("%*( ?[rlns%*]%-)", "%1")

	-- Likewise no gemination mark right after a closed coda.
	phrase = phrase:gsub("([rlns%*] ?)%*%-", "%1-")

	-- [technical] A word-final mark is moved across the space onto the next
	-- word, mainly so the liaison rule below can see it.
	phrase = phrase:gsub("%* %-", " *-")

	-- Liaison character "W": a final vowel followed by a word that opens with a coda...
	phrase = phrase:gsub("([aeiou]`?) ([rlns%*]%-)", "%1W%2")

	-- ...and a final r/l/n/s before a vowel-initial word, where the consonant
	-- is moved behind a syllable break.
	phrase = phrase:gsub("([rlns]) %-([aeiou])", "-%1W%2")

	-- j > ghj after a gemination mark or n
	phrase = phrase:gsub("([%*n] ?%-)j", "%1GI")

	-- v > b after a gemination mark
	return phrase:gsub("(%* ?%-)v", "%1b")

end

-- Turns a phrase in intermediate notation into the phonemic IPA string.
--
-- Input markers: "*" gemination, "-" syllable break, "`" after the stressed
-- vowel, "W" liaison, uppercase placeholders resolved through `uppercase`.
--
-- @param str  phrase in intermediate notation
-- @return the transcription, followed by the match count of the final gsub
--         (callers only use the first value)
local function transcription_phonemic(str)

	-- symbols

	-- Deliberately NOT done here (kept for reference):
	--   tr/dr > Tɽ/Dɽ   ʈɽ is not phonemic against tr
	--   n > ɲ before CI/GI   [ɲ] is presumably an allophone of /n/ there
	--   kI > K, gI > A
	return str
	:gsub("C", "tʃ"):gsub("G", "dʒ")
	:gsub("z", "ts"):gsub("Z", "dz")

	-- Spell out gemination: "x*-y" becomes "xy-y".
	-- The replacement uses a bare "-": "%-" in a replacement string is only
	-- tolerated by Lua 5.1 and raises an error from Lua 5.2 on.
	:gsub("(.)%*%-(.)", "%1%2-%2") -- FIXME: [technical] to be moved

	-- NOTE(review): "Q" (set for velar ng upstream) and "P" have no entry in
	-- `uppercase`, so they pass through unchanged — confirm intended output.
	:gsub("[RSTPHDrKANLIUM]", uppercase)

	:gsub("n( ?%-[pb])", "m%1") -- n assimilates to m before p/b
	-- n > ŋ before k/g is not applied in the phonemic transcription
	:gsub("g", "ɡ")

	-- stress
	:gsub("%-([^` W%-]*`)", "'%1") -- main: the break opening the stressed syllable becomes "'"
	-- NOTE(review): a quantifier cannot follow a capture in Lua patterns, so the
	-- "*" below is a literal asterisk and this only matches "'", one character,
	-- then "*". Confirm what the secondary-stress rule was meant to match.
	:gsub("'([^ W'])*", ",%1") -- secondary

	:gsub(" %-", " "):gsub("^%-", "") -- syllable break doesn't remain at the beginning of a word
	:gsub("^([^aeiou])%-", "%1") -- no syllable separator for impure clusters if not stressed on the first syllable
	:gsub("e", "ɛ"):gsub("o", "ɔ")

	:gsub("W", "‿")

	-- Not applied: unstressed /i u/ > /ɪ ʊ/
	-- TODO: [important] agree

	-- Final clean-up: drop stress markers, switch to IPA stress signs and dots.
	:gsub("`", ""):gsub("'", "ˈ"):gsub(",", "ˌ"):gsub("%-+", ".");

end

-- Turns a phrase in intermediate notation into the phonetic transcription
-- labelled "Palermo" in the output.
--
-- Input markers: "*" gemination, "-" syllable break, "`" after the stressed
-- vowel, "W" liaison, uppercase placeholders resolved through `uppercase`.
--
-- @param str  phrase in intermediate notation
-- @return the transcription, followed by the match count of the final gsub
--         (callers only use the first value)
local function transcription_Palermo(str)

	-- symbols

	return str:gsub("tr", "Tɽ"):gsub("dr", "Dɽ")
	:gsub("n( ?%-[CG]I)", "ɲ%1")
	:gsub("kI", "K"):gsub("gI", "A")
	:gsub("C", "ʃ"):gsub("G", "dʒ"):gsub("nʃ", "ntʃ") -- C is a plain fricative except after n
	:gsub("z", "ts"):gsub("Z", "dz")

	:gsub("Hɽ", "H(ɽ)")

	-- Spell out gemination: "x*-y" becomes "xy-y".
	-- The replacement uses a bare "-": "%-" in a replacement string is only
	-- tolerated by Lua 5.1 and raises an error from Lua 5.2 on.
	:gsub("(.)%*%-(.)", "%1%2-%2") -- FIXME: [technical] to be moved

	:gsub("([aeiou]`?%-)d([aeiou])", "%1r%2") -- d between vowels (after a syllable break) becomes r

	-- NOTE(review): "Q" and "P" have no entry in `uppercase`; they pass through
	-- here and are lowercased by :lower() further down — confirm intended output.
	:gsub("[RSTPHDrKANLIUM]", uppercase)

	:gsub("n( ?%-[pb])", "m%1")
	:gsub("n( ?%-k)", "ŋ%1")
	:gsub("n( ?%-)g", "ŋ%1ŋ")
	:gsub("g", "ɡ")

	-- stress
	:gsub("%-([^` W%-]*`)", "'%1") -- main: the break opening the stressed syllable becomes "'"
	-- NOTE(review): a quantifier cannot follow a capture in Lua patterns, so the
	-- "*" below is a literal asterisk and this only matches "'", one character,
	-- then "*". Confirm what the secondary-stress rule was meant to match.
	:gsub("'([^ W'])*", ",%1") -- secondary

	:gsub(" %-", " "):gsub("^%-", "") -- syllable break doesn't remain at the beginning of a word
	:gsub("^([^aeiou])%-", "%1") -- no syllable separator for impure clusters if not stressed on the first syllable

	:gsub("W", "‿")

	-- A "v" is inserted before u following a stressed vowel; every v is then written β.
	:gsub("([aeiou])`%-u", "%1`-vu"):gsub("v", "β")

	-- Stressed i/u are parked as I/U so that only the unstressed ones receive
	-- the lowered-vowel diacritic; :lower() then restores them.
	:gsub("i`", "I"):gsub("i", "i̞"):gsub("u`", "U"):gsub("u", "u̞"):lower()
	-- Stressed e/o become diphthongs, the remaining e/o open vowels.
	:gsub("e`", "iɛ̯"):gsub("o`", "uɔ̯"):gsub("e", "ɛ"):gsub("o", "ɔ")
	-- TODO: [important] agree

	-- Final clean-up: drop stress markers, switch to IPA stress signs and dots,
	-- and remove any gemination marks that are left.
	:gsub("`", ""):gsub("'", "ˈ"):gsub(",", "ˌ"):gsub("%-+", ".")
	:gsub("%*", "");

end

-- Extra word-initial realisations shown next to the Palermo transcription,
-- keyed by the initial sound in intermediate notation.
--   nas: form shown with the "after a nasal" label
--   gem: form shown with the "after syntactic gemination" label
-- NOTE(review): the lookup in multiword uses a single character, so the
-- two-character key "gr" cannot be reached from there — confirm.
local initial_Palermo = {
	C = {
		nas = "‿n.tʃ",
		gem = "‿t.tʃ",
	},
	d = {
		nas = "‿n.n", -- ?? uncertain
		gem = "‿d.d",
	},
	g = {
		nas = "‿ŋ.ŋ", -- not sure of this either
		gem = "‿ɡ.ɡ",
	},
	gr = {
		nas = "‿ŋ.ɡɾ",
		gem = "‿ɡ.ɡɾ", -- the distribution of [dd] for initial geminated /gr/ is unknown
	},
}

-- Builds the complete pronunciation markup for a phrase: one line with the
-- phonemic transcription and one sub-line with the Palermo transcription.
--
-- @param str  the phrase; whitespace and apostrophes separate the words
-- @return wikitext string
local function multiword(str)

	-- Convert each word separately, then rejoin with single spaces.
	local words = {}
	for token in string.gmatch(str, "[^%s']+") do
		words[#words + 1] = singleword(token)
	end

	-- Only the first value returned by refine (the string) is kept.
	local CPA = refine(table.concat(words, " "))

	local phonemic = transcription_phonemic(CPA)

	-- A gemination mark left at either edge is shown as an annotated asterisk
	-- outside the slashes instead of inside the transcription.
	local pre, post = "", ""

	if phonemic:sub(-1) == "*" then
		phonemic = phonemic:sub(1, -2)
		post = "<abbr title=\"triggers final syntactic gemination of the following consonant\">*</abbr>"
	end

	if phonemic:sub(1, 1) == "*" then
		phonemic = phonemic:sub(2, -1)
		pre = "<abbr title=\"triggers initial auto gemination\">*</abbr>"
	end

	local pale = transcription_Palermo(CPA)

	-- Context-dependent initial variants, keyed by the second character of
	-- the intermediate form; listed in the order they are appended.
	local contexts = {
		{ field = "voc", label = '<abbr title="after a vowel">vow.</abbr>' },
		{ field = "gem", label = '<abbr title="after syntactic gemination">gem.</abbr>' },
		{ field = "nas", label = '<abbr title="after a nasal">nas.</abbr>' },
	}

	local additionalString = ""
	local initpale = initial_Palermo[CPA:sub(2, 2)]
	if initpale ~= nil then
		for _, context in ipairs(contexts) do
			local variant = initpale[context.field]
			if variant ~= nil then
				additionalString = additionalString .. ", " .. m_a.format_qualifiers(lang, { context.label }) .. " " .. m_IPA.format_IPA(lang, "[" .. variant .. "-]")
			end
		end
	end

	local phonemicLine = '*' .. m_IPA.format_IPA_full(lang, {}) .. pre .. m_IPA.format_IPA(lang, "/" .. phonemic .. "/") .. post
	local palermoLine = '\n**' .. m_a.format_qualifiers(lang, {"Palermo"}) .. " " .. m_IPA.format_IPA_full(lang, {}) .. m_IPA.format_IPA(lang, "[" .. pale .. "]") .. additionalString

	return phonemicLine .. palermoLine
end

-- Exported entry point: transcribes the first positional argument of the
-- frame and returns the resulting markup.
function p.IPA(frame)
	local text = frame.args[1]
	return multiword(text)
end

return p;