Module:User:AryamanA/hi-IPA

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This is a private module sandbox of AryamanA, for his own experimentation. Items in this module may be added and removed at AryamanA's discretion; do not rely on this module's stability.


-- Table that collects this module's public functions; it is returned at the
-- end of the file.
local export = {}

local lang = require("Module:languages").getByCode("hi")
local sc = require("Module:scripts").getByCode("Deva")
local m_IPA = require("Module:IPA")

local gsub = mw.ustring.gsub
local sub = mw.ustring.sub
local gmatch = mw.ustring.gmatch
local find = mw.ustring.find

-- Lookup from a single transliteration character to its IPA rendering.
-- toIPA applies it with a one-character-at-a-time gsub, so characters that
-- have no entry here pass through unchanged.
-- NOTE(review): keys spelled with a separate combining tilde span more than
-- one code point; confirm they can actually be reached by that per-character
-- lookup.
local correspondences = {
	-- consonants
	["ṅ"] = "ŋ",
	["g"] = "ɡ",
	["c"] = "t͡ʃ",
	["j"] = "d͡ʒ",
	["ñ"] = "ɲ",
	["ṭ"] = "ʈ",
	["ḍ"] = "ɖ",
	["ṇ"] = "ɳ",
	["t"] = "t̪",
	["d"] = "d̪",
	["y"] = "j",
	["r"] = "ɾ",
	["v"] = "ʋ",
	["l"] = "l̪",
	["ś"] = "ʃ",
	["ṣ"] = "ʂ",
	["h"] = "ɦ",
	["ṛ"] = "ɽ",
	["ž"] = "ʒ",
	["ḻ"] = "ɭ",
	["ġ"] = "ɣ",
	["q"] = "q",
	["x"] = "x",
	["ṉ"] = "n",
	["ṟ"] = "ɹ",

	-- vowels
	["a"] = "ə",
	["ā"] = "ɑː",
	["i"] = "ɪ",
	["ī"] = "iː",
	["o"] = "oː",
	["e"] = "eː",
	["u"] = "ʊ",
	["ū"] = "uː",
	["ŏ"] = "ɔ",
	["ĕ"] = "æ",

	-- vowels written with a tilde
	["ẽ"] = "ẽː",
	["ũ"] = "ʊ̃",
	["õ"] = "õː",
	["ã"] = "ə̃",
	["ā̃"] = "ɑ̃ː",
	["ĩ"] = "ɪ̃",
	["ī̃"] = "ĩː",

	-- everything else
	["ॐ"] = "oːm",
	["ḥ"] = "ʰ",
	["'"] = "(ʔ)",
}

-- Replacements applied by toIPA when the "standard" style is requested: each
-- listed transliteration letter is swapped for the value given here before
-- any further processing. The apostrophe is removed altogether.
local perso_arabic = {
	["x"] = "kh",
	["ġ"] = "g",
	["q"] = "k",
	["ž"] = "z",
	["z"] = "j",
	["f"] = "ph",
	["'"] = "",
}

-- Short vowel letter -> its long counterpart. toIPA uses it when "a" is
-- followed by i/u and then y/w.
local lengthen = {
	a = "ā",
	i = "ī",
	u = "ū",
}

-- Character inventories and pattern fragments used by syllabify, export.stress
-- and export.toIPA.

-- Characters treated as vowel material, plus the length mark ː. The string is
-- spliced into bracket classes below and in export.stress, so every code point
-- in it (including any combining mark) is an individual class member.
local vowels = "əäaāiīuūoŏĕʊɪɔɔ̃ɛeæãā̃ẽĩī̃õũū̃ː"
-- Pattern for one vowel character optionally followed by the length mark.
local vowel = "[əäaāiīuūoŏĕʊɪɔɔ̃ɛeæãā̃ẽĩī̃õũū̃]ː?"
-- Captures the character in front of an "h"; toIPA rewrites that "h" to ʱ.
local weak_h = "([gjdḍbṛnməäaāiīuūoŏĕʊɪɔɔ̃ɛeæãā̃ẽĩī̃õũū̃ː])h"
-- Captures one of k c t ṭ p; toIPA rewrites an "h" directly after it to ʰ.
local aspirate = "([kctṭp])"
-- Three captures: a vowel character (optionally followed by a combining
-- tilde), a run of characters that are neither vowel material nor ".", and
-- another vowel character (optionally followed by a combining tilde).
local syllabify_pattern = "([" .. vowels .. "]̃?)([^" .. vowels .. "%.]+)([" .. vowels .. "]̃?)"

--- Cut a string into consonant units.
-- A unit is either one character from the consonant class below, or one of
-- k g c j ṭ ḍ ṇ t d p b ṛ immediately followed by "h". Any character that
-- cannot extend the unit being built closes it and starts the next one.
-- @param text  the string to split
-- @return array of units in order; the first entry is an empty string when
--         the first character of `text` is not in the consonant class
local function find_consonants(text)
	local single = "^[kgṅcjñṭḍṇtdnpbmyrlvśṣshqxġzžḻṛṟfθṉ]$"
	local with_h = "^[kgcjṭḍṇtdpbṛ]h$"
	local units = {}
	local pending = ""
	-- The appended space can never extend a unit, so it flushes the last one.
	for codepoint in mw.ustring.gcodepoint(text .. " ") do
		local letter = mw.ustring.char(codepoint)
		local candidate = pending .. letter
		if not (find(candidate, single) or find(candidate, with_h)) then
			units[#units + 1] = pending
			pending = letter
		else
			pending = candidate
		end
	end
	return units
end

--- Insert "." syllable breaks into a transliterated string.
-- Consonant clusters between two vowels are split with find_consonants: a
-- single unit goes wholly to the following syllable (".k"), while in a longer
-- cluster the break goes after the first unit ("n.d"). Adjacent vowels are
-- then separated with a break as well.
-- @param text  transliteration; existing "." characters are respected because
--              the cluster pattern never matches across them
-- @return the string with syllable breaks added
local function syllabify(text)
	-- Each match consumes the vowel that follows the cluster, so that vowel
	-- cannot open the next match in the same pass; a second pass picks up the
	-- clusters that were skipped.
	for _ = 1, 2 do
		text = gsub(text, syllabify_pattern, function(before, cluster, after)
			-- Kept local: the original assigned to a global here.
			local units = find_consonants(cluster)
			table.insert(units, #units > 1 and 2 or 1, ".")
			return before .. table.concat(units) .. after
		end)
	end
	-- Vowel hiatus. Lua patterns have no lookahead, so the second vowel has to
	-- be captured and re-emitted; running twice covers overlapping pairs.
	-- (The "(?=...)" substitution that used to sit in the loop above matched a
	-- literal "?=" rather than looking ahead, so it has been dropped.)
	for _ = 1, 2 do
		text = gsub(text, "(" .. vowel .. ")(" .. vowel .. ")", "%1.%2")
	end
	return text
end

--- Insert a primary-stress mark (ˈ) into each word of a syllabified string.
--
-- Words are the runs between "‿"; syllables are the runs between ".".
-- Each syllable is weighed as
--   SH (superheavy): its first and its last character are both outside `vowels`;
--   H  (heavy):      otherwise, it contains the length mark ː;
--   L  (light):      anything else.
-- Within a word the mark is placed before the second-to-last syllable of the
-- heaviest weight class that occurs, or before the only syllable of that class.
--
-- NOTE(review): the weight tag (L/H/SH) is appended to every syllable in the
-- returned string. That looks like leftover debugging output; confirm before
-- relying on the exact result.
--
-- @param ipa  string whose syllables are separated by "." and words by "‿"
-- @return the string with one ˈ per word; a "." next to a ˈ is dropped
function export.stress(ipa)
	local non_vowel = '[^' .. vowels .. ']'
	local result = {}
	for word in gmatch(ipa, '([^‿]+)') do
		local syllables = {}
		-- For each weight class: index of the most recent syllable of that
		-- class, and (…2) of the one seen before it.
		local light, light2 = nil, nil
		local heavy, heavy2 = nil, nil
		local superheavy, superheavy2 = nil, nil
		local i = 1
		for syllable in gmatch(word, '([^%.]+)') do
			local t = 'L'
			-- `find` yields nil on failure. The original tested the return
			-- value of `gmatch`, which is an iterator function and therefore
			-- always truthy, so every syllable was counted as superheavy.
			if find(sub(syllable, 1, 1), non_vowel) and find(sub(syllable, -1, -1), non_vowel) then
				superheavy2, superheavy = superheavy, i
				t = 'SH'
			elseif find(syllable, 'ː') then
				heavy2, heavy = heavy, i
				t = 'H'
			else
				light2, light = light, i
			end
			table.insert(syllables, syllable .. t)
			i = i + 1
		end
		-- Indices are numbers (always truthy), so `or` picks the first
		-- candidate that exists, in priority order.
		local target = superheavy2 or superheavy or heavy2 or heavy or light2 or light
		if target then
			table.insert(syllables, target, 'ˈ')
		end
		table.insert(result, table.concat(syllables, '.'))
	end
	local final = table.concat(result, '‿')
	-- The mark was inserted as a list element of its own, so the join put a
	-- "." on either side of it; remove those.
	final = gsub(final, '%.ˈ', 'ˈ')
	final = gsub(final, 'ˈ%.', 'ˈ')
	return final
end

-- Characters that are written the same way in the transliteration and in the
-- IPA output: register each of them in `correspondences` as mapping to itself.
local identical = "knlsfzθ"
for position = 1, mw.ustring.len(identical) do
	local letter = sub(identical, position, position)
	correspondences[letter] = letter
end

--- Transliterate `text` through the module-level language object `lang`
-- (looked up with the code "hi" at the top of the file).
-- @param text  the text to transliterate
-- @return whatever `lang:transliterate` returns for `text`
local function transliterate(text)
	return lang.transliterate(lang, text)
end

--- Build a link to `term` by handing it, together with the module-level
-- language and script objects, to Module:links' `full_link`.
-- @param term  the term to link
-- @return the result of `full_link`
function export.link(term)
	local m_links = require("Module:links")
	local link_data = { term = term, lang = lang, sc = sc }
	return m_links.full_link(link_data)
end

--- Derive a narrower transcription from a broad one by applying a fixed
-- sequence of substitutions.
-- @param ipa  broad transcription with "." between syllables (export.make
--             passes the result of export.toIPA)
-- @return the rewritten string; identical to the input when no rule applies
function export.narrow_IPA(ipa)
	-- what /ɑ/ really is
	ipa = gsub(ipa, 'ɑ', 'ä')
	-- dentals. `correspondences` already emits l with the dental mark, so an
	-- existing mark is consumed and re-emitted instead of being stacked twice.
	ipa = gsub(ipa, '([snl])̪?', '%1̪')
	-- nasals induce nasalization
	ipa = gsub(ipa, '([əäɪiʊueɛoɔæ])(ː?)([nɳŋm])', '%1̃%2%3')
	-- cc, jj
	ipa = gsub(ipa, 't͡ʃ(%.?)t͡ʃ', 't̚%1t͡ʃ')
	ipa = gsub(ipa, 'd͡ʒ(%.?)d͡ʒ', 'd̚%1d͡ʒ')
	-- syllable boundary consonants. A match consumes the character after the
	-- ".", so a stop sitting there is only reached on the second pass.
	-- The class lists both ASCII "g" and IPA "ɡ": `correspondences` produces
	-- the latter, which the original class did not contain.
	for _ = 1, 2 do
		ipa = gsub(ipa, '([kgɡʈɖtdpb]̪?)%.([^jʋ])', '%1̚.%2')
	end
	-- aspiration rules ("ɡ" added for the same reason as above)
	ipa = gsub(ipa, 'əʱ%.([kgɡŋtdɲʈɖɳnpbmɾlzqfʂʃsʒɭɣɹʋ])', 'ɛʱ.%1')
	ipa = gsub(ipa, 'ʊʱ%.([kgɡŋtdɲʈɖɳnpbmɾlzqfʂʃsʒɭɣɹʋ])', 'ɔʱ.%1')
	ipa = gsub(ipa, 'ə%.ɦə', 'ɛ.ɦɛ')
	ipa = gsub(ipa, 'ʊ%.ɦə', 'ɔ.ɦɔ')
	ipa = gsub(ipa, 'ə%.ɦʊ', 'ɔ.ɦɔ')
	-- retroflex s rules: ʂ is kept only directly before ʈ or ɖ
	ipa = gsub(ipa, 'ʂ(%.?[^ʈɖ])', 'ʃ%1')
	ipa = gsub(ipa, 'ʂ$', 'ʃ')
	return ipa
end

--- Convert a term to a broad IPA transcription.
-- The term is transliterated, syllabified, and then mapped character by
-- character through `correspondences`, with a number of clean-up rules before
-- and after.
-- @param text   the term; "॰" and "-" are treated as explicit syllable breaks
-- @param style  when equal to "standard", the letters listed in
--               `perso_arabic` are replaced first; any other value skips that
-- @return the transcription, with "." between syllables and "‿" between words
-- Raises an error naming the term when transliteration yields nothing.
function export.toIPA(text, style)
	-- Kept for the error message: `text` is rewritten before it is needed.
	local original = text
	text = gsub(text, '॰', '.')
	text = gsub(text, '%-', '.')
	local translit = transliterate(text)
	if not translit then
		-- The original concatenated an undefined global here, which raised
		-- "attempt to concatenate a nil value" instead of this message.
		error('The term "' .. original .. '" could not be transliterated.')
	end
	
	if style == "standard" then
		translit = gsub(translit, "[xġqžzf']", perso_arabic)
	end
	
	-- force final schwa
	translit = gsub(translit, "a~$", "ə")
	
	-- vowels
	translit = gsub(translit, "͠", "̃")
	translit = gsub(translit, "a([ui])([yw])", function(a, b)
		return "a" .. lengthen[a] .. b
	end)
	translit = gsub(translit, 'a(̃?)i', 'ɛ%1ː')
	translit = gsub(translit, 'a(̃?)u', 'ɔ%1ː')
	-- NOTE(review): hyphens in the input were already turned into "." above;
	-- these two rules only matter if the transliteration itself yields "-".
	translit = gsub(translit, "%-$", "")
	translit = gsub(translit, "^%-", "")
	translit = gsub(translit, "ŕ$", "r")
	translit = gsub(translit, "ŕ", "ri")
	translit = gsub(translit, ",", "")
	-- a word boundary is carried as ".." until it becomes "‿" further down
	translit = gsub(translit, " ", "..")
	
	translit = syllabify(translit)
	-- keep a length mark or tilde attached to its vowel when a break landed
	-- in front of it
	translit = gsub(translit, "%.ː", "ː.")
	translit = gsub(translit, "%.̃", "̃")
	
	-- gy
	translit = gsub(translit, 'jñ', 'gy')

	translit = gsub(translit, aspirate .. "h", '%1ʰ')
	translit = gsub(translit, weak_h, '%1ʱ')
	
	-- one character at a time; characters without an entry are left as they are
	local result = gsub(translit, ".", correspondences)

	-- remove final schwa (Pandey, 2014)
	-- actually weaken
	result = gsub(result, "(...)ə$", "%1ᵊ")
	-- NOTE(review): spaces were replaced by ".." earlier, so this rule looks
	-- unreachable; confirm whether word-final schwa before "‿" should weaken.
	result = gsub(result, "(...)ə ", "%1ᵊ ")
	result = gsub(result, "(...)ə%.?%-", "%1ᵊ-")
	result = gsub(result, "%.?%-", ".")

	result = gsub(result, "%.%.", "‿")
	
	-- formatting
	result = gsub(result, "ː̃", "̃ː")
	result = gsub(result, "ː%.̃", "̃ː.")
	result = gsub(result, "%.$", "")

	-- i and u lengthening
	result = gsub(result, "ʊ(̃?)(ʱ?)$", "u%1ː%2")
	result = gsub(result, "ɪ(̃?)(ʱ?)$", "i%1ː%2")
	result = gsub(result, "ɪ%.j", "iː.j")
	
	return result
end

--- Template entry point: format the pronunciation(s) of one or more terms.
-- Terms come from the positional arguments of the parent frame (empty ones
-- are skipped); when no first argument is given, the current page title is
-- used. Each term yields a stressed phonemic transcription in slashes,
-- followed by a bracketed narrow one when export.narrow_IPA changes anything.
-- The same pair is added for the "persianized" style when it differs from
-- the "standard" one.
-- @param frame  the invoking frame object
-- @return the result of Module:IPA's `format_IPA_full` for the collected items
function export.make(frame)
	local args = frame:getParent().args
	local pagetitle = mw.title.getCurrentTitle().text

	local terms, items = {}, {}

	if args[1] then
		for _, value in ipairs(args) do
			if value ~= "" then
				terms[#terms + 1] = value
			end
		end
	else
		terms = { pagetitle }
	end

	-- Adds /phonemic/ and, when it differs, [narrow] to the item list.
	local function add_pair(phonemic)
		items[#items + 1] = { pron = "/" .. export.stress(phonemic) .. "/" }
		local phonetic = export.narrow_IPA(phonemic)
		if phonetic ~= phonemic then
			items[#items + 1] = { pron = "[" .. export.stress(phonetic) .. "]" }
		end
	end

	for _, term in ipairs(terms) do
		local standard = export.toIPA(term, "standard")
		local persianized = export.toIPA(term, "persianized")
		add_pair(standard)
		if persianized ~= standard then
			add_pair(persianized)
		end
	end

	return m_IPA.format_IPA_full { lang = lang, items = items }
end

--- Template entry point for Urdu: format the pronunciation of each
-- transliteration passed as a positional argument of the parent frame.
-- Empty arguments are skipped. Every remaining one is run through
-- export.toIPA in the "persianized" style and wrapped in slashes; no stress
-- marking or narrow transcription is added here.
-- @param frame  the invoking frame object
-- @return the result of Module:IPA's `format_IPA_full` for the collected items
-- Raises "No transliterations given." when there is no first argument.
function export.make_ur(frame)
	local args = frame:getParent().args
	-- Named differently from the module-level `lang` so that it no longer
	-- shadows it. The unused page-title and script lookups were removed.
	local ur_lang = require("Module:languages").getByCode("ur")

	if not args[1] then
		error("No transliterations given.")
	end

	local results = {}
	for _, Urdu in ipairs(args) do
		if Urdu ~= "" then
			results[#results + 1] = { pron = "/" .. export.toIPA(Urdu, "persianized") .. "/" }
		end
	end

	return m_IPA.format_IPA_full { lang = ur_lang, items = results }
end

-- Hand the table of public functions back to whoever loads this module.
return export