Module:ro-pronunciation

From Wiktionary, the free dictionary
Jump to navigation Jump to search

Testcases[edit]

40 of 40 tests failed. (refresh)

Status | Text | Expected | Actual
test_to_phonemic:
Failed | copíl | koˈpil | kopˈil
Failed | copíi | koˈpij | kopˈiʲ
Failed | copíii | koˈpi.ji | kopˈiij
Failed | lupi | lupʲ | lˈupʲ
Failed | șárpe | ˈʃar.pe | ʃˈarpe
Failed | țáră | ˈt͡sa.rə | t͡sˈarə
Failed | înăuntru | ɨ.nəˈun.tru | ɨnəẃntru
Failed | xilofón | ksi.loˈfon | ksilofˈon
Failed | quarc | kwark | kẃark
Failed | fiert | fjert | fˈiert
Failed | viteză | viˈte.zə | vitˈezə
Failed | viteáză | viˈte̯a.zə | viteˈazə
Failed | minge | ˈmin.d͡ʒe | mˈind͡ʒe
Failed | mingeac | minˈd͡ʒe̯ak | mind͡ʒˈeak
Failed | ghețár | ɡeˈt͡sar | ɡet͡sˈar
Failed | ghíndă | ˈɡin.də | ɡˈində
Failed | jargón | ʒarˈɡon | ʒarɡˈon
Failed | cireáșă | t͡ʃiˈre̯a.ʃə | t͡ʃireˈaʃə
Failed | cétină | ˈt͡ʃe.ti.nə | t͡ʃˈetinə
Failed | chiar | kjar | kˈiar
Failed | chestie | ˈkes.ti.e | kˈestie
Failed | mlădíță | mləˈdi.t͡sə | mlədˈit͡sə
Failed | târșă | ˈtɨr.ʃə | tˈɨrʃə
Failed | oaie | ˈo̯a.je | oˈaie
Failed | râu | rɨw | rˈɨu
Failed | continuu | konˈti.nuw | kontˈinuw
Failed | câine | ˈkɨj.ne | kɨj́ne
Failed | mea | me̯a | mˈea
Failed | socoteai | so.koˈte̯aj | sokoteˈaʲ
Failed | leoaică | leˈo̯aj.kə | leoaj́kə
Failed | accelerasem | ak.t͡ʃe.leˈra.sem | akt͡ʃelerˈasem
Failed | creează | kreˈe̯a.zə | kreeˈazə
Failed | crează | ˈkre̯a.zə | kreˈazə
Failed | beau | be̯aw | beˈau
Failed | suiau | suˈjaw | swiˈau
Failed | piei | pjej | pjˈeʲ
Failed | pleoape | ˈple̯o̯a.pe | pleoˈape
Failed | creioane | kreˈjo̯a.ne | krejoˈane
Failed | sculptură | skulpˈtu.rə | skulp.tˈurə
Failed | poezíe | po.eˈzi.e | poezˈie
Status | Text | Expected | Actual
test_to_phonetic:



local u = require("Module:string/char")

-- Table of functions returned by this module.
local export = {}

-- IPA marks used when building transcriptions.
local stress = "ˈ"
-- NOTE(review): `long` is not referenced by any function visible in this file.
local long = "ː"

-- Combining diacritics (U+0301, U+0300, U+0302), built with the helper loaded
-- from Module:string/char — presumably a code-point-to-string function; confirm.
local acute = u(0x301)
local grave = u(0x300)
local circumflex = u(0x302)
-- Pattern character class matching either accent that marks the stressed vowel.
local acute_or_grave = "[" .. acute .. grave .. "]"

-- Vowel symbols as they appear in the working string of to_phonemic
-- (after â/î → ɨ and ă → ə), and pattern classes derived from them.
-- These classes contain multi-byte characters, so they are only reliable with
-- the mw.ustring functions, not with the byte-based string library.
local vowels = "aeiouəɨ"
local vowel = "[" .. vowels .. "]"
-- NOTE(review): `vowel_or_semivowel`, `front`, `fronted` and `voiced_consonant`
-- are not referenced by any function visible in this file.
local vowel_or_semivowel = "[" .. vowels .. "jw]"
local not_vowel = "[^" .. vowels .. "]"
local front = "[ij]"
local fronted = u(0x031F)
local voiced_consonant = "[bdɡlmnrvz]"

-- Expansion of each single-character affricate placeholder into its
-- tie-bar IPA spelling; applied at the end of to_phonemic unless the caller
-- asks for single-character affricates.
local full_affricates = { ["ʦ"] = "t͡s", ["ʣ"] = "d͡z", ["ʧ"] = "t͡ʃ", ["ʤ"] = "d͡ʒ" }

-- Inside to_phonemic the single characters ʦ, ʣ, ʧ, ʤ stand in for
-- t͡s, d͡z, t͡ʃ, d͡ʒ so that each affricate can be matched as one character.

-- Semivowel written for an unaccented i or u that is adjacent to a vowel.
local semivowels = { i = "j", u = "w" }

-- Three-consonant clusters that receive a syllable break, as
-- { pattern, replacement } pairs applied in order. They are written with "k"
-- because every orthographic "c" has already been rewritten to k or ʧ by the
-- time these rules run.
local cluster_breaks = {
	{ "lpt", "lp.t" },
	{ "mpt", "mp.t" },
	{ "nkt", "nk.t" },
	{ "nkʦ", "nk.ʦ" },
	{ "nkʃ", "nk.ʃ" },
	{ "ndv", "nd.v" },
	{ "rkt", "rk.t" },
	{ "rtf", "rt.f" },
	{ "stm", "st.m" },
}

-- Converts a Romanian spelling into a phonemic IPA transcription.
--
-- Parameters:
--   word  (string) the spelling to transcribe. The stressed vowel may be
--         marked with an acute or grave accent; if no accent is present and
--         the word has more than one vowel, stress is placed on the
--         second-to-last vowel.
--   single_character_affricates  (optional) when truthy, affricates are left
--         as the single characters ʦ, ʣ, ʧ, ʤ instead of being expanded to
--         t͡s, d͡z, t͡ʃ, d͡ʒ.
--
-- Returns the transcription as a string, without surrounding slashes.
-- Raises an error for the letter sequences "cg" and "gc".
function export.to_phonemic(word, single_character_affricates)
	-- Letter-level normalisation. These are plain byte-sequence replacements
	-- (no pattern magic characters), so the byte-based gsub is safe here.
	word = mw.ustring.lower(word):gsub("'", ""):gsub("â", "ɨ"):gsub("î", "ɨ"):gsub("ă", "ə"):gsub("j", "ʒ"):gsub("ș", "ʃ"):gsub("ț", "ʦ"):gsub("cc", "kc"):gsub("uu", "uw")

	-- Decompose combining characters: for instance, è → e + ◌̀
	local decomposed = mw.ustring.toNFD(word):gsub("x", "ks"):gsub("y", "i")
		:gsub("ck", "k"):gsub("sh", "ʃ")

	-- Transcriptions must contain an acute or grave, to indicate stress position.
	-- This does not handle phrases containing more than one stressed word.
	if not mw.ustring.find(decomposed, acute_or_grave) then
		-- Count vowels per character, not per byte: ə and ɨ are multi-byte, so
		-- the byte-based string.gsub would count each of them more than once.
		local _, vowel_count = mw.ustring.gsub(decomposed, vowel, "")
		-- Allow monosyllabic unstressed words.
		if vowel_count ~= 1 then
			-- Add acute accent on second-to-last vowel.
			decomposed = mw.ustring.gsub(decomposed,
				"(" .. vowel .. ")(" .. not_vowel .. "*[iu]?" .. vowel .. not_vowel .. "*)$",
				"%1" .. acute .. "%2")
		end
	end

	local transcription = decomposed

	-- ci, gi + vowel: the i only marks the consonant as soft and is dropped.
	-- Do ci, gi + e, é, è sometimes contain /j/?
	transcription = mw.ustring.gsub(transcription,
		"([cg])([cg]?)i(" .. vowel .. ")",
		function (consonant, double, following_vowel)
			local affricate = consonant == "c" and "ʧ" or "ʤ"

			if double ~= "" then
				if double ~= consonant then
					error("Invalid sequence " .. consonant .. double .. ".")
				end

				affricate = affricate .. affricate
			end

			return affricate .. following_vowel
		end)

	-- Handle other cases of c, g. The optional h is consumed and not emitted.
	transcription = mw.ustring.gsub(transcription,
		"(([cg])([cg]?)(h?))(.?)",
		function (_, first, double, h, following)
			-- Don't allow the combinations cg, gc.
			-- Or do something else?
			if double ~= "" and double ~= first then
				error("Invalid sequence " .. first .. double .. ".")
			end

			-- c, g is soft before e, i, unless an h intervenes.
			local soft = (following == "e" or following == "i") and h ~= "h"
			local consonant
			if first == "c" then
				consonant = soft and "ʧ" or "k"
			else
				consonant = soft and "ʤ" or "ɡ"
			end

			if double ~= "" then
				consonant = consonant .. consonant
			end

			return consonant .. following
		end)

	-- ⟨qu⟩ represents /kw/.
	transcription = transcription:gsub("qu", "kw")

	-- Word-final unaccented i: palatalisation mark after a consonant,
	-- /ij/ after i, /iji/ after ii.
	transcription = mw.ustring.gsub(transcription, "i$", "ʲ")
	transcription = mw.ustring.gsub(transcription, "iiʲ$", "iji")
	transcription = mw.ustring.gsub(transcription, "iʲ$", "ij")

	-- u or i (without accent) before another vowel is a semivowel.
	transcription = mw.ustring.gsub(transcription,
		"([iu])(" .. vowel .. ")",
		function (glide, nucleus)
			return semivowels[glide] .. nucleus
		end)

	-- Likewise for u or i directly after a vowel.
	transcription = mw.ustring.gsub(transcription,
		"(" .. vowel .. ")([iu])",
		function (nucleus, glide)
			return nucleus .. semivowels[glide]
		end)

	-- Word-final -ie keeps a full vowel i.
	transcription = mw.ustring.gsub(transcription, "je$", "ie")

	-- Replace acute and grave with stress mark (placed before the vowel).
	transcription = mw.ustring.gsub(transcription,
		"(" .. vowel .. ")" .. acute_or_grave, stress .. "%1")

	-- Syllable breaks inside selected consonant clusters.
	for _, rule in ipairs(cluster_breaks) do
		transcription = mw.ustring.gsub(transcription, rule[1], rule[2])
	end

	-- Syllable break before a single consonant between two vowels (V.CV).
	-- The classes need brackets: without them the pattern only matches the
	-- literal vowel and consonant lists and never fires. gsub matches do not
	-- overlap, and each match consumes the second vowel, so a second pass is
	-- needed to catch the alternate positions in runs such as VCVCV.
	local intervocalic = "(" .. vowel .. ")([bkhdɡlmnrvz])(" .. vowel .. ")"
	for _ = 1, 2 do
		transcription = mw.ustring.gsub(transcription, intervocalic, "%1.%2%3")
	end


	-- Move stress before syllable onset, and add syllable breaks.
	-- This rule may need refinement.
--	transcription = mw.ustring.gsub(transcription,
--		"()(" .. not_vowel .. "?)([^" .. vowels .. stress .. "]*)(" .. stress
--			.. "?)(" .. vowel .. ")",
--		function (position, first, rest, syllable_divider, vowel)
--			-- beginning of word, that is, at the moment, beginning of string
--			if position == 1 then
--				return syllable_divider .. first .. rest .. vowel
--			end
--			if syllable_divider == "" then
--				syllable_divider = "."
--			end
--			if rest == "" then
--				return syllable_divider .. first .. vowel
--			else
--				return first .. syllable_divider .. rest .. vowel
--			end
--		end)

	if not single_character_affricates then
		-- Expand placeholders to tie-bar affricates. A doubled affricate
		-- (optionally split by a divider) becomes stop + divider + affricate.
		transcription = mw.ustring.gsub(transcription, "([ʦʣʧʤ])([%." .. stress .. "]*)([ʦʣʧʤ]*)",
			function (affricate1, divider, affricate2)
				local full_affricate = full_affricates[affricate1]

				if affricate2 ~= "" then
					return mw.ustring.sub(full_affricate, 1, 1) .. divider .. full_affricate
				end

				return full_affricate .. divider
			end)
	end

	-- Drop hyphens, stray circumflexes and any remaining h.
	-- NOTE(review): this removes every h, including ones that are not part of
	-- ch/gh — confirm that is intended.
	transcription = mw.ustring.gsub(transcription, "[h%-" .. circumflex .. "]", "")
	-- A syllable break directly before the stress mark is redundant.
	transcription = transcription:gsub("%.ˈ", "ˈ")

	return transcription
end

-- Template entry point: transcribes each positional argument of the calling
-- template (defaulting to the current page title) with export.to_phonemic,
-- wraps each result in slashes, and passes the list to Module:IPA's
-- format_IPA_full together with the language object for code "ro".
function export.show(frame)
	local m_IPA = require("Module:IPA")

	local process = require("Module:parameters").process
	local parent_args = frame:getParent().args
	local param_spec = {
		-- words to transcribe
		[1] = { list = true, default = mw.title.getCurrentTitle().text },
	}
	local args = process(parent_args, param_spec)

	local Array = require("Module:array")

	-- Builds the { pron = "/.../" } entry for one word.
	local function to_pron(word)
		local phonemic = export.to_phonemic(word)
		return { pron = "/" .. phonemic .. "/" }
	end

	local transcriptions = Array(args[1]):map(to_pron)

	local romanian = require("Module:languages").getByCode("ro")
	return m_IPA.format_IPA_full(romanian, transcriptions)
end

return export