-- Module:az-IPA
--
-- From Wiktionary, the free dictionary.

-- Based on [[User:Allahverdi Verdizade/az-ipa]].

local export = {}

-- Parses a correspondence table written as text: one "letter symbol" pair
-- per line, separated by spaces, with optional trailing "--" comments that
-- are stripped before parsing.
--
-- Returns two values:
--   1. a table mapping each letter to its symbol, containing only the pairs
--      in which the symbol differs from the letter;
--   2. a string made by concatenating every letter seen (identity pairs
--      included), as produced by [[Module:array]]'s keys(); presumably in no
--      particular order, so treat it as a set of characters.
local function make_map_and_key_string(correspondences)
	local without_comments = correspondences:gsub("%s*%-%-[^\n]*", "")
	local replacements, seen = {}, {}

	for source_letter, target_symbol in without_comments:gmatch("(%S+) +(%S+)") do
		seen[source_letter] = true
		-- Identity pairs only contribute to the key string, not to the map.
		if target_symbol ~= source_letter then
			replacements[source_letter] = target_symbol
		end
	end

	local Array = require "Module:array"
	local key_string = Array.keys(seen):concat ""
	return replacements, key_string
end

-- Pronunciation 1
--
-- Letter-to-IPA correspondences for the Azerbaijani Latin alphabet.
-- `map` holds only the letters whose IPA symbol differs from the letter;
-- `allowed_letters` is the concatenation of every letter listed here and is
-- used to validate input.
--
-- The affricates are written with the single-character stand-ins ʤ and ʧ so
-- that later rules can treat them as one segment; they are expanded to
-- tie-barred sequences at the end of export.phonetic.
--
-- Azerbaijani <c> is the voiced affricate and <ç> the voiceless one (the
-- reverse of what the letter shapes might suggest to an English reader).
--
-- NOTE(review): <g> and <q> both map to ɡ, so the rules in export.phonetic
-- that target ɡ apply to either letter -- confirm that this is intended.
local map, allowed_letters = make_map_and_key_string [=[
	a ɑ
	b b
	c ʤ -- converted to d͡ʒ later
	ç ʧ -- converted to t͡ʃ later
	d d
	e e
	ə æ
	f f
	g ɡ
	ğ ɣ
	h h
	x χ
	ı ɯ
	i i
	j ʒ
	k k -- [[w:Azerbaijani phonology]] writes c
	q ɡ
	l l
	m m
	n n
	o o
	ö œ
	p p
	r r
	s s
	ş ʃ
	t t
	u u
	ü y
	v v
	y j
	z z
]=]

export.map = map

-- Expansion of the single-character affricate stand-ins (ʧ, ʤ) used in the
-- letter map into tie-barred IPA sequences. `affricate_letters` is the
-- concatenation of the stand-in characters, used to build a character class
-- for the final substitution in export.phonetic.
local affricate_map, affricate_letters = make_map_and_key_string [[
	ʧ t͡ʃ
	ʤ d͡ʒ
]]

-- Backness class ("back" or "front") of each vowel, keyed both by the
-- orthographic letter and, where it differs, by the IPA symbol the letter
-- maps to in export.map.
local vowel_properties = {}

-- Registers `property` for every character of the string `vowels`, and for
-- that character's IPA counterpart when export.map provides a distinct one.
-- Fails an assertion if the IPA counterpart already has a property.
local function set_vowel_property(vowels, property)
	for letter in mw.ustring.gmatch(vowels, ".") do
		local ipa_symbol = export.map[letter]
		local has_distinct_ipa = ipa_symbol and ipa_symbol ~= letter

		vowel_properties[letter] = property
		if has_distinct_ipa then
			assert(
				not vowel_properties[ipa_symbol],
				"Duplicate vowel properties for " .. ipa_symbol
			)
			vowel_properties[ipa_symbol] = property
		end
	end
end

set_vowel_property("auoı", "back")
set_vowel_property("əüöie", "front")

export.vowel_properties = vowel_properties

-- True when `vowel` (a letter or IPA symbol) is registered as a back vowel;
-- false for front vowels and for anything that is not a registered vowel,
-- including the empty string.
local function is_back(vowel)
	local backness = vowel_properties[vowel]
	return backness == "back"
end

-- True when `vowel` (a letter or IPA symbol) is registered as a front vowel;
-- false for back vowels and for anything that is not a registered vowel.
local function is_front(vowel)
	local backness = vowel_properties[vowel]
	return backness == "front"
end

-- Every key of vowel_properties (orthographic letters and IPA symbols alike)
-- joined into one string, for use inside pattern character classes.
local vowels = require "Module:array".keys(vowel_properties):concat ""

-- IPA primary stress mark, inserted before the stressed syllable.
local stress_mark = "ˈ"

--- Converts an Azerbaijani word in Latin orthography to a phonetic IPA
-- transcription.
--
-- The word is first transliterated letter by letter with export.map, then a
-- fixed sequence of phonological rules is applied, stress is added to the
-- last syllable, geminates are rewritten with the length mark, and the
-- affricate stand-ins are expanded.
--
-- @param word  string consisting only of the letters listed in the letter
--              map (lowercase; no spaces or punctuation).
-- @return      the phonetic transcription as a string.
-- @raise       "Expected string" if `word` is not a string,
--              "Expected non-empty string" if it is empty,
--              "Illegal characters: ..." if it contains anything outside
--              the allowed letters.
function export.phonetic(word)
	local ugsub, umatch, usub = mw.ustring.gsub, mw.ustring.match, mw.ustring.sub
	if type(word) ~= "string" then
		error("Expected string")
	elseif word == "" then
		error("Expected non-empty string")
	elseif not umatch(word, "^[" .. allowed_letters .. "]+$") then
		error("Illegal characters: " .. ugsub(word, "[" .. allowed_letters .. "]", ""))
	end

	-- Character at position `pos` of `str`, or "" when `pos` lies before the
	-- start of the string (positions past the end already yield "").
	local function char_at(str, pos)
		if pos < 1 then
			return ""
		end
		return usub(str, pos, pos)
	end

	-- Letter-by-letter transliteration; letters absent from the map are kept.
	local phonetic = ugsub(word, ".", export.map)

	-- Double q: <qq> → [kː]/[back vowel]_[back vowel]
	-- The callbacks below read `phonetic` as it was before the substitution,
	-- which is the string the captured positions refer to.
	phonetic = ugsub(phonetic, "()ɡɡ()", function(start_pos, after_pos)
		local before = char_at(phonetic, start_pos - 1)
		local after = char_at(phonetic, after_pos)
		if is_back(before) and is_back(after) then
			return "kk"
		end
	end)

	-- Fricativization: < q > → [χ]/ [back vowel]_#
	-- Approximatization: < k > → [j]/ [front vowel]_#
	-- The pattern is anchored to the end of the word. An unanchored pattern
	-- with a trailing optional character would consume the vowel before a
	-- final dorsal whenever another dorsal stood two positions earlier
	-- (e.g. "...kVk"), so the final stop would never be examined.
	phonetic = ugsub(phonetic, "(.)([kɡ])$", function(preceding, dorsal_stop)
		if dorsal_stop == "ɡ" and is_back(preceding) then
			return preceding .. "χ"

		-- [[w:Azerbaijani phonology]] writes this as [ç] and applies
		-- the change before a voiceless consonant as well.
		elseif dorsal_stop == "k" and is_front(preceding) then
			return preceding .. "j"
		end
	end)

	-- l-Assimilation: <l> → [tː,dː,n,rː]/t,d,n_
	phonetic = ugsub(phonetic, "([rtdnszʃʒ])l()", function(consonant, pos)
		-- `pos` is the position just after the <l>, so the <l> and the two
		-- characters following it span pos - 1 .. pos + 1.
		local suffix = usub(phonetic, pos - 1, pos + 1)

		-- r-Assimilation: <r> → [lː]/_lar,lər (in non-lemma forms only)
		if consonant == "r" and (suffix == "lær" or suffix == "lɑr") then
			return "ll"
		elseif consonant == "z" or consonant == "ʒ" then
			return consonant .. "d"
		elseif consonant == "s" or consonant == "ʃ" then
			return consonant .. "t"
		else
			return consonant .. consonant
		end
	end)

	-- n-Assimilation: <n> → [mː]/_m
	phonetic = ugsub(phonetic, "nm", "mm")

	-- l-Velarization: l → [ɫ]/_[back vowel], l → [ɫ]/[back vowel]_
	phonetic = ugsub(phonetic, "()l", function(pos)
		local before = char_at(phonetic, pos - 1)
		local after = char_at(phonetic, pos + 1)
		if is_back(before) or is_back(after) then
			return "ɫ"
		end
	end)

	-- Add stress: the mark goes before the single non-vowel that precedes
	-- the last vowel of the word. Nothing is inserted when the last vowel is
	-- word-initial or directly follows another vowel.
	-- TODO: handle non-final stress.
	phonetic = ugsub(
		phonetic,
		"[^" .. vowels .. "][" .. vowels .. "][^" .. vowels .. "]*$",
		stress_mark .. "%0")

	-- Write geminates with length symbol. A stress mark standing between the
	-- two identical segments is moved in front of the lengthened segment.
	phonetic = ugsub(phonetic, "(.)(" .. stress_mark .. "?)%1", function(segment, stress)
		return stress .. segment .. "ː"
	end)

	-- Expand the single-character affricate stand-ins to tie-barred IPA.
	phonetic = ugsub(phonetic, "[" .. affricate_letters .. "]", affricate_map)

	return phonetic
end

return export