Module:User:Fenakhay/az-decl-noun

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This is a private module sandbox of Fenakhay, for his own experimentation. Items in this module may be added and removed at Fenakhay's discretion; do not rely on this module's stability.


local export = {}

local lang = require("Module:languages").getByCode("az")
local m_table = require("Module:table")
local m_string_utilities = require("Module:string utilities")
local m_script_utilities = require("Module:script utilities")
local iut = require("Module:inflection utilities")
local m_para = require("Module:parameters")

local m_data = require("Module:User:Fenakhay/az-decl-noun/data")
local m_common = require("Module:az-common")

local current_title = mw.title.getCurrentTitle()
local PAGENAME = current_title.text

local rfind = mw.ustring.find
local rmatch = mw.ustring.match
local usub = mw.ustring.sub
local ulower = mw.ustring.lower

-- Character inventories used to classify the last vowel / last letter of a lemma.
-- Each value is a plain string of letters that gets wrapped in "[...]" to form a
-- ustring character class.
local vowel = "aəeiıuüöo"
local vowel_c = "[" .. vowel .. "]"

local kq = "kq"
-- NOTE(review): despite its name this set does contain "g".
local voiced_cons_not_g = "dmlnbvgğjzc"
local unvoiced_cons_k = "psştfxhç"
-- NOTE(review): "r" and "y" are absent from this inventory, so a lemma ending in
-- either letter will not match the `consonant` class below -- confirm whether the
-- data module relies on `all`/`consonant` for such lemmas.
local consonant = unvoiced_cons_k .. voiced_cons_not_g .. kq
local front_unrounded_vowel = "eəi"
local front_rounded_vowel = "öü"
local back_unrounded_vowel = "aı"
local back_rounded_vowel = "ou"

-- Map from letter-class name (as used for keys in the endings data) to the string
-- of letters belonging to that class. Declared `local` so that loading this module
-- does not create a global variable.
local letter_classes = {
	-- Vowel-harmony classes.
	front_unrounded_vowel = front_unrounded_vowel,
	front_rounded_vowel = front_rounded_vowel,
	back_unrounded_vowel = back_unrounded_vowel,
	back_rounded_vowel = back_rounded_vowel,

	-- Subclasses of final letters.
	consonant = consonant,
	vowel = vowel,
	front_vowel = front_rounded_vowel .. front_unrounded_vowel,
	back_vowel = back_unrounded_vowel .. back_rounded_vowel,
	all = vowel .. consonant
}

-- Map from slot name (e.g. "gen_p", "dat_2s_mpos") to its "|"-separated tag string
-- (e.g. "gen|p", "dat|2s|mpos"). Every case is combined with both numbers, and
-- with every possessor person/number in both the "spos" and "mpos" variants.
local output_noun_slots = {}
do
	local cases = {"nom", "gen", "dat", "acc", "loc", "abl"}
	local numbers = {"s", "p"}
	local possessors = {"1s", "2s", "3s", "1p", "2p", "3p"}
	local possessed = {"spos", "mpos"}
	for _, case in ipairs(cases) do
		-- Plain (non-possessive) slots.
		for _, number in ipairs(numbers) do
			output_noun_slots[case .. "_" .. number] = case .. "|" .. number
		end
		-- Possessive slots.
		for _, person in ipairs(possessors) do
			for _, pos in ipairs(possessed) do
				output_noun_slots[case .. "_" .. person .. "_" .. pos] =
					case .. "|" .. person .. "|" .. pos
			end
		end
	end
end

-- Decide whether `slot` should be left out for this noun: singular-only nouns
-- skip slots ending in "_p", plural-only nouns skip slots ending in "_s".
-- Returns a truthy value (the match position) when the slot is to be skipped,
-- otherwise false or nil.
local function skip_slot(decl_spec, slot)
	local number = decl_spec.number
	if number == "sg" then
		return rfind(slot, "_p$") or false
	elseif number == "pl" then
		-- Parentheses keep only the first result of rfind.
		return (rfind(slot, "_s$"))
	end
	return false
end

-- Return the alternate stem of `lemma` for the given type class:
--   "cc": the lemma without its last character;
--   "k":  the last character replaced by "y";
--   "q":  the last character replaced by "ğ";
--   anything else: the lemma unchanged.
local function modify_lemma(lemma, type_class)
	local tail
	if type_class == "cc" then
		tail = ""
	elseif type_class == "k" then
		tail = "y"
	elseif type_class == "q" then
		tail = "ğ"
	else
		return lemma
	end
	return usub(lemma, 1, -2) .. tail
end

-- Build the extended lemma used when the `vn` flag is set: "q" is appended when
-- `last_vowel` is a back vowel, "k" otherwise.
-- Returns three values: the extended lemma, its alternate stem (see modify_lemma)
-- and the type class reported for it by m_common.getType.
local function get_vn_lemma(lemma, last_vowel)
	local has_back_vowel = rmatch(last_vowel, "[" .. letter_classes["back_vowel"] .. "]")
	local vn_lemma = lemma .. (has_back_vowel and "q" or "k")
	-- Only the second result of getType is needed here.
	local _, type_class = m_common.getType(vn_lemma)
	return vn_lemma, modify_lemma(vn_lemma, type_class), type_class
end

-- Fill `decl_spec.forms` with the declined forms of `lemma`.
--
-- decl_spec:     table with at least `forms` (slot -> list of forms) and `number`.
-- lemma:         the (singular) lemma; it is used verbatim for "nom_s", while all
--                other forms are built from its lowercased version.
-- override_type: if not nil, replaces the type class reported by m_common.getType.
-- vn:            if truthy, possessive slots other than the "_3s_" ones are built
--                on the extended lemma returned by get_vn_lemma.
--
-- The endings come from the data module, keyed by slot, then by the letter class
-- of the last vowel, then by the letter class of the stem's last letter; each
-- ending is either a string or a list of strings.
local function decline_noun(decl_spec, lemma, override_type, vn)
	local base = ulower(lemma)
	local base_final = usub(base, -1)

	local last_vowel, type_class = m_common.getType(base)
	if override_type ~= nil then
		type_class = override_type
	end
	local base_modified = modify_lemma(base, type_class)

	local vn_base, vn_modified, vn_type_class
	if vn then
		vn_base, vn_modified, vn_type_class = get_vn_lemma(base, last_vowel)
	end

	-- Return stem, alternate stem, type class and final letter to use for `slot`.
	local function pick_stem(slot)
		if vn and not rfind(slot, "_3s_") and rfind(slot, "pos") then
			return vn_base, vn_modified, vn_type_class, usub(vn_base, -1)
		end
		return base, base_modified, type_class, base_final
	end

	-- Look up the ending(s) for `slot` and insert the resulting forms.
	local function add(slot, endings)
		if skip_slot(decl_spec, slot) then
			return
		end
		local stem, stem_modified, stem_class, stem_final = pick_stem(slot)
		local stem_is_cc = stem_class == "cc"

		for vowel_class, by_final_letter in pairs(endings) do
			assert(letter_classes[vowel_class], "Unrecognized letter class " .. vowel_class)
			if rmatch(last_vowel, "[" .. letter_classes[vowel_class] .. "]") then
				for final_class, ending in pairs(by_final_letter) do
					assert(letter_classes[final_class],
						"Unrecognized letter class " .. final_class)
					if rmatch(stem_final, "[" .. letter_classes[final_class] .. "]") then
						local suffixes
						if type(ending) == "string" then
							suffixes = { ending }
						elseif type(ending) == "table" then
							suffixes = ending
						end

						local new_forms = {}
						for _, suffix in ipairs(suffixes) do
							local vowel_initial = rmatch(usub(suffix, 1, 1), vowel_c) ~= nil
							-- "cc" stems keep the full lemma before a vowel and use the
							-- alternate stem elsewhere; all other stems do the opposite.
							local chosen
							if vowel_initial == stem_is_cc then
								chosen = stem
							else
								chosen = stem_modified
							end
							table.insert(new_forms, {form = chosen .. suffix})
						end
						iut.insert_forms(decl_spec.forms, slot, new_forms)
						return
					end
				end
				error("Last letter '" .. stem_final .. "' of lemma '" .. stem ..
					"' doesn't match any known letter class")
			end
		end
		error("Last vowel '" .. last_vowel .. "' of lemma '" .. stem ..
			"' doesn't match any known letter class")
	end

	-- The nominative singular is the lemma itself, in its original casing.
	if not skip_slot(decl_spec, "nom_s") then
		iut.insert_form(decl_spec.forms, "nom_s", {form = lemma})
	end

	for slot, slot_endings in pairs(m_data) do
		add(slot, slot_endings)
	end
end

-- Work out both the categories the noun belongs to and the annotation shown in
-- the declension title bar; the two are derived together because they depend on
-- the same information (`decl_spec.number`). The results are stored in
-- `decl_spec.annotation` and `decl_spec.categories`.
local function compute_categories_and_annotation(decl_spec)
	local number = decl_spec.number
	local categories = {}
	local annotation = ""

	if number == "sg" then
		m_table.insertIfNot(categories, "Azerbaijani " .. "uncountable nouns")
		annotation = "sg-only"
	elseif number == "pl" then
		m_table.insertIfNot(categories, "Azerbaijani " .. "pluralia tantum")
		annotation = "pl-only"
	end

	decl_spec.annotation = annotation
	decl_spec.categories = categories
end

-- Hand the generated forms to the inflection utilities for display formatting.
-- The lemma list is taken from the nominative singular forms or, when there are
-- none, from the nominative plural forms.
local function show_forms(decl_spec)
	local forms = decl_spec.forms
	local lemmas = {}
	for _, entry in ipairs(forms.nom_s or forms.nom_p or {}) do
		lemmas[#lemmas + 1] = entry.form
	end
	iut.show_forms(forms, {
		lemmas = lemmas,
		slot_table = output_noun_slots,
		lang = lang,
		include_translit = true,
	})
end

-- Render the declension tables as wikitext.
-- Chooses one of three templates depending on `decl_spec.number` ("sg", "pl" or
-- anything else for both numbers), stores the title, possessive title and
-- annotation in `decl_spec.forms`, and substitutes the {placeholder} fields of
-- the template from that table.
local function make_table(decl_spec)
	local forms = decl_spec.forms

	-- Template for nouns with both singular and plural forms.
	local template_both = [=[
<div class="NavFrame" style="display: block;max-width: 45em">
<div class="NavHead" style="background:#eff7ff" >{title}{annotation}</div>
<div class="NavContent">
{\op}| style="background:#F9F9F9;text-align:center;min-width:45em" class="inflection-table"
|-
! style="width:33%;background:#d9ebff" |
! style="background:#d9ebff" | singular<br>
! style="background:#d9ebff" | plural<br>
|-
!style="background:#eff7ff"|nominative
| {nom_s}
| {nom_p}
|-
!style="background:#eff7ff"|accusative
| {acc_s}
| {acc_p}
|-
!style="background:#eff7ff"|dative
| {dat_s}
| {dat_p}
|-
!style="background:#eff7ff"|locative
| {loc_s}
| {loc_p}
|-
!style="background:#eff7ff"|ablative
| {abl_s}
| {abl_p}
|-
!style="background:#eff7ff"|genitive
| {gen_s}
| {gen_p}
|{\cl}</div></div>
<div class="NavFrame" style="display: block;max-width: 45em">
<div class="NavHead" style="background:#eff7ff" >{title_possessive}{annotation}</div>
<div class="NavContent">
{\op}| style="background:#F9F9F9;text-align:center;min-width:45em" class="inflection-table"
|-
!style="background:#d9ebff"|
!colspan="2" style="background:#d9ebff"|first-person singular
|-
!style="background:#eff7ff"|nominative
| {nom_1s_spos}
| {nom_1s_mpos}
|-
!style="background:#eff7ff"|accusative
| {acc_1s_spos}
| {acc_1s_mpos}
|-
!style="background:#eff7ff"|dative
| {dat_1s_spos}
| {dat_1s_mpos}
|-
!style="background:#eff7ff"|locative
| {loc_1s_spos}
| {loc_1s_mpos}
|-
!style="background:#eff7ff"|ablative
| {abl_1s_spos}
| {abl_1s_mpos}
|-
!style="background:#eff7ff"|genitive
| {gen_1s_spos}
| {gen_1s_mpos}
|-
!style="background:#d9ebff"|
!colspan="2" style="background:#d9ebff"|second-person singular
|-
!style="background:#eff7ff"|nominative
| {nom_2s_spos}
| {nom_2s_mpos}
|-
!style="background:#eff7ff"|accusative
| {acc_2s_spos}
| {acc_2s_mpos}
|-
!style="background:#eff7ff"|dative
| {dat_2s_spos}
| {dat_2s_mpos}
|-
!style="background:#eff7ff"|locative
| {loc_2s_spos}
| {loc_2s_mpos}
|-
!style="background:#eff7ff"|ablative
| {abl_2s_spos}
| {abl_2s_mpos}
|-
!style="background:#eff7ff"|genitive
| {gen_2s_spos}
| {gen_2s_mpos}
|-
!style="background:#d9ebff"|
!colspan="2" style="background:#d9ebff"|third-person singular
|-
!style="background:#eff7ff"|nominative
| {nom_3s_spos}
| {nom_3s_mpos}
|-
!style="background:#eff7ff"|accusative
| {acc_3s_spos}
| {acc_3s_mpos}
|-
!style="background:#eff7ff"|dative
| {dat_3s_spos}
| {dat_3s_mpos}
|-
!style="background:#eff7ff"|locative
| {loc_3s_spos}
| {loc_3s_mpos}
|-
!style="background:#eff7ff"|ablative
| {abl_3s_spos}
| {abl_3s_mpos}
|-
!style="background:#eff7ff"|genitive
| {gen_3s_spos}
| {gen_3s_mpos}
|-
!style="background:#d9ebff"|
!colspan="2" style="background:#d9ebff"|first-person plural
|-
!style="background:#eff7ff"|nominative
| {nom_1p_spos}
| {nom_1p_mpos}
|-
!style="background:#eff7ff"|accusative
| {acc_1p_spos}
| {acc_1p_mpos}
|-
!style="background:#eff7ff"|dative
| {dat_1p_spos}
| {dat_1p_mpos}
|-
!style="background:#eff7ff"|locative
| {loc_1p_spos}
| {loc_1p_mpos}
|-
!style="background:#eff7ff"|ablative
| {abl_1p_spos}
| {abl_1p_mpos}
|-
!style="background:#eff7ff"|genitive
| {gen_1p_spos}
| {gen_1p_mpos}
|-
!style="background:#d9ebff"|
!colspan="2" style="background:#d9ebff"|second-person plural
|-
!style="background:#eff7ff"|nominative
| {nom_2p_spos}
| {nom_2p_mpos}
|-
!style="background:#eff7ff"|accusative
| {acc_2p_spos}
| {acc_2p_mpos}
|-
!style="background:#eff7ff"|dative
| {dat_2p_spos}
| {dat_2p_mpos}
|-
!style="background:#eff7ff"|locative
| {loc_2p_spos}
| {loc_2p_mpos}
|-
!style="background:#eff7ff"|ablative
| {abl_2p_spos}
| {abl_2p_mpos}
|-
!style="background:#eff7ff"|genitive
| {gen_2p_spos}
| {gen_2p_mpos}
|-
!style="background:#d9ebff"|
!colspan="2" style="background:#d9ebff"|third-person plural
|-
!style="background:#eff7ff"|nominative
| {nom_3p_spos}
| {nom_3p_mpos}
|-
!style="background:#eff7ff"|accusative
| {acc_3p_spos}
| {acc_3p_mpos}
|-
!style="background:#eff7ff"|dative
| {dat_3p_spos}
| {dat_3p_mpos}
|-
!style="background:#eff7ff"|locative
| {loc_3p_spos}
| {loc_3p_mpos}
|-
!style="background:#eff7ff"|ablative
| {abl_3p_spos}
| {abl_3p_mpos}
|-
!style="background:#eff7ff"|genitive
| {gen_3p_spos}
| {gen_3p_mpos}
|{\cl}</div></div>]=]

	-- Template for singular-only nouns.
	local template_sg = [=[
<div class="NavFrame" style="width:30em">
<div class="NavHead" style="background:#eff7ff">{title}{annotation}</div>
<div class="NavContent">
{\op}| style="background:#F9F9F9;text-align:center;width:30em" class="inflection-table"
|-
! style="width:33%;background:#d9ebff" |
! style="background:#d9ebff" | singular
|-
!style="background:#eff7ff"|nominative
| {nom_s}
|-
!style="background:#eff7ff"|accusative
| {acc_s}
|-
!style="background:#eff7ff"|dative
| {dat_s}
|-
!style="background:#eff7ff"|locative
| {loc_s}
|-
!style="background:#eff7ff"|ablative
| {abl_s}
|-
!style="background:#eff7ff"|genitive
| {gen_s}
|{\cl}</div></div>
<div class="NavFrame" style="width: 30em">
<div class="NavHead" style="background:#eff7ff" >{title_possessive}{annotation}</div>
<div class="NavContent">
{\op}| style="background:#F9F9F9;text-align:center;width:30em" class="inflection-table"
|-
!style="background:#d9ebff"|
!style="background:#d9ebff"|first-person singular
|-
!style="background:#eff7ff"|nominative
| {nom_1s_spos}
|-
!style="background:#eff7ff"|accusative
| {acc_1s_spos}
|-
!style="background:#eff7ff"|dative
| {dat_1s_spos}
|-
!style="background:#eff7ff"|locative
| {loc_1s_spos}
|-
!style="background:#eff7ff"|ablative
| {abl_1s_spos}
|-
!style="background:#eff7ff"|genitive
| {gen_1s_spos}
|{\cl}</div></div>]=]

	-- Template for plural-only nouns.
	local template_pl = [=[
<div class="NavFrame" style="width:30em">
<div class="NavHead" style="background:#eff7ff">{title}{annotation}</div>
<div class="NavContent">
{\op}| style="background:#F9F9F9;text-align:center;width:30em" class="inflection-table"
|-
! style="width:33%;background:#d9ebff" |
! style="background:#d9ebff" | plural
|-
!style="background:#eff7ff"|nominative
| {nom_p}
|-
!style="background:#eff7ff"|accusative
| {acc_p}
|-
!style="background:#eff7ff"|dative
| {dat_p}
|-
!style="background:#eff7ff"|locative
| {loc_p}
|-
!style="background:#eff7ff"|ablative
| {abl_p}
|-
!style="background:#eff7ff"|genitive
| {gen_p}
|{\cl}</div></div>
<div class="NavFrame" style="width: 30em">
<div class="NavHead" style="background:#eff7ff" >{title_possessive}{annotation}</div>
<div class="NavContent">
{\op}| style="background:#F9F9F9;text-align:center;width:30em" class="inflection-table"
|-
!style="background:#d9ebff"|
!style="background:#d9ebff"|first-person singular
|-
!style="background:#eff7ff"|nominative
| {nom_1s_mpos}
|-
!style="background:#eff7ff"|accusative
| {acc_1s_mpos}
|-
!style="background:#eff7ff"|dative
| {dat_1s_mpos}
|-
!style="background:#eff7ff"|locative
| {loc_1s_mpos}
|-
!style="background:#eff7ff"|ablative
| {abl_1s_mpos}
|-
!style="background:#eff7ff"|genitive
| {gen_1s_mpos}
|{\cl}</div></div>]=]

	-- Titles: an explicit title wins, otherwise one is built from the lemma.
	forms.title = decl_spec.title or
		('Declension of <i lang="az" class="Latn">' .. forms.lemma .. '</i>')
	forms.title_possessive = decl_spec.title_possessive or
		('Possessive forms of <i lang="az" class="Latn">' .. forms.lemma .. '</i>')

	-- A non-empty annotation is shown in smaller type, in parentheses, after a space.
	local annotation = decl_spec.annotation
	if annotation ~= "" then
		annotation = " " .. "(<span style=\"font-size: smaller;\">" .. annotation .. "</span>)"
	end
	forms.annotation = annotation

	local templates_by_number = {sg = template_sg, pl = template_pl}
	local chosen_template = templates_by_number[decl_spec.number] or template_both
	return m_string_utilities.format(chosen_template, forms)
end

-- Externally callable function to parse the template arguments and decline a
-- noun. Returns DECL_SPEC, an object where the declined forms are in
-- `DECL_SPEC.forms` for each slot. If there are no values for a slot, the slot
-- key will be missing. The value for a given slot is a list of objects
-- {form=FORM}. DECL_SPEC also carries `number`, `title`, `title_possessive`,
-- `annotation` and `categories`.
--
-- parent_args: the template arguments. Recognized parameters:
--   1                 the lemma (defaults to the page name); for number "pl" it
--                     must end in -lar/-lər, which is stripped to get the stem;
--   title             override for the title of the declension table;
--   title_possessive  override for the title of the possessive table;
--   class             override for the type class of the lemma;
--   vn                boolean flag passed on to decline_noun.
-- number: "sg", "pl" or "both".
--
-- Throws an error if `number` is invalid or a plural lemma lacks the plural ending.
function export.do_generate_forms(parent_args, number)
	if number ~= "sg" and number ~= "pl" and number ~= "both" then
		-- tostring() so that a nil or non-string value still yields this message
		-- rather than a concatenation error.
		error(
			"Internal error: number (arg 1) must be 'sg', 'pl' or 'both': '" ..
				tostring(number) .. "'")
	end

	local params = {
		[1] = {},
		title = {},
		title_possessive = {},
		class = {},
		-- Boolean so that e.g. vn=0 or vn=no does not enable the flag.
		vn = {type = "boolean"},
	}

	local args = m_para.process(parent_args, params)
	local decl_spec = {
		title = args.title,
		title_possessive = args.title_possessive,
		forms = {},
		number = number,
	}
	local lemma = args[1] or PAGENAME
	if number == "pl" then
		-- Plural-only nouns are given in the plural; recover the singular stem.
		local sg_lemma = rmatch(lemma, "(.*)[l][aə]r$")
		if not sg_lemma then
			error(
				"Plural lemma doesn't end with nominative plural ending (-lar, -lər): " ..
					lemma)
		end
		lemma = sg_lemma
	end
	decline_noun(decl_spec, lemma, args["class"], args["vn"])
	compute_categories_and_annotation(decl_spec)
	return decl_spec
end

-- Entry point for {{az-decl-noun}}, {{az-decl-noun-sg}} and {{az-decl-noun-pl}}.
-- The invocation's first argument selects the number ("sg", "pl" or "both"); the
-- noun itself is described by the arguments of the calling template. Returns the
-- wikitext of the declension tables followed by the formatted categories.
function export.show(frame)
	local invoke_params = {
		[1] = {required = true},
		["class"] = {},
		["vn"] = { type = "boolean"},
	}
	local invoke_args = m_para.process(frame.args, invoke_params)
	local template_args = frame:getParent().args

	local decl_spec = export.do_generate_forms(template_args, invoke_args[1])
	show_forms(decl_spec)

	local tables_text = make_table(decl_spec)
	local categories_text = require("Module:utilities").format_categories(
		decl_spec.categories, lang)
	return tables_text .. categories_text
end

return export