Module:User:Suzukaze-c/Hani-tab/ja

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This is a private module sandbox of Suzukaze-c, for their own experimentation. Items in this module may be added and removed at Suzukaze-c's discretion; do not rely on this module's stability.


--[==[
TODO:
* [feature parity]
** juubako & yutou
*** automatic → need to examine original entry title?
** "|yomi=x,x3"
** kan'youon, goon, etc? [[Module:ja-kanjitab/documentation]] (いつの間に?)
** disable "cat:read as x" categories for irregular readings!!
** sortkeys
** [etc]
* "cat:read with on'yomi" "secondary school kanji" "one kanji" etc?
* read data from "cat:字 read as じ", or read data from "字#Readings"?
** have "cat:" and "{{ja-kanjitab}}" read from "#Readings"
* [[Template talk:ja-spellings#kyūjitai]]
]==]

-- Table of functions this module returns to its callers.
local export = {}

-- Local shortcuts for the MediaWiki string helpers used throughout this file.
local gsub, match = mw.ustring.gsub, mw.ustring.match
local split = mw.text.split

-- Shorthand yomi codes and the yomi_class_data key each one stands for.
-- Codes not listed here are looked up in yomi_class_data as-is.
-- XXX: share this data with [[t:ja-pron]]?
local yomi_class_aliases = {
	o = "on",
	k = "kun",
	i = "irregular",
	irr = "irregular",
}

-- Per-class display data, keyed by canonical yomi class name.
--   text     : label that build_tab_row_c writes into the table cell
--   entry    : Japanese-script name of the class (absent for "irregular");
--              not read by the code visible in this file
--   category : category name for the class; not read by the code visible in
--              this file
local yomi_class_data = {
	on = { text = "on’yomi", entry = "音読み", category = "Japanese terms read with on'yomi" },
	kun = { text = "kun’yomi", entry = "訓読み", category = "Japanese terms read with kun'yomi" },
	yutou = { text = "yutōyomi", entry = "湯桶読み", category = "Japanese terms read with yutōyomi" },
	juubako = { text = "jūbakoyomi", entry = "重箱読み", category = "Japanese terms read with jūbakoyomi" },
	kan = { text = "kan’yōon", entry = "慣用音", category = "Japanese terms read with kan'yōon" },
	nanori = { text = "nanori", entry = "名乗り読み", category = "Japanese terms read with nanori" },
	irregular = { text = "<i>irregular</i>", category = "Japanese terms with irregular kanji readings" },
}

-- Wikitext link shown beneath each kanji, indexed by the number that
-- build_tab_row_b_cell obtains from Module:ja's kanji_grade.
local kanji_grade_links = {
	[1] = '[[:w:Kyōiku_kanji|Grade: 1]]',
	[2] = '[[:w:Kyōiku_kanji|Grade: 2]]',
	[3] = '[[:w:Kyōiku_kanji|Grade: 3]]',
	[4] = '[[:w:Kyōiku_kanji|Grade: 4]]',
	[5] = '[[:w:Kyōiku_kanji|Grade: 5]]',
	[6] = '[[:w:Kyōiku_kanji|Grade: 6]]',
	[7] = '[[:w:Jōyō kanji|Grade: S]]',
	[8] = '[[:w:Jinmeiyō kanji|Jinmeiyō]]',
	[9] = '[[:w:Hyōgai kanji|Hyōgaiji]]',
}

-- Category names (or string.format templates for them) used by this module.
local category_models = {
	-- Template: first %s is the kanji, second %s is the reading.
	["$kanji read as $reading"] = "Japanese terms spelled with %s read as %s",
	-- Added when a reading's first kana changes in the way analyze_reading
	-- checks for.
	rendaku = "Japanese terms with rendaku",
	-- Added when a reading contains a parenthesised part.
	["omitted okurigana"] = "Japanese terms with omitted okurigana",
}

-- Passes `text` to Module:script utilities' tag_text together with the
-- language object that Module:languages returns for the code 'ja', and
-- returns whatever that call returns.
local function tag_text(text)
	local script_utilities = require('Module:script utilities')
	local japanese = require('Module:languages').getByCode('ja')
	return script_utilities.tag_text(text, japanese)
end

-- Reduces a reading to its bare form: removes every "(...)" group, then
-- removes everything from the first ">" to the end of the string.
-- Returns the resulting string.
local function clean_reading(reading)
	local without_parens = gsub(reading, '%([^)]+%)', '')
	local before_arrow = gsub(without_parens, '>.+', '')
	return before_arrow
end

-- Validates the notation of one kanji reading and appends the categories it
-- implies to `categories`.
--
-- categories: array that category names are appended to (modified in place).
-- char:       the kanji; used to fill in the "read as" category name.
-- reading:    reading string, or nil (in which case nothing is done). It may
--             contain a parenthesised part and a "before>after" sound change.
--
-- Raises an error when the parentheses or the sound-change arrow in `reading`
-- do not follow the notation checked below. Returns nothing.
local function analyze_reading(categories, char, reading)
	if not reading then
		return
	end

	-- error checking

	if match(reading, '>') then
		-- With a sound change, parentheses must appear on both sides of ">".
		if match(reading, '%(') and not match(reading, '.+%(.+%)>.+%(.+%)') then
			error('Please fix your parentheses.')
			-- BUT maybe "かた>がた(り)" is nicer than "かた(り)>がた(り)"
		end
	elseif match(reading, '<') then
		error('Please show sound change in the ">" direction.')
	end

	if match(reading, '[()]') and not match(reading, '.+%(.+%)') then
		error('Please close your parentheses.')
	end

	-- main processing

	if match(reading, '%(') then
		table.insert(categories, category_models['omitted okurigana'])

		-- Drop the parenthesised parts before the remaining checks.
		reading = gsub(reading, '%([^)]+%)', '')
	end

	if match(reading, '>') then
		local reading_forms = split(reading, '>')

		-- if reading_forms[1] == gsub(mw.ustring.toNFD(reading_forms[2]), '[゚゙]', '') then
		-- [perhaps using mw.ustring.sub and mw.ustring.codepoint is less "ugly"]

		local kanaA = mw.ustring.sub(reading_forms[1], 1, 1) -- pre-rendaku kana
		local kanaB = mw.ustring.sub(reading_forms[2], 1, 1) -- post-rendaku kana
		-- Compare against the characters one and two code points below the
		-- post-change kana.
		kanaB = mw.ustring.char(mw.ustring.codepoint(kanaB) - 1) -- strip dakuon
		-- Declared local so it does not leak into the global environment.
		local kanaC = mw.ustring.char(mw.ustring.codepoint(kanaB) - 1) -- strip dakuon again, for the pa-row
		if (kanaA == kanaB) or (kanaA == kanaC) then
			table.insert(categories, category_models['rendaku'])
		end

		-- Keep only the form before the sound change for the category name.
		reading = gsub(reading, '>.+', '')
	end

	table.insert(categories, category_models['$kanji read as $reading']:format(char, reading))
end

-- Builds the <td> for one kanji: the reading (when given) followed by a line
-- break, then the grade link for the kanji in small text.
--
-- char:    the kanji, passed to Module:ja's kanji_grade.
-- reading: reading string or nil; ">" is displayed as "→".
--
-- Returns the mw.html node for the cell.
local function build_tab_row_b_cell(char, reading)
	local cell = mw.html.create('td')
	local grade = require('Module:ja').kanji_grade(char)

	if reading then
		-- Alternative renderings that were tried and left disabled:
		-- reading = gsub(reading, '>', '<sub>&gt;</sub>')
		-- reading = gsub(reading, '%([^%(%)]+%)', '<sup>%1</sup>')
		local displayed = gsub(reading, '>', '→')

		cell:wikitext(tag_text(displayed))
		cell:tag('br')
	end

	local grade_link = kanji_grade_links[grade]
	cell:wikitext('<small>' .. grade_link .. '</small>')

	return cell
end

-- Fills the row that labels the kind of reading (yomi) used.
--
-- tab_row_c: mw.html row node that the cells are appended to.
-- yomi:      array of yomi codes or class names. NOTE: modified in place;
--            every entry is replaced by its display text.
-- chars:     array of kanji; only its length is used, as the colspan when a
--            single yomi covers the whole term.
--
-- Raises an error when an entry of `yomi` is neither a known alias nor a key
-- of yomi_class_data. Returns `tab_row_c`.
local function build_tab_row_c(tab_row_c, yomi, chars)
	for i, yomi_class in ipairs(yomi) do
		local canonical = yomi_class_aliases[yomi_class] or yomi_class -- "o"→"on"
		local class_data = yomi_class_data[canonical]
		if not class_data then
			-- Fail with a readable message instead of an "attempt to index nil" error.
			error('Unrecognized yomi type "' .. tostring(yomi_class) .. '".')
		end
		yomi[i] = class_data.text -- "on"→"on’yomi"
	end

	if #yomi == 1 then
		-- One label spanning every kanji column.
		local tab_row_c_cell = mw.html.create('td')
			:attr('colspan', #chars)
			:wikitext(yomi[1])
		:done()

		tab_row_c
			:node(tab_row_c_cell)
		:done()
	else
		local tab_row_c_cells = {}

		for i, yomi_text in ipairs(yomi) do
			if (yomi_text == yomi[i-1]) then
				-- Same label as the previous column: widen the previous cell.
				local last_cell = tab_row_c_cells[#tab_row_c_cells]
				last_cell
					:attr('colspan', last_cell:getAttr('colspan') + 1)
				:done()
			else
				local tab_row_c_cell = mw.html.create('td')
					:wikitext(yomi_text)
					:attr('colspan', 1)
				:done()

				table.insert(tab_row_c_cells, tab_row_c_cell)
			end
		end

		for _, tab_row_c_cell in ipairs(tab_row_c_cells) do
			tab_row_c
				:node(tab_row_c_cell)
			:done()
		end
	end

	return tab_row_c
end

-- Works out a yomi designation for every kanji by reading the wikitext of the
-- matching "... spelled with X read as Y" category page.
--
-- chars:    array of kanji.
-- readings: array of reading strings, one per kanji.
--
-- Returns an array holding one yomi designation per kanji.
-- Raises an error when a reading is missing, when the category page cannot be
-- read, or when the page lists both "on"-type and other reading types.
local function detect_yomi_classes(chars, readings)
	local yomi = {}

	for i, char in ipairs(chars) do
		if readings[i] then
			local category_name = category_models["$kanji read as $reading"]:format(char, clean_reading(readings[i]))
			-- mw.title.new returns nil for an invalid title; treat that the same
			-- as a page with no content instead of indexing nil.
			local category_title = mw.title.new(category_name, 'Category')
			local category_content = category_title and category_title:getContent()

			if not category_content then
				error('Please verify that [[:Category:' .. category_name .. ']] is formatted correctly, or fill out the "yomi" parameter.')
			end

			-- Treat the page text as a single template call and split it into
			-- its "|"-separated fields.
			category_content = gsub(category_content, '^{{', '')
			category_content = gsub(category_content, '}}$', '')
			category_content = split(category_content, '|')
			table.remove(category_content, 1) -- "ja-readingcat"
			table.remove(category_content, 1) -- [kanji]
			table.remove(category_content, 1) -- [reading]

			-- Encode each remaining field as '0' (ends in "on") or '1' (anything
			-- else) so a mix of the two can be spotted with a pattern match.
			-- XXX: the original author flagged this approach as questionable.
			local detected_yomi_types = ''
			for _, yomi_type in ipairs(category_content) do
				if match(yomi_type, 'on$') then
					detected_yomi_types = detected_yomi_types .. '0'
				else
					detected_yomi_types = detected_yomi_types .. '1'
				end
			end
			if match(detected_yomi_types, '01') or match(detected_yomi_types, '10') then
				error('Could not determine if [[:Category:' .. category_name .. ']] was an on reading or a kun reading; please fill out the "yomi" parameter.')
			elseif match(detected_yomi_types, '0') then
				yomi[i] = 'o'
			else
				-- Uses the first listed type verbatim; this is nil when the page
				-- lists no types at all (also flagged as questionable upstream).
				yomi[i] = category_content[1]
			end
		else
			-- yomi[i] = ''
			error('reading was not provided for [[' .. char .. ']]?')
		end
	end

	return yomi
end

-- Builds the reading row and the yomi row for a kanji table and appends both
-- to `data.hani_tab`.
--
-- data fields read here:
--   chars    : array of kanji.
--   readings : array of reading strings indexed like `chars`; may be nil or
--              have gaps.
--   yomi     : comma-separated yomi codes, or nil to detect them from the
--              readings. NOTE: overwritten with a table by this function.
--   hani_tab : node that receives the two rows via :node()
--              (presumably an mw.html table — confirm against the caller).
--
-- Returns `data.hani_tab` and the array of category names collected from the
-- readings. Raises an error when more than one yomi is given and their count
-- differs from the number of readings.
function export.main(data)
	local tab_row_b = mw.html.create('tr')
	local tab_row_c = mw.html.create('tr')
	local categories = {}
	-- A missing readings table is handled as "no readings" so that the lookups
	-- below never index nil.
	local readings = data.readings or {}

	if data.yomi then
		data.yomi = split(data.yomi, ',')
	elseif data.readings then
		data.yomi = detect_yomi_classes(data.chars, readings)
	end

	-- Only compare counts when yomi designations exist at all; a single
	-- designation applies to the whole term and needs no count check.
	if data.yomi and table.maxn(data.yomi) ~= 1 then
		local yomi_count = table.maxn(data.yomi)
		local reading_count = table.maxn(readings)
		if yomi_count ~= reading_count then
			error(reading_count .. ' readings and ' .. yomi_count .. ' yomi designations?')
		end
	end

	for i, char in ipairs(data.chars) do
		tab_row_b
			:node(build_tab_row_b_cell(char, readings[i]))
		:done()

		analyze_reading(categories, char, readings[i])
	end

	if data.yomi then
		tab_row_c = build_tab_row_c(tab_row_c, data.yomi, data.chars)
	end

	data.hani_tab
		:node(tab_row_b)
		:node(tab_row_c)
	:done()

	return data.hani_tab, categories
end

return export