Module:category tree/poscatboiler/data/lang-specific/jpx

From Wiktionary, the free dictionary
Jump to navigation Jump to search

This module handles generating the descriptions and categorization for Japonic category pages of the format "Japonic LABEL", where "Japonic" stands for the name of a Japonic language and LABEL can be any text. Examples of categories of this format in other languages are Category:Bulgarian conjugation 2.1 verbs and Category:Russian velar-stem neuter-form nouns. This module is part of the poscatboiler system, which is a general framework for generating the descriptions and categorization of category pages.

For more information, see Module:category tree/poscatboiler/data/lang-specific/documentation.

NOTE: If you add a new language-specific module, you must add the language code to the list at the top of Module:category tree/poscatboiler/data/lang-specific in order for the module to be recognized.


local export = {}

local Hira = require("Module:scripts").getByCode("Hira")
local Jpan = require("Module:scripts").getByCode("Jpan")
local kana_to_romaji = require("Module:Hrkt-translit").tr
local m_numeric = require("Module:ConvertNumeric")
local rmatch = mw.ustring.match


-- Registers the fixed (non-pattern) category labels for a Japonic language.
--
-- labels: table to populate. Each key is a label (the category name without
--         the language name); each value is a spec table with a description,
--         parents, and optional fields (additional, topright, toc_template,
--         breadcrumb, hidden, can_be_empty).
-- lang:   language object. Only lang:getCode() is used here, to pick the
--         article ("an" for code "ryu") and to add the help-page link that
--         only applies to code "ja".
--
-- Placeholders such as {{{langname}}}, {{{langcat}}} and {{{langcode}}} are
-- left verbatim in the strings; presumably the poscatboiler framework expands
-- them (not visible in this file).
function export.add_labels(labels, lang)
	labels["adnominals"] = {
		description = "{{{langname}}} {{m|ja|連体詞||[[adnominal]], attributive|tr=れんたいし, rentaishi|sc=Jpan}}. Modifies nouns, doesn’t conjugate, and doesn’t [[predicate#Verb|predicate]].",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}

	labels["hiragana"] = {
		description = "The [[hiragana]] ([[平仮名]], [[ひらがな]]) form of " .. (lang:getCode() == "ryu" and "an" or "a") .. " {{{langname}}} word is a [[phonetic]] representation of that word. " ..
		"Wiktionary represents {{{langname}}}-language segments in three ways: in normal form (with [[kanji]], if appropriate), in [[hiragana]] " ..
		"form (this differs from kanji form only when the segment contains kanji), and in [[romaji]] form.",
		additional = (lang:getCode() == "ja" and "For more information, see [[Wiktionary:Japanese language]].\n\n" or "") .. "''See also'' [[:Category:{{{langname}}} katakana]]",
		toc_template = "categoryTOC-hiragana",
		parents = {
			{name = "{{{langcat}}}", raw = true},
			"Category:Hiragana script characters",
		}
	}

	labels["historical hiragana"] = {
		description = "{{{langname}}} historical [[hiragana]].",
		additional = "''See also'' [[:Category:{{{langname}}} historical katakana]].",
		toc_template = "categoryTOC-hiragana",
		parents = {
			"hiragana",
			{name = "{{{langcat}}}", raw = true},
			"Category:Hiragana script characters",
		}
	}

	labels["katakana"] = {
		description = "{{{langname}}} words and terms with katakana forms, sorted by conventional katakana sequence. Katakana is used primarily for transliterations of foreign words, including old Chinese Hanzi not used in [[shinjitai]].",
		additional = (lang:getCode() == "ja" and "For more information, see [[Wiktionary:Japanese language]].\n\n" or "") .. "''See also'' [[:Category:{{{langname}}} hiragana]]",
		toc_template = "categoryTOC-katakana",
		parents = {
			{name = "{{{langcat}}}", raw = true},
			"Category:Katakana script characters",
		}
	}

	labels["historical katakana"] = {
		description = "{{{langname}}} historical [[katakana]].",
		additional = "''See also'' [[:Category:{{{langname}}} historical hiragana]].",
		toc_template = "categoryTOC-katakana",
		parents = {
			"katakana",
			{name = "{{{langcat}}}", raw = true},
			"Category:Katakana script characters",
		}
	}

	labels["terms spelled with mixed kana"] = {
		description = "{{{langname}}} terms which combine [[hiragana]] and [[katakana]] characters, potentially with [[kanji]] too.",
		parents = {
			{name = "{{{langcat}}}", raw = true},
			"hiragana",
			"katakana",
		},
	}

	labels["honorifics"] = {
		topright = "{{wikipedia|Honorific speech in Japanese}}",
		description = "{{{langname}}} [[honorific]]s.",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}

	labels["humble language"] = {
		description = "{{{langname}}} humble terms, or {{ja-r|謙譲語|けんじょうご}}, which is a type of honorific speech in {{{langname}}} that lowers the speaker in relation to the listener.",
		parents = "honorifics",
	}

	labels["respectful language"] = {
		description = "{{{langname}}} respectful terms, or {{ja-r|尊敬語|そんけいご}}, which is a type of honorific speech in {{{langname}}} that elevates the listener in relation to the speaker.",
		parents = "honorifics",
	}

	labels["kanji by reading"] = {
		description = "{{{langname}}} kanji categorized by reading.",
		parents = {{name = "Han characters", sort = "reading"}},
	}

	labels["makurakotoba"] = {
		topright = "{{wikipedia|Makurakotoba}}",
		description = "{{{langname}}} idioms used in poetry to introduce specific words.",
		parents = {"idioms"},
	}

	labels["terms by kanji readings"] = {
		description = "{{{langname}}} categories grouped with regard to the readings of the kanji with which they are spelled.",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}

	labels["terms by reading pattern"] = {
		description = "{{{langname}}} categories with terms grouped by their reading patterns.",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}

	-- One "terms read with PATTERN" label per reading pattern; the value is the
	-- wikitext inserted into the description after "terms exhibiting".
	for reading_pattern, link in pairs {
		["jukujikun"] = "{{ja-r|熟字訓|じゅくじくん}}",
		["jūbakoyomi"] = "{{ja-r|重%箱%読み|じゅう%ばこ%よみ}}, a reading pattern in {{{langname}}} compounds where kanji read with ''on'yomi'' are followed by kanji read with ''kun'yomi''",
		["kun'yomi"] = "{{ja-r|訓読み|くんよみ}}",
		["nanori"] = "{{ja-r|名乗り|なのり}}",
		["on'yomi"] = "{{ja-r|音%読み|おん%よみ}}",
		["yutōyomi"] = "{{ja-r|湯%桶%読み|ゆ%とう%よみ}}, a reading pattern in {{{langname}}} compounds where kanji read with ''kun'yomi'' are followed by kanji read with ''on'yomi''",
	} do
		labels["terms read with " .. reading_pattern] = {
			description = "{{{langname}}} terms exhibiting " .. link .. ".",
			breadcrumb = reading_pattern,
			parents = {{name = "terms by reading pattern", sort = reading_pattern}},
		}
	end

	labels["terms by kanji reading type"] = {
		description = "{{{langname}}} categories with terms grouped with regard to the types of readings of the kanji with which " ..
		"they are spelled, broadly those of Chinese origin (''on'' readings) and those of {{{langname}}} origin (''kun'' readings).",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}

	labels["verbs without transitivity"] = {
		description = "{{{langname}}} verbs missing the <code>tr=</code> parameter in the headword template.",
		hidden = true,
		can_be_empty = true,
		parents = {"entry maintenance"},
	}

	labels["yojijukugo"] = {
		topright = "{{wikipedia|Yojijukugo}}",
		description = "{{{langname}}} four-[[kanji]] compound terms with idiomatic meanings, typically derived from Classical Chinese, Buddhist scripture or traditional Japanese proverbs.",
		additional = "Compare [[w:Chengyu|chengyu]] in Sinitic languages.",
		parents = {"idioms"},
	}

	-- Extra text attached only to the generic "on" readings category, pointing
	-- at the categories for the more specific on-reading subtypes.
	local on_continuation = [=[


	Categories of terms with more specific types of ''on'' readings can be found in the following categories:
	* [[:Category:{{{langname}}} terms spelled with kanji with goon readings]]
	* [[:Category:{{{langname}}} terms spelled with kanji with kan'yōon readings]]
	* [[:Category:{{{langname}}} terms spelled with kanji with kan'on readings]]
	* [[:Category:{{{langname}}} terms spelled with kanji with tōon readings]]
	]=]

	-- Suffix shared by the descriptions of all on-reading subtypes.
	local on_desc = ", which is a type of {{ja-r|音%読み|おん%よみ}} or {{ja-r|音|おん}} reading"
	for reading_type, reading_desc in pairs {
		["goon"] = "a {{ja-r|呉%音|ご%おん}} reading" .. on_desc,
		["kan'on"] = "a {{ja-r|漢%音|かん%おん}} reading" .. on_desc,
		["kan'yōon"] = "a {{ja-l|慣用音|かんようおん|kan'yōon}} reading" .. on_desc,
		["kun"] = "a {{ja-r|訓%読み|くん%よみ}} or {{ja-r|訓|くん}} reading",
		["nanori"] = "a {{ja-r|名%乗り|な%のり}} reading, which is a type of reading used for people and places",
		["on"] = "an {{ja-r|音%読み|おん%よみ}} or {{ja-r|音|おん}} reading",
		["tōon"] = "a {{ja-r|唐%音|とう%おん}} reading" .. on_desc,
		["sōon"] = "a {{ja-r|宋%音|そう%おん}} reading" .. on_desc,
	} do
		labels["terms spelled with kanji with " .. reading_type .. " readings"] = {
			description = "{{{langname}}} categories with terms that are spelled with one or more kanji which exhibit " .. reading_desc .. ".",
			-- `or nil` keeps the field absent (rather than `false`) for every
			-- reading type other than "on".
			additional = reading_type == "on" and on_continuation or nil,
			breadcrumb = reading_type,
			-- Sort under the reading type of this loop. (The previous code
			-- referenced `reading_pattern`, which is the variable of the
			-- earlier loop and is not in scope here, so the sort key was nil.)
			parents = {{name = "terms by kanji reading type", sort = reading_type}},
		}
	end

	labels["terms spelled with ateji"] = {
		topright = "{{wikipedia|Ateji}}",
		description = "{{{langname}}} terms containing one or more [[Appendix:Japanese glossary#ateji|ateji]] ({{m|ja|当て字}}), which are [[kanji]] used to represent sounds rather than meanings (though meaning may have some influence on which kanji are chosen).",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}

	labels["terms spelled with daiyōji"] = {
		-- Uses {{{langname}}} like every other label instead of a hard-coded
		-- "Japanese", since this module serves several Japonic languages.
		description = "{{{langname}}} terms spelled using [[Appendix:Japanese glossary#daiyouji|daiyōji]], categorized using {{temp|ja-daiyouji}}.",
		parents = {"terms by etymology"},
	}

	labels["terms spelled with jukujikun"] = {
		description = "{{{langname}}} terms containing one or more [[Appendix:Japanese glossary#jukujikun|jukujikun]] ({{m|ja|熟字訓}}), which are [[kanji]] used to represent meanings rather than sounds.",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}

	labels["terms spelled with jōyō kanji"] = {
		topright = "{{wikipedia|Jōyō kanji}}",
		description = "{{{langname}}} terms spelled with at least one kanji, where all kanji in the terms are included on the official list of jōyō kanji.",
		additional = "See also [[:Category:{{{langname}}} terms spelled with non-jōyō kanji]].",
		parents = {{name = "terms by orthographic property", sort = "jōyō"}},
	}

	labels["terms spelled with non-jōyō kanji"] = {
		topright = "{{wikipedia|Jōyō kanji}}",
		description = "{{{langname}}} terms spelled with at least one kanji not included in the official list of jōyō kanji.",
		additional = "See also [[:Category:{{{langname}}} terms spelled with jōyō kanji]].",
		parents = {{name = "terms by orthographic property", sort = "non-jōyō"}},
	}

	for _, non_joyo_type in ipairs {"hyōgaiji", "jinmeiyō"} do
		labels["terms spelled with " .. non_joyo_type .. " kanji"] = {
			description = "{{{langname}}} terms spelled with " .. non_joyo_type .. " kanji.",
			parents = {{name = "terms spelled with non-jōyō kanji", sort = non_joyo_type}},
		}
	end

	-- One label per school grade 1..6; the ordinal word is looked up in
	-- m_numeric.ones_position_ord (presumably "first", "second", ... — the
	-- table is defined in Module:ConvertNumeric, not in this file).
	for i = 1, 6 do
		local ord = m_numeric.ones_position_ord[i]
		labels["terms spelled with " .. ord .. " grade kanji"] = {
			description = "{{{langname}}} terms spelled with " .. ord .. " grade kanji.",
			parents = {{name = "terms spelled with jōyō kanji", sort = i}},
		}
	end

	labels["terms spelled with secondary school kanji"] = {
		description = "{{{langname}}} terms spelled with secondary school kanji.",
		parents = {{name = "terms spelled with jōyō kanji", sort = "secondary school"}},
	}

	labels["terms with multiple readings"] = {
		description = "{{{langname}}} terms with multiple pronunciations (hence multiple [[kana]] spellings).",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}

	labels["kanji readings by number of morae"] = {
		description = "{{{langname}}} categories grouped with regard to the number of morae in their kanji readings.",
		parents = {{name = "{{{langcat}}}", raw = true}},
	}

	labels["single-kanji terms"] = {
		description = "{{{langname}}} terms written as a single kanji.",
		parents = {"terms by orthographic property", {name = "character counts", sort = " "}},
	}
end



-- Registers the pattern-based category handlers for a Japonic language.
--
-- handlers: array to append to. Each handler is a function taking `data`
--           (this file reads data.label, data.lang and data.args) and
--           returning nil when the label is not recognised, or a spec table
--           (description, displaytitle, breadcrumb, parents, additional).
--           Handlers that read data.args return `true` as a second value;
--           NOTE(review): presumably this tells poscatboiler that the handler
--           consumed the template arguments — confirm against the framework.
-- lang:     language object used for tagging and linking readings.
-- m_lang:   language-specific helper module; only m_lang.count_morae is used.
function export.add_handlers(handlers, lang, m_lang)
	-- Reverse lookup from number words to their numeric values.
	-- FIXME: Only works for 0 through 19.
	local word_to_number = {}
	for k, v in pairs(m_numeric.ones_position) do
		word_to_number[v] = k
	end

	-- Capture for a kana reading at the end of a label.
	local kana_capture = "([-ぁ-ー𛀁𛀆]+)"

	-- Recognised period qualifiers.
	local periods = {
		historical = true,
		ancient = true,
	}

	-- Validates the optional period and builds the glossary link for the
	-- reading type.
	-- Returns `period_text, reading_type_link`:
	--   * nil (single value) when `period` is non-empty but not a known period;
	--   * nil, reading_type when there is no period and the "reading type" is
	--     itself a period name (e.g. label "kanji by historical reading");
	--   * otherwise the period followed by a space (or nil when there is no
	--     period) and a link to the Japanese glossary appendix.
	local function get_period_text_and_reading_type_link(period, reading_type)
		period = period ~= "" and period or nil
		if period and not periods[period] then
			return nil
		end
		local period_text = period and period .. " " or nil

		-- Allow periods (historical or ancient) by themselves; they will parse as reading types.
		if not period and periods[reading_type] then
			return nil, reading_type
		end

		local reading_type_link = "[[Appendix:Japanese glossary#" .. reading_type .. "|" .. reading_type .. "]]"
		return period_text, reading_type_link
	end

	-- Chooses the script object for `str`: Hira when every non-ASCII character
	-- is in Hira's character set, otherwise Jpan.
	local function get_sc(str)
		-- gsub returns (string, count); keep only the string.
		local non_ascii = str:gsub('[%z\1-\127]', '')
		return rmatch(non_ascii, '[^' .. Hira:getCharacters() .. ']') and Jpan or Hira
	end

	-- Wraps `reading` in script-tagging markup for display.
	local function get_tagged_reading(reading)
		return require("Module:script utilities").tag_text(reading, lang, get_sc(reading))
	end

	-- Builds a formatted link for a reading.
	-- reading:    kana string; ".", "-" and spaces are stripped for display,
	--             and "-" is stripped before transliteration.
	-- historical: truthy to request historical transliteration (passed as the
	--             `hist` option of the transliteration function).
	-- link:       optional link target; defaults to the displayed reading.
	local function get_reading_link(reading, historical, link)
		local display = reading:gsub('[%.%- ]', '')
		return require("Module:links").full_link({
			lang = lang,
			sc = get_sc(reading),
			term = link or display,
			alt = display,
			tr = kana_to_romaji((reading:gsub('%-', '')), lang:getCode(), nil, {hist = historical}),
		}, 'term')
	end

	-- True for subtypes of on readings, i.e. names ending in "on" preceded by
	-- at least one more byte ("goon", "kan'on", ...), but not "on" itself.
	local function is_on_subtype(reading_type)
		return reading_type:find(".on$")
	end


	-- "terms written with COUNT Han script character(s)"
	table.insert(handlers, function(data)
		local count, plural = data.label:match("^terms written with (.+) Han script character(s?)$")
		if count then
			-- Make sure 'one' goes with singular and other numbers with plural.
			if (count == "one") ~= (plural == "") then
				return nil
			end
			local num = word_to_number[count]
			if not num then
				return nil
			end
			return {
				description = "{{{langname}}} terms written with " .. count .. " Han script character" .. plural .. " (also known as [[kanji]]).",
				breadcrumb = num,
				parents = {{name = "character counts", sort = num}},
			}
		end
	end)


	-- "kanji readings with COUNT mora(e)"
	table.insert(handlers, function(data)
		local count, plural = data.label:match("^kanji readings with (.+) mora(e?)$")
		if count then
			-- Make sure 'one' goes with singular and other numbers with plural.
			if (count == "one") ~= (plural == "") then
				return nil
			end
			local num = word_to_number[count]
			if not num then
				return nil
			end
			return {
				description = "{{{langname}}} kanji readings containing " .. count .. " mora" .. plural .. ".",
				breadcrumb = num,
				parents = {{name = "kanji readings by number of morae", sort = num}},
			}
		end
	end)


	-- "kanji with [PERIOD ]TYPE reading KANA"
	table.insert(handlers, function(data)
		local label_pref, period, reading_type, reading = rmatch(data.label, "^(kanji with ([a-z]-) ?([%a']+) reading )" .. kana_capture .. "$")
		-- `period` is "" (not nil) when the label matches without a period.
		if period then
			local period_text, reading_type_link = get_period_text_and_reading_type_link(period, reading_type)
			if not reading_type_link then
				return nil
			end

			-- Compute parents.
			local parents = {
				{name = "kanji by " .. (period_text or "") .. reading_type .. " reading", sort = (data.lang:makeSortKey(reading))}
			}
			if is_on_subtype(reading_type) then
				-- On subtypes also go under the generic on-reading category.
				table.insert(parents, {name = "kanji with " .. (period_text or "") .. "on reading " .. reading, sort = reading_type})
			elseif period_text then
				-- Yields e.g. "kanji with historical reading KANA", which this
				-- same handler parses with the period as the reading type.
				table.insert(parents, {name = "kanji with " .. period_text .. "reading " .. reading, sort = reading_type})
			end
			if not period_text then
				table.insert(parents, {name = "kanji read as " .. reading, sort = reading_type})
			end

			local tagged_reading = get_tagged_reading(reading)
			return {
				description = "{{{langname}}} [[kanji]] with the " .. (period_text or "") .. reading_type_link .. " reading " ..
					get_reading_link(reading, period_text) .. ".",
				displaytitle = "{{{langname}}} " .. label_pref .. tagged_reading,
				breadcrumb = tagged_reading,
				parents = parents,
			}
		end
	end)


	-- "kanji by [PERIOD ]TYPE reading"
	table.insert(handlers, function(data)
		local period, reading_type = rmatch(data.label, "^kanji by ([a-z]-) ?([%a']+) reading$")
		if period then
			local period_text, reading_type_link = get_period_text_and_reading_type_link(period, reading_type)
			if not reading_type_link then
				return nil
			end

			-- Compute parents. The first parent is, in order of preference:
			-- the generic on-reading category (for on subtypes), the same
			-- reading type without the period, or the top-level category.
			local parents = {
				is_on_subtype(reading_type) and {name = "kanji by " .. (period_text or "") .. "on reading", sort = reading_type} or
				period_text and {name = "kanji by " .. reading_type .. " reading", sort = period} or
				{name = "kanji by reading", sort = reading_type}
			}
			if period_text then
				table.insert(parents, {name = "kanji by " .. period_text .. "reading", sort = reading_type})
			end

			-- Compute description.
			local description = "{{{langname}}} [[kanji]] categorized by " .. (period_text or "") .. reading_type_link .. " reading."
			return {
				description = description,
				breadcrumb = (period_text or "") .. reading_type,
				parents = parents,
			}
		end
	end)


	-- "kanji read as KANA"; accepts the optional argument histconsol=.
	table.insert(handlers, function(data)
		local label_pref, reading = rmatch(data.label, "^(kanji read as )" .. kana_capture .. "$")
		if reading then
			local params = {
				["histconsol"] = {},
			}
			local args = require("Module:parameters").process(data.args, params)
			local parents = {{name = "kanji by reading", sort = (data.lang:makeSortKey(reading))}}
			local addl
			local period_text
			if args.histconsol then
				-- The category's reading is a historical spelling; link to the
				-- category of the modern reading given in histconsol=.
				period_text = "historical"
				-- Link to the category of the current language rather than a
				-- hard-coded "Japanese" one, so other Japonic languages point
				-- at their own categories.
				local modern_category = ("Category:%s kanji read as %s"):format(data.lang:getCanonicalName(), args.histconsol)
				addl = ("This is a [[Wikipedia:Historical kana orthography|historical]] [[Wikipedia:Kanazukai|reading]], now " ..
				"consolidated with the [[Wikipedia:Modern kana usage|modern reading]] of " ..
				get_reading_link(args.histconsol, nil, modern_category) .. ".")
			end

			local tagged_reading = get_tagged_reading(reading)
			return {
				description = "{{{langname}}} [[kanji]] read as " .. get_reading_link(reading, period_text) .. ".",
				additional = addl,
				displaytitle = "{{{langname}}} " .. label_pref .. tagged_reading,
				breadcrumb = tagged_reading,
				parents = parents,
			}, true
		end
	end)


	-- "terms spelled with kanji read as KANA"
	table.insert(handlers, function(data)
		local label_pref, reading = rmatch(data.label, "^(terms spelled with kanji read as )" .. kana_capture .. "$")
		if reading then
			-- Compute parents.
			local sort_key = (data.lang:makeSortKey(reading))
			local mora_count = m_lang.count_morae(reading)
			local mora_count_words = m_numeric.spell_number(tostring(mora_count))
			local parents = {
				{name = "terms by kanji readings", sort = sort_key},
				-- Only a count of exactly one is singular, matching what the
				-- "kanji readings with N mora(e)" handler accepts.
				{name = "kanji readings with " .. mora_count_words .. " mora" .. (mora_count ~= 1 and "e" or ""), sort = sort_key},
				{name = "kanji read as " .. reading, sort = " "},
			}

			local tagged_reading = get_tagged_reading(reading)
			return {
				description = "{{{langname}}} terms that contain kanji that exhibit a reading of " .. get_reading_link(reading) ..
				" in those terms prior to any sound changes.",
				displaytitle = "{{{langname}}} " .. label_pref .. tagged_reading,
				breadcrumb = tagged_reading,
				parents = parents,
			}
		end
	end)


	-- "terms spelled with KANJI read as KANA"; requires at least one reading
	-- type as a positional argument (1=, 2=, ...). Raises an error otherwise.
	table.insert(handlers, function(data)
		local kanji, reading = rmatch(data.label, "^terms spelled with (.) read as " .. kana_capture .. "$")
		if not kanji then
			return nil
		end
		local params = {
			[1] = {list = true},
		}
		local args = require("Module:parameters").process(data.args, params)
		if #args[1] == 0 then
			error("For categories of the form \"" .. data.lang:getCanonicalName() ..
				" terms spelled with KANJI read as READING\", at least one reading type (e.g. \"kun\" or \"on\") must be specified using 1=, 2=, 3=, etc.")
		end
		-- Human-readable list of the reading types for the description.
		local reading_types = {}
		for _, reading_type in ipairs(args[1]) do
			table.insert(reading_types, "<i>" .. reading_type .. "</i> reading")
		end
		reading_types = require("Module:table").serialCommaJoin(reading_types, {conj = "or"})

		local parents = {
			{name = "terms spelled with " .. kanji, sort = (data.lang:makeSortKey(reading))},
			-- FIXME, using the kanji directly as the sort key is what it did before but maybe we should call [[Module:Hani-sortkey]]
			-- to get the radical/stroke sort key
			{name = "terms spelled with kanji read as " .. reading, sort = kanji},
		}
		-- One extra parent per reading type supplied in the arguments.
		for _, reading_type in ipairs(args[1]) do
			table.insert(parents, {name = "terms spelled with kanji with " .. reading_type .. " readings", sort = (data.lang:makeSortKey(reading))})
		end
		local tagged_kanji = get_tagged_reading(kanji)
		local tagged_reading = get_tagged_reading(reading)
		return {
			description = "{{{langname}}} terms spelled with {{l|{{{langcode}}}|" .. kanji .. "}} with its " ..
				reading_types .. " of " .. get_reading_link(reading) .. ".",
			displaytitle = "{{{langname}}} terms spelled with " .. tagged_kanji .. " read as " .. tagged_reading,
			breadcrumb = "read as " .. tagged_reading,
			parents = parents,
		}, true
	end)


	-- "terms with KANJI replaced by daiyōji DAIYOJI"; requires sort=.
	-- Raises an error when the sort key is missing.
	table.insert(handlers, function(data)
		local kanji, daiyoji = rmatch(data.label, "^terms with (.) replaced by daiyōji (.)$")
		if not kanji then
			return nil
		end
		local params = {
			["sort"] = {},
		}
		local args = require("Module:parameters").process(data.args, params)
		if not args.sort then
			error("For categories of the form \"" .. data.lang:getCanonicalName() ..
				" terms with KANJI replaced by daiyōji DAIYOJI\", the sort key must be specified using sort=")
		end

		local tagged_kanji = get_tagged_reading(kanji)
		local tagged_daiyoji = get_tagged_reading(daiyoji)
		return {
			description = "{{{langname}}} terms with {{l|{{{langcode}}}|" .. kanji .. "}} replaced by [[Appendix:Japanese glossary#daiyouji|daiyōji]] {{l|{{{langcode}}}|" .. daiyoji .. "}}.",
			displaytitle = "{{{langname}}} terms with " .. tagged_kanji .. " replaced by daiyōji " .. tagged_daiyoji,
			breadcrumb = tagged_kanji .. " replaced by daiyōji " .. tagged_daiyoji,
			parents = {{name = "terms spelled with daiyōji", sort = args.sort}},
		}, true
	end)
end


return export