Module:User:Kiril kovachev/tok-hyph

From Wiktionary, the free dictionary
Jump to navigation Jump to search

Syllabifies a Toki Pona word.

For example, for the word sitelen it outputs: Syllabification: si‧te‧len.

Usage[edit]

Basic usage: {{tok-hyph}}.

Optionally, you can pass a word to be syllabified instead of the page title: {{tok-hyph|sitelen}}.

Tests[edit]

All tests passed. (refresh)

TextExpectedActual
test_syllabification:
Passedaaa
Passedakesia‧ke‧sia‧ke‧si
Passedalaa‧laa‧la
Passedalasaa‧la‧saa‧la‧sa
Passedalea‧lea‧le
Passedalia‧lia‧li
Passedanpaan‧paan‧pa
Passedantean‧tean‧te
Passedanua‧nua‧nu
Passedawena‧wena‧wen
Passedeee
Passedenenen
Passedepikue‧pi‧kue‧pi‧ku
Passedesune‧sune‧sun
Passedijoi‧joi‧jo
Passedikei‧kei‧ke
Passediloi‧loi‧lo
Passedinsain‧sain‧sa
Passedjakija‧kija‧ki
Passedjanjanjan
Passedjasimaja‧si‧maja‧si‧ma
Passedjeloje‧loje‧lo
Passedjojojo
Passedkalaka‧laka‧la
Passedkalamaka‧la‧maka‧la‧ma
Passedkamaka‧maka‧ma
Passedkasika‧sika‧si
Passedkenkenken
Passedkepekenke‧pe‧kenke‧pe‧ken
Passedkijetesantakaluki‧je‧te‧san‧ta‧ka‧luki‧je‧te‧san‧ta‧ka‧lu
Passedkiliki‧liki‧li
Passedkinkinkin
Passedkipisiki‧pi‧siki‧pi‧si
Passedkiwenki‧wenki‧wen
Passedkokoko
Passedkokosilako‧ko‧si‧lako‧ko‧si‧la
Passedkonkonkon
Passedkukuku
Passedkuleku‧leku‧le
Passedkulupuku‧lu‧puku‧lu‧pu
Passedkuteku‧teku‧te
Passedlalala
Passedlanpanlan‧panlan‧pan
Passedlapela‧pela‧pe
Passedlasola‧sola‧so
Passedlawala‧wala‧wa
Passedlekole‧kole‧ko
Passedlenlenlen
Passedletele‧tele‧te
Passedlilili
Passedlilili‧lili‧li
Passedlinjalin‧jalin‧ja
Passedlipuli‧puli‧pu
Passedlojelo‧jelo‧je
Passedlonlonlon
Passedlukalu‧kalu‧ka
Passedlukinlu‧kinlu‧kin
Passedlupalu‧palu‧pa
Passedmamama
Passedmamama‧mama‧ma
Passedmanima‧nima‧ni
Passedmelime‧lime‧li
Passedmesome‧some‧so
Passedmimimi
Passedmijemi‧jemi‧je
Passedmisikekemi‧si‧ke‧kemi‧si‧ke‧ke
Passedmokumo‧kumo‧ku
Passedmolimo‧limo‧li
Passedmonsimon‧simon‧si
Passedmonsutamon‧su‧tamon‧su‧ta
Passedmumumu
Passedmunmunmun
Passedmusimu‧simu‧si
Passedmutemu‧temu‧te
Passednnn
Passednamakona‧ma‧kona‧ma‧ko
Passednanpanan‧panan‧pa
Passednasana‧sana‧sa
Passednasinna‧sinna‧sin
Passednenane‧nane‧na
Passedninini
Passednimini‧mini‧mi
Passednokano‧kano‧ka
Passedooo
Passedokoo‧koo‧ko
Passedolino‧lino‧lin
Passedonao‧nao‧na
Passedopeno‧peno‧pen
Passedpakalapa‧ka‧lapa‧ka‧la
Passedpalipa‧lipa‧li
Passedpalisapa‧li‧sapa‧li‧sa
Passedpanpanpan
Passedpanapa‧napa‧na
Passedpipipi
Passedpilinpi‧linpi‧lin
Passedpimejapi‧me‧japi‧me‧ja
Passedpinipi‧nipi‧ni
Passedpipipi‧pipi‧pi
Passedpokapo‧kapo‧ka
Passedpokipo‧kipo‧ki
Passedponapo‧napo‧na
Passedpupupu
Passedsamasa‧masa‧ma
Passedselise‧lise‧li
Passedselose‧lose‧lo
Passedsemese‧mese‧me
Passedsewise‧wise‧wi
Passedsijelosi‧je‧losi‧je‧lo
Passedsikesi‧kesi‧ke
Passedsinsinsin
Passedsinasi‧nasi‧na
Passedsinpinsin‧pinsin‧pin
Passedsitelensi‧te‧lensi‧te‧len
Passedsokoso‧koso‧ko
Passedsonaso‧naso‧na
Passedsoweliso‧we‧liso‧we‧li
Passedsulisu‧lisu‧li
Passedsunosu‧nosu‧no
Passedsupasu‧pasu‧pa
Passedsuwisu‧wisu‧wi
Passedtantantan
Passedtasota‧sota‧so
Passedtawata‧wata‧wa
Passedtelote‧lote‧lo
Passedtenpoten‧poten‧po
Passedtokito‧kito‧ki
Passedtomoto‧moto‧mo
Passedtonsiton‧siton‧si
Passedtututu
Passedunpaun‧paun‧pa
Passedutau‧tau‧ta
Passedutalau‧ta‧lau‧ta‧la
Passedwalowa‧lowa‧lo
Passedwanwanwan
Passedwasowa‧sowa‧so
Passedwawawa‧wawa‧wa
Passedwekawe‧kawe‧ka
Passedwilewi‧lewi‧le

-- Primary module authorship: Chernorizets (original Bulgarian syllabification code)
-- Port to Lua: Kiril Kovachev
-- Adaptation to Toki Pona: Kiril Kovachev
-- 17 April 2024.

local export = {}

local substring = mw.ustring.sub
local rsubn = mw.ustring.gsub
local rsplit = mw.text.split
local U = mw.ustring.char
local lang = require("Module:languages").getByCode("tok")
local script = require("Module:scripts").getByCode("Latn")

-- Character-class pattern matching the five lower-case vowel letters.
-- count_vowels() uses it to count the vowels in a word.
local hvowels_c = "[aioeu]"

-- U+2027 (HYPHENATION POINT): the separator inserted between syllables by
-- syllabify_word() and split on again by show_syllabification().
local HYPH = U(0x2027)
-- NOTE(review): not referenced anywhere in the visible code -- possibly a
-- leftover from the module this one was adapted from; confirm before removing.
local BREAK_MARKER = "."

-- Wrapper around rsubn() that yields only the substituted string and drops
-- the replacement count.
local function rsub(term, foo, bar)
	-- The surrounding parentheses truncate the call to its first result.
	return (rsubn(term, foo, bar))
end

-- Returns the one-character string found at position `index` of `str`
-- (extracted with `substring`, i.e. from `index` to `index` inclusive).
local function char_at(str, index)
	local pos = index
	return substring(str, pos, pos)
end

-- Counts the vowel letters (a, e, i, o, u) in `word`, ignoring case.
--
-- The word is lower-cased before matching because `hvowels_c` only lists
-- lower-case letters, whereas syllabify_poly() lower-cases every character
-- before testing it. Without this, a capitalised word such as "Inli" would be
-- counted as having a single vowel and would never be split.
--
-- word: string
-- Returns: integer number of vowels found.
local function count_vowels(word)
	-- gsub's second return value is the number of substitutions made.
	local _, vowel_count = mw.ustring.gsub(mw.ustring.lower(word), hvowels_c, "")
	return vowel_count
end

-- Reports whether `ch` is exactly one of the lower-case vowel letters
-- "a", "e", "i", "o" or "u". Anything else (including upper-case letters and
-- longer strings) gives false.
local function is_vowel(ch)
	return ch == "a"
		or ch == "e"
		or ch == "i"
		or ch == "o"
		or ch == "u"
end

---- Main syllabification code
-- Given the positions of two successive vowels, returns the position at which
-- the syllable containing the right-hand vowel starts.
-- word: the word being scanned (not consulted by this function)
-- left_vowel, right_vowel: integer character positions of the two vowels
local function find_next_syllable_onset(word, left_vowel, right_vowel)
	-- Number of consonants sitting between the two vowels.
	local gap = right_vowel - left_vowel - 1

	if gap == 0 then
		-- Adjacent vowels: the new syllable begins on the right vowel itself.
		return right_vowel
	elseif gap == 1 then
		-- A lone consonant between vowels opens the new syllable.
		return left_vowel + 1
	end

	-- Two ("or more") consonants. Toki Pona phonotactics only permit this
	-- when the first syllable ends in a nasal and the next begins with a
	-- consonant, so the break falls between the two consonants: the onset is
	-- the second character after the left vowel.
	return left_vowel + 2
end

-- Splits `word` into syllables by walking over its characters and cutting
-- between each pair of successive vowels.
-- Returns a table of strings (list), one entry per syllable.
local function syllabify_poly(word)
	local parts = {}
	local last_vowel = nil -- position of the most recent vowel; nil until one is seen
	local onset = 1        -- position where the syllable being built begins

	for pos = 1, mw.ustring.len(word) do
		if is_vowel(mw.ustring.lower(char_at(word, pos))) then
			if last_vowel then
				-- Between the previous vowel and this one lies a syllable
				-- break; everything up to it is a finished syllable.
				local next_onset = find_next_syllable_onset(word, last_vowel, pos)
				parts[#parts + 1] = substring(word, onset, next_onset - 1)
				onset = next_onset
			end
			-- The very first vowel only gets recorded; it closes nothing.
			last_vowel = pos
		end
	end

	-- Whatever remains after the final break is the last syllable.
	parts[#parts + 1] = substring(word, onset)

	return parts
end

-- Syllabifies a single word.
-- word: string
-- Returns: a string, the word with HYPH inserted at every syllable boundary.
-- An empty word gives an empty string. It previously gave an empty table,
-- which made export.syllabify() fail in table.concat whenever the term was
-- empty or contained consecutive spaces.
function export.syllabify_word(word)
	if mw.ustring.len(word) == 0 then return "" end

	-- With at most one vowel there is only one syllable: nothing to split.
	if count_vowels(word) <= 1 then return word end

	return table.concat(syllabify_poly(word), HYPH)
end

-- Syllabifies a term that may consist of several space-separated words.
-- term: string
-- Returns: a string in which each word has been passed through
-- export.syllabify_word() and the results are re-joined with single spaces.
function export.syllabify(term)
	local out = {}
	-- ipairs rather than pairs: the words must be emitted in their original
	-- order, and only ipairs guarantees sequential traversal.
	for i, word in ipairs(rsplit(term, " ")) do
		out[i] = export.syllabify_word(word)
	end
	return table.concat(out, " ")
end

-- Template-facing entry point: syllabifies a term and hands the syllables to
-- Module:hyphenation for formatting under the caption "Syllabification".
-- frame: the invoking frame; arguments are read from its parent frame.
-- The term is the first positional argument if given. Otherwise it is
-- "sitelen" when the current page is in the Template namespace, and the
-- current page's title text everywhere else.
function export.show_syllabification(frame)
	local current_title = mw.title.getCurrentTitle()
	local args = require("Module:parameters").process(
		frame:getParent().args,
		{ [1] = {} }
	)

	local term = args[1]
	if not term then
		if current_title.nsText == "Template" then
			term = "sitelen"
		else
			term = current_title.text
		end
	end

	-- Syllabify first, then split on the separator to get the list form that
	-- the formatter is given.
	local syllables = rsplit(export.syllabify(term), HYPH)

	return require("Module:hyphenation").format_hyphenations({
		lang = lang,
		hyphs = { { hyph = syllables } },
		sc = script,
		caption = "Syllabification",
	})
end

return export