User:Sbb1413/bn-translit

From Wiktionary, the free dictionary
Jump to navigation Jump to search

The following is my personal copy of the code I have used for Bengali transliteration.

-- Transliteration for Bengali

-- Module table; the public function export.tr is attached to it further down.
local export = {}

-- Local shortcuts for the mw.ustring functions used by the rest of the file.
local gsub = mw.ustring.gsub
local match = mw.ustring.match

-- Lookup table from a Bengali character (or a base letter plus nukta) to its
-- Latin transliteration. It is passed to gsub as the replacement table, so
-- any match that has no key here is left unchanged.
local conv = {
	-- consonants
	["ক"] = "k",
	["খ"] = "kh",
	["গ"] = "g",
	["ঘ"] = "gh",
	["ঙ"] = "ṅ",
	["চ"] = "c",
	["ছ"] = "ch",
	["জ"] = "j",
	["ঝ"] = "jh",
	["ঞ"] = "ñ",
	["ট"] = "ṭ",
	["ঠ"] = "ṭh",
	["ড"] = "ḍ",
	["ঢ"] = "ḍh",
	["ণ"] = "ṇ",
	["ত"] = "t",
	["থ"] = "th",
	["দ"] = "d",
	["ধ"] = "dh",
	["ন"] = "n",
	["প"] = "p",
	["ফ"] = "ph",
	["ব"] = "b",
	["ভ"] = "bh",
	["ম"] = "m",
	["য"] = "ĵ",
	["র"] = "r",
	["ল"] = "l",
	["শ"] = "ś",
	["ষ"] = "ṣ",
	["স"] = "s",
	["হ"] = "h",
	["য়"] = "ẏ",
	["ড়"] = "ṛ",
	["ঢ়"] = "ṛh",
	["জ়"] = "z",

	-- vowel diacritics
	["ি"] = "i",
	["ু"] = "u",
	["ৃ"] = "r̥",
	["ে"] = "e",
	["ো"] = "ō",
	["া"] = "a",
	["ী"] = "ī",
	["ূ"] = "ū",
	["ৈ"] = "ōi",
	["ৌ"] = "ōu",

	-- archaic vowel diacritics
	["ৄ"] = "r̥̄",
	["ৢ"] = "l̥",
	["ৣ"] = "l̥̄",

	-- visarga
	["ঃ"] = "ḥ",

	-- independent vowel letters
	["অ"] = "o",
	["ই"] = "i",
	["উ"] = "u",
	["ঋ"] = "r̥",
	["এ"] = "e",
	["ও"] = "ō",
	["আ"] = "a",
	["ঈ"] = "ī",
	["ঊ"] = "ū",
	["ঐ"] = "ōi",
	["ঔ"] = "ōu",

	-- archaic independent vowel letters
	["ৠ"] = "r̥̄",
	["ঌ"] = "l̥",
	["ৡ"] = "l̥̄",

	-- virama (maps to nothing: it only suppresses the inherent vowel)
	["্"] = "",

	-- chandrabindu (becomes a combining tilde on the preceding letter)
	["ঁ"] = "̃",

	-- avagraha
	["ঽ"] = "’",

	-- anusvara
	["ং"] = "ṁ",

	-- khanda ta
	["ৎ"] = "t",

	-- numerals
	["০"] = "0",
	["১"] = "1",
	["২"] = "2",
	["৩"] = "3",
	["৪"] = "4",
	["৫"] = "5",
	["৬"] = "6",
	["৭"] = "7",
	["৮"] = "8",
	["৯"] = "9",

	-- punctuation
	["।"] = ".", -- dãri
}

-- Character-class contents (without the surrounding brackets) that the
-- patterns in this file are assembled from.
local consonant = "ক-হড়-য়"
local vowel = "oা-ৌ’"
local vowel_sign = "অ-ঔ"

-- c:  bracketed class built from `consonant`.
-- cc: the same class with an optional ় in front of it; export.tr applies it
--     to reversed words, where a mark that follows a letter comes first.
-- v:  bracketed class covering both `vowel` and `vowel_sign`.
local c = "[" .. consonant .. "]"
local cc = "়?" .. c
local v = "[" .. vowel .. vowel_sign .. "]"

-- Matches (in a reversed word) an "o" sandwiched between the two captured
-- groups; export.tr replaces the match with "%1%2", i.e. deletes that "o".
local syncope_pattern = table.concat({
	"(", v, cc, v, cc, ")",
	"o",
	"(", cc, "ঁ?", v, ")",
})


--- Reverse a string one character at a time.
-- Characters are counted and extracted with mw.ustring.len / mw.ustring.sub,
-- so the unit of reversal is whatever those functions treat as one character.
-- @param text  string to reverse
-- @return      the reversed string ("" when text is empty)
local function rev_string(text)
	local length = mw.ustring.len(text)
	-- Collect the pieces in a table and join once at the end instead of
	-- growing a string with ".." on every iteration.
	local chars = {}
	for i = length, 1, -1 do
		chars[#chars + 1] = mw.ustring.sub(text, i, i)
	end
	return table.concat(chars)
end

-- Strings that get a virama (্) inserted in front of them wherever they
-- occur in the input. They are applied in exactly this order.
-- NOTE(review): "কে" is applied before "কেই", and the text it produces still
-- contains "কেই", so that sequence ends up with two viramas. Both are mapped
-- to "" by conv later on; the entry is kept to leave the output untouched.
local virama_prefixed = {
	"টি", "কার", "খানা", "ডাল", "খানি", "জন", "সকল", "কে", "ফল", "কেই", "মান",
}

-- Remove inherent-vowel "o"s from a single word.
-- The rules are written against the reversed word, so the word is reversed,
-- rewritten, and reversed back.
local function drop_inherent_vowels(word)
	local reversed = rev_string(word)

	-- Word-final "o" (first character once reversed) after consonant + vowel.
	reversed = gsub(reversed, "^o(়?" .. c .. ")(ঁ?" .. v .. ")", "%1%2")

	-- Apply the syncope rule until it no longer changes anything; gsub's
	-- second return value is the number of substitutions made.
	local count
	repeat
		reversed, count = gsub(reversed, syncope_pattern, "%1%2")
	until count == 0

	return rev_string(reversed)
end

--- Transliterate Bengali-script text into Latin script.
-- @param text  the text to transliterate
-- @param lang  unused by this function
-- @param sc    unused by this function
-- @param mode  pass "debug" to get the result back even when some Bengali
--              characters could not be converted
-- @return      the transliteration in NFC form, or nil when characters in
--              the range ঁ-৽ remain and mode is not "debug"
function export.tr(text, lang, sc, mode)
	-- Rewrite the special marker characters into virama sequences.
	text = gsub(text, "݁", "্অ")
	text = gsub(text, "[࣪ܿ]", "্")
	text = gsub(text, "ঃ", "্ḥ")
	for _, piece in ipairs(virama_prefixed) do
		text = gsub(text, piece, "্" .. piece)
	end

	-- Give every consonant that is not followed by a vowel sign, ’, ? or
	-- virama an explicit inherent vowel "o".
	text = gsub(text, "(" .. c .. "়?)([" .. vowel .. "’?্]?)", function(a, b)
		return a .. (b == "" and "o" or b)
	end)

	-- Transform each word in place. Replacing by word *content* across the
	-- whole text would also rewrite that content inside longer words.
	text = gsub(text, "[ঁ-৽o’]+", drop_inherent_vowels)

	-- Conjunct-final য and ব get their own Latin letters before the lookup.
	text = gsub(text, "([^র])্য", "%1y")
	text = gsub(text, "্ব", "v")

	-- Table lookup: first letter + optional nukta/’ pairs, then whatever
	-- single characters are left.
	text = gsub(text, ".[়’]?", conv)
	text = gsub(text, ".", conv)

	-- Adjustments on the Latin output.
	text = gsub(text, "([bgmr])v", "%1b")
	text = gsub(text, "hv", "hb")
	text = gsub(text, "kṣ", "ḳh")
	text = gsub(text, "jñ", "ġy")
	text = gsub(text, "ry", "rĵ")

	text = gsub(text, "nḍo$", "nḍ")
	text = gsub(text, "([aæeiīoōuū])h$", "%1ho")
	text = gsub(text, "([lś])aho$", "%1ah")

	text = gsub(text, "^oya", "æ")
	text = gsub(text, "^eya", "æ")

	text = gsub(text, "ẏo([ln])([aeiīoōuū])", "ẏ%1%2")
	text = gsub(text, "oō$", "ō")

	text = gsub(text, "([iī])ẏ", "%1ẏo")

	-- Any Bengali character still present means the conversion was
	-- incomplete; only debug mode gets to see the partial result.
	if match(text, "[ঁ-৽]") and mode ~= "debug" then
		return nil
	end
	return mw.ustring.toNFC(text)
end

-- Hand the module table (carrying export.tr) back to whatever loads this file.
return export