User:ArielGlenn/scripts/mgxlit.pl
< User:ArielGlenn | scripts
I use this to transliterate from Greek to the transliteration scheme on the Wiktionary:About_Greek page. Sure hope I got all the rules right. It's not meant to be good Perl, just maintainable.
#!/usr/bin/perl
#
# mgxlit.pl - read Greek text from STDIN and print a Latin transliteration
# of each line to STDOUT.  Input is lower-cased first; characters that have
# no rule below (Latin letters, digits, spaces, punctuation) pass through
# unchanged.
#
# Scheme implemented by the tables below:
#
#   β γ γγ γκ γξ γχ  δ ζ θ  κ λ μ μπ   ν ξ π ρ σ ς τ φ χ  ψ
#   v g ng gk gx nch d z th k l m b|mp n x p r s s t f ch ps
#
#   α ά ε έ η ή ι ί ϊ ΐ ο ό υ ύ ϋ ΰ ω ώ
#   a á e é i í i í ï í o ó y ý ÿ ý o ó
#
#   αυ    αύ    ευ    εύ    ηυ    ηύ    ου ού
#   av|af áv|áf ev|ef év|éf iv|if ív|íf ou oú
#
# Context rules:
#   * αυ/ευ/ηυ end in "f" before a voiceless consonant, before a non-word
#     character, or at the end of the line; otherwise they end in "v".
#   * μπ is "b" at the start of a word and "mp" after a letter.

use strict;
use warnings;
use utf8;    # this file contains Greek and accented Latin literals

# First letter of a possible two-letter cluster => pattern its second letter
# must match.  ο+ϋ is deliberately NOT a cluster: the diaeresis means the two
# vowels are pronounced separately, and there is no pair rule for it.
my %CLUSTER_TAIL_OF = (
    'γ' => qr/[κγξχ]/,
    'μ' => qr/π/,
    'α' => qr/[υύ]/,
    'ε' => qr/[υύ]/,
    'η' => qr/[υύ]/,
    'ο' => qr/[υύ]/,
);

# Single Greek letter => Latin symbol.
my %SYM_OF_LETTER = (
    # consonants
    'β' => 'v',  'γ' => 'g',  'δ' => 'd', 'ζ' => 'z', 'θ' => 'th',
    'κ' => 'k',  'λ' => 'l',  'μ' => 'm', 'ν' => 'n', 'ξ' => 'x',
    'π' => 'p',  'ρ' => 'r',  'σ' => 's', 'ς' => 's', 'τ' => 't',
    'φ' => 'f',  'χ' => 'ch', 'ψ' => 'ps',

    # vowels
    'α' => 'a', 'ά' => 'á',
    'ε' => 'e', 'έ' => 'é',
    'η' => 'i', 'ή' => 'í',
    'ι' => 'i', 'ί' => 'í', 'ϊ' => 'ï',
    'ΐ' => 'í',    # original author also considered "ḯ"
    'ο' => 'o', 'ό' => 'ó',
    'υ' => 'y', 'ύ' => 'ý', 'ϋ' => 'ÿ',
    # Original author's note: this is a problem.  What is really wanted is
    # "y" with both accent and diaeresis, which does not exist as a single
    # character in the Unicode tables.
    'ΰ' => 'ý',
    'ω' => 'o', 'ώ' => 'ó',
);

# Two-letter cluster => first-approximation Latin symbol.  The "...v" forms
# may be changed to "...f" by modify_from_following(), and "b" may be changed
# to "mp" by b_to_mp().
my %SYM_OF_PAIR = (
    'γγ' => 'ng', 'γκ' => 'gk', 'γξ' => 'gx', 'γχ' => 'nch',
    'μπ' => 'b',
    'αυ' => 'av', 'αύ' => 'áv',
    'ευ' => 'ev', 'εύ' => 'év',
    'ηυ' => 'iv', 'ηύ' => 'ív',
    'ου' => 'ou', 'ού' => 'oú',
    # get_next_cluster() never produces these two pairs; they are kept so
    # that direct callers of cluster_to_sym() see the same results as before.
    'υι' => 'yi', 'υί' => 'yí',
);

# Voiced diphthong symbol => its form before a voiceless sound.
my %DEVOICED_FORM_OF = (
    'av' => 'af', 'áv' => 'áf',
    'ev' => 'ef', 'év' => 'éf',
    'iv' => 'if', 'ív' => 'íf',
);

# get_next_cluster($index, @letters)
#
# Return the cluster of one or two letters that starts at $index, as the
# list ($count, @cluster_letters).  The LAST element of @letters is treated
# as a line terminator and is never returned: when $index is at or beyond
# it, the one-element list (0) is returned.
sub get_next_cluster {
    my ($index, @letters) = @_;

    # ">=" rather than "==" so that an index past the end also stops the
    # caller's loop instead of yielding (1, undef) forever.
    return (0) if $index >= $#letters;

    my $first  = $letters[$index];
    my $second = $letters[ $index + 1 ];    # exists because $index < $#letters
    my $tail   = $CLUSTER_TAIL_OF{$first};

    if (defined $tail && defined $second && $second =~ $tail) {
        return (2, $first, $second);
    }
    return (1, $first);
}

# cluster_to_sym($count, @cluster)
#
# Return the first-approximation Latin symbol for a cluster produced by
# get_next_cluster().  Returns "" when $count is 0, and the first letter of
# the cluster unchanged when no rule applies.
sub cluster_to_sym {
    my ($count, @cluster) = @_;

    return "" if $count == 0;

    my $sym;
    if ($count == 1 && defined $cluster[0]) {
        $sym = $SYM_OF_LETTER{ $cluster[0] };
    }
    elsif ($count == 2 && defined $cluster[0] && defined $cluster[1]) {
        $sym = $SYM_OF_PAIR{ $cluster[0] . $cluster[1] };
    }

    return defined $sym ? $sym : $cluster[0];
}

# modify_from_following($sym, $index, @letters)
#
# $index is the position just AFTER the cluster that produced $sym.  If $sym
# is one of the av/ev/iv symbols, return its "f" form when the next cluster
# starts with a voiceless consonant or a non-word character, or when there
# is no next cluster.  Any other $sym is returned unchanged.
sub modify_from_following {
    my ($sym, $index, @letters) = @_;

    return $sym unless defined $sym && exists $DEVOICED_FORM_OF{$sym};

    my ($peek_count, @peek_cluster) = get_next_cluster($index, @letters);
    my $next_letter = $peek_count ? $peek_cluster[0] : "";

    # ς (final sigma) is voiceless too: βασιλεύς -> vasiléfs.
    if ($next_letter eq "" || $next_letter =~ /\A[θκξπστςφχψ\W]/) {
        return $DEVOICED_FORM_OF{$sym};
    }
    return $sym;
}

# is_following($index, @letters)
#
# Return the size of the cluster starting at $index (a true value), or 0
# when $index is at or beyond the last element of @letters.  No longer
# called by this script; retained for any code that still uses it.
sub is_following {
    my ($index, @letters) = @_;

    return (0) if $index >= $#letters;

    my ($peek_count) = get_next_cluster($index, @letters);
    return ($peek_count);
}

# b_to_mp($sym, $sym_preceding, $index, @letters)
#
# $index is the position of the cluster that produced $sym, and
# $sym_preceding is the symbol emitted just before it ("" at line start).
# Return "mp" when $sym is the "b" of a μπ cluster that follows a letter;
# otherwise return $sym unchanged, so word-initial μπ stays "b".
sub b_to_mp {
    my ($sym, $sym_preceding, $index, @letters) = @_;

    return $sym unless defined $sym && $sym eq 'b';

    # A "b" that came from a literal Latin b in the input is left alone.
    my ($first, $second) = @letters[ $index, $index + 1 ];
    return $sym
        unless defined $first && defined $second
        && $first eq 'μ' && $second eq 'π';

    # A space, punctuation or line start before μπ means it is word-initial.
    # \p{...} forces Unicode matching, so accented symbols count as letters.
    return $sym
        unless defined $sym_preceding && $sym_preceding =~ /[\p{L}\p{M}]\z/;

    return "mp";
}

# transliterate_line($line)
#
# Return the transliteration of one line of text.  One trailing newline, if
# present, is not included in the result.
sub transliterate_line {
    my ($line) = @_;

    $line = "" unless defined $line;
    $line =~ s/\n\z//;

    # The cluster routines expect a terminator as the last element.  Adding
    # it here means a final line with no newline is handled like any other.
    my @letters = (split(//, lc $line), "\n");

    my $output        = "";
    my $sym_preceding = "";
    my $index         = 0;

    while (1) {
        my ($count, @cluster) = get_next_cluster($index, @letters);
        last if $count == 0;

        # convert to transliterated value, first approximation
        my $sym = cluster_to_sym($count, @cluster);
        # adjust for the letters that follow
        $sym = modify_from_following($sym, $index + $count, @letters);
        # special case: b or mp
        $sym = b_to_mp($sym, $sym_preceding, $index, @letters);

        $output .= $sym;
        $index += $count;
        $sym_preceding = $sym;
    }

    return $output;
}

# transliterate_stdin()
#
# Filter STDIN to STDOUT, treating both as UTF-8.
sub transliterate_stdin {
    binmode(STDOUT, ":utf8");
    binmode(STDIN,  ":utf8");

    while (my $line = <STDIN>) {
        print transliterate_line($line), "\n";
    }
    return;
}

# Run as a filter only when executed directly, not when loaded by other code.
transliterate_stdin() unless caller;

1;