Modul:ja
The module does various things related to Japanese.
Testcases
Functions
- romaji_dediacritics:
{{#invoke:ja|romaji_dediacritics|Tōkyō}}
→ Toukyou
- hira_to_kata:
{{#invoke:ja|hira_to_kata|おはようございます}}
→ オハヨウゴザイマス
- kata_to_hira:
{{#invoke:ja|kata_to_hira|アメリカンアパレル}}
→ あめりかんあぱれる
- kana_to_romaji:
{{#invoke:ja|kana_to_romaji|おやすみなさい}}
→ oyasuminasai
{{#invoke:ja|kana_to_romaji|バックブリーカー}}
→ bakkuburīkā
- romaji_to_kata:
{{#invoke:ja|romaji_to_kata|bakkurasshu}}
→ バックラッシュ
- script:
{{#invoke:ja|script|どうも有難う御座います}}
→ Hira+Hani
{{#invoke:ja|script|どうぞよろしく}}
→ Hira
{{#invoke:ja|script|アメリカ合衆国}}
→ Kana+Hani
- extract_kanji:
{{#invoke:ja|extract_kanji|どうも有難う御座います}}
→ {{ja-kanjitab|有|難|御|座}}
- hidx:
{{#invoke:ja|hidx|バックラッシュ}}
→ |hidx=はっくらっしゅ'
{{#invoke:ja|hidx|どうぞよろしく}}
→ |hidx=とうぞよろしく'
{{#invoke:ja|hidx|アメリカ}}
→ |hidx=あめりか
- conj:
{{#invoke:ja|conj|やっつける|やっつける|two}}
→ {{ja-ichi|やっつけ}}
{{#invoke:ja|conj|鋳る|いる|two}}
→ {{ja-ichi|鋳|い}}
{{#invoke:ja|conj|集る|あつまる}}
→ {{ja-go-ru|集|あつま}}
{{#invoke:ja|conj|現す|あらわす}}
→ {{ja-go-su|現|あらわ}}
Uses
It is used by
- {{ja new}} (which uses it substitutively)
- {{ja-verbconj}} and its subtemplates
- {{ja-noun}}, {{ja-verb}}, {{ja-adj}}, {{ja-pos}}, and {{ja-verb-suru}}, which detect the script, generate romanizations, and generate sort keys
- {{ja-readingcat}}, {{ja-readascat}}, and {{kanji readings tab}}, to generate romanizations and sort keys, count morae, and perform checks
-- Module table; the functions defined below are attached to it and it is
-- returned at the end of the file.
local M={}
-- Romaji vowels written with a macron mapped to their macron-free spelling.
-- Used as a gsub replacement table. Note that 'ō' is spelled 'ou', not 'oo'.
local rd={
['ā']='aa',['ē']='ee',['ī']='ii',['ō']='ou',['ū']='uu'
};
-- Hiragana -> katakana, one entry per kana ("hk" = hiragana to katakana).
-- Used as a gsub replacement table, so characters without an entry (the long
-- vowel mark ー has none) are left unchanged.
local hk={
['ぁ']='ァ',['あ']='ア',['ぃ']='ィ',['い']='イ',['ぅ']='ゥ',['う']='ウ',['ぇ']='ェ',['え']='エ',['ぉ']='ォ',['お']='オ',['か']='カ',['が']='ガ',['き']='キ',['ぎ']='ギ',['く']='ク',['ぐ']='グ',['け']='ケ',['げ']='ゲ',['こ']='コ',['ご']='ゴ',['さ']='サ',['ざ']='ザ',['し']='シ',['じ']='ジ',['す']='ス',['ず']='ズ',['せ']='セ',['ぜ']='ゼ',['そ']='ソ',['ぞ']='ゾ',['た']='タ',['だ']='ダ',['ち']='チ',['ぢ']='ヂ',['っ']='ッ',['つ']='ツ',['づ']='ヅ',['て']='テ',['で']='デ',['と']='ト',['ど']='ド',['な']='ナ',['に']='ニ',['ぬ']='ヌ',['ね']='ネ',['の']='ノ',['は']='ハ',['ば']='バ',['ぱ']='パ',['ひ']='ヒ',['び']='ビ',['ぴ']='ピ',['ふ']='フ',['ぶ']='ブ',['ぷ']='プ',['へ']='ヘ',['べ']='ベ',['ぺ']='ペ',['ほ']='ホ',['ぼ']='ボ',['ぽ']='ポ',['ま']='マ',['み']='ミ',['む']='ム',['め']='メ',['も']='モ',['ゃ']='ャ',['や']='ヤ',['ゅ']='ュ',['ゆ']='ユ',['ょ']='ョ',['よ']='ヨ',['ら']='ラ',['り']='リ',['る']='ル',['れ']='レ',['ろ']='ロ',['ゎ']='ヮ',['わ']='ワ',['ゐ']='ヰ',['ゑ']='ヱ',['を']='ヲ',['ん']='ン',['ゔ']='ヴ',['ゕ']='ヵ',['ゖ']='ヶ'
};
-- Katakana -> hiragana, one entry per kana ("kh" = katakana to hiragana);
-- the inverse of hk. Used as a gsub replacement table, so characters without
-- an entry (the long vowel mark ー has none) are left unchanged.
local kh={
['ァ']='ぁ',['ア']='あ',['ィ']='ぃ',['イ']='い',['ゥ']='ぅ',['ウ']='う',['ェ']='ぇ',['エ']='え',['ォ']='ぉ',['オ']='お',['カ']='か',['ガ']='が',['キ']='き',['ギ']='ぎ',['ク']='く',['グ']='ぐ',['ケ']='け',['ゲ']='げ',['コ']='こ',['ゴ']='ご',['サ']='さ',['ザ']='ざ',['シ']='し',['ジ']='じ',['ス']='す',['ズ']='ず',['セ']='せ',['ゼ']='ぜ',['ソ']='そ',['ゾ']='ぞ',['タ']='た',['ダ']='だ',['チ']='ち',['ヂ']='ぢ',['ッ']='っ',['ツ']='つ',['ヅ']='づ',['テ']='て',['デ']='で',['ト']='と',['ド']='ど',['ナ']='な',['ニ']='に',['ヌ']='ぬ',['ネ']='ね',['ノ']='の',['ハ']='は',['バ']='ば',['パ']='ぱ',['ヒ']='ひ',['ビ']='び',['ピ']='ぴ',['フ']='ふ',['ブ']='ぶ',['プ']='ぷ',['ヘ']='へ',['ベ']='べ',['ペ']='ぺ',['ホ']='ほ',['ボ']='ぼ',['ポ']='ぽ',['マ']='ま',['ミ']='み',['ム']='む',['メ']='め',['モ']='も',['ャ']='ゃ',['ヤ']='や',['ュ']='ゅ',['ユ']='ゆ',['ョ']='ょ',['ヨ']='よ',['ラ']='ら',['リ']='り',['ル']='る',['レ']='れ',['ロ']='ろ',['ヮ']='ゎ',['ワ']='わ',['ヰ']='ゐ',['ヱ']='ゑ',['ヲ']='を',['ン']='ん',['ヴ']='ゔ',['ヵ']='ゕ',['ヶ']='ゖ'
};
-- Katakana -> romaji ("kr" = katakana to romaji). Keys are single katakana
-- or multi-character sequences (base kana plus small kana / handakuten mark),
-- plus a few punctuation marks.
-- ン is mapped to 'n@': the '@' is a placeholder that the romanization
-- functions later turn into an apostrophe (before a vowel or y) or drop.
-- There is no entry for ッ or ー; those are handled by the callers.
local kr={
["ア"] = "a", ["イ"] = "i", ["イィ"] = "yi", ["イェ"] = "ye", ["ウ"] = "u", ["ヴ"] = "vu", ["ヴァ"] = "va", ["ヴィ"] = "vi", ["ヴィェ"] = "vye", ["ヴェ"] = "ve", ["ヴォ"] = "vo", ["ヴャ"] = "vya", ["ヴュ"] = "vyu", ["ヴョ"] = "vyo", ["ウァ"] = "wa", ["ウィ"] = "wi", ["ウゥ"] = "wu", ["ウェ"] = "we", ["ウォ"] = "wo", ["ウュ"] = "wyu", ["エ"] = "e", ["オ"] = "o", ["カ"] = "ka", ["キ"] = "ki", ["キェ"] = "kye", ["キャ"] = "kya", ["キュ"] = "kyu", ["キョ"] = "kyo", ["ガ"] = "ga", ["ギ"] = "gi", ["ギェ"] = "gye", ["ギャ"] = "gya", ["ギュ"] = "gyu", ["ギョ"] = "gyo", ["ク"] = "ku", ["クァ"] = "kwa", ["クィ"] = "kwi", ["クェ"] = "kwe", ["クォ"] = "kwo", ["クヮ"] = "kwa", ["グ"] = "gu", ["グァ"] = "gwa", ["グィ"] = "gwi", ["グェ"] = "gwe", ["グォ"] = "gwo", ["グヮ"] = "gwa", ["ケ"] = "ke", ["ゲ"] = "ge", ["コ"] = "ko", ["ゴ"] = "go", ["サ"] = "sa", ["ザ"] = "za", ["シ"] = "shi", ["シェ"] = "she", ["シャ"] = "sha", ["シュ"] = "shu", ["ショ"] = "sho", ["ジ"] = "ji", ["ジェ"] = "je", ["ジャ"] = "ja", ["ジュ"] = "ju", ["ジョ"] = "jo", ["ス"] = "su", ["スィ"] = "si", ["ズ"] = "zu", ["ズィ"] = "zi", ["セ"] = "se", ["ゼ"] = "ze", ["ソ"] = "so", ["ゾ"] = "zo", ["タ"] = "ta", ["ダ"] = "da", ["チ"] = "chi", ["チェ"] = "che", ["チャ"] = "cha", ["チュ"] = "chu", ["チョ"] = "cho", ["ヂ"] = "ji", ["ヂャ"] = "ja", ["ヂュ"] = "ju", ["ヂョ"] = "jo", ["ヅ"] = "zu", ["ツ"] = "tsu", ["ツァ"] = "tsa", ["ツィ"] = "tsi", ["ツェ"] = "tse", ["ツォ"] = "tso", ["ツュ"] = "tsyu", ["テ"] = "te", ["ティ"] = "ti", ["テュ"] = "tyu", ["デ"] = "de", ["ディ"] = "di", ["デュ"] = "dyu", ["ト"] = "to", ["トゥ"] = "tu", ["ド"] = "do", ["ドゥ"] = "du", ["ナ"] = "na", ["ニ"] = "ni", ["ニェ"] = "nye", ["ニャ"] = "nya", ["ニュ"] = "nyu", ["ニョ"] = "nyo", ["ヌ"] = "nu", ["ネ"] = "ne", ["ノ"] = "no", ["ハ"] = "ha", ["バ"] = "ba", ["パ"] = "pa", ["ヒ"] = "hi", ["ビ"] = "bi", ["ピ"] = "pi", ["ヒェ"] = "hye", ["ヒャ"] = "hya", ["ヒュ"] = "hyu", ["ヒョ"] = "hyo", ["ビェ"] = "bye", ["ピェ"] = "pye", ["ビャ"] = "bya", ["ピャ"] = "pya", ["ビュ"] = "byu", ["ピュ"] = "pyu", ["ビョ"] = "byo", ["ピョ"] = "pyo", ["フ"] = "fu", ["ファ"] = "fa", ["フィ"] = "fi", ["フィェ"] = "fye", ["フェ"] = "fe", ["フォ"] = "fo", ["フャ"] = "fya", ["フュ"] = 
"fyu", ["フョ"] = "fyo", ["ブ"] = "bu", ["プ"] = "pu", ["ヘ"] = "he", ["ベ"] = "be", ["ペ"] = "pe", ["ホ"] = "ho", ["ボ"] = "bo", ["ポ"] = "po", ["ホゥ"] = "hu", ["マ"] = "ma", ["ミ"] = "mi", ["ミェ"] = "mye", ["ミャ"] = "mya", ["ミュ"] = "myu", ["ミョ"] = "myo", ["ム"] = "mu", ["メ"] = "me", ["モ"] = "mo", ["ヤ"] = "ya", ["ユ"] = "yu", ["ヨ"] = "yo", ["ラ"] = "ra", ["リ"] = "ri", ["ラ゜"] = "la", ["リ゜"] = "li", ["リェ"] = "rye", ["リャ"] = "rya", ["リュ"] = "ryu", ["リョ"] = "ryo", ["ル"] = "ru", ["ル゜"] = "lu", ["レ゜"] = "le", ["レ"] = "re", ["ロ"] = "ro", ["ロ゜"] = "lo", ["ワ"] = "wa", ["ヷ"] = "va", ["ヰ"] = "wi", ["ヸ"] = "vi", ["ヱ"] = "we", ["ヹ"] = "ve", ["ヲ"] = "o", ["ヺ"] = "vo", ["ン"] = "n@",
["、"] = ", ", ["。"] = ". ", ["・"] = " ", ["("] = "(", [")"] = ")"
};
-- Romaji syllable -> katakana ("rk" = romaji to katakana). Used by
-- romaji_to_kata as a gsub replacement table for consonant(+y/w)+vowel
-- syllables; syllables starting with ts/sh/ch/n are handled there separately.
-- NOTE(review): the keys 'kwa' and 'gwa' each appear twice (クァ/クヮ and
-- グァ/グヮ). Only one value per key can survive; presumably the later one
-- (クヮ, グヮ) wins -- confirm which spelling is intended.
local rk={
['wyu']='ウュ',['vyu']='ヴュ',['vyo']='ヴョ',['vye']='ヴィェ',['vya']='ヴャ',['tyu']='テュ',['ryu']='リュ',['ryo']='リョ',['rye']='リェ',['rya']='リャ',['pyu']='ピュ',['pyo']='ピョ',['pye']='ピェ',['pya']='ピャ',['nyu']='ニュ',['nyo']='ニョ',['nye']='ニェ',['nya']='ニャ',['myu']='ミュ',['myo']='ミョ',['mye']='ミェ',['mya']='ミャ',['kyu']='キュ',['kyo']='キョ',['kye']='キェ',['kya']='キャ',['kwo']='クォ',['kwi']='クィ',['kwe']='クェ',['kwa']='クァ',['kwa']='クヮ',['hyu']='ヒュ',['hyo']='ヒョ',['hye']='ヒェ',['hya']='ヒャ',['gyu']='ギュ',['gyo']='ギョ',['gye']='ギェ',['gya']='ギャ',['gwo']='グォ',['gwi']='グィ',['gwe']='グェ',['gwa']='グァ',['gwa']='グヮ',['fyu']='フュ',['fyo']='フョ',['fye']='フィェ',['fya']='フャ',['dyu']='デュ',['byu']='ビュ',['byo']='ビョ',['bye']='ビェ',['bya']='ビャ',['zu']='ズ',['zo']='ゾ',['zi']='ズィ',['ze']='ゼ',['za']='ザ',['yu']='ユ',['yo']='ヨ',['yi']='イィ',['ye']='イェ',['ya']='ヤ',['wu']='ウゥ',['wo']='ウォ',['wi']='ウィ',['we']='ウェ',['wa']='ワ',['vu']='ヴ',['vo']='ヴォ',['vi']='ヴィ',['ve']='ヴェ',['va']='ヴァ',['tu']='トゥ',['to']='ト',['ti']='ティ',['te']='テ',['ta']='タ',['su']='ス',['so']='ソ',['si']='スィ',['se']='セ',['sa']='サ',['ru']='ル',['ro']='ロ',['ri']='リ',['re']='レ',['ra']='ラ',['pu']='プ',['po']='ポ',['pi']='ピ',['pe']='ペ',['pa']='パ',['mu']='ム',['mo']='モ',['mi']='ミ',['me']='メ',['ma']='マ',['lu']='ル゜',['lo']='ロ゜',['li']='リ゜',['le']='レ゜',['la']='ラ゜',['ku']='ク',['ko']='コ',['ki']='キ',['ke']='ケ',['ka']='カ',['ju']='ジュ',['jo']='ジョ',['ji']='ジ',['je']='ジェ',['ja']='ジャ',['hu']='ホゥ',['ho']='ホ',['hi']='ヒ',['he']='ヘ',['ha']='ハ',['gu']='グ',['go']='ゴ',['gi']='ギ',['ge']='ゲ',['ga']='ガ',['fu']='フ',['fo']='フォ',['fi']='フィ',['fe']='フェ',['fa']='ファ',['du']='ドゥ',['do']='ド',['di']='ディ',['de']='デ',['da']='ダ',['bu']='ブ',['bo']='ボ',['bi']='ビ',['be']='ベ',['ba']='バ'
};
-- Every hiragana mapped to the empty string. Used as a gsub replacement
-- table to delete hiragana from a string (see extract_kanji).
local hy={
['ぁ']='',['あ']='',['ぃ']='',['い']='',['ぅ']='',['う']='',['ぇ']='',['え']='',['ぉ']='',['お']='',['か']='',['が']='',['き']='',['ぎ']='',['く']='',['ぐ']='',['け']='',['げ']='',['こ']='',['ご']='',['さ']='',['ざ']='',['し']='',['じ']='',['す']='',['ず']='',['せ']='',['ぜ']='',['そ']='',['ぞ']='',['た']='',['だ']='',['ち']='',['ぢ']='',['っ']='',['つ']='',['づ']='',['て']='',['で']='',['と']='',['ど']='',['な']='',['に']='',['ぬ']='',['ね']='',['の']='',['は']='',['ば']='',['ぱ']='',['ひ']='',['び']='',['ぴ']='',['ふ']='',['ぶ']='',['ぷ']='',['へ']='',['べ']='',['ぺ']='',['ほ']='',['ぼ']='',['ぽ']='',['ま']='',['み']='',['む']='',['め']='',['も']='',['ゃ']='',['や']='',['ゅ']='',['ゆ']='',['ょ']='',['よ']='',['ら']='',['り']='',['る']='',['れ']='',['ろ']='',['ゎ']='',['わ']='',['ゐ']='',['ゑ']='',['を']='',['ん']='',['ゔ']='',['ゕ']='',['ゖ']=''
};
-- Every katakana, plus the long vowel mark ー, mapped to the empty string.
-- Used as a gsub replacement table to delete katakana; a string that becomes
-- empty this way consisted only of katakana (see hidx and sort).
local ky={
['ー']='',['ァ']='',['ア']='',['ィ']='',['イ']='',['ゥ']='',['ウ']='',['ェ']='',['エ']='',['ォ']='',['オ']='',['カ']='',['ガ']='',['キ']='',['ギ']='',['ク']='',['グ']='',['ケ']='',['ゲ']='',['コ']='',['ゴ']='',['サ']='',['ザ']='',['シ']='',['ジ']='',['ス']='',['ズ']='',['セ']='',['ゼ']='',['ソ']='',['ゾ']='',['タ']='',['ダ']='',['チ']='',['ヂ']='',['ッ']='',['ツ']='',['ヅ']='',['テ']='',['デ']='',['ト']='',['ド']='',['ナ']='',['ニ']='',['ヌ']='',['ネ']='',['ノ']='',['ハ']='',['バ']='',['パ']='',['ヒ']='',['ビ']='',['ピ']='',['フ']='',['ブ']='',['プ']='',['ヘ']='',['ベ']='',['ペ']='',['ホ']='',['ボ']='',['ポ']='',['マ']='',['ミ']='',['ム']='',['メ']='',['モ']='',['ャ']='',['ヤ']='',['ュ']='',['ユ']='',['ョ']='',['ヨ']='',['ラ']='',['リ']='',['ル']='',['レ']='',['ロ']='',['ヮ']='',['ワ']='',['ヰ']='',['ヱ']='',['ヲ']='',['ン']='',['ヴ']='',['ヵ']='',['ヶ']=''
};
-- Japanese iteration/abbreviation marks (々, ゞ) mapped to the empty string.
-- NOTE(review): not referenced by any code visible here -- possibly unused.
local sy={
['々']='',['ゞ']=''
};
-- Hiragana carrying a dakuten (voicing mark) mapped to the empty string.
-- Used as a gsub replacement table on a single character: an empty result
-- is how hidx, jsort and sort detect a voiced first kana.
-- ゔ is not listed.
local dakuten={
['が']='',['ぎ']='',['ぐ']='',['げ']='',['ご']='',['ざ']='',['じ']='',['ず']='',['ぜ']='',['ぞ']='',['だ']='',['ぢ']='',['づ']='',['で']='',['ど']='',['ば']='',['び']='',['ぶ']='',['べ']='',['ぼ']=''
};
-- Hiragana carrying a dakuten or handakuten mapped to the same kana without
-- the mark (が -> か, ぱ -> は, ...). Used as a gsub replacement table.
local tenconv={
['が']='か',['ぎ']='き',['ぐ']='く',['げ']='け',['ご']='こ',['ざ']='さ',['じ']='し',['ず']='す',['ぜ']='せ',['ぞ']='そ',['だ']='た',['ぢ']='ち',['づ']='つ',['で']='て',['ど']='と',['ば']='は',['び']='ひ',['ぶ']='ふ',['べ']='へ',['ぼ']='ほ',['ぱ']='は',['ぴ']='ひ',['ぷ']='ふ',['ぺ']='へ',['ぽ']='ほ'
};
-- Hiragana carrying a handakuten (ぱ ぴ ぷ ぺ ぽ) mapped to the empty string.
-- Used like the dakuten table: an empty gsub result on a single character
-- means that character carries a handakuten.
local handakuten={
['ぱ']='',['ぴ']='',['ぷ']='',['ぺ']='',['ぽ']=''
};
-- Small hiragana that do not form a mora of their own, mapped to the empty
-- string (used by count_morae). Small tsu (っ) is deliberately absent because
-- it counts as a mora.
-- NOTE(review): small wa (ゎ) is not listed either -- confirm whether that
-- is intended.
local nonmora_to_empty={
['ぁ']='',['ぅ']='',['ぃ']='',['ぇ']='',['ぉ']='',['ゃ']='',['ゅ']='',['ょ']=''
};
-- Hiragana followed by the long vowel mark ー, mapped to the same hiragana
-- followed by the vowel that the mark stands for (かー -> かあ, こー -> こお).
-- Only kana without dakuten/handakuten are listed. jsort and endswith apply
-- each pair with gsub, the key being used as the pattern.
local longvowels={
['あー']='ああ',['いー']='いい',['うー']='うう',['えー']='ええ',['おー']='おお',['ぁー']='ぁあ',['ぃー']='ぃい',['ぅー']='ぅう',['ぇー']='ぇえ',['ぉー']='ぉお', ['かー']='かあ',['きー']='きい',['くー']='くう',['けー']='けえ',['こー']='こお',['さー']='さあ',['しー']='しい',['すー']='すう',['せー']='せえ',['そー']='そお',['たー']='たあ',['ちー']='ちい',['つー']='つう',['てー']='てえ',['とー']='とお',['なー']='なあ',['にー']='にい',['ぬー']='ぬう',['ねー']='ねえ',['のー']='のお',['はー']='はあ',['ひー']='ひい',['ふー']='ふう',['へー']='へえ',['ほー']='ほお',['まー']='まあ',['みー']='みい',['むー']='むう',['めー']='めえ',['もー']='もお',['やー']='やあ',['ゆー']='ゆう',['よー']='よお',['ゃー']='ゃあ',['ゅー']='ゅう',['ょー']='ょお',['らー']='らあ',['りー']='りい',['るー']='るう',['れー']='れえ',['ろー']='ろお',['わー']='わあ'
};
-- Replaces every macron vowel in f.args[1] with its two-letter spelling
-- (see rd), e.g. Tōkyō -> Toukyou. All other characters are kept.
function M.romaji_dediacritics(f)
	local plain = mw.ustring.gsub(f.args[1], '.', rd)
	return plain
end
-- Converts every hiragana in f.args[1] to the corresponding katakana
-- (see hk); characters that are not hiragana are kept.
function M.hira_to_kata(f)
	local katakana = mw.ustring.gsub(f.args[1], '.', hk)
	return katakana
end
-- Converts every katakana in f.args[1] to the corresponding hiragana
-- (see kh); characters that are not katakana are kept.
function M.kata_to_hira(f)
	local hiragana = mw.ustring.gsub(f.args[1], '.', kh)
	return hiragana
end
-- Shared romanization pipeline used by kana_to_romaji and its verb and
-- i-adjective variants.
--   text         kana to romanize; may contain '.' markers, which keep the
--                vowels on either side from being merged and are then removed
--   use_macrons  when true, doubled vowels are contracted to macron vowels
--   tail         romaji appended after the macron contraction, so that it can
--                never merge with the vowel before it ('' when not needed)
-- Returns the romanized string.
local function romanize_kana(text, use_macrons, tail)
	-- work on katakana only, so that one lookup table (kr) is enough
	text = mw.ustring.gsub(text, '.', hk)
	-- Each match is one character, optionally followed by one small kana or a
	-- handakuten mark, optionally followed by a small ェ (for three-character
	-- sequences such as ヴィェ). The whole match is looked up in kr; a match
	-- that has no entry there is left as it is.
	text = mw.ustring.gsub(text, '.[ィェォャュョァヮゥ゜]?ェ?', kr)
	-- replace the long vowel mark with the vowel that comes before it
	text = mw.ustring.gsub(text, '([aeiou])ー', '%1%1')
	-- contract doubled vowels into macron vowels; the order of the rules is
	-- significant and kept as it always was
	if use_macrons then
		local macron_rules = {
			{'oo', 'ō'}, {'aa', 'ā'}, {'ee', 'ē'},
			{'ou', 'ō'}, {'uu', 'ū'}, {'ii', 'ī'},
		}
		for _, rule in ipairs(macron_rules) do
			text = mw.ustring.gsub(text, rule[1], rule[2])
		end
	end
	text = text .. tail
	-- spaces and hyphens of the input are kept
	-- TODO: hyphens are to be kept until all relevant entries are converted
	-- to periods
	-- periods only served to block vowel merging above; remove them now
	text = mw.ustring.gsub(text, '%.', '')
	-- sokuon: a small tsu doubles the consonant letter that follows it
	text = mw.ustring.gsub(text, 'ッ([kstpgdbjzrfh])', '%1%1')
	-- before "ch" the small tsu is written "t" (tch)
	text = mw.ustring.gsub(text, 'ッc', 'tc')
	-- a small tsu at the very end is dropped
	text = mw.ustring.gsub(text, 'ッ$', '')
	-- kr turned ン into "n@". Insert an apostrophe when a vowel or y follows
	-- (n'a, n'ya, ...). Macron vowels are included because the contraction
	-- above may already have turned e.g. "n@ou" into "n@ō".
	text = mw.ustring.gsub(text, "@([aeiouyāēīōū])", "'%1")
	-- every remaining @ is just a marker
	text = mw.ustring.gsub(text, "@", "")
	return text
end
-- Romanizes the kana in f.args[1].
-- When f.args[2] is nil, long vowels are written with macrons; when it is
-- present they are left as doubled vowels.
function M.kana_to_romaji(f)
	local text = f.args[1]
	-- a は set off by halfwidth spaces is the particle: romanize it as " wa "
	text = mw.ustring.gsub(text, ' は ', ' wa ')
	-- also if it follows a space and is the last character, e.g. それでは
	text = mw.ustring.gsub(text, ' は$', ' wa')
	return romanize_kana(text, f.args[2] == nil, '')
end
-- Same as kana_to_romaji except that it works better with verbs:
-- a final う is never merged into a long vowel (ひろう gives "hirou", not
-- "hirō"), because that う is the conjugating ending of the verb.
-- No special handling of the particle は is done here.
-- This function is intended to be used by ja-verb for auto romaji.
function M.kana_to_romaji_for_verbs(f)
	local text = f.args[1]
	local tail = ''
	if mw.ustring.sub(text, -1) == 'う' then
		-- romanize everything before the final う and add 'u' afterwards
		text = mw.ustring.sub(text, 1, mw.ustring.len(text) - 1)
		tail = 'u'
	end
	return romanize_kana(text, f.args[2] == nil, tail)
end
-- Variant of kana_to_romaji for i-adjectives: the last character of
-- f.args[1] is removed (without checking that it is い) and 'i' is appended
-- after the macron contraction, so the ending is never merged into "ī".
function M.kana_to_romaji_for_i_adjectives(f)
	local text = f.args[1]
	text = mw.ustring.sub(text, 1, mw.ustring.len(text) - 1)
	-- a は set off by halfwidth spaces is the particle: romanize it as " wa "
	text = mw.ustring.gsub(text, ' は ', ' wa ')
	return romanize_kana(text, f.args[2] == nil, 'i')
end
-- Capitalizes romaji for use as a proper noun: the first letter of the
-- string and every lower-case letter that directly follows a space are
-- converted to upper case. Returns the resulting string.
function M.initcap(f)
	local to_upper = mw.ustring.upper
	-- leading letter
	local capitalized = mw.ustring.gsub(f.args[1], "^%l", to_upper)
	-- letters after a space (the space is part of the match and is unchanged
	-- by upper-casing)
	capitalized = mw.ustring.gsub(capitalized, " %l", to_upper)
	return capitalized
end
-- Strips spaces, hyphens, periods and apostrophes from f.args[1].
-- Intended for checking manually entered romaji, so that inserting such
-- separators does not make the manual romaji look "wrong".
function M.rm_spaces_hyphens(f)
	local stripped = f.args[1]
	-- patterns are applied in this order; each match is deleted
	local separators = {' ', '-', '%.', ' ', '\''}
	for _, separator in ipairs(separators) do
		stripped = mw.ustring.gsub(stripped, separator, '')
	end
	return stripped
end
-- Converts lower-case romaji in f.args[1] to katakana.
-- Macron vowels are first expanded (see rd), doubled consonants become a
-- small tsu plus the consonant, and the remaining syllables are looked up
-- syllable by syllable. "n" followed by an apostrophe, or not followed by a
-- vowel, is the syllabic ン.
function M.romaji_to_kata(f)
	local text = f.args[1]
	-- expand macron vowels so that only plain letters remain
	text = mw.ustring.gsub(text, '.', rd)
	-- doubled consonant -> sokuon + consonant
	for _, consonant in ipairs({'k', 's', 't', 'p', 'b', 'd', 'g', 'j'}) do
		text = mw.ustring.gsub(text, consonant .. consonant, 'ッ' .. consonant)
	end
	-- "tch" -> sokuon + "ch"
	text = mw.ustring.gsub(text, 'tc', 'ッc')
	-- syllables whose consonant part is more than one letter
	text = mw.ustring.gsub(text, 'tsyu', 'ツュ')
	text = mw.ustring.gsub(text, 'ts[uoiea]', {['tsu']='ツ',['tso']='ツォ',['tsi']='ツィ',['tse']='ツェ',['tsa']='ツァ'})
	text = mw.ustring.gsub(text, 'sh[uoiea]', {['shu']='シュ',['sho']='ショ',['shi']='シ',['she']='シェ',['sha']='シャ'})
	text = mw.ustring.gsub(text, 'ch[uoiea]', {['chu']='チュ',['cho']='チョ',['chi']='チ',['che']='チェ',['cha']='チャ'})
	-- nya/nyu/nyo/nye must be converted before the general "n" rule below,
	-- which would otherwise consume the n as ン and leave ヤ/ユ/ヨ behind.
	-- ん followed by や/ゆ/よ is written n'ya etc. and is not matched here.
	text = mw.ustring.gsub(text, 'ny[uoea]', rk)
	-- n + vowel, or syllabic n (bare or followed by an apostrophe)
	text = mw.ustring.gsub(text, "n[uoiea']?", {['nu']='ヌ',['no']='ノ',['ni']='ニ',['ne']='ネ',['na']='ナ',['n']='ン',["n'"]='ン'})
	-- all remaining consonant(+y/w)+vowel syllables
	text = mw.ustring.gsub(text, '[wvtrpsmlkjhgfdbzy][yw]?[uoiea]', rk)
	-- whatever vowels are left stand alone
	text = mw.ustring.gsub(text, 'u', 'ウ')
	text = mw.ustring.gsub(text, 'o', 'オ')
	text = mw.ustring.gsub(text, 'i', 'イ')
	text = mw.ustring.gsub(text, 'e', 'エ')
	text = mw.ustring.gsub(text, 'a', 'ア')
	return text
end
-- Reports which kinds of characters occur in f.args[1] (any mix of kanji
-- and kana). The names of all kinds found are joined with '+', always in the
-- order listed below, e.g. イギリス人 gives Kana+Hani. Returns the empty
-- string when nothing matches.
function M.script(f)
	local text = f.args[1]
	-- { character class, name reported when the class matches }
	local detectors = {
		{'[ぁ-ゖ]', 'Hira'},
		-- TODO: there are two kanas. This should insert Kata.
		{'[ァ-ヺ]', 'Kana'},
		-- 一 is unicode 4e00, previously used 丁 is 4e01
		{'[一-龯㐀-䶵]', 'Hani'},
		-- matching %a should have worked but matched the end of every string
		{'[a-zA-ZāēīōūĀĒĪŌŪa-zA-Z]', 'Romaji'},
		{'[0-90-9]', 'Number'},
		{'[〆々]', 'Abbreviation'},
	}
	local found = {}
	for _, detector in ipairs(detectors) do
		if mw.ustring.match(text, detector[1]) then
			found[#found + 1] = detector[2]
		end
	end
	return table.concat(found, '+')
end
-- Accepts the entry name in f.args[1], removes all katakana (including the
-- long vowel mark) and hiragana, and returns the remaining characters as the
-- parameters of a {{ja-kanjitab}} call, one character per parameter,
-- e.g. どうも有難う御座います gives {{ja-kanjitab|有|難|御|座}}.
-- Everything that is not kana is kept, so non-kanji characters such as 々
-- or Latin letters end up as parameters too.
-- Returns the empty string when nothing is left after removing the kana.
function M.extract_kanji(f)
	local text = f.args[1]
	text = mw.ustring.gsub(text, '.', ky)
	text = mw.ustring.gsub(text, '.', hy)
	if text == '' then
		return ''
	end
	local parts = {'{{ja-kanjitab'}
	for i = 1, mw.ustring.len(text) do
		-- kept in a local table: the original leaked a global named "char"
		parts[#parts + 1] = mw.ustring.sub(text, i, i)
	end
	return table.concat(parts, '|') .. '}}'
end
-- Returns the number of kanji in f.args[1].
-- An iteration mark following a kanji counts as a repetition of that kanji
-- (時々 is counted like 時時).
function M.count_kanji(f)
	-- expand kanji + 々 into the kanji written twice
	local expanded = mw.ustring.gsub(f.args[1], '([一-龯㐀-䶵])々', '%1%1')
	-- Keep only characters between 一 and 鿌, the first and last characters
	-- of the Unicode CJK Unified Ideographs block. Note that this range does
	-- not include 㐀-䶵, which the expansion above does recognize.
	local kanji_only = mw.ustring.gsub(expanded, '[^一-鿌]', '')
	return mw.ustring.len(kanji_only)
end
-- used within other functions but >> no longer necessary <<
-- Builds a hidx-style hiragana sort key prefixed with |hidx=.
--   * first kana has a dakuten:    |hidx= + key without that dakuten + '
--     (ばつぐん gives |hidx=はつぐん')
--   * first kana has a handakuten: |hidx= + key without it + ''
--   * otherwise, input consisting only of katakana: |hidx= + hiragana form
--   * anything else: the empty string
-- An empty input is treated like the dakuten case.
function M.hidx(f)
	local prefix = '|' .. 'hidx' .. '='
	local original = f.args[1]
	-- deleting all katakana leaves nothing only if the input was all katakana
	local katakana_only = mw.ustring.gsub(original, '.', ky) == ''
	local hira = mw.ustring.gsub(original, '.', kh)
	local head = mw.ustring.sub(hira, 1, 1)
	-- a first character that the table deletes carries the respective mark
	local mark
	if mw.ustring.gsub(head, '.', dakuten) == '' then
		mark = "'"
	elseif mw.ustring.gsub(head, '.', handakuten) == '' then
		mark = "''"
	end
	if mark then
		local bare_head = mw.ustring.gsub(head, '.', tenconv)
		local rest = mw.ustring.sub(hira, 2, mw.ustring.len(hira))
		return prefix .. bare_head .. rest .. mark
	end
	if katakana_only then
		return prefix .. hira
	end
	return ''
end
-- Counts the morae of the kana in f.args[1].
-- Small kana such as ょ belong to the mora of the preceding kana, so they
-- are removed and the remaining characters are counted. Small tsu is not
-- removed (nonmora_to_empty does not list it) and so counts as a mora.
function M.count_morae(f)
	-- katakana to hiragana; hiragana is untouched
	local hira = mw.ustring.gsub(f.args[1], '.', kh)
	-- drop the small kana that do not count
	local counted = mw.ustring.gsub(hira, '.', nonmora_to_empty)
	return mw.ustring.len(counted)
end
-- accepts: any mix of kana
-- returns: a hiragana sort key designed for WMF software (hidx of old)
-- Unlike hidx, only the key itself is returned (no |hidx= prefix), markup
-- characters (space, hyphen, period) are removed first, and the long vowel
-- mark is replaced with the vowel it stands for.
-- A dakuten on the first kana is removed and ' is appended; a handakuten on
-- the first kana is removed and '' is appended.
function M.jsort(f)
	local sortkey = f.args[1]
	-- clean up markup
	for _, markup in ipairs({' ', '-', '%.'}) do
		sortkey = mw.ustring.gsub(sortkey, markup, '')
	end
	sortkey = mw.ustring.gsub(sortkey, '.', kh)
	local head = mw.ustring.sub(sortkey, 1, 1)
	-- a first character that the table deletes carries the respective mark
	local mark
	if mw.ustring.gsub(head, '.', dakuten) == '' then
		mark = "'"
	elseif mw.ustring.gsub(head, '.', handakuten) == '' then
		mark = "''"
	end
	if mark then
		local bare_head = mw.ustring.gsub(head, '.', tenconv)
		local rest = mw.ustring.sub(sortkey, 2, mw.ustring.len(sortkey))
		sortkey = bare_head .. rest .. mark
	end
	-- replace the long vowel mark with the vowel that it stands for
	for marked, expanded in pairs(longvowels) do
		sortkey = mw.ustring.gsub(sortkey, marked, expanded)
	end
	return sortkey
end
-- Accepts marked-up kana such as き.いろ.い and names the kana scripts it
-- contains, wrapped in '' (wiki italics): ''hiragana'', ''katakana'' or
-- ''hiragana and katakana''. Returns the empty string when the input
-- contains neither.
function M.center_of_headword(f)
	local text = f.args[1]
	-- clean up the markup symbols from the kana
	for _, markup in ipairs({' ', '-', '%.'}) do
		text = mw.ustring.gsub(text, markup, '')
	end
	local has_hiragana = mw.ustring.match(text, '[ぁ-ゖ]')
	local has_katakana = mw.ustring.match(text, '[ァ-ヺ]')
	if has_hiragana and has_katakana then
		return "''hiragana and katakana''"
	elseif has_hiragana then
		return "''hiragana''"
	elseif has_katakana then
		return "''katakana''"
	end
	return ''
end
-- Returns 'yes' if f.args[1] contains at least one hiragana or katakana
-- (it need not consist of kana only); returns 'no' otherwise, which
-- includes the empty string.
function M.is_kana(f)
	local text = f.args[1]
	for _, kana_range in ipairs({'[ぁ-ゖ]', '[ァ-ヺ]'}) do
		if mw.ustring.match(text, kana_range) then
			return 'yes'
		end
	end
	return 'no'
end
-- Builds a hiragana sort key prefixed with |sort=.
--   * first kana has a dakuten:    |sort= + key without that dakuten + '
--     (ばつぐん gives |sort=はつぐん')
--   * first kana has a handakuten: |sort= + key without it + ''
--   * otherwise, input consisting only of katakana: |sort= + hiragana form
--   * anything else: the empty string
-- An empty input is treated like the dakuten case.
function M.sort(f)
	local prefix = '|' .. 'sort' .. '='
	local original = f.args[1]
	-- deleting all katakana leaves nothing only if the input was all katakana
	local katakana_only = mw.ustring.gsub(original, '.', ky) == ''
	local hira = mw.ustring.gsub(original, '.', kh)
	local head = mw.ustring.sub(hira, 1, 1)
	-- a first character that the table deletes carries the respective mark
	local mark
	if mw.ustring.gsub(head, '.', dakuten) == '' then
		mark = "'"
	elseif mw.ustring.gsub(head, '.', handakuten) == '' then
		mark = "''"
	end
	if mark then
		local bare_head = mw.ustring.gsub(head, '.', tenconv)
		local rest = mw.ustring.sub(hira, 2, mw.ustring.len(hira))
		return prefix .. bare_head .. rest .. mark
	end
	if katakana_only then
		return prefix .. hira
	end
	return ''
end
-- Used by another template to generate old-style Japanese entries.
-- Builds the call of the conjugation template for a verb.
--   f.args[1]  the verb as written (may contain kanji)
--   f.args[2]  its kana reading
--   f.args[3]  'two' to request ichidan conjugation for a verb in -iru/-eru
-- Returns {{ja-ichi|stem}} for a reading that ends in る after an i- or
-- e-row kana when f.args[3] is 'two'; otherwise {{ja-go-Xu|stem}}, where Xu
-- is the romaji of the final kana (ja-go-u for う). When the written form and
-- the reading differ, the reading without its last kana is added as a second
-- parameter.
function M.conj(f)
	local written = f.args[1] or ''
	local reading = f.args[2] or ''
	local verb_class = f.args[3] or ''
	local reading_len = mw.ustring.len(reading)
	local final = mw.ustring.sub(reading, reading_len, reading_len)
	local written_stem = mw.ustring.sub(written, 1, mw.ustring.len(written) - 1)
	local reading_stem = mw.ustring.sub(reading, 1, reading_len - 1)
	-- vowel of the kana before the final one: last letter of its romaji
	local penult = mw.ustring.sub(reading, reading_len - 1, reading_len - 1)
	penult = mw.ustring.gsub(penult, '.', hk)
	penult = mw.ustring.gsub(penult, '.', kr)
	local penult_vowel = mw.ustring.sub(penult, mw.ustring.len(penult), mw.ustring.len(penult))
	-- consonant of the final kana: first letter of its romaji
	local final_romaji = mw.ustring.gsub(final, '.', hk)
	final_romaji = mw.ustring.gsub(final_romaji, '.', kr)
	local cons = mw.ustring.sub(final_romaji, 1, 1)
	-- a final kana that is a bare vowel (う) has no consonant; without this
	-- the result would be ja-go-uu instead of ja-go-u
	if mw.ustring.match(cons, '^[aeiou]$') then cons = '' end
	-- つ is romanized "tsu", so its consonant part is "ts"
	if cons == 't' then cons = 'ts' end
	local text
	if final == 'る' and (penult_vowel == 'i' or penult_vowel == 'e') and verb_class == 'two' then
		text = '{{ja-' .. 'ichi|' .. written_stem
	else
		text = '{{ja-' .. 'go-' .. cons .. 'u|' .. written_stem
	end
	if written ~= reading then
		text = text .. '|' .. reading_stem
	end
	return text .. '}}'
end
-- Returns f.args[1] without its last character.
-- As far as can be told, this was only used with the old version of ja-ichi,
-- to allow that template to be used without parameters and to rely on
-- {{PAGENAME}} for the conjugation table when the entry is kana, which
-- applies to only a few words.
function M.definal(f)
	local word = f.args[1]
	local stem_length = mw.ustring.len(word) - 1
	return mw.ustring.sub(word, 1, stem_length)
end
-- >> currently unused <<
-- Accepts any mix of kana in f.args[1].
-- Returns 'yes' when the input ends in an "u" sound; returns nothing
-- otherwise.
function M.endswith(f)
	-- normalize: katakana to hiragana, then strip dakuten/handakuten
	local kana = mw.ustring.gsub(f.args[1], '.', kh)
	kana = mw.ustring.gsub(kana, '.', tenconv)
	-- replace the long vowel mark with the vowel that it stands for
	for marked, expanded in pairs(longvowels) do
		kana = mw.ustring.gsub(kana, marked, expanded)
	end
	if mw.ustring.match(kana, '[うくすつぬふむゆる]$') then
		return 'yes'
	end
end
-- Returns 'yes' if f.args[1] ends in one of うむぶつすく, an ending that by
-- itself is enough to say the verb is godan (other endings are ambiguous).
-- Returns nothing otherwise.
function M.is_godan(f)
	local godan_ending = mw.ustring.match(f.args[1], '[うむぶつすく]$')
	if godan_ending then
		return 'yes'
	end
end
-- Checks the kanji of f.args[1] against the list of kanji below (named
-- "joyo" in this module).
-- Returns 'no kanji' when the input contains no character between 一 and 鿌,
-- 'no' when at least one such character is not in the list,
-- and 'yes' when all of them are in the list.
function M.is_joyo(f)
	local text = f.args[1]
	-- the list is split into chunks only to keep the source lines short;
	-- it is kept in a local (the original leaked it as a global)
	local joyo_kanji = table.concat({
		'亜哀挨愛曖悪握圧扱宛嵐安案暗以衣位囲医依委威為畏胃尉異移萎偉椅彙意違維慰遺緯域育一壱逸茨芋引印因咽姻員',
		'院淫陰飲隠韻右宇羽雨唄鬱畝浦運雲永泳英映栄営詠影鋭衛易疫益液駅悦越謁閲円延沿炎宴怨媛援園煙猿遠鉛塩演縁',
		'艶汚王凹央応往押旺欧殴桜翁奥横岡屋億憶臆虞乙俺卸音恩温穏下化火加可仮何花佳価果河苛科架夏家荷華菓貨渦過',
		'嫁暇禍靴寡歌箇稼課蚊牙瓦我画芽賀雅餓介回灰会快戒改怪拐悔海界皆械絵開階塊楷解潰壊懐諧貝外劾害崖涯街慨蓋',
		'該概骸垣柿各角拡革格核殻郭覚較隔閣確獲嚇穫学岳楽額顎掛潟括活喝渇割葛滑褐轄且株釜鎌刈干刊甘汗缶完肝官冠',
		'巻看陥乾勘患貫寒喚堪換敢棺款間閑勧寛幹感漢慣管関歓監緩憾還館環簡観韓艦鑑丸含岸岩玩眼頑顔願企伎危机気岐',
		'希忌汽奇祈季紀軌既記起飢鬼帰基寄規亀喜幾揮期棋貴棄毀旗器畿輝機騎技宜偽欺義疑儀戯擬犠議菊吉喫詰却客脚逆',
		'虐九久及弓丘旧休吸朽臼求究泣急級糾宮救球給嗅窮牛去巨居拒拠挙虚許距魚御漁凶共叫狂京享供協況峡挟狭恐恭胸',
		'脅強教郷境橋矯鏡競響驚仰暁業凝曲局極玉巾斤均近金菌勤琴筋僅禁緊錦謹襟吟銀区句苦駆具惧愚空偶遇隅串屈掘窟',
		'熊繰君訓勲薫軍郡群兄刑形系径茎係型契計恵啓掲渓経蛍敬景軽傾携継詣慶憬稽憩警鶏芸迎鯨隙劇撃激桁欠穴血決結',
		'傑潔月犬件見券肩建研県倹兼剣拳軒健険圏堅検嫌献絹遣権憲賢謙鍵繭顕験懸元幻玄言弦限原現舷減源厳己戸古呼固',
		'孤弧股虎故枯個庫湖雇誇鼓錮顧五互午呉後娯悟碁語誤護口工公勾孔功巧広甲交光向后好江考行坑孝抗攻更効幸拘肯',
		'侯厚恒洪皇紅荒郊香候校耕航貢降高康控梗黄喉慌港硬絞項溝鉱構綱酵稿興衡鋼講購乞号合拷剛傲豪克告谷刻国黒穀',
		'酷獄骨駒込頃今困昆恨根婚混痕紺魂墾懇左佐沙査砂唆差詐鎖座挫才再災妻采砕宰栽彩採済祭斎細菜最裁債催塞歳載',
		'際埼在材剤財罪崎作削昨柵索策酢搾錯咲冊札刷刹拶殺察撮擦雑皿三山参桟蚕惨産傘散算酸賛残斬暫士子支止氏仕史',
		'司四市矢旨死糸至伺志私使刺始姉枝祉肢姿思指施師恣紙脂視紫詞歯嗣試詩資飼誌雌摯賜諮示字寺次耳自似児事侍治',
		'持時滋慈辞磁餌璽鹿式識軸七叱失室疾執湿嫉漆質実芝写社車舎者射捨赦斜煮遮謝邪蛇尺借酌釈爵若弱寂手主守朱取',
		'狩首殊珠酒腫種趣寿受呪授需儒樹収囚州舟秀周宗拾秋臭修袖終羞習週就衆集愁酬醜蹴襲十汁充住柔重従渋銃獣縦叔',
		'祝宿淑粛縮塾熟出述術俊春瞬旬巡盾准殉純循順準潤遵処初所書庶暑署緒諸女如助序叙徐除小升少召匠床抄肖尚招承',
		'昇松沼昭宵将消症祥称笑唱商渉章紹訟勝掌晶焼焦硝粧詔証象傷奨照詳彰障憧衝賞償礁鐘上丈冗条状乗城浄剰常情場',
		'畳蒸縄壌嬢錠譲醸色拭食植殖飾触嘱織職辱尻心申伸臣芯身辛侵信津神唇娠振浸真針深紳進森診寝慎新審震薪親人刃',
		'仁尽迅甚陣尋腎須図水吹垂炊帥粋衰推酔遂睡穂随髄枢崇数据杉裾寸瀬是井世正生成西声制姓征性青斉政星牲省凄逝',
		'清盛婿晴勢聖誠精製誓静請整醒税夕斥石赤昔析席脊隻惜戚責跡積績籍切折拙窃接設雪摂節説舌絶千川仙占先宣専泉',
		'浅洗染扇栓旋船戦煎羨腺詮践箋銭潜線遷選薦繊鮮全前善然禅漸膳繕狙阻祖租素措粗組疎訴塑遡礎双壮早争走奏相荘',
		'草送倉捜挿桑巣掃曹曽爽窓創喪痩葬装僧想層総遭槽踪操燥霜騒藻造像増憎蔵贈臓即束足促則息捉速側測俗族属賊続',
		'卒率存村孫尊損遜他多汰打妥唾堕惰駄太対体耐待怠胎退帯泰堆袋逮替貸隊滞態戴大代台第題滝宅択沢卓拓託濯諾濁',
		'但達脱奪棚誰丹旦担単炭胆探淡短嘆端綻誕鍛団男段断弾暖談壇地池知値恥致遅痴稚置緻竹畜逐蓄築秩窒茶着嫡中仲',
		'虫沖宙忠抽注昼柱衷酎鋳駐著貯丁弔庁兆町長挑帳張彫眺釣頂鳥朝貼超腸跳徴嘲潮澄調聴懲直勅捗沈珍朕陳賃鎮追椎',
		'墜通痛塚漬坪爪鶴低呈廷弟定底抵邸亭貞帝訂庭逓停偵堤提程艇締諦泥的笛摘滴適敵溺迭哲鉄徹撤天典店点展添転塡',
		'田伝殿電斗吐妬徒途都渡塗賭土奴努度怒刀冬灯当投豆東到逃倒凍唐島桃討透党悼盗陶塔搭棟湯痘登答等筒統稲踏糖',
		'頭謄藤闘騰同洞胴動堂童道働銅導瞳峠匿特得督徳篤毒独読栃凸突届屯豚頓貪鈍曇丼那奈内梨謎鍋南軟難二尼弐匂肉',
		'虹日入乳尿任妊忍認寧熱年念捻粘燃悩納能脳農濃把波派破覇馬婆罵拝杯背肺俳配排敗廃輩売倍梅培陪媒買賠白伯拍',
		'泊迫剝舶博薄麦漠縛爆箱箸畑肌八鉢発髪伐抜罰閥反半氾犯帆汎伴判坂阪板版班畔般販斑飯搬煩頒範繁藩晩番蛮盤比',
		'皮妃否批彼披肥非卑飛疲秘被悲扉費碑罷避尾眉美備微鼻膝肘匹必泌筆姫百氷表俵票評漂標苗秒病描猫品浜貧賓頻敏',
		'瓶不夫父付布扶府怖阜附訃負赴浮婦符富普腐敷膚賦譜侮武部舞封風伏服副幅復福腹複覆払沸仏物粉紛雰噴墳憤奮分',
		'文聞丙平兵併並柄陛閉塀幣弊蔽餅米壁璧癖別蔑片辺返変偏遍編弁便勉歩保哺捕補舗母募墓慕暮簿方包芳邦奉宝抱放',
		'法泡胞俸倣峰砲崩訪報蜂豊飽褒縫亡乏忙坊妨忘防房肪某冒剖紡望傍帽棒貿貌暴膨謀頰北木朴牧睦僕墨撲没勃堀本奔',
		'翻凡盆麻摩磨魔毎妹枚昧埋幕膜枕又末抹万満慢漫未味魅岬密蜜脈妙民眠矛務無夢霧娘名命明迷冥盟銘鳴滅免面綿麺',
		'茂模毛妄盲耗猛網目黙門紋問冶夜野弥厄役約訳薬躍闇由油喩愉諭輸癒唯友有勇幽悠郵湧猶裕遊雄誘憂融優与予余誉',
		'預幼用羊妖洋要容庸揚揺葉陽溶腰様瘍踊窯養擁謡曜抑沃浴欲翌翼拉裸羅来雷頼絡落酪辣乱卵覧濫藍欄吏利里理痢裏',
		'履璃離陸立律慄略柳流留竜粒隆硫侶旅虜慮了両良料涼猟陵量僚領寮療瞭糧力緑林厘倫輪隣臨瑠涙累塁類令礼冷励戻',
		'例鈴零霊隷齢麗暦歴列劣烈裂恋連廉練錬呂炉賂路露老労弄郎朗浪廊楼漏籠六録麓論和話賄脇惑枠湾腕',
	})
	-- character class matching any character that is NOT in the list
	local non_joyo_kanji = '[^' .. joyo_kanji .. ']'
	-- remove non-kanji characters
	text = mw.ustring.gsub(text, '[^一-鿌]', '')
	if mw.ustring.len(text) == 0 then
		return 'no kanji'
	end
	if mw.ustring.match(text, non_joyo_kanji) then
		return 'no'
	end
	return 'yes'
end
-- export the module table with all functions attached to it above
return M