metadata {
authority_id: var id: hepburn-1886 language: iso-639-2:jpn source_script: Hrkt destination_script: Latn name: Traditional Hepburn System url: http://www.ab.cyberhome.ne.jp/~kaizu/roomazi/doc/hep3.html creation_date: 1886 adoption_date: description: This is a traditional version of Hepburn romanization. notes: " The book was published before the Japanese orthographic reform, and this map takes the reformed orthography in Kana as the source form. https://en.wikipedia.org/wiki/Historical_kana_orthography The distinction for long-vowel vs. repeating vowels has not been implemented. For example, the consecutive o's in these words are considered a case of long vowel, and is transliterated as ō: 氷 (こおり) - kōri, 大阪(おおさか)- Ōsaka If there are two consecutive o's in a string, but they belong to different morpheme, then they should be transliterated separately. 小躍り(こおどり)- koodori The same goes for the combinations o+u, u+u as well. However, this cannot be easily determined from the Kana. Lexical knowledge is needed, and sometimes the Kanji representation will give more hints about morpheme boundary. For now, this map will assume that all o+o, o+u, u+u combinations to be instances of long vowels. "
}
# Regression tests for this map: each `test` pairs a kana input with the
# romanization the stage below is expected to produce.
tests {
  # ん becomes "m" before b/m/p.
  test "ぐんま", "gumma"
  test "しんばし", "shimbashi"

  # ん is separated with a dash from a following vowel or ya/yu/yo.
  test "しんよう", "shin-yō"
  test "きんようび", "kin-yōbi"

  # o+u is treated as a long vowel.
  test "とうきょう", "tōkyō"

  # o+o is treated as a long vowel (examples from the metadata notes).
  test "こおり", "kōri"
  test "おおさか", "ōsaka"

  # ー after a vowel yields a macron.
  test "トーキョー", "tōkyō"

  # っ doubles the following consonant; before "ch" it becomes "t".
  test "きって", "kitte"
  test "ざっし", "zasshi"
  test "まっちゃ", "matcha"
}
# Main transliteration stage. Rules are grouped in three phases:
#   1. RULES      - context rules that operate on the kana input (ん handling).
#   2. CHARACTERS - the kana-to-Latin table, applied as one `parallel` block.
#   3. POSTRULES  - rules that operate on the Latin output plus the marks the
#                   table leaves untouched (っ/ッ and ー).
stage {
  # RULES

  # Add a dash (-) between ん and a vowel sound or ya, yu, yo
  sub none, "-",
      before: any("んン"),
      after: any("あいうえおやゆよアイウエオヤユヨ")

  # Convert ん into m before b, m, p
  sub any("んン"), "m",
      after: any("ばびぶべぼまみむめもぱぴぷぺぽバビブベボマミムメモパピプペポ")

  # CHARACTERS
  #
  # Multi-kana entries (digraphs such as きょ and long-o forms such as きょう)
  # are listed next to their single-kana prefixes; the とうきょう test relies
  # on the longer entry winning over its prefix.
  parallel {
    # Hiragana
    sub "あ", "a"
    sub "い", "i"
    sub "う", "u"
    sub "え", "e"
    sub "お", "o"
    sub "おう", "ō"

    sub "か", "ka"
    sub "き", "ki"
    sub "く", "ku"
    sub "け", "ke"
    sub "こ", "ko"
    sub "きゃ", "kya"
    sub "きゅ", "kyu"
    sub "きょ", "kyo"
    sub "きょう", "kyō"
    sub "こう", "kō"

    sub "さ", "sa"
    sub "し", "shi"
    sub "す", "su"
    sub "せ", "se"
    sub "そ", "so"
    sub "しゃ", "sha"
    sub "しゅ", "shu"
    sub "しょ", "sho"
    sub "しょう", "shō"
    sub "そう", "sō"

    sub "た", "ta"
    sub "ち", "chi"
    sub "つ", "tsu"
    sub "て", "te"
    sub "と", "to"
    sub "ちゃ", "cha"
    sub "ちゅ", "chu"
    sub "ちょ", "cho"
    sub "とう", "tō"
    sub "ちょう", "chō"

    sub "な", "na"
    sub "に", "ni"
    sub "ぬ", "nu"
    sub "ね", "ne"
    sub "の", "no"
    sub "にゃ", "nya"
    sub "にゅ", "nyu"
    sub "にょ", "nyo"
    sub "にょう", "nyō"
    sub "のう", "nō"

    sub "は", "ha"
    sub "ひ", "hi"
    sub "ふ", "fu"
    sub "へ", "he"
    sub "ほ", "ho"
    sub "ひゃ", "hya"
    sub "ひゅ", "hyu"
    sub "ひょ", "hyo"
    sub "ひょう", "hyō"
    sub "ほう", "hō"

    sub "ま", "ma"
    sub "み", "mi"
    sub "む", "mu"
    sub "め", "me"
    sub "も", "mo"
    sub "みゃ", "mya"
    sub "みゅ", "myu"
    sub "みょ", "myo"
    sub "みょう", "myō"
    sub "もう", "mō"

    sub "や", "ya"
    sub "ゆ", "yu"
    sub "よ", "yo"
    sub "よう", "yō"

    sub "ら", "ra"
    sub "り", "ri"
    sub "る", "ru"
    sub "れ", "re"
    sub "ろ", "ro"
    sub "りゃ", "rya"
    sub "りゅ", "ryu"
    sub "りょ", "ryo"
    sub "りょう", "ryō"
    sub "ろう", "rō"

    sub "わ", "wa"
    sub "を", "wo"

    sub "が", "ga"
    sub "ぎ", "gi"
    sub "ぐ", "gu"
    sub "げ", "ge"
    sub "ご", "go"
    sub "ぎゃ", "gya"
    sub "ぎゅ", "gyu"
    sub "ぎょ", "gyo"
    sub "ぎょう", "gyō"
    sub "ごう", "gō"

    sub "ざ", "za"
    sub "じ", "ji"
    sub "ず", "zu"
    sub "ぜ", "ze"
    sub "ぞ", "zo"
    sub "じゃ", "ja"
    sub "じゅ", "ju"
    sub "じょ", "jo"
    sub "じょう", "jō"
    sub "ぞう", "zō"

    sub "だ", "da"
    sub "ぢ", "ji"
    sub "づ", "zu"
    sub "で", "de"
    sub "ど", "do"
    sub "ぢゃ", "ja"
    sub "ぢゅ", "ju"
    sub "ぢょ", "jo"
    # Long-o form of ぢょ, matching じょう; without it ぢょう came out as "jou".
    sub "ぢょう", "jō"
    sub "どう", "dō"

    sub "ば", "ba"
    sub "び", "bi"
    sub "ぶ", "bu"
    sub "べ", "be"
    sub "ぼ", "bo"
    sub "びゃ", "bya"
    sub "びゅ", "byu"
    sub "びょ", "byo"
    sub "びょう", "byō"
    sub "ぼう", "bō"

    sub "ぱ", "pa"
    sub "ぴ", "pi"
    sub "ぷ", "pu"
    sub "ぺ", "pe"
    sub "ぽ", "po"
    sub "ぴゃ", "pya"
    sub "ぴゅ", "pyu"
    sub "ぴょ", "pyo"
    sub "ぴょう", "pyō"
    sub "ぽう", "pō"

    sub "ん", "n"

    # Katakana
    sub "ア", "a"
    sub "イ", "i"
    sub "ウ", "u"
    sub "エ", "e"
    sub "オ", "o"
    sub "オウ", "ō"

    sub "カ", "ka"
    sub "キ", "ki"
    sub "ク", "ku"
    sub "ケ", "ke"
    sub "コ", "ko"
    sub "キャ", "kya"
    sub "キュ", "kyu"
    sub "キョ", "kyo"
    sub "キョウ", "kyō"
    sub "コウ", "kō"

    sub "サ", "sa"
    sub "シ", "shi"
    sub "ス", "su"
    sub "セ", "se"
    sub "ソ", "so"
    sub "シャ", "sha"
    sub "シュ", "shu"
    sub "ショ", "sho"
    sub "ショウ", "shō"
    sub "ソウ", "sō"

    sub "タ", "ta"
    sub "チ", "chi"
    sub "ツ", "tsu"
    sub "テ", "te"
    sub "ト", "to"
    sub "チャ", "cha"
    sub "チュ", "chu"
    sub "チョ", "cho"
    sub "チョウ", "chō"
    sub "トウ", "tō"

    sub "ナ", "na"
    sub "ニ", "ni"
    sub "ヌ", "nu"
    sub "ネ", "ne"
    sub "ノ", "no"
    sub "ニャ", "nya"
    sub "ニュ", "nyu"
    sub "ニョ", "nyo"
    sub "ニョウ", "nyō"
    sub "ノウ", "nō"

    sub "ハ", "ha"
    sub "ヒ", "hi"
    sub "フ", "fu"
    sub "ヘ", "he"
    sub "ホ", "ho"
    sub "ヒャ", "hya"
    sub "ヒュ", "hyu"
    sub "ヒョ", "hyo"
    sub "ヒョウ", "hyō"
    sub "ホウ", "hō"

    sub "マ", "ma"
    sub "ミ", "mi"
    sub "ム", "mu"
    sub "メ", "me"
    sub "モ", "mo"
    sub "ミャ", "mya"
    sub "ミュ", "myu"
    sub "ミョ", "myo"
    sub "ミョウ", "myō"
    sub "モウ", "mō"

    sub "ヤ", "ya"
    sub "ユ", "yu"
    sub "ヨ", "yo"
    sub "ヨウ", "yō"

    sub "ラ", "ra"
    sub "リ", "ri"
    sub "ル", "ru"
    sub "レ", "re"
    sub "ロ", "ro"
    sub "リャ", "rya"
    sub "リュ", "ryu"
    sub "リョ", "ryo"
    sub "リョウ", "ryō"
    sub "ロウ", "rō"

    sub "ワ", "wa"
    sub "ヲ", "wo"

    sub "ガ", "ga"
    sub "ギ", "gi"
    sub "グ", "gu"
    sub "ゲ", "ge"
    sub "ゴ", "go"
    sub "ギャ", "gya"
    sub "ギュ", "gyu"
    sub "ギョ", "gyo"
    sub "ギョウ", "gyō"
    sub "ゴウ", "gō"

    sub "ザ", "za"
    sub "ジ", "ji"
    sub "ズ", "zu"
    sub "ゼ", "ze"
    sub "ゾ", "zo"
    sub "ジャ", "ja"
    sub "ジュ", "ju"
    sub "ジョ", "jo"
    sub "ジョウ", "jō"
    sub "ゾウ", "zō"

    sub "ダ", "da"
    sub "ヂ", "ji"
    sub "ヅ", "zu"
    sub "デ", "de"
    sub "ド", "do"
    sub "ヂャ", "ja"
    sub "ヂュ", "ju"
    sub "ヂョ", "jo"
    # Long-o form of ヂョ, matching ジョウ; without it ヂョウ came out as "jou".
    sub "ヂョウ", "jō"
    sub "ドウ", "dō"

    sub "バ", "ba"
    sub "ビ", "bi"
    sub "ブ", "bu"
    sub "ベ", "be"
    sub "ボ", "bo"
    sub "ビャ", "bya"
    sub "ビュ", "byu"
    sub "ビョ", "byo"
    sub "ビョウ", "byō"
    sub "ボウ", "bō"

    sub "パ", "pa"
    sub "ピ", "pi"
    sub "プ", "pu"
    sub "ペ", "pe"
    sub "ポ", "po"
    sub "ピャ", "pya"
    sub "ピュ", "pyu"
    sub "ピョ", "pyo"
    sub "ピョウ", "pyō"
    sub "ポウ", "pō"

    sub "ン", "n"
  }

  # POSTRULES

  # Handling of っ/ッ
  #
  # The kana っ/ッ is a geminate marker.
  # When followed by a consonant, repeat the first letter of
  # the following syllable. Exception: the combination -cch-
  # should be transliterated as -tch-
  #
  # If っ/ッ is not followed by a consonant, then it is usually
  # phonetically realised as an abrupt stop or shortening of
  # the previous syllable. There is no documented or commonly
  # accepted way to transliterate this sound.
  #
  # These rules run after the table above, so the syllable following
  # っ/ッ is already in Latin letters when they match.
  sub any("っッ") + capture(any("BbDdFfGgHhJjKkLlMmNnPpQqRrSsTtVvWwXxYyZz")), ref(1) + ref(1)
  sub any("っッ") + capture(any("Cc")), "t" + ref(1) # ッ followed by ch-
  sub any("っッ"), "" # drop all other っッ.

  # In Traditional Hepburn, long o (which can be o+o or o+u), and long u
  # are transliterated as ō and ū.
  #
  # Macron should not be used if two repeating letters split across
  # a morpheme boundary.
  #
  # Long vowels in loanwords are indicated with a macron instead
  # of letter doubling.
  #
  # o+u is already merged by the long-o entries in the table above;
  # the rules below cover ー after any vowel, plus u+u and o+o.
  sub "a" + any("ー"), "ā"
  sub "i" + any("ー"), "ī"
  sub "u" + any("ーu"), "ū"
  sub "e" + any("ー"), "ē"
  sub "o" + any("ーo"), "ō"
}