metadata {

authority_id: var
id: hepburn-1886
language: iso-639-2:jpn
source_script: Hrkt
destination_script: Latn
name: Traditional Hepburn System
url: http://www.ab.cyberhome.ne.jp/~kaizu/roomazi/doc/hep3.html
creation_date: 1886
adoption_date:
description:
    This is a traditional version of Hepburn romanization.

notes:
    "
    The book was published before the Japanese orthographic reform,
    and this map takes the reformed orthography in Kana as the source
    form.
    https://en.wikipedia.org/wiki/Historical_kana_orthography

    The distinction between long vowels and repeated vowels has not been
    implemented.
    For example, the consecutive o's in these words are considered
    a long vowel and are transliterated as ō:

        氷 (こおり) - kōri, 大阪(おおさか)- Ōsaka

    If there are two consecutive o's in a string, but they belong to
    different morphemes, then they should be transliterated separately.

        小躍り(こおどり)- koodori

    The same goes for the combinations o+u, u+u as well.

    However, this cannot be easily determined from the Kana alone.
    Lexical knowledge is needed, and sometimes the Kanji representation
    gives more hints about the morpheme boundaries.

    For now, this map assumes that all o+o, o+u, u+u combinations are
    instances of long vowels.
    "

}

tests {

# Syllabic ん/ン: "m" before b/m/p kana, "n-" before a vowel or ya/yu/yo.
test "ぐんま", "gumma"
test "しんばし", "shimbashi"
test "しんよう", "shin-yō"
test "きんようび", "kin-yōbi"
test "ほんや", "hon-ya"

# Long vowels: o+u, o+o, u+u and the prolonged sound mark ー.
test "とうきょう", "tōkyō"
test "こおり", "kōri"
test "くうき", "kūki"
test "コーヒー", "kōhī"

# Geminate marker っ/ッ: doubles the following consonant, except that
# -cch- is written -tch-.
test "がっこう", "gakkō"
test "ざっし", "zasshi"
test "まっちゃ", "matcha"
test "ニッポン", "nippon"

}

stage {

# RULES
# Syllabic ん/ン is adjusted while the text is still kana, i.e. before the
# character table runs. The two rules below look at different following
# kana, so neither can affect the other.

# ん/ン becomes "m" when the next kana starts with b, m or p.
sub any("んン"), "m",
    after: any("ばびぶべぼまみむめもぱぴぷぺぽバビブベボマミムメモパピプペポ")

# A dash (-) is inserted between ん/ン and a following vowel kana or
# ya, yu, yo.
sub none, "-",
    after: any("あいうえおやゆよアイウエオヤユヨ"),
    before: any("んン")

# CHARACTERS
parallel {

  # Kana-to-Latin table. Each row lists the plain syllables, then the
  # contracted (yōon) syllables, then the o+u combinations that are written
  # as a long ō. The multi-kana entries have to win over their single-kana
  # prefixes (the "とうきょう" -> "tōkyō" test relies on "とう" and "きょう"
  # being matched as units).
  #
  # Long u (u+u), long o written as o+o, and the prolonged sound mark ー
  # are not listed here; they are handled by the rules after this block.

  # Hiragana

  sub "あ", "a"
  sub "い", "i"
  sub "う", "u"
  sub "え", "e"
  sub "お", "o"
  sub "おう", "ō"

  sub "か", "ka"
  sub "き", "ki"
  sub "く", "ku"
  sub "け", "ke"
  sub "こ", "ko"
  sub "きゃ", "kya"
  sub "きゅ", "kyu"
  sub "きょ", "kyo"
  sub "きょう", "kyō"
  sub "こう", "kō"

  sub "さ", "sa"
  sub "し", "shi"
  sub "す", "su"
  sub "せ", "se"
  sub "そ", "so"
  sub "しゃ", "sha"
  sub "しゅ", "shu"
  sub "しょ", "sho"
  sub "しょう", "shō"
  sub "そう", "sō"

  sub "た", "ta"
  sub "ち", "chi"
  sub "つ", "tsu"
  sub "て", "te"
  sub "と", "to"
  sub "ちゃ", "cha"
  sub "ちゅ", "chu"
  sub "ちょ", "cho"
  sub "とう", "tō"
  sub "ちょう", "chō"

  sub "な", "na"
  sub "に", "ni"
  sub "ぬ", "nu"
  sub "ね", "ne"
  sub "の", "no"
  sub "にゃ", "nya"
  sub "にゅ", "nyu"
  sub "にょ", "nyo"
  sub "にょう", "nyō"
  sub "のう", "nō"

  sub "は", "ha"
  sub "ひ", "hi"
  sub "ふ", "fu"
  sub "へ", "he"
  sub "ほ", "ho"
  sub "ひゃ", "hya"
  sub "ひゅ", "hyu"
  sub "ひょ", "hyo"
  sub "ひょう", "hyō"
  sub "ほう", "hō"

  sub "ま", "ma"
  sub "み", "mi"
  sub "む", "mu"
  sub "め", "me"
  sub "も", "mo"
  sub "みゃ", "mya"
  sub "みゅ", "myu"
  sub "みょ", "myo"
  sub "みょう", "myō"
  sub "もう", "mō"

  sub "や", "ya"
  sub "ゆ", "yu"
  sub "よ", "yo"
  sub "よう", "yō"

  sub "ら", "ra"
  sub "り", "ri"
  sub "る", "ru"
  sub "れ", "re"
  sub "ろ", "ro"
  sub "りゃ", "rya"
  sub "りゅ", "ryu"
  sub "りょ", "ryo"
  sub "りょう", "ryō"
  sub "ろう", "rō"

  sub "わ", "wa"
  sub "を", "wo"

  sub "が", "ga"
  sub "ぎ", "gi"
  sub "ぐ", "gu"
  sub "げ", "ge"
  sub "ご", "go"
  sub "ぎゃ", "gya"
  sub "ぎゅ", "gyu"
  sub "ぎょ", "gyo"
  sub "ぎょう", "gyō"
  sub "ごう", "gō"

  sub "ざ", "za"
  sub "じ", "ji"
  sub "ず", "zu"
  sub "ぜ", "ze"
  sub "ぞ", "zo"
  sub "じゃ", "ja"
  sub "じゅ", "ju"
  sub "じょ", "jo"
  sub "じょう", "jō"
  sub "ぞう", "zō"

  # ぢ and づ are written the same way as じ and ず (ji, zu).
  sub "だ", "da"
  sub "ぢ", "ji"
  sub "づ", "zu"
  sub "で", "de"
  sub "ど", "do"
  sub "ぢゃ", "ja"
  sub "ぢゅ", "ju"
  sub "ぢょ", "jo"
  # Long form of ぢょ, matching じょう; without it "ぢょう" would come
  # out as "jou" because no later rule turns o+u into ō.
  sub "ぢょう", "jō"
  sub "どう", "dō"

  sub "ば", "ba"
  sub "び", "bi"
  sub "ぶ", "bu"
  sub "べ", "be"
  sub "ぼ", "bo"
  sub "びゃ", "bya"
  sub "びゅ", "byu"
  sub "びょ", "byo"
  sub "びょう", "byō"
  sub "ぼう", "bō"

  sub "ぱ", "pa"
  sub "ぴ", "pi"
  sub "ぷ", "pu"
  sub "ぺ", "pe"
  sub "ぽ", "po"
  sub "ぴゃ", "pya"
  sub "ぴゅ", "pyu"
  sub "ぴょ", "pyo"
  sub "ぴょう", "pyō"
  sub "ぽう", "pō"

  # Any ん still present at this point is a plain "n"; the "m" and "n-"
  # variants were already produced by the rules before this block.
  sub "ん", "n"

  # Katakana

  sub "ア", "a"
  sub "イ", "i"
  sub "ウ", "u"
  sub "エ", "e"
  sub "オ", "o"
  sub "オウ", "ō"

  sub "カ", "ka"
  sub "キ", "ki"
  sub "ク", "ku"
  sub "ケ", "ke"
  sub "コ", "ko"
  sub "キャ", "kya"
  sub "キュ", "kyu"
  sub "キョ", "kyo"
  sub "キョウ", "kyō"
  sub "コウ", "kō"

  sub "サ", "sa"
  sub "シ", "shi"
  sub "ス", "su"
  sub "セ", "se"
  sub "ソ", "so"
  sub "シャ", "sha"
  sub "シュ", "shu"
  sub "ショ", "sho"
  sub "ショウ", "shō"
  sub "ソウ", "sō"

  sub "タ", "ta"
  sub "チ", "chi"
  sub "ツ", "tsu"
  sub "テ", "te"
  sub "ト", "to"
  sub "チャ", "cha"
  sub "チュ", "chu"
  sub "チョ", "cho"
  sub "チョウ", "chō"
  sub "トウ", "tō"

  sub "ナ", "na"
  sub "ニ", "ni"
  sub "ヌ", "nu"
  sub "ネ", "ne"
  sub "ノ", "no"
  sub "ニャ", "nya"
  sub "ニュ", "nyu"
  sub "ニョ", "nyo"
  sub "ニョウ", "nyō"
  sub "ノウ", "nō"

  sub "ハ", "ha"
  sub "ヒ", "hi"
  sub "フ", "fu"
  sub "ヘ", "he"
  sub "ホ", "ho"
  sub "ヒャ", "hya"
  sub "ヒュ", "hyu"
  sub "ヒョ", "hyo"
  sub "ヒョウ", "hyō"
  sub "ホウ", "hō"

  sub "マ", "ma"
  sub "ミ", "mi"
  sub "ム", "mu"
  sub "メ", "me"
  sub "モ", "mo"
  sub "ミャ", "mya"
  sub "ミュ", "myu"
  sub "ミョ", "myo"
  sub "ミョウ", "myō"
  sub "モウ", "mō"

  sub "ヤ", "ya"
  sub "ユ", "yu"
  sub "ヨ", "yo"
  sub "ヨウ", "yō"

  sub "ラ", "ra"
  sub "リ", "ri"
  sub "ル", "ru"
  sub "レ", "re"
  sub "ロ", "ro"
  sub "リャ", "rya"
  sub "リュ", "ryu"
  sub "リョ", "ryo"
  sub "リョウ", "ryō"
  sub "ロウ", "rō"

  sub "ワ", "wa"
  sub "ヲ", "wo"

  sub "ガ", "ga"
  sub "ギ", "gi"
  sub "グ", "gu"
  sub "ゲ", "ge"
  sub "ゴ", "go"
  sub "ギャ", "gya"
  sub "ギュ", "gyu"
  sub "ギョ", "gyo"
  sub "ギョウ", "gyō"
  sub "ゴウ", "gō"

  sub "ザ", "za"
  sub "ジ", "ji"
  sub "ズ", "zu"
  sub "ゼ", "ze"
  sub "ゾ", "zo"
  sub "ジャ", "ja"
  sub "ジュ", "ju"
  sub "ジョ", "jo"
  sub "ジョウ", "jō"
  sub "ゾウ", "zō"

  # ヂ and ヅ are written the same way as ジ and ズ (ji, zu).
  sub "ダ", "da"
  sub "ヂ", "ji"
  sub "ヅ", "zu"
  sub "デ", "de"
  sub "ド", "do"
  sub "ヂャ", "ja"
  sub "ヂュ", "ju"
  sub "ヂョ", "jo"
  # Long form of ヂョ, matching ジョウ.
  sub "ヂョウ", "jō"
  sub "ドウ", "dō"

  sub "バ", "ba"
  sub "ビ", "bi"
  sub "ブ", "bu"
  sub "ベ", "be"
  sub "ボ", "bo"
  sub "ビャ", "bya"
  sub "ビュ", "byu"
  sub "ビョ", "byo"
  sub "ビョウ", "byō"
  sub "ボウ", "bō"

  sub "パ", "pa"
  sub "ピ", "pi"
  sub "プ", "pu"
  sub "ペ", "pe"
  sub "ポ", "po"
  sub "ピャ", "pya"
  sub "ピュ", "pyu"
  sub "ピョ", "pyo"
  sub "ピョウ", "pyō"
  sub "ポウ", "pō"

  # Remaining ン is a plain "n" (see the note on ん above).
  sub "ン", "n"
}

# POSTRULES
# Geminate marker っ/ッ
#
# っ/ッ doubles the consonant that follows it: the first Latin letter of
# the next syllable is repeated. The one exception is ch-, which is
# written -tch- rather than -cch-.
#
# When っ/ッ is not followed by a consonant, it is usually realised
# phonetically as an abrupt stop or a shortening of the previous
# syllable. There is no documented or commonly accepted way to
# transliterate that sound, so the marker is removed.
#
# The order of these three rules matters: the last one deletes every
# っ/ッ that the first two did not consume.

sub any("っッ") + capture(any("BbDdFfGgHhJjKkLlMmNnPpQqRrSsTtVvWwXxYyZz")), ref(1) + ref(1)
sub any("っッ") + capture(any("Cc")), "t" + ref(1) # っ/ッ before ch- gives -tch-
sub any("っッ"), "" # every other っ/ッ is dropped

# Long vowels
#
# Traditional Hepburn writes long o (o+o or o+u) and long u as ō and ū.
# A macron should not be used when the two repeated letters sit on either
# side of a morpheme boundary, but that distinction is not made here.
#
# Long vowels in loanwords, written with the prolonged sound mark ー, are
# also shown with a macron rather than by doubling the letter.

# Vowels lengthened only by ー.
sub "aー", "ā"
sub "iー", "ī"
sub "eー", "ē"

# Vowels lengthened by ー or by a repeated letter.
sub "u" + any("ーu"), "ū"
sub "o" + any("ーo"), "ō"

}