metadata {

authority_id: var
id: hepburn-1954
language: iso-639-2:jpn
source_script: Hrkt
destination_script: Latn
name: Modified Hepburn System
url:
creation_date: 1954
adoption_date:
description:
    This is a modified version of Hepburn romanization, which was
    adopted by Kenkyusha's New Japanese-English Dictionary (3rd edition).
    One crucial difference between Modified and Traditional Hepburn is
    that the ん/ン sound is always transliterated as n, even before the
    letters b, m, p.

notes:

}

tests {

# Sample conversions for this map: each case pairs a kana input with the
# romanization expected from the stage below.
#
# Only the `test` statement is active. The `# - source: / expected:` pairs
# are disabled cases kept in their list form; they are comments and are
# not run.
#
# NOTE(review): the disabled cases for ん before a vowel/y-kana expect an
# ASCII apostrophe (kan'ō, kin'yū), whereas the ん/ン rule in the stage
# inserts "’" (U+2019) — reconcile the two before re-enabling them.

# - source: かんおう
#   expected: kan'ō
# - source: かのう
#   expected: kanō
# - source: きんゆう
#   expected: kin'yū
# - source: とうきょう
#   expected: tōkyō
# - source: がっこう
#   expected: gakkō
test "かごっま", "kagomma"
# - source: ぽっぽっや
#   expected: poppoyya
# - source: てっら
#   expected: terra
# - source: にゃっほー
#   expected: nyahhō

}

stage {

# RULES
# Disambiguate syllabic ん/ン: when the `after:` context (taken here to be
# the character following the match — confirm against the engine) is a
# vowel kana or や/ゆ/よ, append "’" to the ん/ン. The kana itself is kept
# via ref(1) and is romanized to "n" by the CHARACTERS table below, so
# e.g. んお yields n’o and stays distinct from の (no).
sub capture(any("んン")), ref( 1 ) + "’", after: any("あいうえおやゆよアイウエオヤユヨ")

# CHARACTERS
# Kana-to-Latin table. Each row lists the plain syllables, the yōon
# (small ゃ/ゅ/ょ) combinations, and the "o + う" long-vowel sequences that
# are written with ō.
# NOTE(review): this relies on the multi-kana entries (e.g. きょう) taking
# precedence over their shorter prefixes (きょ, き) inside `parallel` —
# presumably longest match wins; confirm against the engine.
parallel {

  # Hiragana

  sub "あ", "a"
  sub "い", "i"
  sub "う", "u"
  sub "え", "e"
  sub "お", "o"
  sub "おう", "ō"

  sub "か", "ka"
  sub "き", "ki"
  sub "く", "ku"
  sub "け", "ke"
  sub "こ", "ko"
  sub "きゃ", "kya"
  sub "きゅ", "kyu"
  sub "きょ", "kyo"
  sub "きょう", "kyō"
  sub "こう", "kō"

  sub "さ", "sa"
  sub "し", "shi"
  sub "す", "su"
  sub "せ", "se"
  sub "そ", "so"
  sub "しゃ", "sha"
  sub "しゅ", "shu"
  sub "しょ", "sho"
  sub "しょう", "shō"
  sub "そう", "sō"

  sub "た", "ta"
  sub "ち", "chi"
  sub "つ", "tsu"
  sub "て", "te"
  sub "と", "to"
  sub "ちゃ", "cha"
  sub "ちゅ", "chu"
  sub "ちょ", "cho"
  sub "とう", "tō"
  sub "ちょう", "chō"

  sub "な", "na"
  sub "に", "ni"
  sub "ぬ", "nu"
  sub "ね", "ne"
  sub "の", "no"
  sub "にゃ", "nya"
  sub "にゅ", "nyu"
  sub "にょ", "nyo"
  sub "にょう", "nyō"
  sub "のう", "nō"

  sub "は", "ha"
  sub "ひ", "hi"
  sub "ふ", "fu"
  sub "へ", "he"
  sub "ほ", "ho"
  sub "ひゃ", "hya"
  sub "ひゅ", "hyu"
  sub "ひょ", "hyo"
  sub "ひょう", "hyō"
  sub "ほう", "hō"

  sub "ま", "ma"
  sub "み", "mi"
  sub "む", "mu"
  sub "め", "me"
  sub "も", "mo"
  sub "みゃ", "mya"
  sub "みゅ", "myu"
  sub "みょ", "myo"
  sub "みょう", "myō"
  sub "もう", "mō"

  sub "や", "ya"
  sub "ゆ", "yu"
  sub "よ", "yo"
  sub "よう", "yō"

  sub "ら", "ra"
  sub "り", "ri"
  sub "る", "ru"
  sub "れ", "re"
  sub "ろ", "ro"
  sub "りゃ", "rya"
  sub "りゅ", "ryu"
  sub "りょ", "ryo"
  sub "りょう", "ryō"
  sub "ろう", "rō"

  sub "わ", "wa"
  sub "を", "o"

  sub "が", "ga"
  sub "ぎ", "gi"
  sub "ぐ", "gu"
  sub "げ", "ge"
  sub "ご", "go"
  sub "ぎゃ", "gya"
  sub "ぎゅ", "gyu"
  sub "ぎょ", "gyo"
  sub "ぎょう", "gyō"
  sub "ごう", "gō"

  sub "ざ", "za"
  sub "じ", "ji"
  sub "ず", "zu"
  sub "ぜ", "ze"
  sub "ぞ", "zo"
  sub "じゃ", "ja"
  sub "じゅ", "ju"
  sub "じょ", "jo"
  sub "じょう", "jō"
  sub "ぞう", "zō"

  sub "だ", "da"
  sub "ぢ", "ji"
  sub "づ", "zu"
  sub "で", "de"
  sub "ど", "do"
  sub "ぢゃ", "ja"
  sub "ぢゅ", "ju"
  sub "ぢょ", "jo"
  # Long-vowel form, matching じょう -> jō; without it ぢょう came out as "jou".
  sub "ぢょう", "jō"
  sub "どう", "dō"

  sub "ば", "ba"
  sub "び", "bi"
  sub "ぶ", "bu"
  sub "べ", "be"
  sub "ぼ", "bo"
  sub "びゃ", "bya"
  sub "びゅ", "byu"
  sub "びょ", "byo"
  sub "びょう", "byō"
  sub "ぼう", "bō"

  sub "ぱ", "pa"
  sub "ぴ", "pi"
  sub "ぷ", "pu"
  sub "ぺ", "pe"
  sub "ぽ", "po"
  sub "ぴゃ", "pya"
  sub "ぴゅ", "pyu"
  sub "ぴょ", "pyo"
  sub "ぴょう", "pyō"
  sub "ぽう", "pō"

  sub "ん", "n"

  # Katakana

  sub "ア", "a"
  sub "イ", "i"
  sub "ウ", "u"
  sub "エ", "e"
  sub "オ", "o"
  sub "オウ", "ō"

  sub "カ", "ka"
  sub "キ", "ki"
  sub "ク", "ku"
  sub "ケ", "ke"
  sub "コ", "ko"
  sub "キャ", "kya"
  sub "キュ", "kyu"
  sub "キョ", "kyo"
  sub "キョウ", "kyō"
  sub "コウ", "kō"

  sub "サ", "sa"
  sub "シ", "shi"
  sub "ス", "su"
  sub "セ", "se"
  sub "ソ", "so"
  sub "シャ", "sha"
  sub "シュ", "shu"
  sub "ショ", "sho"
  sub "ショウ", "shō"
  sub "ソウ", "sō"

  sub "タ", "ta"
  sub "チ", "chi"
  sub "ツ", "tsu"
  sub "テ", "te"
  sub "ト", "to"
  sub "チャ", "cha"
  sub "チュ", "chu"
  sub "チョ", "cho"
  sub "チョウ", "chō"
  sub "トウ", "tō"

  sub "ナ", "na"
  sub "ニ", "ni"
  sub "ヌ", "nu"
  sub "ネ", "ne"
  sub "ノ", "no"
  sub "ニャ", "nya"
  sub "ニュ", "nyu"
  sub "ニョ", "nyo"
  sub "ニョウ", "nyō"
  sub "ノウ", "nō"

  sub "ハ", "ha"
  sub "ヒ", "hi"
  sub "フ", "fu"
  sub "ヘ", "he"
  sub "ホ", "ho"
  sub "ヒャ", "hya"
  sub "ヒュ", "hyu"
  sub "ヒョ", "hyo"
  sub "ヒョウ", "hyō"
  sub "ホウ", "hō"

  sub "マ", "ma"
  sub "ミ", "mi"
  sub "ム", "mu"
  sub "メ", "me"
  sub "モ", "mo"
  sub "ミャ", "mya"
  sub "ミュ", "myu"
  sub "ミョ", "myo"
  sub "ミョウ", "myō"
  sub "モウ", "mō"

  sub "ヤ", "ya"
  sub "ユ", "yu"
  sub "ヨ", "yo"
  sub "ヨウ", "yō"

  sub "ラ", "ra"
  sub "リ", "ri"
  sub "ル", "ru"
  sub "レ", "re"
  sub "ロ", "ro"
  sub "リャ", "rya"
  sub "リュ", "ryu"
  sub "リョ", "ryo"
  sub "リョウ", "ryō"
  sub "ロウ", "rō"

  sub "ワ", "wa"
  sub "ヲ", "o"

  sub "ガ", "ga"
  sub "ギ", "gi"
  sub "グ", "gu"
  sub "ゲ", "ge"
  sub "ゴ", "go"
  sub "ギャ", "gya"
  sub "ギュ", "gyu"
  sub "ギョ", "gyo"
  sub "ギョウ", "gyō"
  sub "ゴウ", "gō"

  sub "ザ", "za"
  sub "ジ", "ji"
  sub "ズ", "zu"
  sub "ゼ", "ze"
  sub "ゾ", "zo"
  sub "ジャ", "ja"
  sub "ジュ", "ju"
  sub "ジョ", "jo"
  sub "ジョウ", "jō"
  sub "ゾウ", "zō"

  sub "ダ", "da"
  sub "ヂ", "ji"
  sub "ヅ", "zu"
  sub "デ", "de"
  sub "ド", "do"
  sub "ヂャ", "ja"
  sub "ヂュ", "ju"
  sub "ヂョ", "jo"
  # Long-vowel form, matching ジョウ -> jō; without it ヂョウ came out as "jou".
  sub "ヂョウ", "jō"
  sub "ドウ", "dō"

  sub "バ", "ba"
  sub "ビ", "bi"
  sub "ブ", "bu"
  sub "ベ", "be"
  sub "ボ", "bo"
  sub "ビャ", "bya"
  sub "ビュ", "byu"
  sub "ビョ", "byo"
  sub "ビョウ", "byō"
  sub "ボウ", "bō"

  sub "パ", "pa"
  sub "ピ", "pi"
  sub "プ", "pu"
  sub "ペ", "pe"
  sub "ポ", "po"
  sub "ピャ", "pya"
  sub "ピュ", "pyu"
  sub "ピョ", "pyo"
  sub "ピョウ", "pyō"
  sub "ポウ", "pō"

  sub "ン", "n"
}

# POSTRULES
# Handling of っ/ッ
#
# The kana っ/ッ is a geminate marker.
# When followed by a consonant, repeat the first letter of
# the following syllable. Exception: the combination -cch-
# should be transliterated as -tch-
#
# If っ/ッ is not followed by a consonant, then it is usually
# phonetically realised as an abrupt stop or shortening of
# the previous syllable. There is no documented or commonly
# accepted way to transliterate this sound.
#
# These rules come after the CHARACTERS table, so the text following
# っ/ッ is matched as Latin letters rather than kana. The consonant class
# in the first rule deliberately omits C/c, which the second rule turns
# into "t" + c; in the table above "c" only occurs as part of "ch".
# The final rule must stay last: it removes every っ/ッ the first two
# rules did not consume.

sub any("っッ") + capture(any("BbDdFfGgHhJjKkLlMmNnPpQqRrSsTtVvWwXxYyZz")), ref( 1 ) + ref( 1 ) # っ or ッ followed by a consonant: double it
sub any("っッ") + capture(any("Cc")), "t" + ref( 1 ) # っ or ッ followed by ch-: write tch-
sub any("っッ"), "" # drop all other っッ.

# In Modified Hepburn, long vowels within the same morpheme are
# transliterated using a macron, i.e. ā, ī, ū, ē, ō.
#
# If two repeating letters are split across a morpheme boundary
# then they should not be changed into macron.
#
# Long vowels in loanwords are indicated with a macron instead
# of letter doubling.
#
# Each rule below merges a Latin vowel with a following ー (the long-vowel
# mark, which the CHARACTERS table leaves untouched) or with a second copy
# of the same vowel into the macron form.
# NOTE(review): the rules carry no context condition, so doubled vowels
# are merged everywhere; the morpheme-boundary exception described above
# is not enforced by anything visible in this stage.

sub "a" + any("ーa"), "ā"
sub "i" + any("ーi"), "ī"
sub "u" + any("ーu"), "ū"
sub "e" + any("ーe"), "ē"
sub "o" + any("ーo"), "ō"

}