metadata {

# Identification of this transliteration system: issuing authority,
# system id, language code, and source/destination script codes.
authority_id: bgn
id: 1962
language: iso-639-2:jpn
source_script: Hrkt
destination_script: Latn
name: Japanese Modified Hepburn (1962)
# No reference URL is recorded for this system.
url:
# The description below states the system has been in use since about 1930;
# 1962 is the year carried in the system name.
creation_date: 1930
adoption_date: 1962
description: |
  The BGN (Modified Hepburn) System for the transliteration of Japanese
  has been in use by the Board on Geographic Names since about 1930 and
  has been extensively employed in the systematic standardization of
  thousands of geographic names of Japan in Romanized form.

notes: |

  1. The "tsu" forms (ツ/つ) are also used to indicate a double consonant and
  are generally (but not always) written in smaller script or type
  slightly to the right of or below the regular line. These characters
  are transliterated as k before k; s before s or sh; t before t, ts, or
  ch; and p before p. Occasionally, when a "ku" (ク/く) or "ki" (キ/き) form
  precedes k, the u in ku or the i in ki is dropped.

  2. The transliterations in parentheses are used in specific cases when
  the kana symbol is known to be so pronounced.

  3. The transliteration m is used before b, p, and m.

  4. This letter has been added for use in transliterating foreign
  words.

  5. The asterisk (*) indicates standard combined forms. Those combined
  forms not so marked are rarely used.

  ----

  Implementation Notes:

  a. Although the specification uses the term "Modified Hepburn", the
  handling of ん/ン in this standard differs from Modified Hepburn. It
  follows Traditional Hepburn in that the letter m is used before b, m,
  and p.

  b. This document includes obsolete (pre-reform) combinations.
  Pre-reform combinations will clash with modern Japanese transliteration.

  c. There is no discussion on how cross-morpheme vowel sounds should be
  handled.

  d. The specification does not mention a separation mark between n and a
  following vowel.

  e. Everything not explicitly stated in the specification will be
  assumed to be inherited from var-jpn-Hrkt-Latn-hepburn-1954.

  f. Obsolete combinations can be handled by post rules, and are
  included for the sake of completeness only. They have been commented
  out, since they are rarely used and follow different rules than modern
  Japanese.

}

tests {

# Note: these test cases follow the pre-reform standard.
# They are commented out for now.
#
# - source: "けふ"
#   expected: "kyō"
# - source: "ぎうにう"
#   expected: "gyūnyū"
# - source: "きふ"   # きふ should always be kifu in Modern Japanese
#   expected: "kyū"
# - source: "ちう"
#   expected: "chū"
# - source: "けう"
#   expected: "kyō"

# Modern Japanese test cases

# ん before b/m/p is written m; elsewhere it stays n.
test "しんばし", "shimbashi"
test "しんじゅく", "shinjuku"

# Long vowels are written with a macron.
test "とうきょう", "tōkyō"
test "かのう", "kanō"

# ん followed by a vowel or y is set apart with ’ in the expected output,
# which keeps かんおう (kan’ō) distinct from かのう (kanō).
test "かんおう", "kan’ō"
test "きんゆう", "kin’yū"

# Small っ doubles the consonant that follows it, including before
# m, y, r and h.
test "かごっま", "kagomma"
test "ぽっぽっや", "poppoyya"
test "てっら", "terra"
test "にゃっほー", "nyahhō"

}

# Load the 1954 Hepburn map under the alias `hrktlatn`; the stage below runs
# that map's main stage after applying the BGN-specific rules.
# The map name must be an ordinary ASCII-quoted string literal.
dependency "var-jpn-Hrkt-Latn-hepburn-1954", as: hrktlatn

stage {

# RULES
# Write ん/ン as "m" when the next kana belongs to the b-, m- or p-rows
# (Note 3 of the specification). This runs before the inherited map, which
# handles every other occurrence of ん/ン.
sub any("んン"), "m",
  after: any("ばびぶべぼまみむめもぱぴぷぺぽバビブベボマミムメモパピプペポ")

# CHARACTERS
# Each rule below lists the regular reading first, followed by the
# alternative readings the specification gives in parentheses (Note 2).
# NOTE(review): presumably the first entry of any([...]) is the one emitted
# by default — confirm against the Interscript runtime.
parallel {
  # ke
  # These are listed as alternative pronunciations, but in practice ヶ as the
  # archaic possessive marker does not occur in kana-only text.
  # It is also always typed in its small form (ヶ U+30F6).
  sub "け", any(["ke", "ga", "ka", "ko"])
  sub "ケ", any(["ke", "ga", "ka", "ko"])
  sub "ヶ", any(["ga", "ka", "ko"])

  # The Ha-column
  # は is still pronounced wa when used as a particle;
  # the alternative pronunciations of the other four kana are obsolete.
  sub "は", any(["ha", "wa"])
  sub "ひ", any(["hi", "i"])
  sub "ふ", any(["fu", "u", "o"])
  sub "へ", any(["he", "e"])
  sub "ほ", any(["ho", "o"])
  sub "ハ", any(["ha", "wa"])
  sub "ヒ", any(["hi", "i"])
  sub "フ", any(["fu", "u", "o"])
  sub "ヘ", any(["he", "e"])
  sub "ホ", any(["ho", "o"])

  # The Wa-column
  # The kana below are only used in pre-reform texts.
  sub "ゐ", "i"
  sub "ゑ", "e"
  sub "ヰ", "i"
  sub "ヱ", "e"

  # Combined forms
  # These are obsolete forms. See Note 5.
  # They can be handled by post-rules if ever needed.
  # NOTE(review): unlike the rest of this list, the kwa/gwa rules
  # (くわ, くわう, ぐわ, ぐわう and their katakana forms) are active, so
  # くわ is rendered "ka" — confirm that this is intended for modern text.
  # "あう": "ō"
  # "あふ": "ō"
  # "いふ": "yū"
  # "えう": "yō"
  # "えふ": "yō"
  # "おふ": "ō"
  # "かう": "kō"
  # "かふ": "kō"
  # "がう": "gō"
  # "がふ": "gō"
  # "きう": "kyū"
  # "きふ": "kyū"
  # "きやう": "kyō"
  # "ぎう": "gyū"
  # "ぎふ": "gyū"
  # "ぎやう": "gyō"
  sub "くわ", "ka"
  sub "くわう", "kō"
  sub "ぐわ", "ga"
  sub "ぐわう", "gō"
  sub "クワ", "ka"
  sub "クワウ", "kō"
  sub "グワ", "ga"
  sub "グワウ", "gō"
  # "けう": "kyō"
  # "けふ": "kyō"
  # "げう": "gyō"
  # "げふ": "gyō"
  # "こふ": "kō"
  # "ごふ": "gō"
  # "さう": "sō"
  # "さふ": "sō"
  # "ざう": "zō"
  # "ざふ": "zō"
  # "しう": "shū"
  # "しふ": "shū"
  # "しやう": "shō"
  # "じう": "jū"
  # "じふ": "jū"
  # "じやう": "jō"
  # "せう": "shō"
  # "せふ": "shō"
  # "ぜう": "jō"
  # "ぜふ": "jō"
  # "そふ": "sō"
  # "ぞふ": "zō"
  # "たう": "tō"
  # "たふ": "tō"
  # "だう": "dō"
  # "だふ": "dō"
  # "ちう": "chū"
  # "ちふ": "chū"
  # "ちやう": "chō"
  # "ぢう": "jū"
  # "ぢふ": "jū"
  # "ぢや": "ja"
  # "ぢやう": "jō"
  # "ぢゆ": "ju"
  # "ぢよ": "jo"
  # "ぢよう": "jō"
  # "てう": "chō"
  # "てふ": "chō"
  # "でう": "jō"
  # "でふ": "jō"
  # "とふ": "tō"
  # "どふ": "dō"
  # "なう": "nō"
  # "なふ": "nō"
  # "にう": "nyū"
  # "にふ": "nyū"
  # "にやう": "nyō"
  # "ねう": "nyō"
  # "ねふ": "nyō"
  # "のふ": "nō"
  # "はう": ["hō","ō"]
  # "はふ": "hō"
  # "ばふ": "bō"
  # "ばう": "bō"
  # "ぱう": "pō"
  # "ぱふ": "pō"
  # "ひう": "hyū"
  # "ひふ": "hyū"
  # "ひやう": "hyō"
  # "びう": "byū"
  # "びふ": "byū"
  # "びやう": "byō"
  # "ぴう": "pyū"
  # "ぴふ": "pyū"
  # "ぴやう": "pyō"
  # "へう": "hyō"
  # "へふ": "hyō"
  # "べう": "byō"
  # "べふ": "byō"
  # "ぺう": "pyō"
  # "ぺふ": "pyō"
  # "ほふ": "hō"
  # "ぼふ": "bō"
  # "ぽふ": "pō"
  # "まう": "mō"
  # "まふ": "mō"
  # "まを": "mō"
  # "みやう": "myō"
  # "みう": "myū"
  # "みふ": "myū"
  # "めう": "myō"
  # "めふ": "myō"
  # "めを": "myō"
  # "もふ": "mō"
  # "やう": "yō"
  # "やふ": "yō"
  # "よふ": "yō"
  # "らう": "rō"
  # "らふ": "rō"
  # "りう": "ryū"
  # "りふ": "ryū"
  # "りやう": "ryō"
  # "れう": "ryō"
  # "れふ": "ryō"
  # "ろふ": "rō"
  # "わう": "wō"
  # "わふ": "wō"
  # "ゑふ": "yō"
  # "をう": "ō"
  # "をふ": "ō"
}

# Everything not overridden above is handled by the main stage of the
# 1954 Hepburn map loaded as `hrktlatn`.
run map.hrktlatn.stage.main

# POSTRULES
# Handle obsolete forms
# Note that these forms are present in the specification, but the rules
# will break if used with Modern Japanese. They are commented out for now.
#
# - pattern: "ef?[uo]|iyau"
#   result: "yō"
# - pattern: "if?u"
#   result: "yū"
# - pattern: "[ao]f?[uo]"
#   result: "ō"
# - pattern: "iy"
#   result: "y"
# - pattern: "ty"
#   result: "ch"
# - pattern: "dy"
#   result: "j"
# - pattern: "[jz]y"
#   result: "j"
# - pattern: "(?<=[sc])hy"
#   result: "h"
# - pattern: "sy"
#   result: "sh"

}