metadata {
  authority_id: bgn
  id: 1962
  language: iso-639-2:jpn
  source_script: Hrkt
  destination_script: Latn
  name: Japanese Modified Hepburn (1962)
  url:
  creation_date: 1930
  adoption_date: 1962
  description: |
    The BGN (Modified Hepburn) System for the transliteration of Japanese has
    been in use by the Board on Geographic Names since about 1930 and has been
    extensively employed in the systematic standardization of thousands of
    geographic names of Japan in Romanized form.

  notes: |
    1. The "tsu" forms (ツ/つ) are also used to indicate a double consonant and
       are generally (but not always) written in smaller script or type
       slightly to the right of or below the regular line. These characters
       are transliterated as k before k; s before s or sh; t before t, ts, or
       ch; and p before p. Occasionally, when a "ku" (ク/く) or "ki" (キ/き)
       form precedes k, the u in ku or the i in ki is dropped.
    2. The transliterations in parentheses are used in specific cases when the
       kana symbol is known to be so pronounced.
    3. The transliteration m is used before b, p, and m.
    4. This letter has been added for the use in transliterating foreign words.
    5. The asterisk (*) indicates standard combined forms. Those combined forms
       not so marked are rarely used.

    ----

    Implementation Notes:

    a. Despite the mention of the term "Modified Hepburn" in the
       specification, the handling of ん/ン in this standard is different from
       Modified Hepburn. It follows the Traditional Hepburn in that the letter
       m is used before b, m, p.
    b. This document includes obsolete (pre-reform) combinations. Pre-reform
       combinations will clash with modern Japanese transliteration.
    c. There is no discussion on how cross-morpheme vowel sounds should be
       handled.
    d. There is no mention of a separation mark between n and a following
       vowel.
    e. Everything not explicitly stated in the specification will be assumed
       to be inherited from var-jpn-Hrkt-Latn-hepburn-1954.
    f. Obsolete combinations can be handled by post rules, and are included
       for the sake of completeness only. They have been commented out, since
       they are rarely used and follow different rules than modern Japanese.
}
tests {
  # Note: these test cases follow the pre-reform standard.
  # They are commented out for now.
  #
  # - source: "けふ"
  #   expected: "kyō"
  # - source: "ぎうにう"
  #   expected: "gyūnyū"
  # - source: "きふ" # きふ should always be kifu in Modern Japanese
  #   expected: "kyū"
  # - source: "ちう"
  #   expected: "chū"
  # - source: "けう"
  #   expected: "kyō"

  # Modern Japanese test cases

  # ん becomes m before b/m/p, and stays n elsewhere.
  test "しんばし", "shimbashi"
  test "しんじゅく", "shinjuku"

  # Long vowels.
  test "とうきょう", "tōkyō"
  test "かのう", "kanō"

  # Syllabic n followed by a vowel or y is set off with an apostrophe.
  test "かんおう", "kan’ō"
  test "きんゆう", "kin’yū"

  # Small tsu doubles the following consonant.
  test "かごっま", "kagomma"
  test "ぽっぽっや", "poppoyya"
  test "てっら", "terra"
  test "にゃっほー", "nyahhō"
}
# Everything this map does not define itself is inherited from the
# Hepburn 1954 map, referenced below under the alias `hrktlatn`.
dependency "var-jpn-Hrkt-Latn-hepburn-1954", as: hrktlatn
stage {
  # RULES

  # Convert ん into m before b, m, p.
  # (`after:` constrains the text that follows the matched kana.)
  sub any("んン"), "m", after: any("ばびぶべぼまみむめもぱぴぷぺぽバビブベボマミムメモパピプペポ")

  # CHARACTERS
  parallel {
    # ke
    # These are listed as alternative pronunciation, but in fact this usage of ヶ
    # as the archaic possessive marker is not found in Kana only texts.
    # Also it is always typed using the smaller form. (ヶ U+30F6)
    sub "け", any(["ke", "ga", "ka", "ko"])
    sub "ケ", any(["ke", "ga", "ka", "ko"])
    sub "ヶ", any(["ga", "ka", "ko"])

    # The Ha-column
    # は is still pronounced as wa when used as a particle,
    # the alternative pronunciations for the other four kana are obsolete.
    sub "は", any(["ha", "wa"])
    sub "ひ", any(["hi", "i"])
    sub "ふ", any(["fu", "u", "o"])
    sub "へ", any(["he", "e"])
    sub "ほ", any(["ho", "o"])
    sub "ハ", any(["ha", "wa"])
    sub "ヒ", any(["hi", "i"])
    sub "フ", any(["fu", "u", "o"])
    sub "ヘ", any(["he", "e"])
    sub "ホ", any(["ho", "o"])

    # The Wa-column
    # These two kana below are only used in pre-reform texts.
    sub "ゐ", "i"
    sub "ゑ", "e"
    sub "ヰ", "i"
    sub "ヱ", "e"

    # Combined forms
    # These are obsolete forms. See Note 5.
    # They can be handled by post-rules if ever needed.
    # "あう": "ō"
    # "あふ": "ō"
    # "いふ": "yū"
    # "えう": "yō"
    # "えふ": "yō"
    # "おふ": "ō"
    # "かう": "kō"
    # "かふ": "kō"
    # "がう": "gō"
    # "がふ": "gō"
    # "きう": "kyū"
    # "きふ": "kyū"
    # "きやう": "kyō"
    # "ぎう": "gyū"
    # "ぎふ": "gyū"
    # "ぎやう": "gyō"

    # The kwa/gwa combinations are the only combined forms left active.
    # NOTE(review): these also match modern spellings containing くわ/ぐわ
    # and romanize them as ka/ga -- confirm this is intended.
    sub "くわ", "ka"
    sub "くわう", "kō"
    sub "ぐわ", "ga"
    sub "ぐわう", "gō"
    sub "クワ", "ka"
    sub "クワウ", "kō"
    sub "グワ", "ga"
    sub "グワウ", "gō"

    # "けう": "kyō"
    # "けふ": "kyō"
    # "げう": "gyō"
    # "げふ": "gyō"
    # "こふ": "kō"
    # "ごふ": "gō"
    # "さう": "sō"
    # "さふ": "sō"
    # "ざう": "zō"
    # "ざふ": "zō"
    # "しう": "shū"
    # "しふ": "shū"
    # "しやう": "shō"
    # "じう": "jū"
    # "じふ": "jū"
    # "じやう": "jō"
    # "せう": "shō"
    # "せふ": "shō"
    # "ぜう": "jō"
    # "ぜふ": "jō"
    # "そふ": "sō"
    # "ぞふ": "zō"
    # "たう": "tō"
    # "たふ": "tō"
    # "だう": "dō"
    # "だふ": "dō"
    # "ちう": "chū"
    # "ちふ": "chū"
    # "ちやう": "chō"
    # "ぢう": "jū"
    # "ぢふ": "jū"
    # "ぢや": "ja"
    # "ぢやう": "jō"
    # "ぢゆ": "ju"
    # "ぢよ": "jo"
    # "ぢよう": "jō"
    # "てう": "chō"
    # "てふ": "chō"
    # "でう": "jō"
    # "でふ": "jō"
    # "とふ": "tō"
    # "どふ": "dō"
    # "なう": "nō"
    # "なふ": "nō"
    # "にう": "nyū"
    # "にふ": "nyū"
    # "にやう": "nyō"
    # "ねう": "nyō"
    # "ねふ": "nyō"
    # "のふ": "nō"
    # "はう": ["hō","ō"]
    # "はふ": "hō"
    # "ばふ": "bō"
    # "ばう": "bō"
    # "ぱう": "pō"
    # "ぱふ": "pō"
    # "ひう": "hyū"
    # "ひふ": "hyū"
    # "ひやう": "hyō"
    # "びう": "byū"
    # "びふ": "byū"
    # "びやう": "byō"
    # "ぴう": "pyū"
    # "ぴふ": "pyū"
    # "ぴやう": "pyō"
    # "へう": "hyō"
    # "へふ": "hyō"
    # "べう": "byō"
    # "べふ": "byō"
    # "ぺう": "pyō"
    # "ぺふ": "pyō"
    # "ほふ": "hō"
    # "ぼふ": "bō"
    # "ぽふ": "pō"
    # "まう": "mō"
    # "まふ": "mō"
    # "まを": "mō"
    # "みやう": "myō"
    # "みう": "myū"
    # "みふ": "myū"
    # "めう": "myō"
    # "めふ": "myō"
    # "めを": "myō"
    # "もふ": "mō"
    # "やう": "yō"
    # "やふ": "yō"
    # "よふ": "yō"
    # "らう": "rō"
    # "らふ": "rō"
    # "りう": "ryū"
    # "りふ": "ryū"
    # "りやう": "ryō"
    # "れう": "ryō"
    # "れふ": "ryō"
    # "ろふ": "rō"
    # "わう": "wō"
    # "わふ": "wō"
    # "ゑふ": "yō"
    # "をう": "ō"
    # "をふ": "ō"
  }

  # Hand the remaining text to the main stage of the map imported under
  # the alias `hrktlatn`.
  run map.hrktlatn.stage.main

  # POSTRULES
  # Handle obsolete forms
  # Note that these forms are present in the rules, but will break
  # if used with Modern Japanese. They are commented out for now.
  #
  # - pattern: "ef?[uo]|iyau"
  #   result: "yō"
  # - pattern: "if?u"
  #   result: "yū"
  # - pattern: "[ao]f?[uo]"
  #   result: "ō"
  # - pattern: "iy"
  #   result: "y"
  # - pattern: "ty"
  #   result: "ch"
  # - pattern: "dy"
  #   result: "j"
  # - pattern: "[jz]y"
  #   result: "j"
  # - pattern: "(?<=[sc])hy"
  #   result: "h"
  # - pattern: "sy"
  #   result: "sh"
}