metadata {
authority_id: kp
id: 2002
language: iso-639-2:kor
source_script: Hang
destination_script: Latn
name: Korean Democratic People's Republic of Korea Korean System (2002)
url: https://unstats.un.org/unsd/geoinfo/UNGEGN/docs/8th-uncsgn-docs/inf/8th_UNCSGN_econf.94_INF.72.pdf
creation_date:
adoption_date:
description:
notes:
  - Here is a list of features that are listed in the guideline but not implemented in this map.
  - Note 3.2 The combination n+r is romanized as -ll- only when it is "considered to be longstanding". In this implementation, all n+r will be romanized as -ll- for the sake of simplicity.
  - Note 3.3 Sai-siot (Connective ㅅ) is not written out in DPRK Korean, but it is supposed to be romanized. Sai-siot is not predictable. This has not been implemented.
  - Note 4.1 Hyphen "may be inserted in case of a possible confusion in pronunciation". Except for the n-g combination, this has not been implemented.
  - Note 4.4 Geographical names "may be transliterated or translated". In this map, all names will be transliterated, not translated. Numerals will not be transliterated.
  - Note 4.5 Spacing rule for personal names has not been implemented.
  - Note 4.7 Optional omission of diacritics and optional simplification of KK, TT, PP, SS, JJ to single letter have not been implemented.
}
tests {
  # Each `test SOURCE, EXPECTED` pairs a Hangul input with the romanization
  # this map is expected to produce. Cases are grouped by the guideline note
  # they illustrate; cases that the map does not implement are kept as
  # comments so they are not lost.

  # --- Note 1.5 ---
  test "우리산", "Urisan"

  # --- Note 2.1 ---
  test "교구동", "Kyogu-dong"
  test "초도", "Chodo"
  test "고비리", "Kobi-ri"
  test "강동", "Kangdong"
  test "금교", "Kümgyo"
  test "칠보산", "Chilbosan"

  # --- Note 2.2 ---
  test "곡산", "Koksan"
  test "갑산", "Kapsan"
  test "앞산", "Apsan"
  test "삿갓봉", "Satkatbong"

  # --- Note 2.3 ---
  test "울산", "Ulsan"
  # Disabled (this is an exception to Note 3.1):
  #   source:   "은률"
  #   expected: "Ünryul"

  # --- Note 2.4 ---
  test "닭섬", "Taksŏm"
  test "물곬", "Mulkol"
  test "붉은바위", "Pulgünbawi"
  test "앉은바위", "Anjünbawi"

  # --- Note 3.1 ---
  test "백마산", "Paengmasan"
  test "꽃마을", "Kkonmaül"
  test "압록강", "Amrokgang"

  # --- Note 3.2 ---
  test "천리마", "Chŏllima"
  # Disabled: source "한나산" is a typo in the original document; the
  # corrected spelling is tested on the next line.
  test "한라산", "Hallasan"
  test "전라도", "Jŏlla-do"

  # --- Note 3.3 (sai-siot; not implemented, so every case is disabled) ---
  #   source:   "기대산"   (ROK spelling: 깃대산)
  #   expected: "Kittaesan"
  #   source:   "새별읍"   (ROK spelling: 샛별)
  #   expected: "Saeppyŏl-üp"   # hyphen
  #   source:   "뒤문"     (ROK spelling: 뒷문)
  #   expected: "Twinmun"

  # --- Note 4.1: separator (OPTIONAL) ---
  test "앞-언덕", "Ap-ŏndŏk"
  test "부억-안골", "Puŏk-angol"
  test "판교", "Phan-gyo"
  # Disabled:
  #   source:   "방어동"
  #   expected: "Pang-ŏ-dong"

  # --- Note 4.2 ---
  test "평안남도 평성시", "Phyŏngannam-do Phyŏngsŏng-si"

  # --- Note 4.3 ---
  test "3.1동", "3.1-dong"

  # --- Note 4.6 ---
  test "평양", "Pyongyang"

  # Regression case: fixes a problem with a trailing comma.
  test "구현, 글꼴, 문자 배열, 다국어 컴퓨팅.", "Kuhyŏn, Külkkol, Munja Paeyŏl, Tagugŏ Khŏmphyuthing."
}
# External maps/libraries this map relies on.
#
# The names must be ordinary double-quoted string literals: typographic
# quotes (“ ”) are not string delimiters in Ruby and would be read as part of
# an identifier.

# Imported directly into this map's namespace (`import: true`).
# NOTE(review): presumably the source of `unicode_hangul` used in the stage
# below -- confirm against the library definition.
dependency "unicode", import: true

# Referenced under the alias `hanghang`; the stage invokes it via
# `run map.hanghang.stage.main`.
dependency "var-kor-Hang-Hang-jamo", as: hanghang
stage {
  # Main transliteration stage. It runs in this order:
  #   1. RULES      - whole-syllable rewrites on precomposed Hangul
  #                   (geographical generics, the fixed spelling "Pyongyang").
  #   2. `run map.hanghang.stage.main` - hands the text to the
  #                   `var-kor-Hang-Hang-jamo` dependency; every rule after
  #                   this point matches conjoining Jamo.
  #   3. POSTRULES  - pads the string with spaces and reduces complex finals
  #                   (jongseong) to the basic finals the tables understand.
  #   4. CHARACTERS - one `parallel` table that romanizes final+initial
  #                   clusters, initials, vowels and leftover finals.
  #   5. Cleanup    - removes the padding and applies `title_case`.
  #
  # In this file `before:` constrains the text that precedes the match (the
  # word-initial rules use `before: " "` after a space is prepended), and
  # `after:` / `not_after:` constrain the text that follows it.

  # ===========================================================================
  # RULES
  # ===========================================================================

  # This system does not require transliteration of numerals, so the legacy
  # number rules stay disabled:
  # convert numbers to space + Hangul
  # - pattern: "([^0-9 ])(?=[0-9])"
  #   result: "\\1 "
  # - pattern: "1"
  #   result: "일"
  # - pattern: "2"
  #   result: "이"
  # - pattern: "3"
  #   result: "삼"
  # - pattern: "4"
  #   result: "사"
  # - pattern: "5"
  #   result: "오"
  # - pattern: "6"
  #   result: "육"
  # - pattern: "7"
  #   result: "칠"
  # - pattern: "8"
  #   result: "팔"
  # - pattern: "9"
  #   result: "구"

  # Use voiced onset for geographical features (Note 4.3.1).
  # The generic must end a word (followed by a space or the end of the line,
  # which is captured and re-emitted through ref(1)) and must be preceded by
  # at least two characters.
  sub "산" + capture(any([" ", line_end])), "san" + ref(1), before: any_character + any_character
  sub "거리" + capture(any([" ", line_end])), "gŏri" + ref(1), before: any_character + any_character
  sub "고개" + capture(any([" ", line_end])), "gogae" + ref(1), before: any_character + any_character
  sub "대" + capture(any([" ", line_end])), "dae" + ref(1), before: any_character + any_character
  sub "봉" + capture(any([" ", line_end])), "bong" + ref(1), before: any_character + any_character
  sub "교" + capture(any([" ", line_end])), "gyo" + ref(1), before: any_character + any_character
  sub "골" + capture(any([" ", line_end])), "gol" + ref(1), before: any_character + any_character
  sub "각" + capture(any([" ", line_end])), "gak" + ref(1), before: any_character + any_character
  sub "벌" + capture(any([" ", line_end])), "bŏl" + ref(1), before: any_character + any_character
  sub "관" + capture(any([" ", line_end])), "gwan" + ref(1), before: any_character + any_character
  sub "곶" + capture(any([" ", line_end])), "got" + ref(1), before: any_character + any_character
  sub "강" + capture(any([" ", line_end])), "gang" + ref(1), before: any_character + any_character

  # Add a hyphen in front of administrative generics.
  # Only add the hyphen if the name is three syllables or longer (hence the
  # two-character `before:` requirement).
  sub "도" + capture(any([" ", line_end])), "-do" + ref(1), before: any_character + any_character
  sub "시" + capture(any([" ", line_end])), "-si" + ref(1), before: any_character + any_character
  sub "군" + capture(any([" ", line_end])), "-gun" + ref(1), before: any_character + any_character
  sub "면" + capture(any([" ", line_end])), "-myŏn" + ref(1), before: any_character + any_character
  sub "리" + capture(any([" ", line_end])), "-ri" + ref(1), before: any_character + any_character
  sub "동" + capture(any([" ", line_end])), "-dong" + ref(1), before: any_character + any_character
  sub "구" + capture(any([" ", line_end])), "-gu" + ref(1), before: any_character + any_character
  sub "구역" + capture(any([" ", line_end])), "-guyŏk" + ref(1), before: any_character + any_character

  # The name Pyongyang is an exception (Note 4.6): not "Phyŏngyang".
  sub "평양", "Pyongyang"

  # Delegate to the Hangul -> Jamo dependency before the Jamo-level rules.
  run map.hanghang.stage.main

  # ===========================================================================
  # POSTRULES
  # ===========================================================================

  # Add a space to both ends of the string for easier word-boundary handling;
  # both are removed again at the end of the stage.
  sub line_start, " "
  sub line_end, " "

  # Reduce complex finals. For each final, the first rule handles the case
  # where the next syllable starts with CHOSEONG IEUNG (the second consonant
  # moves into the onset); the last rule is the plain reduction.

  # HANGUL JONGSEONG SSANGKIYEOK
  sub "ᆩᄋ", "ᆨᄁ"
  sub "ᆩ", "ᆨ"

  # HANGUL JONGSEONG KIYEOK-SIOS
  sub "ᆪᄋ", "ᆨᄉ"
  sub "ᆪ", "ᆨ"

  # HANGUL JONGSEONG NIEUN-CIEUC
  sub "ᆬᄋ", "ᆫᄌ"
  sub "ᆬ", "ᆫ"

  # HANGUL JONGSEONG NIEUN-HIEUH (the HIEUH aspirates a following plain onset)
  sub "ᆭᄀ", "ᆫᄏ"
  sub "ᆭᄃ", "ᆫᄐ"
  sub "ᆭᄇ", "ᆫᄑ"
  sub "ᆭᄌ", "ᆫᄎ"
  sub "ᆭ", "ᆫ"

  # HANGUL JONGSEONG TIEUT: treated as SIOS when a consonant onset follows.
  sub "ᆮ", "ᆺ", after: any("ᄀᄁᄂᄃᄄᄅᄆᄇᄈᄉᄊᄌᄍᄎᄏᄐᄑᄒ")

  # HANGUL JONGSEONG RIEUL-SIOS
  sub "ᆳᄋ", "ᆯᄉ"
  sub "ᆳ", "ᆯ"

  # HANGUL JONGSEONG RIEUL-THIEUTH
  sub "ᆴᄋ", "ᆯᄐ"
  sub "ᆴ", "ᆯ"

  # HANGUL JONGSEONG RIEUL-PHIEUPH
  sub "ᆵᄋ", "ᆯᄑ"
  sub "ᆵ", "ᆯ", after: any("ᄃᄄᄐ")
  # Reduce to JONGSEONG PIEUP (a final, not the CHOSEONG onset) so that the
  # PIEUP cluster rows and the standalone-final rule below can match it.
  sub "ᆵ", "ᆸ"

  # HANGUL JONGSEONG RIEUL-HIEUH
  sub "ᆶᄀ", "ᆯᄏ"
  sub "ᆶᄃ", "ᆯᄐ"
  sub "ᆶᄇ", "ᆯᄑ"
  sub "ᆶᄌ", "ᆯᄎ"
  sub "ᆶ", "ᆯ"

  # HANGUL JONGSEONG PIEUP-SIOS
  # The result keeps JONGSEONG PIEUP as the final (see the RIEUL-PHIEUPH note).
  sub "ᆹᄋ", "ᆸᄉ"
  sub "ᆹ", "ᆸ"

  # HANGUL JONGSEONG SSANGSIOS
  sub "ᆻᄋ", "ᆺᄊ"
  sub "ᆻ", "ᆺ"

  # HANGUL JONGSEONG CIEUC
  sub "ᆽᄋ", "ᆺᄌ"
  sub "ᆽ", "ᆺ"

  # HANGUL JONGSEONG CHIEUCH
  sub "ᆾᄋ", "ᆺᄎ"
  sub "ᆾ", "ᆺ"

  # HANGUL JONGSEONG KHIEUKH
  sub "ᆿᄋ", "ᆨᄏ"
  sub "ᆿ", "ᆨ"

  # HANGUL JONGSEONG THIEUTH
  sub "ᇀᄋ", "ᆺᄐ"
  sub "ᇀ", "ᆺ"

  # HANGUL JONGSEONG PHIEUPH
  sub "ᇁᄋ", "ᆸᄑ"
  sub "ᇁ", "ᆸ"

  # HANGUL JONGSEONG HIEUH: aspirates a following plain onset, otherwise silent.
  sub "ᇂᄀ", "ᄏ"
  sub "ᇂᄃ", "ᄐ"
  sub "ᇂᄇ", "ᄑ"
  sub "ᇂᄌ", "ᄎ"
  sub "ᇂ", ""

  # ===========================================================================
  # CHARACTERS
  # ===========================================================================
  # Table derived from the Unicode CLDR chart:
  # https://github.com/unicode-org/cldr/blob/master/common/transforms/Korean-Latin-BGN.xml
  parallel {
    # --- JONGSEONG KIYEOK + initial ---
    sub "ᆨᄀ", "kk" # HANGUL JONGSEONG KIYEOK + CHOSEONG KIYEOK
    sub "ᆨᄂ", "ngn" # HANGUL JONGSEONG KIYEOK + CHOSEONG NIEUN
    sub "ᆨᄃ", "kt" # HANGUL JONGSEONG KIYEOK + CHOSEONG TIEUT
    sub "ᆨᄅ", "ngn" # HANGUL JONGSEONG KIYEOK + CHOSEONG RIEUL
    sub "ᆨᄆ", "ngm" # HANGUL JONGSEONG KIYEOK + CHOSEONG MIEUM
    sub "ᆨᄇ", "kp" # HANGUL JONGSEONG KIYEOK + CHOSEONG PIEUP
    sub "ᆨᄉ", "ks" # HANGUL JONGSEONG KIYEOK + CHOSEONG SIOS
    sub "ᆨᄋ", "g" # HANGUL JONGSEONG KIYEOK + CHOSEONG IEUNG
    sub "ᆨᄌ", "kj" # HANGUL JONGSEONG KIYEOK + CHOSEONG CIEUC
    sub "ᆨᄎ", "kch" # HANGUL JONGSEONG KIYEOK + CHOSEONG CHIEUCH
    sub "ᆨᄏ", "kkh" # HANGUL JONGSEONG KIYEOK + CHOSEONG KHIEUKH # NOTE: the dash is always skipped
    sub "ᆨᄐ", "kth" # HANGUL JONGSEONG KIYEOK + CHOSEONG THIEUTH
    sub "ᆨᄑ", "kph" # HANGUL JONGSEONG KIYEOK + CHOSEONG PHIEUPH (aspirated, like every other +PHIEUPH row)
    sub "ᆨᄒ", "kh" # HANGUL JONGSEONG KIYEOK + CHOSEONG HIEUH
    sub "ᆨᄁ", "kkk" # HANGUL JONGSEONG KIYEOK + CHOSEONG SSANGKIYEOK
    sub "ᆨᄄ", "ktt" # HANGUL JONGSEONG KIYEOK + CHOSEONG SSANGTIEUT
    sub "ᆨᄈ", "kpp" # HANGUL JONGSEONG KIYEOK + CHOSEONG SSANGPIEUP
    sub "ᆨᄊ", "kss" # HANGUL JONGSEONG KIYEOK + CHOSEONG SSANGSIOS
    sub "ᆨᄍ", "kjj" # HANGUL JONGSEONG KIYEOK + CHOSEONG SSANGCIEUC

    # --- JONGSEONG NIEUN + initial ---
    sub "ᆫᄀ", "n-g" # HANGUL JONGSEONG NIEUN + CHOSEONG KIYEOK (hyphen per Note 4.1)
    sub "ᆫᄂ", "nn" # HANGUL JONGSEONG NIEUN + CHOSEONG NIEUN
    sub "ᆫᄃ", "nd" # HANGUL JONGSEONG NIEUN + CHOSEONG TIEUT
    sub "ᆫᄅ", "ll" # HANGUL JONGSEONG NIEUN + CHOSEONG RIEUL (Note 3.2, applied unconditionally)
    sub "ᆫᄆ", "nm" # HANGUL JONGSEONG NIEUN + CHOSEONG MIEUM
    sub "ᆫᄇ", "nb" # HANGUL JONGSEONG NIEUN + CHOSEONG PIEUP
    sub "ᆫᄉ", "ns" # HANGUL JONGSEONG NIEUN + CHOSEONG SIOS
    sub "ᆫᄋ", "n" # HANGUL JONGSEONG NIEUN + CHOSEONG IEUNG
    sub "ᆫᄌ", "nj" # HANGUL JONGSEONG NIEUN + CHOSEONG CIEUC
    sub "ᆫᄎ", "nch" # HANGUL JONGSEONG NIEUN + CHOSEONG CHIEUCH
    sub "ᆫᄏ", "nkh" # HANGUL JONGSEONG NIEUN + CHOSEONG KHIEUKH
    sub "ᆫᄐ", "nth" # HANGUL JONGSEONG NIEUN + CHOSEONG THIEUTH
    sub "ᆫᄑ", "nph" # HANGUL JONGSEONG NIEUN + CHOSEONG PHIEUPH
    sub "ᆫᄒ", "nh" # HANGUL JONGSEONG NIEUN + CHOSEONG HIEUH
    sub "ᆫᄁ", "nkk" # HANGUL JONGSEONG NIEUN + CHOSEONG SSANGKIYEOK
    sub "ᆫᄄ", "ntt" # HANGUL JONGSEONG NIEUN + CHOSEONG SSANGTIEUT
    sub "ᆫᄈ", "npp" # HANGUL JONGSEONG NIEUN + CHOSEONG SSANGPIEUP
    sub "ᆫᄊ", "nss" # HANGUL JONGSEONG NIEUN + CHOSEONG SSANGSIOS
    sub "ᆫᄍ", "njj" # HANGUL JONGSEONG NIEUN + CHOSEONG SSANGCIEUC

    # --- JONGSEONG RIEUL + initial ---
    sub "ᆯᄀ", "lk" # HANGUL JONGSEONG RIEUL + CHOSEONG KIYEOK
    sub "ᆯᄂ", "ll" # HANGUL JONGSEONG RIEUL + CHOSEONG NIEUN
    sub "ᆯᄃ", "lt" # HANGUL JONGSEONG RIEUL + CHOSEONG TIEUT
    sub "ᆯᄅ", "ll" # HANGUL JONGSEONG RIEUL + CHOSEONG RIEUL
    sub "ᆯᄆ", "lm" # HANGUL JONGSEONG RIEUL + CHOSEONG MIEUM
    sub "ᆯᄇ", "lb" # HANGUL JONGSEONG RIEUL + CHOSEONG PIEUP
    sub "ᆯᄉ", "ls" # HANGUL JONGSEONG RIEUL + CHOSEONG SIOS
    sub "ᆯᄋ", "r" # HANGUL JONGSEONG RIEUL + CHOSEONG IEUNG
    sub "ᆯᄌ", "lj" # HANGUL JONGSEONG RIEUL + CHOSEONG CIEUC
    sub "ᆯᄎ", "lch" # HANGUL JONGSEONG RIEUL + CHOSEONG CHIEUCH
    sub "ᆯᄏ", "lkh" # HANGUL JONGSEONG RIEUL + CHOSEONG KHIEUKH
    sub "ᆯᄐ", "lth" # HANGUL JONGSEONG RIEUL + CHOSEONG THIEUTH
    sub "ᆯᄑ", "lph" # HANGUL JONGSEONG RIEUL + CHOSEONG PHIEUPH
    sub "ᆯᄒ", "lh" # HANGUL JONGSEONG RIEUL + CHOSEONG HIEUH
    sub "ᆯᄁ", "lkk" # HANGUL JONGSEONG RIEUL + CHOSEONG SSANGKIYEOK
    sub "ᆯᄄ", "ltt" # HANGUL JONGSEONG RIEUL + CHOSEONG SSANGTIEUT
    sub "ᆯᄈ", "lpp" # HANGUL JONGSEONG RIEUL + CHOSEONG SSANGPIEUP
    sub "ᆯᄊ", "lss" # HANGUL JONGSEONG RIEUL + CHOSEONG SSANGSIOS
    sub "ᆯᄍ", "ljj" # HANGUL JONGSEONG RIEUL + CHOSEONG SSANGCIEUC

    # --- JONGSEONG MIEUM + initial ---
    sub "ᆷᄀ", "mg" # HANGUL JONGSEONG MIEUM + CHOSEONG KIYEOK
    sub "ᆷᄂ", "mn" # HANGUL JONGSEONG MIEUM + CHOSEONG NIEUN
    sub "ᆷᄃ", "md" # HANGUL JONGSEONG MIEUM + CHOSEONG TIEUT
    sub "ᆷᄅ", "mr" # HANGUL JONGSEONG MIEUM + CHOSEONG RIEUL # Note 3.1
    sub "ᆷᄆ", "mm" # HANGUL JONGSEONG MIEUM + CHOSEONG MIEUM
    sub "ᆷᄇ", "mb" # HANGUL JONGSEONG MIEUM + CHOSEONG PIEUP
    sub "ᆷᄉ", "ms" # HANGUL JONGSEONG MIEUM + CHOSEONG SIOS
    sub "ᆷᄋ", "m" # HANGUL JONGSEONG MIEUM + CHOSEONG IEUNG
    sub "ᆷᄌ", "mj" # HANGUL JONGSEONG MIEUM + CHOSEONG CIEUC
    sub "ᆷᄎ", "mch" # HANGUL JONGSEONG MIEUM + CHOSEONG CHIEUCH
    sub "ᆷᄏ", "mkh" # HANGUL JONGSEONG MIEUM + CHOSEONG KHIEUKH
    sub "ᆷᄐ", "mth" # HANGUL JONGSEONG MIEUM + CHOSEONG THIEUTH
    sub "ᆷᄑ", "mph" # HANGUL JONGSEONG MIEUM + CHOSEONG PHIEUPH
    sub "ᆷᄒ", "mh" # HANGUL JONGSEONG MIEUM + CHOSEONG HIEUH
    sub "ᆷᄁ", "mkk" # HANGUL JONGSEONG MIEUM + CHOSEONG SSANGKIYEOK
    sub "ᆷᄄ", "mtt" # HANGUL JONGSEONG MIEUM + CHOSEONG SSANGTIEUT
    sub "ᆷᄈ", "mpp" # HANGUL JONGSEONG MIEUM + CHOSEONG SSANGPIEUP
    sub "ᆷᄊ", "mss" # HANGUL JONGSEONG MIEUM + CHOSEONG SSANGSIOS
    sub "ᆷᄍ", "mjj" # HANGUL JONGSEONG MIEUM + CHOSEONG SSANGCIEUC

    # --- JONGSEONG PIEUP + initial ---
    sub "ᆸᄀ", "pk" # HANGUL JONGSEONG PIEUP + CHOSEONG KIYEOK
    sub "ᆸᄂ", "mn" # HANGUL JONGSEONG PIEUP + CHOSEONG NIEUN
    sub "ᆸᄃ", "pt" # HANGUL JONGSEONG PIEUP + CHOSEONG TIEUT
    sub "ᆸᄅ", "mr" # HANGUL JONGSEONG PIEUP + CHOSEONG RIEUL
    sub "ᆸᄆ", "mm" # HANGUL JONGSEONG PIEUP + CHOSEONG MIEUM
    sub "ᆸᄇ", "pp" # HANGUL JONGSEONG PIEUP + CHOSEONG PIEUP
    sub "ᆸᄉ", "ps" # HANGUL JONGSEONG PIEUP + CHOSEONG SIOS
    sub "ᆸᄋ", "b" # HANGUL JONGSEONG PIEUP + CHOSEONG IEUNG
    sub "ᆸᄌ", "pj" # HANGUL JONGSEONG PIEUP + CHOSEONG CIEUC
    sub "ᆸᄎ", "pch" # HANGUL JONGSEONG PIEUP + CHOSEONG CHIEUCH
    sub "ᆸᄏ", "pkh" # HANGUL JONGSEONG PIEUP + CHOSEONG KHIEUKH
    sub "ᆸᄐ", "pth" # HANGUL JONGSEONG PIEUP + CHOSEONG THIEUTH
    sub "ᆸᄑ", "pph" # HANGUL JONGSEONG PIEUP + CHOSEONG PHIEUPH
    sub "ᆸᄒ", "ph" # HANGUL JONGSEONG PIEUP + CHOSEONG HIEUH
    sub "ᆸᄁ", "pkk" # HANGUL JONGSEONG PIEUP + CHOSEONG SSANGKIYEOK
    sub "ᆸᄄ", "ptt" # HANGUL JONGSEONG PIEUP + CHOSEONG SSANGTIEUT
    sub "ᆸᄈ", "ppp" # HANGUL JONGSEONG PIEUP + CHOSEONG SSANGPIEUP
    sub "ᆸᄊ", "pss" # HANGUL JONGSEONG PIEUP + CHOSEONG SSANGSIOS
    sub "ᆸᄍ", "pjj" # HANGUL JONGSEONG PIEUP + CHOSEONG SSANGCIEUC

    # --- JONGSEONG SIOS + initial ---
    sub "ᆺᄀ", "tk" # HANGUL JONGSEONG SIOS + CHOSEONG KIYEOK
    sub "ᆺᄂ", "nn" # HANGUL JONGSEONG SIOS + CHOSEONG NIEUN
    sub "ᆺᄃ", "tt" # HANGUL JONGSEONG SIOS + CHOSEONG TIEUT
    sub "ᆺᄅ", "nr" # HANGUL JONGSEONG SIOS + CHOSEONG RIEUL # Note 3.1
    sub "ᆺᄆ", "nm" # HANGUL JONGSEONG SIOS + CHOSEONG MIEUM
    sub "ᆺᄇ", "tp" # HANGUL JONGSEONG SIOS + CHOSEONG PIEUP
    sub "ᆺᄉ", "ts" # HANGUL JONGSEONG SIOS + CHOSEONG SIOS
    sub "ᆺᄋ", "d" # HANGUL JONGSEONG SIOS + CHOSEONG IEUNG
    sub "ᆺᄌ", "tj" # HANGUL JONGSEONG SIOS + CHOSEONG CIEUC
    sub "ᆺᄎ", "tch" # HANGUL JONGSEONG SIOS + CHOSEONG CHIEUCH
    sub "ᆺᄏ", "tkh" # HANGUL JONGSEONG SIOS + CHOSEONG KHIEUKH
    sub "ᆺᄐ", "tth" # HANGUL JONGSEONG SIOS + CHOSEONG THIEUTH
    sub "ᆺᄑ", "tph" # HANGUL JONGSEONG SIOS + CHOSEONG PHIEUPH
    sub "ᆺᄒ", "th" # HANGUL JONGSEONG SIOS + CHOSEONG HIEUH
    sub "ᆺᄁ", "tkk" # HANGUL JONGSEONG SIOS + CHOSEONG SSANGKIYEOK
    sub "ᆺᄄ", "ttt" # HANGUL JONGSEONG SIOS + CHOSEONG SSANGTIEUT
    sub "ᆺᄈ", "tpp" # HANGUL JONGSEONG SIOS + CHOSEONG SSANGPIEUP
    sub "ᆺᄊ", "tss" # HANGUL JONGSEONG SIOS + CHOSEONG SSANGSIOS
    sub "ᆺᄍ", "tjj" # HANGUL JONGSEONG SIOS + CHOSEONG SSANGCIEUC

    # --- JONGSEONG IEUNG + initial ---
    sub "ᆼᄀ", "ngg" # HANGUL JONGSEONG IEUNG + CHOSEONG KIYEOK
    sub "ᆼᄂ", "ngn" # HANGUL JONGSEONG IEUNG + CHOSEONG NIEUN
    sub "ᆼᄃ", "ngd" # HANGUL JONGSEONG IEUNG + CHOSEONG TIEUT
    sub "ᆼᄅ", "ngn" # HANGUL JONGSEONG IEUNG + CHOSEONG RIEUL
    sub "ᆼᄆ", "ngm" # HANGUL JONGSEONG IEUNG + CHOSEONG MIEUM
    sub "ᆼᄇ", "ngb" # HANGUL JONGSEONG IEUNG + CHOSEONG PIEUP
    sub "ᆼᄉ", "ngs" # HANGUL JONGSEONG IEUNG + CHOSEONG SIOS
    sub "ᆼᄋ", "ng" # HANGUL JONGSEONG IEUNG + CHOSEONG IEUNG
    sub "ᆼᄌ", "ngj" # HANGUL JONGSEONG IEUNG + CHOSEONG CIEUC
    sub "ᆼᄎ", "ngch" # HANGUL JONGSEONG IEUNG + CHOSEONG CHIEUCH
    sub "ᆼᄏ", "ngkh" # HANGUL JONGSEONG IEUNG + CHOSEONG KHIEUKH
    sub "ᆼᄐ", "ngth" # HANGUL JONGSEONG IEUNG + CHOSEONG THIEUTH
    sub "ᆼᄑ", "ngph" # HANGUL JONGSEONG IEUNG + CHOSEONG PHIEUPH
    sub "ᆼᄒ", "ngh" # HANGUL JONGSEONG IEUNG + CHOSEONG HIEUH
    sub "ᆼᄁ", "ngkk" # HANGUL JONGSEONG IEUNG + CHOSEONG SSANGKIYEOK
    sub "ᆼᄄ", "ngtt" # HANGUL JONGSEONG IEUNG + CHOSEONG SSANGTIEUT
    sub "ᆼᄈ", "ngpp" # HANGUL JONGSEONG IEUNG + CHOSEONG SSANGPIEUP
    sub "ᆼᄊ", "ngss" # HANGUL JONGSEONG IEUNG + CHOSEONG SSANGSIOS
    sub "ᆼᄍ", "ngjj" # HANGUL JONGSEONG IEUNG + CHOSEONG SSANGCIEUC

    # --- Medial initials ---
    # An initial preceded by a hyphen, a Latin letter, a digit or a vowel
    # Jamo. Every onset, including the tense ones, accepts the same set of
    # preceding characters so that none is left unromanized after a hyphen.
    sub "ᄀ", "g", before: any([any("-"), any("A".."Z"), any("a".."z"), any("0".."9"), any("ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ")]) # VOWEL + CHOSEONG KIYEOK # c.f. Note 3.3
    sub "ᄂ", "n", before: any([any("-"), any("A".."Z"), any("a".."z"), any("0".."9"), any("ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ")]) # VOWEL + CHOSEONG NIEUN # c.f. Note 3.3
    sub "ᄃ", "d", before: any([any("-"), any("A".."Z"), any("a".."z"), any("0".."9"), any("ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ")]) # VOWEL + CHOSEONG TIEUT # c.f. Note 3.3
    sub "ᄅ", "r", before: any([any("-"), any("A".."Z"), any("a".."z"), any("0".."9"), any("ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ")]) # VOWEL + CHOSEONG RIEUL
    sub "ᄆ", "m", before: any([any("-"), any("A".."Z"), any("a".."z"), any("0".."9"), any("ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ")]) # VOWEL + CHOSEONG MIEUM # c.f. Note 3.3
    sub "ᄇ", "b", before: any([any("-"), any("A".."Z"), any("a".."z"), any("0".."9"), any("ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ")]) # VOWEL + CHOSEONG PIEUP # c.f. Note 3.3
    sub "ᄉ", "s", before: any([any("-"), any("A".."Z"), any("a".."z"), any("0".."9"), any("ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ")]) # VOWEL + CHOSEONG SIOS
    sub "ᄋ", "", before: any([any("-"), any("A".."Z"), any("a".."z"), any("0".."9"), any("ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ")]) # VOWEL + CHOSEONG IEUNG
    sub "ᄌ", "j", before: any([any("-"), any("A".."Z"), any("a".."z"), any("0".."9"), any("ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ")]) # VOWEL + CHOSEONG CIEUC
    sub "ᄎ", "ch", before: any([any("-"), any("A".."Z"), any("a".."z"), any("0".."9"), any("ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ")]) # VOWEL + CHOSEONG CHIEUCH
    sub "ᄏ", "kh", before: any([any("-"), any("A".."Z"), any("a".."z"), any("0".."9"), any("ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ")]) # VOWEL + CHOSEONG KHIEUKH
    sub "ᄐ", "th", before: any([any("-"), any("A".."Z"), any("a".."z"), any("0".."9"), any("ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ")]) # VOWEL + CHOSEONG THIEUTH
    sub "ᄑ", "ph", before: any([any("-"), any("A".."Z"), any("a".."z"), any("0".."9"), any("ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ")]) # VOWEL + CHOSEONG PHIEUPH
    sub "ᄒ", "h", before: any([any("-"), any("A".."Z"), any("a".."z"), any("0".."9"), any("ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ")]) # VOWEL + CHOSEONG HIEUH
    sub "ᄁ", "kk", before: any([any("-"), any("A".."Z"), any("a".."z"), any("0".."9"), any("ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ")]) # VOWEL + CHOSEONG SSANGKIYEOK
    sub "ᄄ", "tt", before: any([any("-"), any("A".."Z"), any("a".."z"), any("0".."9"), any("ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ")]) # VOWEL + CHOSEONG SSANGTIEUT
    sub "ᄈ", "pp", before: any([any("-"), any("A".."Z"), any("a".."z"), any("0".."9"), any("ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ")]) # VOWEL + CHOSEONG SSANGPIEUP
    sub "ᄊ", "ss", before: any([any("-"), any("A".."Z"), any("a".."z"), any("0".."9"), any("ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ")]) # VOWEL + CHOSEONG SSANGSIOS
    sub "ᄍ", "jj", before: any([any("-"), any("A".."Z"), any("a".."z"), any("0".."9"), any("ᅡᅢᅣᅤᅥᅦᅧᅨᅩᅪᅫᅬᅭᅮᅯᅰᅱᅲᅳᅴᅵ")]) # VOWEL + CHOSEONG SSANGCIEUC

    # --- JONGSEONG RIEUL-KIYEOK + initial ---
    sub "ᆰᄀ", "lg" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG KIYEOK
    sub "ᆰᄂ", "ngn" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG NIEUN
    sub "ᆰᄃ", "kt" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG TIEUT
    sub "ᆰᄅ", "ngn" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG RIEUL
    sub "ᆰᄆ", "ngm" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG MIEUM
    sub "ᆰᄇ", "kp" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG PIEUP
    sub "ᆰᄉ", "ks" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG SIOS
    sub "ᆰᄋ", "lg" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG IEUNG
    sub "ᆰᄌ", "kj" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG CIEUC
    sub "ᆰᄎ", "kch" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG CHIEUCH
    sub "ᆰᄏ", "lkh" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG KHIEUKH
    sub "ᆰᄐ", "kth" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG THIEUTH
    sub "ᆰᄑ", "kph" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG PHIEUPH
    sub "ᆰᄒ", "lkh" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG HIEUH
    sub "ᆰᄁ", "lkk" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG SSANGKIYEOK
    sub "ᆰᄄ", "ktt" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG SSANGTIEUT
    sub "ᆰᄈ", "kpp" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG SSANGPIEUP
    sub "ᆰᄊ", "kss" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG SSANGSIOS
    sub "ᆰᄍ", "kjj" # HANGUL JONGSEONG RIEUL-KIYEOK + CHOSEONG SSANGCIEUC

    # --- JONGSEONG RIEUL-MIEUM + initial ---
    sub "ᆱᄀ", "mg" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG KIYEOK
    sub "ᆱᄂ", "mn" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG NIEUN
    sub "ᆱᄃ", "md" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG TIEUT
    sub "ᆱᄅ", "mr" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG RIEUL
    sub "ᆱᄆ", "lm" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG MIEUM
    sub "ᆱᄇ", "mb" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG PIEUP
    sub "ᆱᄉ", "ms" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG SIOS
    sub "ᆱᄋ", "lm" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG IEUNG
    sub "ᆱᄌ", "mj" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG CIEUC
    sub "ᆱᄎ", "mch" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG CHIEUCH
    sub "ᆱᄏ", "mkh" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG KHIEUKH
    sub "ᆱᄐ", "mth" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG THIEUTH
    sub "ᆱᄑ", "mph" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG PHIEUPH
    sub "ᆱᄒ", "mh" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG HIEUH
    sub "ᆱᄁ", "mkk" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG SSANGKIYEOK
    sub "ᆱᄄ", "mtt" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG SSANGTIEUT
    sub "ᆱᄈ", "mpp" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG SSANGPIEUP
    sub "ᆱᄊ", "mss" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG SSANGSIOS
    sub "ᆱᄍ", "mjj" # HANGUL JONGSEONG RIEUL-MIEUM + CHOSEONG SSANGCIEUC

    # --- JONGSEONG RIEUL-PIEUP + initial ---
    sub "ᆲᄀ", "pk" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG KIYEOK
    sub "ᆲᄂ", "mn" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG NIEUN
    sub "ᆲᄃ", "pt" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG TIEUT
    sub "ᆲᄅ", "mr" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG RIEUL
    sub "ᆲᄆ", "mm" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG MIEUM
    sub "ᆲᄇ", "lb" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG PIEUP
    sub "ᆲᄉ", "ps" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG SIOS
    sub "ᆲᄋ", "lb" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG IEUNG
    sub "ᆲᄌ", "pj" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG CIEUC
    sub "ᆲᄎ", "pch" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG CHIEUCH
    sub "ᆲᄏ", "pkh" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG KHIEUKH
    sub "ᆲᄐ", "pth" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG THIEUTH
    sub "ᆲᄑ", "lph" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG PHIEUPH
    sub "ᆲᄒ", "lph" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG HIEUH
    sub "ᆲᄁ", "pkk" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG SSANGKIYEOK
    sub "ᆲᄄ", "ptt" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG SSANGTIEUT
    sub "ᆲᄈ", "lpp" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG SSANGPIEUP
    sub "ᆲᄊ", "pss" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG SSANGSIOS
    sub "ᆲᄍ", "pjj" # HANGUL JONGSEONG RIEUL-PIEUP + CHOSEONG SSANGCIEUC

    # --- Word-initial initials (preceded by a space) ---
    sub "ᄀ", "k", before: " " # HANGUL CHOSEONG KIYEOK
    sub "ᄂ", "n", before: " " # HANGUL CHOSEONG NIEUN
    sub "ᄃ", "t", before: " " # HANGUL CHOSEONG TIEUT
    # DPRK does not follow the R-onset rule, so this legacy rule stays disabled:
    # - pattern: "(?<= )ᄅ(?=[ᅣᅤᅧᅨᅭᅲ])"
    #   result: "" # HANGUL CHOSEONG RIEUL # R-onset rule
    sub "ᄅ", "r", before: " " # HANGUL CHOSEONG RIEUL (legacy result under the R-onset rule: "n")
    sub "ᄆ", "m", before: " " # HANGUL CHOSEONG MIEUM
    sub "ᄇ", "p", before: " " # HANGUL CHOSEONG PIEUP
    sub "ᄉ", "s", before: " " # HANGUL CHOSEONG SIOS
    sub "ᄋ", "", before: " " # HANGUL CHOSEONG IEUNG
    sub "ᄌ", "j", before: " " # HANGUL CHOSEONG CIEUC
    sub "ᄎ", "ch", before: " " # HANGUL CHOSEONG CHIEUCH
    sub "ᄏ", "kh", before: " " # HANGUL CHOSEONG KHIEUKH
    sub "ᄐ", "th", before: " " # HANGUL CHOSEONG THIEUTH
    sub "ᄑ", "ph", before: " " # HANGUL CHOSEONG PHIEUPH
    sub "ᄒ", "h", before: " " # HANGUL CHOSEONG HIEUH
    sub "ᄁ", "kk", before: " " # HANGUL CHOSEONG SSANGKIYEOK
    sub "ᄭ", "kk", before: " " # HANGUL CHOSEONG SIOS-KIYEOK
    sub "ᄄ", "tt", before: " " # HANGUL CHOSEONG SSANGTIEUT
    sub "ᄯ", "tt", before: " " # HANGUL CHOSEONG SIOS-TIEUT
    sub "ᄈ", "pp", before: " " # HANGUL CHOSEONG SSANGPIEUP
    sub "ᄲ", "pp", before: " " # HANGUL CHOSEONG SIOS-PIEUP
    sub "ᄊ", "ss", before: " " # HANGUL CHOSEONG SSANGSIOS
    sub "ᄍ", "jj", before: " " # HANGUL CHOSEONG SSANGCIEUC
    sub "ᄶ", "jj", before: " " # HANGUL CHOSEONG SIOS-CIEUC

    # --- Vowels ---
    sub "ᅡ", "a" # HANGUL JUNGSEONG A
    sub "ᅣ", "ya" # HANGUL JUNGSEONG YA
    sub "ᅥ", "ŏ" # HANGUL JUNGSEONG EO
    sub "ᅧ", "yŏ" # HANGUL JUNGSEONG YEO
    sub "ᅩ", "o" # HANGUL JUNGSEONG O
    sub "ᅭ", "yo" # HANGUL JUNGSEONG YO
    sub "ᅮ", "u" # HANGUL JUNGSEONG U
    sub "ᅲ", "yu" # HANGUL JUNGSEONG YU
    sub "ᅳ", "ü" # HANGUL JUNGSEONG EU
    sub "ᅵ", "i" # HANGUL JUNGSEONG I
    sub "ᅢ", "ae" # HANGUL JUNGSEONG AE
    sub "ᅤ", "yae" # HANGUL JUNGSEONG YAE
    sub "ᅦ", "e" # HANGUL JUNGSEONG E
    sub "ᅨ", "ye" # HANGUL JUNGSEONG YE
    sub "ᅬ", "oe" # HANGUL JUNGSEONG OE
    sub "ᅱ", "wi" # HANGUL JUNGSEONG WI
    sub "ᅴ", "üi" # HANGUL JUNGSEONG YI
    sub "ᅪ", "wa" # HANGUL JUNGSEONG WA
    sub "ᅯ", "wo" # HANGUL JUNGSEONG WEO
    sub "ᅫ", "wae" # HANGUL JUNGSEONG WAE
    sub "ᅰ", "we" # HANGUL JUNGSEONG WE

    # --- Standalone finals (not followed by Hangul) ---
    sub "ᆨ", "k", not_after: unicode_hangul # HANGUL JONGSEONG KIYEOK
    sub "ᆫ", "n", not_after: unicode_hangul # HANGUL JONGSEONG NIEUN
    sub "ᆮ", "t", not_after: unicode_hangul # HANGUL JONGSEONG TIEUT
    sub "ᆯ", "l", not_after: unicode_hangul # HANGUL JONGSEONG RIEUL
    sub "ᆷ", "m", not_after: unicode_hangul # HANGUL JONGSEONG MIEUM
    sub "ᆸ", "p", not_after: unicode_hangul # HANGUL JONGSEONG PIEUP
    sub "ᆺ", "t", not_after: unicode_hangul # HANGUL JONGSEONG SIOS
    sub "ᆼ", "ng", not_after: unicode_hangul # HANGUL JONGSEONG IEUNG
    sub "ᆽ", "t", not_after: unicode_hangul # HANGUL JONGSEONG CIEUC
    sub "ᆾ", "t", not_after: unicode_hangul # HANGUL JONGSEONG CHIEUCH
    sub "ᆿ", "k", not_after: unicode_hangul # HANGUL JONGSEONG KHIEUKH
    sub "ᇀ", "t", not_after: unicode_hangul # HANGUL JONGSEONG THIEUTH
    sub "ᇁ", "p", not_after: unicode_hangul # HANGUL JONGSEONG PHIEUPH
    sub "ᆰ", "k", not_after: unicode_hangul # HANGUL JONGSEONG RIEUL-KIYEOK
    sub "ᆱ", "m", not_after: unicode_hangul # HANGUL JONGSEONG RIEUL-MIEUM (matches the "m" used by its cluster rows)
    sub "ᆲ", "p", not_after: unicode_hangul # HANGUL JONGSEONG RIEUL-PIEUP
  }

  # Remove the padding spaces added at the start of POSTRULES.
  sub line_start + " ", ""
  sub " " + line_end, ""

  # This is based on Jamo
  title_case
}