metadata {
  authority_id: bgnpcgn
  id: 1994
  language: iso-639-2:tgk
  source_script: Cyrl
  destination_script: Latn
  name: Romanization of Tajik (1994)
  url: https://geonames.nga.mil/gns/html/Romanization/ROMANIZATION%20OF%20TAJIK.pdf
  creation_date: 1994
  confirmation_date: 2017-11
  description: |
    The BGN/PCGN system for Tajik was designed for use in romanizing names
    written in the Tajik Cyrillic alphabet. The Tajik Cyrillic alphabet
    contains six characters not present in the Russian alphabet:
    ғ, ӣ, қ, ӯ, ҳ and ҷ.

    An orthographic reform of the Tajik Cyrillic alphabet was implemented
    under the auspices of the Academy of Sciences of Tajikistan. This reform
    was promulgated in a decree of 3 September 1998 by the government of the
    Republic of Tajikistan. The reform abolished the characters ц, щ, ь and ы
    (see notes 2 through 5).

  notes:
    - |
      The character sequences гҳ, зҳ, кҳ, and сҳ may be romanized g·h, z·h,
      k·h, and s·h in order to differentiate those romanizations from the
      digraphs gh, zh, kh, and sh which are used to render the characters
      ғ, ж, х, and ш.
    - |
      The obsolete character ц, abolished in 1998, should be romanized s
      (before a vowel and/or after a consonant within a word) or ts
      intervocalically.
    - The obsolete character щ, replaced by ш in 1998, should be romanized sh.
    - The obsolete character ь, abolished in 1998, should not be romanized.
    - The obsolete character ы, replaced by и in 1998, should be romanized i.
    - Unicode values are shown with the uppercase Cyrillic character first, followed by the lowercase character.
    - |
      An inventory of letter-diacritic combinations, with their Unicode
      encoding, in addition to the unmodified letters of the basic Roman
      script is:

      All apostrophes appearing in romanization are U+2019

      Í (U+00CD)
      í (U+00ED)
      Ŭ (U+016C)
      ŭ (U+016D)
      Ė (U+0116)
      ė (U+0117)
    - |
      The Romanization column shows only lowercase forms but, when
      romanizing, uppercase and lowercase Roman letters as appropriate
      should be used.
}
tests {
  # Running-text samples (Universal Declaration of Human Rights, Saadi, Rumi).
  # Source: https://ru.wikipedia.org/wiki/Таджикская_письменность#Образцы_записи
  test "Тамоми одамон озод ба дунё меоянд ва аз лиҳози манзилату ҳуқуқ бо ҳам баробаранд.\nҲама соҳиби ақлу виҷдонанд, бояд нисбат ба якдигар бародарвор муносабат намоянд.",
       "Tamomi odamon ozod ba dunyo meoyand va az lihozi manzilatu huquq bo ham barobarand.\nHama sohibi aqlu vijdonand, boyad nisbat ba yakdigar barodarvor munosabat namoyand."
  test "Баниодам аъзои як пайкаранд, ки дар офариниш зи як гавҳаранд. Чу узве ба дард оварад рӯзгор, дигар узвҳоро намонад қарор.",
       "Baniodam a’zoi yak paykarand, ki dar ofarinish zi yak gavharand. Chu uzve ba dard ovarad rŭzgor, digar uzvhoro namonad qaror."
  test "Саъдӣ", "Sa’dí"
  # The word "ханда" previously started with a Latin "x" homoglyph, so the
  # expected value "xanda" only passed because the Latin letter was copied
  # through unchanged. It now uses Cyrillic х and checks the kh digraph.
  test "Мурда будам, зинда шудам; гиря будам, ханда шудам. Давлати ишқ омаду ман давлати поянда шудам.",
       "Murda budam, zinda shudam; girya budam, khanda shudam. Davlati ishq omadu man davlati poyanda shudam."
  test "Мавлавӣ", "Mavlaví"

  # Single-word examples, roughly one per letter of the alphabet.
  test "санг", "sang"
  test "барг", "barg"
  test "номвар", "nomvar"
  test "Бағдод", "Baghdod"
  test "ғор", "ghor"
  test "модар", "modar"
  test "меравам", "meravam"
  test "дарё", "daryo"
  test "осиёб", "osiyob"
  test "жола", "zhola"
  test "каждум", "kazhdum"
  test "баъз", "ba’z"
  test "назар", "nazar"
  test "заҳоб", "zahob"
  test "ихтиёр", "ikhtiyor"
  test "зебоӣ", "zeboí"
  test "май", "may"
  test "кадом", "kadom"
  test "қадам", "qadam"
  test "лола", "lola"
  test "мурдагӣ", "murdagí"
  test "нон", "non"
  test "орзу", "orzu"
  test "панҷ", "panj"
  test "ранг", "rang"
  test "сар", "sar"
  test "субҳ", "subh"
  test "сурайё", "surayyo"
  test "тоҷик", "tojik"
  test "талаб", "talab"
  test "дуд", "dud"
  test "хӯрдан", "khŭrdan"
  test "фурӯғ", "furŭgh"
  test "хондан", "khondan"
  test "ҳофиз", "hofiz"
  test "чӣ", "chí"
  test "ҷанг", "jang"
  test "шаб", "shab"
  test "таъриф", "ta’rif"
  test "эй", "ėy"
  test "июн", "iyun"
  test "ягонагӣ", "yagonagí"

  # note[1]: ҳ directly after з is separated with a middle dot so that
  # "z·h" cannot be confused with the digraph "zh" (ж).
  test "РАМЗҲО", "RAMZ·HO"

  # note[2]: obsolete ц at the start of a word, before a vowel, is romanized s.
  test "цирк", "sirk"
}
stage {
  # RULES
  # Context-sensitive substitutions. They run before the character table
  # below, so they still see the original Cyrillic neighbours.
  # `before:` constrains the text preceding the match (the "РАМЗҲО" test
  # relies on this); `after:` constrains the text following it.

  # note[1]: ҳ directly after г, з, к or с is written "·h" so the result
  # (g·h, z·h, k·h, s·h) cannot be read as the digraphs gh, zh, kh, sh.
  sub "\u04B3", "·h", before: any("ГгЗзКкСс")
  sub "\u04B2", "·H", before: any("ГгЗзКкСс")

  # note[2]: obsolete ц (U+0426 / U+0446).
  #   * after a consonant                  -> s
  #   * before a vowel, not after a vowel  -> s
  #   * between two vowels                 -> ts (left to the table below)
  # Previously every ц followed by a vowel became "s", which also swallowed
  # the intervocalic case, and ц after a consonant but not before a vowel
  # fell through to "ts". The vowel class now also lists Ӣ/ӣ and Ӯ/ӯ.
  sub "\u0426", "S", before: any("БбВвГгҒғДдЖжЗзЙйКкҚқЛлМмНнПпРрСсТтФфХхҲҳЧчҶҷШшЩщ")
  sub "\u0446", "s", before: any("БбВвГгҒғДдЖжЗзЙйКкҚқЛлМмНнПпРрСсТтФфХхҲҳЧчҶҷШшЩщ")
  sub "\u0426", "S", not_before: any("АаЕеЁёИиӢӣОоУуӮӯЫыЭэЮюЯя"), after: any("АаЕеЁёИиӢӣОоУуӮӯЫыЭэЮюЯя")
  sub "\u0446", "s", not_before: any("АаЕеЁёИиӢӣОоУуӮӯЫыЭэЮюЯя"), after: any("АаЕеЁёИиӢӣОоУуӮӯЫыЭэЮюЯя")

  # CHARACTERS
  # Context-free letter table, grouped in one `parallel` block.
  parallel {
    # Uppercase
    sub "А", "A"
    sub "Б", "B"
    sub "В", "V"
    sub "Г", "G"
    sub "Ғ", "Gh"
    sub "Д", "D"
    sub "Е", "E"
    sub "Ё", "Yo"
    sub "Ж", "Zh"
    sub "З", "Z"
    sub "И", "I"
    sub "Ӣ", "Í"   # Ӣ => Í (U+00CD)
    sub "Й", "Y"
    sub "К", "K"
    sub "Қ", "Q"
    sub "Л", "L"
    sub "М", "M"
    sub "Н", "N"
    sub "О", "O"
    sub "П", "P"
    sub "Р", "R"
    sub "С", "S"
    sub "Т", "T"
    sub "У", "U"
    sub "Ӯ", "Ŭ"   # Ӯ => Ŭ (U+016C)
    sub "Ф", "F"
    sub "Х", "Kh"
    sub "Ҳ", "H"
    sub "Ч", "Ch"
    sub "Ҷ", "J"
    sub "Ц", "Ts"  # note[2]: default when no ц rule above applied
    sub "Ш", "Sh"
    sub "Щ", "Sh"  # note[3]
    sub "Ъ", "’"   # apostrophe is U+2019
    sub "Ы", "I"   # note[5]
    sub "Ь", ""    # note[4]: not romanized
    sub "Э", "Ė"   # Э => Ė (U+0116)
    sub "Ю", "Yu"
    sub "Я", "Ya"

    # Lowercase
    sub "а", "a"
    sub "б", "b"
    sub "в", "v"
    sub "г", "g"
    sub "ғ", "gh"
    sub "д", "d"
    sub "е", "e"
    sub "ё", "yo"
    sub "ж", "zh"
    sub "з", "z"
    sub "и", "i"
    sub "ӣ", "í"   # ӣ => í (U+00ED)
    sub "й", "y"
    sub "к", "k"
    sub "қ", "q"
    sub "л", "l"
    sub "м", "m"
    sub "н", "n"
    sub "о", "o"
    sub "п", "p"
    sub "р", "r"
    sub "с", "s"
    sub "т", "t"
    sub "у", "u"
    sub "ӯ", "ŭ"   # ӯ => ŭ (U+016D)
    sub "ф", "f"
    sub "х", "kh"
    sub "ҳ", "h"
    sub "ч", "ch"
    sub "ҷ", "j"
    sub "ц", "ts"  # note[2]: default when no ц rule above applied
    sub "ш", "sh"
    sub "щ", "sh"  # note[3]
    sub "ъ", "’"   # apostrophe is U+2019
    sub "ы", "i"   # note[5]
    sub "ь", ""    # note[4]: not romanized
    sub "э", "ė"   # э => ė (U+0117)
    sub "ю", "yu"
    sub "я", "ya"
  }
}