metadata {
  authority_id: un
  # NOTE(review): `id` is kept as-is so that existing references to this map
  # keep resolving, although the system described below dates from 1972.
  id: 2017
  language: iso-639-2:urd
  source_script: Arab
  destination_script: Latn
  name: Romanization of Urdu (1972)
  url: https://www.eki.ee/wgrs/rom1_ur.htm
  creation_date: 1972
  confirmation_date: 2018-06
  description: |
    The United Nations recommended system was approved in 1972 (II/11) and
    amended in 1977 (III/12), based on a report prepared by D. N. Sharma.
    The tables and their corrections were published in volume II of the
    conference reports.

    There is no evidence of the use of the system either in Pakistan, India
    or in international cartographic products. Instead, in Pakistan the
    Hunterian system is officially used.

    The resolutions III/12 (1977) and IV/17 (1982) recommended association,
    inter alia, with Pakistan, in carrying out further studies on the system.

    Urdu (Urdū) uses the Perso-Arabic script which is written from right to
    left. In the script vowel points are usually omitted which makes it
    difficult to obtain uniform romanizations. Some of the Arabic consonants
    are undifferentiated in romanization which means that the system is not
    fully reversible.
  notes:
    - A If preceded by short a, it is romanized ‘ā, e.g. مَعمُل M‘āmul.
    - B When و is imperceptible, e.g. in a few words of Persian origin when preceded by خ (ḳh).
    - C Word-finally after a short vowel.
    - D Marks aspiration of consonants.
    - E The character ے is used only word-finally.
}
tests {
  # Regression vectors for this map: each entry pairs a source string in
  # Arabic script with the romanization the stage below is expected to produce.
  # One vector per line; order is not significant.
  test "بوغدِی", "Bvghdī"
  test "مَعمُل", "M‘āmul"
  test "پَالِير", "Pālīr"
  test "بیزوت كَلے", "Byzvt Kale"
  test "عَمَل كوٹ", "‘Amal Kvṭ"
  test "ثَابِر", "Sābir"
  test "شَاه نَثَار ميلة", "Shāh Nasār Mylah"
  test "چَپرِی", "Chaprī"
  test "أَحمَد خَان كَلے", "Ahmad Ḳhān Kale"
  test "دُرَانِي", "Durānī"
  test "ڈَنگِیلا", "Ḍangīlā"
  test "ذَرَانِی", "Zarānī"
  test "بُركِي", "Burkī"
  test "گِیدَڑَه", "Gīdaṙah"
  test "عَلِي زَائِي", "‘Alī Zā-ī"
  test "ژوب", "Ỵvb"
  test "بِسَاتُو", "Bisātū"
  test "أَحمَدِي شَامَا", "Ahmadī Shāmā"
  test "اَصَالَت كَلے", "Asālat Kale"
  test "خَضَر خَان", "Ḳhazar Ḳhān"
  test "سُلْطَان", "Sultān"
  test "عَزَم سَيِّد نُور كَلے", "‘Azam Sayyid Nūr Kale"
  test "بغَاكِي", "Bghākī"
  test "حَقدَرَه", "Haqdarah"
  test "کَچکِینَہ", "Kachkīnaḥ"
  test "بَاگَن", "Bāgan"
  test "بُلبَلَک", "Bulbalak"
  test "بِلیَامِین", "Bilyāmīn"
  test "نَہر", "Nahr"
  test "اَرَوْالِی", "Arawālī"
  test "مَہردِی", "Mahrdī"
  test "بَڑھ", "Baṙh"
  test "یَاردَا کَلے", "Yārdā Kale"
  test "بهَائِي خَان", "Bhā-ī Ḳhān"
  test "پھاشک", "Phāshk"
  test "تھَلّ", "Thall"
  test "پَٹھان ريَا", "Paṭhān Ryā"
  test "جھِیل", "Jhīl"
  test "غَزْنِي سْپِين", "Ghaznī Spīn"
  test "بَادشَاه چھُم", "Bādshāh Chhum"
  test "سِندھ", "Sindh"
  test "ڈھَنڈ", "Ḍhanḍ"
  test "خَان گھَڑِی", "Ḳhān Ghaṙī"
  test "غُلَامَک كَلے", "Ghulāmak Kale"
  test "خَپیَنگا", "Ḳhapyangā"
  test "گَندَه كَلے", "Gandah Kale"
  test "مَورپِتھِی", "Maurpithī"
  test "درے پلارِی", "Dre Plārī"
  test "آگرَہ", "Āgraḥ"
  test "ڈَنڈَر", "Ḍanḍar"
  test "گُبازانَہ", "Gubāzānaḥ"
  test "حَےدَر عَلِی كَلے", "Haidar ‘Alī Kale"
  test "تَودَہ چِینَہ", "Taudaḥ Chīnaḥ"
  test "مُوسى خَان كَلے", "Mūsá Ḳhān Kale"
  test "مُلَّا بَاغ", "Mullā Bāgh"
}
# Transliteration stage. It has three parts:
#   1. a pre-rule that expands shadda on yāʾ before anything else runs,
#   2. a `parallel` group holding the character rules (vowels and diacritics,
#      shadda doubling, ta' marbūṭa, hamza carriers, the article and the
#      consonants),
#   3. post-rules that capitalize words and then re-lowercase the article
#      when it occurs in the middle of a sentence.
#
# As used in this file, `after:` restricts a rule by the text that FOLLOWS the
# match (e.g. "fatha followed by ta' marboota"), while `before:` and
# `not_before:` restrict it by the text that PRECEDES the match (e.g. note C:
# heh "word-finally after a short vowel").
stage {
  # Somehow, this should execute first to pass the test.
  sub any("\u064a\u06cc") + "\u0651", "yy" # ي / ی + shadda

  # CHARACTERS
  parallel {
    # special rules
    sub space, "", after: "\u0622\u0628\u064E\u0627\u062F" # space followed by abad is removed
    sub "\ufdf2", "Allāh" # ﷲ ligature (original comment: see note 5)

    # Vowels, Diphthongs, and Diacritical Marks
    sub "\u064e", "a" # َ fatha
    sub "\u064e\u0627", "ā" # ـَا fatha followed by ا
    sub "\u0627", "ā" # ا
    sub "\u0649\u0670", "ā" # ىٰ
    sub "\u06D2\u0670", "ā" # ےٰ
    sub "\u0622", "ā" # آ
    sub boundary + "\u0627", "" # ا directly after a boundary is dropped
    sub "\u064e", "", after: "\u0629" # َ fatha followed by ta' marboota
    sub "\u064e", "", after: "a" + any("ht") # َ fatha followed by ta' marboota, handling different order of conversion
    sub "\u0652", "" # ْ sukun
    sub "\u0659", "ê"
    sub "\u0650", "i" # ِ kasra
    sub "\u0650" + any("\u064a\u06cc"), "ī" # ـِي kasra followed by ي
    sub "\u0650\u06d2\u0652", "e" # ـے
    sub "\u0650\u06d2", "e" # ـے
    sub "\u06d2", "e" # ـے
    sub "\u064f", "u" # ُ damma
    sub "\u064f\u0648", "ū" # ـُو damma followed by و
    sub "\u064f\u0648\u0652", "o" # ـُوْ damma followed by و and sukun
    sub "\u064e\u06d2", "ai" # ـَے
    sub "\u064e\u0648", "au" # ـَو
    sub "\u0670", "á" # ٰ superscript alef
    sub "\u0649", "á" # ى

    # shadda: the consonant is doubled using its own romanization
    sub "\u0628\u0651", "bb" # ب
    sub "\u062a\u0651", "tt" # ت
    sub "\u062b\u0651", "ss" # ث
    sub "\u062c\u0651", "jj" # ج
    sub "\u062d\u0651", "hh" # ح
    sub "\u062e\u0651", "ḳhḳh" # خ
    sub "\u062f\u0651", "dd" # د
    sub "\u0630\u0651", "zz" # ذ
    sub "\u0631\u0651", "rr" # ر
    sub "\u0632\u0651", "zz" # ز
    sub "\u0633\u0651", "ss" # س
    # Fixed: was "sh", which silently dropped the doubling; the other digraphs
    # in this list (ḳhḳh, ghgh) repeat the whole digraph.
    sub "\u0634\u0651", "shsh" # ش
    sub "\u0635\u0651", "ss" # ص
    # Fixed: was "ḏḏ", but plain ض is romanized "z" below (and "az z" in the
    # sun-letter rules), so the doubled form is "zz".
    sub "\u0636\u0651", "zz" # ض
    sub "\u0637\u0651", "tt" # ط
    sub "\u0638\u0651", "zz" # ظ
    sub "\u063a\u0651", "ghgh" # غ
    sub "\u0641\u0651", "ff" # ف
    sub "\u0642\u0651", "qq" # ق
    sub "\u0643\u0651", "kk" # ك
    sub "\u0644\u0651", "ll" # ل
    sub "\u0645\u0651", "mm" # م
    sub "\u0646\u0651", "nn" # ن
    sub "\u0647\u0651", "hh" # ه
    sub "\u0648\u0651", "vv" # و

    # NOTE 1
    sub "\u0650" + boundary, "-e" # ِ kasra
    sub "\u0674", "-e" # ٴ
    sub "\u0654", "-e" # ٔ

    sub "\u0650\u064a\u0651\u064e", "īy" # ـِيَّ
    # Fixed: the second alternative was "u064f" (missing backslash), i.e. the
    # literal text "u064f" rather than the damma sign, so it never matched.
    sub "\u0650\u064a", "iy", after: any(["\u064e", "\u064f"]) # ـِي kasra + ي followed by fatha or damma
    sub "\u064e\u0649", "ay" # ـَى fatha followed by ى which is ا not ي
    sub "\u064e\u0648\u0652", "aw" # ـَوْ
    sub "\u064e\u064a\u0652", "ay" # ـَيْ
    sub "\u0650\u06cc\u0651\u064e", "īy" # ـِیَّ
    sub "\u064e\u064a", "aī" # ـَي
    sub "\u064e\u06cc", "aī" # ـَی

    # - '-ye'

    # TODO: compress this
    # ta' marboota
    # The rules below give "ah" when ة is preceded by a boundary, ال and then
    # 2 to 13 characters from U+0600..U+06FF; each width is spelled out as a
    # separate rule.
    sub "\u0629", "at" # ة in the middle of the sentence
    sub "\u0629" + line_end, "ah"
    sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
    sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
    sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
    sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
    sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
    sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
    sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
    sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
    sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
    sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
    sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
    sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")

    # hamza and hamza carriers
    sub "\u0621", "-" # ء
    sub "\u0624", "-" # ؤ
    sub "\u0626", "-" # ئ
    sub "\u0623", "" # أ
    sub "\u0625", "" # إ

    # See note B
    sub boundary + "\u0627\u0644", "al " # ال
    # '\uFE8E' : '' # ﺎ

    # Sun letters
    sub boundary + "\u0627\u0644\u062a" + maybe("\u0651"), "at t" # الت
    sub boundary + "\u0627\u0644\u062b" + maybe("\u0651"), "as s" # الث
    sub boundary + "\u0627\u0644\u062f" + maybe("\u0651"), "ad d" # الد
    sub boundary + "\u0627\u0644\u0630" + maybe("\u0651"), "az z" # الذ
    sub boundary + "\u0627\u0644\u0631" + maybe("\u0651"), "ar r" # الر
    sub boundary + "\u0627\u0644\u0632" + maybe("\u0651"), "az z" # الز
    sub boundary + "\u0627\u0644\u0633" + maybe("\u0651"), "as s" # الس
    sub boundary + "\u0627\u0644\u0634" + maybe("\u0651"), "ash sh" # الش
    sub boundary + "\u0627\u0644\u0635" + maybe("\u0651"), "as s" # الص
    sub boundary + "\u0627\u0644\u0636" + maybe("\u0651"), "az z" # الض
    sub boundary + "\u0627\u0644\u0637" + maybe("\u0651"), "at t" # الط
    sub boundary + "\u0627\u0644\u0638" + maybe("\u0651"), "az z" # الظ
    sub boundary + "\u0627\u0644\u0644" + maybe("\u0651"), "al l" # الل
    sub boundary + "\u0627\u0644\u0646" + maybe("\u0651"), "an n" # الن

    # consonant characters
    sub "\u0628", "b" # ب
    sub "\u067E", "p" # پ
    sub "\u062a", "t" # ت
    sub "\u0679", "ṭ" # ٹ
    sub "\u062B", "s" # ث
    sub "\u062c", "j" # ج
    sub "\u0686", "ch" # چ
    sub "\u062d", "h" # ح
    sub "\u062e", "ḳh" # خ
    sub "\u062f", "d" # د
    sub "\u0688", "ḍ" # ڈ
    sub "\u0630", "z" # ذ
    sub "\u0631", "r" # ر
    sub "\u0691", "ṙ" # ڑ
    sub "\u0632", "z" # ز
    sub "\u0698", "ỵ" # ژ
    sub "\u0633", "s" # س
    sub "\u0634", "sh" # ش
    sub "\u0635", "s" # ص
    sub "\u0636", "z" # ض
    sub "\u0637", "t" # ط
    sub "\u0638", "z" # ظ
    sub "\u0639", "‘" # ع
    sub "\u064e\u0639", "‘ā" # ع NOTE A
    sub "\u063a", "gh" # غ
    sub "\u0641", "f" # ف
    sub "\u0642", "q" # ق
    sub "\u0643", "k" # ك
    sub "\u06A9", "k" # ک
    sub "\u06AF", "g" # گ
    sub "\u0644", "l" # ل
    sub "\u0645", "m" # م
    sub any("\u06BA\u0646"), "n" # ن, ں
    sub any("\ufba9\u06c1"), "h" # ہ , ﮩ
    sub any("\ufba9\u06c1") + boundary, "ḥ", before: any("\u064e\u0650\u064f") # ہ , ﮩ NOTE C
    sub any("\u0647\u06be"), "h" # ه, ھ
    sub "\u0648", "v" # و
    sub "\u0648", "ẉ", before: "\u062e" # و NOTE B
    # Fixed: the character set was "\u064a|\u06cc". `any` is given a plain
    # string of candidate characters (compare the first rule of this stage),
    # so the stray "|" was itself a candidate and a literal "|" became "y".
    sub any("\u064a\u06cc"), "y" # ي, ی
    # '\u0649' : 'y' # ي
    sub "\u06D0", "ē" # ې
    sub "\u06CD", "êy" # ۍ
  }

  # POSTRULES
  # Upcase a character that directly follows a boundary, except when that
  # boundary is itself followed by one of ‘ ’ ' - before the character.
  sub any("\u0061".."\uFFFF"), upcase, before: boundary, not_before: boundary + any("‘’'-")

  # don't capitalize definite article in the middle of a sentence
  sub " At T", " at T" # الت
  sub " As̄ S̄", " as̄ S̄" # الث
  sub " Ad D", " ad D" # الد
  sub " Az Z", " az Z" # الذ
  sub " Ar R", " ar R" # الر
  sub " Az Z", " az Z" # الز
  sub " As S", " as S" # الس
  sub " Ash Sh", " ash Sh" # الش
  sub " As S", " as S" # الص
  sub " Az Z", " az Z" # الض
  sub " At T", " at T" # الط
  sub " Az Z", " az Z" # الظ
  sub " Al L", " al L" # الل
  sub " An N", " an N" # الن
  sub " Al ", " al " # ال
}