# Map metadata for ISO 233:1984 (authority "iso", id "233-1984"):
# Arabic script (Arab) -> Latin script (Latn) for language iso-639-2:ara.
# Besides the identifying fields it carries the reference URLs, the standard's
# creation/confirmation dates, a description, and a note contrasting this
# system's output with the ara-arab-latn-2017 system (e.g. long vowels written
# a’, iy, uw instead of ā, ī, ū).
metadata {
authority_id: iso id: 233-1984 language: iso-639-2:ara source_script: Arab destination_script: Latn name: ISO 233:1984 Documentation -- Transliteration of Arabic characters into Latin characters url: - https://www.iso.org/standard/4117.html - http://transliteration.eki.ee/pdf/Arabic_2.2.pdf - http://www.eki.ee/wgrs/rom1_ar.pdf creation_date: 1984 confirmation_date: 2018-06 description: | Is one of a series of International Standards dealing with the conversion of systems of writing, following the principles of stringent conversion in order to permit international information exchange. Its aim is to provide a means for international communication of written messages in a form which permits the automatic transmission and reconstitution of these by men or machines. Cancels and replaces ISO Recommendation R 233-1961 notes: - | The transliteration ISO 233:1984 WRT ara-arab-latn-2017 gives every character and diacritical mark a unique equivalent and e.g. long vowels in Arabic ā, ī and ū are consequently written a’, iy and uw respectively in the ISO transliteration. Other main correspondences ث is ṯ instead of th ج is ǧ instead of j ح is ḥ instead of ẖ خ is ẖ instead of kh ذ is ḏ instead of dh ش is š instead of sh ص is ṣ instead of s̱ ض is ḍ instead of ḏ ط is ṭ instead of ṯ ظ is ẓ instead of d͟h غ is ġ instead of gh ة is ẗ instead of h/t ى is ỳ ـِي is iy instead of ī ـُو is uw instead of ū ـَا is a’ instead of ā ـَى is aỳ instead of á
}
# Expected transliterations: each `test` pairs an Arabic input with the Latin
# output this map must produce.
tests {
  # Single words without the definite article
  test "مِصر", "Miṣr"
  test "قَطَر", "Qaṭar"

  # Multi-word names: sentence-initial "Al" is capitalised, later "al" is not
  test "الجُمهُورِيَّة العِرَاقِيَّة", "Al Ǧumhuwriyaẗ al ‘Ira’qiyaẗ"
  test "جُمهُورِيَّة مِصر العَرَبِيَّة", "Ǧumhuwriyaẗ Miṣr al ‘Arabiyaẗ"

  # Definite article assimilated to a following sun letter (ر, ش)
  test "الرِيَاض", "Ar Riya’ḍ"
  test "الشارِقة", "Aš Šâriqaẗ"
}
# Main transliteration stage.
#
# 1. A `parallel` block maps Arabic vowel marks, vowel+letter sequences, the
#    definite article (with sun-letter assimilation), shadda-doubled consonants
#    and individual letters (including Arabic presentation forms) to Latin.
# 2. Post-rules capitalise the first letter of every word and then lower-case
#    the definite article again when it is preceded by a space.
stage {
  # CHARACTERS
  parallel {
    # pointing
    sub "\u064e", "a" # َ fatha
    sub "\u064e", "", after: "\u0629" # َ fatha followed by ta' marboota (ة already yields "aẗ")
    sub "\u0650", "i" # ِ kasra
    sub "\u064f", "u" # ُ damma
    sub "\u0652", "" # ْ sokoon, see note A

    # special pointed letters
    sub "\u0639\u064e", "‘a" # عَ
    sub "\u0639\u0650", "‘i" # عِ
    # Short damma stays short after ayn, matching the plain damma rule above.
    sub "\u0639\u064f", "‘u" # عُ
    # handle MacOS regex difference
    # Long u is written "uw" in this system, so ayn + damma + waw is "‘uw".
    sub "\u0639\u064f\u0648", "‘uw" # عُو damma followed by و
    sub "\u0650\u064a", "iy" # ـِي kasra followed by ي
    sub "\u0650\u064a\u0651\u064e", "iy" # ـِيَّ
    # Same output as the unconditional kasra+ي rule above.
    sub "\u0650\u064a", "iy", after: any("\u064e\u064f") # ـِي kasra followed by ي
    sub "\u064e\u0627", "a’" # ـَا fatha followed by ا
    sub "\u064e\u0649", "aỳ" # ـَى fatha followed by ى which is ا not ي
    sub "\u064f\u0648", "uw" # ـُو damma followed by و
    sub "\u064e\u0648\u0652", "aw" # ـَوْ
    sub "\u064e\u064a\u0652", "ay" # ـَيْ

    # Sun letters: the article's ل assimilates to the following consonant
    sub boundary + "\u0627\u0644\u062a" + maybe("\u0651"), "at t" # الت
    sub boundary + "\u0627\u0644\u062b" + maybe("\u0651"), "aṯ ṯ" # الث
    sub boundary + "\u0627\u0644\u062f" + maybe("\u0651"), "ad d" # الد
    sub boundary + "\u0627\u0644\u0630" + maybe("\u0651"), "aḏ ḏ" # الذ
    sub boundary + "\u0627\u0644\u0631" + maybe("\u0651"), "ar r" # الر
    sub boundary + "\u0627\u0644\u0632" + maybe("\u0651"), "az z" # الز
    sub boundary + "\u0627\u0644\u0633" + maybe("\u0651"), "as s" # الس
    sub boundary + "\u0627\u0644\u0634" + maybe("\u0651"), "aš š" # الش
    sub boundary + "\u0627\u0644\u0635" + maybe("\u0651"), "aṣ ṣ" # الص
    sub boundary + "\u0627\u0644\u0636" + maybe("\u0651"), "aḍ ḍ" # الض
    sub boundary + "\u0627\u0644\u0637" + maybe("\u0651"), "aṭ ṭ" # الط
    sub boundary + "\u0627\u0644\u0638" + maybe("\u0651"), "aẓ ẓ" # الظ
    sub boundary + "\u0627\u0644\u0644" + maybe("\u0651"), "al l" # الل
    sub boundary + "\u0627\u0644\u0646" + maybe("\u0651"), "an n" # الن

    # ta' marboota in iso-233-1984 is all the same `aẗ`
    sub "\u0629", "aẗ" # ة in the middle of the sentence

    # Shadda: consonant + shadda is written as a doubled consonant
    sub "\u0628\u0651", "bb" # ب
    sub "\u062a\u0651", "tt" # ت
    sub "\u062b\u0651", "ṯṯ" # ث
    sub "\u062c\u0651", "ǧǧ" # ج
    sub "\u062d\u0651", "ḥḥ" # ح
    sub "\u062e\u0651", "ẖẖ" # خ
    sub "\u062f\u0651", "dd" # د
    sub "\u0630\u0651", "ḏḏ" # ذ
    sub "\u0631\u0651", "rr" # ر
    sub "\u0632\u0651", "zz" # ز
    sub "\u0633\u0651", "ss" # س
    sub "\u0634\u0651", "šš" # ش
    sub "\u0635\u0651", "ṣṣ" # ص
    sub "\u0636\u0651", "ḍḍ" # ض
    sub "\u0637\u0651", "ṭṭ" # ط
    sub "\u0638\u0651", "ẓẓ" # ظ
    sub "\u063a\u0651", "ġġ" # غ
    sub "\u0641\u0651", "ff" # ف
    sub "\u0642\u0651", "qq" # ق
    sub "\u0643\u0651", "kk" # ك
    sub "\u0644\u0651", "ll" # ل
    sub "\u0645\u0651", "mm" # م
    sub "\u0646\u0651", "nn" # ن
    sub "\u0647\u0651", "hh" # ه
    sub "\u0648\u0651", "ww" # و
    sub "\u064a\u0651", "yy" # ي

    # Alif, hamza carriers and the plain definite article
    sub "\u0622", "’â" # آ
    sub "\u0627", "â" # ا
    sub "\u0649", "ỳ" # ى
    sub "\u0626", "'" # ئ
    # NOTE(review): the replacement is written as maybe("’") rather than a plain
    # string; kept as-is -- confirm this is the intended form.
    sub "\u0621", maybe("’") # ء see note A
    sub "\u0623", "a" # أ
    # See note B
    sub boundary + "\u0627\u0644", "al " # ال
    # '\uFE8E' : '' # ﺎ

    # Consonants: base letter followed by its presentation-form code points
    sub "\u0628", "b" # ب
    sub "\uFE91", "b" # ﺑ
    sub "\uFE92", "b" # ﺒ
    sub "\uFE90", "b" # ﺐ
    # See note C
    sub "\u062a", "t" # ت
    sub "\ufe97", "t" # ﺗ
    sub "\ufe98", "t" # ﺘ
    sub "\ufe96", "t" # ﺖ
    sub "\u062b", "ṯ" # ث
    sub "\ufe9b", "ṯ" # ﺛ
    sub "\ufe9c", "ṯ" # ﺜ
    sub "\ufe9a", "ṯ" # ﺚ
    sub "\u062c", "ǧ" # ج
    sub "\ufe9f", "ǧ" # ﺟ
    sub "\ufea0", "ǧ" # ﺠ
    sub "\ufe9e", "ǧ" # ﺞ
    sub "\u062d", "ḥ" # ح
    sub "\ufea3", "ḥ" # ﺣ
    sub "\ufea4", "ḥ" # ﺤ
    sub "\ufea2", "ḥ" # ﺢ
    sub "\u062e", "ẖ" # خ
    sub "\ufea7", "ẖ" # ﺧ
    sub "\ufea8", "ẖ" # ﺨ
    sub "\ufea6", "ẖ" # ﺦ
    sub "\u062f", "d" # د
    sub "\ufeaa", "d" # ﺪ
    sub "\u0630", "ḏ" # ذ
    sub "\ufeac", "ḏ" # ﺬ
    sub "\u0631", "r" # ر
    sub "\ufeae", "r" # ﺮ
    sub "\u0632", "z" # ز
    sub "\ufeb0", "z" # ﺰ
    sub "\u0633", "s" # س
    sub "\ufeb3", "s" # ﺳ
    sub "\ufeb4", "s" # ﺴ
    sub "\ufeb2", "s" # ﺲ
    sub "\u0634", "š" # ش
    sub "\ufeb7", "š" # ﺷ
    sub "\ufeb8", "š" # ﺸ
    sub "\ufeb6", "š" # ﺶ
    sub "\u0635", "ṣ" # ص
    sub "\ufebb", "ṣ" # ﺻ
    sub "\ufebc", "ṣ" # ﺼ
    sub "\ufeba", "ṣ" # ﺺ
    sub "\u0636", "ḍ" # ض
    sub "\ufebf", "ḍ" # ﺿ
    sub "\ufec0", "ḍ" # ﻀ
    sub "\ufebe", "ḍ" # ﺾ
    sub "\u0637", "ṭ" # ط
    sub "\ufec3", "ṭ" # ﻃ
    sub "\ufec4", "ṭ" # ﻄ
    sub "\ufec2", "ṭ" # ﻂ
    sub "\u0638", "ẓ" # ظ
    sub "\ufec7", "ẓ" # ﻇ
    sub "\ufec8", "ẓ" # ﻈ
    sub "\ufec6", "ẓ" # ﻆ
    sub "\u0639", "‘" # ع
    sub "\ufecb", "‘" # ﻋ
    sub "\ufecc", "‘" # ﻌ
    sub "\ufeca", "‘" # ﻊ
    sub "\u063a", "ġ" # غ
    sub "\ufecf", "ġ" # ﻏ
    sub "\ufed0", "ġ" # ﻐ
    sub "\ufece", "ġ" # ﻎ
    sub "\u0641", "f" # ف
    sub "\ufed3", "f" # ﻓ
    sub "\ufed4", "f" # ﻔ
    sub "\ufed2", "f" # ﻒ
    sub "\u0642", "q" # ق
    sub "\ufed7", "q" # ﻗ
    sub "\ufed8", "q" # ﻘ
    sub "\ufed6", "q" # ﻖ
    sub "\u0643", "k" # ك
    sub "\ufedb", "k" # ﻛ
    sub "\ufedc", "k" # ﻜ
    sub "\ufeda", "k" # ﻚ
    sub "\u0644", "l" # ل
    sub "\ufedf", "l" # ﻟ
    sub "\ufee0", "l" # ﻠ
    sub "\ufede", "l" # ﻞ
    sub "\u0645", "m" # م
    sub "\ufee3", "m" # ﻣ
    sub "\ufee4", "m" # ﻤ
    sub "\ufee2", "m" # ﻢ
    sub "\u0646", "n" # ن
    sub "\ufee7", "n" # ﻧ
    sub "\ufee8", "n" # ﻨ
    sub "\ufee6", "n" # ﻦ
    # See note C
    sub "\u0647", "h" # ه
    sub "\ufeeb", "h" # ﻫ
    sub "\ufeec", "h" # ﻬ
    sub "\ufeea", "h" # ﻪ
    sub "\u0648", "w" # و
    sub "\ufeee", "w" # ﻮ
    sub "\u064a", "y" # ي
    sub "\ufef3", "y" # ﻳ
    sub "\ufef4", "y" # ﻴ
    sub "\ufef1", "y" # ﻱ
  }

  # POSTRULES
  # Upper-case the first letter of each word; the not_before guard keeps a
  # letter that follows a word-internal ‘ ’ or ' from being capitalised.
  sub any("\u0061".."\uFFFF"), upcase, before: boundary, not_before: boundary + any("‘’'")

  # don't capitalize the definite article in the middle of a sentence
  sub " At T", " at T" # الت
  sub " Aṯ Ṯ", " aṯ Ṯ" # الث
  sub " Ad D", " ad D" # الد
  sub " Aḏ Ḏ", " aḏ Ḏ" # الذ
  sub " Ar R", " ar R" # الر
  sub " Az Z", " az Z" # الز
  sub " As S", " as S" # الس
  sub " Aš Š", " aš Š" # الش
  sub " Aṣ Ṣ", " aṣ Ṣ" # الص
  sub " Aḍ Ḍ", " aḍ Ḍ" # الض
  sub " Aṭ Ṭ", " aṭ Ṭ" # الط
  sub " Aẓ Ẓ", " aẓ Ẓ" # الظ
  sub " Al L", " al L" # الل
  sub " An N", " an N" # الن
  sub " Al ", " al " # ال
}