metadata {
authority_id: iso id: 233-2-1993 language: iso-639-2:ara source_script: Arab destination_script: Latn name: ISO 233-2:1993 documentation -- Transliteration of Arabic characters into Latin characters url: - https://www.iso.org/standard/4118.html - https://cdn.standards.iteh.ai/samples/4118/2f03c828842c4055a5619c1bded39381/ISO-233-2-1993.pdf creation_date: 1993-08 confirmation_date: 2018-06 description: | Establishes a simplified system for the transliteration. This simplification of the stringent rules established by ISO 233:1984 is especially intended to facilitate the processing of bibliographic information (e.g. catalogues, indices, citations, etc.). Annex A gives the diacritical signs used (taken from the code table of ISO 5436:1983). notes: - | 4.1.1 In order to render a transliterated text more legible, the vowels are supplied [method 2.1 c) of ISO 233:1984]. However sukün (`\u0652`) is omitted, as well as the vowels and diphthongs which have only a flexional function in nominal forms. - | 4.1.2 The initial alif (`\u0627`) is not represented: the presence of an initial vowel in the transliterated word is enough to indicate an alif in the original script. - | 4.1.3 A character bearing a hamzat (`\u0621`), which depends on the vocalic context, is not represented. - | 4.1.4 The definite article (`\u0627\u0644`), is always represented by the characters “al-”, whatever its vocalization. - | 4.1.5 The prepositions (li, bi, ka) as well as the conjunction wa, which in Arabic are joined to the word, are separated by a hyphen in the transliterated script.
}
tests {
  # Each entry pairs an Arabic input with the Latin output expected from
  # the stage below.

  # plain words
  test "مِصر", "Miṣr"
  test "قَطَر", "Qaṭar"

  # definite article (4.1.4) and ta' marboota
  test "الرِيَاض", "al-Riyāḍ"
  test "الشارِقة", "al-Šâriqaẗ"
  test "فِي نُورِ الْقَمَرِ", "Fī Nūr al-Qamar"

  # hamza carriers and alif madda
  test "بِئْر", "Bi’r"
  test "سَأَلَ", "Sa’al"
  test "أَخْبَار", "Aẖbār"
  test "قُرْآن", "Qur’ān"
  test "آدَاب", "Ādāb"

  # final vowel dropped; joined prepositions (4.1.5)
  test "الشَمْسُ", "al-Šams"
  test "بِاللَيلِ", "bi-al-Layl"
  test "لِلوَلَدِ", "li-l-Walad"
}
# Although this system inherits from iso-ara-Arab-Latn-233-1984,
# it uses a simplified set of rules.
# It is therefore easier to implement as a separate map than
# by using the inherit flag.
# inherit: iso-ara-Arab-Latn-233-1984
# Transliteration stage for ISO 233-2:1993 (Arabic -> Latin).
#
# The `parallel` block holds the character-level substitutions: vowel
# pointing, shadda-doubled consonants, the consonant table (base code points
# plus their U+FExx presentation-form code points), punctuation, hamza
# carriers and the definite-article / joined-preposition rules. The rules
# after the block are post-processing: they upper-case letters that follow a
# boundary and then put the article/preposition prefixes back in lower case.
stage {
  # CHARACTERS
  parallel {
    # pointing
    # Table 2 No. 30
    sub "\u064e", "a" # fatha
    # ta' marboota already yields "aẗ", so a fatha right before it is dropped
    sub "\u064e", "", after: "\u0629" # fatha followed by ta' marboota
    # Table 2, No. 31
    sub "\u064f", "u" # damma
    # Table 2, No. 32
    sub "\u0650", "i" # kasra
    # Table 3, No. 33
    sub "\u0652", "" # sokoon, see 4.1.1

    # pointing omitted in the end of words
    sub "\u064e" + boundary, "" # fatha
    sub "\u064f" + boundary, "" # damma
    sub "\u0650" + boundary, "" # kasra

    # special pointed letters
    sub "\u0639\u064e", "‘a" # ع + fatha
    sub "\u0639\u0650", "‘i" # ع + kasra
    # A bare damma is the short vowel "u" (as in the generic damma rule
    # above); the long "ū" is produced only by the ع + damma + و rule below.
    sub "\u0639\u064f", "‘u" # ع + damma

    # handle MacOS regex difference
    sub "\u0639\u064f\u0648", "‘ū" # عُو damma followed by و

    sub "\u0650\u064a", "ī" # ـِي kasra followed by ي
    sub "\u0650\u064a\u0651\u064e", "iy" # ـِيَّ
    sub "\u0650\u064a", "iy", after: any("\u064e\u064f") # ـِي kasra followed by ي, itself followed by fatha or damma
    sub "\u064e\u0627", "ā" # ـَا fatha followed by ا
    sub "\u064e\u0649", "aỳ" # ـَى fatha followed by ى which is ا not ي
    sub "\u064f\u0648", "ū" # ـُو damma followed by و

    # Table 2 No. 31, column 4
    sub "\u064e\u0648\u0652", "aw" # ـَوْ
    # Table 2 No. 32, column 4
    sub "\u064e\u064a\u0652", "ay" # ـَيْ

    # Table 1 No. 27a
    # ta' marboota in iso-233-2-1993 is all the same `aẗ`
    sub "\u0629", "aẗ" # ة in the middle of the sentence
    # sub "\u0629", "ẗ" # ة

    # Table 3, No. 34
    # Shadda: the consonant carrying it is written twice
    sub "\u0628\u0651", "bb" # ب
    sub "\u062a\u0651", "tt" # ت
    sub "\u062b\u0651", "ṯṯ" # ث
    sub "\u062c\u0651", "ǧǧ" # ج
    sub "\u062d\u0651", "ḥḥ" # ح
    sub "\u062e\u0651", "ẖẖ" # خ
    sub "\u062f\u0651", "dd" # د
    sub "\u0630\u0651", "ḏḏ" # ذ
    sub "\u0631\u0651", "rr" # ر
    sub "\u0632\u0651", "zz" # ز
    sub "\u0633\u0651", "ss" # س
    sub "\u0634\u0651", "šš" # ش
    sub "\u0635\u0651", "ṣṣ" # ص
    sub "\u0636\u0651", "ḍḍ" # ض
    sub "\u0637\u0651", "ṭṭ" # ط
    sub "\u0638\u0651", "ẓẓ" # ظ
    sub "\u063a\u0651", "ġġ" # غ
    sub "\u0641\u0651", "ff" # ف
    sub "\u0642\u0651", "qq" # ق
    sub "\u0643\u0651", "kk" # ك
    sub "\u0644\u0651", "ll" # ل
    sub "\u0645\u0651", "mm" # م
    sub "\u0646\u0651", "nn" # ن
    sub "\u0647\u0651", "hh" # ه
    sub "\u0648\u0651", "ww" # و
    sub "\u064a\u0651", "yy" # ي

    sub "\u0627", "â" # ا
    sub "\u0649", "ỳ" # ى
    sub "\u0623", "’" # أ
    # word-initial أ is not written; the following vowel stands alone
    sub boundary + "\u0623", "" # أ

    # See 4.1.4
    # '\uFE8E' : '' # ﺎ

    # Table 1 No. 3
    sub "\u0628", "b" # ب
    sub "\uFE91", "b" # ﺑ
    sub "\uFE92", "b" # ﺒ
    sub "\uFE90", "b" # ﺐ

    # See note C
    # Table 1 No. 4
    sub "\u062a", "t" # ت
    sub "\ufe97", "t" # ﺗ
    sub "\ufe98", "t" # ﺘ
    sub "\ufe96", "t" # ﺖ

    # Table 1 No. 5
    sub "\u062b", "ṯ" # ث
    sub "\ufe9b", "ṯ" # ﺛ
    sub "\ufe9c", "ṯ" # ﺜ
    sub "\ufe9a", "ṯ" # ﺚ

    # Table 1 No. 6
    sub "\u062c", "ǧ" # ج
    sub "\ufe9f", "ǧ" # ﺟ
    sub "\ufea0", "ǧ" # ﺠ
    sub "\ufe9e", "ǧ" # ﺞ

    # Table 1 No. 7
    sub "\u062d", "ḥ" # ح
    sub "\ufea3", "ḥ" # ﺣ
    sub "\ufea4", "ḥ" # ﺤ
    sub "\ufea2", "ḥ" # ﺢ

    # Table 1 No. 8
    sub "\u062e", "ẖ" # خ
    sub "\ufea7", "ẖ" # ﺧ
    sub "\ufea8", "ẖ" # ﺨ
    sub "\ufea6", "ẖ" # ﺦ

    # Table 1 No. 9
    sub "\u062f", "d" # د
    sub "\ufeaa", "d" # ﺪ

    # Table 1 No. 10
    sub "\u0630", "ḏ" # ذ
    sub "\ufeac", "ḏ" # ﺬ

    # Table 1 No. 11
    sub "\u0631", "r" # ر
    sub "\ufeae", "r" # ﺮ

    # Table 1 No. 12
    sub "\u0632", "z" # ز
    sub "\ufeb0", "z" # ﺰ

    # Table 1 No. 13
    sub "\u0633", "s" # س
    sub "\ufeb3", "s" # ﺳ
    sub "\ufeb4", "s" # ﺴ
    sub "\ufeb2", "s" # ﺲ

    # Table 1 No. 14
    sub "\u0634", "š" # ش
    sub "\ufeb7", "š" # ﺷ
    sub "\ufeb8", "š" # ﺸ
    sub "\ufeb6", "š" # ﺶ

    # Table 1 No. 15
    sub "\u0635", "ṣ" # ص
    sub "\ufebb", "ṣ" # ﺻ
    sub "\ufebc", "ṣ" # ﺼ
    sub "\ufeba", "ṣ" # ﺺ

    # Table 1 No. 16
    sub "\u0636", "ḍ" # ض
    sub "\ufebf", "ḍ" # ﺿ
    sub "\ufec0", "ḍ" # ﻀ
    sub "\ufebe", "ḍ" # ﺾ

    # Table 1 No. 17
    sub "\u0637", "ṭ" # ط
    sub "\ufec3", "ṭ" # ﻃ
    sub "\ufec4", "ṭ" # ﻄ
    sub "\ufec2", "ṭ" # ﻂ

    # Table 1 No. 18
    sub "\u0638", "ẓ" # ظ
    sub "\ufec7", "ẓ" # ﻇ
    sub "\ufec8", "ẓ" # ﻈ
    sub "\ufec6", "ẓ" # ﻆ

    # Table 1 No. 19
    sub "\u0639", "‘" # ع
    sub "\ufecb", "‘" # ﻋ
    sub "\ufecc", "‘" # ﻌ
    sub "\ufeca", "‘" # ﻊ

    # Table 1 No. 20
    sub "\u063a", "ġ" # غ
    sub "\ufecf", "ġ" # ﻏ
    sub "\ufed0", "ġ" # ﻐ
    sub "\ufece", "ġ" # ﻎ

    # Table 1 No. 21
    sub "\u0641", "f" # ف
    sub "\ufed3", "f" # ﻓ
    sub "\ufed4", "f" # ﻔ
    sub "\ufed2", "f" # ﻒ
    sub "\u06a2", "f" # ڢ Maghrebi form

    # Table 1 No. 22
    sub "\u0642", "q" # ق
    sub "\ufed7", "q" # ﻗ
    sub "\ufed8", "q" # ﻘ
    sub "\ufed6", "q" # ﻖ
    sub "\u06a8", "q" # ڧ Maghrebi form

    # Table 1 No. 23
    sub "\u0643", "k" # ك
    sub "\ufedb", "k" # ﻛ
    sub "\ufedc", "k" # ﻜ
    sub "\ufeda", "k" # ﻚ

    # Table 1 No. 24
    sub "\u0644", "l" # ل
    sub "\ufedf", "l" # ﻟ
    sub "\ufee0", "l" # ﻠ
    sub "\ufede", "l" # ﻞ

    # Table 1 No. 25
    sub "\u0645", "m" # م
    sub "\ufee3", "m" # ﻣ
    sub "\ufee4", "m" # ﻤ
    sub "\ufee2", "m" # ﻢ

    # Table 1 No. 26
    sub "\u0646", "n" # ن
    sub "\ufee7", "n" # ﻧ
    sub "\ufee8", "n" # ﻨ
    sub "\ufee6", "n" # ﻦ

    # Table 1 No. 27
    sub "\u0647", "h" # ه
    sub "\ufeeb", "h" # ﻫ
    sub "\ufeec", "h" # ﻬ
    sub "\ufeea", "h" # ﻪ

    # Table 1 No. 28
    sub "\u0648", "w" # و
    sub "\ufeee", "w" # ﻮ

    # Table 1 No. 29
    sub "\u064a", "y" # ي
    sub "\ufef3", "y" # ﻳ
    sub "\ufef4", "y" # ﻴ
    sub "\ufef1", "y" # ﻱ

    # Table 4 row 1
    sub "\u060c", "," # ،
    # Table 4 row 2
    sub "\u061b", ";" # ؛
    # Table 4 row 3
    sub "\u061f", "?" # ؟

    # 4.3 Notes to Tables
    sub "\u0626", "’" # ئ
    sub "\u0622", "’ā" # آ
    sub boundary + "\u0622", "ā" # آ at the start of a word

    # definite article, always "al-" (see 4.1.4)
    sub boundary + "\u0627\u0644", "al-" # ال
    sub "\u0627\u0644", "al-", before: "\u0628\u0650" # ال preceded by بِ
    # joined prepositions are set off with a hyphen (see 4.1.5)
    sub boundary + "\u0628\u0650", "bi-", after: "\u0627\u0644" # بِ followed by ال
    sub boundary + "\u0644\u0650\u0644", "li-l-" # لِل
  }

  # POSTRULES
  # Upper-case a character that is preceded by a boundary, except when what
  # precedes it is a boundary followed by one of ‘ ’ '.
  sub any("\u0061".."\uFFFF"), upcase, before: boundary, not_before: boundary + any("‘’'")

  # don't capitalize the definite article or the joined prepositions:
  # undo the upper-casing that the rule above applied to them
  sub "Al-", "al-" # ال
  sub "Bi-", "bi-" # بِ
  sub "Li-L-", "li-l-" # لِل
}