metadata {

authority_id: iso
id: 233-2-1993
language: iso-639-2:ara
source_script: Arab
destination_script: Latn
name: ISO 233-2:1993 documentation -- Transliteration of Arabic characters into Latin characters
url:
  - https://www.iso.org/standard/4118.html
  - https://cdn.standards.iteh.ai/samples/4118/2f03c828842c4055a5619c1bded39381/ISO-233-2-1993.pdf
creation_date: 1993-08
confirmation_date: 2018-06
description: |
  Establishes a simplified system for the transliteration. This
  simplification of the stringent rules established by ISO 233:1984
  is especially intended to facilitate the processing of bibliographic
  information (e.g. catalogues, indices, citations, etc.). Annex A
  gives the diacritical signs used (taken from the code table of
  ISO 5436:1983).
notes:
  - |
    4.1.1 In order to render a transliterated text more legible, the vowels
    are supplied [method 2.1 c) of ISO 233:1984]. However sukūn (`\u0652`) is
    omitted, as well as the vowels and diphthongs which have only a flexional
    function in nominal forms.

  - |
    4.1.2 The initial alif (`\u0627`) is not represented: the presence of an
    initial vowel in the transliterated word is enough to indicate an alif in
    the original script.

  - |
    4.1.3 A character bearing a hamzat (`\u0621`), which depends on the
    vocalic context, is not represented.

  - |
    4.1.4 The definite article (`\u0627\u0644`) is always represented by the
    characters “al-”, whatever its vocalization.

  - |
    4.1.5 The prepositions (li, bi, ka) as well as the conjunction wa, which
    in Arabic are joined to the word, are separated by a hyphen in the
    transliterated script.

}

tests {

# Each entry pairs an Arabic input with the Latin output expected from this map.

# plain vocalised words; the first letter of each word comes out capitalised
test "مِصر", "Miṣr"
test "قَطَر", "Qaṭar"

# definite article rendered as "al-" (see note 4.1.4); ta' marboota rendered as "aẗ"
test "الرِيَاض", "al-Riyāḍ"
test "الشارِقة", "al-Šâriqaẗ"

# short vowels at the end of a word and sukun are dropped (see note 4.1.1)
test "فِي نُورِ الْقَمَرِ", "Fī Nūr al-Qamar"

# hamza carriers and alif madda, word-internal and word-initial
test "بِئْر", "Bi’r"
test "سَأَلَ", "Sa’al"
test "أَخْبَار", "Aẖbār"
test "قُرْآن", "Qur’ān"
test "آدَاب", "Ādāb"

# article and prefixed prepositions stay lower-case and are hyphenated (see note 4.1.5)
test "الشَمْسُ", "al-Šams"
test "بِاللَيلِ", "bi-al-Layl"
test "لِلوَلَدِ", "li-l-Walad"

}

# Although this system inherits from iso-ara-Arab-Latn-233-1984,
# it utilizes a set of simplified rules.
# It is therefore easier to implement it as a separate map instead
# of using the inherit flag.
# inherit: iso-ara-Arab-Latn-233-1984

stage {

# CHARACTERS
parallel {

  # pointing

  # Table 2 No. 30
  sub "\u064e", "a"                 # َ fatha
  sub "\u064e", "", after: "\u0629" # َ fatha followed by ta' marboota

  # Table 2, No. 31
  sub "\u064f", "u"                 # ُ damma

  # Table 2, No. 32
  sub "\u0650", "i"                 # ِ kasra

  # Table 3, No. 33
  sub "\u0652", "" # ْ sokoon, see 4.1.1

  # pointing omitted in the end of words
  sub "\u064e" + boundary, ""                 # َ fatha
  sub "\u064f" + boundary, ""                 # ُ damma
  sub "\u0650" + boundary, ""                 # ِ kasra

  # special pointed letters
  # ‘ayn carrying a short vowel: the vowel stays short (a / i / u), matching
  # the plain fatha / kasra / damma rules of Table 2.
  sub "\u0639\u064e", "‘a"  # عَ
  sub "\u0639\u0650", "‘i"  # عِ
  sub "\u0639\u064f", "‘u"  # عُ short damma; the long ū needs a following و
  # handle MacOS regex difference
  sub "\u0639\u064f\u0648", "‘ū"  # عُو damma followed by و

  sub "\u0650\u064a", "ī" # ـِي kasra followed by ي
  sub "\u0650\u064a\u0651\u064e", "iy" # ـِيَّ
  sub "\u0650\u064a", "iy", after: any("\u064e\u064f") # ـِي kasra followed by ي
  sub "\u064e\u0627", "ā"  # ـَا fatha followed by ا
  sub "\u064e\u0649", "aỳ"  # ـَى fatha followed by ى which is ا not ي
  sub "\u064f\u0648", "ū"  # ـُو damma followed by و

  # Table 2 No. 31, column 4
  sub "\u064e\u0648\u0652", "aw"  # ـَوْ

  # Table 2 No. 32, column 4
  sub "\u064e\u064a\u0652", "ay"  # ـَيْ

  # Table 1 No. 27a
  # ta' marboota in iso-233-2-1993 is rendered as `aẗ` in every position;
  # a fatha written directly before it is dropped by the fatha rule above,
  # so the "a" is not doubled.
  sub "\u0629", "aẗ" # ة
  # sub "\u0629", "ẗ" # ة

  # Table 3, No. 34
  # Shadda
  sub "\u0628\u0651", "bb" # ب
  sub "\u062a\u0651", "tt" # ت
  sub "\u062b\u0651", "ṯṯ" # ث
  sub "\u062c\u0651", "ǧǧ" # ج
  sub "\u062d\u0651", "ḥḥ" # ح
  sub "\u062e\u0651", "ẖẖ" # خ
  sub "\u062f\u0651", "dd" # د
  sub "\u0630\u0651", "ḏḏ" # ذ
  sub "\u0631\u0651", "rr" # ر
  sub "\u0632\u0651", "zz" # ز
  sub "\u0633\u0651", "ss" # س
  sub "\u0634\u0651", "šš" # ش
  sub "\u0635\u0651", "ṣṣ" # ص
  sub "\u0636\u0651", "ḍḍ" # ض
  sub "\u0637\u0651", "ṭṭ" # ط
  sub "\u0638\u0651", "ẓẓ" # ظ
  sub "\u063a\u0651", "ġġ" # غ
  sub "\u0641\u0651", "ff" # ف
  sub "\u0642\u0651", "qq" # ق
  sub "\u0643\u0651", "kk" # ك
  sub "\u0644\u0651", "ll" # ل
  sub "\u0645\u0651", "mm" # م
  sub "\u0646\u0651", "nn" # ن
  sub "\u0647\u0651", "hh" # ه
  sub "\u0648\u0651", "ww" # و
  sub "\u064a\u0651", "yy" # ي

  sub "\u0627", "â"  # ا

  sub "\u0649", "ỳ"  # ى

  sub "\u0623", "’"  # أ
  sub boundary + "\u0623", ""  # أ

  # See 4.1.4
  # '\uFE8E' : ''  # ﺎ

  # Table 1 No. 3
  sub "\u0628", "b" # ب
  sub "\uFE91", "b" # ﺑ
  sub "\uFE92", "b" # ﺒ
  sub "\uFE90", "b" # ﺐ

  # See note C
  # Table 1 No. 4
  sub "\u062a", "t" # ت
  sub "\ufe97", "t" # ﺗ
  sub "\ufe98", "t" # ﺘ
  sub "\ufe96", "t" # ﺖ

  # Table 1 No. 5
  sub "\u062b", "ṯ" # ث
  sub "\ufe9b", "ṯ" # ﺛ
  sub "\ufe9c", "ṯ" # ﺜ
  sub "\ufe9a", "ṯ" # ﺚ

  # Table 1 No. 6
  sub "\u062c", "ǧ" # ج
  sub "\ufe9f", "ǧ" # ﺟ
  sub "\ufea0", "ǧ" # ﺠ
  sub "\ufe9e", "ǧ" # ﺞ

  # Table 1 No. 7
  sub "\u062d", "ḥ" # ح
  sub "\ufea3", "ḥ" # ﺣ
  sub "\ufea4", "ḥ" # ﺤ
  sub "\ufea2", "ḥ" # ﺢ

  # Table 1 No. 8
  sub "\u062e", "ẖ" # خ
  sub "\ufea7", "ẖ" # ﺧ
  sub "\ufea8", "ẖ" # ﺨ
  sub "\ufea6", "ẖ" # ﺦ

  # Table 1 No. 9
  sub "\u062f", "d" # د
  sub "\ufeaa", "d" # ﺪ

  # Table 1 No. 10
  sub "\u0630", "ḏ" # ذ
  sub "\ufeac", "ḏ" # ﺬ

  # Table 1 No. 11
  sub "\u0631", "r" # ر
  sub "\ufeae", "r" # ﺮ

  # Table 1 No. 12
  sub "\u0632", "z" # ز
  sub "\ufeb0", "z" # ﺰ

  # Table 1 No. 13
  sub "\u0633", "s" # س
  sub "\ufeb3", "s" # ﺳ
  sub "\ufeb4", "s" # ﺴ
  sub "\ufeb2", "s" # ﺲ

  # Table 1 No. 14
  sub "\u0634", "š" # ش
  sub "\ufeb7", "š" # ﺷ
  sub "\ufeb8", "š" # ﺸ
  sub "\ufeb6", "š" # ﺶ

  # Table 1 No. 15
  sub "\u0635", "ṣ" # ص
  sub "\ufebb", "ṣ" # ﺻ
  sub "\ufebc", "ṣ" # ﺼ
  sub "\ufeba", "ṣ" # ﺺ

  # Table 1 No. 16
  sub "\u0636", "ḍ" # ض
  sub "\ufebf", "ḍ" # ﺿ
  sub "\ufec0", "ḍ" # ﻀ
  sub "\ufebe", "ḍ" # ﺾ

  # Table 1 No. 17
  sub "\u0637", "ṭ" # ط
  sub "\ufec3", "ṭ" # ﻃ
  sub "\ufec4", "ṭ" # ﻄ
  sub "\ufec2", "ṭ" # ﻂ

  # Table 1 No. 18
  sub "\u0638", "ẓ" # ظ
  sub "\ufec7", "ẓ" # ﻇ
  sub "\ufec8", "ẓ" # ﻈ
  sub "\ufec6", "ẓ" # ﻆ

  # Table 1 No. 19
  sub "\u0639", "‘" # ع
  sub "\ufecb", "‘" # ﻋ
  sub "\ufecc", "‘" # ﻌ
  sub "\ufeca", "‘" # ﻊ

  # Table 1 No. 20
  sub "\u063a", "ġ" # غ
  sub "\ufecf", "ġ" # ﻏ
  sub "\ufed0", "ġ" # ﻐ
  sub "\ufece", "ġ" # ﻎ

  # Table 1 No. 21
  sub "\u0641", "f" # ف
  sub "\ufed3", "f" # ﻓ
  sub "\ufed4", "f" # ﻔ
  sub "\ufed2", "f" # ﻒ
  sub "\u06a2", "f" # ڢ Maghrebi form

  # Table 1 No. 22
  sub "\u0642", "q" # ق
  sub "\ufed7", "q" # ﻗ
  sub "\ufed8", "q" # ﻘ
  sub "\ufed6", "q" # ﻖ
  sub "\u06a8", "q" # ڧ Maghrebi form

  # Table 1 No. 23
  sub "\u0643", "k" # ك
  sub "\ufedb", "k" # ﻛ
  sub "\ufedc", "k" # ﻜ
  sub "\ufeda", "k" # ﻚ

  # Table 1 No. 24
  sub "\u0644", "l" # ل
  sub "\ufedf", "l" # ﻟ
  sub "\ufee0", "l" # ﻠ
  sub "\ufede", "l" # ﻞ

  # Table 1 No. 25
  sub "\u0645", "m" # م
  sub "\ufee3", "m" # ﻣ
  sub "\ufee4", "m" # ﻤ
  sub "\ufee2", "m" # ﻢ

  # Table 1 No. 26
  sub "\u0646", "n" # ن
  sub "\ufee7", "n" # ﻧ
  sub "\ufee8", "n" # ﻨ
  sub "\ufee6", "n" # ﻦ

  # Table 1 No. 27
  sub "\u0647", "h" # ه
  sub "\ufeeb", "h" # ﻫ
  sub "\ufeec", "h" # ﻬ
  sub "\ufeea", "h" # ﻪ

  # Table 1 No. 28
  sub "\u0648", "w" # و
  sub "\ufeee", "w" # ﻮ

  # Table 1 No. 29
  sub "\u064a", "y" # ي
  sub "\ufef3", "y" # ﻳ
  sub "\ufef4", "y" # ﻴ
  sub "\ufef1", "y" # ﻱ

  # Table 4 row 1
  sub "\u060c", "," # ،

  # Table 4 row 2
  sub "\u061b", ";" # ؛

  # Table 4 row 3
  sub "\u061f", "?" # ؟

  # 4.3 Notes to Tables
  sub "\u0626", "’" # ئ

  sub "\u0622", "’ā"  # آ

  sub boundary + "\u0622", "ā"  # آ

  # definite article

  sub boundary + "\u0627\u0644", "al-" # ال

  sub "\u0627\u0644", "al-", before: "\u0628\u0650" # بِال

  sub boundary + "\u0628\u0650", "bi-", after: "\u0627\u0644" # بـِ

  sub boundary + "\u0644\u0650\u0644", "li-l-" # لِل

}

# POSTRULES
# Upper-case any character in U+0061..U+FFFF that is preceded by a boundary,
# except when it is preceded by a boundary followed by one of ‘ ’ '
# (e.g. the "r" in the test output "Bi’r" stays lower-case).
# NOTE(review): `before:` is read here as "preceded by", mirroring the use of
# `after:` for "followed by" in the character rules -- confirm against the
# Interscript map format documentation.
sub any("\u0061".."\uFFFF"), upcase, before: boundary, not_before: boundary + any("‘’'")
# don't capitalize the definite article or the prefixed prepositions;
# these rules undo the capitalisation applied by the rule above
sub "Al-", "al-" # ال
sub "Bi-", "bi-" # بِ
sub "Li-L-", "li-l-" # لِل

}