metadata {

authority_id: ses
id: 1930
language: iso-639-2:ara
source_script: Arab
destination_script: Latn
name: Survey of Egypt Romanization
url: http://www.eki.ee/wgrs/rom1_ar.pdf
creation_date: 1930
confirmation_date: 2018-06
description: |
  The Survey of Egypt System (SES) of Romanization.
notes:
  - |
    The Survey of Egypt System (SES) of romanization has the following correspondences with
    the UN system:
    á = a #  ـَى fatha followed by ى which is ا not ي
    ā = â (a) # ـَا fatha followed by alef // آ
    -ah (-ة) = a # ة ta' marboota at the end of a sentence
    aw = ô (au) # ـَوْ
    ay = ei (ai) # ـَيْ
    ḏ = ḍ # ض
    dh = dh (z) # ذ
    d͟h = ẓ (d) # ظ
    ẖ = ḥ # ح
    ī = î
    j = g (j)
    q = q (k)
    s = s (c)
    s̱ = ṣ
    ṯ = ṭ
    th = th (t)
    ū = û
    ‘ = ‛
  - |
    The variants in parentheses are used depending on pronunciation and tradition. Not all the
    variations have been given above. The article is always written el- (El-Kafr el-Qadîm, Sharm
    el-Sheikh).

}

tests {

# Examples taken from:
# https://unstats.un.org/unsd/geoinfo/geonames/
# Both names are the ones quoted in the metadata notes. Between them they
# cover the article before a sun letter (ش), the article before other letters
# (ك, ق), and the lower-casing of a mid-phrase "El-".
test "شَرم الشَيْخ", "Sharm el-Sheikh"
test "الكَفر القَدِيم", "El-Kafr el-Qadîm"

}

# Base map whose main stage is run after the SES-specific rules of this map;
# it is referred to below through the alias `arablatn`.
# The map name must be a plain ASCII-quoted string literal (typographic
# quotes “ ” are not string delimiters).
dependency "un-ara-Arab-Latn-2017", as: arablatn

stage {

# CHARACTERS
parallel {

  # special pointed letters
  # ‛ain carrying a short vowel: the ‛ain sign followed by the plain short vowel.
  sub "\u0639\u064e", "‛a" # عَ
  sub "\u0639\u0650", "‛i" # عِ
  # damma on its own is a short vowel, so it is written "u" like the short
  # "a"/"i" above; only damma followed by و (next rule) is the long "û".
  sub "\u0639\u064f", "‛u" # عُ
  # handle MacOS regex difference
  sub "\u0639\u064f\u0648", "‛û" # عُو damma followed by و

  # long vowels and diphthongs; where two spellings are listed, both are
  # variants given in the metadata notes
  sub "\u0650\u064a", "î" # ـِي kasra followed by ي
  sub "\u0650\u064a\u0651\u064e", "îy" # ـِيَّ
  sub "\u064f\u0648", "û" # ـُو damma followed by و
  sub "\u064e\u0627", any(["â", "a"]) # ـَا fatha followed by ا
  sub "\u064e\u0649", "a" # ـَى fatha followed by ى which is ا not ي
  sub "\u064e\u0648\u0652", any(["ô", "au"]) # ـَوْ
  sub "\u064e\u064a\u0652", any(["ei", "ai"]) # ـَيْ
  sub "\u0622", any(["â", "a"]) # آ

  # ta' marboota (ة, U+0629) is written `a` in this system (see the
  # ta' marboota row of the metadata notes).
  # First rule: ة immediately followed by line_end.
  # Remaining rules: ة with a `before:` context of boundary + ال + N characters
  # from the Arabic block U+0600..U+06FF. The twelve rules are identical except
  # for N, which runs from 2 to 13, one rule per length.
  # NOTE(review): presumably enumerated length by length because the context
  # pattern has to be fixed-length in the underlying regex engine - confirm
  # before collapsing these into a single repeated pattern.
  sub "\u0629" + line_end, "a"
  sub "\u0629", "a", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
  sub "\u0629", "a", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
  sub "\u0629", "a", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
  sub "\u0629", "a", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
  sub "\u0629", "a", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
  sub "\u0629", "a", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
  sub "\u0629", "a", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
  sub "\u0629", "a", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
  sub "\u0629", "a", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
  sub "\u0629", "a", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
  sub "\u0629", "a", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
  sub "\u0629", "a", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")

  # Sun letters
  # Article ال at a boundary followed by a sun letter. The article is always
  # written "el-" (no assimilation to the following consonant), and an optional
  # shadda (U+0651) on the sun letter is consumed by the match, so the consonant
  # is emitted once rather than doubled.
  sub boundary + "\u0627\u0644\u062a" + maybe("\u0651"), "el-t" # الت
  sub boundary + "\u0627\u0644\u062b" + maybe("\u0651"), any(["el-th", "el-t"]) # الث
  sub boundary + "\u0627\u0644\u062f" + maybe("\u0651"), "el-d" # الد
  sub boundary + "\u0627\u0644\u0630" + maybe("\u0651"), any(["el-dh", "el-z"]) # الذ
  sub boundary + "\u0627\u0644\u0631" + maybe("\u0651"), "el-r" # الر
  sub boundary + "\u0627\u0644\u0632" + maybe("\u0651"), "el-z" # الز
  sub boundary + "\u0627\u0644\u0633" + maybe("\u0651"), any(["el-s", "el-c"]) # الس
  sub boundary + "\u0627\u0644\u0634" + maybe("\u0651"), "el-sh" # الش
  sub boundary + "\u0627\u0644\u0635" + maybe("\u0651"), "el-ṣ" # الص
  sub boundary + "\u0627\u0644\u0636" + maybe("\u0651"), "el-ḍ" # الض
  sub boundary + "\u0627\u0644\u0637" + maybe("\u0651"), "el-ṭ" # الط
  sub boundary + "\u0627\u0644\u0638" + maybe("\u0651"), any(["el-ẓ", "el-d"]) # الظ
  sub boundary + "\u0627\u0644\u0644" + maybe("\u0651"), "el-l" # الل
  sub boundary + "\u0627\u0644\u0646" + maybe("\u0651"), "el-n" # الن
  # shadda
  # A consonant followed by shadda (U+0651) is written doubled. Only the
  # letters listed here are handled in this map.
  # NOTE(review): presumably these are the letters whose SES spelling differs
  # from the dependency's; other geminated letters are left to it - confirm.
  sub "\u062b\u0651", any(["thth", "tt"]) # ث
  sub "\u062c\u0651", any(["gg", "jj"]) # ج
  sub "\u062d\u0651", "ḥḥ" # ح
  sub "\u062e\u0651", "khkh" # خ

  sub "\u0633\u0651", any(["ss", "cc"]) # س
  sub "\u0635\u0651", "ṣṣ" # ص
  sub "\u0636\u0651", "ḍḍ" # ض
  sub "\u0637\u0651", "ṭṭ" # ط
  sub "\u0638\u0651", any(["ẓẓ", "dd"]) # ظ
  sub "\u0642\u0651", any(["qq", "kk"]) # ق

  # Generic article: ال at a boundary before any letter not covered by the
  # sun-letter rules is written "el-".
  sub boundary + "\u0627\u0644", "el-" # ال

  # normal letters
  # Each group lists the base letter followed by its Arabic Presentation
  # Forms-B code points (U+FExx), all mapped to the same romanization.
  sub "\u062c", any(["g", "j"]) # ج
  sub "\ufe9f", any(["g", "j"]) # ﺟ
  sub "\ufea0", any(["g", "j"]) # ﺠ
  sub "\ufe9e", any(["g", "j"]) # ﺞ

  sub "\u062d", "ḥ" # ح
  sub "\ufea3", "ḥ" # ﺣ
  sub "\ufea4", "ḥ" # ﺤ
  sub "\ufea2", "ḥ" # ﺢ

  sub "\u062e", "kh" # خ
  sub "\ufea7", "kh" # ﺧ
  sub "\ufea8", "kh" # ﺨ
  sub "\ufea6", "kh" # ﺦ

  sub "\u0630", any(["dh", "z"]) # ذ
  sub "\ufeac", any(["dh", "z"]) # ﺬ

  sub "\u0633", any(["s", "c"]) # س
  sub "\ufeb3", any(["s", "c"]) # ﺳ
  sub "\ufeb4", any(["s", "c"]) # ﺴ
  sub "\ufeb2", any(["s", "c"]) # ﺲ

  sub "\u0635", "ṣ" # ص
  sub "\ufebb", "ṣ" # ﺻ
  sub "\ufebc", "ṣ" # ﺼ
  sub "\ufeba", "ṣ" # ﺺ

  sub "\u0636", "ḍ" # ض
  sub "\ufebf", "ḍ" # ﺿ
  sub "\ufec0", "ḍ" # ﻀ
  sub "\ufebe", "ḍ" # ﺾ

  sub "\u0637", "ṭ" # ط
  sub "\ufec3", "ṭ" # ﻃ
  sub "\ufec4", "ṭ" # ﻄ
  sub "\ufec2", "ṭ" # ﻂ

  # bare ‛ain (no vowel mark attached)
  sub "\u0639", "‛" # ع
  sub "\ufecb", "‛" # ﻋ
  sub "\ufecc", "‛" # ﻌ
  sub "\ufeca", "‛" # ﻊ

  sub "\u0638", any(["ẓ", "d"]) # ظ
  sub "\ufec7", any(["ẓ", "d"]) # ﻇ
  sub "\ufec8", any(["ẓ", "d"]) # ﻈ
  sub "\ufec6", any(["ẓ", "d"]) # ﻆ

  sub "\u0642", any(["q", "k"]) # ق
  sub "\ufed7", any(["q", "k"]) # ﻗ
  sub "\ufed8", any(["q", "k"]) # ﻘ
  sub "\ufed6", any(["q", "k"]) # ﻖ
}

# Run the main stage of the dependency aliased as `arablatn`.
# NOTE(review): presumably this romanizes whatever the parallel block above
# did not already rewrite - confirm against the dependency map.
run map.arablatn.stage.main

# POSTRULES
# Lower-case the article when it follows a space (mid-phrase), as in
# "El-Kafr el-Qadîm". An "El-" with no space in front of it is not matched.
sub " El-", " el-"

}