metadata {

authority_id: ungen
id: 2017
language: iso-639-3:prs
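# prs stands for Dari (https://iso639-3.sil.org/code/prs)
# NOTE(review): this map is titled "Romanization of Persian"; ISO 639-3 uses
# fas (Persian, macrolanguage) / pes (Iranian Persian) -- confirm prs is intended.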
source_script: Arab
destination_script: Latn
name: Romanization of Persian (1967)
url: http://www.eki.ee/wgrs/v2_2/rom1_fa.htm
creation_date: 1967
confirmation_date: 01-2003
description: |
  The United Nations recommended system was approved in 1967
  (resolution I/13), based on the official system adopted by Iran and
  published in its English version as Transliteration of
  Farsi Geographic Names to Latin Alphabet (September 1966).
  The romanization table was also published as an annex to
  the Toponymic Guidelines for the Islamic Republic of Iran
  in 2000 (Toponymic Guidelines for map and other editors –
  Revised edition 1998. Submitted by the Islamic Republic of
  Iran. UNGEGN, 20th session. New York, 17-28 January 2000,
  Working Paper No. 41.).

  The system is used in the Islamic Republic of Iran and in
  international cartographic products.

  Persian (Farsi) uses the Perso-Arabic script that is
  written from right to left. The Persian script usually
  omits vowel points and diacritical marks from writing which
  makes it difficult to obtain uniform results in the
  romanization of Persian. The romanization is generally
  reversible though there are some ambiguous letter
  sequences (kh, sh, th, zh) which also may represent the
  romanized values of two Persian characters in addition to
  the respective single ones.

notes:
  - A Word-initially.
  - B Not romanized; marks absence of the vowel.
  - C Doubling of the consonant letter.
  - D After a consonant (excl. -ah).
  - E After a vowel (see also note 2).
  - 1-The adjectival ending of Arabic origin -يه in Persian is
    romanized -īyeh. In romanizing the definite article the
    same rules of assimilation of consonants are applied as in
    Arabic, e.g. زين الدين Zeyn od Dīn.

  - 2-The relational suffix (eẕāfeh) -e is usually not
    expressed in Persian writing after a consonant. After final
    ا or و it is written with ى, e.g. پاى آب Pā-ye Āb. After
    final ى and ه it is expressed by writing hamzeh over the
    character, e.g. دهانۀ ممبر Dahāneh-ye Mambar.

  - 3-To point Persian vowels two systems are in use that are
    separated by a column in the table. The first system is a
    Persian one while the other adheres to the Arabic
    tradition. In normal spelling vowel points are not used.

}

# Regression fixtures for this map: each `test` line pairs a fully vocalized
# Arabic-script input with the romanized string the stage below must produce.
tests {

# Word-initial alef and long vowels / diphthongs; final ta' marboota as "ah".
test "اَنجِيرة", "Anjīrah"
test "اِيْوَانِي", "Eyvānī"
# Alef with madda: plain "ā" word-initially (Note A), "’ā" elsewhere.
test "آبَادَان", "Ābādān"
test "قُرآن", "Qor’ān"
test "مَآب", "Ma’āb"
# Shadda doubles the consonant (Note C).
test "مُحَمَّد", "Moḩammad"
# Ezafeh (Note 2): "-e" after a consonant, "-ye" after yeh or heh; the
# hyphenated suffix stays lower-case.
test "كُوهِ مَرغُوب", "Kūh-e Marghūb"
test "پَايِ آب", "Pā-ye Āb"
test "جُويِ آس", "Jū-ye Ās"
test "دَهَانِهٴ مَمبَر", "Dahāneh-ye Mambar"
test "سَلَسِيٴ بُذُرگ", "Salasī-ye Boz̄org"
# Definite article in the middle of a name is rendered as lower-case "ol" (Note 1).
test "ذُو الفَقَار", "Z̄ū ol Faqār"

}

# Main conversion stage. Rules at this level run in order: the pre-rule
# normalises input, the `parallel` block romanizes it, and the post-rules fix
# capitalisation.
stage {

# PRERULES
# Persian keyboards commonly produce ARABIC LETTER FARSI YEH (U+06CC), while the
# yeh rules below (long ī, diphthong ey, doubling, plain consonant) are keyed on
# ARABIC LETTER YEH (U+064A). Fold the Persian code point onto the Arabic one
# first so that every yeh rule applies to both encodings instead of letting
# U+06CC pass through unromanized.
sub "\u06cc", "\u064a"

# CHARACTERS
parallel {

  # Short vowels and sukun
  sub "\u064e", "a" # َ fatha
  sub "\u064e", "", after: "\u0629" # َ fatha followed by ta' marboota (the ta' marboota rule supplies the "a")
  sub "\u064e", "", after: "a" + any("ht") # َ fatha followed by ta' marboota, handling different order of conversion
  sub "\u0650", "e" # ِ kasra
  sub "\u064f", "o" # ُ damma
  sub "\u0652", "" # ْ sokoon, see Note B

  # Long vowels, diphthongs and hamza
  sub "\u064e\u0627", "ā" # ـَا fatha followed by ا
  sub "\u064e\u0649", "á" # ـَى fatha followed by ى which is ا not ي
  sub boundary + "\u0622", "ā" # آ  NOTE A (word-initial)
  sub "\u0622", "’ā" # آ elsewhere
  sub "\u0650\u064a", "ī" # ـِي kasra followed by ي
  sub "\u064f\u0648", "ū" # ـُو damma followed by و
  sub any("\u064e\u0650") + "\u064a\u0652", "ey" # fatha or kasra + ي + sokoon
  sub any("\u064e\u064f") + "\u0648\u0652", "ow" # fatha or damma + و + sokoon
  sub "\u0621", "’" # ء
  sub "\u2013", "–" # en dash maps to itself
  sub "\u2013" + any("\u0649\u064a") + "\u0647", "-īyeh" # en dash + ى/ي + ه, adjectival ending (Note 1)

  # Ezafeh (Note 2)
  sub any("\u0654\u0674"), "-e" # ٴ ezafeh written as hamza above / high hamza
  sub any("\u0654\u0674"), "-ye", before: any("\u064a\u0647") # ٴ ezafeh preceded by ي or ه
  sub "\u0650" + boundary, "-e" # ِ word-final kasra
  sub any("\u064a\u06cc") + "\u0650" + boundary, "-ye" # yeh + word-final kasra

  # NOTE C: consonant + shadda is written as the doubled consonant.
  # NOTE(review): the digraph rows (خ "kh", ش "sh", غ "gh") emit the digraph
  # only once, unlike every other row -- confirm whether "khkh"/"shsh"/"ghgh"
  # is intended before changing them.
  sub "\u0628\u0651", "bb" # ب
  sub "\u067e\u0651", "pp" # پ (added: otherwise the shadda is left in the output)
  sub "\u062a\u0651", "tt" # ت
  sub "\u062b\u0651", "s̄s̄" # ث
  sub "\u062c\u0651", "jj" # ج
  sub "\u062d\u0651", "ḩḩ" # ح
  sub "\u062e\u0651", "kh" # خ
  sub "\u062f\u0651", "dd" # د
  sub "\u0630\u0651", "z̄z̄" # ذ
  sub "\u0631\u0651", "rr" # ر
  sub "\u0632\u0651", "zz" # ز
  sub "\u0633\u0651", "ss" # س
  sub "\u0634\u0651", "sh" # ش
  sub "\u0635\u0651", "şş" # ص
  sub "\u0636\u0651", "ẕẕ" # ض
  sub "\u0637\u0651", "ţţ" # ط
  sub "\u0638\u0651", "z̧z̧" # ظ
  sub "\u063a\u0651", "gh" # غ
  sub "\u0641\u0651", "ff" # ف
  sub "\u0642\u0651", "qq" # ق
  sub "\u0643\u0651", "kk" # ك
  sub "\u06a9\u0651", "kk" # ک (added: Persian keheh, same value as ك)
  sub "\u06af\u0651", "gg" # گ (added: otherwise the shadda is left in the output)
  sub "\u0644\u0651", "ll" # ل
  sub "\u0645\u0651", "mm" # م
  sub "\u0646\u0651", "nn" # ن
  sub "\u0647\u0651", "hh" # ه
  sub "\u0648\u0651", "vv" # و
  sub "\u064a\u0651", "yy" # ي

  # NOTE 1
  # Sun letters: the article's lam assimilates to the following consonant.
  sub boundary + "\u0627\u0644\u062a" + maybe("\u0651"), "ot t" # الت
  sub boundary + "\u0627\u0644\u062b" + maybe("\u0651"), "os̄ s̄" # الث
  sub boundary + "\u0627\u0644\u062f" + maybe("\u0651"), "od d" # الد
  sub boundary + "\u0627\u0644\u0630" + maybe("\u0651"), "oz̄ z̄" # الذ
  sub boundary + "\u0627\u0644\u0631" + maybe("\u0651"), "or r" # الر
  sub boundary + "\u0627\u0644\u0632" + maybe("\u0651"), "oz z" # الز
  sub boundary + "\u0627\u0644\u0633" + maybe("\u0651"), "os s" # الس
  sub boundary + "\u0627\u0644\u0634" + maybe("\u0651"), "osh sh" # الش
  sub boundary + "\u0627\u0644\u0635" + maybe("\u0651"), "oş ş" # الص
  sub boundary + "\u0627\u0644\u0636" + maybe("\u0651"), "oẕ ẕ" # الض
  sub boundary + "\u0627\u0644\u0637" + maybe("\u0651"), "oţ ţ" # الط
  sub boundary + "\u0627\u0644\u0638" + maybe("\u0651"), "oz̧ z̧" # الظ
  sub boundary + "\u0627\u0644\u0644" + maybe("\u0651"), "ol l" # الل
  sub boundary + "\u0627\u0644\u0646" + maybe("\u0651"), "on n" # الن

  sub "\u0650\u064a\u0651", "īy" # kasra + ي + shadda
  sub "\u0650\u064a", "iy", after: any("\u064e\u064f") # ـِي kasra followed by ي, when a fatha or damma follows

  # ta' marboota
  sub "\u0629", "at" # ة in the middle of the sentence
  sub "\u0629" + line_end, "ah" # ة at the end of the input line
  # ة closing a word that starts with the article ال followed by 2 to 13
  # characters from the Arabic block. One rule per length, because each
  # `before:` pattern is spelled out with a fixed number of characters.
  sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
  sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
  sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
  sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
  sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
  sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
  sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
  sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
  sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
  sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
  sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")
  sub "\u0629", "ah", before: boundary + "\u0627\u0644" + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff") + any("\u0600".."\u06ff")

  # Definite article before a non-assimilating consonant
  sub boundary + "\u0627\u0644", "al " # ال
  sub space + boundary + "\u0627\u0644", " ol " # ال after a space, special Rule 1

  # Alef
  sub boundary + "\u0627", "" # ا initial: vowel carrier, not romanized itself
  sub "\u0627", "ā" # ا medial
  sub "\u0627" + boundary, "ā" # ا final

  # Consonants
  sub "\u0628", "b" # ب
  sub "\u067E", "p" # پ
  sub "\u062A", "t" # ت
  sub "\u062B", "s̄" # ث
  sub "\u062C", "j" # ج
  sub "\u0686", "ch" # چ
  sub "\u062D", "ḩ" # ح
  sub "\u062E", "kh" # خ
  sub "\u062F", "d" # د
  sub "\u0630", "z̄" # ذ
  sub "\u0631", "r" # ر
  sub "\u0632", "z" # ز
  sub "\u0698", "zh" # ژ
  sub "\u0633", "s" # س
  sub "\u0634", "sh" # ش
  sub "\u0635", "ş" # ص
  sub "\u0636", "ẕ" # ض
  sub "\u0637", "ţ" # ط
  sub "\u0638", "z̧" # ظ
  sub "\u0639", "’" # ع
  sub "\u063A", "gh" # غ
  sub "\u0641", "f" # ف
  sub "\u0642", "q" # ق
  sub "\u0643", "k" # ك
  sub "\u06A9", "k" # ک
  sub "\u06AF", "g" # گ
  sub "\u0644", "l" # ل
  sub "\u0645", "m" # م
  sub "\u0646", "n" # ن
  sub "\u0648", "v" # و
  sub "\u0647", "h" # ه
  sub "\u0649", "y" # ى alef maksura
  sub "\u064a", "y" # ي
}

# POSTRULES
# Capitalise the first character of every word, except when the word start is
# followed by a quote or hyphen (so the hyphenated ezafeh "-e"/"-ye" stays
# lower-case).
sub any("\u0061".."\uFFFF"), upcase, before: boundary, not_before: boundary + any("‘’'-")

# The definite article inside a name stays lower-case.
sub " Al", " al"
sub " Ol", " ol"

}