metadata {

authority_id: bgnpcgn
id: 2007
language: iso-639-2:tat
source_script: Cyrl
destination_script: Latn
name: Table of Correspondences for Tatar (Cyrillic-Roman) (2007 Agreement)
url: https://geonames.nga.mil/gns/html/Romanization/TABLE%20OF%20CORRESPONDENCES%20FOR%20TATAR.pdf
creation_date: 2007
confirmation_date: 2017-11
description: |
  Tatar is an official language within Respublika Tatarstan, one of the republics of the Russian
  Federation. It will normally be encountered in Cyrillic script, in which case it should be romanized by means
  of the Cyrillic-Roman table of correspondences given below.

notes:
  - |
    The alphabet portrayed in the above table is referred to as yaꞑalif-2.
    A set of simpler characters is also encountered; this is known as zamanalif.
    In this latter set, the alternative characters ä, ñ and ö are used for letters
    2, 17 and 19 respectively where the user has difficulty reproducing ə, ꞑ, and ɵ.
    Please note that all three alternatives must be used as a set, and the letters should not be intermingled.
  - Used only in borrowed words.
  - The first option is used in words with back vowels, the second in words with front vowels (though this does not apply to borrowed words).
  - yı/ye is used after a vowel (except и, ю), ъ and ь, also word-initially.
  - w is used after a vowel.
  - After ğ or q, ый is represented i.
  - Э is represented ’ after a vowel in words of Arabic origin.
  - Ю and Я are represented ü and a/ä respectively after и.
  - |
    An inventory of letter-diacritic combinations, with their Unicode encoding, in addition to the unmodified letters
    of the basic Roman script is:
        Ə (U+018F) ə (U+0259)
        Ğ (U+011E) ğ (U+011F)
        İ (U+0130) ı (U+0131)
        Ü (U+00DC) ü (U+00FC)
        Ꞑ (U+A790) ꞑ (U+A791)
        Ɵ (U+019F) ɵ (U+0275)
        Ç (U+00C7) ç (U+00E7)
        Ş (U+015E) ş (U+015F)
        Ä (U+00C4) ä (U+00E4)
        ’ (U+2019)
  - |
    The Roman-script columns show only lowercase forms but, when applying the table, uppercase and lowercase
    Roman letters as appropriate should be used.

}

# Expected romanizations for this map: each `test` pairs a Cyrillic input with
# the Latin output the rules should produce. A trailing `note[n]` tag names the
# numbered metadata note that the case exercises.
tests {

# note[6]: ый after Г is written i. The example printed in the source document's
# note ("Ğilmiev") contradicts the table's own rules (ь => ’, в => w), so the
# rule-conformant form is expected here.
test "Гыйльмиев", "Ğil’miew"
# Reference: https://en.wikipedia.org/wiki/Tatar_alphabet
test "баеды", "bayıdı" # note[4]: е preceded by a vowel => yı
test "кардәш", "qardəş"
test "калынлык һәм аеру билгесе", "qalınlıq həm ayıru bilgese" # note[4]
# Two-line sample; the "\n" inside both strings is part of the test data.
test "Барлык кешеләр дә азат һәм үз абруйлары һәм хокуклары ягыннан тиң булып туалар.\nАларга акыл һәм вөҗдан бирелгән һәм бер-берсенә карата туганнарча мөнасәбәттә булырга тиешләр.", "Barlıq keşelər də azat həm üz abruyları həm xoquqları yağınnan tiꞑ bulıp tualar.\nAlarğa aqıl həm wocdan birelgən həm ber-bersenə qarata tuğannarça monasəbəttə bulırğa tieşlər."
# Reference: https://www.azatliq.org/a/30820571.html
test "Әлдермештән Әлмәндәр", "Əldermeştən Əlməndər"
test "Әссәламү галәйкүм", "Əssəlamü ğaləyküm"
test "Танымаган кешегә", "Tanımağan keşegə" # note[3]: г => ğ before а, g before ә
test "Иң әүвәл кул бирешеп күрешик", "İꞑ əwwəl qul bireşep küreşiq" # note[5]: ү preceded by a vowel => w
test "Ялгышмыйсың", "Yalğışmıysıꞑ"
test "Нәкъ үзе", "Nəq üze"
test "Кирәгеннән артыгын", "Kirəgennən artığın" # note[3]
test "мәңгелеккә килмәгән", "məꞑgeleqkə kilməgən"
test "кулыңны куй", "qulıꞑnı quy"
test "Өммия", "Ommiä" # note[8]: я preceded by и => ä
test "Җиһангир", "Cihangir" # note[3]

}

# Main transliteration stage: context-sensitive rules first, then the plain
# one-to-one letter table in `parallel`.
stage {

# RULES
# As the tests of this map show, `after:` constrains the text that FOLLOWS the
# match ("кешегә" => "keşegə") and `before:` the text that PRECEDES it
# ("баеды" => "bayıdı").
#
# All context classes below are spelled with \u escapes so that look-alike
# Latin letters (e.g. Ə U+018F) cannot stand in for the Cyrillic ones
# (Ә U+04D8); a Latin letter in a class silently disables the rule next to the
# corresponding Cyrillic capital.

# note[3] http://www.hintfox.com/article/sistema-glasnih-zvykov-na-tatarskom-i-anglijskom-jazikah.html
# back vowels: у, а, ы, о
# front vowels: е, ә, и, ө, ү, э
# Г, К, Ю, Я take their front-vowel form when the next letter is a front vowel.
# Class = Е е Ә ә И и Ө ө Ү ү Э э
sub "Г", "G", after: any("\u0415\u0435\u04D8\u04D9\u0418\u0438\u04E8\u04E9\u04AE\u04AF\u042D\u044D")
sub "г", "g", after: any("\u0415\u0435\u04D8\u04D9\u0418\u0438\u04E8\u04E9\u04AE\u04AF\u042D\u044D")
sub "К", "K", after: any("\u0415\u0435\u04D8\u04D9\u0418\u0438\u04E8\u04E9\u04AE\u04AF\u042D\u044D")
sub "к", "k", after: any("\u0415\u0435\u04D8\u04D9\u0418\u0438\u04E8\u04E9\u04AE\u04AF\u042D\u044D")
sub "Ю", "Yü", after: any("\u0415\u0435\u04D8\u04D9\u0418\u0438\u04E8\u04E9\u04AE\u04AF\u042D\u044D")
sub "ю", "yü", after: any("\u0415\u0435\u04D8\u04D9\u0418\u0438\u04E8\u04E9\u04AE\u04AF\u042D\u044D")
sub "Я", "Yä", after: any("\u0415\u0435\u04D8\u04D9\u0418\u0438\u04E8\u04E9\u04AE\u04AF\u042D\u044D")
sub "я", "yä", after: any("\u0415\u0435\u04D8\u04D9\u0418\u0438\u04E8\u04E9\u04AE\u04AF\u042D\u044D")

# note[4]
# Е becomes yı when preceded by a vowel (и and ю excluded), ъ or ь.
# Class = А а Е е Ә ә О о Ө ө Ү ү У у Ы ы Э э Я я Ъ ъ Ь ь
# NOTE(review): the "ye" variant and the word-initial case mentioned in note[4]
# have no rule in this stage.
sub "\u0415", "Yı", before: any("\u0410\u0430\u0415\u0435\u04D8\u04D9\u041E\u043E\u04E8\u04E9\u04AE\u04AF\u0423\u0443\u042B\u044B\u042D\u044D\u042F\u044F\u042A\u044A\u042C\u044C")
sub "\u0435", "yı", before: any("\u0410\u0430\u0415\u0435\u04D8\u04D9\u041E\u043E\u04E8\u04E9\u04AE\u04AF\u0423\u0443\u042B\u044B\u042D\u044D\u042F\u044F\u042A\u044A\u042C\u044C")

# note[5]
# У and Ү become w when preceded by a vowel (the class also lists ъ and ь).
# Class = А а Е е Ә ә И и О о Ө ө Ү ү У у Ы ы Э э Ю ю Я я Ъ ъ Ь ь
sub "\u0423", "W", before: any("\u0410\u0430\u0415\u0435\u04D8\u04D9\u0418\u0438\u041E\u043E\u04E8\u04E9\u04AE\u04AF\u0423\u0443\u042B\u044B\u042D\u044D\u042E\u044E\u042F\u044F\u042A\u044A\u042C\u044C")
sub "\u0443", "w", before: any("\u0410\u0430\u0415\u0435\u04D8\u04D9\u0418\u0438\u041E\u043E\u04E8\u04E9\u04AE\u04AF\u0423\u0443\u042B\u044B\u042D\u044D\u042E\u044E\u042F\u044F\u042A\u044A\u042C\u044C")
sub "\u04AE", "W", before: any("\u0410\u0430\u0415\u0435\u04D8\u04D9\u0418\u0438\u041E\u043E\u04E8\u04E9\u04AE\u04AF\u0423\u0443\u042B\u044B\u042D\u044D\u042E\u044E\u042F\u044F\u042A\u044A\u042C\u044C")
sub "\u04AF", "w", before: any("\u0410\u0430\u0415\u0435\u04D8\u04D9\u0418\u0438\u041E\u043E\u04E8\u04E9\u04AE\u04AF\u0423\u0443\u042B\u044B\u042D\u044D\u042E\u044E\u042F\u044F\u042A\u044A\u042C\u044C")

# note[6]
# "After ğ or q, ый is represented i": the preceding letter may be Г/г (=> ğ)
# or К/к (=> q). Both are still Cyrillic at this point because ы is a back
# vowel, so the note[3] rules above leave them untouched.
# Class = Г г К к
sub "ЫЙ", "İ", before: any("\u0413\u0433\u041A\u043A")
sub "ый", "i", before: any("\u0413\u0433\u041A\u043A")

# note[7] (Э => ’ after a vowel in words of Arabic origin) has no rule here.

# note[8]
# Ю and Я are written without the y- glide when preceded by и.
# Class = И и
sub "\u042E", "Ü", before: any("\u0418\u0438")
sub "\u044E", "ü", before: any("\u0418\u0438")
sub "\u042F", "Ä", before: any("\u0418\u0438")
sub "\u044F", "ä", before: any("\u0418\u0438")

# CHARACTERS
# Default one-to-one table for every letter not consumed by a rule above.
# Multi-letter uppercase outputs are title case (Ts, Şç, Yu, Ya).
parallel {
  sub "А", "A" # А
  sub "Ә", any("ƏÄ") # Ә => [Ə, Ä] note[1]
  sub "Б", "B" # Б
  sub "В", any("WV") # В note[2]
  sub "Г", "Ğ" # Г => Ğ note[3]
  sub "Д", "D" # Д
  sub "Е", "E" # Е note[3] note[4]
  sub "Ж", "J" # Ж
  sub "Җ", "C" # Җ
  sub "З", "Z" # З
  sub "И", "İ" # И => İ
  sub "Й", "Y" # Й
  sub "К", "Q" # К note[3]
  sub "Л", "L" # Л
  sub "М", "M" # М
  sub "Н", "N" # Н
  sub "Ң", any("ꞐÑ") # Ң => [Ꞑ, Ñ] note[1]
  sub "О", "O" # О
  # NOTE(review): the notes' Unicode inventory lists Ɵ/ɵ (U+019F/U+0275), yet
  # the primary output here is plain O/o and the tests expect "o"; confirm
  # against the BGN/PCGN table before changing.
  sub "Ө", any("OÖ") # Ө => [O, Ö] note[1]
  sub "П", "P" # П
  sub "Р", "R" # Р
  sub "С", "S" # С
  sub "Т", "T" # Т
  sub "У", "U" # У note[5]
  sub "Ү", "Ü" # Ү => Ü note[5]
  sub "Ф", "F" # Ф
  sub "Х", "X" # Х => X (Latin U+0058, matching lowercase х => x)
  sub "Һ", "H" # Һ
  sub "Ц", "Ts" # Ц
  sub "Ч", "Ç" # Ч => Ç
  sub "Ш", "Ş" # Ш => Ş
  sub "Щ", "Şç" # Щ => Şç
  sub "Ъ", "" # Ъ
  sub "Ы", "I" # Ы => I note[2] note[6]
  sub "Ь", "’" # Ь => ’
  sub "Э", "E" # Э note[7]
  sub "Ю", "Yu" # Ю note[3] note[8]
  sub "Я", "Ya" # Я note[3] note[8]

  sub "\u0430", "a" # а
  sub "\u04D9", any("əä") # ә => [ə, ä] note[1]
  sub "\u0431", "b" # б
  sub "\u0432", any("wv") # в note[2]
  sub "\u0433", "ğ" # г => ğ note[3]
  sub "\u0434", "d" # д
  sub "\u0435", "e" # e note[3] note[4]
  sub "\u0436", "j" # ж
  sub "\u0497", "c" # җ
  sub "\u0437", "z" # з
  sub "\u0438", "i" # и
  sub "\u0439", "y" # й
  sub "\u043A", "q" # к note[3]
  sub "\u043B", "l" # л
  sub "\u043C", "m" # м
  sub "\u043D", "n" # н
  sub "\u04A3", any("ꞑñ") # ң => [ꞑ, ñ] note[1]
  sub "\u043E", "o" # о
  sub "\u04E9", any("oö") # ө => [o, ö] note[1]
  sub "\u043F", "p" # п
  sub "\u0440", "r" # р
  sub "\u0441", "s" # с
  sub "\u0442", "t" # т
  sub "\u0443", "u" # у note[5]
  sub "\u04AF", "ü" # ү => ü note[5]
  sub "\u0444", "f" # ф
  sub "\u0445", "x" # х
  sub "\u04BB", "h" # һ
  sub "\u0446", "ts" # ц
  sub "\u0447", "ç" # ч => ç
  sub "\u0448", "ş" # ш => ş
  sub "\u0449", "şç" # щ => şç
  sub "ъ", "" # ъ
  sub "\u044B", "ı" # ы => ı note[2] note[6]
  sub "ь", "’" # ь => ’
  sub "\u044D", "e" # э note[7]
  sub "\u044E", "yu" # ю note[3] note[8]
  sub "\u044F", "ya" # я note[3] note[8]
}

}