metadata {
  authority_id: bgnpcgn
  id: 2007
  language: iso-639-2:tat
  source_script: Cyrl
  destination_script: Latn
  name: Table of Correspondences for Tatar (Cyrillic-Roman) (2007 Agreement)
  url: https://geonames.nga.mil/gns/html/Romanization/TABLE%20OF%20CORRESPONDENCES%20FOR%20TATAR.pdf
  creation_date: 2007
  confirmation_date: 2017-11
  description: |
    Tatar is an official language within Respublika Tatarstan, one of the
    republics of the Russian Federation. It will normally be encountered in
    Cyrillic script, in which case it should be romanized by means of the
    Cyrillic-Roman table of correspondences given below.

  # The notes are referenced from the rules as note[1] .. note[10], in this order.
  notes:
    - |
      The alphabet portrayed in the above table is referred to as yaꞑalif-2.
      A set of simpler characters is also encountered; this is known as
      zamanalif. In this latter set, the alternative characters ä, ñ and ö
      are used for letters 2, 17 and 19 respectively where the user has
      difficulty reproducing ə, ꞑ, and ɵ. Please note that all three
      alternatives must be used as a set, and the letters should not be
      intermingled.
    - Used only in borrowed words.
    - The first option is used in words with back vowels, the second in words with front vowels (though this does not apply to borrowed words).
    - yı/ye is used after a vowel (except и, ю), ъ and ь, also word-initially.
    - w is used after a vowel.
    - After ğ or q, ый is represented i
    - Э is represented ’ after a vowel in words of Arabic origin.
    - Ю and Я are represented ü and a/ä respectively after и.
    - |
      An inventory of letter-diacritic combinations, with their Unicode
      encoding, in addition to the unmodified letters of the basic Roman
      script is:

      Ə (U+018F) ə (U+0259)
      Ğ (U+011E) ğ (U+011F)
      İ (U+0130) ı (U+0131)
      Ü (U+00DC) ü (U+00FC)
      Ꞑ (U+A790) ꞑ (U+A791)
      Ɵ (U+019F) ɵ (U+0275)
      Ç (U+00C7) ç (U+00E7)
      Ş (U+015E) ş (U+015F)
      Ä (U+00C4) ä (U+00E4)
      ’ (U+2019)
    - |
      The Roman-script columns show only lowercase forms but, when applying
      the table, uppercase and lowercase Roman letters as appropriate should
      be used.
}
tests {
  # Each `test SOURCE, EXPECTED` pairs a Tatar Cyrillic input with the
  # romanization this map is expected to produce. Trailing note[N] comments
  # point at the metadata note the case exercises.

  # note[6]: the note's own example spells this name "Ğilmiev", which is
  # incorrect according to the rules.
  test "Гыйльмиев", "Ğil’miew"

  # https://en.wikipedia.org/wiki/Tatar_alphabet
  test "баеды", "bayıdı" # note[4]
  test "кардәш", "qardəş"
  test "калынлык һәм аеру билгесе", "qalınlıq həm ayıru bilgese" # note[4]
  test "Барлык кешеләр дә азат һәм үз абруйлары һәм хокуклары ягыннан тиң булып туалар.\nАларга акыл һәм вөҗдан бирелгән һәм бер-берсенә карата туганнарча мөнасәбәттә булырга тиешләр.", "Barlıq keşelər də azat həm üz abruyları həm xoquqları yağınnan tiꞑ bulıp tualar.\nAlarğa aqıl həm wocdan birelgən həm ber-bersenə qarata tuğannarça monasəbəttə bulırğa tieşlər."

  # https://www.azatliq.org/a/30820571.html
  test "Әлдермештән Әлмәндәр", "Əldermeştən Əlməndər"
  test "Әссәламү галәйкүм", "Əssəlamü ğaləyküm"
  test "Танымаган кешегә", "Tanımağan keşegə" # note[3]
  test "Иң әүвәл кул бирешеп күрешик", "İꞑ əwwəl qul bireşep küreşiq" # note[5]
  test "Ялгышмыйсың", "Yalğışmıysıꞑ"
  test "Нәкъ үзе", "Nəq üze"
  test "Кирәгеннән артыгын", "Kirəgennən artığın" # note[3]
  test "мәңгелеккә килмәгән", "məꞑgeleqkə kilməgən"
  test "кулыңны куй", "qulıꞑnı quy"
  test "Өммия", "Ommiä" # note[8]
  test "Җиһангир", "Cihangir" # note[3]
}
stage {
  # RULES
  #
  # Context-sensitive substitutions. They are listed before the plain
  # single-letter table so that the Cyrillic neighbours they inspect have not
  # yet been romanized. As used throughout this file, `after:` constrains the
  # character that follows the match and `before:` the character that
  # precedes it.
  #
  # In every context set the capital schwa is written as \u04D8 (CYRILLIC
  # CAPITAL LETTER SCHWA) and the small one as \u04D9. The visually identical
  # Latin Ə (U+018F) never occurs in Cyrillic input, so using it in a set
  # silently disables the rule for capital Ә.

  # note[3]: second (front-vowel) option for Г, К, Ю, Я when the letter is
  # directly followed by a front vowel; otherwise the back-vowel defaults from
  # the CHARACTERS table apply.
  # http://www.hintfox.com/article/sistema-glasnih-zvykov-na-tatarskom-i-anglijskom-jazikah.html
  # back vowels:  у, а, ы, о
  # front vowels: е, ә, и, ө, ү, э
  sub "Г", "G", after: any("Ее\u04D8\u04D9ИиӨөҮүЭэ")
  sub "г", "g", after: any("Ее\u04D8\u04D9ИиӨөҮүЭэ")
  sub "К", "K", after: any("Ее\u04D8\u04D9ИиӨөҮүЭэ")
  sub "к", "k", after: any("Ее\u04D8\u04D9ИиӨөҮүЭэ")
  sub "Ю", "Yü", after: any("Ее\u04D8\u04D9ИиӨөҮүЭэ")
  sub "ю", "yü", after: any("Ее\u04D8\u04D9ИиӨөҮүЭэ")
  sub "Я", "Yä", after: any("Ее\u04D8\u04D9ИиӨөҮүЭэ")
  sub "я", "yä", after: any("Ее\u04D8\u04D9ИиӨөҮүЭэ")

  # note[4]: Е is yı when preceded by a vowel (except и, ю), ъ or ь.
  # NOTE(review): the note also mentions the "ye" option and the word-initial
  # position; neither is implemented here.
  sub "\u0415", "Yı", before: any("АаЕе\u04D8\u04D9ОоӨөҮүУуЫыЭэЯяЪъЬь")
  sub "\u0435", "yı", before: any("АаЕе\u04D8\u04D9ОоӨөҮүУуЫыЭэЯяЪъЬь")

  # note[5]: У and Ү are w when preceded by a vowel (the set also lists ъ, ь).
  sub "\u0423", "W", before: any("АаЕе\u04D8\u04D9ИиОоӨөҮүУуЫыЭэЮюЯяЪъЬь")
  sub "\u0443", "w", before: any("АаЕе\u04D8\u04D9ИиОоӨөҮүУуЫыЭэЮюЯяЪъЬь")
  sub "\u04AE", "W", before: any("АаЕе\u04D8\u04D9ИиОоӨөҮүУуЫыЭэЮюЯяЪъЬь")
  sub "\u04AF", "w", before: any("АаЕе\u04D8\u04D9ИиОоӨөҮүУуЫыЭэЮюЯяЪъЬь")

  # note[6]: ый is i after ğ or q, i.e. after Cyrillic Г/г or К/к. A Г/К that
  # is followed by ы (a back vowel) is left untouched by the note[3] rules
  # above, so it is still Cyrillic when this rule looks behind the match.
  sub "ый", "i", before: any("ГгКк")

  # note[8]: after и, Ю is ü and Я is ä.
  sub "\u042E", "Ü", before: any("Ии")
  sub "\u044E", "ü", before: any("Ии")
  sub "\u042F", "Ä", before: any("Ии")
  sub "\u044F", "ä", before: any("Ии")

  # CHARACTERS
  #
  # Default single-letter correspondences. Where any(...) offers two outputs,
  # the first is the yaꞑalif-2 letter and the second the zamanalif
  # alternative described in note[1] (or the borrowed-word variant for В).
  parallel {
    # Uppercase. Multi-letter outputs are title-cased (Ts, Şç, Yu, Ya).
    sub "А", "A"          # А
    sub "Ә", any("ƏÄ")    # Ә => [Ə, Ä] note[1]
    sub "Б", "B"          # Б
    sub "В", any("WV")    # В note[2]
    sub "Г", "Ğ"          # Г => Ğ note[3]
    sub "Д", "D"          # Д
    sub "Е", "E"          # Е note[3] note[4]
    sub "Ж", "J"          # Ж
    sub "Җ", "C"          # Җ
    sub "З", "Z"          # З
    sub "И", "İ"          # И => İ
    sub "Й", "Y"          # Й
    sub "К", "Q"          # К note[3]
    sub "Л", "L"          # Л
    sub "М", "M"          # М
    sub "Н", "N"          # Н
    sub "Ң", any("ꞐÑ")    # Ң => [Ꞑ, Ñ] note[1]
    sub "О", "O"          # О
    # NOTE(review): the inventory in this file's metadata lists Ɵ/ɵ
    # (U+019F/U+0275) for this letter, yet the first option here is plain O/o,
    # which collides with О. Confirm against the BGN/PCGN table before
    # changing; the expected strings in `tests` currently rely on "o".
    sub "Ө", any("OÖ")    # Ө => [O, Ö] note[1]
    sub "П", "P"          # П
    sub "Р", "R"          # Р
    sub "С", "S"          # С
    sub "Т", "T"          # Т
    sub "У", "U"          # У note[5]
    sub "Ү", "Ü"          # Ү => Ü note[5]
    sub "Ф", "F"          # Ф
    sub "Х", "X"          # Х => Latin X (U+0058), matching lowercase х => x
    sub "Һ", "H"          # Һ
    sub "Ц", "Ts"         # Ц
    sub "Ч", "Ç"          # Ч => Ç
    sub "Ш", "Ş"          # Ш => Ş
    sub "Щ", "Şç"         # Щ => Şç
    sub "Ъ", ""           # Ъ is dropped
    sub "Ы", "I"          # Ы => I note[2] note[6]
    sub "Ь", "’"          # Ь => ’
    sub "Э", "E"          # Э note[7]
    sub "Ю", "Yu"         # Ю note[3] note[8]
    sub "Я", "Ya"         # Я note[3] note[8]

    # Lowercase.
    sub "\u0430", "a"          # а
    sub "\u04D9", any("əä")    # ә => [ə, ä] note[1]
    sub "\u0431", "b"          # б
    sub "\u0432", any("wv")    # в note[2]
    sub "\u0433", "ğ"          # г => ğ note[3]
    sub "\u0434", "d"          # д
    sub "\u0435", "e"          # е note[3] note[4]
    sub "\u0436", "j"          # ж
    sub "\u0497", "c"          # җ
    sub "\u0437", "z"          # з
    sub "\u0438", "i"          # и
    sub "\u0439", "y"          # й
    sub "\u043A", "q"          # к note[3]
    sub "\u043B", "l"          # л
    sub "\u043C", "m"          # м
    sub "\u043D", "n"          # н
    sub "\u04A3", any("ꞑñ")    # ң => [ꞑ, ñ] note[1]
    sub "\u043E", "o"          # о
    sub "\u04E9", any("oö")    # ө => [o, ö] note[1] (see NOTE(review) on Ө)
    sub "\u043F", "p"          # п
    sub "\u0440", "r"          # р
    sub "\u0441", "s"          # с
    sub "\u0442", "t"          # т
    sub "\u0443", "u"          # у note[5]
    sub "\u04AF", "ü"          # ү => ü note[5]
    sub "\u0444", "f"          # ф
    sub "\u0445", "x"          # х
    sub "\u04BB", "h"          # һ
    sub "\u0446", "ts"         # ц
    sub "\u0447", "ç"          # ч => ç
    sub "\u0448", "ş"          # ш => ş
    sub "\u0449", "şç"         # щ => şç
    sub "ъ", ""                # ъ is dropped
    sub "\u044B", "ı"          # ы => ı note[2] note[6]
    sub "ь", "’"               # ь => ’
    sub "\u044D", "e"          # э note[7]
    sub "\u044E", "yu"         # ю note[3] note[8]
    sub "\u044F", "ya"         # я note[3] note[8]
  }
}