metadata {

# Identification of the romanization system implemented by this map.
# `language` is the ISO 639-2 code of the language being romanized
# (Bashkir = bak), not of the state language of the Russian Federation.
authority_id: bgnpcgn
id: 2007
language: iso-639-2:bak
source_script: Cyrl
destination_script: Latn
name: Table of Correspondences for Bashkir (Cyrillic-Roman) (2007 Agreement)
url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/829203/TABLE_OF_CORRESPONDENCES__FOR_BASHKIR.pdf
creation_date: 2007
confirmation_date: 2019
description: |
  Bashkir is an official language within Respublika Bashkortostan, one of the
  republics of the Russian Federation. It will normally be encountered in Cyrillic script, in
  which case it should be romanized by means of the Cyrillic-Roman table of
  correspondences given below

notes:
  - The letter w is used word initially and before a vowel. # 'and' or 'or' ?
  - The letter sequence ye is used word initially and before a vowel. # 'and' or 'or' ?
  - The letter w is used between or after vowels.
  - The letter w is used after e, u, ö and ə.
  - |
    An inventory of letter-diacritic combinations, with their Unicode encoding,
    in addition to the unmodified letters of the basic Roman script is:
      Ğ (U+011E) ğ (U+011F)
      Ź (U+0179) ź (U+017A)
      Ë (U+00CB) ë (U+00EB)
      Ñ (U+00D1) ñ (U+00F1)
      Ö (U+00D6) ö (U+00F6)
      Ś (U+015A) ś (U+015B)
      Ü (U+00DC) ü (U+00FC)
      Ç (U+00C7) ç (U+00E7)
      Ş (U+015E) ş (U+015F)
      Ə (U+018F) ə (U+0259)
  - |
    The Roman-script columns show only lowercase forms but, when applying the table,
    uppercase and lowercase Roman letters as appropriate should be used.

}

tests {

# Each test pairs a Cyrillic input string with the romanized output the
# stage below is expected to produce for it.

# Examples adopted from the KNAB Bashkir romanization table:
# http://www.eki.ee/knab/lat/kblba.pdf
test "Васйылға", "Wasyılğa"
test "Еҙем", "Yeźem"
test "Раевка", "Raevka"
test "Сәйетҡол", "Səyetqol"
test "Ауырғазы", "Awırğazı"
test "Бурһыҡтау", "Burhıqtaw"
test "Мәләүез", "Mələwez"
test "Ҡыҙылъяр", "Qıźılyar"
# Examples adopted from the grammar section of the Wikipedia article:
# https://en.wikipedia.org/wiki/Bashkir_language#Grammar
test "кемдең", "kemdeñ"
test "кем", "kem"
test "был", "bıl"
test "ошо", "oşo"
test "быларҙың", "bılarźıñ"
test "һеҙҙән", "heźźən"
test "һин", "hin"
test "һеҙҙең", "heźźeñ"

}

stage {

# RULES
# Context-sensitive substitutions, listed ahead of the plain character table
# so that the special cases are handled before the default mappings.
# In these rules `before:` constrains the text preceding the match and
# `after:` constrains the text following it.
# The vowel class lists every Cyrillic vowel in both cases, including
# lowercase ү (U+04AF).

# note[1]: word-initial В/в directly followed by a vowel is written W/w.
sub boundary + "\u0412", "W", after: any("АаЕеЁёИиОоӨөУуҮүЫыЭэӘәЮюЯя")
sub boundary + "\u0432", "w", after: any("АаЕеЁёИиОоӨөУуҮүЫыЭэӘәЮюЯя")
# note[2]: word-initial Е/е is written Ye/ye ...
sub boundary + "\u0415", "Ye"
sub boundary + "\u0435", "ye"
# ... and so is Е/е that is preceded by a vowel and followed by a boundary.
sub "\u0415", "Ye", before: any("АаЕеЁёИиОоӨөУуҮүЫыЭэӘәЮюЯя"), after: boundary
sub "\u0435", "ye", before: any("АаЕеЁёИиОоӨөУуҮүЫыЭэӘәЮюЯя"), after: boundary

# note[3] # note[4]: У/Ү and у/ү preceded by a vowel are written W/w.
sub any("\u0423\u04AE"), "W", before: any("АаЕеЁёИиОоӨөУуҮүЫыЭэӘәЮюЯя")
sub any("\u0443\u04AF"), "w", before: any("АаЕеЁёИиОоӨөУуҮүЫыЭэӘәЮюЯя")

# CHARACTERS
# Default one-to-one table for every letter of the Bashkir Cyrillic alphabet:
# uppercase letters first, then lowercase. Uppercase letters that romanize to
# two Latin letters use title case (Ts, Şç, Yu, Ya). The hard and soft signs
# are dropped (mapped to the empty string).
parallel {
  sub "\u0410", "A" # А
  sub "\u0411", "B" # Б
  sub "\u0412", "V" # В note[1]
  sub "\u0413", "G" # Г
  sub "\u0492", "Ğ" # Ғ
  sub "\u0414", "D" # Д
  sub "\u0498", "Ź" # Ҙ
  sub "\u0415", "E" # Е note[2]
  sub "\u0401", "Ë" # Ё
  sub "\u0416", "J" # Ж
  sub "\u0417", "Z" # З
  sub "\u0418", "I" # И
  sub "\u0419", "Y" # Й
  sub "\u041A", "K" # К
  sub "\u04A0", "Q" # Ҡ
  sub "\u041B", "L" # Л
  sub "\u041C", "M" # М
  sub "\u041D", "N" # Н
  sub "\u04A2", "Ñ" # Ң
  sub "\u041E", "O" # О
  sub "\u04E8", "Ö" # Ө
  sub "\u041F", "P" # П
  sub "\u0420", "R" # Р
  sub "\u0421", "S" # С
  sub "\u04AA", "Ś" # Ҫ
  sub "\u0422", "T" # Т
  sub "\u0423", "U" # У
  sub "\u04AE", "Ü" # Ү note[3]
  sub "\u0424", "F" # Ф
  sub "\u0425", "X" # Х
  sub "\u04BA", "H" # Һ
  sub "\u0426", "Ts" # Ц
  sub "\u0427", "Ç" # Ч
  sub "\u0428", "Ş" # Ш
  sub "\u0429", "Şç" # Щ
  sub "\u042A", "" # Ъ
  sub "\u042B", "I" # Ы
  sub "\u042C", "" # Ь
  sub "\u042D", "E" # Э
  sub "\u04D8", "Ə" # Ә
  sub "\u042E", "Yu" # Ю
  sub "\u042F", "Ya" # Я

  sub "\u0430", "a" # а
  sub "\u0431", "b" # б
  sub "\u0432", "v" # в note[1]
  sub "\u0433", "g" # г
  sub "\u0493", "ğ" # ғ
  sub "\u0434", "d" # д
  sub "\u0499", "ź" # ҙ
  sub "\u0435", "e" # е note[2]
  sub "\u0451", "ë" # ё
  sub "\u0436", "j" # ж
  sub "\u0437", "z" # з
  sub "\u0438", "i" # и
  sub "\u0439", "y" # й
  sub "\u043A", "k" # к
  sub "\u04A1", "q" # ҡ
  sub "\u043B", "l" # л
  sub "\u043C", "m" # м
  sub "\u043D", "n" # н
  sub "\u04A3", "ñ" # ң
  sub "\u043E", "o" # о
  sub "\u04E9", "ö" # ө
  sub "\u043F", "p" # п
  sub "\u0440", "r" # р
  sub "\u0441", "s" # с
  sub "\u04AB", "ś" # ҫ
  sub "\u0442", "t" # т
  sub "\u0443", "u" # у
  sub "\u04AF", "ü" # ү note[3]
  sub "\u0444", "f" # ф
  sub "\u0445", "x" # х
  sub "\u04BB", "h" # һ
  sub "\u0446", "ts" # ц
  sub "\u0447", "ç" # ч
  sub "\u0448", "ş" # ш
  sub "\u0449", "şç" # щ
  sub "\u044A", "" # ъ
  sub "\u044B", "ı" # ы
  sub "\u044C", "" # ь
  sub "\u044D", "e" # э
  sub "\u04D9", "ə" # ә
  sub "\u044E", "yu" # ю
  sub "\u044F", "ya" # я
}

}