# Existence of this file denotes that there are Interscript Maps in this gem.
# Primary directories holding libraries and maps, relative to this file.
paths:
  - libs
  - maps
# Staging map directories. Maps stored here do not pass the tests yet; they
# may still be in the process of being ported. To enable them, set the
# INTERSCRIPT_STAGING environment variable.
staging:
  - maps-staging
# Index files for Secryst.
secryst-models:
  - https://raw.githubusercontent.com/secryst/data-thai-interscript/master/index.yaml
# Rababa configurations required by the maps, keyed by configuration name.
rababa-configs:
  # The key is quoted so that it stays a string rather than the integer 200.
  "200":
    model: https://github.com/secryst/rababa-models/releases/download/0.1/diacritization_model_max_len_200.onnx
    config:
      session_name: base
      text_encoder: ArabicEncoderWithStartSymbol
      text_cleaner: valid_arabic_cleaners
      max_len: 200
      batch_size: 32
# The maps listed under each key below are incompatible with that compiler or
# platform and must not be tested or compiled for it.
skip:
  Interscript::Compiler::Javascript:
    - var-ara-Arab-Arab-rababa
  # Detection works with this map, but it takes too long and does not make
  # sense for the Detector's main use case.
  Interscript::Detector:
    - var-ara-Arab-Arab-rababa
  # Rababa tests currently fail on Windows.
  mswin64:
    - var-ara-Arab-Arab-rababa
  mswin32:
    - var-ara-Arab-Arab-rababa
  mingw32:
    - var-ara-Arab-Arab-rababa
  cygwin:
    - var-ara-Arab-Arab-rababa
# Alias registry. Aliases used to live inside the maps themselves; keeping
# them here avoids having to load every map just to locate the one being
# requested.
#
# Each entry carries a human-readable `description` and, where a map exists,
# an `alias_to` naming the target map. Entries without `alias_to` have no
# target listed in this file.
#
# NOTE(review): keys and targets are reproduced verbatim, including ones that
# look unusual (e.g. `guj_Gijr2Latn_ALA_1997`, `tir_Thai2Latn_RIT_2000`, and
# aliases whose year differs from the key's year) -- confirm against the
# upstream register before changing any of them.
aliases:
  ogc11122:
    amh_Ethi2Latn_ALA_1997:
      description: Amharic ALA-Library of Congress 1997 System
      alias_to: alalc-amh-Ethi-Latn-1997
    amh_Ethi2Latn_BGN_1967:
      description: Amharic Board on Geographic Names/Permanent Committee on Geographical Names for British Official Use(PCGN) 1967 System
      alias_to: bgnpcgn-amh-Ethi-Latn-1967
    ara_Arab2Latn_ALA_1997:
      description: Arabic ALA-Library of Congress 1997 System
      alias_to: alalc-ara-Arab-Latn-1997
    ara_Arab2Latn_BGN_1956:
      description: Arabic US Board on Geographic Names(BGN)/Permanent Committee on Geographical Names for British Official Use(PCGN) 1956 System
      alias_to: bgnpcgn-ara-Arab-Latn-1956
    ara_Arab2Latn_BUC_2002:
      description: Arabic Tim Buckwalter 2002 System
    ara_Arab2Latn_IGN_1973:
      description: Arabic Institut Geographique Nationale 1973 System
    ara_Arab2Latn_ODNI_2004:
      description: Arabic Personal Names Office of the Director Of National Intelligence 2004 System
    ara_Arab2Latn_RJGC_1996:
      description: Arabic Royal Jordanian Geographic Center 1996 System
    ara_Arab2Latn_SES_1930:
      description: Arabic Survey of Egypt System
      alias_to: ses-ara-Arab-Latn-1930
    asm_Deva2Latn_ALA_1997:
      description: Assamese ALA-Library of Congress 1997 System
      alias_to: alalc-asm-Deva-Latn-1997
    aze_Arab2Latn_ALA_1997:
      description: Arabic ALA-Library of Congress 1997 System
      alias_to: alalc-aze-Arab-Latn-1997
    aze_Cyrl2Latn_ALA_1997:
      description: Azerbaijani ALA-Library of Congress 1997 System
      alias_to: alalc-aze-Cyrl-Latn-1997
    aze_Cyrl2Latn_GAZ_1991:
      description: Azerbaijani Cyrillic-Roman Table of Correspondences Azeri Government 1991
    bak_Cyrl2Latn_BGN_2007:
      description: Bashkir 2007 BGN/PCGN Cyrillic-Latin Table of Correspondences
      alias_to: bgnpcgn-bak-Cyrl-Latn-2007
    ban_Bali2Latn_ALA_1997:
      description: Balinese ALA-Library of Congress 1997 System
    bel_Cyrl2Latn_ALA_1997:
      description: Byelorussian ALA-Library of Congress 1997 System
      alias_to: alalc-bel-Cyrl-Latn-1997
    bel_Cyrl2Latn_BGN_1979:
      description: Byelorussian US Board on Geographic Names(BGN)/Permanent Committee on Geographical Names for British Official Use(PCGN) 1979 System
      alias_to: bgnpcgn-bel-Cyrl-Latn-1979
    bel_Cyrl2Latn_GBO_1992:
      description: Byelorussian National Cartographic Authority 1992-3 System (based on GOST 1983)
      alias_to: gki-bel-Cyrl-Latn-1992
    bel_Cyrl2Latn_GBO_1998:
      description: Byelorussian Government of Belarus 1998 System
      alias_to: by-bel-Cyrl-Latn-1998
    ben_Beng2Latn_ALA_1997:
      description: Bengali ALA-Library of Congress 1997 System
      alias_to: alalc-ben-Beng-Latn-1997
    bod_Tibt2Latn_ALA_1997:
      description: Tibetan ALA-Library of Congress 1997 System
    bul_Cyrl2Latn_ALA_1997:
      description: Bulgarian ALA-Library of Congress Bulgarian 1997 System
      alias_to: alalc-bul-Cyrl-Latn-1997
    bul_Cyrl2Latn_BGN_1952:
      description: Bulgarian US Board on Geographic Names(BGN)/Permanent Committee on Geographical Names for British Official Use(PCGN) Bulgarian 1952 System
      alias_to: bgnpcgn-bul-Cyrl-Latn-1952
    bul_Cyrl2Latn_BGN_2013:
      description: Bulgarian US Board on Geographic Names(BGN)/Permanent Committee on Geographical Names(PCGN) Bulgarian 2013 System
      alias_to: bgnpcgn-bul-Cyrl-Latn-2013
    bul_Cyrl2Latn_BUL_1952:
      description: Bulgarian 2002 Bulgarian Government Romanization System
      alias_to: bgnpcgn-bul-Cyrl-Latn-1952
    bul_Cyrl2Latn_ODNI_2005:
      description: Bulgarian Personal Names Office of the Director Of National Intelligence Bulgarian Personal Names 2004 System
      alias_to: odni-bul-Cyrl-Latn-2005
    che_Cyrl2Latn_BGN_2007:
      description: Chechen 2007 BGN/PCGN Cyrillic-Latin Table of Correspondences
    deu_Latn2Latn_BGN_1986:
      description: German Spelling Convention US Board on Geographic Names(BGN)/Permanent Committee on Geographical Names for British Official Use(PCGN) 1986
    div_Thaa2Latn_ALA_1997:
      description: Maldivian (Divehi) ALA-Library of Congress 1997 System
      alias_to: alalc-div-Thaa-Latn-1997
    div_Thaa2Latn_BGN_1972:
      description: Maldivian (Divehi) US Board on Geographic Names(BGN)/Permanent Committee on Geographical Names for British Official Use(PCGN) 1972 system (out of date)
    div_Thaa2Latn_GMV_1988:
      description: Maldivian (Divehi) Maldivian Government 1988 system
      alias_to: bgnpcgn-div-Thaa-Latn-1988
    dzo_Tibt2Latn_GBT_1994:
      description: Dzongkha Government of Bhutan 1994 System
    ell_Grek2Latn_ALA_1997:
      description: Greek ALA-Library of Congress 1997 System
      alias_to: alalc-ell-Grek-Latn-1997
    ell_Grek2Latn_BGN_1962:
      description: US Board on Geographic Names(BGN)/Permanent Committee on Geographical Names for British Official Use(PCGN) Greek 1962 System (out of date)
      alias_to: bgnpcgn-ell-Grek-Latn-1962
    ell_Grek2Latn_ELOT743_1996:
      description: Greek ELOT 743 System, US Board on Geographic Names(BGN)/Permanent Committee on Geographical Names for British Official Use(PCGN) 1996 agreement
      alias_to: bgnpcgn-ell-Grek-Latn-1996
    fao_Latn2Latn_BGN_1968:
      description: Faroese Spelling Convention US Board on Geographic Names(BGN)/Permanent Committee on Geographical Names for British Official Use(PCGN) 1968
      alias_to: bgnpcgn-fao-Latn-Latn-1968
    fas_Arab2Latn_ALA_1997:
      description: Persian 1997 ALA-Library of Congress System
    fas_Arab2Latn_AMMI_1959:
      description: Afghanistan Ministry of Mines and Industries Transliteration System for Geographic Names in Afghanistan (Yaghubi)
    fas_Arab2Latn_BGN_1958:
      description: Persian (Afghan and Iranian) BGN/Permanent Committee on Geographical Names for British Official Use(PCGN) 1958 System
      alias_to: bgnpcgn-fas-Arab-Latn-1956
    fas_Arab2Latn_NCO_2004:
      description: Persian 2004 Broad Transcription System National Cartographic Center of Iran
    fas_Arab2Latn_ODNI_2004:
      description: Farsi (Iranian Persian) Personal Names Office of the Director Of National Intelligence 2004 System
    guj_Gijr2Latn_ALA_1997:
      description: Gujerati 1997 ALA-Library of Congress System
    guj_Gujr2Latn_ALA_1997:
      description: Gujarati ALA-Library of Congress 1997 System
      alias_to: alalc-guj-Gujr-Latn-1997
    guj_Gujr2Latn_ALA_2011:
      description: Gujarati ALA-Library of Congress 2011 System
      alias_to: alalc-guj-Gujr-Latn-2011
    heb_Hebr2Latn_ALA_1997:
      description: Hebrew and Yiddish ALA-Library of Congress 1997 System
    heb_Hebr2Latn_BGN_2018:
      description: Hebrew US Board on Geographic Names(BGN)/Permanent Committee on Geographical Names for British Official Use(PCGN) 2018 Amended System
    heb_Hebr2Latn_HAS_1962:
      description: Hebrew Academy System, US Board on Geographic Names(BGN)/Permanent Committee on Geographical Names for British Official Use(PCGN) Hebrew 1962 Agreement
    hin_Deva2Latn_ALA_1997:
      description: Hindi ALA-Library of Congress 1997 System
      alias_to: alalc-hin-Deva-Latn-1997
    hye_Armn2Latn_ALA_1997:
      description: Armenian ALA-Library of Congress 1997 System
    hye_Armn2Latn_BGN_1981:
      description: Armenian 1981 BGN/PCGN Romanization System
    isl_Armn2Latn_BGN_1981:
      description: Armenian US Board on Geographic Names(BGN)/Permanent Committee on Geographical Names for British Official Use(PCGN) 1981 System
    isl_Latn2Latn_BGN_1968:
      description: Icelandic Spelling Convention US Board on Geographic Names(BGN)/Permanent Committee on Geographical Names for British Official Use(PCGN) 1968
      alias_to: bgnpcgn-isl-Latn-Latn-1968
    jav_Java2Latn_ALA_1997:
      description: Javanese, Sundanese, and Madurese ALA-Library of Congress 1997 System
    jpn_Hrkt2Latn_ALA_1997:
      description: Japanese ALA-Library of Congress 1997 System
    jpn_Hrkt2Latn_BGN_1930:
      description: Japanese Kana Modified Hepburn 1930 System
    jpn_Hrkt2Latn_GJP_1954:
      description: Japanese Kunrei-siki 1954 System
    kas_Arab2Latn_ALA_1997:
      description: Kashmiri ALA-Library of Congress 1997 System
    kat_Geor2Latn_ALA_1997:
      description: Georgian ALA-Library of Congress 1997 System
      alias_to: alalc-kat-Geor-Latn-1997
    kat_Geor2Latn_BGN_1981:
      description: Georgian US Board on Geographic Names(BGN)/Permanent Committee on Geographical Names for British Official Use(PCGN) 1981 System
      alias_to: bgnpcgn-kat-Geor-Latn-1981
    kat_Geor2Latn_GGG_2002:
      description: Georgian State Department of Geodesy and Cartography 2002 System
      alias_to: ggg-kat-Geor-Latn-2002
    kaz_Cyrl2Latn_BGN_1979:
      description: Kazakh Cyrillic US Board on Geographic Names(BGN)/Permanent Committee on Geographical Names for British Official Use(PCGN) 1979 System
    khm_Khmr2Latn_ALA_1997:
      description: Cambodian (Khmer) ALA-Library of Congress 1997 System
    khm_Khmr2Latn_SGK_1959:
      description: Khmere Service Geographique 1959 System
    kir_Cyrl2Latn_BGN_1979:
      description: Kirghiz US Board on Geographic Names(BGN)/Permanent Committee on Geographical Names for British Official Use(PCGN) 1979 System
    kor_Hang2Latn_ALA_1997:
      description: Korean ALA-Library of Congress 1997 System
      alias_to: alalc-kor-Hang-Latn-1997
    kor_Hang2Latn_GKN_2002:
      description: Korean Democratic People's Republic of Korea Korean 2002 System
      alias_to: kp-kor-Hang-Latn-2002
    kor_Hang2Latn_MOCT_2000:
      description: Korean Ministry of Culture and Tourism 2000 System
      alias_to: moct-kor-Hang-Latn-2000
    kor_Hang2Latn_MR_1939:
      description: Korean McCune-Reischauer System, 1943 US Board on Geographic Names(BGN) Agreement
      alias_to: bgn-kor-Hang-Latn-1943
    kor_Hang2Latn_ODNI_2004:
      description: Korean Personal Names Office of the Director Of National Intelligence 2004 System
    kur_Arab2Latn_ALA_1997:
      description: Kurdish 1997 ALA-Library of Congress Kurdish 1997 System
    kur_Arab2Latn_BGN_1931:
      description: 2007 Kurdish Hawar Romanization System BGN/PCGN
    kur_Arab2Latn_EDMONDS_1931:
      description: Kurdish 1931 J.C. Edmonds Romanization System
    lad_Hebr2Latn_ALA_1997:
      description: Ladino ALA-Library of Congress 1997 System
    lao_Laoo2Latn_ALA_1997:
      description: Lao ALA-Library of Congress 1997 System
    lao_Laoo2Latn_CNT_1966:
      description: Lao Commission Nationale de Toponymie 1966 System, US Board on Geographic Names(BGN)/Permanent Committee on Geographical Names for British Official Use(PCGN) Agreement
    mal_Mlym2Latn_ALA_1997:
      description: Malayalam ALA-Library of Congress 1997 System
      alias_to: alalc-mal-Mlym-Latn-1997
    mal_Mlym2Latn_ALA_2012:
      description: Malayalam ALA-Library of Congress 2012 System
      alias_to: alalc-mal-Mlym-Latn-2012
    mar_Deva2Latn_ALA_1997:
      description: Marathi ALA-Library of Congress 1997 System
      alias_to: alalc-mar-Deva-Latn-1997
    mkd_Cyrl2Latn_BGN_1981:
      description: Macedonian US Board on Geographic Names(BGN)/Permanent Committee on Geographical Names for British Official Use(PCGN) 1981 System
      alias_to: bgnpcgn-mkd-Cyrl-Latn-1981
    mkd_Cyrl2Latn_BGN_2013:
      description: Macedonian US Board on Geographic Names(BGN)/Permanent Committee on Geographical Names for British Official Use(PCGN) 2013 System
    mkd_Cyrl2Latn_ODNI_2005:
      description: Macedonian Personal Names Office of the Director Of National Intelligence 2004 System
    mly_Arab2Latn_ALA_1997:
      description: Malay ALA-Library of Congress 1997 System
    mol_Cyrl2Latn_BGN_2002:
      description: Moldavian Table of Correspondences US Board on Geographic Names(BGN)/Permanent Committee on Geographical Names for British Official Use(PCGN) 2002
    mon_Cyrl2Latn_ALA_1997:
      description: Mongolian ALA-Library of Congress 1997 System
      alias_to: alalc-mon-Cyrl-Latn-1997
    mon_Cyrl2Latn_BGN_1964:
      description: Mongolian US Board on Geographic Names(BGN)/Permanent Committee on Geographical Names for British Official Use(PCGN) 1964 System
      alias_to: bgnpcgn-mon-Cyrl-Latn-1964
    mya_Mymr2Latn_ALA_1997:
      description: Burmese ALA-Library of Congress 1997 System
    nep_Deva2Latn_BGN_1964:
      description: Nepalese US Board on Geographic Names(BGN)/Permanent Committee on Geographical Names for British Official Use(PCGN) 1964 System
      # alias_to: bgnpcgn-nep-Deva-Latn-2011
    nep_Mymr2Latn_BGN_1970:
      description: Burmese US Board on Geographic Names(BGN)/Permanent Committee on Geographical Names for British Official Use(PCGN) 1970 Agreement
    ori_Orya2Latn_ALA_1997:
      description: Oriya ALA-Library of Congress 1997 System
      alias_to: alalc-ori-Orya-Latn-1997
    ori_Orya2Latn_ALA_2011:
      description: Oriya ALA-Library of Congress 2011 System
      alias_to: alalc-ori-Orya-Latn-2011
    pan_Deva2Latn_ALA_1997:
      description: Panjabi ALA-Library of Congress 1997 System
      alias_to: alalc-pan-Guru-Latn-1997
    pan_Deva2Latn_ALA_2011:
      description: Panjabi ALA-Library of Congress 2011 System
      alias_to: alalc-pan-Guru-Latn-2011
    pus_Arab2Latn_ALA_1997:
      description: Pashto ALA-Library of Congress 1997 System
    pus_Arab2Latn_AMMI_1959:
      description: Afghanistan Ministry of Mines and Industries Transliteration System for Geographic Names in Afghanistan (Yaghubi)
    pus_Arab2Latn_BGN_1968:
      description: Pashto (Pushtu) US Board on Geographic Names(BGN)/Permanent Committee on Geographical Names for British Official Use(PCGN) 1968 System
    rue_Cyrl2Latn_BGN_2016:
      description: BGN/PCGN 2016 System
      alias_to: bgnpcgn-rue-Cyrl-Latn-2016
    rus_Cyrl2Latn_ALA_1997:
      description: Russian ALA-Library of Congress 1997 System
      alias_to: alalc-rus-Cyrl-Latn-1997
    rus_Cyrl2Latn_BGN_1947:
      description: Russian US Board on Geographic Names(BGN)/Permanent Committee on Geographical Names for British Official Use(PCGN) 1947 System
      alias_to: bgnpcgn-rus-Cyrl-Latn-1947
    rus_Cyrl2Latn_GOST_1983:
      description: Russian GOST 1983 System, Main Administration of Geodesy and Cartography of the Former Soviet Union
      alias_to: gost-rus-Cyrl-Latn-16876-71-1983
    rus_Cyrl2Latn_ODNI_2004:
      description: Russian Personal Names Office of the Director Of National Intelligence 2004 System
    sin_Sinh2Latn_ALA_1997:
      description: Sinhalese ALA-Library of Congress 1997 System
      alias_to: alalc-sin-Sinh-Latn-1997
    sin_Sinh2Latn_ALA_2011:
      description: Sinhalese ALA-Library of Congress 2011 System
      alias_to: alalc-sin-Sinh-Latn-2011
    sme_Latn2Latn_BGN_1984:
      description: North Lappish Spelling Convention US Board on Geographic Names(BGN)/Permanent Committee on Geographical Names for British Official Use(PCGN) 1984 System
      alias_to: bgnpcgn-sme-Latn-Latn-1984
    snd_Arab2Latn_ALA_1997:
      description: Sindhi ALA-Library of Congress 1997 System
    srp_Cyrl2Latn_ALA_1997:
      description: Serbian and Macedonian ALA-Library of Congress 1997 System
      alias_to: alalc-srp-Cyrl-Latn-1997
    srp_Cyrl2Latn_BGN_2005:
      description: Serbian Cyrillic Table of Correspondences US Board on Geographic Names(BGN)/Permanent Committee on Geographical Names for British Official Use(PCGN) 2005
      alias_to: bgnpcgn-srp-Cyrl-Latn-2005
    srp_Cyrl2Latn_ODNI_2005:
      description: Serbian Personal Names Office of the Director Of National Intelligence 2004 System
    tam_Taml2Latn_ALA_1997:
      description: Tamil ALA-Library of Congress 1997 System
      alias_to: alalc-tam-Taml-Latn-1997
    tam_Taml2Latn_ALA_2011:
      description: Tamil ALA-Library of Congress 2011 System
      alias_to: alalc-tam-Taml-Latn-2011
    tat_Cyrl2Latn_BGN_2005:
      description: Tatar 2005 BGN/PCGN Cyrillic-Roman Table of Correspondences
      alias_to: bgnpcgn-tat-Cyrl-Latn-2007
    tel_Telu2Latn_ALA_1997:
      description: Telugu ALA-Library of Congress 1997 System
      alias_to: alalc-tel-Telu-Latn-1997
    tgk_Cyrl2Latn_BGN_1994:
      description: Tajik Cyrillic US Board on Geographic Names(BGN)/Permanent Committee on Geographical Names for British Official Use(PCGN) 1994 System
      alias_to: bgnpcgn-tgk-Cyrl-Latn-1994
    tha_Thai2Latn_ALA_1997:
      description: Thai ALA-Library of Congress 1997 System
    tha_Thai2Latn_RIT_1968:
      description: Thai Royal Institute of Thailand 1968 System (out of date)
      alias_to: royin-tha-Thai-Latn-1968
    tir_Ethi2Latn_ALA_1997:
      description: Tigrinya ALA-Library of Congress 1997 System
      alias_to: alalc-tir-Ethi-Latn-1997
    tir_Ethi2Latn_BGN_2007:
      description: Tigrinya 2007 BGN/PCGN Romanization System
      alias_to: bgnpcgn-tir-Ethi-Latn-2007
    tir_Thai2Latn_RIT_2000:
      description: Thai Royal Institute of Thailand 2000 System
      alias_to: royin-tha-Thai-Latn-1999
    tuk_Cyrl2Latn_BGN_1979:
      description: Turkmen Cyrillic US Board on Geographic Names(BGN)/Permanent Committee on Geographical Names for British Official Use(PCGN) 1979 System (out of date)
    tuk_Cyrl2Latn_GTX_2000:
      description: Turkmen Cyrillic Table of Correspondences Government of Turkmenistan 2000 System
    uas_Arab2Latn_BGN_2007:
      description: Unified Afghan Romanization System US Board on Geographic Names (BGN)/The Permanent Committee on Geographical Names (PCGN) 2007
      alias_to: bgnpcgn-urd-Arab-Latn-2007
    uig_Arab2Latn_ALA_1997:
      description: Uyghur ALA-Library of Congress 1997 System
    uig_Arab2Latn_UCIS_2001:
      description: Uyghur 2001 Latin Script for Uyghur, Uyghur Computer Information Society
    uig_Arab2Latn_XLSC_1959:
      description: Uyghur Pinyin New Script 1959 XUAR Language and Script Committee
    ukr_Cyrl2Latn_ALA_1997:
      description: Ukrainian ALA-Library of Congress 1997 System
      alias_to: alalc-ukr-Cyrl-Latn-1997
    ukr_Cyrl2Latn_BGN_1965:
      description: Ukrainian Board on Geographic Names/Permanent Committee on Geographical Names for British Official Use(PCGN) 1965 System
      alias_to: bgnpcgn-ukr-Cyrl-Latn-1965
    ukr_Cyrl2Latn_GUP_1996:
      description: Ukrainian Government of Ukraine 1996 System
      alias_to: ua-ukr-Cyrl-Latn-1996
    ukr_Cyrl2Latn_ODNI_2005:
      description: Ukrainian Personal Names Office of the Director Of National Intelligence 2004 System
    urd_Arab2Latn_ALA_1997:
      description: Urdu ALA-Library of Congress 1997 System
    urd_Arab2Latn_BGN_2007:
      description: Urdu 2007 BGN/PCGN Urdu Romanization System
    urd_Arab2Latn_HUN_:
      description: Hunterian System
    urd_Arab2Latn_ODNI_2004:
      description: Urdu Personal Names Office of the Director Of National Intelligence 2004 System
    urd_Arab2Latn_UN_1972:
      description: Urdu United Nations 1972 System
    uzb_Cyrl2Latn_BGN_1979:
      description: Uzbek Cyrillic US Board on Geographic Names(BGN)/Permanent Committee on Geographical Names for British Official Use(PCGN) 1979 System (out of date)
    uzb_Cyrl2Latn_GUZ_2000:
      description: Uzbek Cyrillic Table of Correspondences Government of Uzbekistan 2000 System
    zho_Hani2Latn_ALA_1997:
      description: Chinese ALA-Library of Congress 1997 System
    zho_Hani2Latn_AcadSin_2002:
      description: Chinese Tongyong Pinyin Academica Sinica 2002 System
      alias_to: acadsin-zho-Hani-Latn-2002
    zho_Hani2Latn_GCH_1979:
      description: Chinese Hanyu Pinyin 1979 System
      alias_to: sac-zho-Hans-Latn-1979
    zho_Hani2Latn_WDG_1979:
      description: Chinese Wade-Giles 1979 System
      alias_to: var-zho-Hani-Latn-wd-1979