metadata {

authority_id: alalc
id: 1997
language: iso-639-2:hin
source_script: Deva
destination_script: Latn
name: Romanization Table -- Hindi (1997)
url: http://catdir.loc.gov/catdir/cpso/romanization/hindi.pdf
creation_date: 1997
description: |
  ALA-LC Romanization table for Hindi

notes:

  - Only the vowel forms that appear at the beginning of a syllable are listed; the forms used for
    vowels following a consonant can be found in grammars; no distinction between the two is
    made in transliteration.

  - |
    The vowel a is implicit after all consonants and consonant clusters and is supplied in
    transliteration, with the following exceptions:

    a) when another vowel is indicated by its appropriate sign; and
    b) when the absence of any vowel is indicated by the subscript sign ( ◌् ) called halanta or
       virāma.

  - |
    Exception: Anusvāra is transliterated by:

    a) ṅ before gutturals,
    b) ñ before palatals,
    c) ṇ before cerebrals,
    d) n before dentals, and
    e) m before labials.

  - Anunāsika before guttural, palatal, cerebral, and dental occlusives is transliterated n̐. Before
    labials, sibilants, semivowels, aspirates, vowels, and in final position it is transliterated m̐.

  - When doubled, avagraha is transliterated by two apostrophes ( ’’ ).

}

tests {

# Each entry is: test <Devanagari input>, <expected romanization>.

# Single words: a bare consonant at the very end of the input keeps its inherent "a".
test "हम", "hama"
test "मीन", "mīna"
# Syllable-initial औ.
test "औसत", "ăusata"
# Anunāsika: n̐ when followed by avagraha, m̐ in final position; each avagraha yields one apostrophe.
test "माँऽऽऽ!", "mān̐’’’!"
test "माँ", "mām̐"
# Running text: a consonant directly before a space loses its inherent "a" (e.g. "gaṃbhīr"),
# while the last word of the input keeps it (e.g. "para").
test "गंभीर मरीजों के मामले में भारत दूसरे नंबर पर", "gaṃbhīr marījoṃ ke māmale meṃ bhārat dūsare naṃbar para"
test "कोरोना अपडेट्स", "koronā apaḍeṭsa"
test "सीडीसी चीफ का बयान अहम", "sīḍīsī cīph kā bayān ahama"
test "गूगल प्ले स्टोर पर पेटीएम की वापसी", "gūgal ple sṭor par peṭīem kī vāpasī"
test "भारत में गैंबलिंग की इजाजत नहीं", "bhārat meṃ gaiṃbaliṃg kī ijājat nahīṃ"
test "कोरोना वैक्सीन मुद्दे पर घिरे राष्ट्रपति; जो बाइडेन बोले- मुझे और देश को वैज्ञानिकों पर भरोसा है, डोनाल्ड ट्रम्प पर नहीं", "koronā vaiksīn mudde par ghire rāshṭrapati; jo bāiḍen bole- mujhe ăur deś ko vaijñānikoṃ par bharosā hai, ḍonālḍ ṭramp par nahīṃ"
test "गूगल की कार्रवाई पर पेटीएम ने कहा था कि ऐप को अस्थायी तौर पर प्ले-स्टोर से हटाया गया है, आपके पैसे सुरक्षित हैं", "gūgal kī kārravāī par peṭīem ne kahā thā ki aip ko asthāyī tăur par ple-sṭor se haṭāyā gayā hai, āpake paise surakshit haiṃ"
# Devanagari digits map to ASCII digits.
test "२५६८७५४४६४४६१६११", "2568754464461611"

}

aliases {

# Despite its name, this alias does not match consonants. It matches the characters that,
# when they directly follow a consonant, suppress that consonant's inherent "a":
#   U+093E..U+0945  vowel signs ā, i, ī, u, ū, ṛ, ṝ, candra e
#   U+0947..U+094C  vowel signs e, ai, candra o, short o, o, au
#   U+0020          space (a consonant directly before a space is romanized without "a")
#   U+094D          virāma / halanta
# U+094A (vowel sign short o) is included because the stage maps that sign to "ŏ";
# without it a consonant + short-o sign would keep a spurious inherent "a".
def_alias hin_cons_or_conscluster, any("\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094A\u094B\u094C\u0020\u094d")

}

stage {

# RULES
# note[2]
# Vowel-less consonant forms. Each rule romanizes a consonant WITHOUT the inherent "a" when
# it is followed by one of the characters in hin_cons_or_conscluster (a dependent vowel sign,
# a space, or the virāma). Consonants not matched here are left untouched and receive their
# "...a" form from the consonant table in the parallel block.
# NOTE(review): the purpose of the optional "=" after each consonant is not evident from
# this file — confirm.
# NOTE(review): ख/ख़ ("kh"), ग़/घ ("gh"), ज/ज़ ("j"), फ/फ़ ("ph") and ह/ह़ ("h") share one
# romanization each, so the nukta distinction is lost in the output — confirm against the
# ALA-LC table.
sub capture("क" + maybe("=")), "k", after: hin_cons_or_conscluster
sub capture("क़" + maybe("=")), "q", after: hin_cons_or_conscluster
sub capture("ख" + maybe("=")), "kh", after: hin_cons_or_conscluster
sub capture("ख़" + maybe("=")), "kh", after: hin_cons_or_conscluster
sub capture("ग" + maybe("=")), "g", after: hin_cons_or_conscluster
sub capture("ग़" + maybe("=")), "gh", after: hin_cons_or_conscluster
sub capture("घ" + maybe("=")), "gh", after: hin_cons_or_conscluster
sub capture("ङ" + maybe("=")), "ṅ", after: hin_cons_or_conscluster
sub capture("च" + maybe("=")), "c", after: hin_cons_or_conscluster
sub capture("छ" + maybe("=")), "ch", after: hin_cons_or_conscluster
sub capture("ज" + maybe("=")), "j", after: hin_cons_or_conscluster
sub capture("ज़" + maybe("=")), "j", after: hin_cons_or_conscluster
sub capture("झ" + maybe("=")), "jh", after: hin_cons_or_conscluster
sub capture("ञ" + maybe("=")), "ñ", after: hin_cons_or_conscluster
sub capture("ट" + maybe("=")), "ṭ", after: hin_cons_or_conscluster
sub capture("ट़" + maybe("=")), "t̤", after: hin_cons_or_conscluster
sub capture("ठ" + maybe("=")), "ṭh", after: hin_cons_or_conscluster
sub capture("ड" + maybe("=")), "ḍ", after: hin_cons_or_conscluster
# NOTE(review): the next two rules render identically; presumably one uses the precomposed
# letter and the other the base letter + nukta — confirm before removing either.
sub capture("ड़" + maybe("=")), "ṛ", after: hin_cons_or_conscluster
sub capture("ड़" + maybe("=")), "ṛ", after: hin_cons_or_conscluster
sub capture("ढ" + maybe("=")), "ḍh", after: hin_cons_or_conscluster
sub capture("ढ़" + maybe("=")), "ṛh", after: hin_cons_or_conscluster
sub capture("ण" + maybe("=")), "ṇ", after: hin_cons_or_conscluster
sub capture("त" + maybe("=")), "t", after: hin_cons_or_conscluster
sub capture("थ" + maybe("=")), "th", after: hin_cons_or_conscluster
sub capture("द" + maybe("=")), "d", after: hin_cons_or_conscluster
sub capture("ध" + maybe("=")), "dh", after: hin_cons_or_conscluster
sub capture("न" + maybe("=")), "n", after: hin_cons_or_conscluster
sub capture("प" + maybe("=")), "p", after: hin_cons_or_conscluster
sub capture("फ" + maybe("=")), "ph", after: hin_cons_or_conscluster
sub capture("फ़" + maybe("=")), "ph", after: hin_cons_or_conscluster
sub capture("ब" + maybe("=")), "b", after: hin_cons_or_conscluster
sub capture("भ" + maybe("=")), "bh", after: hin_cons_or_conscluster
sub capture("म" + maybe("=")), "m", after: hin_cons_or_conscluster
sub capture("य" + maybe("=")), "y", after: hin_cons_or_conscluster
sub capture("र" + maybe("=")), "r", after: hin_cons_or_conscluster
sub capture("ल" + maybe("=")), "l", after: hin_cons_or_conscluster
sub capture("व" + maybe("=")), "v", after: hin_cons_or_conscluster
sub capture("श" + maybe("=")), "ś", after: hin_cons_or_conscluster
sub capture("ष" + maybe("=")), "sh", after: hin_cons_or_conscluster
sub capture("स" + maybe("=")), "s", after: hin_cons_or_conscluster
sub capture("स़" + maybe("=")), "s̤", after: hin_cons_or_conscluster
sub capture("ह" + maybe("=")), "h", after: hin_cons_or_conscluster
sub capture("ह़" + maybe("=")), "h", after: hin_cons_or_conscluster

# note[3]
# Anusvāra (U+0902) becomes the class nasal of the consonant that follows it: ṅ before
# gutturals, ñ before palatals, ṇ before cerebrals, n before dentals. An anusvāra that no
# rule here matches is mapped to "ṃ" by the parallel block.
# NOTE(review): the lookaheads list Devanagari consonants, but the note[2] rules have
# already romanized every consonant that is followed by a vowel sign, space or virāma, so
# only a consonant still carrying its inherent "a" can match here — confirm this is intended.
# NOTE(review): metadata note 3(e) ("m before labials") has no rule; the test expecting
# "naṃbar" depends on that omission.
sub "\u0902", "ṅ", after: any("कक़खख़गग़घङ")
sub "\u0902", "ñ", after: any("चछजज़झञ")
sub "\u0902", "ṇ", after: any("टट़ठडड़ढढ़ण")
sub "\u0902", "n", after: any("तथदधन")
# note[4]
# Anunāsika (U+0901) followed by a boundary becomes m̐; every other anunāsika falls
# through to the n̐ mapping in the parallel block.
# NOTE(review): `before: ""` is an empty context and looks like a no-op. The other m̐
# contexts listed in metadata note 4 (before labials, sibilants, semivowels, aspirates,
# vowels) are not covered by any rule — confirm.
sub "\u0901", "m̐", after: boundary, before: ""

# CHARACTERS
parallel {

  # I. Vowels and Diphthongs (see Note 1)
  # Independent (syllable-initial) vowel letters. The dependent vowel signs written after a
  # consonant are mapped in the "Medials" section of this parallel block.
  # NOTE(review): औ is romanized "ău" while ऐ is "ai"; the tests in this file expect "ău"
  # (e.g. "ăusata") — confirm against the ALA-LC table before changing either.

  sub "अ", "a"
  sub "आ", "ā"
  sub "इ", "i"
  sub "ई", "ī"
  sub "उ", "u"
  sub "ऊ", "ū"
  sub "ऋ", "ṛ"
  sub "ॠ", "ṝ"
  sub "ऌ", "ḷ"
  sub "ॳ", "ĕ"
  sub "ए", "e"
  sub "ॲ", "ê"
  sub "अै", "ăi"
  sub "ऐ", "ai"
  sub "ऒ", "ŏ"
  sub "ओ", "o"
  sub "ऑ", "ô"
  sub "औ", "ău"

  # II. Consonants (see Note 2)
  # Consonant + inherent "a". Consonants followed by a vowel sign, space or virāma were
  # already rewritten to their vowel-less form by the rules that precede the parallel block,
  # so only consonants that keep the inherent vowel reach these mappings.
  # NOTE(review): several nukta letters share the romanization of their plain counterpart
  # (ख़ "kha", ग़ "gha", ज़ "ja", फ़ "pha", ह़ "ha") — confirm against the ALA-LC table.
  # Gutturals
  sub "क", "ka"
  sub "क़", "qa"
  sub "ख", "kha"
  sub "ख़", "kha"
  sub "ग", "ga"
  sub "ग़", "gha"
  sub "घ", "gha"
  sub "ङ", "ṅa"

  # Palatals
  sub "च", "ca"
  sub "छ", "cha"
  sub "ज", "ja"
  sub "ज़", "ja"
  sub "झ", "jha"
  sub "ञ", "ña"

  # Cerebrals
  sub "ट", "ṭa"
  sub "ट़", "t̤a"
  sub "ठ", "ṭha"
  sub "ड", "ḍa"
  # NOTE(review): the next two entries render identically; presumably one is the precomposed
  # letter and the other the base letter + nukta — confirm before removing either.
  sub "ड़", "ṛa"
  sub "ड़", "ṛa"
  sub "ढ", "ḍha"
  sub "ढ़", "ṛha"
  sub "ण", "ṇa"

  # Dentals
  sub "त", "ta"
  sub "थ", "tha"
  sub "द", "da"
  sub "ध", "dha"
  sub "न", "na"

  # Labials
  sub "प", "pa"
  sub "फ", "pha"
  sub "फ़", "pha"
  sub "ब", "ba"
  sub "भ", "bha"
  sub "म", "ma"

  # Semivowels
  sub "य", "ya"
  sub "र", "ra"
  sub "ल", "la"
  sub "व", "va"

  # Sibilants
  sub "श", "śa"
  sub "ष", "sha"
  sub "स", "sa"
  sub "स़", "s̤a"

  # Aspirate
  sub "ह", "ha"
  sub "ह़", "ha"

  # Anusvāra
  # Fallback for any anusvāra not already rewritten to a class nasal before this block.
  sub "ं", "ṃ"

  # Visarga
  # Match the sign on its own. The previous pattern included a trailing space, so a
  # visarga was only romanized before a space and that space was swallowed.
  sub "ः", "ḥ"

  # Anunāsika
  # Default romanization n̐; the m̐ form for final position is produced by the rule
  # that precedes the parallel block.
  sub "ँ", "n̐"

  # Avagraha
  sub "ऽ", "’" # (apostrophe)

  # Medials: dependent vowel signs written after a consonant (needed for connecting
  # consonants to their vowels). The consonant itself has already been romanized without
  # its inherent "a" by the rules that precede the parallel block.
  sub "ा", "ā"
  sub "ॉ", "ô"
  sub "ि", "i"
  sub "ी", "ī"
  sub "ु", "u"
  sub "ू", "ū"
  sub "ृ", "ṛ"
  sub "ॄ", "ṝ"
  sub "े", "e"
  sub "ॊ", "ŏ"
  sub "ौ", "ău"
  sub "ै", "ai"
  sub "ो", "o"
  # The halanta/virāma and a stand-alone nukta (dot-below) sign produce no output.
  sub "्", ""
  sub "़", ""

  # digits
  # Devanagari digits ०–९ map one-to-one to ASCII 0–9.

  sub "०", "0"
  sub "१", "1"
  sub "२", "2"
  sub "३", "3"
  sub "४", "4"
  sub "५", "5"
  sub "६", "6"
  sub "७", "7"
  sub "८", "8"
  sub "९", "9"
}
# Final step of the stage, run after the parallel block.
# NOTE(review): presumably applies Unicode composition (NFC) to the romanized output —
# confirm against the Interscript DSL documentation.
compose

}