metadata {

authority_id: un
id: 2016
language: iso-639-2:hin
source_script: Deva
destination_script: Latn
name: Romanization of Hindi -- UNGEGN 4.0
url: https://www.eki.ee/wgrs/rom1_hi.pdf
creation_date: 1972
confirmation_date: 2016
description: |
  The United Nations recommended system was approved in 1972 (II/11) and amended in 1977
  (III/12), based on a report prepared by D. N. Sharma. The tables and their corrections were
  published in volume II of the conference reports.

  There is no evidence of the use of the system either in India or in international cartographic
  products. It was stated in 1987 that the appropriate resolution had not been implemented in
  India and the Hunterian system was still in use in large-scale mapping.

  Hindi uses the alphasyllabic script Devanāgarī whereby each character represents a syllable
  rather than one sound. Vowels and diphthongs are marked in two ways: as independent
  characters (used syllable-initially) and in an abbreviated form, to denote vowels after
  consonants. The romanization table is unambiguous but the user would have to recognize
  many ligatures not given in the original table (only three are given). The system is mostly
  reversible but there may exist some ambiguities in the romanization of vowels (independent
  vs. abbreviated characters) and consonants.

notes:
  - |
    It is recommended that the vowel अ (a) should always be romanized except when it ends a
    name. If a name ends with a consonant, the consonant should carry a sub-macron. Such
    cases, however, will be very rare. For example, कानपुर Kānapur (not Kānapura), जगत्
    Jagat.
  - |
    If each letter of a digraph or any two parts of a trigraph has a distinct independent sound
    then it should be indicated by a hyphen, thus d-h.

  ######################################## Additional Note ############################################################
  #    Note 1 recommends romanizing the inherent vowel अ (a) everywhere except at the end of a name.                  #
  #    This scheme cannot yet detect whether a word is a name. Judging from language patterns and the                 #
  #    examples, a word-final consonant is in practice always romanized without its inherent vowel,                   #
  #    so the rules below drop the trailing 'a' whenever a word ends with a consonant letter.                         #
  #####################################################################################################################

}

tests {

# Each `test` line pairs a Devanagari input with the romanization the map is
# expected to produce for it.
#
# Single words: a word-final consonant loses its inherent "a" (भारत -> bhārat),
# an explicit virama gives the bare consonant (जगत् -> jagat), and anusvāra is
# written "ṁ" (संख्या -> saṁkhyā).
test "दिल्ली", "dillī"
test "भारत", "bhārat"
test "परिपक्क", "paripakk"
test "जगत्", "jagat"
test "संख्या", "saṁkhyā"
# Full sentences: spaces and punctuation (";", ",", "-") are expected to pass
# through unchanged.
test "गंभीर मरीजों के मामले में भारत दूसरे नंबर पर", "gaṁbhīr marījoṁ ke māmale meṁ bhārat dūsare naṁbar par"
test "कोरोना अपडेट्स", "koronā apaḍeṭs"
test "सीडीसी चीफ का बयान अहम", "sīḍīsī chīph kā bayān aham"
test "गूगल प्ले स्टोर पर पेटीएम की वापसी", "gūgal ple sṭor par peṭīem kī vāpasī"
test "भारत में गैंबलिंग की इजाजत नहीं", "bhārat meṁ gaiṁbaliṁg kī ijājat nahīṁ"
test "कोरोना वैक्सीन मुद्दे पर घिरे राष्ट्रपति; जो बाइडेन बोले- मुझे और देश को वैज्ञानिकों पर भरोसा है, डोनाल्ड ट्रम्प पर नहीं", "koronā vaiksīn mudde par ghire rāṣhṭrapati; jo bāiḍen bole- mujhe aur desh ko vaijñānikoṁ par bharosā hai, ḍonālḍ ṭramp par nahīṁ"
test "गूगल की कार्रवाई पर पेटीएम ने कहा था कि ऐप को अस्थायी तौर पर प्ले-स्टोर से हटाया गया है, आपके पैसे सुरक्षित हैं", "gūgal kī kārravāī par peṭīem ne kahā thā ki aip ko asthāyī taur par ple-sṭor se haṭāyā gayā hai, āpake paise surakṣhit haiṁ"

}

aliases {

# deva_characters_1: any one of the characters listed below. The stage rules
# use it as the `after:` context in which a consonant letter is romanized
# without its inherent "a".
#   U+093E-U+0945, U+0947-U+0949, U+094B, U+094C   dependent vowel signs
#   U+0020                                          space
#   U+094D                                          virama (halanta)
def_alias deva_characters_1, any("\u093E\u093F\u0940\u0941\u0942\u0943\u0944\u0945\u0947\u0948\u0949\u094B\u094C\u0020\u094d")

}

stage {

# RULES
#
# Inherent-vowel suppression. Every consonant letter gets two rules that emit
# the bare consonant (no trailing "a"):
#   1. when it is followed by a character from deva_characters_1 (a dependent
#      vowel sign, a space or the virama);
#   2. when it is followed by a word boundary, i.e. the word ends in a
#      consonant letter (see the Additional Note in the metadata).
# In every other position the letter is left untouched here and receives its
# "consonant + a" reading from the CHARACTERS table further down.
# NOTE(review): the optional "=" accepted by maybe("=") is not explained
# anywhere in this file - confirm its purpose.

# Gutturals
sub "क" + maybe("="), "k", after: deva_characters_1
sub "क", "k", after: boundary

sub "ख" + maybe("="), "kh", after: deva_characters_1
sub "ख", "kh", after: boundary

sub "ग" + maybe("="), "g", after: deva_characters_1
sub "ग", "g", after: boundary

sub "घ" + maybe("="), "gh", after: deva_characters_1
sub "घ", "gh", after: boundary

sub "ङ" + maybe("="), "ṅ", after: deva_characters_1
sub "ङ", "ṅ", after: boundary

# Palatals
sub "च" + maybe("="), "ch", after: deva_characters_1
sub "च", "ch", after: boundary

sub "छ" + maybe("="), "chh", after: deva_characters_1
sub "छ", "chh", after: boundary

sub "ज" + maybe("="), "j", after: deva_characters_1
sub "ज", "j", after: boundary

sub "झ" + maybe("="), "jh", after: deva_characters_1
sub "झ", "jh", after: boundary

sub "ञ" + maybe("="), "ñ", after: deva_characters_1
sub "ञ", "ñ", after: boundary

# Cerebrals
sub "ट" + maybe("="), "ṭ", after: deva_characters_1
sub "ट", "ṭ", after: boundary

sub "ठ" + maybe("="), "ṭh", after: deva_characters_1
sub "ठ", "ṭh", after: boundary

sub "ड" + maybe("="), "ḍ", after: deva_characters_1
sub "ड", "ḍ", after: boundary

sub "ढ" + maybe("="), "ḍh", after: deva_characters_1
sub "ढ", "ḍh", after: boundary

sub "ण" + maybe("="), "ṇ", after: deva_characters_1
sub "ण", "ṇ", after: boundary

# Dentals
sub "त" + maybe("="), "t", after: deva_characters_1
sub "त", "t", after: boundary

sub "थ" + maybe("="), "th", after: deva_characters_1
sub "थ", "th", after: boundary

sub "द" + maybe("="), "d", after: deva_characters_1
sub "द", "d", after: boundary

sub "ध" + maybe("="), "dh", after: deva_characters_1
sub "ध", "dh", after: boundary

sub "न" + maybe("="), "n", after: deva_characters_1
sub "न", "n", after: boundary

# Labials
sub "प" + maybe("="), "p", after: deva_characters_1
sub "प", "p", after: boundary

sub "फ" + maybe("="), "ph", after: deva_characters_1
sub "फ", "ph", after: boundary

sub "ब" + maybe("="), "b", after: deva_characters_1
sub "ब", "b", after: boundary

sub "भ" + maybe("="), "bh", after: deva_characters_1
sub "भ", "bh", after: boundary

sub "म" + maybe("="), "m", after: deva_characters_1
sub "म", "m", after: boundary

# Semivowels
sub "य" + maybe("="), "y", after: deva_characters_1
sub "य", "y", after: boundary

sub "र" + maybe("="), "r", after: deva_characters_1
sub "र", "r", after: boundary

sub "ल" + maybe("="), "l", after: deva_characters_1
sub "ल", "l", after: boundary

sub "व" + maybe("="), "v", after: deva_characters_1
sub "व", "v", after: boundary

# Sibilants
sub "श" + maybe("="), "sh", after: deva_characters_1
sub "श", "sh", after: boundary

sub "ष" + maybe("="), "ṣh", after: deva_characters_1
sub "ष", "ṣh", after: boundary

sub "स" + maybe("="), "s", after: deva_characters_1
sub "स", "s", after: boundary

# Dotted variants
sub "क़" + maybe("="), "q", after: deva_characters_1
sub "क़", "q", after: boundary

sub "ख़" + maybe("="), "ḳh", after: deva_characters_1
sub "ख़", "ḳh", after: boundary

# ग़ is "ġ" (with dot above), matching "ġa" / "ġ" in the CHARACTERS table;
# a plain "g" here would make it indistinguishable from ग.
sub "ग़" + maybe("="), "ġ", after: deva_characters_1
sub "ग़", "ġ", after: boundary

sub "ज़" + maybe("="), "z", after: deva_characters_1
sub "ज़", "z", after: boundary

sub "ड़" + maybe("="), "ṙ", after: deva_characters_1
sub "ड़", "ṙ", after: boundary

sub "ढ़" + maybe("="), "ṙh", after: deva_characters_1
sub "ढ़", "ṙh", after: boundary

sub "फ़" + maybe("="), "f", after: deva_characters_1
sub "फ़", "f", after: boundary

# Aspirate
sub "ह" + maybe("="), "h", after: deva_characters_1
sub "ह", "h", after: boundary

# CHARACTERS
parallel {

  # Character table. Consonant letters carry their inherent vowel here
  # ("क" -> "ka"); the "consonant + virama" entries at the end give the bare
  # consonant ("क्" -> "k").

  # I. Independent vowel characters
  sub "अ", "a"
  sub "आ", "ā"
  sub "इ", "i"
  sub "ई", "ī"
  sub "उ", "u"
  sub "ऊ", "ū"
  sub "ऋ", "ṛ"
  sub "ॠ", "ṝ"
  sub "ऌ", "l̤"
  sub "ए", "e"
  sub "ऐ", "ai"
  sub "ओ", "o"
  sub "औ", "au"

  # II. Abbreviated vowel characters

  sub "ा", "ā" # का
  # No trailing space in the replacement: the sign occurs inside words
  # (e.g. कॉफी) and must not split them.
  sub "ॉ", "ā̆" # additional mark: कॉ
  sub "ि", "i" # कि
  sub "ी", "ī" # की
  sub "ु", "u" # कु
  sub "ू", "ū" # कू
  sub "ृ", "ṛ" # कृ
  sub "े", "e" # के
  sub "ै", "ai" # कै
  sub "ो", "o" # को
  sub "ौ", "au" # कौ

  # Consonants (see Note 1)

  # Gutturals
  sub "क", "ka"
  sub "ख", "kha"
  sub "ग", "ga"
  sub "घ", "gha"
  sub "ङ", "ṅa"

  # Palatals
  sub "च", "cha"
  sub "छ", "chha"
  sub "ज", "ja"
  sub "झ", "jha"
  sub "ञ", "ña"

  # Cerebrals
  sub "ट", "ṭa"
  sub "ठ", "ṭha"
  sub "ड", "ḍa"
  sub "ढ", "ḍha"
  sub "ण", "ṇa"

  # Dentals
  sub "त", "ta"
  sub "थ", "tha"
  sub "द", "da"
  sub "ध", "dha"
  sub "न", "na"

  # Labials
  sub "प", "pa"
  sub "फ", "pha"
  sub "ब", "ba"
  sub "भ", "bha"
  sub "म", "ma"

  # Semivowels
  sub "य", "ya"
  sub "र", "ra"
  sub "ल", "la"
  sub "व", "va"

  # Sibilants
  sub "श", "sha"
  sub "ष", "ṣha"
  sub "स", "sa"

  # Dotted variants
  sub "क़", "qa"
  sub "ख़", "ḳha"
  sub "ग़", "ġa"
  sub "ज़", "za"
  sub "ड़", "ṙa"
  sub "ढ़", "ṙha"
  sub "फ़", "fa"

  # Aspirate
  sub "ह", "ha"

  # Anusvāra
  sub "ं", "ṁ"

  # Anunāsika
  sub "ँ", "m̐"

  # Halanta (virama): produces no output of its own
  sub "्", ""

  # Visarga
  sub "ः", "ḥ"

  # V. Ligatures (to cover all ligatures expressible in Unicode)
  #    Implementing pronunciation without a vowel: क् k.

  # Gutturals
  sub "क्", "k"
  sub "ख्", "kh"
  sub "ग्", "g"
  sub "घ्", "gh"
  sub "ङ्", "ṅ"

  # Palatals
  sub "च्", "ch"
  sub "छ्", "chh"
  sub "ज्", "j"
  sub "झ्", "jh"
  sub "ञ्", "ñ"

  # Cerebrals
  sub "ट्", "ṭ"
  sub "ठ्", "ṭh"
  sub "ड्", "ḍ"
  sub "ढ्", "ḍh"
  sub "ण्", "ṇ"

  # Dentals
  sub "त्", "t"
  sub "थ्", "th"
  sub "द्", "d"
  sub "ध्", "dh"
  sub "न्", "n"

  # Labials
  sub "प्", "p"
  sub "फ्", "ph"
  sub "ब्", "b"
  sub "भ्", "bh"
  sub "म्", "m"

  # Semivowels
  sub "य्", "y"
  sub "र्", "r"
  sub "ल्", "l"
  sub "व्", "v"

  # Sibilants
  sub "श्", "sh"
  sub "ष्", "ṣh"
  sub "स्", "s"

  # Dotted variants
  sub "क़्", "q"
  sub "ख़्", "ḳh"
  sub "ग़्", "ġ"
  sub "ज़्", "z"
  sub "ड़्", "ṙ"
  sub "ढ़्", "ṙh"
  sub "फ़्", "f"
}
compose

}