metadata {

authority_id: odni
id: 2004
language: iso-639-2:hin
source_script: Deva
destination_script: Latn
name: Intelligence Community (IC) Standard for the Transliteration of Hindi and Urdu Personal Names (2004)
creation_date: 2004
description: |
  IC STANDARDS FOR TRANSLITERATION OF HINDI AND URDU PERSONAL NAMES

notes:
  - |
    Long/Short Vowels: Long and short vowels are not distinguished in the system:
    The borrowed Arabic name Samir could represent two distinct names, one with a
    long /a/ (Saamir) and one with a long /i/ (Samiir). One solution would be to use
    /ee/ to stand for the long /i/, as is often done (Sameer). The IC Standard will not
    distinguish between these.
  - |
    No distinction is made between: retroflex and non-retroflex consonants; and
    nasalized vowels and vowels followed by /n/.
  - |
    A distinction is drawn between Urdu letters qaf and kaf (and correspondingly,
    Hindi qa and ka).
  - |
    A distinction is drawn between aspirated (e.g., /dh/) and nonaspirated consonants
    (e.g., /d/), with the exception of ch/chh, both represented by /ch/.
  - |
    Digraphs: No distinction is made between digraphs such as /sh/ and single
    contiguous letters such as /s/ followed by /h/.
  - |
    Hyphens: Hyphens (-) are NOT used to connect name elements within a name:
    Abdur Rahman. The single exception to this is the izafat (i.e., linking vowel in
    noun-link-modifier construction of Persian origin), which does show a hyphen
    before the /e/ and a following space: Koh-e Nur (‘mountain of light’), “Jaish-e
    xx” (‘Army of xx’ construction).
  - |
    Names incorporating “din” are written as one unit: Azermuddin, Badruddin,
    Faizuddin, Salahuddin.
  - |
    Names that incorporate Allah as part of the name show the Arabic grammatical
    marker /u/ rather than the /a/ of Allah: Abdullah (not Abdallah).
  - |
    Inherent short vowel /a/ in Devanagari is represented with an /a/ in Roman. Final
    consonants are assumed not to have a short /a/ (e.g., masc. name Ram Lal, not
    Rama Lala).
  - |
    As a general rule, Devanagari va is transcribed as a /v/: Vijay, Vishal, etc.
    Exception: /sw/ combination: Saraswati, Krishnaswami. Urdu wau, however, is
    transcribed as /w/: Wasim, Walid.

}

tests {

# Each case is: test <Devanagari input>, <expected Latin output>.
# The expected strings are what the character rules in this map produce.
# No rule in this map inserts the inherent short /a/, so a bare consonant
# yields only its consonant letter(s) (e.g. "भारत" -> "bhart").

# Plain consonants, dependent vowels and halanta
test "दिल्ली", "dilli"
test "भारत", "bhart"
test "विजय", "vijy"
test "विशाल", "vishal"
test "अब्दुल्ला", "abdulla"

# Anusvara is rendered as "n"
test "संख्या", "snkhya"

# Long and short vowels collapse to the same Latin letter
test "समीर", "smir"

# "व" is always rendered as "v" by this map, including after "स"
test "सरस्वती", "srsvti"
test "कृष्णास्वामी", "krishnasvami"

}

stage {

# CHARACTERS
# Single-pass character table: each `sub` maps one Devanagari sequence to its
# Latin rendering. Multi-codepoint sources (conjuncts such as "क्ष" and "ज्ञ",
# and nukta letters) sit next to their single-letter bases; this relies on the
# `parallel` block preferring the longest matching source -- TODO confirm
# against the Interscript version in use.
parallel {

  # Independent vowel characters (long and short forms share one output)
  sub "अ", "a"
  sub "आ", "a"
  sub "इ", "i"
  sub "ई", "i"
  sub "उ", "u"
  sub "ऊ", "u"
  sub "ऋ", "ri"
  sub "ऌ", "l̤"
  sub "ए", "e"
  sub "ऐ", "ai"
  sub "ओ", "o"
  sub "ऑ", "au"
  sub "औ", "au"

  # Dependent vowels (matras)
  sub "ा", "a"
  sub "ि", "i"
  sub "ी", "i"
  sub "ु", "u"
  sub "ू", "u"
  sub "ृ", "ri"
  sub "े", "e"
  sub "ै", "ai"
  sub "ॅ", "ai"
  sub "ो", "o"
  sub "ौ", "au"
  sub "ॉ", "au"

  # Consonants

  # Gutturals
  sub "क", "k"
  # Conjunct k + halanta + ṣa
  sub "क्ष", "ksha"
  # qa is kept distinct from ka
  sub "क़", "q"
  sub "ख", "kh"
  sub "ख़", "kh"
  sub "ग", "g"
  sub "ग़", "gh"
  sub "घ", "gh"
  sub "ङ", "n"

  # Palatals
  # Aspirated and non-aspirated cha are both rendered "ch"
  sub "च", "ch"
  sub "छ", "ch"
  sub "ज", "j"
  sub "ज़", "z"
  # Conjunct j + halanta + ña. This rule previously repeated the source "झ",
  # leaving the conjunct unmapped and giving "झ" two conflicting outputs.
  sub "ज्ञ", "gya"
  sub "झ", "jh"
  sub "ञ", "n"

  # Cerebrals (retroflex; rendered the same as the dentals below)
  sub "ट", "t"
  sub "ठ", "th"
  sub "ड", "d"
  sub "ड़", "r"
  sub "ढ़", "rh"
  sub "ढ", "dh"
  sub "ण", "n"

  # Dentals
  sub "त", "t"
  sub "थ", "th"
  sub "द", "d"
  sub "ध", "dh"
  sub "न", "n"

  # Labials
  sub "प", "p"
  sub "फ़", "f"
  sub "फ", "ph"
  sub "ब", "b"
  sub "भ", "bh"
  sub "म", "m"

  # Semivowels
  sub "य", "y"
  sub "र", "r"
  sub "ल", "l"
  sub "व", "v"

  # Sibilants
  sub "श", "sh"
  sub "ष", "sh"
  sub "स", "s"

  # Aspirate
  sub "ह", "h"

  # Anusvara (rendered as a plain "n")
  sub "ं", "n"

  # Anunasika (rendered as a plain "n", same as anusvara)
  sub "ँ", "n"

  # Halanta (virama): removed from the output
  sub "्", ""

  # Visarga
  sub "ः", "h"

  # A nukta not consumed by one of the nukta-letter rules above is dropped
  sub "़", ""
}

}