metadata {

authority_id: un
id: 1972
language: iso-639-2:tel
source_script: Telu
destination_script: Latn
name: Romanization of Telugu -- UNGEGN 4.0 (1972)
url: https://www.eki.ee/wgrs/rom1_te.htm
creation_date: 1972
confirmation_date: 2016
description: |
  The United Nations recommended system was approved in 1972 (II/11) and amended in 1977 (III/12),
  based on a report prepared by D. N. Sharma. The tables and their corrections were published in volume II
  of the conference reports.

  There is no evidence of the use of the system either in India or in international cartographic products.

  Telugu uses an alphasyllabic script whereby each character represents a syllable rather than one sound.
  Vowels and diphthongs are marked in two ways: as independent characters (used syllable-initially) and in
  an abbreviated form, to denote vowels after consonants. The romanization table is unambiguous. The system is
  mostly reversible but there may exist some ambiguities in the romanization of vowels (independent vs. abbreviated characters)
  and consonants (combinations with subscript consonants vs. character sequences).

notes:

  - |
    Only the vowel forms that appear at the beginning of a syllable are listed; the forms used for
    vowels following a consonant can be found in grammars; no distinction between the two is
    made in transliteration.
  - |
    The vowel a is implicit after all consonants and consonant clusters and is supplied in
    transliteration, with the following exceptions:
    a) when another vowel is indicated by its appropriate sign; and
    b) when the absence of any vowel is indicated by the superscript sign ( ్ ) called valapalagilaka.
  - |
    Exception: Sunna is transliterated by:
    a) ṅ before gutturals,
    b) ñ before palatals,
    c) ṇ before cerebrals,
    d) n before dentals, and
    e) m before labials.
  - |
    Ardhasunna before gutturals and palatal, cerebral, and dental occlusives is transliterated n̐.
    Before labials, sibilants, semivowels, the aspirate, vowels, and in final position it is
    transliterated m̐.

}

tests {

# Each entry is a pair: Telugu input, expected Latin output of this map.

# Running-text samples (words, phrases, punctuation).
test "తమిళనాడు", "tamiḷanāḍu"
test "తంటికొండ ఘటన: ఆగని మృత్యుఘోష", "taṃṭikŏṃḍa ghaṭana: āgani mṛtyughoṣha"
test "మళ్లీ వివాదం: అమితాబ్‌పై కేసు", "maḷlī vivādaṃ: amitābpai kesu"
test "వరద సాయం పేరుతో వైట్ కాలర్ దోపిడీ", "varada sāyaṃ peruto vaiṭ kālar dopiḍī"
test "రెండో విడత జీఎస్టీ పరిహారం", "rĕṃḍo viḍata jīĕsṭī parihāraṃ"
test "నితీష్‌ కుమార్‌ అధ్యాయం ముగిసినట్లేనా?!", "nitīṣh kumār adhyāyaṃ mugisinaṭlenā?!"
test "వారిపై జీవితాంతం నిషేధం విధించండి!", "vāripai jīvitāṃtaṃ niṣhedhaṃ vidhiṃchaṃḍi!"
test "మరో లాక్‌డౌన్‌ వల్ల అన్నీ అనర్థాలే!", "maro lākḍaun valla annī anarthāle!"
test "జెసిండా మరో సంచలనం", "jĕsiṃḍā maro saṃchalanaṃ"
test "స్వీయ నిర్బంధంలోకి డబ్ల్యూహెచ్‌ఓ డైరెక్టర్‌", "svīya nirbaṃdhaṃloki ḍablyūhĕcho ḍairĕkṭar"
test "కరోనాపై యుద్ధంలో సమిధలు", "karonāpai yuddhaṃlo samidhalu"
test "అమెరికా ఎన్నికలు: ‘పెద్దన్న’ ఎవరో?!", "amĕrikā ĕnnikalu: ‘pĕddanna’ ĕvaro?!"
# Telugu digits are expected to come out as ASCII digits.
test "౪౬౨౬౯", "46269"
# Word-final consonant carrying the vowel-suppressing mark: no trailing "a".
test "రంగపూర్", "raṃgapūr"
# subscript consonant characters
# (consonant + vowel-suppressing mark + consonant: only the last consonant keeps the inherent "a")
test "ట్ట", "ṭṭa"
test "ప్ప", "ppa"
test "చ్చ", "chcha"

}

aliases {

# telu_chars_1: characters that, when they directly follow a consonant,
# replace or suppress its inherent "a":
#   U+0C3E..U+0C43  vowel signs aa, i, ii, u, uu, vocalic r
#   U+0C46..U+0C48  vowel signs e, ee, ai
#   U+0C4A..U+0C4C  vowel signs o, oo, au
#   U+0C4D          virama (valapalagilaka), i.e. no vowel at all
# U+0C6C (TELUGU DIGIT SIX) was previously part of this set; it is a digit,
# not a vowel sign, and made a consonant followed by "౬" lose its inherent
# "a" ("క౬" came out as "k6" instead of "ka6"), so it has been removed.
def_alias telu_chars_1, any("\u0c3e\u0c3f\u0c40\u0c41\u0c42\u0c43\u0c46\u0c47\u0c48\u0c4a\u0c4b\u0c4c\u0c4d")

}

stage {

# RULES
# Context rules for bare consonants. Each rule rewrites a consonant WITHOUT
# the inherent "a", but only when the consonant is immediately followed by a
# character from the telu_chars_1 alias (chiefly the dependent vowel signs and
# the virama U+0C4D). The following vowel sign / virama is not consumed here;
# it is handled by the character table later in this stage, which also supplies
# the "consonant + a" form for every consonant these rules did not touch.

# Gutturals
sub "క", "k", after: telu_chars_1
sub "ఖ", "kh", after: telu_chars_1
sub "గ", "g", after: telu_chars_1
sub "ఘ", "gh", after: telu_chars_1
sub "ఙ", "ṅ", after: telu_chars_1
# Palatals
sub "చ", "ch", after: telu_chars_1
sub "ఛ", "chh", after: telu_chars_1
sub "జ", "j", after: telu_chars_1
sub "ఝ", "jh", after: telu_chars_1
sub "ఞ", "ñ", after: telu_chars_1
# Cerebrals
sub "ట", "ṭ", after: telu_chars_1
sub "ఠ", "ṭh", after: telu_chars_1
sub "డ", "ḍ", after: telu_chars_1
sub "ఢ", "ḍh", after: telu_chars_1
sub "ణ", "ṇ", after: telu_chars_1
# Dentals
sub "త", "t", after: telu_chars_1
sub "థ", "th", after: telu_chars_1
sub "ద", "d", after: telu_chars_1
sub "ధ", "dh", after: telu_chars_1
sub "న", "n", after: telu_chars_1
# Labials
sub "ప", "p", after: telu_chars_1
sub "ఫ", "ph", after: telu_chars_1
sub "బ", "b", after: telu_chars_1
sub "భ", "bh", after: telu_chars_1
sub "మ", "m", after: telu_chars_1
# Semivowels (ఱ is romanized the same way as ర)
sub "య", "y", after: telu_chars_1
sub "ర", "r", after: telu_chars_1
sub "ఱ", "r", after: telu_chars_1
sub "ల", "l", after: telu_chars_1
sub "వ", "v", after: telu_chars_1
# Sibilants
sub "శ", "sh", after: telu_chars_1
sub "ష", "ṣh", after: telu_chars_1
sub "స", "s", after: telu_chars_1
# Aspirate, followed by ళ
sub "హ", "h", after: telu_chars_1
sub "ళ", "ḷ", after: telu_chars_1

# CHARACTERS
parallel {

  # Character table: independent vowels, dependent vowel signs, nasal/aspirate
  # marks, consonants (here always with the inherent "a"; the vowel-less
  # consonant forms are produced by the context rules earlier in this stage),
  # characters that are simply dropped, and digits.

  # I. Independent vowel characters

  sub "అ", "a"
  sub "ఆ", "ā"
  sub "ఇ", "i"
  sub "ఈ", "ī"
  sub "ఉ", "u"
  sub "ఊ", "ū"
  sub "ఋ", "ṛ"
  sub "ౠ", "ṝ"
  sub "ఎ", "ĕ"
  sub "ఏ", "e"
  sub "ఐ", "ai"
  sub "ఒ", "ŏ"
  sub "ఓ", "o"
  sub "ఔ", "au"

  # II. Abbreviated vowel characters and other symbols

  sub "ా", "ā"
  sub "ి", "i" # Variations: చి chi, ని ni, యి yi, లి li, etc.
  sub "ీ", "ī"
  sub "ు", "u" # Variations: జు ju, పు pu, వు vu, etc.
  sub "ూ", "ū"
  sub "ృ", "ṛ"
  sub "ె", "ĕ"
  sub "ే", "e"
  sub "ై", "ai"
  sub "ొ", "ŏ" # Variations: మొ mŏ, యొ yŏ.
  sub "ో", "o" # Variations: మో mo, యో yo.
  sub "ౌ", "au"
  # NOTE(review): the next source character looks like the Bengali candrabindu,
  # not a Telugu sign -- presumably carried over from another map; kept as is.
  sub "ঁ", "ṁ"
  sub "ఁ", "m̐" # Signified historic nasal sound, now obsolete.
  sub "ః", "ḥ"
  # Sunna is always rendered "ṃ" here; the class-dependent variants
  # (ṅ/ñ/ṇ/n/m) described in the metadata notes are not implemented.
  sub "ం", "ṃ"
  sub "\u0c4d", "" # End-of-syllable mark (i.e., a consonant without a vowel): రంగపూర్ raṃgapūr.

  # III. Consonant characters

  # Gutturals
  sub "క", "ka"
  sub "ఖ", "kha"
  sub "గ", "ga"
  sub "ఘ", "gha"
  sub "ఙ", "ṅa"

  # Palatals
  sub "చ", "cha"
  sub "ఛ", "chha"
  sub "జ", "ja"
  sub "ఝ", "jha"
  sub "ఞ", "ña"

  # Cerebrals
  sub "ట", "ṭa"
  sub "ఠ", "ṭha"
  sub "డ", "ḍa"
  sub "ఢ", "ḍha"
  sub "ణ", "ṇa"

  # Dentals
  sub "త", "ta"
  sub "థ", "tha"
  sub "ద", "da"
  sub "ధ", "dha"
  sub "న", "na"

  # Labials
  sub "ప", "pa"
  sub "ఫ", "pha"
  sub "బ", "ba"
  sub "భ", "bha"
  sub "మ", "ma"

  # Semivowels
  sub "య", "ya"
  sub "ర", "ra" # Variant: ఱ.
  sub "ఱ", "ra"
  sub "ల", "la"
  sub "వ", "va"

  # Sibilants
  sub "శ", "sha"
  sub "ష", "ṣha"
  sub "స", "sa"

  # Aspirate
  sub "హ", "ha"
  sub "ళ", "ḷa"

  # IV. Characters that are dropped from the output

  sub "\u09CD", "" # Used for joining
  # Length marks U+0C55 / U+0C56. The patterns used to contain a stray tab
  # after the mark, so they only matched "mark + TAB" and removed the tab as
  # well; they now match the bare mark.
  sub "ౕ", ""
  sub "ౖ", ""
  # NOTE(review): the next two look like the Devanagari virama and nukta.
  sub "्", ""
  sub "़", ""
  # Zero-width characters, written as escapes so they are visible in the source.
  sub "\u200d", "" # Used for joining
  sub "\u200c", "" # Used for non joining

  # numbers

  sub "౦", "0"
  sub "౧", "1"
  sub "౨", "2"
  sub "౩", "3"
  sub "౪", "4"
  sub "౫", "5"
  sub "౬", "6"
  sub "౭", "7"
  sub "౮", "8"
  sub "౯", "9"
}

compose

}