metadata {

authority_id: un
id: 1972
language: iso-639-2:mal
source_script: Mlym
destination_script: Latn
name: Romanization of Malayalam -- UNGEGN 4.0
url: https://www.eki.ee/wgrs/rom1_ml.htm
creation_date: 1972
confirmation_date: 2016
description: |
  The United Nations recommended system was approved in 1972 (II/11) and amended in 1977 (III/12),
  based on a report prepared by D. N. Sharma. The tables and their corrections were published in
  volume II of the conference reports.

  There is no evidence of the use of the system either in India or in international cartographic products.

  Malayalam (Malayāḷam) uses an alphasyllabic script whereby each character represents a syllable rather
  than one sound. Vowels and diphthongs are marked in two ways: as independent characters (used syllable-initially)
  and in an abbreviated form, to denote vowels after consonants. The romanization table is unambiguous but now
  outdated because since the 1970's a new orthography has been introduced and the use of many character
  combinations and ligatures has been simplified. Nevertheless the user of the romanization table would have to
  recognize many ligatures not given in the original table. The system is mostly reversible but there exist some
  ambiguities in the romanization of vowels (independent vs. abbreviated characters) and
  consonants (ligatures vs. character sequences), particularly in the case of older orthography.

notes:
  - |
    ടു For traditional script forms ഖു khu, മു mu, etc., exceptionally കു ku (ക്കു kku, ങ്കു ṅku), ഗു gu, ഛു chhu, ജു ju, ണു ṇu, തു tu, നു nu (ന്നു nnu), ഭു bhu, രു ru, ശു shu, ഹു hu
  - |
    ടൂ For traditional script forms ഖൂ khū, മൂ mū, etc., exceptionally കൂ kū, ഗൂ gū, ഛൂ chhū, ജൂ jū, ണൂ ṇū, തൂ tū, നൂ nū, ഭൂ bhū, രൂ rū, ശൂ shū, ഹൂ hū
  - |
    ടൃ Traditionally written in conjunction with the consonant character: കൃ kṛ .
  - |
    ടൗ In the older spelling: ൌ.
  - |
    ട്‌  Indicates absence of the inherent short [a] vowel, e.g. ക്‌ k , സ്‌ s. Special, so-called chillu-forms of consonants with that symbol: ണ്‍ ṇ, ന്‍ n, ര്‍ r, ല്‍ l, ള്‍ ḷ.
  - |
    റ  Special ligature: റ്റ ṭ̈.

}

tests {

# Each `test` pairs a Malayalam input string with the Latin output expected from this map.
# Behaviour pinned by these fixtures includes:
#   - Malayalam digits are converted (൭൨,൫൦൦ -> 72,500) while ASCII digits and
#     punctuation are left as they are (6 -> 6);
#   - a consonant followed by a vowel sign or virama loses its inherent "a"
#     (ചൈ -> chai, ന്ന് -> nn);
#   - chillu letters romanize as bare consonants (ൽ -> l, ൾ -> ḷ, ൻ -> n, ർ -> r);
#   - the sequence റ്റ is expected as "ṟṟ" (മാറ്റാൻ -> māṟṟān), not as a single
#     special ligature letter.
# NOTE(review): some inputs appear to contain zero-width joiner / non-joiner characters
# (legacy chillu spellings such as ന്‍ and explicit viramas such as ത്ത്‌); they are
# invisible in most editors, so edit these lines with care.
test "ചൈനയ്ക്കെതിരെ ലഡാക്കിൽ സദാസജ്ജം; യുഎസിൽനിന്ന് ൭൨,൫൦൦ സിഗ്–൧൬ റൈഫിൾ", "chainaykkĕtirĕ laḍākkil sadāsajjaṃ; yuĕsilninn 72,500 sig–16 ṟaiphiḷ"
test "സർഗഭൂമിക’യ്ക്കില്ല; ലളിതച്ചേച്ചി അങ്ങനെ പറഞ്ഞിട്ടുണ്ടാവില്ല: ആർഎൽവി രാമകൃഷ്ണൻ", "sargabhūmika’ykkilla; laḷitachchechchi aṅṅanĕ paṟaññiṭṭuṇṭāvilla: ārĕlvi rāmakṛṣhṇan"
test "സ്വർണക്കടത്ത്‌: ഫൈസൽ ഫരീദും റബിന്‍സും ദുബായിൽ അറസ്റ്റിലായെന്ന്‌ എന്‍ഐഎ", "svarṇakkaṭatt: phaisal pharīduṃ ṟabinsuṃ dubāyil aṟasṟṟilāyĕnn ĕnaiĕ"
test "വരുമോ ചൈനയുടെ വാക്സീൻ?; ആഗോള ഉപയോഗത്തിന് ഡബ്ല്യുഎച്ച്ഒയുമായി ചർച്ച", "varumo chainayuṭĕ vāksīn?; āgoḷa upayogattin ḍablyuĕchchŏyumāyi charchcha"
test "കുട്ടികളുടെ മാനസിക പിരിമുറുക്കം മാറ്റാൻ പരിശീലനം; ക്ലാസുമായി പോക്സോ പ്രതി", "kuṭṭikaḷuṭĕ mānasika pirimuṟukkaṃ māṟṟān parishīlanaṃ; klāsumāyi pokso prati"
test "ആദ്യം അമിത് ഷാ, ഇപ്പോൾ മോദി; ബിജെപിയെ പുണരാൻ ജഗൻ; ആന്ധ്രയിലെ കരുനീക്കങ്ങൾ", "ādyaṃ amit ṣhā, ippoḷ modi; bijĕpiyĕ puṇarān jagan; āndhrayilĕ karunīkkaṅṅaḷ"
test "ലഹരിമരുന്ന് കേസ്: ബിനീഷ് കോടിയേരിയെ ഇഡി 6 മണിക്കൂർ ചോദ്യം ചെയ്തു", "laharimarunn kes: binīṣh koṭiyeriyĕ iḍi 6 maṇikkūr chodyaṃ chĕytu"
test "ഈന്തപ്പഴം വിതരണം ചെയ്തത് ശിവശങ്കര്‍ പറഞ്ഞതു പ്രകാരം: ടി.വി അനുപമയുടെ മൊഴി", "īntappaḻaṃ vitaraṇaṃ chĕytat shivashaṅkar paṟaññatu prakāraṃ: ṭi.vi anupamayuṭĕ mŏḻi"
test "൫൦൦൦ മണിക്കൂർ കാത്തിരിക്കാൻ തയാറെന്ന് രാഹുൽ: ഒടുവിൽ വഴങ്ങി ഹരിയാന", "5000 maṇikkūr kāttirikkān tayāṟĕnn rāhul: ŏṭuvil vaḻaṅṅi hariyāna"
test "കാരണം ഷോര്‍ട്ട്‌സര്‍ക്യൂട്ടല്ല; കത്തിയത് ഫയല്‍ മാത്രം, സാനിറ്റൈസര്‍ ഉള്‍പ്പെടെ കത്തിയില്ല", "kāraṇaṃ ṣhorṭṭsarkyūṭṭalla; kattiyat phayal mātraṃ, sāniṟṟaisar uḷppĕṭĕ kattiyilla"
test "വിമൺ സയൻറിസ്റ്റ്സ് സ്കീം", "vimaṇ sayanṟisṟṟs skīṃ"

}

aliases {

# mlym_chars_1: the set of Malayalam combining signs that, when they follow a consonant,
# replace or suppress its inherent "a". The consonant rules in the stage use it as
# their `after:` condition.
# Members, in order:
#   U+0D3E ാ (aa)   U+0D3F ി (i)    U+0D40 ീ (ii)   U+0D41 ു (u)    U+0D42 ൂ (uu)
#   U+0D43 ൃ (vocalic r)            U+0D46 െ (e)    U+0D47 േ (ee)   U+0D48 ൈ (ai)
#   U+0D4A ൊ (o)    U+0D4B ോ (oo)   U+0D4C ൌ (au)   U+0D4D ് (virama)
# Note: U+0D57 (ൗ, the au length mark) is NOT a member of this set.
def_alias mlym_chars_1, any("\u0d3e\u0d3f\u0d40\u0d41\u0d42\u0d43\u0d46\u0d47\u0d48\u0d4a\u0d4b\u0d4c\u0d4d")

}

stage {

# Overview: the rules in this stage run top to bottom.
#   1. PRE-NORMALISATION unifies the two spellings of the au vowel sign.
#   2. RULES strip the inherent "a" from a consonant that is followed by a
#      vowel sign or virama.
#   3. CHARACTERS (a `parallel` block) romanizes every remaining character.
#   4. `compose` finishes the stage.

# PRE-NORMALISATION
# The au vowel sign has two spellings: modern ൗ (U+0D57) and older ൌ (U+0D4C).
# Only U+0D4C is a member of mlym_chars_1 and has an "au" mapping below, so the
# modern sign used to be dropped (കൗ came out as "ka" instead of "kau").
# Rewrite the modern sign -- and the canonically decomposed pair U+0D46 U+0D57 --
# to U+0D4C so that both spellings romanize as "au".
sub "\u0d46\u0d57", "\u0d4c"
sub "\u0d57", "\u0d4c"

# RULES
# A consonant directly followed (`after:` is a lookahead) by one of the signs in
# mlym_chars_1 is romanized WITHOUT its inherent "a"; the sign itself is handled
# later in the parallel block (vowel signs become their vowel, virama becomes "").
sub "ക", "k", after: mlym_chars_1
sub "ഖ", "kh", after: mlym_chars_1
sub "ഗ", "g", after: mlym_chars_1
sub "ഘ", "gh", after: mlym_chars_1
sub "ങ", "ṅ", after: mlym_chars_1
sub "ച", "ch", after: mlym_chars_1
sub "ഛ", "chh", after: mlym_chars_1
sub "ജ", "j", after: mlym_chars_1
sub "ഝ", "jh", after: mlym_chars_1
sub "ഞ", "ñ", after: mlym_chars_1
sub "ട", "ṭ", after: mlym_chars_1
sub "ഠ", "ṭh", after: mlym_chars_1
sub "ഡ", "ḍ", after: mlym_chars_1
sub "ഢ", "ḍh", after: mlym_chars_1
sub "ണ", "ṇ", after: mlym_chars_1
sub "ത", "t", after: mlym_chars_1
sub "ഥ", "th", after: mlym_chars_1
sub "ദ", "d", after: mlym_chars_1
sub "ധ", "dh", after: mlym_chars_1
sub "ന", "n", after: mlym_chars_1
sub "പ", "p", after: mlym_chars_1
sub "ഫ", "ph", after: mlym_chars_1
sub "ബ", "b", after: mlym_chars_1
sub "ഭ", "bh", after: mlym_chars_1
sub "മ", "m", after: mlym_chars_1
sub "യ", "y", after: mlym_chars_1
sub "ര", "r", after: mlym_chars_1
sub "ല", "l", after: mlym_chars_1
sub "വ", "v", after: mlym_chars_1
sub "ശ", "sh", after: mlym_chars_1
sub "ഷ", "ṣh", after: mlym_chars_1
sub "സ", "s", after: mlym_chars_1
sub "ഹ", "h", after: mlym_chars_1
sub "ള", "ḷ", after: mlym_chars_1
sub "ഴ", "ḻ", after: mlym_chars_1
sub "റ", "ṟ", after: mlym_chars_1
# NOTE(review): by this point the ക rule above has already rewritten the ക of ക്ഷ
# (it is followed by a virama), so this rule looks shadowed; the output is the
# same "kṣh" either way. Kept unchanged.
sub "ക്ഷ", "kṣh", after: mlym_chars_1

# CHARACTERS
# Everything that is still in Malayalam script is romanized here in one parallel pass.
parallel {
  # I. Independent vowel characters
  sub "അ", "a"
  sub "ആ", "ā"
  sub "ഇ", "i"
  sub "ഈ", "ī"
  sub "ഉ", "u"
  sub "ഊ", "ū"
  sub "ഋ", "ṛ"
  sub "എ", "ĕ"
  sub "ഏ", "e"
  sub "ഐ", "ai"
  sub "ഒ", "ŏ"
  sub "ഓ", "o"
  sub "ഔ", "au"

  # II. Abbreviated (dependent) vowel characters
  sub "ാ", "ā"
  sub "ി", "i"
  sub "ീ", "ī"
  sub "ു", "u"
  sub "ൂ", "ū"
  sub "ൃ", "ṛ"
  sub "െ", "ĕ"
  sub "േ", "e"
  sub "ൈ", "ai"
  sub "ൊ", "ŏ"
  sub "ോ", "o"
  # Also covers the modern sign ൗ, which the pre-normalisation step rewrote to ൌ.
  sub "ൌ", "au"

  # III. Other symbols
  sub "ഃ", "ḥ"
  sub "ം", "ṃ"
  # Virama: the preceding consonant has already lost its "a", so nothing is emitted.
  sub "്", ""

  # IV. Consonant characters
  # Only consonants NOT followed by a vowel sign/virama reach this point, so each
  # one carries its inherent "a".
  sub "ക", "ka"
  sub "ഖ", "kha"
  sub "ഗ", "ga"
  sub "ഘ", "gha"
  sub "ങ", "ṅa"
  sub "ച", "cha"
  sub "ഛ", "chha"
  sub "ജ", "ja"
  sub "ഝ", "jha"
  sub "ഞ", "ña"
  sub "ട", "ṭa"
  sub "ഠ", "ṭha"
  sub "ഡ", "ḍa"
  sub "ഢ", "ḍha"
  sub "ണ", "ṇa"
  sub "ത", "ta"
  sub "ഥ", "tha"
  sub "ദ", "da"
  sub "ധ", "dha"
  sub "ന", "na"
  sub "പ", "pa"
  sub "ഫ", "pha"
  sub "ബ", "ba"
  sub "ഭ", "bha"
  sub "മ", "ma"
  sub "യ", "ya"
  sub "ര", "ra"
  sub "ല", "la"
  sub "വ", "va"
  sub "ശ", "sha"
  sub "ഷ", "ṣha"
  sub "സ", "sa"
  sub "ഹ", "ha"
  # NOTE(review): looks shadowed for the same reason as the ക്ഷ rule in RULES
  # (the ക has already become "k"); the result is "kṣha" either way.
  sub "ക്ഷ", "kṣha"
  sub "ള", "ḷa"
  sub "ഴ", "ḻa"
  sub "റ", "ṟa"
  # NOTE(review): the റ rule in RULES has already turned the first റ of റ്റ into "ṟ"
  # (it is followed by a virama), so this ligature rule appears never to match and
  # റ്റ comes out as "ṟṟ" + vowel -- which is what the test fixtures expect. Kept unchanged.
  sub "റ്റ", "ṭ̈"

  # NOTE(review): the next two characters look like the Devanagari virama and nukta,
  # presumably inherited from another map -- confirm before removing.
  sub "्", ""
  sub "़", ""
  # Joiners only steer glyph shaping (e.g. legacy chillu spellings) and carry no sound.
  sub "\u200d", "" # zero-width joiner
  sub "\u200c", "" # zero-width non-joiner

  # numbers
  sub "൦", "0"
  sub "൧", "1"
  sub "൨", "2"
  sub "൩", "3"
  sub "൪", "4"
  sub "൫", "5"
  sub "൬", "6"
  sub "൭", "7"
  sub "൮", "8"
  sub "൯", "9"
  # Malayalam signs for ten, one hundred and one thousand
  sub "൰", "10"
  sub "൱", "100"
  sub "൲", "1000"

  # chillu-forms of consonants (note 5): atomic chillu letters become bare consonants
  sub "ൿ", "k"
  sub "ൺ", "ṇ"
  sub "ൻ", "n"
  sub "ൽ", "l"
  sub "ൾ", "ḷ"
  sub "ർ", "r"
  # NOTE(review): സ before a virama is already rewritten to "s" in RULES, so this
  # rule looks redundant; kept unchanged.
  sub "സ്", "s"
}

# Presumably Unicode-composes (NFC) the romanized output so that base letters and
# combining diacritics are emitted in precomposed form -- confirm against Interscript docs.
compose

}