# Map metadata (key: value pairs).
#   language           - ISO 639-2 code "tha" (Thai).
#   source_script      - "Thai".
#   destination_script - "Zsym"; presumably the ISO 15924 code for symbols,
#                        used here for IPA output -- TODO confirm.
#   url, creation_date, adoption_date and notes are currently left empty.
metadata {

authority_id: var
id: phonemic
language: iso-639-2:tha
source_script: Thai
destination_script: Zsym
name: Phonemic Thai to IPA
url:
creation_date:
adoption_date:
description:
  This maps phonemic Thai to IPA.

notes: |

}

# Test vectors: each `test` pairs an input string with its expected output.
# Inputs are phonemic Thai with "-" between syllables; expected outputs are
# IPA with "." between syllables, each syllable ending in tone letters
# (˩ ˨ ˧ ˦ ˥).
tests {

test "คฺวาม-วาว", "kʰwaːm˧.waːw˧"
test "ที่-เปิด-ขวด", "tʰiː˥˩.pɤːt̚˨˩.kʰua̯t̚˨˩"
test "พื้น-หฺลัง", "pʰɯːn˦˥.laŋ˩˩˦"
test "สม-ชาย", "som˩˩˦.t͡ɕʰaːj˧"
test "อ็่อน-ค่อ", "ʔɔn˨˩.kʰɔː˥˩"
test "โค-ระ-ยอ", "kʰoː˧.ra˦˥.jɔː˧"
test "ไล่-ออก", "laj˥˩.ʔɔːk̚˨˩"
test "ไส้-เลื่อน", "saj˥˩.lɯa̯n˥˩"
test "ไอ-กฺรน", "ʔaj˧.kron˧"
test "ไอ-ซี-ยู", "ʔaj˧.siː˧.juː˧"
test "ไอ๊ส-แลน", "ʔajs˦˥.lɛːn˧"

}

# This map has been partially converted by the bin/maps_v1_to_v2 script.
# The section below requires human attention. Remember to remove this
# comment and move the converted map to the 'maps/' directory. Please also
# note that the maps-staging directory will be cleaned up whenever you run
# the bin/maps_v1_to_v2 script. You should be particularly concerned about
# any regular expressions found in this file, about advanced expressions in
# parallel {} parts, and about the order of particular parts of the stage.

stage {

# Invoke the secryst step with the "thai-ipa" model ahead of the substitution
# rules below.
# NOTE(review): what this model consumes and emits is not visible in this
# file -- confirm how its output interacts with the rules that follow.
secryst model: "thai-ipa"

# RULES
# Cluster Onsets:   กขคดตทบปผพฟสห
# Cluster Onsets in groups:
# CO - High  ขผสห
# CO - Mid   กดตบป
# CO - Low   คทพฟ
# All Onsets:       กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรลวศษสหฬอฮ
# High Consonants:  ขฃฉฐถผฝศษสห
# Mid Consonants:   กจฎฏดตบปอ
# Low Consonants:   คฅฆงชซฌญฑฒณทธนพฟภมยรลวฬฮ
# Tone:             [\u0304\u0e48-\u0e4B] 0: 0304  1: 0E48  2: 0E49  3: 0E4A  4: 0E4B
# Vowels right before tone marks:    [\u0e31\u0e33-\u0e39]

# ONSET MARKING AND TONE POSITION
#
# Each rule below rewrites
#   <boundary or preposed vowel><onset><vowel mark?><tone mark?><separators>
# into
#   <boundary><onset>–<vowel mark><separators>_<class><tone mark>
# where "–" marks the end of the onset, "_H" / "_M" / "_L" records the onset's
# consonant class, and the tone mark is moved behind that class tag.
#
# Character classes used in every rule:
#   vowel mark: one of U+0E31, U+0E33..U+0E39 -> any(["ั", any("ำ".."ู")])
#   tone mark:  one of U+0304, U+0E48..U+0E4B -> any(["̄", any("่".."๋")])
# These must be alternations (array elements separated by ","). Written as
# `"ั" + any("ำ".."ู")` they would be a concatenation demanding two characters
# in a row, so a syllable such as ที่ would never match.
#
# NOTE(review): the separator group `maybe_some(any(" -"))` only matches
# spaces and hyphens; if the pre-conversion pattern was a negated class
# ("rest of the syllable"), closed syllables still need attention -- confirm
# against the v1 map.

# Handle Consonantal อ, which can never be part of a consonant cluster
# in phonemic Thai
sub capture(any([line_start, " ", "-", any("เแไใโ")])) + capture("อ") + maybe(capture(any(["็", any(["ั", any("ำ".."ู")])]))) + maybe(capture(any(["̄", any("่".."๋")]))) + capture(maybe_some(any(" -"))), ref( 1 ) + ref( 2 ) + "–" + ref( 3 ) + ref( 5 ) + "_M" + ref( 4 )

# Handle Consonantal ยว
sub capture(any([line_start, " ", "-", any("เแไใโ")])) + capture(any("ยว")) + maybe(capture(any(["็", any(["ั", any("ำ".."ู")])]))) + maybe(capture(any(["̄", any("่".."๋")]))) + capture(maybe_some(any(" -"))), ref( 1 ) + ref( 2 ) + "–" + ref( 3 ) + ref( 5 ) + "_L" + ref( 4 )

# Clusters whose second member is ย or ว take the class of the first member
# (high, low, mid respectively).
sub capture(any([line_start, " ", "-", any("เแไใโ")])) + capture(any("ขผสห") + "ฺ" + any("ยว")) + maybe(capture(any(["็", any(["ั", any("ำ".."ู")]) + maybe("็")]))) + maybe(capture(any(["̄", any("่".."๋")]))) + capture(maybe_some(any(" -"))), ref( 1 ) + ref( 2 ) + "–" + ref( 3 ) + ref( 5 ) + "_H" + ref( 4 )
sub capture(any([line_start, " ", "-", any("เแไใโ")])) + capture(any("คทพฟ") + "ฺ" + any("ยว")) + maybe(capture(any(["็", any(["ั", any("ำ".."ู")]) + maybe("็")]))) + maybe(capture(any(["̄", any("่".."๋")]))) + capture(maybe_some(any(" -"))), ref( 1 ) + ref( 2 ) + "–" + ref( 3 ) + ref( 5 ) + "_L" + ref( 4 )
sub capture(any([line_start, " ", "-", any("เแไใโ")])) + capture(any("กดตบป") + "ฺ" + any("ยว")) + maybe(capture(any(["็", any(["ั", any("ำ".."ู")]) + maybe("็")]))) + maybe(capture(any(["̄", any("่".."๋")]))) + capture(maybe_some(any(" -"))), ref( 1 ) + ref( 2 ) + "–" + ref( 3 ) + ref( 5 ) + "_M" + ref( 4 )

# Move tones to the end of the syllable
# (high, mid and low class onsets respectively; each rule only applies when
# the match is followed by a space, a hyphen or the end of the line)
sub capture(any([line_start, " ", "-", any("เแไใโ")])) + capture(any([any("ขผสห") + "ฺ" + any("กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมรลศษสหฬอฮ"), any("ขฃฉฐถผฝศษสห")])) + maybe(capture(any(["็", any(["ั", any("ำ".."ู")]) + maybe("็")]))) + maybe(capture(any(["̄", any("่".."๋")]))) + capture(maybe_some(any("- ฺ"))), ref( 1 ) + ref( 2 ) + "–" + ref( 3 ) + ref( 5 ) + "_H" + ref( 4 ), after: any([" ", "-", line_end])
sub capture(any([line_start, " ", "-", any("เแไใโ")])) + capture(any([any("กดตบป") + "ฺ" + any("กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมรลศษสหฬฮ"), any("กจฎฏดตบป")])) + maybe(capture(any(["็", any(["ั", any("ำ".."ู")]) + maybe("็")]))) + maybe(capture(any(["̄", any("่".."๋")]))) + capture(maybe_some(any("- ฺ"))), ref( 1 ) + ref( 2 ) + "–" + ref( 3 ) + ref( 5 ) + "_M" + ref( 4 ), after: any([" ", "-", line_end])
sub capture(any([line_start, " ", "-", any("เแไใโ")])) + capture(any([any("คทพฟ") + "ฺ" + any("กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมรลศษสหฬอฮ"), any("คฅฆงชซฌญฑฒณทธนพฟภมรลฬฮ")])) + maybe(capture(any(["็", any(["ั", any("ำ".."ู")]) + maybe("็")]))) + maybe(capture(any(["̄", any("่".."๋")]))) + capture(maybe_some(any("- ฺ"))), ref( 1 ) + ref( 2 ) + "–" + ref( 3 ) + ref( 5 ) + "_L" + ref( 4 ), after: any([" ", "-", line_end])

# If ว is followed by another vowel, then it is actually part of the cluster.
# The vowel group is a single character out of าอำะั or U+0E33..U+0E39,
# optionally followed by ็; the "–" marker is moved behind ว / ร / ล.
sub capture(any([line_start, " ", "-", any("เแไใโ")])) + capture(any("ขผสหกดตบปคทพฟ")) + "–" + capture(any("วรล")) + capture(any([any("าอำะั"), any("ำ".."ู")]) + maybe("็")) + maybe(capture(any(["̄", any("่".."๋")]))) + capture(maybe_some(any(" -")) + "_" + any("HML")), ref( 1 ) + ref( 2 ) + ref( 3 ) + "–" + ref( 4 ) + ref( 6 ) + ref( 5 )

# Same adjustment after a preposed vowel, when one or more final consonants
# precede the class tag.
sub capture(any("เแไใโ")) + capture(any("ขผสหกดตบปคทพฟ")) + "–" + capture(any("วรล")) + capture(maybe(any([any("าอำะั"), any("ำ".."ู")])) + maybe("็")) + maybe(capture(any(["̄", any("่".."๋")]))) + capture(some(any("คฅฆกขฃพภบปฟฌฑฒทธจฎฏดตฐถศษสชมญณนรลฬง")) + "_" + any("HML")), ref( 1 ) + ref( 2 ) + ref( 3 ) + "–" + ref( 4 ) + ref( 6 ) + ref( 5 )

# Move preceding vowels to the right position: a vowel written before the
# onset (เ แ ไ ใ โ, group 2) is swapped with the consonant(s) that follow it
# (group 3). The first rule handles two-consonant onsets (optionally joined
# by ฺ), the second a single consonant.
sub capture(any([line_start, " ", "-", any("เแไใโ")])) + capture(any("เแไใโ")) + capture(any("ปคฟตดผทพกขบหส") + maybe("ฺ") + any("กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรลวศษสหฬอฮ")), ref( 1 ) + ref( 3 ) + ref( 2 )
sub capture(any([line_start, " ", "-", any("เแไใโ")])) + capture(any("เแไใโ")) + capture(any("กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรลวศษสหฬอฮ")), ref( 1 ) + ref( 3 ) + ref( 2 )

# Add inherent vowel if there is nothing between the onset and final consonants
# (inserts "โ" before and "ะ" after the "–" marker, i.e. the "โ–ะ" vowel).
sub capture(any("กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรลวศษสหฬอฮ")) + capture("–") + capture(any("คฅฆกขฃพฟภบปชฌฑฒทธจฎฏดตฐถศษสมญณนรลฬง") + "_"), ref( 1 ) + "โ" + ref( 2 ) + "ะ" + ref( 3 )

# Add inherent vowel for single letters: when "–" is directly followed by the
# class tag (_H, _M or _L), append "ะ" so the syllable reads "–ะ".
sub capture(any([line_start, " ", "-", "ฺ"])) + capture(any("ก".."ฮ")) + "–", ref( 1 ) + ref( 2 ) + "–ะ", after: "_" + any("HML")

# Change tones to correct tone values: replace the class tag (_H, _M, _L)
# plus an optional tone mark with IPA tone letters. The rules are
# order-dependent: explicit tone marks first, then dead syllables, then the
# defaults for whatever class tags remain.
# IPA Tones:        0 ˧ , 1 ˨˩ , 2 ˥˩, 3 ˦˥, 4 ˩˩˦

sub "_" + any("HM") + "่", "˨˩"
sub "_" + any("HM") + "้", "˥˩"
sub "_" + any("HML") + "๊", "˦˥"
sub "_" + any("HML") + "๋", "˩˩˦"
sub "_L่", "˥˩"
sub "_L้", "˦˥"
sub "_" + any("HML") + "̄", "˧"

# Syllables ending in –ะ (U+0E30), –ิ (U+0E34), –ึ (U+0E36), –ุ (U+0E38)
# are dead syllables and follow a different set of tone rules
# The combination เ–ิ is an exception.
# NOTE(review): the character classes in the last two rules of this group do
# not appear to contain –ิ (U+0E34) although the comment above lists it --
# confirm whether that is intentional.

sub capture(any("เ") + "–ิ") + "_" + any("HM"), ref( 1 ) + "˨˩"
sub capture(any("เ") + "–ิ") + "_L", ref( 1 ) + "˦˥"
sub capture(any("ะึุ")) + "_" + any("HM"), ref( 1 ) + "˨˩"
sub capture(any("ะึุ")) + "_L", ref( 1 ) + "˦˥"

# Syllables with stop finals
# NOTE(review): the class in the first rule starts with "ะค" and lacks ฅ and ฆ,
# unlike the "คฅฆ…" class used by the following rules -- verify against the
# v1 map.
sub capture(any("ะคกขฃพฟภบปชฌฑฒทธจฎฏดตฐถศษส") + maybe("ส")) + "_" + any("HM"), ref( 1 ) + "˨˩"
sub capture(any("เ") + "–ิ" + any("คฅฆกขฃพฟภบปชฌฑฒทธจฎฏดตฐถศษส") + maybe("ส")) + "_L", ref( 1 ) + "˦˥"
sub capture(any("ะัึุ็") + any("คฅฆกขฃพฟภบปชฌฑฒทธจฎฏดตฐถศษส") + maybe("ส")) + "_L", ref( 1 ) + "˦˥"
sub capture("็อ" + any("คฅฆกขฃพฟภบปชฌฑฒทธจฎฏดตฐถศษส") + maybe("ส")) + "_L", ref( 1 ) + "˦˥"
#   sub capture( :lookbehind_start any("มนง") :lookbehind_stop any("คฅฆกขฃพฟภบปชฌฑฒทธจฎฏดตฐถศษส") + maybe("ส")) + "_L", ref( 1 ) + "˥˩" # warning: :

# Assign the default tones if no tone mark is present.
sub "_" + any("ML"), "˧"
sub "_H", "˩˩˦"

# Remove tone-marking unpronounced ห
# (deletes every ฺ, together with a directly preceding ห when there is one)
sub maybe("ห") + "ฺ", ""

# CHARACTERS
# Vowel patterns and consonants mapped to IPA inside one parallel block.
# In the vowel patterns "–" is the onset marker inserted by the earlier rules.
# NOTE(review): several sources are prefixes of longer ones (e.g. "ฤ" / "ฤๅ",
# "–ื" / "–ือ"); this presumably relies on the engine preferring the longest
# source inside `parallel` -- confirm.
parallel {
  # Part 1: vowel + ว
  sub "–ิว", "iw"
  sub "เ–็ว", "ew"
  sub "เ–ว", "eːw"
  sub "แ–็ว", "ɛw"
  sub "แ–ว", "ɛːw"
  sub "โ–ว", "oːw"
  sub "เ–ียว", "ia̯w"

  # Part 2: vowel + ย
  sub "–ุย", "uj"
  sub "โ–ย", "oːj"
  sub "–อย", "ɔːj"
  sub "เ–ย", "ɤːj"
  sub "เ–ือย", "ɯa̯j"
  sub "–วย", "ua̯j"

  # Part 3: remaining multi-character vowel patterns
  sub "เ–ียะ", "ia̯"
  sub "เ–ีย", "ia̯"
  sub "เ–ือะ", "ɯa̯"
  sub "เ–ือ", "ɯa̯"
  sub "–ัวะ", "ua̯"
  sub "–ัว", "ua̯"
  sub "–ว", "ua̯"
  sub "ไ–ย", "aj"
  sub "ใ–", "aj"
  sub "ไ–", "aj"
  sub "–ัย", "aj"
  sub "–าย", "aːj"
  sub "เ–า", "aw"
  sub "–าว", "aːw"

  # Part 4: simple vowels
  sub "–ะ", "a"
  sub "–ั", "a"
  sub "–า", "aː"
  sub "รร", "an"
  sub "–ำ", "am"
  sub "–ิ", "i"
  sub "–ี", "iː"
  sub "–ึ", "ɯ"
  sub "–ือ", "ɯː"
  sub "–ื", "ɯː"
  sub "–ุ", "u"
  sub "–ู", "uː"
  sub "เ–ะ", "e"
  sub "เ–็", "e"
  sub "เ–", "eː"
  sub "แ–ะ", "ɛ"
  sub "แ–็", "ɛ"
  sub "แ–", "ɛː"
  sub "โ–ะ", "o"
  sub "โ–", "oː"
  sub "เ–าะ", "ɔ"
  sub "–็อ", "ɔ"
  sub "–อ", "ɔː"
  sub "เ–็อ", "ɤ"
  sub "เ–อะ", "ɤ"
  sub "เ–ิ็", "ɤ"
  sub "เ–ิ", "ɤː"
  sub "เ–อ", "ɤː"

  # FINAL CONSONANTS
  # These apply only when the consonant is directly followed by a tone letter
  # (the first rule also allows an intervening ส).
  sub any("คฅฆกขฃ"), "k̚", after: maybe("ส") + any("˩˨˧˦˥")
  sub any("พภบป"), "p̚", after: any("˩˨˧˦˥")
  sub "ฟ", "f", after: any("˩˨˧˦˥")
  sub any("ฌฑฒทธจฎฏดตฐถ"), "t̚", after: any("˩˨˧˦˥")
  sub any("ศษส"), "s", after: any("˩˨˧˦˥")
  sub "ช", "t͡ɕʰ", after: any("˩˨˧˦˥")
  sub "ม", "m", after: any("˩˨˧˦˥")
  sub any("ญณนร"), "n", after: any("˩˨˧˦˥")
  sub any("ลฬ"), "l", after: any("˩˨˧˦˥")
  sub "ง", "ŋ", after: any("˩˨˧˦˥")

  # ONSET CONSONANTS
  # Unconditional consonant values, used wherever no final-consonant rule
  # above applies.

  sub "ก", "k"
  sub "ข", "kʰ"
  sub "ฃ", "kʰ"
  sub "ค", "kʰ"
  sub "ฅ", "kʰ"
  sub "ฆ", "kʰ"
  sub "ง", "ŋ"
  sub "จ", "t͡ɕ"
  sub "ฉ", "t͡ɕʰ"
  sub "ช", "t͡ɕʰ"
  sub "ซ", "s"
  sub "ฌ", "t͡ɕʰ"
  sub "ญ", "j"
  sub "ฎ", "d"
  sub "ฏ", "t"
  sub "ฐ", "tʰ"
  sub "ฑ", "tʰ"
  sub "ฒ", "tʰ"
  sub "ณ", "n"
  sub "ด", "d"
  sub "ต", "t"
  sub "ถ", "tʰ"
  sub "ท", "tʰ"
  sub "ธ", "tʰ"
  sub "น", "n"
  sub "บ", "b"
  sub "ป", "p"
  sub "ผ", "pʰ"
  sub "ฝ", "f"
  sub "พ", "pʰ"
  sub "ฟ", "f"
  sub "ภ", "pʰ"
  sub "ม", "m"
  sub "ย", "j"
  sub "ร", "r"
  sub "ฤ", "rɯ"
  sub "ฤๅ", "rɯː"
  sub "ล", "l"
  sub "ฦ", "lɯ"
  sub "ฦๅ", "lɯː"
  sub "ว", "w"
  sub "ศ", "s"
  sub "ษ", "s"
  sub "ส", "s"
  sub "ห", "h"
  sub "ฬ", "l"
  sub "อ", "ʔ"
  sub "ฮ", "h"

}

# POSTRULES
# If a syllable ends with a short vowel, the vowel is sometimes
# followed by glottal constriction. On Wiktionary, this is
# sometimes transcribed as ʔ, but usage has been inconsistent.
# This glottal stop is not phonemic and is not written in most
# existing transcription systems. Therefore all syllable-final
# ʔ will be removed from Wiktionary data.
#
# The v1 rule that would add the glottal stop is kept below for reference
# only; it is disabled.
#
# - pattern: "([aieuoɯɔɛɤ])(?=[˩˨˧˦˥]*( |$))"
#   result: "\\1ʔ"

# Remove unprocessed short vowel marks (any ็ still left in the text)
sub "็", ""

# Use "." instead of "-" as the syllable delimiter in IPA
sub "-", "."

# Remove all remaining spaces.
# NOTE(review): presumably these spaces come from the secryst step -- confirm,
# and revisit this rule once that is settled.
sub " ", none

}