metadata {
  # Identity of this map: authority, map id, language and source/destination scripts.
  authority_id: var
  id: phonemic
  language: iso-639-2:tha
  source_script: Thai
  destination_script: Zsym
  name: Phonemic Thai to IPA
  url:
  creation_date:
  adoption_date:
  description: This maps phonemic Thai to IPA.
  notes: |
}
tests {
  # Each test pairs a phonemic Thai input (syllables separated by "-")
  # with the IPA output expected from the stage below.
  test "คฺวาม-วาว", "kʰwaːm˧.waːw˧"
  test "ที่-เปิด-ขวด", "tʰiː˥˩.pɤːt̚˨˩.kʰua̯t̚˨˩"
  test "พื้น-หฺลัง", "pʰɯːn˦˥.laŋ˩˩˦"
  test "สม-ชาย", "som˩˩˦.t͡ɕʰaːj˧"
  test "อ็่อน-ค่อ", "ʔɔn˨˩.kʰɔː˥˩"
  test "โค-ระ-ยอ", "kʰoː˧.ra˦˥.jɔː˧"
  test "ไล่-ออก", "laj˥˩.ʔɔːk̚˨˩"
  test "ไส้-เลื่อน", "saj˥˩.lɯa̯n˥˩"
  test "ไอ-กฺรน", "ʔaj˧.kron˧"
  test "ไอ-ซี-ยู", "ʔaj˧.siː˧.juː˧"
  test "ไอ๊ส-แลน", "ʔajs˦˥.lɛːn˧"
}
# This map has been partially converted by the bin/maps_v1_to_v2 script.
# The section below requires human attention. Remember to remove this
# comment and move the converted map to the 'maps/' directory. Please also
# take note that the maps-staging directory will be cleaned up whenever
# you run the bin/maps_v1_to_v2 script. You should be particularly
# concerned about any regular expressions found in this file, about
# advanced expressions in parallel {} parts, and about the order
# of particular parts of the stage.
stage {
  # Overview of this stage (rule order matters):
  #   1. Mark each syllable onset with "–" and append a tone-class marker
  #      (_H / _M / _L) followed by the tone mark at the end of the match.
  #   2. Move pre-posed vowels (เ แ ไ ใ โ) after the onset and add inherent vowels.
  #   3. Turn the class marker + tone mark into IPA tone letters.
  #   4. Map vowels, final consonants and onset consonants to IPA (parallel block).
  #   5. Post-rules: drop leftover ็, use "." as syllable delimiter, remove spaces.
  #
  # Fixed relative to the converter output: character classes that mix literals
  # and ranges had been emitted as concatenations inside a one-element any([...])
  # (e.g. any(["ั" + any("ำ".."ู")]) = "ั followed by one of ำ..ู"). They are
  # written here as alternations so that they match the classes documented
  # below: vowels [\u0e31\u0e33-\u0e39] and tones [\u0304\u0e48-\u0e4B].
  secryst model: "thai-ipa"

  # RULES
  # Cluster Onsets: กขคดตทบปผพฟสห
  # Cluster Onsets in groups:
  # CO - High ขผสห
  # CO - Mid กดตบป
  # CO - Low คทพฟ
  # All Onsets: กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรลวศษสหฬอฮ
  # High Consonants: ขฃฉฐถผฝศษสห
  # Mid Consonants: กจฎฏดตบปอ
  # Low Consonants: คฅฆงชซฌญฑฒณทธนพฟภมยรลวฬฮ
  # Tone: [\u0304\u0e48-\u0e4B] 0: 0304 1: 0E48 2: 0E49 3: 0E4A 4: 0E4B
  # Vowels right before tone marks: [\u0e31\u0e33-\u0e39]

  # NOTE(review): in the rules below, the trailing capture
  # maybe_some(any(" -")) / maybe_some(any("- ฺ")) only matches runs of the
  # listed characters. It was presumably converted from a negated class
  # ("rest of the syllable", e.g. [^ -]*) and lost its negation; if so,
  # syllables with a final consonant are not handled yet. Confirm against the
  # v1 map before moving this file out of maps-staging.

  # Handle Consonantal อ, which can never be part of a consonant cluster
  # in phonemic Thai
  sub capture(any([line_start, " ", "-", any("เแไใโ")])) + capture("อ") + maybe(capture(any(["็", any(["ั", any("ำ".."ู")])]))) + maybe(capture(any(["̄", any("่".."๋")]))) + capture(maybe_some(any(" -"))), ref(1) + ref(2) + "–" + ref(3) + ref(5) + "_M" + ref(4)

  # Handle Consonantal ยว (alone: low class; in a cluster: class of the first consonant)
  sub capture(any([line_start, " ", "-", any("เแไใโ")])) + capture(any("ยว")) + maybe(capture(any(["็", any(["ั", any("ำ".."ู")])]))) + maybe(capture(any(["̄", any("่".."๋")]))) + capture(maybe_some(any(" -"))), ref(1) + ref(2) + "–" + ref(3) + ref(5) + "_L" + ref(4)
  sub capture(any([line_start, " ", "-", any("เแไใโ")])) + capture(any("ขผสห") + "ฺ" + any("ยว")) + maybe(capture(any(["็", any(["ั", any("ำ".."ู")]) + maybe("็")]))) + maybe(capture(any(["̄", any("่".."๋")]))) + capture(maybe_some(any(" -"))), ref(1) + ref(2) + "–" + ref(3) + ref(5) + "_H" + ref(4)
  sub capture(any([line_start, " ", "-", any("เแไใโ")])) + capture(any("คทพฟ") + "ฺ" + any("ยว")) + maybe(capture(any(["็", any(["ั", any("ำ".."ู")]) + maybe("็")]))) + maybe(capture(any(["̄", any("่".."๋")]))) + capture(maybe_some(any(" -"))), ref(1) + ref(2) + "–" + ref(3) + ref(5) + "_L" + ref(4)
  sub capture(any([line_start, " ", "-", any("เแไใโ")])) + capture(any("กดตบป") + "ฺ" + any("ยว")) + maybe(capture(any(["็", any(["ั", any("ำ".."ู")]) + maybe("็")]))) + maybe(capture(any(["̄", any("่".."๋")]))) + capture(maybe_some(any(" -"))), ref(1) + ref(2) + "–" + ref(3) + ref(5) + "_M" + ref(4)

  # Move tones to the end of the syllable
  # (one rule per tone class: high, mid, low; each accepts either a
  # "C ฺ C" cluster or a single consonant of that class as the onset)
  sub capture(any([line_start, " ", "-", any("เแไใโ")])) + capture(any([any("ขผสห") + "ฺ" + any("กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมรลศษสหฬอฮ"), any("ขฃฉฐถผฝศษสห")])) + maybe(capture(any(["็", any(["ั", any("ำ".."ู")]) + maybe("็")]))) + maybe(capture(any(["̄", any("่".."๋")]))) + capture(maybe_some(any("- ฺ"))), ref(1) + ref(2) + "–" + ref(3) + ref(5) + "_H" + ref(4), after: any([" ", "-", line_end])
  sub capture(any([line_start, " ", "-", any("เแไใโ")])) + capture(any([any("กดตบป") + "ฺ" + any("กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมรลศษสหฬฮ"), any("กจฎฏดตบป")])) + maybe(capture(any(["็", any(["ั", any("ำ".."ู")]) + maybe("็")]))) + maybe(capture(any(["̄", any("่".."๋")]))) + capture(maybe_some(any("- ฺ"))), ref(1) + ref(2) + "–" + ref(3) + ref(5) + "_M" + ref(4), after: any([" ", "-", line_end])
  sub capture(any([line_start, " ", "-", any("เแไใโ")])) + capture(any([any("คทพฟ") + "ฺ" + any("กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมรลศษสหฬอฮ"), any("คฅฆงชซฌญฑฒณทธนพฟภมรลฬฮ")])) + maybe(capture(any(["็", any(["ั", any("ำ".."ู")]) + maybe("็")]))) + maybe(capture(any(["̄", any("่".."๋")]))) + capture(maybe_some(any("- ฺ"))), ref(1) + ref(2) + "–" + ref(3) + ref(5) + "_L" + ref(4), after: any([" ", "-", line_end])

  # If ว is followed by another vowel, then it is actually part of the cluster
  # (the onset marker "–" is moved from after the first consonant to after ว/ร/ล)
  sub capture(any([line_start, " ", "-", any("เแไใโ")])) + capture(any("ขผสหกดตบปคทพฟ")) + "–" + capture(any("วรล")) + capture(any([any("าอำะั"), any("ำ".."ู")]) + maybe("็")) + maybe(capture(any(["̄", any("่".."๋")]))) + capture(maybe_some(any(" -")) + "_" + any("HML")), ref(1) + ref(2) + ref(3) + "–" + ref(4) + ref(6) + ref(5)
  sub capture(any("เแไใโ")) + capture(any("ขผสหกดตบปคทพฟ")) + "–" + capture(any("วรล")) + capture(maybe(any([any("าอำะั"), any("ำ".."ู")])) + maybe("็")) + maybe(capture(any(["̄", any("่".."๋")]))) + capture(some(any("คฅฆกขฃพภบปฟฌฑฒทธจฎฏดตฐถศษสชมญณนรลฬง")) + "_" + any("HML")), ref(1) + ref(2) + ref(3) + "–" + ref(4) + ref(6) + ref(5)

  # Move preceding vowels to the right position
  # (the pre-posed vowel is swapped with the following cluster or single consonant)
  sub capture(any([line_start, " ", "-", any("เแไใโ")])) + capture(any("เแไใโ")) + capture(any("ปคฟตดผทพกขบหส") + maybe("ฺ") + any("กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรลวศษสหฬอฮ")), ref(1) + ref(3) + ref(2)
  sub capture(any([line_start, " ", "-", any("เแไใโ")])) + capture(any("เแไใโ")) + capture(any("กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรลวศษสหฬอฮ")), ref(1) + ref(3) + ref(2)

  # Add inherent vowel if there is nothing between the onset and final consonants
  sub capture(any("กขฃคฅฆงจฉชซฌญฎฏฐฑฒณดตถทธนบปผฝพฟภมยรลวศษสหฬอฮ")) + capture("–") + capture(any("คฅฆกขฃพฟภบปชฌฑฒทธจฎฏดตฐถศษสมญณนรลฬง") + "_"), ref(1) + "โ" + ref(2) + "ะ" + ref(3)

  # Add inherent vowel for single letters
  sub capture(any([line_start, " ", "-", "ฺ"])) + capture(any("ก".."ฮ")) + "–", ref(1) + ref(2) + "–ะ", after: "_" + any("HML")

  # Change tones to correct tone values
  # IPA Tones: 0 ˧ , 1 ˨˩ , 2 ˥˩, 3 ˦˥, 4 ˩˩˦
  sub "_" + any("HM") + "่", "˨˩"
  sub "_" + any("HM") + "้", "˥˩"
  sub "_" + any("HML") + "๊", "˦˥"
  sub "_" + any("HML") + "๋", "˩˩˦"
  sub "_L่", "˥˩"
  sub "_L้", "˦˥"
  sub "_" + any("HML") + "̄", "˧"

  # Syllables ending in –ะ (U+0E30), –ิ (U+0E34), –ึ (U+0E36), –ุ (U+0E38)
  # are dead syllables and follow a different set of tone rules
  # The combination เ–ิ is an exception.
  sub capture(any("เ") + "–ิ") + "_" + any("HM"), ref(1) + "˨˩"
  sub capture(any("เ") + "–ิ") + "_L", ref(1) + "˦˥"
  sub capture(any("ะึุ")) + "_" + any("HM"), ref(1) + "˨˩"
  sub capture(any("ะึุ")) + "_L", ref(1) + "˦˥"

  # Syllables with stop finals
  # NOTE(review): the first class below starts with "ะค" whereas the sibling
  # rules use "คฅฆ"; left unchanged - confirm against the v1 map.
  sub capture(any("ะคกขฃพฟภบปชฌฑฒทธจฎฏดตฐถศษส") + maybe("ส")) + "_" + any("HM"), ref(1) + "˨˩"
  sub capture(any("เ") + "–ิ" + any("คฅฆกขฃพฟภบปชฌฑฒทธจฎฏดตฐถศษส") + maybe("ส")) + "_L", ref(1) + "˦˥"
  sub capture(any("ะัึุ็") + any("คฅฆกขฃพฟภบปชฌฑฒทธจฎฏดตฐถศษส") + maybe("ส")) + "_L", ref(1) + "˦˥"
  sub capture("็อ" + any("คฅฆกขฃพฟภบปชฌฑฒทธจฎฏดตฐถศษส") + maybe("ส")) + "_L", ref(1) + "˦˥"
  # NOTE(review): the rule below was left commented out by the converter; it
  # contains lookbehind markers that are not valid DSL and still needs a
  # manual translation.
  # sub capture( :lookbehind_start any("มนง") :lookbehind_stop any("คฅฆกขฃพฟภบปชฌฑฒทธจฎฏดตฐถศษส") + maybe("ส")) + "_L", ref( 1 ) + "˥˩"
  # warning: :

  # Assign the default tones if no tone mark is present.
  sub "_" + any("ML"), "˧"
  sub "_H", "˩˩˦"

  # Remove tone-marking unpronounced ห
  sub maybe("ห") + "ฺ", ""

  # CHARACTERS
  parallel {
    # Part 1
    sub "–ิว", "iw"
    sub "เ–็ว", "ew"
    sub "เ–ว", "eːw"
    sub "แ–็ว", "ɛw"
    sub "แ–ว", "ɛːw"
    sub "โ–ว", "oːw"
    sub "เ–ียว", "ia̯w"

    # Part 2
    sub "–ุย", "uj"
    sub "โ–ย", "oːj"
    sub "–อย", "ɔːj"
    sub "เ–ย", "ɤːj"
    sub "เ–ือย", "ɯa̯j"
    sub "–วย", "ua̯j"

    # Part 3
    sub "เ–ียะ", "ia̯"
    sub "เ–ีย", "ia̯"
    sub "เ–ือะ", "ɯa̯"
    sub "เ–ือ", "ɯa̯"
    sub "–ัวะ", "ua̯"
    sub "–ัว", "ua̯"
    sub "–ว", "ua̯"
    sub "ไ–ย", "aj"
    sub "ใ–", "aj"
    sub "ไ–", "aj"
    sub "–ัย", "aj"
    sub "–าย", "aːj"
    sub "เ–า", "aw"
    sub "–าว", "aːw"

    # Part 4
    sub "–ะ", "a"
    sub "–ั", "a"
    sub "–า", "aː"
    sub "รร", "an"
    sub "–ำ", "am"
    sub "–ิ", "i"
    sub "–ี", "iː"
    sub "–ึ", "ɯ"
    sub "–ือ", "ɯː"
    sub "–ื", "ɯː"
    sub "–ุ", "u"
    sub "–ู", "uː"
    sub "เ–ะ", "e"
    sub "เ–็", "e"
    sub "เ–", "eː"
    sub "แ–ะ", "ɛ"
    sub "แ–็", "ɛ"
    sub "แ–", "ɛː"
    sub "โ–ะ", "o"
    sub "โ–", "oː"
    sub "เ–าะ", "ɔ"
    sub "–็อ", "ɔ"
    sub "–อ", "ɔː"
    sub "เ–็อ", "ɤ"
    sub "เ–อะ", "ɤ"
    sub "เ–ิ็", "ɤ"
    sub "เ–ิ", "ɤː"
    sub "เ–อ", "ɤː"

    # FINAL CONSONANTS
    # (a consonant directly followed by an IPA tone letter is syllable-final)
    sub any("คฅฆกขฃ"), "k̚", after: maybe("ส") + any("˩˨˧˦˥")
    sub any("พภบป"), "p̚", after: any("˩˨˧˦˥")
    sub "ฟ", "f", after: any("˩˨˧˦˥")
    sub any("ฌฑฒทธจฎฏดตฐถ"), "t̚", after: any("˩˨˧˦˥")
    sub any("ศษส"), "s", after: any("˩˨˧˦˥")
    sub "ช", "t͡ɕʰ", after: any("˩˨˧˦˥")
    sub "ม", "m", after: any("˩˨˧˦˥")
    sub any("ญณนร"), "n", after: any("˩˨˧˦˥")
    sub any("ลฬ"), "l", after: any("˩˨˧˦˥")
    sub "ง", "ŋ", after: any("˩˨˧˦˥")

    # ONSET CONSONANTS
    sub "ก", "k"
    sub "ข", "kʰ"
    sub "ฃ", "kʰ"
    sub "ค", "kʰ"
    sub "ฅ", "kʰ"
    sub "ฆ", "kʰ"
    sub "ง", "ŋ"
    sub "จ", "t͡ɕ"
    sub "ฉ", "t͡ɕʰ"
    sub "ช", "t͡ɕʰ"
    sub "ซ", "s"
    sub "ฌ", "t͡ɕʰ"
    sub "ญ", "j"
    sub "ฎ", "d"
    sub "ฏ", "t"
    sub "ฐ", "tʰ"
    sub "ฑ", "tʰ"
    sub "ฒ", "tʰ"
    sub "ณ", "n"
    sub "ด", "d"
    sub "ต", "t"
    sub "ถ", "tʰ"
    sub "ท", "tʰ"
    sub "ธ", "tʰ"
    sub "น", "n"
    sub "บ", "b"
    sub "ป", "p"
    sub "ผ", "pʰ"
    sub "ฝ", "f"
    sub "พ", "pʰ"
    sub "ฟ", "f"
    sub "ภ", "pʰ"
    sub "ม", "m"
    sub "ย", "j"
    sub "ร", "r"
    sub "ฤ", "rɯ"
    sub "ฤๅ", "rɯː"
    sub "ล", "l"
    sub "ฦ", "lɯ"
    sub "ฦๅ", "lɯː"
    sub "ว", "w"
    sub "ศ", "s"
    sub "ษ", "s"
    sub "ส", "s"
    sub "ห", "h"
    sub "ฬ", "l"
    sub "อ", "ʔ"
    sub "ฮ", "h"
  }

  # POSTRULES
  # If a syllable ends with a short vowel, the vowel is sometimes
  # followed by glottal constriction. On Wiktionary, this is
  # sometimes transcribed as ʔ, but usage has been inconsistent.
  # This glottal stop is not phonemic and is not written in most
  # existing transcription systems. Therefore all syllable-final
  # ʔ will be removed from Wiktionary data.
  #
  # - pattern: "([aieuoɯɔɛɤ])(?=[˩˨˧˦˥]*( |$))"
  #   result: "\\1ʔ"

  # Remove unprocessed short vowel marks
  sub "็", ""

  # Using . instead of - as syllable delimiter in IPA
  sub "-", "."

  # Remove... spaces? Related to secryst. We will work it out, but for now let's
  # have some commit :D
  sub " ", none
}