metadata {

authority_id: bgnpcgn
id: 2013
language: iso-639-2:mkd
source_script: Cyrl
destination_script: Latn
name: Romanization for Macedonian (2013)
url: https://assets.publishing.service.gov.uk/government/uploads/system/uploads/attachment_data/file/811514/ROMANIZATION_OF_MACEDONIAN.pdf
creation_date: 2013
description: |
  Macedonian was officially established as a written language in Yugoslavia during
  World War II and is now the official language of North Macedonia.
  This romanization system replaces the BGN/PCGN 1981 agreement and adheres
  to the most widely-accepted standardization of Macedonian orthography.

notes:
  - The Macedonian Cyrillic lowercase italic Д may sometimes be seen as g.
    There is no specific Unicode encoding for this variant form so a comparable character
    has been used here for illustrative purposes.
  - The Macedonian Cyrillic lowercase italic Ѓ may sometimes be seen as ī.
    There is no specific Unicode encoding for this variant form so a comparable character
    has been used here for illustrative purposes.
  - The Macedonian Cyrillic lowercase italic П may sometimes be seen as ū.
    There is no specific Unicode encoding for this variant form so a comparable character
    has been used here for illustrative purposes.
  - The Macedonian Cyrillic lowercase italic Т may sometimes be seen as w̄.
    There is no specific Unicode encoding for this variant form so a comparable character
    has been used here for illustrative purposes.
  - |
    An inventory of letter-diacritic combinations, with their Unicode encoding,
    in addition to the unmodified letters of the basic Roman script is:
    | Ǵ (U+01F4)   | ǵ (U+01F5)   |
    | Ž (U+017D)   | ž (U+017E)   |
    | Dz (U+01F2)* | dz (U+01F3)* |
    | Lj (U+01C8)* | lj (U+01C9)* |
    | Nj (U+01CB)* | nj (U+01CC)* |
    | Ḱ (U+1E30)   | ḱ (U+1E31)   |
    | Č (U+010C)   | č (U+010D)   |
    | Dž (U+01C5)* | dž (U+01C6)* |
    | Š (U+0160)   | š (U+0161)   |
    * Note that these characters can also be reproduced with individual letters (e.g. l+j).
  - The Romanization column shows only lowercase forms but, when romanizing,
    uppercase and lowercase Roman letters as appropriate should be used.

}

tests {
# Each entry pairs a Macedonian Cyrillic input string with the romanized
# output expected from this map.
# The "ЅВЕЗДА ѕвезда Ѕвезда" case covers the three casings of a digraph
# letter: all-caps ("DZ"), lowercase ("dz") and title case ("Dz").

test "Ѓенови Ливаѓе", "Ǵenovi Livaǵe"
test "ЛУЃЕ луѓе", "LUǴE luǵe"
test "ЅВЕЗДА ѕвезда Ѕвезда", "DZVEZDA dzvezda Dzvezda"
test "Јабежица", "Jabežica"
test "Љиќен и Бард", "Ljiḱen i Bard"
test "Ќамилов Чукар", "Ḱamilov Čukar"
test "Џавидин Кајнак", "Džavidin Kajnak"
test "Џамалџи", "Džamaldži"
test "Џибра Гури и Зи", "Džibra Guri i Zi"
test "Абазова Куќарица", "Abazova Kuḱarica"
test "Баба Анѓина Маала", "Baba Anǵina Maala"
test "Ваљановец", "Valjanovec"
test "Галал Једи Дереш", "Galal Jedi Dereš"
test "Дванаесет Клајнци", "Dvanaeset Klajnci"
test "Електродистрибуција Струга", "Elektrodistribucija Struga"
test "Железничка Станица Рајко Жинзифов", "Železnička Stanica Rajko Žinzifov"
test "Заедничко Речиште", "Zaedničko Rečište"
test "Испраена Плоча", "Ispraena Ploča"
test "Казнено-Поправна Установа Идризово", "Kazneno-Popravna Ustanova Idrizovo"
test "Лази и Зејнелит", "Lazi i Zejnelit"
test "Мавровско Езеро", "Mavrovsko Ezero"
test "Национален Парк Галичица", "Nacionalen Park Galičica"
test "Одморалиште Свети Стефан", "Odmoralište Sveti Stefan"
test "Планинарски Дом Караџица", "Planinarski Dom Karadžica"
test "Раса е Лисењит", "Rasa e Lisenjit"
test "Скочивирска Клисура", "Skočivirska Klisura"
test "Термо-електроцентрала Неготино", "Termo-elektrocentrala Negotino"
test "Узуновско Бресје", "Uzunovsko Bresje"
test "Фабрика Југохром", "Fabrika Jugohrom"
test "Хидроелектрана Сапунџица", "Hidroelektrana Sapundžica"
test "Цветковско Рамниште", "Cvetkovsko Ramnište"
test "Чалтанова Пештера", "Čaltanova Peštera"
test "Шкемби Вишнејц", "Škembi Višnejc"

}

# Pull in the "posix" library map. NOTE(review): presumably this is what
# provides `upper`, used by the case post-rules in the stage — confirm.
# The name must be an ASCII double-quoted string, not typographic quotes.
dependency "posix", import: true

# Transliteration stage: per-letter Cyrillic -> Latin substitutions, followed
# by case fix-ups for the two-letter romanizations, then `compose`.
stage {

# CHARACTERS
# One substitution per Macedonian Cyrillic letter (uppercase, then lowercase).
# The uppercase letters Ѕ, Љ, Њ and Џ are emitted in title case here
# ("Dz", "Lj", "Nj", "Dž"); the post-rules below upgrade them to all-caps
# where the surrounding text is uppercase.
# NOTE(review): `parallel` presumably applies these rules as one simultaneous
# pass rather than one after another — confirm against the DSL docs.
parallel {
  sub "А", "A"
  sub "Б", "B"
  sub "В", "V"
  sub "Г", "G"
  sub "Д", "D"
  sub "Ѓ", "Ǵ" # Ǵ
  sub "Е", "E"
  sub "Ж", "Ž" # Ž
  sub "З", "Z"
  sub "Ѕ", "Dz"
  sub "И", "I"
  sub "Ј", "J"
  sub "К", "K"
  sub "Л", "L"
  sub "Љ", "Lj"
  sub "М", "M"
  sub "Н", "N"
  sub "Њ", "Nj"
  sub "О", "O"
  sub "П", "P"
  sub "Р", "R"
  sub "С", "S"
  sub "Т", "T"
  sub "Ќ", "Ḱ" # Ḱ
  sub "У", "U"
  sub "Ф", "F"
  sub "Х", "H"
  sub "Ц", "C"
  sub "Ч", "Č" # Č
  sub "Џ", "Dž" # Dž
  sub "Ш", "Š" # Š
  sub "а", "a"
  sub "б", "b"
  sub "в", "v"
  sub "г", "g"
  sub "д", "d"
  sub "ѓ", "ǵ" # ǵ
  sub "е", "e"
  sub "ж", "ž" # ž
  sub "з", "z"
  sub "ѕ", "dz"
  sub "и", "i"
  sub "ј", "j"
  sub "к", "k"
  sub "л", "l"
  sub "љ", "lj"
  sub "м", "m"
  sub "н", "n"
  sub "њ", "nj"
  sub "о", "o"
  sub "п", "p"
  sub "р", "r"
  sub "с", "s"
  sub "т", "t"
  sub "ќ", "ḱ" # ḱ
  sub "у", "u"
  sub "ф", "f"
  sub "х", "h"
  sub "ц", "c"
  sub "ч", "č" # č
  sub "џ", "dž" # dž
  sub "ш", "š" # š
}
# POSTRULES
# Hand-written case fix-ups for the title-case digraphs produced above.
# Each digraph has two rules: one rewrites it to all-caps when it is
# immediately followed by an `upper` match, the other when it is immediately
# preceded by one. The neighbouring captures are re-emitted unchanged through
# ref(1) / ref(2).
# NOTE(review): `upper` is not defined in this file; presumably it comes from
# the imported "posix" dependency and matches an uppercase letter — confirm.
# The rules run in the order written; note that the DZ pair lists the
# "followed by" rule first, whereas LJ / NJ / DŽ list the "preceded by" rule
# first.
# DZ
sub capture(maybe(upper)) + "Dz" + capture(upper), ref(1) + "DZ" + ref(2)
sub capture(upper) + "Dz" +  capture(maybe(upper)), ref(1) + "DZ" + ref(2)

# LJ
sub capture(upper) + "Lj" + capture(maybe(upper)), ref(1) + "LJ" + ref(2)
sub capture(maybe(upper)) + "Lj" + capture(upper), ref(1) + "LJ" + ref(2)

# NJ
sub capture(upper) + "Nj" + capture(maybe(upper)), ref(1) + "NJ" + ref(2)
sub capture(maybe(upper)) + "Nj" + capture(upper), ref(1) + "NJ" + ref(2)

# DŽ
sub capture(upper) + "Dž" + capture(maybe(upper)), ref(1) + "DŽ" + ref(2)
sub capture(maybe(upper)) + "Dž" + capture(upper), ref(1) + "DŽ" + ref(2)

# NOTE(review): presumably normalizes the output to composed Unicode form
# (e.g. for Ǵ, Ḱ, Č) — confirm against the DSL docs.
compose

}