/*
 * Decompiled with CFR 0.152.
 */
package ai.grazie.rules.en;

import ai.grazie.nlp.langs.Language;
import ai.grazie.nlp.similarity.Levenshtein;
import ai.grazie.rules.Example;
import ai.grazie.rules.Rule;
import ai.grazie.rules.RuleClient;
import ai.grazie.rules.common.CommonPatterns;
import ai.grazie.rules.common.Diacritics;
import ai.grazie.rules.common.KnownPhrases;
import ai.grazie.rules.common.MultiWordSpelling;
import ai.grazie.rules.common.ProperNames;
import ai.grazie.rules.common.PunctuationTypos;
import ai.grazie.rules.en.Articles;
import ai.grazie.rules.en.EnglishParameters;
import ai.grazie.rules.en.EnglishTreePatterns;
import ai.grazie.rules.en.WordConfusion;
import ai.grazie.rules.en.WordSeparation;
import ai.grazie.rules.tree.Node;
import ai.grazie.rules.tree.NodeCorrector;
import ai.grazie.rules.tree.NodePattern;
import ai.grazie.rules.tree.NodePointer;
import ai.grazie.rules.tree.Tree;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import one.util.streamex.IntStreamEx;
import one.util.streamex.StreamEx;
import org.languagetool.tools.MostlySingularMultiMap;
import org.languagetool.tools.StringTools;

class SpellingRules {
    /** Message attached by {@code ensureCapitalized} to its matches. */
    private static final String PROPER_NAMES_SHOULD_BE_CAPITALIZED = "Proper names should be capitalized";
    /** Format string for geographic diacritic hints; {@code %s} is filled with the phrase source in {@code diacriticRegex}. */
    private static final String GEO_MSG = "If it\u2019s a city in %s, it\u2019s usually written with diacritics";
    /** Format string for borrowed-phrase diacritic hints; {@code %s} is filled with the phrase source in {@code diacriticRegex}. */
    private static final String LOAN_MSG = "This %s-borrowed phrase usually has diacritics";
    /** Label regex passed to {@code label(...)} in {@code geoDiacritics}. */
    private static final String NER_GEO_LABELS = "GEO_POLITICAL_ENTITY|LOCATION";
    /**
     * Regex for a number that does not start with zero. Not referenced by name in this
     * class as shown; the same literal is repeated inline in {@link #numberWithSeparateSuffix}.
     */
    private static final String suffixableOrdinal = "[1-9]+\\d*";
    /**
     * Pattern over a number form followed by a two-letter form, with {@code spaceAfter()}
     * applied, that is accepted only when the following token's lower-case form equals the
     * suffix expected for that number. Used by {@code numberEnding} for the
     * "separate suffix" case.
     * NOTE(review): the exact meaning of the leading {@code 0} passed to
     * {@code inFormSequence} is defined by NodePattern and is not visible here.
     */
    static final NodePattern numberWithSeparateSuffix = NodePattern.N.inFormSequence(0, "[1-9]+\\d*", "[a-z]{2}").spaceAfter().and(num -> SpellingRules.expectedSuffix(num.form()).equals(num.neighbor(1).lowForm()));
    /** English phrase store shared by the multi-word, diacritic and number-ending checks in this class. */
    static final KnownPhrases knownPhrases = new KnownPhrases(Language.ENGLISH);

    /** Package-private no-argument constructor; every other member visible in this file is static. */
    SpellingRules() {
    }

    /**
     * Assembles the English spelling rules as an immutable list ({@code List.of}).
     *
     * <p>In order, the list contains:
     * <ol>
     *   <li>the Latin/Cyrillic letter confusion rule from {@code CommonPatterns};</li>
     *   <li>{@code Spelling.MISSING_DIACRITICS}, an anonymous {@code Rule.PatternRule} subclass
     *       whose default enablement is delegated to {@code client.suggestDiacriticsByDefault()};</li>
     *   <li>{@code Spelling.MULTI_WORD}, combining {@code geoDiacritics()} with a
     *       {@code MultiWordSpelling} pattern over {@code knownPhrases} and a series of
     *       {@code andNot}/{@code noForm} exclusions;</li>
     *   <li>{@code Spelling.PROPER_NAMES}, built from {@code properNames()};</li>
     *   <li>{@code Spelling.COMMON_TYPOS}, combining punctuation typos with {@code mistypedDet()},
     *       {@code mistypedOr()} and {@code wrongAbbreviation()}, with
     *       {@code enableInFlatTrees().honorCrazyParses()} applied;</li>
     *   <li>{@code WordConfusion.similarWordConfusion()};</li>
     *   <li>the misplaced-space rule from {@code CommonPatterns};</li>
     *   <li>{@code Spelling.NUMBER_ENDING}, built from {@code numberEnding()};</li>
     *   <li>{@code WordSeparation.rule()}.</li>
     * </ol>
     *
     * <p>NOTE(review): in this decompiled text the title literal of the PROPER_NAMES rule
     * ("Proper name ... spelling and capitalization") is split across two physical lines;
     * confirm the intended character at that position against the original class.
     *
     * @return the list of spelling rules
     */
    static List<Rule> rules() {
        return List.of(CommonPatterns.latinCyrillicConfusion("Confusion of cyrillic and latin letters", "A letter belongs to another alphabet.", "Latin", "Cyrillic", " letter '%s' in '%s'", new Example("I <b>h\u0430v\u0435</b> a cat.", "Cyrillic letter '\u0430' in 'have'", "I <b>have</b> a cat."), "[\u0430-\u044f\u0451\u0456\u0457]", null, null), new Rule.PatternRule("Spelling.MISSING_DIACRITICS", "Add diacritics to generic expressions", "Some words and phrases borrowed from other languages are usually written with accents or other diacritical marks.", "https://en.wikipedia.org/wiki/Glossary_of_French_words_and_expressions_in_English", SpellingRules.missingDiacritic(), new Example[]{new Example("I love <b>cafe</b> au lait.", "This French-borrowed phrase usually has diacritics", "I love <b>caf\u00e9</b> au lait."), new Example("<b>Cr\u00e8me brulee</b> is my favourite way to start the day.", "This French-borrowed phrase usually has diacritics", "<b>Cr\u00e8me br\u00fbl\u00e9e</b> is my favourite way to start the day.")}){

            @Override
            public boolean isEnabledByDefault(RuleClient client) {
                return client.suggestDiacriticsByDefault();
            }
        }, new Rule.PatternRule("Spelling.MULTI_WORD", "Check multi-word spelling and capitalization", "Find case mistakes, missing diacritics, and typos in various phrases and names consisting of several words.", null, NodePattern.or(SpellingRules.geoDiacritics(), new MultiWordSpelling(knownPhrases).skipTypoFixes(MultiWordSpelling.DEFAULT_SKIP_TYPO_FIXES.noForm("untied|state|ice|traveler|forest|gray|get|fed|som|unite|who|humans").andNot(NodePattern.N.inFormSequence(1, 2, "whom?", "it", "ma?y")).andNot(EnglishTreePatterns.startsWithApostrophe.noSpaceBefore())).pattern("Did you mean '%s'?", "The standard spelling is '%s'").andNot(NodePattern.N.inFormSequence(0, "intellij", "ideal", "plugin")).andNot(NodePattern.N.inFormSequence(0, "london", EnglishTreePatterns.apostropheS.getFormRegex(), "underground").withNeighbor(2, NodePattern.N.withHeadRelation("amod"))).andNot(NodePattern.N.inFormSequence(0, "Google", "and")).noFormCaseSensitive("[Uu]s").andNot(NodePattern.N.inFormSequence(0, "open", "office").andOr(NodePattern.N.withNeighbor(1, NodePattern.N.withDependent("det(:poss)?")), NodePattern.N.withNeighbor(2, NodePattern.N.pos("NNS?").noLemma("program(me)?|software|tool|file|project|template|format|document|download|plugin|(present|install)ation|calc|draw|writer|online|portable|alternative|(spread)?sheet")))).noForm("macs")), new Example("I live in <b>Stratford upon Avon</b>.", "The standard spelling is 'Stratford-upon-Avon'", "I live in <b>Stratford-upon-Avon</b>."), new Example("I dream of travelling to <b>Moravsky Zizkov</b>, a serene village in Europe.", "If it\u2019s a city in the Czech Republic, it\u2019s usually written with diacritics", "I dream of travelling to <b>Moravsk\u00fd \u017di\u017ekov</b>, a serene village in Europe."), new Example("I live in <b>Mulsen</b>, Saxony.", "If it\u2019s a city in Germany, it\u2019s usually written with diacritics", "I live in <b>M\u00fclsen</b>, Saxony.")), new Rule.PatternRule("Spelling.PROPER_NAMES", "Proper name 
spelling and capitalization", "Person, product and geographical names are usually capitalized.", "https://www.aje.com/arc/editing-tip-capitalization-proper-and-common-nouns/", SpellingRules.properNames(), new Example("Have you tried <b>grazie</b>?", "Did you mean JetBrains Grazie?", "Have you tried <b>Grazie</b>?"), new Example("I started with <b>windows</b> 3.11.", "Did you mean Microsoft Windows?", "I started with <b>Windows</b> 3.11."), new Example("You need to install <b>opera</b>.", "Did you mean the Opera browser?", "You need to install <b>Opera</b>."), new Example("Do you like <b>apple</b> products?", "Did you mean the Apple corporation?", "Do you like <b>Apple</b> products?")), new Rule.PatternRule("Spelling.COMMON_TYPOS", "Common typos", "Find and correct common typos which result in unknown words or where letters are mistyped with non-letter symbols.", null, NodePattern.or(PunctuationTypos.findCommonTypos("Did you mean '%s'?", Map.of("[", "p", ";", "l", ",", "m"), null), SpellingRules.mistypedDet(), SpellingRules.mistypedOr(), SpellingRules.wrongAbbreviation()), new Example("Have you tried <b>a[ples</b>?", "Did you mean 'apples'?", "Have you tried <b>apples</b>?")).enableInFlatTrees().honorCrazyParses(), WordConfusion.similarWordConfusion(), CommonPatterns.misplacedSpace("Misplaced space", "A space between words is unnecessary or in a wrong position.", "Did you mean '%s'?", NodePattern.or(NodePattern.N.directlyBefore(NodePattern.N.noSpaceAfter().directlyBefore(NodePattern.N.form("="))), NodePattern.N.directlyAfter(EnglishTreePatterns.anyPunct).directlyBefore(NodePattern.N.directlyBefore(EnglishTreePatterns.anyPunct)), NodePattern.N.inFormSequence(0, ".", ".").and(node -> {
            String twoLetterWord = node.form() + node.neighbor(1).form();
            return node.tree().treeSupport().tagToken(twoLetterWord).tokenReadings().stream().allMatch(r -> r.pos() == null) || twoLetterWord.toLowerCase(Locale.ROOT).matches("((da|[QRYZEPTGMkhdcm\u03bcnf])?(s|m|g|A|K|mol|cd|rad|sr|Hz|N|Pa|J|W|V|F|\u03a9|S|Wb|T|lm|lx|Bq|Sv|kat|L|l|M)|MMBtu|lux|rad|grad|pt|mp[gh]|[ndkmgt](b|hz)|ms|px|[kdcm]m|[kmhc]g|[md]l|b?hp|cc|lb|ft|hr|min|sec|[symw]|[rf]p[smhdy])");
        })), new Example("Isn\u2019t <b>tha tspace</b> incorrect?", "Did you mean 'that space'?", "Isn\u2019t <b>that space</b> incorrect?")), new Rule.PatternRule("Spelling.NUMBER_ENDING", "Number ending", "Ordinal numbers should end with the right suffix, which should be placed immediately after the number without spaces.", "https://langster.org/en/grammar/english/a1/ordinal-numbers/", SpellingRules.numberEnding(), new Example("I have the <b>1rd</b> edition of this book.", "Incorrect ordinal number ending", "I have the <b>1st</b> edition of this book.")), WordSeparation.rule());
    }

    /**
     * Builds the pattern that flags {@code lt}/{@code ltr} used as a volume abbreviation and
     * suggests {@code l} instead.
     *
     * <p>Two shapes are covered: a single form in which the number and the abbreviation are
     * glued together, and a standalone {@code lt}/{@code ltr} form directly after a
     * number-like form. The message wording ("liter" vs. "litre") depends on
     * {@code EnglishParameters.VARIANT}.
     *
     * @return the combined pattern with correction and message attached
     */
    private static NodePattern wrongAbbreviation() {
        // Number and abbreviation written as one form: keep the part before the first "l" and append " l".
        NodePattern gluedToNumber = NodePattern.N.form("~?\\d+([,.]\\d+)*ltr?")
                .correct(NodeCorrector.regexReplace("(.*)l.*", "$1 l"));
        // Abbreviation as its own form right after a number-like form: replace it with "l".
        NodePattern afterNumber = NodePattern.N.form("ltr?")
                .directlyAfter(CommonPatterns.withNumberLikeForm)
                .correct(NodeCorrector.replace("l"));
        NodePattern wrongUnit = NodePattern.or(gluedToNumber, afterNumber).reportEverythingTouched();
        return wrongUnit.andOr(
                EnglishParameters.VARIANT.withValue("|US").message("The abbreviation for 'liter' is 'l'"),
                NodePattern.N.message("The abbreviation for 'litre' is 'l'"));
    }

    /**
     * Builds the pattern for a word that is likely a mistyped determiner.
     *
     * <p>The structural part selects a dependent-free node placed before its head that either
     * has no part of speech and attaches as {@code amod|case|compound} to a head without a
     * determiner/possessive, or is a non-{@code DT} {@code det} of a noun with a single
     * determiner; several contexts are then excluded. The callback proposes the first of
     * "the", optionally "only" (when the next token is a {@code nummod}), and "many"/"one"
     * (depending on whether the head is plural) whose {@code Levenshtein.WithDamerau}
     * distance to the node's lower-case form is exactly 1.
     *
     * @return the pattern; its callback yields {@code null} when no candidate is at distance 1
     */
    private static NodePattern mistypedDet() {
        NodePattern suspect = NodePattern.N.beforeHead()
                .andOr(
                        NodePattern.N.noPos().withHead("amod|case|compound", NodePattern.N.noDependents("det|nmod:poss")),
                        NodePattern.N.noPos("DT").noForm("none").withHead("det", NodePattern.N.pos("NN.*").andNot(CommonPatterns.severalDependents("det"))))
                .andNot(NodePattern.N.directlyAfter(NodePattern.N.withHeadRelation("det")))
                .andNot(NodePattern.N.form("(st|nd|rd|th)").directlyAfter(CommonPatterns.withNumberLikeForm))
                .andNot(NodePattern.N.withPrevSibling(NodePattern.N.withHeadRelation("amod|compound")))
                .andNot(NodePattern.N.withNextSibling(NodePattern.PUNCT))
                .noDependents();
        return suspect.and((node, match) -> {
            // Candidate order matters: the first candidate at edit distance 1 wins.
            List<String> candidates = new ArrayList<>();
            candidates.add("the");
            if (node.neighbor(1).hasHeadRelation("nummod")) {
                candidates.add("only");
            }
            boolean pluralHead = Objects.requireNonNull(node.head()).hasPos("NNP?S");
            candidates.add(pluralHead ? "many" : "one");

            String typed = node.lowForm();
            for (String candidate : candidates) {
                if (Levenshtein.WithDamerau.distance(typed, candidate) == 1) {
                    return match.withCorrector(NodeCorrector.replace(node, candidate)).withMessage("Did you mean '" + candidate + "'?");
                }
            }
            return null;
        });
    }

    /**
     * Builds the pattern for a short {@code cc}-attached form starting with "o" that is
     * probably a mistyped "or".
     *
     * <p>When the node sits directly between two {@code CD} tokens, the alternatives
     * "of", "to" and "on" are additionally offered. Every match is marked as conceding to
     * other grammar checkers.
     *
     * @return the pattern with the "or" typo replacement attached
     */
    private static NodePattern mistypedOr() {
        NodePattern shortConjunction = NodePattern.N.withHeadRelation("cc")
                .form("o[^r]?")
                .noDependents("fixed")
                .and(EnglishTreePatterns.typoReplacement("or"));
        NodePattern betweenNumbers = NodePattern.N.directlyBefore(NodePattern.N.pos("CD"))
                .directlyAfter(NodePattern.N.pos("CD"))
                .correct(NodeCorrector.replace("of", "to", "on"));
        return shortConjunction
                .andOptionally(betweenNumbers)
                .and((node, match) -> match.concedingToOtherGrammarCheckers());
    }

    /**
     * Builds the pattern behind the {@code Spelling.PROPER_NAMES} rule.
     *
     * <p>The result is {@code NodePattern.or(multiWord, singleWord)}:
     * <ul>
     *   <li>{@code multiWord} covers names recognised together with their neighbours:
     *       Chrome/Chromium before a number or "browser", "google docks", ".net framework" /
     *       ".Net", {@code CommonPatterns.spaceX}, "apple id", lower-case "apple" followed
     *       (optionally skipping {@code 's}) by one of a list of product/company words, and
     *       "mac(s)" outside a list of excluded contexts.</li>
     *   <li>{@code singleWord} covers single forms whose case is fixed via {@code fixWordCase}
     *       under syntactic conditions (Grazie, IT, Windows, Opera, Google, Gradle, React,
     *       Kindle, Excel, Skype, Snickers, Twitter, Markdown, Slack, Chevy, TV), plus
     *       {@code fileTypes()}, the Fritz! and OpenAI patterns from {@code ProperNames}, and
     *       lower-casing of capitalized "Google" used as a verb. The whole group is suppressed
     *       directly next to quotations, hyphens or slashes without a space, and in hashtags.</li>
     * </ul>
     *
     * <p>NOTE(review): in this decompiled text the Fritz! message literal is split across two
     * physical lines; confirm the intended character at that position against the original class.
     *
     * @return the combined proper-name pattern
     */
    private static NodePattern properNames() {
        NodePattern multiWord = NodePattern.or(SpellingRules.ensureCapitalized("Chrome|Chromium").directlyBefore(NodePattern.N.form("\\d+|browser")), NodePattern.N.inFormSequence(0, "google", "docks").correct(NodeCorrector.replaceNodes(NodePointer.anchor(), NodePointer.neighbor(1), "Google Docs")).message("Did you mean Google Docs?"), NodePattern.or(NodePattern.N.inFormSequence(0, "\\.net", "framework").andNot(NodePattern.N.formCaseSensitive("\\.NET").directlyBefore(NodePattern.N.formCaseSensitive("Framework"))).correct(NodeCorrector.replaceNodes(NodePointer.anchor(), NodePointer.neighbor(1), " .NET Framework")), NodePattern.N.formCaseSensitive("\\.Net").correct(NodeCorrector.replace(" .NET"))).message("Did you mean .NET Framework?"), CommonPatterns.spaceX.message("Did you mean SpaceX, the company?"), NodePattern.N.form("apple").directlyBefore(NodePattern.or(NodePattern.N.form("id").markAs("D"), NodePattern.N.form("i").directlyBefore(NodePattern.N.form("d").markAs("D")))).andNot(NodePattern.N.formCaseSensitive("Apple").directlyBefore(NodePattern.N.formCaseSensitive("ID"))).correct(NodeCorrector.replaceNodes(NodePointer.anchor(), NodePointer.marked("D"), "Apple ID")).message("Did you mean Apple ID?"), NodePattern.N.formCaseSensitive("apple").and(CommonPatterns.beforeSkipping(NodePattern.N.lemma("'s"), 
NodePattern.N.form("carplay|tv|products?|iphones?|ipads?|devices?|id|apps?|ceo|logo|revenues?|headquarters?|offices?|cloud|(mac)?os|computers?|silicon|fans?|stores?|hardware|developers?|stock|airpods|founders?|chargers?|launch(ed|es|ing)?|users?|employees?|releas(es|ed|ing)|software|ecosystem|mobile|engineers?|customers?|tablets?|platforms?|accounts?|laptops?|brands?|(smart)?phones?|login|profits?|(web)?site|mails?|germany|cell|technology|retail|app|system|mac|quicktime|safari|resellers?|repair|genius|upgrades?|macbooks?|icloud|jobs?|keyboard|news|nasdaq|newsroom|online|refurbished|support|updates?|usb|screen|thunderbolt|lightning|dividends?|events?|emails?|education|siri|security"))).correct(NodeCorrector.replace("Apple")).message("Did you mean the Apple corporation?"), NodePattern.N.form("macs?").noFormCaseSensitive("Macs?").noLabel("PERSON").andNot(NodePattern.N.directlyBefore(NodePattern.or(NodePattern.N.form("salad|cosmetics?|lipsticks?|address(es)?|spoof(ing|ers?|s|ed)?|os|books?|(sub)?layers?|protocols?"), NodePattern.N.inFormSequence(0, "and|&|n", "cheese"), CommonPatterns.HYPHEN_NODE.noSpaceBefore()))).andNot(CommonPatterns.afterSkipping(CommonPatterns.noSpaceHyphen, NodePattern.N.form("big"))).correct(NodeCorrector.regexReplace("mac", "Mac")).message("Did you mean Mac computer?"));
        NodePattern singleWord = NodePattern.or(SpellingRules.fixWordCase("Grazie", "JetBrains Grazie").andOr(NodePattern.N.withHeadRelation("nsubj(:pass|:outer)?|i?obj|obl(:npmod|:tmod)?|nmod|compound"), NodePattern.N.directlyAfterHead().withHead("discourse", NodePattern.N.pos("V.*"))).andNot(NodePattern.N.directlyAfter(NodePattern.N.form("\\[").noSpaceAfter())), SpellingRules.fixWordCase("IT", "the IT (Information Technology)").withDependent("det|nmod:poss", NodePattern.N.directlyBeforeHead()).andOptionally(NodePattern.N.directlyAfter(NodePattern.N.withHeadRelation("det").correct(NodeCorrector.replace("")))), SpellingRules.fixWordCase("Windows", "Microsoft Windows").andOr(NodePattern.N.directlyBefore(EnglishTreePatterns.number), NodePattern.ROOT.withDependent("case").withDependent("cop")), SpellingRules.fixWordCase("Opera", "the Opera browser").andOr(NodePattern.N.directlyBefore(NodePattern.or(EnglishTreePatterns.number, NodePattern.N.form("browser"))), NodePattern.N.withHead("obj", NodePattern.N.lemma("install|use"))), SpellingRules.fileTypes(), ProperNames.fixFritzProducts.message("Fritz! 
products are spelled with an exclamation mark"), ProperNames.openAI.message("Did you mean the company OpenAI?"), NodePattern.N.formCaseSensitive("Googl(e[ds]?|ing)").pos("VB.*").and(CommonPatterns.possiblyConj(NodePattern.or(EnglishTreePatterns.withToMark, NodePattern.N.withDependent("aux.*|obj|obl")))).andNot(CommonPatterns.firstWord).andNot(CommonPatterns.inAllCapitalizedSentence).message("The verb 'to google' should be in lower case").and((node, match) -> match.withCorrector(NodeCorrector.rawReplace(node.textRange(), node.lowForm()))), CommonPatterns.possiblyConj(NodePattern.or(NodePattern.N.withHeadRelation("i?obj|obl|nsubj.*|nmod").andNot(NodePattern.N.withHead("obj", CommonPatterns.severalDependents("obj"))), NodePattern.N.withHead("compound|amod", NodePattern.N.pos("NN.*").and(CommonPatterns.skipUp("compound", NodePattern.N.withDependent("det|case|nmod:poss")))), NodePattern.ROOT.withDependent("det|case|nmod:poss"), NodePattern.N.withDependent("cop").noDependents("aux.*"))).andNot(NodePattern.N.potentialPos("VB").directlyAfter(NodePattern.N.form("to"))).andNot(NodePattern.N.directlyBefore(CommonPatterns.HYPHEN_NODE)).andNot(NodePattern.N.directlyAfter(CommonPatterns.HYPHEN_NODE)).andOr(SpellingRules.fixWordCase("Google", "the Google search or company"), SpellingRules.fixWordCase("Gradle", "the Gradle build tool"), SpellingRules.fixWordCase("React", "the React JS framework").andNot(NodePattern.N.pos("VB").noHeadRelation("compound|amod")), SpellingRules.fixWordCase("Kindle", "Amazon Kindle"), SpellingRules.fixWordCase("Excel", "Microsoft Excel"), SpellingRules.fixWordCase("Skype", "the Skype videoconferencing tool"), SpellingRules.fixWordCase("Snickers", "the Snickers chocolate bar"), SpellingRules.fixWordCase("Twitter", "the social network").noPos("VB"), SpellingRules.fixWordCase("Markdown", "the formatting language"), SpellingRules.fixWordCase("Slack", "the Slack messenger").noDependents("det"), SpellingRules.fixWordCase("Chevy", "Chevrolet")), 
SpellingRules.fixWordCase("TV", "television")).andNot(NodePattern.N.directlyAfter(NodePattern.or(EnglishTreePatterns.quotations, CommonPatterns.HYPHEN_NODE, CommonPatterns.anySlash).noSpaceAfter())).andNot(NodePattern.N.directlyBefore(NodePattern.or(EnglishTreePatterns.quotations, CommonPatterns.HYPHEN_NODE, CommonPatterns.anySlash).noSpaceBefore())).andNot(CommonPatterns.hashtag);
        return NodePattern.or(multiWord, singleWord);
    }

    /**
     * Builds the pattern that upper-cases file-type abbreviations such as "pdf" or "jsons".
     *
     * <p>The acronym part is upper-cased with {@code Locale.ROOT}; an optional trailing
     * "s" is kept as typed. Forms already written as 3-4 capitals are skipped, "php" is only
     * handled when it modifies a file/source/inspection/analysis head, and forms without
     * surrounding spaces or directly followed by ".letter" are excluded.
     *
     * @return the pattern with a raw-text correction and message attached
     */
    private static NodePattern fileTypes() {
        // Pattern.CASE_INSENSITIVE is the named form of the flag value 2.
        Pattern regex = Pattern.compile("(pdf|html|xml|css|gif|php|jsonl?|sql|xsl|yaml)(s?)", Pattern.CASE_INSENSITIVE);
        NodePattern candidate = NodePattern.N.form(regex.pattern())
                .noFormCaseSensitive("[A-Z]{3,4}[sS]?")
                .andOr(
                        NodePattern.N.noForm("php"),
                        NodePattern.N.withHead("amod|compound", NodePattern.N.lemma("file|source|inspection|analysis")))
                .andNot(NodePattern.N.noSpaceAround())
                .andNot(NodePattern.N.noSpaceAfter().directlyBefore(CommonPatterns.dot.noSpaceAfter().directlyBefore(NodePattern.N.form("\\p{L}.*"))));
        return candidate.and((node, match) -> {
            Matcher parts = regex.matcher(node.form());
            if (parts.matches()) {
                return match.withCorrector(NodeCorrector.rawReplace(node.textRange(), parts.group(1).toUpperCase(Locale.ROOT) + parts.group(2)));
            }
            // The form was already matched against the same regex text by the pattern above.
            throw new AssertionError();
        }).message("File types are usually capitalized");
    }

    /**
     * Builds a pattern that restores the canonical casing of a single word.
     *
     * <p>It selects forms matching the lower-cased {@code ideal} that are not already spelled
     * exactly as {@code ideal} and are not matched by {@code CommonPatterns.upperCase},
     * and replaces them with {@code ideal}.
     *
     * @param ideal the canonical spelling, e.g. {@code "Windows"}
     * @param whatWasMeant text inserted into the message "Did you mean ...?"
     * @return the pattern with correction and message attached
     */
    private static NodePattern fixWordCase(String ideal, String whatWasMeant) {
        String lowerCased = ideal.toLowerCase(Locale.ROOT);
        NodePattern wronglyCased = NodePattern.N.form(lowerCased)
                .noFormCaseSensitive(ideal)
                .andNot(CommonPatterns.upperCase);
        String hint = "Did you mean " + whatWasMeant + "?";
        return wronglyCased.correct(NodeCorrector.replace(ideal)).message(hint);
    }

    /**
     * Builds a pattern over a sequence of forms that upper-cases the first character of
     * every word in the sequence that does not already start with a capital.
     *
     * <p>Changes compared with the decompiled original: the {@code alreadyCapitalized}
     * pattern that was built and then never used has been removed, raw/{@code Object}-typed
     * stream code has been given proper generic types, and an empty argument list is now
     * rejected explicitly instead of failing with an incidental array-index error.
     *
     * @param patterns form regexes, one per consecutive word; they are lower-cased before
     *                 being handed to {@code inFormSequence}
     * @return the pattern; its callback yields {@code null} when every word is already capitalized
     * @throws IllegalArgumentException if no pattern is given
     */
    private static NodePattern ensureCapitalized(String ... patterns) {
        if (patterns.length == 0) {
            throw new IllegalArgumentException("At least one pattern is required");
        }
        String[] lowerCased = StreamEx.of(patterns).map(w -> w.toLowerCase(Locale.ROOT)).toArray(String.class);
        return NodePattern.N.inFormSequence(0, lowerCased).and((node, match) -> {
            NodeCorrector corrector = null;
            for (int i = 0; i < patterns.length; ++i) {
                String actual = node.neighbor(i).form();
                String ideal = StringTools.uppercaseFirstChar(actual);
                if (actual.equals(ideal)) {
                    continue;
                }
                // Accumulate one replacement per word that needs fixing.
                NodeCorrector replace = NodeCorrector.replace(node.neighbor(i), ideal);
                corrector = corrector == null ? replace : corrector.join(replace);
            }
            if (corrector == null) {
                // Nothing to fix: every word already starts with an upper-case character.
                return null;
            }
            return match.withCorrector(corrector).withMessage(PROPER_NAMES_SHOULD_BE_CAPITALIZED);
        });
    }

    /**
     * Builds the pattern behind the {@code Spelling.NUMBER_ENDING} rule.
     *
     * <p>Two alternatives are combined:
     * <ul>
     *   <li><b>wrong ending</b>: a number directly followed by two lower-case letters that
     *       are not the expected ordinal suffix. Unit-like endings, nodes whose head is
     *       labelled {@code LOCATION}, and nodes directly after a number-like form or a
     *       currency sign are skipped, as are forms that {@code knownPhrases} reports as part
     *       of a valid phrase. The first suggestion is the number with the expected suffix;
     *       when the typed two letters are tagged {@code IN|CC}, a second suggestion separates
     *       them from the number with a space.</li>
     *   <li><b>separate ending</b>: {@link #numberWithSeparateSuffix} directly before
     *       {@code CommonPatterns.forceConcatWithPrev}.</li>
     * </ul>
     *
     * @return the combined pattern
     */
    private static NodePattern numberEnding() {
        String regex = "([1-9]+\\d*)[a-z]{2}";
        Pattern pattern = Pattern.compile(regex);
        NodePattern numberWithLetters = NodePattern.N.formCaseSensitive(regex)
                .noFormCaseSensitive("\\d+(bn|[ap]m|yo|((da|[QRYZEPTGMkhdcm\u03bcnf])?(s|m|g|A|K|mol|cd|rad|sr|Hz|N|Pa|J|W|V|F|\u03a9|S|Wb|T|lm|lx|Bq|Sv|kat|L|l|M)|MMBtu|lux|rad|grad|pt|mp[gh]|[ndkmgt](b|hz)|ms|px|[kdcm]m|[kmhc]g|[md]l|b?hp|cc|lb|ft|hr|min|sec|[symw]|[rf]p[smhdy])|xx)")
                .andNot(NodePattern.N.withHead(NodePattern.N.label("LOCATION")))
                .andNot(NodePattern.N.directlyAfter(NodePattern.or(CommonPatterns.withNumberLikeForm, NodePattern.N.form("[$\u00a3\u20ac\u00a5\u20bd]"))));
        NodePattern wrongEnding = numberWithLetters.and((node, match) -> {
            Matcher m = pattern.matcher(node.form());
            if (!m.matches()) {
                return null;
            }
            String digits = m.group(1);
            String withExpectedSuffix = digits + SpellingRules.expectedSuffix(digits);
            if (withExpectedSuffix.equalsIgnoreCase(node.form()) || knownPhrases.isPartOfValidPhrase(node)) {
                return null;
            }
            var suggestions = match.withCorrector(NodeCorrector.replace(node, withExpectedSuffix));
            String typedSuffix = node.form().substring(m.end(1));
            if (typedSuffix.length() != 2 || !node.tree().treeSupport().tagToken(typedSuffix).hasPos("IN|CC")) {
                return suggestions;
            }
            // The two letters can be a word of their own: also offer splitting them off.
            return suggestions.withCorrector(NodeCorrector.replace(node, digits + " " + typedSuffix));
        }).message("Incorrect ordinal number ending");
        NodePattern separateEnding = numberWithSeparateSuffix
                .directlyBefore(CommonPatterns.forceConcatWithPrev)
                .message("The ordinal suffix should go directly after the number");
        return NodePattern.or(wrongEnding, separateEnding);
    }

    /**
     * Returns the ordinal suffix for a number given as a digit string.
     *
     * <p>Only the last two characters are parsed, so arbitrarily long digit strings do not
     * overflow {@code int}.
     *
     * @param number the digits of the number
     * @return the suffix computed by {@link #expectedSuffix(int)} for the last two digits
     * @throws NumberFormatException if the parsed tail is not a valid integer
     */
    private static String expectedSuffix(String number) {
        int length = number.length();
        String tail = length <= 2 ? number : number.substring(length - 2);
        return SpellingRules.expectedSuffix(Integer.parseInt(tail));
    }

    /**
     * Returns the English ordinal suffix ("st", "nd", "rd" or "th") for a number.
     *
     * <p>Numbers whose last two digits are between 4 and 20 inclusive always take "th";
     * otherwise the last digit decides. For negative input the remainders are not positive,
     * so the result is "th".
     *
     * @param number the number to find the suffix for
     * @return the ordinal suffix
     */
    static String expectedSuffix(int number) {
        int lastTwoDigits = number % 100;
        if (lastTwoDigits >= 4 && lastTwoDigits <= 20) {
            return "th";
        }
        int lastDigit = lastTwoDigits % 10;
        if (lastDigit == 1) {
            return "st";
        }
        if (lastDigit == 2) {
            return "nd";
        }
        if (lastDigit == 3) {
            return "rd";
        }
        return "th";
    }

    /**
     * Builds a custom pattern that finds known phrases written without their diacritics and
     * suggests the canonical spelling.
     *
     * <p>Phrases are indexed by the diacritic-free, lower-cased first word
     * ({@link #diacriticKey}). For a node whose stripped lower-case form hits the index, the
     * first candidate phrase that (a) passes {@link #shouldCorrectDiacritics} and (b) whose
     * last word is not directly followed by a node labelled as a geo-political entity,
     * location or organization is reported over the whole phrase range.
     *
     * <p>Compared with the decompiled original, the index and candidate list carry their
     * generic types (the raw versions made the phrase loop ill-typed) and the assignment
     * hidden inside the {@code if} condition has been pulled out; behavior is unchanged.
     *
     * @param phrases the known phrases with diacritics
     * @param message a format string whose single {@code %s} receives {@code phrase.source()}
     * @return the custom pattern; its callback yields {@code null} when nothing applies
     */
    private static NodePattern diacriticRegex(List<KnownPhrases.Phrase> phrases, String message) {
        NodePattern continuingNer = NodePattern.N.directlyBefore(NodePattern.N.label("GEO_POLITICAL_ENTITY|LOCATION|ORGANIZATION"));
        MostlySingularMultiMap<String, KnownPhrases.Phrase> entries = new MostlySingularMultiMap<>(StreamEx.of(phrases).groupingBy(SpellingRules::diacriticKey));
        return NodePattern.custom((node, match) -> {
            List<KnownPhrases.Phrase> candidates = entries.getList(Diacritics.removeDiacritics(node.lowForm()));
            if (candidates == null) {
                return null;
            }
            for (KnownPhrases.Phrase phrase : candidates) {
                if (!SpellingRules.shouldCorrectDiacritics(phrase, node)) {
                    continue;
                }
                Node last = node.neighbor(SpellingRules.words(phrase).length - 1);
                if (continuingNer.matches(last)) {
                    // The phrase is followed by another labelled entity; skip this candidate.
                    continue;
                }
                return match.withReportedRange(node.startOffset(), last.endOffset(), node.tree()).withCorrector(NodeCorrector.replaceNodes(node, last, phrase.phrase())).withMessage(String.format(message, phrase.source()));
            }
            return null;
        });
    }

    /**
     * Splits a phrase into its word tokens.
     *
     * <p>The text is split at spaces (which are dropped) and on both sides of every hyphen,
     * so each hyphen becomes a token of its own.
     *
     * @param phrase the phrase to split
     * @return the tokens in order
     */
    private static String[] words(KnownPhrases.Phrase phrase) {
        String text = phrase.phrase();
        return text.split(" |(?=-)|(?<=-)");
    }

    /**
     * Computes the lookup key of a phrase: its first word, lower-cased with
     * {@code Locale.ROOT} and passed through {@code Diacritics.removeDiacritics}.
     *
     * @param phrase the phrase to index
     * @return the lookup key
     */
    private static String diacriticKey(KnownPhrases.Phrase phrase) {
        String firstWord = SpellingRules.words(phrase)[0];
        String lowered = firstWord.toLowerCase(Locale.ROOT);
        return Diacritics.removeDiacritics(lowered);
    }

    /**
     * Tells whether the text starting at {@code node} is the given phrase written with
     * different diacritics.
     *
     * <p>The lower-case forms of as many tokens as the phrase has words are compared with the
     * lower-cased phrase words. The result is {@code true} only when the two sequences
     * differ as written but are equal once both are passed through
     * {@code Diacritics.removeDiacritics}.
     *
     * <p>Compared with the decompiled original, the locals carry their generic types (the
     * {@code Object[]}/raw-list versions made {@code s.toLowerCase(...)} ill-typed) and the
     * stripped lists are computed only when the written forms differ; the result is unchanged.
     *
     * @param phrase the canonical phrase
     * @param node the first token of the candidate text
     * @return whether a diacritics correction applies
     */
    private static boolean shouldCorrectDiacritics(KnownPhrases.Phrase phrase, Node node) {
        String[] words = SpellingRules.words(phrase);
        List<String> wordsInText = node.forward().limit(words.length).map(Tree.Token::lowForm).toList();
        List<String> wordsLow = StreamEx.of(words).map(s -> s.toLowerCase(Locale.ROOT)).toList();
        if (wordsLow.equals(wordsInText)) {
            // Already spelled exactly like the canonical phrase (ignoring case).
            return false;
        }
        List<String> bareExpected = StreamEx.of(wordsLow).map(Diacritics::removeDiacritics).toList();
        List<String> bareInText = wordsInText.stream().map(Diacritics::removeDiacritics).toList();
        return bareExpected.equals(bareInText);
    }

    /**
     * Builds the pattern behind the {@code Spelling.MISSING_DIACRITICS} rule.
     *
     * <p>It is the diacritic pattern over the phrases loaded from
     * {@code knownPhrases.diacriticsPath()} with the borrowed-phrase message, minus several
     * exclusions: nodes labelled as organization/location/person, the form "a" directly
     * before a labelled entity, upper-case forms, certain uses of the lemma "resume", and
     * "nee" that is not directly before a node labelled {@code PERSON}.
     *
     * @return the pattern with exclusions applied
     */
    private static NodePattern missingDiacritic() {
        NodePattern borrowedPhrase = SpellingRules.diacriticRegex(knownPhrases.phrasesFromFile(knownPhrases.diacriticsPath()), LOAN_MSG);
        NodePattern labelledEntity = NodePattern.N.label("ORGANIZATION|LOCATION|PERSON");
        NodePattern articleBeforeEntity = NodePattern.N.form("a")
                .directlyBefore(NodePattern.N.label("ORGANIZATION|LOCATION|PRODUCT|MISC"));
        NodePattern resumeCases = NodePattern.N.lemma("resume").andOr(
                Articles.nounWithoutDeterminer,
                NodePattern.N.noPos("NN"),
                EnglishTreePatterns.compound,
                NodePattern.N.directlyAfter(NodePattern.N.form("to")));
        NodePattern neeNotBeforePerson = NodePattern.N.form("nee")
                .andNot(NodePattern.N.directlyBefore(NodePattern.N.label("PERSON")));
        return borrowedPhrase.andNot(NodePattern.or(labelledEntity, articleBeforeEntity, CommonPatterns.upperCase, resumeCases, neeNotBeforePerson));
    }

    /**
     * Builds the pattern for geographic names written without diacritics.
     *
     * <p>It selects capitalized nodes labelled with {@code NER_GEO_LABELS}, applies a series
     * of context exclusions (neighbouring labelled entities, a following unlabelled
     * capitalized word, dependents, certain head relations, determiners, "kingdom of god",
     * and "Que" in a sentence mentioning Canada or Montreal), and finally requires the
     * diacritic pattern over the phrases loaded from {@code knownPhrases.geoDiacriticsPath()}.
     *
     * @return the combined pattern carrying the geographic diacritics message
     */
    private static NodePattern geoDiacritics() {
        NodePattern geoName = CommonPatterns.capitalized.label(NER_GEO_LABELS)
                .andNot(NodePattern.N.directlyAfter(NodePattern.N.label("GEO_POLITICAL_ENTITY|LOCATION|ORGANIZATION")))
                .andNot(NodePattern.N.directlyBefore(CommonPatterns.capitalizedMiddle.noLabel(".*")))
                .noDependents(".*", NodePattern.N.inFormSequence(1, "[ea]t", "al"))
                .noHeadRelation("nummod|dep")
                .noDependents("det")
                .andNot(NodePattern.N.inFormSequence(2, "kingdom", "of", "god"))
                .andNot(NodePattern.N.form("Que").inSentenceWith(NodePattern.N.form("canada|montr[\u00e9e]al")));
        NodePattern knownGeoPhrase = SpellingRules.diacriticRegex(knownPhrases.phrasesFromFile(knownPhrases.geoDiacriticsPath()), GEO_MSG);
        return geoName.and(knownGeoPhrase);
    }
}

