/*
 * Decompiled with CFR 0.152.
 */
package ai.grazie.rules.de;

import ai.grazie.rules.Example;
import ai.grazie.rules.MatchingResult;
import ai.grazie.rules.NodeRuleMatch;
import ai.grazie.rules.Rule;
import ai.grazie.rules.common.CommonPatterns;
import ai.grazie.rules.de.GermanMetadata;
import ai.grazie.rules.de.GermanParameters;
import ai.grazie.rules.de.GermanTreePatterns;
import ai.grazie.rules.document.DocumentRule;
import ai.grazie.rules.document.DocumentSentence;
import ai.grazie.rules.document.Metadata;
import ai.grazie.rules.tree.Node;
import ai.grazie.rules.tree.NodeCorrector;
import ai.grazie.rules.tree.NodeMatch;
import ai.grazie.rules.tree.NodePattern;
import ai.grazie.rules.tree.Tree;
import ai.grazie.rules.tree.TreeSupport;
import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;
import one.util.streamex.StreamEx;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.languagetool.tools.StringTools;

class DigraphNormalization
extends DocumentRule.PatternRule {
    private static final String MSG = "Vermeiden Sie Ersetzungen f\u00fcr \u00e4, \u00f6, \u00fc oder \u00df";
    private static final Metadata.Key<List<Postponed>> KEY = new Metadata.Key<List<Postponed>>(GermanMetadata.postponedDigraphs){

        @Override
        protected void serialize(DataOutput out, List<Postponed> value) throws IOException {
            out.writeVInt(value.size());
            for (Postponed each : value) {
                out.writeVInt(each.offset);
                out.writeString(each.replacement);
            }
        }

        @Override
        protected List<Postponed> deserialize(DataInput in) throws IOException {
            ArrayList<Postponed> result2 = new ArrayList<Postponed>();
            int size = in.readVInt();
            for (int i = 0; i < size; ++i) {
                result2.add(new Postponed(in.readVInt(), in.readString()));
            }
            return result2;
        }

        @Override
        protected List<Postponed> mergeValues(List<Postponed> t1, List<Postponed> t2) {
            return StreamEx.of(t1).append(t2).toList();
        }

        @Override
        protected int fingerprint() {
            return Arrays.stream(Postponed.class.getDeclaredFields()).map(f -> f.getType().getName()).toList().hashCode();
        }
    };
    private static final Map<String, String> digraphEquivalents = Map.of("ss", "\u00df", "ae", "\u00e4", "ue", "\u00fc", "oe", "\u00f6");

    DigraphNormalization() {
        super("Spelling.DIGRAPH_NORMALIZATION", "Digraphen als Ersatz f\u00fcr \u00e4, \u00f6, \u00fc oder \u00df", "Digraphen wie \u201eae\u201c oder \u201ess\u201c werden automatisch in Unicode-Zeichen f\u00fcr Umlaute oder \u201e\u00df\u201c umgewandelt.", null, NodePattern.or(DigraphNormalization.digraphToUnicode(), DigraphNormalization.combiningCharacters(), DigraphNormalization.eszettToSSInSwiss()), new Example("Er <b>haette</b> ihn <b>anlaesslich</b> einer <b>Begruessung</b> besucht.", List.of(MSG, MSG, MSG), List.of("Er <b>h\u00e4tte</b> ihn <b>anl\u00e4sslich</b> einer <b>Begr\u00fc\u00dfung</b> besucht."), Map.of(GermanParameters.VARIANT, "DE")));
    }

    private static NodePattern digraphToUnicode() {
        return NodePattern.or(NodePattern.N.form(".*[auo]e.*"), NodePattern.N.form(".+ss.*").andNot(GermanParameters.VARIANT.withValue("CH"))).andOr(NodePattern.N.noPos(), NodePattern.N.formCaseSensitive("Spass"), CommonPatterns.capitalized.and(CommonPatterns.firstToken).onlyPos("EIG.*")).noForm("Sass|.*daemon.*").andNot(GermanTreePatterns.zuEnde).andOr(NodePattern.N.noLabel("PERSON|ORGANIZATION|MISC"), NodePattern.N.withDependent("det(:poss)?"), CommonPatterns.upperCase, CommonPatterns.skipConjUp(NodePattern.N.noHeadRelation("nsubj(:pass)?|i?obj|obl|nmod|compound|flat(:name)?|appos"))).andNot(NodePattern.N.noSpaceAfter().directlyBefore(CommonPatterns.HYPHEN_LIKE_NODE)).andNot(NodePattern.N.noSpaceBefore().directlyAfter(CommonPatterns.HYPHEN_LIKE_NODE)).andNot(NodePattern.N.directlyBefore(NodePattern.N.form("einander"))).and(NodePattern.custom((node, match) -> {
            String form;
            TreeSupport support = node.tree().treeSupport();
            if (support.isAcceptedBySpellchecker(form = node.form())) {
                return null;
            }
            String replacement = DigraphNormalization.generateCombinations(form, digraphEquivalents).stream().filter(r -> !r.equalsIgnoreCase(form) && (StringTools.isNotAllLowercase((String)r.substring(1)) || support.tagToken((String)r).hasPos(".*"))).sorted(Comparator.comparingInt(String::length)).limit(4L).filter(r -> DigraphNormalization.findSpellcheckerAcceptedCase(r, support)).findFirst().orElse(null);
            if (replacement == null) {
                return null;
            }
            String variant = GermanParameters.VARIANT.getValue(node.tree());
            if (replacement.contains("\u00df") && variant.isEmpty() && !node.tree().text().contains("\u00df")) {
                return match.withMetadata(KEY, List.of(new Postponed(node.startOffset(), replacement)));
            }
            if (variant.equals("CH")) {
                replacement = replacement.replaceAll("\u00df", "ss");
            }
            if (replacement.equals(form)) {
                return null;
            }
            return DigraphNormalization.addReplacement(match, replacement);
        }));
    }

    private static NodeMatch addReplacement(NodeMatch match, String replacement) {
        return match.withCorrector(NodeCorrector.replace(match.anchor(), replacement).batchCapable("digraphNormalization")).withMessage(MSG).enableAutoFix();
    }

    @Override
    public MatchingResult checkDocument(List<DocumentSentence.Analyzed> sentences2) {
        Supplier textHasEsZett = Suppliers.memoize(() -> sentences2.stream().anyMatch(s -> s.text.contains("\u00df")));
        ArrayList<NodeRuleMatch> matches = new ArrayList<NodeRuleMatch>();
        for (DocumentSentence.Analyzed sentence : sentences2) {
            List<Postponed> list = sentence.metadata.get(KEY);
            if (list == null) continue;
            if (!((Boolean)textHasEsZett.get()).booleanValue()) {
                return MatchingResult.EMPTY;
            }
            Tree tree = Objects.requireNonNull(sentence.tree);
            for (Postponed postponed : list) {
                Node node = Objects.requireNonNull(tree.findBestNodeAt(postponed.offset));
                matches.add(new NodeRuleMatch((Rule)this, DigraphNormalization.addReplacement(NodeMatch.EMPTY.withAnchor(node), postponed.replacement)));
            }
        }
        return MatchingResult.from(matches);
    }

    private static NodePattern combiningCharacters() {
        Map<String, String> equivalents = Map.of("a", "\u00e4", "u", "\u00fc", "o", "\u00f6");
        return NodePattern.N.form("\u00a8").andOr(NodePattern.N.directlyBefore(NodePattern.N.noSpaceBefore().form("[auo].+").noPos()).and((node, match) -> {
            Node next = node.neighbor(1);
            String nextForm = next.form();
            String replacement = (String)equivalents.get(nextForm.substring(0, 1)) + nextForm.substring(1);
            return match.withCorrector(NodeCorrector.replaceNodes(node, next, replacement));
        }), NodePattern.N.directlyAfter(NodePattern.N.noSpaceAfter().form(".+[auo]").noPos()).and((node, match) -> {
            Node prev = node.neighbor(-1);
            String prevForm = prev.form();
            String replacement = prevForm.substring(0, prevForm.length() - 1) + (String)equivalents.get(prevForm.substring(prevForm.length() - 1));
            return match.withCorrector(NodeCorrector.replaceNodes(prev, node, replacement));
        })).message("M\u00f6chten Sie einen Umlautbuchstaben verwenden?");
    }

    private static NodePattern eszettToSSInSwiss() {
        return GermanParameters.VARIANT.withValue("CH").and(NodePattern.N.form(".+\u00df.*").withSubstringHint("\u00df").and((node, match) -> match.withCorrector(NodeCorrector.replace(node, node.form().replaceAll("\u00df", "ss")))).message("Im Schweizerhochdeutschen wird \u201e\u00df\u201c nicht verwendet und durch \u201ess\u201c ersetzt"));
    }

    static List<String> generateCombinations(String input, Map<String, String> equivalents) {
        ArrayList<String> results = new ArrayList<String>();
        DigraphNormalization.generateCombinations(input, results, 0, 0, equivalents);
        return results;
    }

    private static void generateCombinations(String current, List<String> results, int index, int replacementCount, Map<String, String> equivalents) {
        results.add(current);
        for (int i = index; i < current.length() - 1; ++i) {
            String substring = current.substring(i, i + 2);
            String replacement = equivalents.get(substring.toLowerCase(Locale.ROOT));
            if (replacement == null) continue;
            String newString = current.substring(0, i) + replacement + current.substring(i + 2);
            if (replacementCount > 5) continue;
            DigraphNormalization.generateCombinations(newString, results, i + 1, replacementCount + 1, equivalents);
        }
    }

    private static boolean findSpellcheckerAcceptedCase(String replacement, TreeSupport support) {
        if (!StringTools.isNotAllLowercase((String)replacement.substring(1))) {
            return support.isAcceptedBySpellchecker(replacement);
        }
        String lower = replacement.toLowerCase(Locale.ROOT);
        String capitalized = StringTools.uppercaseFirstChar((String)lower);
        return support.tagToken(lower).hasPos(".*") && support.isAcceptedBySpellchecker(lower) || support.tagToken(capitalized).hasPos(".*") && support.isAcceptedBySpellchecker(capitalized);
    }

    private record Postponed(int offset, String replacement) {
    }
}

