/*
 * Decompiled with CFR 0.152.
 */
package ai.grazie.rules.common;

import ai.grazie.nlp.langs.Language;
import ai.grazie.nlp.patterns.Pattern;
import ai.grazie.rules.common.WordSet;
import ai.grazie.rules.tree.Formatter;
import ai.grazie.rules.tree.Node;
import ai.grazie.rules.tree.TextRange;
import ai.grazie.rules.tree.TreeSupport;
import ai.grazie.rules.util.CharUtil;
import ai.grazie.rules.util.LengthPreservingLowerCase;
import ai.grazie.rules.util.regex.Regex;
import com.hankcs.algorithm.AhoCorasickDoubleArrayTrie;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Stream;
import one.util.streamex.EntryStream;
import one.util.streamex.StreamEx;
import org.apache.commons.lang3.StringUtils;
import org.languagetool.tools.StringTools;

/**
 * Per-language collection of known phrases, loaded through {@link WordSet} from a fixed set of
 * resource files, together with a lazily built Aho-Corasick trie used to locate those phrases
 * inside a sentence.
 *
 * <p>The trie is stored in a {@code volatile} field and built on first use without locking, so
 * concurrent first callers may each build a trie; whichever is written last is kept.
 */
public class KnownPhrases {
    // java.util.regex.Pattern is spelled out in full because this file imports a different Pattern.
    /** Runs of whitespace / Unicode separator characters; collapsed to a single space. */
    private static final java.util.regex.Pattern WHITESPACE_RUN = java.util.regex.Pattern.compile("[\\s\\p{Z}]+");
    /** Characters of the Unicode "other symbol" category; removed before comparison. */
    private static final java.util.regex.Pattern OTHER_SYMBOL = java.util.regex.Pattern.compile("\\p{So}");
    /** Text that is "Mc" or "Le" followed only by ASCII upper-case letters. */
    private static final java.util.regex.Pattern MC_LE_UPPERCASE = java.util.regex.Pattern.compile("(Mc|Le)[A-Z]+");
    /** Characters that, when directly preceding a phrase, allow its first letter to be capitalized. */
    private static final String PRECEDING_QUOTE_CHARS = "'\"\u201c\u201d\u201e\u00ab\u00bb`\u2018\u2019";

    public final Language language;
    /** Resource path to the phrases read from it, in loading order. Values are immutable lists. */
    private final Map<String, List<Phrase>> knownPhrases = new LinkedHashMap<>();
    /** Built on first call to {@link #validPhrases(String)}; {@code null} until then. */
    private volatile AhoCorasickDoubleArrayTrie<Serializable> trie;

    /**
     * Loads every phrase file that exists for the given language: the multi-word spelling files,
     * the two diacritics files, and the "accepted_nosuggest" files. Missing files are skipped.
     *
     * <p>Diacritics files are read as {@code phrase;source} lines. A line without {@code ';'}
     * makes the constructor fail with {@link ArrayIndexOutOfBoundsException}.
     *
     * @param language the language whose ISO code selects the language-specific resource directory
     */
    public KnownPhrases(Language language) {
        this.language = language;
        for (String file : this.multiWordFiles()) {
            this.loadRegexFileIfPresent(file);
        }
        // Many lines share the same source text; keep a single String instance per distinct value.
        Map<String, String> sourceInterner = new HashMap<>();
        for (String file : List.of(this.diacriticsPath(), this.geoDiacriticsPath())) {
            if (!WordSet.resourceExists(file)) {
                continue;
            }
            List<Phrase> filePhrases = new ArrayList<>();
            for (String line : WordSet.loadLines(file)) {
                String[] parts = line.split(";", 2);
                filePhrases.add(new Phrase(parts[0], sourceInterner.computeIfAbsent(parts[1], source -> source)));
            }
            this.knownPhrases.put(file, List.copyOf(filePhrases));
        }
        for (String file : this.perLanguageResources("accepted_nosuggest.txt")) {
            this.loadRegexFileIfPresent(file);
        }
    }

    /** Registers the phrases of a regex-style file under its path, if that resource exists. */
    private void loadRegexFileIfPresent(String file) {
        if (WordSet.resourceExists(file)) {
            // Stored as an immutable copy: these lists are handed out by phrasesFromFile and also
            // feed the cached trie, so they must not change after loading.
            this.knownPhrases.put(file, List.copyOf(KnownPhrases.readRegexFile(file)));
        }
    }

    /**
     * Reads a file in which each non-blank line not starting with {@code '#'} is a regex that is
     * expanded into all the strings it can match.
     *
     * @param file resource path to read
     * @return one {@link Phrase} per expanded value, whose source is the originating line; the
     *         values of each line are ordered case-insensitively
     */
    private static List<Phrase> readRegexFile(String file) {
        List<Phrase> filePhrases = new ArrayList<>();
        for (String line : WordSet.loadLines(file)) {
            if (line.startsWith("#") || line.isBlank()) {
                continue;
            }
            Set<String> phrases = Regex.parse(line).possibleValues();
            assert (phrases != null) : "The entries in " + file + " should be finite enumerable regexes with not too many possible values. Couldn't extract values from: " + line;
            for (String phrase : StreamEx.of(phrases).sorted(String.CASE_INSENSITIVE_ORDER)) {
                filePhrases.add(new Phrase(phrase, line));
            }
        }
        return filePhrases;
    }

    /**
     * Returns the phrases loaded from the given resource path.
     *
     * @param file resource path, e.g. the value of {@link #diacriticsPath()}
     * @return an immutable list of phrases; empty when the file was not loaded
     */
    public List<Phrase> phrasesFromFile(String file) {
        return this.knownPhrases.getOrDefault(file, List.of());
    }

    /** @return the resource path of this language's geographic diacritics file */
    public String geoDiacriticsPath() {
        return this.language.getIso() + "/geo_diacritics.txt";
    }

    /** @return the resource path of this language's diacritics file */
    public String diacriticsPath() {
        return this.language.getIso() + "/diacritics.txt";
    }

    /** @return the "international" and language-specific multi-word spelling file paths, in that order */
    List<String> multiWordFiles() {
        return this.perLanguageResources("multi-word-spelling.txt");
    }

    /** Builds {@code international/<fileName>} and {@code <iso>/<fileName>}, in that order. */
    private List<String> perLanguageResources(String fileName) {
        return Stream.of("international", this.language.getIso().toString())
                .map(directory -> directory + "/" + fileName)
                .toList();
    }

    /**
     * Tells whether the node's text range is covered by some known phrase found in the text of
     * the node's tree.
     *
     * @param node the node to check
     * @return {@code true} if any range from {@link #validPhrases(String)} contains the node's range
     */
    public boolean isPartOfValidPhrase(Node node) {
        return this.validPhrases(node.tree().text()).stream().anyMatch(range -> range.containsInclusive(node.textRange()));
    }

    /**
     * Finds all occurrences of known phrases in the sentence that lie on word boundaries at both
     * ends and whose actual casing is acceptable for at least one known spelling.
     *
     * @param sentence the text to scan
     * @return the ranges of accepted occurrences, in the order the trie reports them
     */
    public List<TextRange> validPhrases(String sentence) {
        List<TextRange> result = new ArrayList<>();
        AhoCorasickDoubleArrayTrie<Serializable> trie = this.trie;
        if (trie == null) {
            this.trie = trie = this.buildTrie();
        }
        // The trie keys are LengthPreservingLowerCase forms, so the text is scanned in the same form;
        // the reported offsets are then applied to the original sentence.
        trie.parseText((CharSequence)new LengthPreservingLowerCase(sentence), (start, end, entry) -> {
            if (Pattern.isWordBoundaryBefore(sentence, start) && Pattern.isWordBoundaryBefore(sentence, end)) {
                // A trie value is either a single spelling or an array of spellings (see buildTrie).
                Stream<String> candidates = entry instanceof String single
                        ? Stream.of(single)
                        : StreamEx.of((String[])entry);
                if (KnownPhrases.isActualTextAcceptable(start, sentence.substring(start, end), candidates, sentence)) {
                    result.add(new TextRange(start, end));
                }
            }
        });
        return result;
    }

    /**
     * Builds the trie over all loaded phrases. Phrases are grouped by their
     * {@link LengthPreservingLowerCase} form; each group becomes one trie entry.
     */
    private AhoCorasickDoubleArrayTrie<Serializable> buildTrie() {
        Map<String, List<String>> spellingsByKey = new HashMap<>();
        for (List<Phrase> filePhrases : this.knownPhrases.values()) {
            for (Phrase known : filePhrases) {
                String key = new LengthPreservingLowerCase(known.phrase()).toString();
                spellingsByKey.computeIfAbsent(key, ignored -> new ArrayList<>()).add(known.phrase());
            }
        }
        AhoCorasickDoubleArrayTrie<Serializable> trie = new AhoCorasickDoubleArrayTrie<>();
        trie.build(EntryStream.of(spellingsByKey).mapValues(KnownPhrases::toTrieValue).toSortedMap());
        return trie;
    }

    /** Stores a single spelling as a plain {@code String} and several spellings as a {@code String[]}. */
    private static Serializable toTrieValue(List<String> spellings) {
        if (spellings.size() > 1) {
            return spellings.toArray(new String[0]);
        }
        return spellings.get(0);
    }

    /**
     * Normalizes the matched text (whitespace runs to one space, "other symbol" characters removed,
     * ASCII apostrophe to the right single quotation mark) and checks it against the candidates.
     *
     * @param start offset of the match in {@code sentence}
     * @param actual the matched text as it appears in the sentence
     * @param suggestions known spellings sharing the match's lower-case form; consumed by this call
     * @param sentence the full text being scanned
     * @return {@code true} if the normalized text is allowed for at least one candidate
     */
    static boolean isActualTextAcceptable(int start, String actual, Stream<String> suggestions, String sentence) {
        String singleSpaced = WHITESPACE_RUN.matcher(actual).replaceAll(" ");
        String withoutSymbols = OTHER_SYMBOL.matcher(singleSpaced).replaceAll("");
        String clean = withoutSymbols.replace('\'', '\u2019');
        return suggestions.anyMatch(candidate -> KnownPhrases.isAllowed(clean, candidate, start, sentence));
    }

    /**
     * Decides whether {@code actual} is an acceptable rendering of the known spelling
     * {@code expected}. Exact equality is always accepted; otherwise the two must be equal
     * ignoring case and the casing difference must fall under one of the cases below.
     */
    private static boolean isAllowed(String actual, String expected, int start, String sentence) {
        if (expected.equals(actual)) {
            return true;
        }
        if (!actual.equalsIgnoreCase(expected)) {
            return false;
        }
        if (KnownPhrases.isUpperCaseOnly(actual) || MC_LE_UPPERCASE.matcher(actual).matches()) {
            return true;
        }
        // The remaining cases apply only when the first word of the known spelling has no upper-case
        // letters. indexOf is used instead of split(" ")[0], which throws for an all-spaces string.
        int firstSpace = expected.indexOf(' ');
        String firstWord = firstSpace < 0 ? expected : expected.substring(0, firstSpace);
        if (firstWord.chars().anyMatch(Character::isUpperCase)) {
            return false;
        }
        boolean mayStartCapitalized = TreeSupport.isCapitalizedSentenceStart(sentence, start)
                || Formatter.possiblyEndsWithSentenceBoundary(sentence.substring(0, start))
                || start > 0 && CharUtil.isAnyOf(PRECEDING_QUOTE_CHARS, sentence.charAt(start - 1));
        if (mayStartCapitalized && actual.equals(StringTools.uppercaseFirstChar(expected))) {
            return true;
        }
        return KnownPhrases.isAllCapitalized(actual)
                && (!StringUtils.isMixedCase(expected) || KnownPhrases.looksLikeAllCapitalizedHeader(sentence));
    }

    /** True when every word of the sentence is capitalized and it does not end like a sentence. */
    private static boolean looksLikeAllCapitalizedHeader(String sentence) {
        return KnownPhrases.isAllCapitalized(sentence) && !Formatter.possiblyEndsWithSentenceBoundary(sentence);
    }

    /** True when the phrase contains no lower-case characters at all. */
    private static boolean isUpperCaseOnly(String phrase) {
        return phrase.chars().noneMatch(Character::isLowerCase);
    }

    /**
     * True when no word starts with a lower-case character, where a word start is index 0 or any
     * position whose previous character is not a word character.
     */
    private static boolean isAllCapitalized(String sentence) {
        for (int i = 0; i < sentence.length(); ++i) {
            boolean startsWord = i == 0 || !KnownPhrases.isWordChar(sentence.charAt(i - 1));
            if (startsWord && Character.isLowerCase(sentence.charAt(i))) {
                return false;
            }
        }
        return true;
    }

    /** Letters, digits, hyphen and underscore count as word characters. */
    private static boolean isWordChar(char c) {
        return Character.isLetterOrDigit(c) || c == '-' || c == '_';
    }

    /**
     * A known phrase together with where it came from.
     *
     * @param phrase the phrase text
     * @param source the originating regex line, or the second field of a diacritics line
     */
    public record Phrase(String phrase, String source) {
    }
}

