/*
 * Decompiled with CFR 0.152.
 */
package ai.grazie.rules.common;

import ai.grazie.ner.model.SentenceWithNERAnnotations;
import ai.grazie.nlp.similarity.Levenshtein;
import ai.grazie.rules.common.CommonPatterns;
import ai.grazie.rules.common.Diacritics;
import ai.grazie.rules.common.KnownPhrases;
import ai.grazie.rules.tree.Node;
import ai.grazie.rules.tree.NodeCorrector;
import ai.grazie.rules.tree.NodeMatch;
import ai.grazie.rules.tree.NodePattern;
import ai.grazie.rules.tree.TextRange;
import ai.grazie.rules.tree.Tree;
import ai.grazie.rules.util.CharUtil;
import it.unimi.dsi.fastutil.ints.Int2ObjectMap;
import it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashSet;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import one.util.streamex.StreamEx;
import org.jetbrains.annotations.Nullable;

public class MultiWordSpelling {
    public static final NodePattern DEFAULT_SKIP_TYPO_FIXES = NodePattern.or(NodePattern.N.anyPos().lemma(".*"), NodePattern.PUNCT.andNot(CommonPatterns.HYPHEN_NODE));
    private static final int MIN_KEY_LENGTH = 3;
    private static final int HASHING_START_LENGTH = 5;
    private static final int MAX_WORDS = 6;
    private static final String aposLike = "`\u2019'\"";
    private final Int2ObjectMap<List<Phrase>> byHash = new Int2ObjectOpenHashMap();
    private final int maxKeyLength;
    private final KnownPhrases knownPhrases;
    private NodePattern skipTypoFixes = NodePattern.or(new NodePattern[0]);
    private static final NodePattern innerEntityPunct = NodePattern.N.form("[.`\u2019'\"]").noSpaceBefore();
    private static final NodePattern hasLetterOrDigit = NodePattern.N.form(".*[\\p{L}\\d].*");
    private static final NodePattern matchEnd = NodePattern.or(NodePattern.PUNCT.andNot(innerEntityPunct), CommonPatterns.slash);

    public MultiWordSpelling(KnownPhrases knownPhrases) {
        this.knownPhrases = knownPhrases;
        HashSet<Phrase> added = new HashSet<Phrase>();
        for (String file : knownPhrases.multiWordFiles()) {
            for (KnownPhrases.Phrase pair : knownPhrases.phrasesFromFile(file)) {
                String phrase = pair.phrase();
                String line = pair.source();
                assert (!phrase.contains("'")) : "Phrases should contain smart apostrophes: '" + phrase + "' in line " + line;
                String[] words = MultiWordSpelling.splitWords(phrase);
                assert (words.length <= 6) : "Phrases of more than 6 are not yet supported: '" + phrase + "' in line " + line;
                Phrase p = new Phrase(phrase, MultiWordSpelling.multiWordKey(phrase));
                if (!added.add(p)) {
                    throw new AssertionError((Object)("Duplicate phrase: '" + phrase + "' in " + line));
                }
                assert (p.key.length() >= 3) : "Phrases should be long enough: '" + phrase + "' in line " + line;
                int[] hashes = MultiWordSpelling.startHashes(phrase, 0);
                assert (hashes != null) : "Disallowed non-word character in '" + phrase + "' in line " + line;
                for (int hash : hashes) {
                    ((List)this.byHash.computeIfAbsent(hash, __ -> new ArrayList())).add(p);
                }
            }
        }
        this.maxKeyLength = StreamEx.of((Collection)this.byHash.values()).flatCollection(Function.identity()).mapToInt(s -> s.key.length()).max().orElseThrow();
    }

    private static String[] splitWords(String phrase) {
        return phrase.split("[- ]");
    }

    private static String normalizeForDistance(String text2) {
        return Diacritics.removeDiacritics(text2).replaceAll("[-\\s\\p{Z}]+", " ").replaceAll("[`\u2019'\"]", "'");
    }

    public MultiWordSpelling skipTypoFixes(NodePattern pattern) {
        this.skipTypoFixes = pattern;
        return this;
    }

    public NodePattern pattern(String typoMsgPattern, String minorMsgPattern) {
        NodePattern beforeHyphen = NodePattern.N.noSpaceAfter().directlyBefore(CommonPatterns.HYPHEN_NODE);
        return NodePattern.custom((start, match) -> {
            if ("punct".equals(start.headRelation()) || !Character.isLetterOrDigit(start.form().charAt(0))) {
                return null;
            }
            List candidates = this.candidatePhrases(start);
            if (candidates.isEmpty()) {
                return null;
            }
            String sentence = start.tree().text();
            int startOffset = start.startOffset();
            NodeMatch longestResult = null;
            Node end = start;
            while (end != null && end.endOffset() - startOffset <= this.maxKeyLength + 6) {
                if (!beforeHyphen.matches(end)) {
                    List<Phrase> filtered;
                    List<Phrase> filteredByPrefix;
                    String phrase = sentence.substring(startOffset, end.endOffset());
                    String key = MultiWordSpelling.multiWordKey(phrase);
                    if (key.length() > this.maxKeyLength || (filteredByPrefix = MultiWordSpelling.filterByPrefix(candidates, key)).isEmpty()) break;
                    Phrase actual = new Phrase(phrase, key);
                    Map partition = StreamEx.of(filteredByPrefix).partitioningBy(c -> c.isFullMatch(actual));
                    List suggestions = (List)partition.get(true);
                    if (!(suggestions.isEmpty() || KnownPhrases.isActualTextAcceptable(startOffset, actual.original, suggestions.stream().map(Phrase::original), sentence) || (filtered = this.filterSignificantTypos(start, end, suggestions)).isEmpty() || this.isCoveredByValidPhrase(new TextRange(startOffset, end.endOffset()), start.tree()))) {
                        List<Phrase> closest = MultiWordSpelling.getClosestSuggestions(filtered, actual);
                        String msg = MultiWordSpelling.rangeWithSignificantTypos(start, end = MultiWordSpelling.expandForBetterMatch(start, end, closest.get(0), actual), closest.get(0)) != null ? typoMsgPattern : minorMsgPattern;
                        longestResult = match.withCorrector(NodeCorrector.replaceNodes(start, end, (String[])closest.stream().map(Phrase::original).toArray(String[]::new))).withMessage(msg.formatted(closest.get((int)0).original));
                    }
                    if ((candidates = (List)partition.get(false)).isEmpty()) break;
                }
                end = MultiWordSpelling.nextWord(end);
            }
            return longestResult;
        }).noForm("watches|macros|phone").andNot(NodePattern.N.formCaseSensitive("github|javascript|typescript").andOr(NodePattern.N.noSpaceAfter().directlyBefore(CommonPatterns.slash), NodePattern.N.noSpaceBefore().directlyAfter(CommonPatterns.slash), CommonPatterns.quotedWord)).andNot(NodePattern.N.directlyAfter(NodePattern.N.form("#").noSpaceAfter())).andNot(NodePattern.N.formCaseSensitive("FOX")).andNot(NodePattern.N.formCaseSensitive("Google").and(CommonPatterns.beforeSkipping(CommonPatterns.noSpaceHyphen, NodePattern.N.form("IDE"))));
    }

    private static Node expandForBetterMatch(Node start, Node end, Phrase sug, Phrase actual) {
        String sentence;
        Node next = MultiWordSpelling.nextWord(end);
        if (hasLetterOrDigit.matches(next) && sug.matchDistance(MultiWordSpelling.normalizeForDistance((sentence = start.tree().text()).substring(start.startOffset(), next.endOffset())), false) <= sug.matchDistance(actual.matchDistanceKey, false)) {
            return next;
        }
        return end;
    }

    @Nullable
    private static Node nextWord(Node node) {
        Node next = node.nextNode();
        if (CommonPatterns.HYPHEN_LIKE_NODE.matches(next)) {
            next = next.nextNode();
        }
        return next == null || matchEnd.matches(next) ? null : next;
    }

    private boolean isCoveredByValidPhrase(TextRange toReport, Tree tree) {
        return this.knownPhrases.validPhrases(tree.text().replace("'", "\u2019")).stream().anyMatch(r -> r.containsInclusive(toReport));
    }

    private List<Phrase> candidatePhrases(Node start) {
        int i;
        int[] hashes = MultiWordSpelling.startHashes(start.tree().text(), start.startOffset());
        if (hashes == null) {
            return List.of();
        }
        LinkedHashSet candidates = null;
        boolean mustIncludeFirstChar = start.form().length() == 1;
        int n = i = mustIncludeFirstChar ? 1 : 0;
        while (i < hashes.length) {
            int hash = hashes[i];
            List each = (List)this.byHash.get(hash);
            if (each != null) {
                if (candidates == null) {
                    candidates = new LinkedHashSet(each);
                } else {
                    candidates.addAll(each);
                }
            }
            ++i;
        }
        if (candidates == null) {
            return List.of();
        }
        if (start.form().length() <= 5) {
            return new ArrayList<Phrase>(candidates);
        }
        return MultiWordSpelling.filterByPrefix(candidates, MultiWordSpelling.multiWordKey(start.form()));
    }

    private List<Phrase> filterSignificantTypos(Node start, Node end, List<Phrase> suggestions) {
        ArrayList<Phrase> insignificant = new ArrayList<Phrase>();
        ArrayList<Phrase> allowedSignificant = new ArrayList<Phrase>();
        for (Phrase suggestion : suggestions) {
            TextRange range = MultiWordSpelling.rangeWithSignificantTypos(start, end, suggestion);
            if (range == null) {
                insignificant.add(suggestion);
                continue;
            }
            if (!MultiWordSpelling.spansExactNamedEntity(start, end)) {
                if (!MultiWordSpelling.nodesInRange(start, range).noneMatch(this.skipTypoFixes::matches)) continue;
            }
            allowedSignificant.add(suggestion);
        }
        return !insignificant.isEmpty() ? insignificant : allowedSignificant;
    }

    private static boolean spansExactNamedEntity(Node start, Node end) {
        return start != end && start.nerAnnotations().map(SentenceWithNERAnnotations.Annotation::getRange).anyMatch(r -> r.getStart() == start.startOffset() && r.getEndExclusive() == end.endOffset());
    }

    private static Stream<Node> nodesInRange(Node start, TextRange range) {
        List overlapping = ((StreamEx)((StreamEx)start.forward().dropWhile(n -> n.endOffset() <= range.start())).takeWhile(n -> n.startOffset() < range.end())).toList();
        if (!overlapping.isEmpty()) {
            return overlapping.stream();
        }
        return ((StreamEx)start.forward().dropWhile(n -> n.endOffset() < range.start())).takeWhile(n -> n.startOffset() < range.end());
    }

    private static MatchingPrefix matchPrefix(Phrase suggestion, int start, int end, String sentence) {
        int keyIndex = 0;
        while (start < end && keyIndex < suggestion.key.length()) {
            int significant;
            for (significant = start; significant < end && MultiWordSpelling.skipKeyChar(sentence.charAt(significant)); ++significant) {
            }
            if (significant == end || MultiWordSpelling.normalizeKeyChar(sentence.charAt(significant)) != suggestion.key.charAt(keyIndex++)) break;
            start = significant + 1;
        }
        return new MatchingPrefix(start, keyIndex == suggestion.key.length());
    }

    @Nullable
    private static TextRange rangeWithSignificantTypos(Node startNode, Node endNode, Phrase suggestion) {
        char c;
        String sentence = startNode.tree().text();
        int start = startNode.startOffset();
        int end = endNode.endOffset();
        MatchingPrefix mp = MultiWordSpelling.matchPrefix(suggestion, start, end, sentence);
        if (mp.prefixEnd == end && mp.fullMatch) {
            return null;
        }
        int keyIndex = suggestion.key.length();
        while (end - 1 > mp.prefixEnd && (MultiWordSpelling.skipKeyChar(c = sentence.charAt(end - 1)) || MultiWordSpelling.normalizeKeyChar(c) == suggestion.key.charAt(--keyIndex))) {
            --end;
        }
        return new TextRange(mp.prefixEnd, end);
    }

    private static List<Phrase> getClosestSuggestions(List<Phrase> suggestions, Phrase actual) {
        Comparator<Phrase> caseInsensitiveComparator = MultiWordSpelling.createComparator(actual, false);
        Comparator<Phrase> caseSensitiveComparator = MultiWordSpelling.createComparator(actual, true);
        List sortedList = ((StreamEx)StreamEx.of(suggestions).sorted(caseInsensitiveComparator)).toList();
        List<Phrase> closestMatches = MultiWordSpelling.findClosestMatches(sortedList, caseInsensitiveComparator);
        if (closestMatches.size() > 1) {
            sortedList = ((StreamEx)StreamEx.of(suggestions).sorted(caseSensitiveComparator)).toList();
            closestMatches = MultiWordSpelling.findClosestMatches(sortedList, caseSensitiveComparator);
        }
        return MultiWordSpelling.filterByCapitalization(closestMatches);
    }

    private static Comparator<Phrase> createComparator(Phrase phrase, boolean considerCase) {
        return Comparator.comparingInt(sug -> sug.matchDistance(phrase.matchDistanceKey, considerCase)).thenComparing(sug -> sug.original.length());
    }

    private static List<Phrase> findClosestMatches(List<Phrase> sortedList, Comparator<Phrase> comparator) {
        Phrase firstMatch = sortedList.get(0);
        return sortedList.stream().takeWhile(sug -> comparator.compare(firstMatch, (Phrase)sug) == 0).collect(Collectors.toList());
    }

    private static List<Phrase> filterByCapitalization(List<Phrase> phrases) {
        Set capitalizedSet = phrases.stream().filter(c -> Character.isLowerCase(c.original.charAt(0))).map(c -> c.original.substring(0, 1).toUpperCase(Locale.ROOT) + c.original.substring(1)).collect(Collectors.toSet());
        return phrases.stream().filter(c -> !capitalizedSet.contains(c.original)).toList();
    }

    private static List<Phrase> filterByPrefix(Collection<Phrase> candidates, String keyPrefix) {
        return candidates.stream().filter(c -> c.canBePrefix(keyPrefix)).toList();
    }

    private static int @Nullable [] startHashes(String phrase, int offset) {
        int keyIndex = 0;
        int[] hashes = new int[5];
        for (int i = offset; i < phrase.length() && keyIndex < hashes.length; ++i) {
            char c = phrase.charAt(i);
            if (MultiWordSpelling.skipKeyChar(c)) continue;
            if (!Character.isLetterOrDigit(c)) {
                return null;
            }
            char norm = MultiWordSpelling.normalizeKeyChar(c);
            for (int hashIndex = 0; hashIndex < hashes.length; ++hashIndex) {
                if (hashIndex == keyIndex) continue;
                hashes[hashIndex] = hashes[hashIndex] * 239 + norm;
            }
            ++keyIndex;
        }
        return hashes;
    }

    private static String multiWordKey(String phrase) {
        StringBuilder sb = new StringBuilder(phrase.length());
        for (int i = 0; i < phrase.length(); ++i) {
            char c = phrase.charAt(i);
            if (MultiWordSpelling.skipKeyChar(c)) continue;
            sb.append(MultiWordSpelling.normalizeKeyChar(c));
        }
        return sb.toString();
    }

    private static char normalizeKeyChar(char c) {
        return Character.toLowerCase(Diacritics.removeDiacritic(c));
    }

    private static boolean skipKeyChar(char c) {
        return c == '-' || c == '.' || c == '\u2013' || CharUtil.isAnyOf(aposLike, c) || CharUtil.isAnySpace(c);
    }

    private record Phrase(String original, String key, boolean allowSignificantTypos, String matchDistanceKey) {
        private static final int ALLOWED_MISTAKE_RATE = 5;

        private Phrase(String original, String key) {
            this(original, key, Arrays.stream(MultiWordSpelling.splitWords(original)).allMatch(w -> w.length() > 1), MultiWordSpelling.normalizeForDistance(original));
        }

        private boolean isFullMatch(Phrase actual) {
            if (this.matchDistance(actual.matchDistanceKey, false) > this.original.length() / 5) {
                return false;
            }
            return this.allowSignificantTypos || actual.key.equals(this.key);
        }

        private int matchDistance(String actualMatchDistanceKey, boolean preserveCase) {
            String norm1 = actualMatchDistanceKey;
            String norm2 = this.matchDistanceKey;
            if (!preserveCase) {
                norm1 = norm1.toLowerCase(Locale.ROOT);
                norm2 = norm2.toLowerCase(Locale.ROOT);
            }
            return Levenshtein.WithDamerau.distance(norm1, norm2);
        }

        private boolean canBePrefix(String keyPrefix) {
            int baseLen = Math.min(this.key.length(), keyPrefix.length());
            char lastKeyChar = keyPrefix.charAt(keyPrefix.length() - 1);
            return Phrase.hasAllowedMistakeRate(keyPrefix, this.key.substring(0, baseLen)) || baseLen < this.key.length() && this.key.charAt(baseLen) == lastKeyChar && Phrase.hasAllowedMistakeRate(keyPrefix, this.key.substring(0, baseLen + 1)) || baseLen > 1 && this.key.charAt(baseLen - 2) == lastKeyChar && Phrase.hasAllowedMistakeRate(keyPrefix, this.key.substring(0, baseLen - 1));
        }

        private static boolean hasAllowedMistakeRate(String keyPrefix, String actualPrefix) {
            return Levenshtein.WithDamerau.distance(actualPrefix, keyPrefix) <= actualPrefix.length() / 5 + 1;
        }
    }

    private record MatchingPrefix(int prefixEnd, boolean fullMatch) {
    }
}

