/*
 * Decompiled with CFR 0.152.
 */
package ai.grazie.nlp.encoder;

import ai.grazie.utils.cache.Cache;
import ai.grazie.utils.cache.Caching;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import kotlin.Metadata;
import kotlin.Pair;
import kotlin.TuplesKt;
import kotlin.collections.CollectionsKt;
import kotlin.collections.MapsKt;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.jvm.internal.SourceDebugExtension;
import kotlin.text.StringsKt;
import org.jetbrains.annotations.NotNull;

@Metadata(mv={2, 1, 0}, k=1, xi=48, d1={"\u0000.\n\u0002\u0018\u0002\n\u0002\u0010\u0000\n\u0000\n\u0002\u0010 \n\u0002\u0018\u0002\n\u0002\u0010\u000e\n\u0000\n\u0002\u0010\b\n\u0002\b\u0003\n\u0002\u0010$\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0007\u0018\u00002\u00020\u0001B+\u0012\u0018\u0010\u0002\u001a\u0014\u0012\u0010\u0012\u000e\u0012\u0004\u0012\u00020\u0005\u0012\u0004\u0012\u00020\u00050\u00040\u0003\u0012\b\b\u0002\u0010\u0006\u001a\u00020\u0007\u00a2\u0006\u0004\b\b\u0010\tJ\u000e\u0010\u000e\u001a\u00020\u00052\u0006\u0010\u000f\u001a\u00020\u0005J\u0014\u0010\u0010\u001a\b\u0012\u0004\u0012\u00020\u00050\u00032\u0006\u0010\u000f\u001a\u00020\u0005J\u0016\u0010\u0011\u001a\b\u0012\u0004\u0012\u00020\u00050\u00032\u0006\u0010\u000f\u001a\u00020\u0005H\u0002J$\u0010\u0012\u001a\u0010\u0012\u0004\u0012\u00020\u0005\u0012\u0004\u0012\u00020\u0005\u0018\u00010\u00042\f\u0010\u0013\u001a\b\u0012\u0004\u0012\u00020\u00050\u0003H\u0002R&\u0010\n\u001a\u001a\u0012\u0010\u0012\u000e\u0012\u0004\u0012\u00020\u0005\u0012\u0004\u0012\u00020\u00050\u0004\u0012\u0004\u0012\u00020\u00070\u000bX\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u001a\u0010\f\u001a\u000e\u0012\u0004\u0012\u00020\u0005\u0012\u0004\u0012\u00020\u00050\rX\u0082\u0004\u00a2\u0006\u0002\n\u0000\u00a8\u0006\u0014"}, d2={"Lai/grazie/nlp/encoder/BPE;", "", "bpeMerges", "", "Lkotlin/Pair;", "", "cacheSize", "", "<init>", "(Ljava/util/List;I)V", "bpeRanks", "", "bpeCache", "Lai/grazie/utils/cache/Cache;", "tokenize", "word", "getTokens", "runBpeTokenization", "getSmallestIdxPair", "bpeWord", "nlp-encoder-engine"})
@SourceDebugExtension(value={"SMAP\nBPE.kt\nKotlin\n*S Kotlin\n*F\n+ 1 BPE.kt\nai/grazie/nlp/encoder/BPE\n+ 2 _Collections.kt\nkotlin/collections/CollectionsKt___CollectionsKt\n*L\n1#1,90:1\n1567#2:91\n1598#2,4:92\n1557#2:96\n1628#2,3:97\n*S KotlinDebug\n*F\n+ 1 BPE.kt\nai/grazie/nlp/encoder/BPE\n*L\n17#1:91\n17#1:92,4\n36#1:96\n36#1:97,3\n*E\n"})
/*
 * Byte-pair-encoding (BPE) merger.
 *
 * Holds a rank table built from an ordered list of symbol-pair merges and
 * uses it to repeatedly merge adjacent symbols of a word, always applying
 * the lowest-ranked pair that is currently present. Space-joined results of
 * tokenize(String) are memoised in a cache created with the given size.
 */
public final class BPE {
    /** Maps a merge pair to its position (rank) in the merge list passed to the constructor. */
    @NotNull
    private final Map<Pair<String, String>, Integer> bpeRanks;
    /** Memoises {@link #tokenize(String)} results, keyed by the input word. */
    @NotNull
    private final Cache<String, String> bpeCache;

    /**
     * Builds the rank table from {@code bpeMerges} and creates the result cache.
     *
     * @param bpeMerges ordered merge pairs; the rank of a pair is its index in this list
     * @param cacheSize size argument forwarded to the cache factory
     */
    public BPE(@NotNull List<Pair<String, String>> bpeMerges, int cacheSize) {
        Intrinsics.checkNotNullParameter(bpeMerges, "bpeMerges");
        List<Pair<Pair<String, String>, Integer>> rankedMerges = new ArrayList<>(bpeMerges.size());
        int rank = 0;
        for (Pair<String, String> merge : bpeMerges) {
            rankedMerges.add(TuplesKt.to(merge, rank++));
        }
        this.bpeRanks = MapsKt.toMap(rankedMerges);
        // NOTE(review): `default` is a reserved word in Java, so this decompiled call
        // cannot be compiled from Java source as written; the accessor has to be
        // reached some other way (e.g. from Kotlin) -- confirm against Caching's API.
        this.bpeCache = Caching.INSTANCE.default(cacheSize);
    }

    /**
     * Compiler-generated bridge for Kotlin default arguments.
     *
     * @param list2 merge pairs, forwarded unchanged
     * @param n explicit cache size; ignored when bit 1 of {@code n2} is set
     * @param n2 bit mask of omitted arguments; bit 1 set selects the default cache size of 5000
     * @param defaultConstructorMarker unused marker that disambiguates this overload
     */
    public /* synthetic */ BPE(List list2, int n, int n2, DefaultConstructorMarker defaultConstructorMarker) {
        // The delegation must be the first statement, so the default is resolved inline.
        this(list2, (n2 & 2) != 0 ? 5000 : n);
    }

    /**
     * Returns the BPE symbols of {@code word} joined by single spaces.
     * A cached result is returned when present; otherwise the result is computed and cached.
     *
     * @param word the word to split; must not be null
     * @return the space-separated symbols (an empty string for an empty word)
     */
    @NotNull
    public final String tokenize(@NotNull String word) {
        Intrinsics.checkNotNullParameter(word, "word");
        String cached = this.bpeCache.get(word);
        if (cached != null) {
            return cached;
        }
        String joined = String.join(" ", this.runBpeTokenization(word));
        this.bpeCache.put(word, joined);
        return joined;
    }

    /**
     * Returns the BPE symbols of {@code word} as a list. This path does not use the cache.
     *
     * @param word the word to split; must not be null
     * @return the symbols in order
     */
    @NotNull
    public final List<String> getTokens(@NotNull String word) {
        Intrinsics.checkNotNullParameter(word, "word");
        return this.runBpeTokenization(word);
    }

    /**
     * Splits {@code word} into one-character symbols and merges adjacent symbols
     * until no adjacent pair has a rank.
     *
     * @param word the word to split
     * @return the symbols remaining after all applicable merges
     */
    private final List<String> runBpeTokenization(String word) {
        List<String> bpeWord = new ArrayList<>(word.length());
        for (int k = 0; k < word.length(); ++k) {
            bpeWord.add(String.valueOf(word.charAt(k)));
        }

        // The best pair is re-selected after every pass: re-applying a stale pair
        // would leave the word unchanged and the loop would never terminate.
        Pair<String, String> pair = this.getSmallestIdxPair(bpeWord);
        while (pair != null) {
            String firstSymbol = pair.component1();
            String secondSymbol = pair.component2();
            List<String> merged = new ArrayList<>();
            int i = 0;
            while (i < bpeWord.size()) {
                int j = bpeWord.subList(i, bpeWord.size()).indexOf(firstSymbol);
                if (j == -1) {
                    // No further occurrence of the first symbol: copy the tail as is.
                    merged.addAll(bpeWord.subList(i, bpeWord.size()));
                    break;
                }
                merged.addAll(bpeWord.subList(i, i + j));
                i += j;
                // bpeWord.get(i) is firstSymbol here (indexOf located it), so only the
                // successor has to be compared.
                if (i < bpeWord.size() - 1 && Intrinsics.areEqual((Object)bpeWord.get(i + 1), (Object)secondSymbol)) {
                    merged.add(firstSymbol + secondSymbol);
                    i += 2;
                } else {
                    merged.add(firstSymbol);
                    ++i;
                }
            }
            bpeWord = merged;
            // Yields null for a single-symbol word, which ends the loop.
            pair = this.getSmallestIdxPair(bpeWord);
        }
        return bpeWord;
    }

    /**
     * Finds the adjacent symbol pair with the lowest rank in {@code bpeRanks}.
     * When several adjacent pairs share the lowest rank, the leftmost one is returned.
     *
     * @param bpeWord the current symbol sequence
     * @return the lowest-ranked adjacent pair, or {@code null} when the sequence has
     *         fewer than two symbols or none of its adjacent pairs has a rank
     */
    private final Pair<String, String> getSmallestIdxPair(List<String> bpeWord) {
        Pair<String, String> best = null;
        int bestRank = Integer.MAX_VALUE;
        for (int idx = 1; idx < bpeWord.size(); ++idx) {
            Pair<String, String> candidate = TuplesKt.to(bpeWord.get(idx - 1), bpeWord.get(idx));
            Integer rank = this.bpeRanks.get(candidate);
            if (rank != null && rank < bestRank) {
                bestRank = rank;
                best = candidate;
            }
        }
        return best;
    }
}

