/*
 * Decompiled with CFR 0.152.
 */
package ai.grazie.nlp.tokenizer.spacy;

import ai.grazie.nlp.tokenizer.Tokenizer;
import ai.grazie.nlp.tokenizer.spacy.SpacyTokenInfo;
import ai.grazie.nlp.tokenizer.spacy.SpacyTokenizerSpecialCases;
import ai.grazie.nlp.tokenizer.spacy.en.SpacyEnglish;
import ai.grazie.nlp.utils.RangesKt;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import kotlin.Metadata;
import kotlin.collections.CollectionsKt;
import kotlin.jvm.internal.DefaultConstructorMarker;
import kotlin.jvm.internal.Intrinsics;
import kotlin.ranges.IntRange;
import kotlin.sequences.Sequence;
import kotlin.text.CharsKt;
import kotlin.text.MatchResult;
import kotlin.text.Regex;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;

@Metadata(mv={1, 4, 3}, bv={1, 0, 3}, k=1, d1={"\u0000F\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0018\u0002\n\u0002\b\u0003\n\u0002\u0018\u0002\n\u0002\b\u0006\n\u0002\u0018\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\u0018\u0002\n\u0000\n\u0002\u0010\u000e\n\u0000\n\u0002\u0010\b\n\u0002\b\u0003\n\u0002\u0010 \n\u0002\u0018\u0002\n\u0002\b\u0005\u0018\u0000 \u001e2\u00020\u0001:\u0002\u001e\u001fB%\u0012\u0006\u0010\u0002\u001a\u00020\u0003\u0012\u0006\u0010\u0004\u001a\u00020\u0003\u0012\u0006\u0010\u0005\u001a\u00020\u0003\u0012\u0006\u0010\u0006\u001a\u00020\u0007\u00a2\u0006\u0002\u0010\bJ\u0010\u0010\r\u001a\u00020\u000e2\u0006\u0010\u000f\u001a\u00020\u000eH\u0002J\u0016\u0010\u0010\u001a\b\u0012\u0004\u0012\u00020\u00120\u00112\u0006\u0010\u0013\u001a\u00020\u0014H\u0002J\u0010\u0010\u0015\u001a\u00020\u00162\u0006\u0010\u0013\u001a\u00020\u0014H\u0002J\u0010\u0010\u0017\u001a\u00020\u00162\u0006\u0010\u0013\u001a\u00020\u0014H\u0002J\u0010\u0010\u0018\u001a\u00020\u000e2\u0006\u0010\u0013\u001a\u00020\u0014H\u0002J\u0016\u0010\u0019\u001a\b\u0012\u0004\u0012\u00020\u001b0\u001a2\u0006\u0010\u001c\u001a\u00020\u0014H\u0016J\u0016\u0010\u001d\u001a\b\u0012\u0004\u0012\u00020\u001b0\u001a2\u0006\u0010\u0013\u001a\u00020\u0014H\u0002R\u000e\u0010\u0004\u001a\u00020\u0003X\u0082\u0004\u00a2\u0006\u0002\n\u0000R\u0011\u0010\u0002\u001a\u00020\u0003\u00a2\u0006\b\n\u0000\u001a\u0004\b\t\u0010\nR\u0011\u0010\u0006\u001a\u00020\u0007\u00a2\u0006\b\n\u0000\u001a\u0004\b\u000b\u0010\fR\u000e\u0010\u0005\u001a\u00020\u0003X\u0082\u0004\u00a2\u0006\u0002\n\u0000\u00a8\u0006 "}, d2={"Lai/grazie/nlp/tokenizer/spacy/SpacyTokenizer;", "Lai/grazie/nlp/tokenizer/Tokenizer;", "prefix", "Lkotlin/text/Regex;", "infix", "suffix", "specialCases", "Lai/grazie/nlp/tokenizer/spacy/SpacyTokenizerSpecialCases;", "(Lkotlin/text/Regex;Lkotlin/text/Regex;Lkotlin/text/Regex;Lai/grazie/nlp/tokenizer/spacy/SpacyTokenizerSpecialCases;)V", "getPrefix", 
"()Lkotlin/text/Regex;", "getSpecialCases", "()Lai/grazie/nlp/tokenizer/spacy/SpacyTokenizerSpecialCases;", "attachTokens", "Lai/grazie/nlp/tokenizer/spacy/SpacyTokenizer$TokenSplits;", "splits", "findInfix", "Lkotlin/sequences/Sequence;", "Lkotlin/text/MatchResult;", "tok", "", "findPrefix", "", "findSuffix", "splitAffixes", "tokenize", "", "Lai/grazie/nlp/tokenizer/Tokenizer$Token;", "text", "tokenizeToken", "Companion", "TokenSplits", "nlp-tokenizer"})
public final class SpacyTokenizer
implements Tokenizer {
    /** Pattern searched by {@code findPrefix} to measure a leading affix; exposed through {@code getPrefix()}. */
    @NotNull
    private final Regex prefix;
    /** Pattern whose matches {@code findInfix} returns; {@code attachTokens} splits the word at those matches. */
    private final Regex infix;
    /** Pattern searched by {@code findSuffix} to measure a trailing affix. */
    private final Regex suffix;
    /** Lookup consulted by {@code splitAffixes} (via {@code get}) and by {@code attachTokens} (via {@code urlMatch}). */
    @NotNull
    private final SpacyTokenizerSpecialCases specialCases;
    /** Singleton holder of the {@code loadEnglish()} factory method. */
    @NotNull
    public static final Companion Companion = new Companion(null);

    /*
     * WARNING - void declaration
     */
    @Override
    @NotNull
    public List<Tokenizer.Token> tokenize(@NotNull String text2) {
        Tokenizer.Token token;
        Object object;
        Object destination$iv$iv;
        Iterable span2;
        Intrinsics.checkNotNullParameter((Object)text2, (String)"text");
        CharSequence charSequence = text2;
        boolean bl = false;
        if (charSequence.length() == 0) {
            return CollectionsKt.emptyList();
        }
        int i2 = 0;
        int start = 0;
        ArrayList result = new ArrayList();
        boolean inWS = CharsKt.isWhitespace((char)text2.charAt(0));
        String string = text2;
        int n = string.length();
        for (int j = 0; j < n; ++j) {
            char uc = string.charAt(j);
            if (CharsKt.isWhitespace((char)uc) != inWS) {
                if (start < i2) {
                    void $this$mapTo$iv$iv;
                    Object object2 = text2;
                    int n2 = start;
                    boolean bl2 = false;
                    Intrinsics.checkNotNullExpressionValue((Object)((String)object2).substring(n2, i2), (String)"(this as java.lang.Strin\u2026ing(startIndex, endIndex)");
                    object2 = result;
                    Iterable $this$map$iv = this.tokenizeToken((String)((Object)span2));
                    boolean $i$f$map = false;
                    Iterable iterable = $this$map$iv;
                    destination$iv$iv = new ArrayList(CollectionsKt.collectionSizeOrDefault((Iterable)$this$map$iv, (int)10));
                    boolean $i$f$mapTo = false;
                    for (Object item$iv$iv : $this$mapTo$iv$iv) {
                        void it;
                        Tokenizer.Token token2 = (Tokenizer.Token)item$iv$iv;
                        object = destination$iv$iv;
                        boolean bl3 = false;
                        token = new Tokenizer.Token(it.getToken(), RangesKt.withOffset(it.getRange(), start));
                        object.add(token);
                    }
                    $this$map$iv = (List)destination$iv$iv;
                    bl2 = false;
                    CollectionsKt.addAll((Collection)object2, (Iterable)$this$map$iv);
                }
                start = uc == ' ' ? i2 + 1 : i2;
                inWS = !inWS;
            }
            ++i2;
        }
        if (start < i2) {
            void $this$mapTo$iv$iv;
            Object object3 = text2;
            int n3 = start;
            n = 0;
            String string2 = ((String)object3).substring(n3);
            Intrinsics.checkNotNullExpressionValue((Object)string2, (String)"(this as java.lang.String).substring(startIndex)");
            String span3 = string2;
            object3 = result;
            Iterable $this$map$iv = this.tokenizeToken(span3);
            boolean $i$f$map = false;
            span2 = $this$map$iv;
            Collection destination$iv$iv2 = new ArrayList(CollectionsKt.collectionSizeOrDefault((Iterable)$this$map$iv, (int)10));
            boolean $i$f$mapTo = false;
            for (Object item$iv$iv : $this$mapTo$iv$iv) {
                void it;
                destination$iv$iv = (Tokenizer.Token)item$iv$iv;
                object = destination$iv$iv2;
                boolean bl4 = false;
                token = new Tokenizer.Token(it.getToken(), RangesKt.withOffset(it.getRange(), start));
                object.add(token);
            }
            Iterable iterable = (List)destination$iv$iv2;
            n = 0;
            CollectionsKt.addAll((Collection)object3, (Iterable)iterable);
        }
        return result;
    }

    /**
     * Tokenizes one span: strips affixes first and, unless the span resolved to a special
     * case, splits the remaining word via {@code attachTokens}.
     *
     * @param tok the span to tokenize
     * @return the span's tokens, with ranges relative to the start of the span
     */
    private final List<Tokenizer.Token> tokenizeToken(String tok) {
        final TokenSplits affixSplits = this.splitAffixes(tok);
        final TokenSplits finished = affixSplits.isSpecial() ? affixSplits : this.attachTokens(affixSplits);
        return finished.toList();
    }

    /**
     * Repeatedly peels prefix and suffix matches off {@code tok} until nothing more can be
     * removed, recording the removed pieces in a {@link TokenSplits}.
     *
     * <p>Each pass:
     * <ol>
     *   <li>if the remaining text is itself a special case, its {@code orth} pieces become the
     *       word tokens, the result is flagged special and the loop stops;</li>
     *   <li>otherwise a prefix and a suffix are measured; if removing either one alone leaves a
     *       special case, that affix is recorded and the loop stops;</li>
     *   <li>otherwise both affixes are removed when they do not overlap, else only the prefix,
     *       else only the suffix, and the loop continues on what is left.</li>
     * </ol>
     * The loop also stops when a pass removes nothing (the length is unchanged).
     *
     * @param tok the span to split
     * @return the collected prefixes/suffixes with {@code word} set to the remaining text
     */
    private final TokenSplits splitAffixes(String tok) {
        TokenSplits splits = new TokenSplits();
        String remaining = tok;
        int lastSize = 0;
        while (!remaining.isEmpty() && remaining.length() != lastSize) {
            List<SpacyTokenInfo> special = this.specialCases.get(remaining);
            if (special != null) {
                for (SpacyTokenInfo info : special) {
                    splits.getWordTokens().add(info.getOrth());
                }
                splits.setSpecial(true);
                break;
            }
            lastSize = remaining.length();

            int prefixLength = this.findPrefix(remaining);
            String prefix = null;
            String minusPrefix = null;
            if (prefixLength != 0) {
                prefix = remaining.substring(0, prefixLength);
                minusPrefix = remaining.substring(prefixLength);
                // NOTE(review): this exit (and the two similar ones below) leaves isSpecial
                // false, so the caller still runs attachTokens on the word - confirm intended.
                if (!minusPrefix.isEmpty() && this.specialCases.get(minusPrefix) != null) {
                    remaining = minusPrefix;
                    splits.getPrefixes().add(prefix);
                    break;
                }
            }

            int suffixLength = this.findSuffix(remaining);
            String suffix = null;
            String minusSuffix = null;
            if (suffixLength != 0) {
                int suffixStart = remaining.length() - suffixLength;
                suffix = remaining.substring(suffixStart);
                minusSuffix = remaining.substring(0, suffixStart);
                if (!minusSuffix.isEmpty() && this.specialCases.get(minusSuffix) != null) {
                    remaining = minusSuffix;
                    splits.getSuffixes().add(suffix);
                    break;
                }
            }

            // Write the stripped text back so the next pass works on the shortened word;
            // without this the affix would stay in the word and the loop could never progress.
            if (prefixLength != 0 && suffixLength != 0 && prefixLength + suffixLength <= remaining.length()) {
                remaining = remaining.substring(prefixLength, remaining.length() - suffixLength);
                splits.getPrefixes().add(prefix);
                splits.getSuffixes().add(suffix);
            } else if (prefixLength != 0) {
                // Either there is no suffix or it overlaps the prefix: take the prefix only.
                remaining = minusPrefix;
                splits.getPrefixes().add(prefix);
            } else if (suffixLength != 0) {
                remaining = minusSuffix;
                splits.getSuffixes().add(suffix);
            }

            if (!remaining.isEmpty() && this.specialCases.get(remaining) != null) {
                break;
            }
        }
        splits.setWord(remaining);
        return splits;
    }

    /**
     * Fills {@code splits.getWordTokens()} from {@code splits.getWord()}.
     *
     * <p>An empty word adds nothing. A word accepted by {@code specialCases.urlMatch} is added
     * whole. Otherwise the word is cut at every infix match: the text before a match and the
     * match itself become separate tokens, and whatever follows the last match is added as a
     * final token. A match starting at index 0 is ignored, so it stays attached to the text
     * that follows it.
     *
     * @param splits the affix-split result; its word must already be set
     * @return the same {@code splits} instance, with its word tokens populated
     * @throws NullPointerException if the word has not been set
     */
    private final TokenSplits attachTokens(TokenSplits splits) {
        String word = splits.getWord();
        Intrinsics.checkNotNull((Object)word);
        if (word.isEmpty()) {
            return splits;
        }
        List<String> wordTokens = splits.getWordTokens();
        if (this.specialCases.urlMatch(word)) {
            wordTokens.add(word);
            return splits;
        }

        int start = 0;
        // kotlin.sequences.Sequence is not a java.lang.Iterable, so it is walked through its iterator.
        java.util.Iterator<MatchResult> matches = this.findInfix(word).iterator();
        while (matches.hasNext()) {
            MatchResult match = matches.next();
            int startInfix = match.getRange().getFirst();
            int endInfix = match.getRange().getLast() + 1;
            if (startInfix == 0) {
                continue;
            }
            if (startInfix != start) {
                // Text between the previous infix (or the word start) and this one.
                wordTokens.add(word.substring(start, startInfix));
            }
            if (startInfix != endInfix) {
                // The infix itself; skipped for zero-length matches.
                wordTokens.add(word.substring(startInfix, endInfix));
            }
            start = endInfix;
        }
        String tail = word.substring(start);
        if (!tail.isEmpty()) {
            wordTokens.add(tail);
        }
        return splits;
    }

    /**
     * Measures the first match of the prefix pattern in {@code tok}, searching from index 0.
     *
     * @param tok the text to search
     * @return the length of the match, or 0 when there is no match or the match is empty
     */
    private final int findPrefix(String tok) {
        MatchResult match = this.prefix.find((CharSequence)tok, 0);
        if (match == null) {
            return 0;
        }
        // Read the bounds directly: iterating the range would cost O(length) and would throw
        // NoSuchElementException for a zero-length match, whose range is empty.
        IntRange range = match.getRange();
        return range.getLast() + 1 - range.getFirst();
    }

    /**
     * Measures the first match of the suffix pattern in {@code tok}, searching from index 0.
     *
     * @param tok the text to search
     * @return the length of the match, or 0 when there is no match or the match is empty
     */
    private final int findSuffix(String tok) {
        MatchResult match = this.suffix.find((CharSequence)tok, 0);
        if (match == null) {
            return 0;
        }
        // Read the bounds directly: iterating the range would cost O(length) and would throw
        // NoSuchElementException for a zero-length match, whose range is empty.
        IntRange range = match.getRange();
        return range.getLast() + 1 - range.getFirst();
    }

    /**
     * Returns every match of the infix pattern in {@code tok}, searching from index 0.
     *
     * @param tok the text to search
     * @return the sequence of matches produced by the pattern's {@code findAll}
     */
    private final Sequence<MatchResult> findInfix(String tok) {
        final CharSequence input = tok;
        return this.infix.findAll(input, 0);
    }

    /**
     * Returns the prefix pattern this tokenizer was constructed with.
     *
     * @return the prefix regex, never {@code null}
     */
    @NotNull
    public final Regex getPrefix() {
        return prefix;
    }

    /**
     * Returns the special-case lookup this tokenizer was constructed with.
     *
     * @return the special cases, never {@code null}
     */
    @NotNull
    public final SpacyTokenizerSpecialCases getSpecialCases() {
        return specialCases;
    }

    /**
     * Creates a tokenizer from its three affix patterns and a special-case lookup.
     * Arguments are null-checked in declaration order.
     *
     * @param prefix       pattern used to find a leading affix
     * @param infix        pattern used to find split points inside a word
     * @param suffix       pattern used to find a trailing affix
     * @param specialCases lookup of special-case tokenizations and URL matching
     */
    public SpacyTokenizer(@NotNull Regex prefix, @NotNull Regex infix, @NotNull Regex suffix, @NotNull SpacyTokenizerSpecialCases specialCases) {
        Intrinsics.checkNotNullParameter(prefix, "prefix");
        this.prefix = prefix;

        Intrinsics.checkNotNullParameter(infix, "infix");
        this.infix = infix;

        Intrinsics.checkNotNullParameter(suffix, "suffix");
        this.suffix = suffix;

        Intrinsics.checkNotNullParameter(specialCases, "specialCases");
        this.specialCases = specialCases;
    }

    @Metadata(mv={1, 4, 3}, bv={1, 0, 3}, k=1, d1={"\u0000.\n\u0002\u0018\u0002\n\u0002\u0010\u0000\n\u0002\b\u0002\n\u0002\u0010\u000b\n\u0002\b\u0004\n\u0002\u0018\u0002\n\u0002\u0010\u000e\n\u0002\u0018\u0002\n\u0002\b\f\n\u0002\u0010 \n\u0002\u0018\u0002\n\u0000\b\u0002\u0018\u00002\u00020\u0001B\u0005\u00a2\u0006\u0002\u0010\u0002J\f\u0010\u0017\u001a\b\u0012\u0004\u0012\u00020\u00190\u0018R\u001a\u0010\u0003\u001a\u00020\u0004X\u0086\u000e\u00a2\u0006\u000e\n\u0000\u001a\u0004\b\u0003\u0010\u0005\"\u0004\b\u0006\u0010\u0007R!\u0010\b\u001a\u0012\u0012\u0004\u0012\u00020\n0\tj\b\u0012\u0004\u0012\u00020\n`\u000b\u00a2\u0006\b\n\u0000\u001a\u0004\b\f\u0010\rR!\u0010\u000e\u001a\u0012\u0012\u0004\u0012\u00020\n0\tj\b\u0012\u0004\u0012\u00020\n`\u000b\u00a2\u0006\b\n\u0000\u001a\u0004\b\u000f\u0010\rR\u001c\u0010\u0010\u001a\u0004\u0018\u00010\nX\u0086\u000e\u00a2\u0006\u000e\n\u0000\u001a\u0004\b\u0011\u0010\u0012\"\u0004\b\u0013\u0010\u0014R!\u0010\u0015\u001a\u0012\u0012\u0004\u0012\u00020\n0\tj\b\u0012\u0004\u0012\u00020\n`\u000b\u00a2\u0006\b\n\u0000\u001a\u0004\b\u0016\u0010\r\u00a8\u0006\u001a"}, d2={"Lai/grazie/nlp/tokenizer/spacy/SpacyTokenizer$TokenSplits;", "", "()V", "isSpecial", "", "()Z", "setSpecial", "(Z)V", "prefixes", "Ljava/util/ArrayList;", "", "Lkotlin/collections/ArrayList;", "getPrefixes", "()Ljava/util/ArrayList;", "suffixes", "getSuffixes", "word", "getWord", "()Ljava/lang/String;", "setWord", "(Ljava/lang/String;)V", "wordTokens", "getWordTokens", "toList", "", "Lai/grazie/nlp/tokenizer/Tokenizer$Token;", "nlp-tokenizer"})
    private static final class TokenSplits {
        @NotNull
        private final ArrayList<String> prefixes = new ArrayList();
        @NotNull
        private final ArrayList<String> suffixes = new ArrayList();
        @Nullable
        private String word;
        @NotNull
        private final ArrayList<String> wordTokens = new ArrayList();
        private boolean isSpecial;

        @NotNull
        public final ArrayList<String> getPrefixes() {
            return this.prefixes;
        }

        @NotNull
        public final ArrayList<String> getSuffixes() {
            return this.suffixes;
        }

        @Nullable
        public final String getWord() {
            return this.word;
        }

        public final void setWord(@Nullable String string) {
            this.word = string;
        }

        @NotNull
        public final ArrayList<String> getWordTokens() {
            return this.wordTokens;
        }

        public final boolean isSpecial() {
            return this.isSpecial;
        }

        public final void setSpecial(boolean bl) {
            this.isSpecial = bl;
        }

        /*
         * WARNING - void declaration
         */
        @NotNull
        public final List<Tokenizer.Token> toList() {
            void $this$mapTo$iv$iv;
            int cur = 0;
            Iterable $this$map$iv = CollectionsKt.plus((Collection)CollectionsKt.plus((Collection)this.prefixes, (Iterable)this.wordTokens), (Iterable)CollectionsKt.reversed((Iterable)this.suffixes));
            boolean $i$f$map = false;
            Iterable iterable = $this$map$iv;
            Collection destination$iv$iv = new ArrayList(CollectionsKt.collectionSizeOrDefault((Iterable)$this$map$iv, (int)10));
            boolean $i$f$mapTo = false;
            for (Object item$iv$iv : $this$mapTo$iv$iv) {
                void it;
                String string = (String)item$iv$iv;
                Collection collection = destination$iv$iv;
                boolean bl = false;
                int start = cur;
                Tokenizer.Token token = new Tokenizer.Token((String)it, new IntRange(start, (cur += it.length()) - 1));
                collection.add(token);
            }
            return (List)destination$iv$iv;
        }
    }

    @Metadata(mv={1, 4, 3}, bv={1, 0, 3}, k=1, d1={"\u0000\u0012\n\u0002\u0018\u0002\n\u0002\u0010\u0000\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0000\b\u0086\u0003\u0018\u00002\u00020\u0001B\u0007\b\u0002\u00a2\u0006\u0002\u0010\u0002J\u0006\u0010\u0003\u001a\u00020\u0004\u00a8\u0006\u0005"}, d2={"Lai/grazie/nlp/tokenizer/spacy/SpacyTokenizer$Companion;", "", "()V", "loadEnglish", "Lai/grazie/nlp/tokenizer/spacy/SpacyTokenizer;", "nlp-tokenizer"})
    /*
     * Holder for the tokenizer's factory method; instantiated once as SpacyTokenizer.Companion.
     */
    public static final class Companion {
        /** Not instantiable from outside; reached only through the synthetic constructor below. */
        private Companion() {
        }

        /** Synthetic bridge constructor; the marker argument is ignored. */
        public /* synthetic */ Companion(DefaultConstructorMarker $constructor_marker) {
            this();
        }

        /**
         * Builds a tokenizer from the {@code SpacyEnglish} prefix, infix and suffix patterns
         * and the shared {@code SpacyTokenizerSpecialCases} instance.
         *
         * @return a new tokenizer
         */
        @NotNull
        public final SpacyTokenizer loadEnglish() {
            final Regex prefixRegex = SpacyEnglish.Prefix.INSTANCE.getRegex();
            final Regex infixRegex = SpacyEnglish.Infix.INSTANCE.getRegex();
            final Regex suffixRegex = SpacyEnglish.Suffix.INSTANCE.getRegex();
            return new SpacyTokenizer(prefixRegex, infixRegex, suffixRegex, SpacyTokenizerSpecialCases.INSTANCE);
        }
    }
}

