/*
 * Decompiled with CFR 0.152.
 */
package org.languagetool.tagging.disambiguation;

import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap;
import it.unimi.dsi.fastutil.objects.Object2ObjectOpenHashMap;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.text.WordUtils;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.languagetool.AnalyzedSentence;
import org.languagetool.AnalyzedToken;
import org.languagetool.AnalyzedTokenReadings;
import org.languagetool.JLanguageTool;
import org.languagetool.tagging.disambiguation.AbstractDisambiguator;
import org.languagetool.tools.StringInterner;
import org.languagetool.tools.StringTools;

public class MultiWordChunker
extends AbstractDisambiguator {
    // Shared chunker instances, one per distinct Settings value; populated by getInstance().
    private static final Map<Settings, MultiWordChunker> chunkerCache = new ConcurrentHashMap<Settings, MultiWordChunker>();
    // Immutable configuration: resource file name, casing options and optional default tag.
    @NotNull
    private final Settings settings;
    // Set to true at the very end of lazyInit(), after the four lookup maps have been assigned.
    private volatile boolean initialized;
    // For entries containing a space: first token of the entry -> largest token count
    // (split on " ") among the entries starting with that token.
    private Map<String, Integer> mStartSpace;
    // For entries without a space: first character of the entry -> largest entry length in chars.
    private Map<String, Integer> mStartNoSpace;
    // Entries containing a space (including generated casing variants) -> the token built for them.
    private Map<String, AnalyzedToken> mFullSpace;
    // Entries without a space (including generated casing variants) -> the token built for them.
    private Map<String, AnalyzedToken> mFullNoSpace;
    // Upper bound on how many tokens are scanned from one start position while matching;
    // disambiguate() enforces this same limit of 20.
    private static final int MAX_TOKENS_IN_MULTIWORD = 20;
    // Separator used between entry and tag unless the data file overrides it.
    private static final String DEFAULT_SEPARATOR = "\t";
    // Regular expression used to split a data line into entry and tag. Assigned by loadWords():
    // DEFAULT_SEPARATOR, or the value of a "#separatorRegExp=" line in the data file.
    private String separator;
    // When true, disambiguate() calls ignoreSpelling() on the tokens of every match.
    private boolean addIgnoreSpelling = false;
    // When true, disambiguate() post-processes its result with removePreviousTags().
    private boolean isRemovePreviousTags = false;
    // Entries carrying this tag are matched (and may be marked ignore-spelling) but get no reading added.
    // NOTE(review): public, static and non-final, so any caller can reassign it globally.
    public static String tagForNotAddingTags = "_NONE_";
    // Matches data lines that end in "/" followed by one or more of the letters E, S, N;
    // loadWords() expands such lines into the base word plus "e"/"s"/"n" suffixed forms.
    private static final Pattern GermanLineExpander = Pattern.compile("^.*/[ESN]+$");

    /**
     * Creates a chunker for the given settings without loading any data; the lookup maps
     * are filled later by {@code lazyInit()}. Instances are obtained through
     * {@code getInstance(...)}.
     *
     * @param settings configuration identifying the resource file and casing options
     */
    private MultiWordChunker(@NotNull Settings settings) {
        this.settings = settings;
    }

    /**
     * Loads the multiword data on first use.
     *
     * <p>Uses double-checked locking on the volatile {@code initialized} flag: the flag is
     * tested without the lock, then re-tested inside a block synchronized on this instance,
     * so the data file is read at most once per chunker. The maps are built in local
     * variables, trimmed, and only then assigned to the fields; {@code initialized} is
     * written last.
     *
     * @throws RuntimeException propagated from {@code fillMaps} if the data file cannot be
     *         read or contains a malformed line
     */
    private void lazyInit() {
        if (initialized) {
            return;
        }
        synchronized (this) {
            if (initialized) {
                return;
            }
            Object2IntOpenHashMap<String> startSpace = new Object2IntOpenHashMap<>();
            Object2IntOpenHashMap<String> startNoSpace = new Object2IntOpenHashMap<>();
            Object2ObjectOpenHashMap<String, AnalyzedToken> fullSpace = new Object2ObjectOpenHashMap<>();
            Object2ObjectOpenHashMap<String, AnalyzedToken> fullNoSpace = new Object2ObjectOpenHashMap<>();
            fillMaps(startSpace, startNoSpace, fullSpace, fullNoSpace);
            // The maps are never modified after loading, so release their spare capacity.
            startSpace.trim();
            startNoSpace.trim();
            fullSpace.trim();
            fullNoSpace.trim();
            this.mStartSpace = startSpace;
            this.mStartNoSpace = startNoSpace;
            this.mFullSpace = fullSpace;
            this.mFullNoSpace = fullNoSpace;
            // Must stay the last write: readers that see 'true' rely on the maps being assigned.
            initialized = true;
        }
    }

    /**
     * Reads the configured resource file and populates the four lookup maps.
     *
     * <p>Each data line is split with the current {@code separator}. Without a default tag a
     * line must have exactly two parts (entry and tag); with a default tag it must have
     * exactly one. Every entry is stored together with its letter-case variants.
     *
     * @param mStartSpace   receives first token -> max token count, for entries containing a space
     * @param mStartNoSpace receives first character -> max length, for entries without a space
     * @param mFullSpace    receives entry -> token, for entries containing a space
     * @param mFullNoSpace  receives entry -> token, for entries without a space
     * @throws RuntimeException if a line has the wrong number of parts, or wrapping an
     *         {@link IOException} raised while closing the stream
     */
    private void fillMaps(Map<String, Integer> mStartSpace, Map<String, Integer> mStartNoSpace, Map<String, AnalyzedToken> mFullSpace, Map<String, AnalyzedToken> mFullNoSpace) {
        try (InputStream in = JLanguageTool.getDataBroker().getFromResourceDirAsStream(settings.filename)) {
            // loadWords() also sets 'separator', so it has to run before any line is split.
            for (String entry : loadWords(in)) {
                String[] fields = entry.split(separator);
                boolean hasDefaultTag = settings.defaultTag != null;
                if (!hasDefaultTag && fields.length != 2) {
                    throw new RuntimeException("Invalid format in " + settings.filename + ": '" + entry + "', expected two tab-separated parts");
                }
                if (hasDefaultTag && fields.length != 1) {
                    throw new RuntimeException("Invalid format in " + settings.filename + ": '" + entry + "', expected one element with no separator");
                }
                String originalString = StringInterner.intern(fields[0]);
                String tag = StringInterner.intern(hasDefaultTag ? settings.defaultTag : fields[1]);
                boolean containsSpace = originalString.indexOf(' ') > 0;
                Map<String, AnalyzedToken> fullMap = containsSpace ? mFullSpace : mFullNoSpace;

                List<String> variants = new ArrayList<>();
                variants.add(originalString);
                variants.addAll(getTokenLettercaseVariants(originalString, fullMap));

                for (String variant : variants) {
                    if (containsSpace) {
                        String[] tokens = variant.split(" ");
                        rememberLongest(mStartSpace, tokens[0], tokens.length);
                    } else {
                        rememberLongest(mStartNoSpace, variant.substring(0, 1), variant.length());
                    }
                    fullMap.put(variant, new AnalyzedToken(variant, tag, originalString));
                }
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    /** Stores {@code length} under {@code key} unless an equal or larger value is already recorded. */
    private static void rememberLongest(Map<String, Integer> lengths, String key, int length) {
        if (!lengths.containsKey(key) || lengths.get(key) < length) {
            lengths.put(key, length);
        }
    }

    /**
     * Builds the additional letter-case spellings of an entry that the settings allow.
     *
     * <ul>
     *   <li>all-uppercase, when {@code allowAllUppercase} is set and the entry is not camel case;</li>
     *   <li>first character uppercased, when {@code allowFirstCapitalized} is set;</li>
     *   <li>title-cased forms (via {@code WordUtils.capitalize} and
     *       {@code StringTools.titlecaseGlobal}), when additionally {@code allowTitlecase} is
     *       set, the entry has more than one space-separated part, and
     *       {@code StringTools.allStartWithLowercase} holds.</li>
     * </ul>
     * The all-uppercase and first-capitalized forms are skipped when already present as a key
     * in {@code tokenMap}; every form is skipped when identical to the entry itself.
     *
     * @param originalToken the entry as written in the data file
     * @param tokenMap      entries already registered; only queried, never modified here
     * @return the new variants, not including {@code originalToken}; possibly empty
     */
    public List<String> getTokenLettercaseVariants(String originalToken, Map<String, AnalyzedToken> tokenMap) {
        List<String> variants = new ArrayList<>();

        if (settings.allowAllUppercase && !StringTools.isCamelCase(originalToken)) {
            String upper = originalToken.toUpperCase();
            if (!tokenMap.containsKey(upper) && !originalToken.equals(upper)) {
                variants.add(upper);
            }
        }

        if (!settings.allowFirstCapitalized) {
            return variants;
        }

        String firstCapitalized = StringTools.uppercaseFirstChar(originalToken);
        if (!tokenMap.containsKey(firstCapitalized) && !originalToken.equals(firstCapitalized)) {
            variants.add(firstCapitalized);
        }

        boolean titlecaseApplies = settings.allowTitlecase
                && originalToken.split(" ").length > 1
                && StringTools.allStartWithLowercase(originalToken);
        if (titlecaseApplies) {
            String naiveTitle = WordUtils.capitalize(originalToken);
            if (!naiveTitle.equals(firstCapitalized) && !originalToken.equals(naiveTitle)) {
                variants.add(naiveTitle);
            }
            String smartTitle = StringTools.titlecaseGlobal(originalToken);
            boolean duplicate = smartTitle.equals(firstCapitalized)
                    || smartTitle.equals(naiveTitle)
                    || originalToken.equals(smartTitle);
            if (!duplicate) {
                variants.add(smartTitle);
            }
        }
        return variants;
    }

    /**
     * Tags multiword expressions in the sentence without a cancellation callback.
     *
     * @param input the sentence to process
     * @return the result of {@code disambiguate(input, null)}
     * @throws IOException declared by the overridden method
     */
    @Override
    public AnalyzedSentence disambiguate(AnalyzedSentence input) throws IOException {
        final JLanguageTool.CheckCancelledCallback noCallback = null;
        return disambiguate(input, noCallback);
    }

    /**
     * Adds multiword readings to the tokens of a sentence.
     *
     * <p>For every non-empty token position {@code i} two lookups are made:
     * <ol>
     *   <li>entries containing spaces: if the run of non-whitespace tokens starting at
     *       {@code i} is a known first token, tokens are accumulated (whitespace tokens
     *       becoming a single space) and every prefix is looked up in {@code mFullSpace};</li>
     *   <li>entries without spaces: if the first character is known, consecutive
     *       non-whitespace tokens are accumulated and looked up in {@code mFullNoSpace}.</li>
     * </ol>
     * A match on a single token adds a reading with the entry's tag. A match spanning several
     * tokens adds {@code <TAG>} to the first token and {@code </TAG>} to the last. Entries
     * tagged {@code tagForNotAddingTags} add no reading. At most
     * {@code MAX_TOKENS_IN_MULTIWORD} tokens are scanned from each start position.
     *
     * <p>The token array obtained from {@code input} is modified in place.
     *
     * @param input         the sentence to process
     * @param checkCanceled optional callback; when it reports cancellation the scan stops and
     *                      the sentence is returned as processed so far
     * @return a new sentence built from the (in-place modified) token array, passed through
     *         {@code removePreviousTags} first when that option is enabled
     * @throws IOException declared by the overridden method
     */
    @Override
    public final AnalyzedSentence disambiguate(AnalyzedSentence input, @Nullable JLanguageTool.CheckCancelledCallback checkCanceled) throws IOException {
        lazyInit();
        AnalyzedTokenReadings[] anTokens = input.getTokens();
        // Deliberately the same array under a second name: readings are added in place.
        AnalyzedTokenReadings[] output = anTokens;
        for (int i = 0; i < anTokens.length; i++) {
            String tok = output[i].getToken();
            if (tok.length() < 1) {
                continue;
            }
            // Glue the following non-whitespace tokens on, so the key is the whole "word" starting at i.
            StringBuilder tokBuilder = new StringBuilder(tok);
            for (int k = i + 1; k < anTokens.length && !anTokens[k].isWhitespace(); k++) {
                tokBuilder.append(output[k].getToken());
            }
            tok = tokBuilder.toString();
            if (checkCanceled != null && checkCanceled.checkCancelled()) {
                break;
            }

            // --- entries that contain spaces ---
            if (mStartSpace.containsKey(tok)) {
                // 0 only during the first pass (j == i); afterwards it always equals j.
                int finalLen = 0;
                StringBuilder keyBuilder = new StringBuilder();
                // Largest number of space-separated parts of any entry starting with 'tok'.
                int len = mStartSpace.get(tok);
                int j = i;
                int lenCounter = 0;
                while (j < anTokens.length && j - i < MAX_TOKENS_IN_MULTIWORD) {
                    if (!anTokens[j].isWhitespace()) {
                        keyBuilder.append(anTokens[j].getToken());
                        String keyStr = keyBuilder.toString();
                        AnalyzedToken at = mFullSpace.get(keyStr);
                        if (at != null) {
                            if (!at.getPOSTag().equals(tagForNotAddingTags)) {
                                if (finalLen == 0) {
                                    output[i] = setAndAnnotate(output[i], new AnalyzedToken(anTokens[j].getToken(), at.getPOSTag(), at.getLemma()));
                                } else {
                                    output[i] = prepareNewReading(at, output[i].getToken(), output[i], false);
                                    output[finalLen] = prepareNewReading(at, anTokens[finalLen].getToken(), output[finalLen], true);
                                }
                            }
                            if (addIgnoreSpelling) {
                                if (finalLen == 0) {
                                    output[i].ignoreSpelling();
                                } else {
                                    for (int m = i; m <= finalLen; m++) {
                                        output[m].ignoreSpelling();
                                    }
                                }
                            }
                        }
                    } else {
                        // Collapse a whitespace token into one space, but only directly after a word token.
                        if (j > 1 && !anTokens[j - 1].isWhitespace()) {
                            keyBuilder.append(' ');
                            lenCounter++;
                        }
                        // No entry starting with 'tok' has more parts than this; stop scanning.
                        if (lenCounter == len) {
                            break;
                        }
                    }
                    j++;
                    finalLen = j;
                }
            }

            // --- entries without spaces ---
            if (!mStartNoSpace.containsKey(tok.substring(0, 1))) {
                continue;
            }
            StringBuilder keyBuilder = new StringBuilder();
            for (int j = i; j < anTokens.length && !anTokens[j].isWhitespace() && j - i < MAX_TOKENS_IN_MULTIWORD; j++) {
                keyBuilder.append(anTokens[j].getToken());
                String keyStr = keyBuilder.toString();
                AnalyzedToken at = mFullNoSpace.get(keyStr);
                if (at == null) {
                    continue;
                }
                if (!at.getPOSTag().equals(tagForNotAddingTags)) {
                    if (i == j) {
                        String postag = at.getPOSTag();
                        // A low-priority tag is only added when the token has nothing better.
                        if (!isLowPriorityTag(postag) || !output[i].hasReading() || output[i].isPosTagUnknown()) {
                            output[i] = setAndAnnotate(output[i], new AnalyzedToken(anTokens[j].getToken(), postag, at.getLemma()));
                        }
                    } else {
                        output[i] = prepareNewReading(at, anTokens[i].getToken(), output[i], false);
                        output[j] = prepareNewReading(at, anTokens[j].getToken(), output[j], true);
                    }
                }
                if (!addIgnoreSpelling) {
                    continue;
                }
                for (int m = i; m <= j; m++) {
                    output[m].ignoreSpelling();
                }
            }
        }
        if (isRemovePreviousTags) {
            return new AnalyzedSentence(removePreviousTags(output));
        }
        return new AnalyzedSentence(output);
    }

    /**
     * Adds a multiword boundary reading to a token.
     *
     * @param at     the matched entry, supplying the POS tag and lemma
     * @param token  surface form to store in the new reading
     * @param atrs   the token readings to extend
     * @param isLast {@code true} to add the closing form {@code </TAG>}, {@code false} for
     *               the opening form {@code <TAG>}
     * @return {@code atrs}, with the new reading added
     */
    private AnalyzedTokenReadings prepareNewReading(AnalyzedToken at, String token, AnalyzedTokenReadings atrs, boolean isLast) {
        String opening = isLast ? "</" : "<";
        String wrappedTag = opening + at.getPOSTag() + '>';
        AnalyzedToken boundaryReading = new AnalyzedToken(token, wrappedTag, at.getLemma());
        return setAndAnnotate(atrs, boundaryReading);
    }

    /**
     * Adds {@code newReading} to {@code oldReading}, recording {@code "MULTIWORD_CHUNKER"} as
     * the source of the change.
     *
     * @param oldReading the token readings to extend (modified in place)
     * @param newReading the reading to add
     * @return the same {@code oldReading} instance
     */
    private AnalyzedTokenReadings setAndAnnotate(AnalyzedTokenReadings oldReading, AnalyzedToken newReading) {
        oldReading.addReading(newReading, "MULTIWORD_CHUNKER");
        return oldReading;
    }

    /**
     * Reads the data file (UTF-8) into a list of entry lines.
     *
     * <p>Processing per line, after trimming:
     * <ul>
     *   <li>a line starting with {@code #separatorRegExp=} sets {@code separator} to the rest
     *       of that line;</li>
     *   <li>empty lines and lines starting with {@code #} are skipped;</li>
     *   <li>anything from the first {@code #} onwards is dropped as a trailing comment;</li>
     *   <li>a line matching {@code GermanLineExpander} (ends in {@code /} plus one or more of
     *       {@code E}, {@code S}, {@code N}) is expanded into the base word followed by the
     *       base word with {@code e}, {@code s} and/or {@code n} appended.</li>
     * </ul>
     *
     * <p>The expansion splits at the <em>last</em> slash, which is where the pattern anchors
     * the flags. Splitting on every slash (as done previously) misread lines that contain an
     * additional slash in the word itself: the text between the first and second slash was
     * taken as the flags and the rest of the word was lost.
     *
     * <p>Side effect: {@code separator} is reset to {@code DEFAULT_SEPARATOR} before reading.
     *
     * @param stream the data file contents; closed when this method returns
     * @return the entry lines in file order, with expansions inserted in place
     * @throws RuntimeException wrapping any {@link IOException} raised while reading
     */
    private List<String> loadWords(InputStream stream) {
        List<String> lines = new ArrayList<>();
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))) {
            separator = DEFAULT_SEPARATOR;
            String line;
            while ((line = reader.readLine()) != null) {
                line = line.trim();
                if (line.startsWith("#separatorRegExp=")) {
                    separator = line.replace("#separatorRegExp=", "");
                }
                if (line.isEmpty() || line.charAt(0) == '#') {
                    continue;
                }
                line = StringUtils.substringBefore(line, "#").trim();
                if (!GermanLineExpander.matcher(line).matches()) {
                    lines.add(line);
                    continue;
                }
                // The pattern guarantees a slash followed only by flag letters up to the end.
                int slash = line.lastIndexOf('/');
                String base = line.substring(0, slash).trim();
                String flags = line.substring(slash + 1);
                lines.add(base);
                if (flags.indexOf('E') >= 0) {
                    lines.add(base + "e");
                }
                if (flags.indexOf('S') >= 0) {
                    lines.add(base + "s");
                }
                if (flags.indexOf('N') >= 0) {
                    lines.add(base + "n");
                }
            }
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
        return lines;
    }

    /**
     * Controls whether {@code disambiguate} calls {@code ignoreSpelling()} on the tokens of
     * each matched entry.
     *
     * <p>NOTE(review): chunkers returned by {@code getInstance} are cached and shared, so this
     * setting affects every holder of the same instance.
     *
     * @param ignoreSpelling {@code true} to mark matched tokens
     */
    public void setIgnoreSpelling(boolean ignoreSpelling) {
        addIgnoreSpelling = ignoreSpelling;
    }

    /**
     * Controls whether {@code disambiguate} passes its result through
     * {@code removePreviousTags} before returning.
     *
     * <p>NOTE(review): chunkers returned by {@code getInstance} are cached and shared, so this
     * setting affects every holder of the same instance.
     *
     * @param removePreviousTags {@code true} to enable the post-processing step
     */
    public void setRemovePreviousTags(boolean removePreviousTags) {
        isRemovePreviousTags = removePreviousTags;
    }

    /**
     * Rewrites the tokens covered by a multiword (marked with {@code <TAG>} ... {@code </TAG>}
     * readings) so that they carry plain tags instead of the bracketed boundary tags.
     *
     * <p>The array is scanned left to right, skipping whitespace tokens, with a small amount
     * of state: {@code nextPOSTag} is non-empty while the scan is inside a multiword.
     *
     * @param aTokens the sentence tokens; elements are replaced or modified in place
     * @return the same {@code aTokens} array
     */
    private AnalyzedTokenReadings[] removePreviousTags(AnalyzedTokenReadings[] aTokens) {
        // Tag of the current multiword with the surrounding '<' and '>' stripped.
        String POSTag = "";
        // Lemma of the current multiword.
        String lemma = "";
        // Tag to give to the tokens following the first one; empty when not inside a multiword.
        String nextPOSTag = "";
        AnalyzedToken analyzedToken = null;
        for (int i = 0; i < aTokens.length; ++i) {
            AnalyzedToken newAnalyzedToken;
            if (aTokens[i].isWhitespace()) continue;
            if (!nextPOSTag.isEmpty()) {
                // Inside a multiword: this token gets a single reading with the follow-up tag.
                newAnalyzedToken = new AnalyzedToken(aTokens[i].getToken(), nextPOSTag, lemma);
                // The closing-tag check must happen before the token is replaced below;
                // finding it ends the multiword.
                if (aTokens[i].hasPosTagAndLemma("</" + POSTag + ">", lemma)) {
                    nextPOSTag = "";
                    lemma = "";
                }
                aTokens[i] = new AnalyzedTokenReadings(aTokens[i], Collections.singletonList(newAnalyzedToken), "HybridDisamb");
                continue;
            }
            // Not inside a multiword: see whether one starts at this token.
            analyzedToken = this.getMultiWordAnalyzedToken(aTokens, i);
            if (analyzedToken == null) continue;
            // Strip the first and last character, i.e. the '<' and '>' of the opening tag.
            POSTag = analyzedToken.getPOSTag().substring(1, analyzedToken.getPOSTag().length() - 1);
            lemma = analyzedToken.getLemma();
            if (aTokens[i].hasPosTagAndLemma("</" + POSTag + ">", lemma)) {
                // Opening and closing tag on the same token: drop both boundary readings and
                // add the plain tag, keeping the token's other readings.
                aTokens[i].removeReading(aTokens[i].readingWithTagRegex("</" + POSTag + ">"), "HybridDisamb");
                aTokens[i].removeReading(aTokens[i].readingWithTagRegex("<" + POSTag + ">"), "HybridDisamb");
                aTokens[i].addReading(new AnalyzedToken(analyzedToken.getToken(), POSTag, lemma), "HybridDisamb");
                nextPOSTag = "";
                lemma = "";
                continue;
            }
            // First token of a longer multiword: single reading with the plain tag, and the
            // following tokens will receive the tag derived by getNextPosTag().
            newAnalyzedToken = new AnalyzedToken(analyzedToken.getToken(), POSTag, lemma);
            aTokens[i] = new AnalyzedTokenReadings(aTokens[i], Collections.singletonList(newAnalyzedToken), "HybridDisamb");
            nextPOSTag = this.getNextPosTag(POSTag);
        }
        return aTokens;
    }

    /**
     * Picks the multiword that starts at token {@code i}, preferring the longest one.
     *
     * <p>Candidates are the readings of {@code aTokens[i]} whose POS tag has the opening form
     * {@code <TAG>}. For each candidate the nearest following token carrying the matching
     * closing reading ({@code </TAG>} with the same lemma) determines its distance. The
     * candidate with the greatest distance wins; on a tie the later candidate replaces the
     * earlier one unless the later one has a low-priority tag. Candidates without a closing
     * reading are ignored.
     *
     * <p>The tag handed to {@code isLowPriorityTag} has exactly the leading {@code <} and the
     * trailing {@code >} removed. The previous code removed one character too many, so the
     * low-priority check could never match, and a tag of {@code <>} caused a
     * {@link StringIndexOutOfBoundsException}.
     *
     * @param aTokens the sentence tokens
     * @param i       index of the token at which a multiword may start
     * @return the selected opening reading, or {@code null} if no multiword starts here
     */
    private AnalyzedToken getMultiWordAnalyzedToken(AnalyzedTokenReadings[] aTokens, Integer i) {
        List<AnalyzedToken> openingReadings = new ArrayList<>();
        for (AnalyzedToken reading : aTokens[i]) {
            String posTag = reading.getPOSTag();
            if (posTag != null && posTag.startsWith("<") && posTag.endsWith(">") && !posTag.startsWith("</")) {
                openingReadings.add(reading);
            }
        }

        AnalyzedToken selected = null;
        int maxDistance = 0;
        for (AnalyzedToken candidate : openingReadings) {
            String posTag = candidate.getPOSTag();
            String closingTag = "</" + posTag.substring(1);
            String cleanTag = posTag.substring(1, posTag.length() - 1);
            String lemma = candidate.getLemma();
            for (int distance = 1; i + distance < aTokens.length; distance++) {
                if (!aTokens[i + distance].hasPosTagAndLemma(closingTag, lemma)) {
                    continue;
                }
                boolean longer = distance > maxDistance;
                boolean tieWon = distance == maxDistance && !isLowPriorityTag(cleanTag);
                if (longer || tieWon) {
                    maxDistance = distance;
                    selected = candidate;
                }
                // Only the nearest closing reading counts for this candidate.
                break;
            }
        }
        return selected;
    }

    /**
     * Derives the tag for the tokens that follow the first token of a multiword.
     *
     * <ul>
     *   <li>tags starting with {@code "NC"} become {@code "AQ0"} + characters 2..3 of the
     *       tag + {@code "0"};</li>
     *   <li>tags starting with {@code "N "} become {@code "J "} + the rest after index 2;</li>
     *   <li>any other tag is returned unchanged.</li>
     * </ul>
     * NOTE(review): presumably this maps noun tags to adjective tags in specific tagsets;
     * confirm with the language modules. An {@code "NC"} tag shorter than four characters
     * makes {@code substring} throw.
     *
     * @param postag the plain (unbracketed) tag of the multiword
     * @return the tag for the subsequent tokens
     */
    private String getNextPosTag(String postag) {
        String next = postag;
        if (postag.startsWith("NC")) {
            String carriedOver = postag.substring(2, 4);
            next = "AQ0" + carriedOver + "0";
        } else if (postag.startsWith("N ")) {
            next = "J " + postag.substring(2);
        }
        return next;
    }

    /**
     * Tells whether a tag is treated as low priority, i.e. whether it is exactly
     * {@code "NPCN000"}.
     *
     * @param tag the plain (unbracketed) tag; must not be {@code null}
     * @return {@code true} for the low-priority tag
     */
    private boolean isLowPriorityTag(String tag) {
        final String lowPriorityTag = "NPCN000";
        return tag.equals(lowPriorityTag);
    }

    /**
     * Returns the shared chunker for a data file with all casing variants disabled and no
     * default tag.
     *
     * @param filename resource path of the multiword data file
     * @return the cached chunker for these settings
     */
    @NotNull
    public static MultiWordChunker getInstance(@NotNull String filename) {
        final boolean noVariants = false;
        return getInstance(filename, noVariants, noVariants, noVariants);
    }

    /**
     * Returns the shared chunker for a data file with the given casing options and no
     * default tag, so every data line must carry its own tag.
     *
     * @param filename              resource path of the multiword data file
     * @param allowFirstCapitalized also accept entries with the first character uppercased
     * @param allowAllUppercase     also accept entries written entirely in uppercase
     * @param allowTitlecase        also accept title-cased forms of multi-part entries
     * @return the cached chunker for these settings
     */
    @NotNull
    public static MultiWordChunker getInstance(@NotNull String filename, boolean allowFirstCapitalized, boolean allowAllUppercase, boolean allowTitlecase) {
        final String noDefaultTag = null;
        return getInstance(filename, allowFirstCapitalized, allowAllUppercase, allowTitlecase, noDefaultTag);
    }

    /**
     * Returns the shared chunker for the given combination of settings, creating it on the
     * first request. Creation does not read the data file; that happens on the first call to
     * {@code disambiguate}.
     *
     * @param filename              resource path of the multiword data file
     * @param allowFirstCapitalized also accept entries with the first character uppercased
     * @param allowAllUppercase     also accept entries written entirely in uppercase
     * @param allowTitlecase        also accept title-cased forms of multi-part entries
     * @param defaultTag            tag applied to every entry, in which case data lines must
     *                              not contain a tag; {@code null} to read tags from the file
     * @return the cached chunker for these settings
     */
    @NotNull
    public static MultiWordChunker getInstance(@NotNull String filename, boolean allowFirstCapitalized, boolean allowAllUppercase, boolean allowTitlecase, @Nullable String defaultTag) {
        Settings cacheKey = new Settings(filename, allowFirstCapitalized, allowAllUppercase, allowTitlecase, defaultTag);
        return chunkerCache.computeIfAbsent(cacheKey, MultiWordChunker::new);
    }

    /**
     * Immutable configuration of a chunker. Two instances are equal when all five values
     * match, which makes this class usable as the key of the chunker cache.
     */
    private static class Settings {
        /** Resource path of the multiword data file. */
        @NotNull
        private final String filename;
        /** Whether entries with the first character uppercased are accepted as well. */
        private final boolean allowFirstCapitalized;
        /** Whether entries written entirely in uppercase are accepted as well. */
        private final boolean allowAllUppercase;
        /** Whether title-cased forms of multi-part entries are accepted as well. */
        private final boolean allowTitlecase;
        /** Tag applied to every entry, or {@code null} when tags come from the data file. */
        @Nullable
        private final String defaultTag;

        private Settings(@NotNull String filename, boolean allowFirstCapitalized, boolean allowAllUppercase, boolean allowTitlecase, @Nullable String defaultTag) {
            this.filename = filename;
            this.allowFirstCapitalized = allowFirstCapitalized;
            this.allowAllUppercase = allowAllUppercase;
            this.allowTitlecase = allowTitlecase;
            this.defaultTag = defaultTag;
        }

        /** Value equality over all five fields. */
        @Override
        public final boolean equals(Object o) {
            if (!(o instanceof Settings)) {
                return false;
            }
            Settings other = (Settings) o;
            if (allowFirstCapitalized != other.allowFirstCapitalized) {
                return false;
            }
            if (allowAllUppercase != other.allowAllUppercase) {
                return false;
            }
            if (allowTitlecase != other.allowTitlecase) {
                return false;
            }
            return filename.equals(other.filename) && Objects.equals(defaultTag, other.defaultTag);
        }

        /** Hash over the same five fields that {@link #equals(Object)} compares. */
        @Override
        public int hashCode() {
            int hash = filename.hashCode();
            hash = hash * 31 + Boolean.hashCode(allowFirstCapitalized);
            hash = hash * 31 + Boolean.hashCode(allowAllUppercase);
            hash = hash * 31 + Boolean.hashCode(allowTitlecase);
            hash = hash * 31 + Objects.hashCode(defaultTag);
            return hash;
        }
    }
}

