/*
 * Decompiled with CFR 0.152.
 */
package opennlp.tools.ngram;

import java.util.Collection;
import java.util.HashSet;
import java.util.LinkedList;
import opennlp.tools.util.StringList;

/**
 * Static helpers for extracting n-grams from token sequences and for estimating
 * n-gram probabilities from a collection of sentences.
 *
 * <p>Throughout this class a "set" is a collection of sentences, each sentence being a
 * {@link StringList} of tokens. N-gram counts are occurrence counts: every position in
 * every sentence at which the n-gram matches contributes one to the count.
 */
public class NGramUtils {
    /** Tolerance used when checking that the interpolation weights sum to one. */
    private static final double LAMBDA_SUM_TOLERANCE = 1.0e-9;

    /**
     * Computes {@code (count(ngram) + k) / (count(context) + k)}, where {@code context} is
     * the n-gram without its last token.
     *
     * @param ngram the n-gram to score
     * @param set   the sentences to count in
     * @param k     the smoothing constant added to numerator and denominator
     * @return the smoothed estimate, or {@code 0.0} when the denominator is zero
     */
    public static double calculateLaplaceSmoothingProbability(StringList ngram, Iterable<StringList> set, Double k) {
        double numerator = NGramUtils.count(ngram, set) + k;
        double denominator = NGramUtils.contextCount(ngram, set) + k * 1.0;
        return NGramUtils.ratioOrZero(numerator, denominator);
    }

    /**
     * Computes the relative frequency of a single word: its number of occurrences divided
     * by the total number of tokens in all sentences.
     *
     * @param word the word to score
     * @param set  the sentences to count in
     * @return the relative frequency, or {@code 0.0} when the sentences contain no tokens
     */
    public static double calculateUnigramMLProbability(String word, Collection<StringList> set) {
        double occurrences = NGramUtils.count(new StringList(word), set);
        return NGramUtils.ratioOrZero(occurrences, NGramUtils.totalTokens(set));
    }

    /**
     * Computes {@code count(x0 x1) / count(x0)}.
     *
     * @param x0  the first token of the bigram
     * @param x1  the second token of the bigram
     * @param set the sentences to count in
     * @return the estimate, or {@code 0.0} when {@code x0} never occurs
     */
    public static double calculateBigramMLProbability(String x0, String x1, Collection<StringList> set) {
        return NGramUtils.calculateNgramMLProbability(new StringList(x0, x1), set);
    }

    /**
     * Computes {@code count(x0 x1 x2) / count(x0 x1)}.
     *
     * @param x0  the first token of the trigram
     * @param x1  the second token of the trigram
     * @param x2  the third token of the trigram
     * @param set the sentences to count in
     * @return the estimate, or {@code 0.0} when {@code x0 x1} never occurs
     */
    public static double calculateTrigramMLProbability(String x0, String x1, String x2, Iterable<StringList> set) {
        return NGramUtils.calculateNgramMLProbability(new StringList(x0, x1, x2), set);
    }

    /**
     * Computes the count of {@code ngram} divided by the count of its context (the n-gram
     * without its last token). For a single-token n-gram the context is empty and the
     * total number of tokens is used as the denominator.
     *
     * @param ngram the n-gram to score
     * @param set   the sentences to count in
     * @return the estimate, or {@code 0.0} when the context never occurs
     */
    public static double calculateNgramMLProbability(StringList ngram, Iterable<StringList> set) {
        double occurrences = NGramUtils.count(ngram, set);
        return NGramUtils.ratioOrZero(occurrences, NGramUtils.contextCount(ngram, set));
    }

    /**
     * Computes {@code (count(x0 x1) + k * P(x1)) / (count(x0) + k * set.size())}, where
     * {@code P(x1)} is {@link #calculateUnigramMLProbability(String, Collection)}.
     *
     * @param x0  the first token of the bigram
     * @param x1  the second token of the bigram
     * @param set the sentences to count in
     * @param k   the weight given to the unigram prior
     * @return the smoothed estimate, or {@code 0.0} when the denominator is zero
     */
    public static double calculateBigramPriorSmoothingProbability(String x0, String x1, Collection<StringList> set, Double k) {
        double numerator = NGramUtils.count(new StringList(x0, x1), set)
                + k * NGramUtils.calculateUnigramMLProbability(x1, set);
        // NOTE(review): the denominator scales k by the number of sentences (set.size());
        // confirm that this, rather than a vocabulary size, is the intended normaliser.
        double denominator = NGramUtils.count(new StringList(x0), set) + k * (double)set.size();
        return NGramUtils.ratioOrZero(numerator, denominator);
    }

    /**
     * Computes the weighted sum
     * {@code lambda1 * P(x2 | x0 x1) + lambda2 * P(x2 | x1) + lambda3 * P(x2)} using the
     * trigram, bigram and unigram estimates of this class.
     *
     * <p>When assertions are enabled, the weights must all be greater than zero and sum to
     * one (within a small floating-point tolerance).
     *
     * @param x0      the first token of the trigram
     * @param x1      the second token of the trigram
     * @param x2      the third token of the trigram
     * @param set     the sentences to count in
     * @param lambda1 the weight of the trigram estimate
     * @param lambda2 the weight of the bigram estimate
     * @param lambda3 the weight of the unigram estimate
     * @return the interpolated estimate
     */
    public static double calculateTrigramLinearInterpolationProbability(String x0, String x1, String x2, Collection<StringList> set, Double lambda1, Double lambda2, Double lambda3) {
        // Compare with a tolerance: sums such as 0.1 + 0.2 + 0.7 are not exactly 1.0 in binary floating point.
        assert (Math.abs(lambda1 + lambda2 + lambda3 - 1.0) < LAMBDA_SUM_TOLERANCE) : "lambdas sum should be equals to 1";
        assert (lambda1 > 0.0 && lambda2 > 0.0 && lambda3 > 0.0) : "lambdas should all be greater than 0";
        double trigramPart = lambda1 * NGramUtils.calculateTrigramMLProbability(x0, x1, x2, set);
        double bigramPart = lambda2 * NGramUtils.calculateBigramMLProbability(x1, x2, set);
        double unigramPart = lambda3 * NGramUtils.calculateUnigramMLProbability(x2, set);
        return trigramPart + bigramPart + unigramPart;
    }

    /**
     * Computes one minus the sum, over every distinct word {@code w} found in the
     * sentences, of {@code (count(ngram + w) - discount) / count(ngram)}.
     *
     * @param ngram    the n-gram whose continuations are examined
     * @param discount the amount subtracted from each continuation count
     * @param set      the sentences to count in
     * @return the remaining mass; {@code 1.0} when {@code ngram} never occurs, since no
     *         continuation has been observed for it
     */
    public static double calculateMissingNgramProbabilityMass(StringList ngram, Double discount, Iterable<StringList> set) {
        double ngramOccurrences = NGramUtils.count(ngram, set);
        if (ngramOccurrences == 0.0) {
            // Avoid dividing by zero, which would yield an infinite or NaN result.
            return 1.0;
        }
        double allocatedMass = 0.0;
        // NOTE(review): the discount is also subtracted for words that never follow the
        // n-gram (count 0), which makes their terms negative; confirm this is intended.
        for (String word : NGramUtils.flatSet(set)) {
            double continuationCount = NGramUtils.count(NGramUtils.getNPlusOneNgram(ngram, word), set);
            allocatedMass += (continuationCount - discount) / ngramOccurrences;
        }
        return 1.0 - allocatedMass;
    }

    /**
     * Returns the n-gram without its last token.
     *
     * @param ngram the source n-gram
     * @return the first {@code size - 1} tokens, or {@code null} when that leaves no tokens
     */
    public static StringList getNMinusOneTokenFirst(StringList ngram) {
        int length = ngram.size() - 1;
        String[] tokens = new String[length];
        for (int i = 0; i < length; ++i) {
            tokens[i] = ngram.getToken(i);
        }
        return length > 0 ? new StringList(tokens) : null;
    }

    /**
     * Returns the n-gram without its first token.
     *
     * @param ngram the source n-gram
     * @return the last {@code size - 1} tokens, or {@code null} when that leaves no tokens
     */
    public static StringList getNMinusOneTokenLast(StringList ngram) {
        int length = ngram.size() - 1;
        String[] tokens = new String[length];
        for (int i = 0; i < length; ++i) {
            tokens[i] = ngram.getToken(i + 1);
        }
        return length > 0 ? new StringList(tokens) : null;
    }

    /** Returns a new n-gram made of the tokens of {@code ngram} followed by {@code word}. */
    private static StringList getNPlusOneNgram(StringList ngram, String word) {
        int length = ngram.size();
        String[] tokens = new String[length + 1];
        for (int i = 0; i < length; ++i) {
            tokens[i] = ngram.getToken(i);
        }
        tokens[length] = word;
        return new StringList(tokens);
    }

    /**
     * Counts the occurrences of {@code ngram} in the sentences. Every start position of
     * every sentence is examined, so repeated occurrences within one sentence are all counted.
     */
    private static double count(StringList ngram, Iterable<StringList> sentences) {
        int ngramSize = ngram.size();
        double occurrences = 0.0;
        for (StringList sentence : sentences) {
            int lastStart = sentence.size() - ngramSize;
            for (int start = 0; start <= lastStart; ++start) {
                if (NGramUtils.matchesAt(sentence, start, ngram)) {
                    occurrences += 1.0;
                }
            }
        }
        return occurrences;
    }

    /** Returns whether the tokens of {@code sentence} starting at {@code start} equal those of {@code ngram}. */
    private static boolean matchesAt(StringList sentence, int start, StringList ngram) {
        for (int i = 0; i < ngram.size(); ++i) {
            if (!ngram.getToken(i).equals(sentence.getToken(start + i))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Returns the count of the context of {@code ngram} (the n-gram without its last token).
     * A single-token n-gram has an empty context, for which the total token count is used.
     */
    private static double contextCount(StringList ngram, Iterable<StringList> set) {
        StringList context = NGramUtils.getNMinusOneTokenFirst(ngram);
        if (context == null) {
            return NGramUtils.totalTokens(set);
        }
        return NGramUtils.count(context, set);
    }

    /** Returns the sum of the sizes of all sentences. */
    private static double totalTokens(Iterable<StringList> set) {
        double total = 0.0;
        for (StringList sentence : set) {
            total += (double)sentence.size();
        }
        return total;
    }

    /** Returns {@code numerator / denominator}, or {@code 0.0} when the denominator is zero. */
    private static double ratioOrZero(double numerator, double denominator) {
        return denominator == 0.0 ? 0.0 : numerator / denominator;
    }

    /** Returns the distinct tokens found in all sentences. */
    private static Collection<String> flatSet(Iterable<StringList> set) {
        HashSet<String> vocabulary = new HashSet<String>();
        for (StringList sentence : set) {
            for (String word : sentence) {
                vocabulary.add(word);
            }
        }
        return vocabulary;
    }

    /**
     * Returns every window of {@code size} consecutive tokens of {@code sequence}, in order.
     *
     * @param sequence the tokens to slide over
     * @param size     the window length; {@code -1}, or any value not smaller than the
     *                 sequence length, yields a collection holding only {@code sequence} itself
     * @return the n-grams
     * @throws IllegalArgumentException if {@code size} is zero or below {@code -1} while
     *         being smaller than the sequence length
     */
    public static Collection<StringList> getNGrams(StringList sequence, int size) {
        LinkedList<StringList> ngrams = new LinkedList<StringList>();
        if (size == -1 || size >= sequence.size()) {
            ngrams.add(sequence);
            return ngrams;
        }
        if (size < 1) {
            throw new IllegalArgumentException("n-gram size must be -1 or positive, but was " + size);
        }
        int windows = sequence.size() - size + 1;
        for (int start = 0; start < windows; ++start) {
            // A fresh array per window, so the result does not depend on StringList copying its input.
            String[] window = new String[size];
            for (int offset = 0; offset < size; ++offset) {
                window[offset] = sequence.getToken(start + offset);
            }
            ngrams.add(new StringList(window));
        }
        return ngrams;
    }

    /**
     * Returns every window of {@code size} consecutive elements of {@code sequence}, in order.
     *
     * @param sequence the tokens to slide over
     * @param size     the window length; {@code -1}, or any value not smaller than the
     *                 sequence length, yields a collection holding only {@code sequence} itself
     * @return the n-grams, each as a newly allocated array
     * @throws IllegalArgumentException if {@code size} is zero or below {@code -1} while
     *         being smaller than the sequence length
     */
    public static Collection<String[]> getNGrams(String[] sequence, int size) {
        LinkedList<String[]> ngrams = new LinkedList<String[]>();
        if (size == -1 || size >= sequence.length) {
            ngrams.add(sequence);
            return ngrams;
        }
        if (size < 1) {
            throw new IllegalArgumentException("n-gram size must be -1 or positive, but was " + size);
        }
        int windows = sequence.length - size + 1;
        for (int start = 0; start < windows; ++start) {
            String[] window = new String[size];
            System.arraycopy(sequence, start, window, 0, size);
            ngrams.add(window);
        }
        return ngrams;
    }
}

