/*
 * Decompiled with CFR 0.152.
 */
package com.hankcs.hanlp.classification.features;

import com.hankcs.hanlp.algorithm.MaxHeap;
import com.hankcs.hanlp.classification.corpus.IDataSet;
import com.hankcs.hanlp.classification.features.BaseFeatureData;
import com.hankcs.hanlp.classification.statistics.ContinuousDistributions;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;

public class ChiSquareFeatureExtractor {
    protected double chisquareCriticalValue = 10.83;
    protected int maxSize = 1000000;

    public static BaseFeatureData extractBasicFeatureData(IDataSet dataSet) {
        BaseFeatureData stats = new BaseFeatureData(dataSet);
        return stats;
    }

    /*
     * WARNING - void declaration
     */
    public Map<Integer, Double> chi_square(BaseFeatureData stats) {
        int feature;
        HashMap<Integer, Double> selectedFeatures = new HashMap<Integer, Double>();
        for (feature = 0; feature < stats.featureCategoryJointCount.length; ++feature) {
            void var20_14;
            int[] categoryList = stats.featureCategoryJointCount[feature];
            double N1dot = 0.0;
            for (int count : categoryList) {
                N1dot += (double)count;
            }
            double N0dot = (double)stats.n - N1dot;
            boolean bl = false;
            while (var20_14 < categoryList.length) {
                Double previousScore;
                double N11 = categoryList[var20_14];
                double N01 = (double)stats.categoryCounts[var20_14] - N11;
                double N00 = N0dot - N01;
                double N10 = N1dot - N11;
                double chisquareScore = (double)stats.n * Math.pow(N11 * N00 - N10 * N01, 2.0) / ((N11 + N01) * (N11 + N10) * (N10 + N00) * (N01 + N00));
                if (chisquareScore >= this.chisquareCriticalValue && ((previousScore = (Double)selectedFeatures.get(feature)) == null || chisquareScore > previousScore)) {
                    selectedFeatures.put(feature, chisquareScore);
                }
                ++var20_14;
            }
        }
        if (selectedFeatures.size() == 0) {
            for (feature = 0; feature < stats.featureCategoryJointCount.length; ++feature) {
                selectedFeatures.put(feature, 0.0);
            }
        }
        if (selectedFeatures.size() > this.maxSize) {
            MaxHeap<Map.Entry<Integer, Double>> maxHeap = new MaxHeap<Map.Entry<Integer, Double>>(this.maxSize, new Comparator<Map.Entry<Integer, Double>>(){

                @Override
                public int compare(Map.Entry<Integer, Double> o1, Map.Entry<Integer, Double> o2) {
                    return o1.getValue().compareTo(o2.getValue());
                }
            });
            for (Map.Entry entry : selectedFeatures.entrySet()) {
                maxHeap.add(entry);
            }
            selectedFeatures.clear();
            for (Map.Entry<Object, Object> entry : maxHeap) {
                selectedFeatures.put((Integer)entry.getKey(), (Double)entry.getValue());
            }
        }
        return selectedFeatures;
    }

    public double getChisquareCriticalValue() {
        return this.chisquareCriticalValue;
    }

    public void setChisquareCriticalValue(double chisquareCriticalValue) {
        this.chisquareCriticalValue = chisquareCriticalValue;
    }

    public ChiSquareFeatureExtractor setALevel(double aLevel) {
        this.chisquareCriticalValue = ContinuousDistributions.ChisquareInverseCdf(aLevel, 1);
        return this;
    }

    public double getALevel() {
        return ContinuousDistributions.ChisquareCdf(this.chisquareCriticalValue, 1);
    }
}

