/*
 * Decompiled with CFR 0.152.
 */
package com.hankcs.hanlp.classification.features;

import com.hankcs.hanlp.algorithm.MaxHeap;
import com.hankcs.hanlp.classification.corpus.IDataSet;
import com.hankcs.hanlp.classification.features.BaseFeatureData;
import com.hankcs.hanlp.classification.statistics.ContinuousDistributions;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;

/**
 * Selects features by scoring every (feature, category) pair with the
 * chi-square statistic of its 2x2 contingency table and keeping the
 * features whose best score reaches a configurable critical value.
 */
public class ChiSquareFeatureExtractor {
    /**
     * Minimum chi-square score a feature must reach to be selected.
     * The default 10.83 is the commonly tabulated critical value for
     * p = 0.001 at one degree of freedom.
     */
    protected double chisquareCriticalValue = 10.83;

    /** Upper bound on the number of features returned by {@link #chi_square}. */
    protected int maxSize = 1000000;

    /**
     * Builds the count statistics that {@link #chi_square} consumes.
     *
     * @param dataSet the data set handed to the {@code BaseFeatureData} constructor
     * @return a new {@code BaseFeatureData} built from {@code dataSet}
     */
    public static BaseFeatureData extractBasicFeatureData(IDataSet dataSet) {
        return new BaseFeatureData(dataSet);
    }

    /**
     * Scores every feature against every category and returns the features
     * that pass the critical value.
     *
     * <p>For each feature index the value stored is the largest chi-square
     * score, over all categories, that is greater than or equal to
     * {@code chisquareCriticalValue}; features with no such score are
     * omitted. If more than {@code maxSize} features qualify, the result is
     * cut down through a {@code MaxHeap} of capacity {@code maxSize} ordered
     * by score (presumably retaining the highest-scoring entries; confirm
     * against {@code MaxHeap}).
     *
     * @param stats count statistics; reads {@code featureCategoryJointCount},
     *              {@code categoryCounts} and {@code n}
     * @return map from feature index to its best qualifying chi-square score
     */
    public Map<Integer, Double> chi_square(BaseFeatureData stats) {
        Map<Integer, Double> selectedFeatures = new HashMap<Integer, Double>();

        for (int feature = 0; feature < stats.featureCategoryJointCount.length; ++feature) {
            int[] jointCounts = stats.featureCategoryJointCount[feature];

            // Row marginal: sum of this feature's joint counts over all categories.
            long n1dot = 0;
            for (int count : jointCounts) {
                n1dot += count;
            }
            long n0dot = stats.n - n1dot;

            boolean selected = false;
            double bestScore = 0.0;
            for (int category = 0; category < jointCounts.length; ++category) {
                // Cells of the 2x2 table; first index = feature present (1) / absent (0),
                // second index = inside (1) / outside (0) the current category.
                long n11 = jointCounts[category];
                long n01 = stats.categoryCounts[category] - n11;
                long n00 = n0dot - n01;
                long n10 = n1dot - n11;

                double score = chiSquareScore(stats.n, n11, n10, n01, n00);
                // A NaN score (degenerate table) fails this comparison and is skipped.
                if (score >= this.chisquareCriticalValue && (!selected || score > bestScore)) {
                    selected = true;
                    bestScore = score;
                }
            }
            if (selected) {
                selectedFeatures.put(feature, bestScore);
            }
        }

        if (selectedFeatures.size() <= this.maxSize) {
            return selectedFeatures;
        }

        MaxHeap<Map.Entry<Integer, Double>> maxHeap = new MaxHeap<Map.Entry<Integer, Double>>(this.maxSize, new Comparator<Map.Entry<Integer, Double>>() {
            @Override
            public int compare(Map.Entry<Integer, Double> o1, Map.Entry<Integer, Double> o2) {
                return o1.getValue().compareTo(o2.getValue());
            }
        });
        for (Map.Entry<Integer, Double> entry : selectedFeatures.entrySet()) {
            maxHeap.add(entry);
        }

        // Copy into a fresh map rather than clearing the source map, so the
        // entries held by the heap are never read after their map was cleared.
        Map<Integer, Double> truncated = new HashMap<Integer, Double>();
        for (Map.Entry<Integer, Double> entry : maxHeap.toList()) {
            truncated.put(entry.getKey(), entry.getValue());
        }
        return truncated;
    }

    /**
     * Computes {@code n * (n11*n00 - n10*n01)^2 /
     * ((n11+n01) * (n11+n10) * (n10+n00) * (n01+n00))}.
     *
     * <p>The cross-product difference is evaluated in {@code long} and the
     * denominator in {@code double}, so the 32-bit overflow that plain
     * {@code int} arithmetic would hit on larger counts cannot occur.
     *
     * @return the chi-square score, or {@code NaN} when the denominator is zero
     */
    private static double chiSquareScore(long n, long n11, long n10, long n01, long n00) {
        double denominator = (double) (n11 + n01) * (double) (n11 + n10)
                * (double) (n10 + n00) * (double) (n01 + n00);
        if (denominator == 0.0) {
            return Double.NaN;
        }
        double difference = (double) (n11 * n00 - n10 * n01);
        return (double) n * difference * difference / denominator;
    }

    /** @return the critical value a chi-square score must reach for selection */
    public double getChisquareCriticalValue() {
        return this.chisquareCriticalValue;
    }

    /** @param chisquareCriticalValue the new critical value used by {@link #chi_square} */
    public void setChisquareCriticalValue(double chisquareCriticalValue) {
        this.chisquareCriticalValue = chisquareCriticalValue;
    }

    /**
     * Sets the critical value to
     * {@code ContinuousDistributions.ChisquareInverseCdf(aLevel, 1)}.
     *
     * @param aLevel level passed to the inverse CDF with one degree of freedom
     *               (presumably a significance level; confirm against
     *               {@code ContinuousDistributions})
     * @return this extractor, to allow call chaining
     */
    public ChiSquareFeatureExtractor setALevel(double aLevel) {
        this.chisquareCriticalValue = ContinuousDistributions.ChisquareInverseCdf(aLevel, 1);
        return this;
    }

    /**
     * @return {@code ContinuousDistributions.ChisquareCdf} evaluated at the
     *         current critical value with one degree of freedom
     */
    public double getALevel() {
        return ContinuousDistributions.ChisquareCdf(this.chisquareCriticalValue, 1);
    }
}

