/*
 * Decompiled with CFR 0.152.
 */
package com.hankcs.hanlp.mining.phrase;

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.corpus.occurrence.Occurrence;
import com.hankcs.hanlp.corpus.occurrence.PairFrequency;
import com.hankcs.hanlp.dictionary.stopword.CoreStopWordDictionary;
import com.hankcs.hanlp.dictionary.stopword.Filter;
import com.hankcs.hanlp.mining.phrase.IPhraseExtractor;
import com.hankcs.hanlp.seg.common.Term;
import com.hankcs.hanlp.tokenizer.NotionalTokenizer;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;

/**
 * Extracts two-term phrases from text.
 *
 * <p>The text is split into sentences of filtered terms by
 * {@link NotionalTokenizer#seg2sentence}, every sentence is fed into an
 * {@link Occurrence}, and after {@link Occurrence#compute()} the phrases are
 * taken in the order returned by {@link Occurrence#getPhraseByScore()}.
 */
public class MutualInformationEntropyPhraseExtractor
implements IPhraseExtractor {
    /**
     * Rejects terms whose nature is {@code nx} or {@code t}; every other term
     * is accepted. The filter holds no state, so one shared instance is reused
     * instead of allocating a new anonymous object on each call.
     */
    private static final Filter NATURE_FILTER = new Filter(){

        @Override
        public boolean shouldInclude(Term term) {
            switch (term.nature) {
                case nx: 
                case t: {
                    return false;
                }
            }
            return true;
        }
    };

    /**
     * Extracts at most {@code size} phrases from {@code text}.
     *
     * @param text the text to analyse; handed to the tokenizer unchanged
     * @param size maximum number of phrases to return; a value of zero or less
     *             yields an empty list
     * @return a new mutable list with at most {@code size} phrases, each built
     *         by concatenating the {@code first} and {@code second} parts of a
     *         {@link PairFrequency}
     */
    @Override
    public List<String> extractPhrase(String text, int size) {
        List<String> phraseList = new ArrayList<String>();
        // A non-positive limit can never be satisfied by a "size reached" test,
        // so answer it up front instead of silently returning every phrase.
        if (size <= 0) {
            return phraseList;
        }
        Occurrence occurrence = new Occurrence();
        // The chain is assembled per call so that the current value of
        // CoreStopWordDictionary.FILTER is the one that gets applied.
        Filter[] filterChain = new Filter[]{CoreStopWordDictionary.FILTER, NATURE_FILTER};
        for (List<Term> sentence : NotionalTokenizer.seg2sentence(text, filterChain)) {
            if (HanLP.Config.DEBUG) {
                System.out.println(sentence);
            }
            occurrence.addAll(sentence);
        }
        occurrence.compute();
        if (HanLP.Config.DEBUG) {
            dumpRankings(occurrence);
        }
        for (PairFrequency phrase : occurrence.getPhraseByScore()) {
            if (phraseList.size() >= size) {
                break;
            }
            phraseList.add(String.valueOf(phrase.first) + phrase.second);
        }
        return phraseList;
    }

    /**
     * Convenience entry point: creates a fresh extractor and delegates to
     * {@link #extractPhrase(String, int)}.
     *
     * @param text the text to analyse
     * @param size maximum number of phrases to return
     * @return the phrases produced by {@link #extractPhrase(String, int)}
     */
    public static List<String> extract(String text, int size) {
        MutualInformationEntropyPhraseExtractor extractor = new MutualInformationEntropyPhraseExtractor();
        return extractor.extractPhrase(text, size);
    }

    /**
     * Prints the occurrence itself followed by one line per ranking
     * (mi, le, re, score) to standard output. Only called in debug mode.
     */
    private static void dumpRankings(Occurrence occurrence) {
        System.out.println(occurrence);
        for (PairFrequency phrase : occurrence.getPhraseByMi()) {
            System.out.print(displayKey(phrase) + "\tmi=" + phrase.mi + " , ");
        }
        System.out.println();
        for (PairFrequency phrase : occurrence.getPhraseByLe()) {
            System.out.print(displayKey(phrase) + "\tle=" + phrase.le + " , ");
        }
        System.out.println();
        for (PairFrequency phrase : occurrence.getPhraseByRe()) {
            System.out.print(displayKey(phrase) + "\tre=" + phrase.re + " , ");
        }
        System.out.println();
        for (PairFrequency phrase : occurrence.getPhraseByScore()) {
            System.out.print(displayKey(phrase) + "\tscore=" + phrase.score + " , ");
        }
        System.out.println();
    }

    /**
     * Returns the pair's key with every NUL character replaced by a
     * right arrow (U+2192) so the key can be printed.
     */
    private static String displayKey(PairFrequency phrase) {
        return String.valueOf(((String)phrase.getKey()).replace('\u0000', '\u2192'));
    }
}

