/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.parser.eval;

import edu.stanford.nlp.international.Language;
import edu.stanford.nlp.international.arabic.ArabicMorphoFeatureSpecification;
import edu.stanford.nlp.international.french.FrenchMorphoFeatureSpecification;
import edu.stanford.nlp.international.morph.MorphoFeatureSpecification;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.parser.lexparser.TreebankLangParserParams;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.TwoDimensionalIntCounter;
import edu.stanford.nlp.trees.DiskTreebank;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.logging.Redwood;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;

/**
 * Command-line utility that reads a treebank and prints counts of words, tags,
 * lemmas and morphological feature tags, followed by three reports:
 * <ol>
 *   <li>words whose single lemma occurs with more than one reduced feature tag,</li>
 *   <li>words with no lemma or with multiple lemmas,</li>
 *   <li>for every POS tag, the reduced feature tags it co-occurs with.</li>
 * </ol>
 *
 * <p>Usage: {@code java TreebankFactoredLexiconStats language filename features},
 * where {@code features} is a comma-separated list of
 * {@code MorphoFeatureSpecification.MorphoFeatureType} names.
 */
public class TreebankFactoredLexiconStats {
    private static final Redwood.RedwoodChannels log = Redwood.channels(TreebankFactoredLexiconStats.class);

    /**
     * Entry point. Exits with status -1 when the argument count is not three.
     *
     * @param args {@code [language, treebank path, comma-separated feature names]}
     */
    public static void main(String[] args) {
        if (args.length != 3) {
            System.err.printf("Usage: java %s language filename features%n", TreebankFactoredLexiconStats.class.getName());
            System.exit(-1);
        }

        Language language = Language.valueOf(args[0]);
        TreebankLangParserParams tlpp = language.params;

        // Any language other than Arabic is handled with the French configuration.
        boolean isArabic = language.equals(Language.Arabic);
        String[] options = new String[]{isArabic ? "-arabicFactored" : "-frenchFactored"};
        tlpp.setOptionFlag(options, 0);

        DiskTreebank tb = tlpp.diskTreebank();
        tb.loadPath(args[1]);

        MorphoFeatureSpecification morphoSpec = isArabic
                ? new ArabicMorphoFeatureSpecification()
                : new FrenchMorphoFeatureSpecification();
        String[] features = args[2].trim().split(",");
        for (String feature : features) {
            morphoSpec.activate(MorphoFeatureSpecification.MorphoFeatureType.valueOf(feature));
        }

        // Flat counters; pair counters are keyed by the plain concatenation of both parts.
        ClassicCounter<String> wordTagCounter = new ClassicCounter<String>(30000);
        ClassicCounter<String> morphTagCounter = new ClassicCounter<String>(500);
        ClassicCounter<String> morphCounter = new ClassicCounter<String>(500);
        ClassicCounter<String> wordCounter = new ClassicCounter<String>(30000);
        ClassicCounter<String> tagCounter = new ClassicCounter<String>(300);
        ClassicCounter<String> lemmaCounter = new ClassicCounter<String>(25000);
        ClassicCounter<String> lemmaTagCounter = new ClassicCounter<String>(25000);
        ClassicCounter<String> richTagCounter = new ClassicCounter<String>(1000);
        ClassicCounter<String> reducedTagCounter = new ClassicCounter<String>(500);
        ClassicCounter<String> reducedTagLemmaCounter = new ClassicCounter<String>(500);

        // word -> every lemma observed for that word
        Map<String, Set<String>> wordLemmaMap = Generics.newHashMap();
        TwoDimensionalIntCounter<String, String> lemmaReducedTagCounter = new TwoDimensionalIntCounter<String, String>(30000);
        TwoDimensionalIntCounter<String, String> reducedTagTagCounter = new TwoDimensionalIntCounter<String, String>(500);
        TwoDimensionalIntCounter<String, String> tagReducedTagCounter = new TwoDimensionalIntCounter<String, String>(300);

        int numTrees = 0;
        for (Tree tree : tb) {
            // Apply the language-specific transformation to every non-leaf subtree.
            for (Tree subTree : tree) {
                if (!subTree.isLeaf()) {
                    tlpp.transformTree(subTree, tree);
                }
            }

            List<Label> pretermList = tree.preTerminalYield();
            List<Label> yield = tree.yield();
            assert (yield.size() == pretermList.size());

            int yieldLen = yield.size();
            for (int i = 0; i < yieldLen; ++i) {
                String tag = pretermList.get(i).value();
                String word = yield.get(i).value();
                // The morphological analysis string is read from the leaf's originalText field.
                String morph = ((CoreLabel) yield.get(i)).originalText();

                Pair<String, String> lemmaTag = MorphoFeatureSpecification.splitMorphString(word, morph);
                String lemma = lemmaTag.first();
                String richTag = lemmaTag.second();

                // Tags containing "MW" get a distinct lemma so they are counted separately.
                if (tag.contains("MW")) {
                    lemma = lemma + "-MWE";
                }

                lemmaCounter.incrementCount(lemma);
                lemmaTagCounter.incrementCount(lemma + tag);
                richTagCounter.incrementCount(richTag);

                String reducedTag = morphoSpec.strToFeatures(richTag).toString();
                reducedTagCounter.incrementCount(reducedTag);
                reducedTagLemmaCounter.incrementCount(reducedTag + lemma);

                wordTagCounter.incrementCount(word + tag);
                morphTagCounter.incrementCount(morph + tag);
                morphCounter.incrementCount(morph);
                wordCounter.incrementCount(word);
                tagCounter.incrementCount(tag);

                // The two-dimensional counters below use "NONE" in place of an empty feature tag;
                // the flat counters above intentionally saw the raw (possibly empty) value.
                if (reducedTag.equals("")) {
                    reducedTag = "NONE";
                }

                addLemma(wordLemmaMap, word, lemma);

                lemmaReducedTagCounter.incrementCount(lemma, reducedTag);
                reducedTagTagCounter.incrementCount(lemma + reducedTag, tag);
                tagReducedTagCounter.incrementCount(tag, reducedTag);
            }
            ++numTrees;
        }

        // Summary statistics.
        System.out.println("Language: " + language.toString());
        System.out.printf("#trees:\t%d%n", numTrees);
        System.out.printf("#tokens:\t%d%n", (int) wordCounter.totalCount());
        System.out.printf("#words:\t%d%n", wordCounter.keySet().size());
        System.out.printf("#tags:\t%d%n", tagCounter.keySet().size());
        System.out.printf("#wordTagPairs:\t%d%n", wordTagCounter.keySet().size());
        System.out.printf("#lemmas:\t%d%n", lemmaCounter.keySet().size());
        System.out.printf("#lemmaTagPairs:\t%d%n", lemmaTagCounter.keySet().size());
        System.out.printf("#feattags:\t%d%n", reducedTagCounter.keySet().size());
        System.out.printf("#feattag+lemmas:\t%d%n", reducedTagLemmaCounter.keySet().size());
        System.out.printf("#richtags:\t%d%n", richTagCounter.keySet().size());
        System.out.printf("#richtag+lemma:\t%d%n", morphCounter.keySet().size());
        System.out.printf("#richtag+lemmaTagPairs:\t%d%n", morphTagCounter.keySet().size());

        // Report 1: words with exactly one lemma whose lemma has several reduced tags.
        // Words with zero or multiple lemmas are buffered and printed afterwards.
        System.out.println("==================");
        StringBuilder sbNoLemma = new StringBuilder();
        StringBuilder sbMultLemmas = new StringBuilder();
        for (Map.Entry<String, Set<String>> entry : wordLemmaMap.entrySet()) {
            String word = entry.getKey();
            Set<String> lemmas = entry.getValue();
            if (lemmas.size() == 0) {
                sbNoLemma.append("NO LEMMAS FOR WORD: ").append(word).append("\n");
                continue;
            }
            if (lemmas.size() > 1) {
                sbMultLemmas.append("MULTIPLE LEMMAS: ").append(word).append(" ")
                        .append(setToString(lemmas)).append("\n");
                continue;
            }
            String lemma = lemmas.iterator().next();
            Set<String> reducedTags = lemmaReducedTagCounter.getCounter(lemma).keySet();
            if (reducedTags.size() <= 1) {
                continue;
            }
            System.out.printf("%s --> %s%n", word, lemma);
            for (String reducedTag : reducedTags) {
                int count = lemmaReducedTagCounter.getCount(lemma, reducedTag);
                String posTags = setToString(reducedTagTagCounter.getCounter(lemma + reducedTag).keySet());
                System.out.printf("\t%s\t%d\t%s%n", reducedTag, count, posTags);
            }
            System.out.println();
        }

        // Report 2: the buffered lemma anomalies.
        System.out.println("==================");
        System.out.println(sbNoLemma.toString());
        System.out.println(sbMultLemmas.toString());

        // Report 3: reduced tags per POS tag, with POS tags in sorted order.
        System.out.println("==================");
        List<String> tags = new ArrayList<String>(tagReducedTagCounter.firstKeySet());
        Collections.sort(tags);
        for (String tag : tags) {
            System.out.println(tag);
            Set<String> reducedTags = tagReducedTagCounter.getCounter(tag).keySet();
            for (String reducedTag : reducedTags) {
                int count = tagReducedTagCounter.getCount(tag, reducedTag);
                System.out.printf("\t%s\t%d%n", reducedTag, count);
            }
            System.out.println();
        }
        System.out.println("==================");
    }

    /**
     * Records {@code lemma} as a lemma of {@code word}, creating the word's lemma set
     * on first sight. The lemma is added in both cases, so the first observation of a
     * word is not lost.
     *
     * @param wordLemmaMap map from word to the set of lemmas seen for it; modified in place
     * @param word         surface form
     * @param lemma        lemma observed for this occurrence of {@code word}
     */
    private static void addLemma(Map<String, Set<String>> wordLemmaMap, String word, String lemma) {
        Set<String> lemmas = wordLemmaMap.get(word);
        if (lemmas == null) {
            lemmas = Generics.newHashSet(1);
            wordLemmaMap.put(word, lemmas);
        }
        lemmas.add(lemma);
    }

    /**
     * Formats a set as {@code [a b c ]}: each element is followed by a single space,
     * and the whole list is wrapped in square brackets.
     *
     * @param set elements to print, in the set's iteration order
     * @return the bracketed, space-separated rendering
     */
    private static String setToString(Set<String> set) {
        StringBuilder sb = new StringBuilder();
        sb.append("[");
        for (String string : set) {
            sb.append(string).append(" ");
        }
        sb.append("]");
        return sb.toString();
    }
}

