/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.tagger.util;

import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.tagger.io.TaggedFileReader;
import edu.stanford.nlp.tagger.io.TaggedFileRecord;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.TreeSet;

/**
 * Command-line tool that reports, for each tag, how many distinct words carrying
 * that tag occur in the training portion of a tagged corpus versus in all of the
 * data that was read.
 *
 * <p>For every train file the leading {@code trainingRatio} fraction of sentences
 * is counted as training data; the remaining sentences, and every sentence of the
 * optional test files, only contribute to the overall word sets. A tag whose
 * training word set is non-empty and as large as its overall word set is listed
 * as "successful" at the end of the report.
 *
 * <p>Recognized properties: {@code trainFile} (required), {@code testFile},
 * {@code closedTags} (whitespace-separated tags), {@code trainingRatio},
 * {@code printWords}, {@code encoding} and {@code tagSeparator}.
 */
public class CountClosedTags {
    /** Redwood channel used to report argument errors. */
    private static final Redwood.RedwoodChannels log = Redwood.channels(CountClosedTags.class);

    /** Tags to restrict counting to, or {@code null} to count words for every tag. */
    Set<String> closedTags;

    /** Tag -> distinct words seen with that tag in the training portion of the train files. */
    Map<String, Set<String>> trainingWords = Generics.newHashMap();

    /** Tag -> distinct words seen with that tag anywhere (training portion, held-out portion, test files). */
    Map<String, Set<String>> allWords = Generics.newHashMap();

    /** Fraction of each train file counted as training data when none is specified. */
    static final double DEFAULT_TRAINING_RATIO = 2.0 / 3.0;

    /** Fraction of the sentences of each train file that is counted as training data. */
    final double trainingRatio;

    /** Whether {@link #report()} also prints the words themselves, not just the counts. */
    final boolean printWords;

    private static final String tagSeparator = "_";
    public static final String TEST_FILE_PROPERTY = "testFile";
    public static final String TRAIN_FILE_PROPERTY = "trainFile";
    public static final String CLOSED_TAGS_PROPERTY = "closedTags";
    public static final String TRAINING_RATIO_PROPERTY = "trainingRatio";
    public static final String PRINT_WORDS_PROPERTY = "printWords";

    /** Every property name accepted by {@link #checkArgs(Properties)}. */
    private static final Set<String> knownArgs = Generics.newHashSet(Arrays.asList(
            TEST_FILE_PROPERTY, TRAIN_FILE_PROPERTY, CLOSED_TAGS_PROPERTY,
            TRAINING_RATIO_PROPERTY, PRINT_WORDS_PROPERTY, "encoding", "tagSeparator"));

    /**
     * Reads the closed tag list, training ratio and print flag from {@code props}.
     *
     * @param props configuration; missing entries fall back to "no tag restriction",
     *              {@link #DEFAULT_TRAINING_RATIO} and {@code false} respectively
     * @throws NumberFormatException if the training ratio is present but not a number
     */
    private CountClosedTags(Properties props) {
        String tagList = props.getProperty(CLOSED_TAGS_PROPERTY);
        if (tagList != null) {
            this.closedTags = new TreeSet<String>();
            for (String piece : tagList.split("\\s+")) {
                // split() produces an empty first element when the list starts with
                // whitespace (or is empty); an empty string is never a real tag.
                if (!piece.isEmpty()) {
                    this.closedTags.add(piece);
                }
            }
        } else {
            this.closedTags = null;
        }

        String ratio = props.getProperty(TRAINING_RATIO_PROPERTY);
        this.trainingRatio = (ratio != null) ? Double.parseDouble(ratio) : DEFAULT_TRAINING_RATIO;
        this.printWords = Boolean.parseBoolean(props.getProperty(PRINT_WORDS_PROPERTY, "false"));
    }

    /**
     * Counts the sentences of {@code file} by iterating over a fresh reader.
     *
     * @param file the tagged file to scan
     * @return the number of sentences the reader produced
     */
    private static int countSentences(TaggedFileRecord file) throws IOException {
        int count = 0;
        for (List<TaggedWord> ignored : file.reader()) {
            ++count;
        }
        return count;
    }

    /**
     * Adds each word of {@code line} to the word set of its tag in {@code tagWordMap}.
     * When {@link #closedTags} is set, words whose tag is not in it are skipped.
     *
     * @param line       one tagged sentence
     * @param tagWordMap tag -> word set map to update in place
     */
    void addTaggedWords(List<TaggedWord> line, Map<String, Set<String>> tagWordMap) {
        for (TaggedWord taggedWord : line) {
            String tag = taggedWord.tag();
            if (this.closedTags != null && !this.closedTags.contains(tag)) {
                continue;
            }
            Set<String> words = tagWordMap.get(tag);
            if (words == null) {
                words = new TreeSet<String>();
                tagWordMap.put(tag, words);
            }
            words.add(taggedWord.word());
        }
    }

    /**
     * Reads a train file: the first {@code sentences * trainingRatio} sentences
     * (truncated to an int) are added to both {@link #trainingWords} and
     * {@link #allWords}; the remaining sentences are added to {@link #allWords} only.
     *
     * @param file the train file; it is read twice, once to count its sentences
     */
    void countTrainingTags(TaggedFileRecord file) throws IOException {
        int sentences = countSentences(file);
        int trainSentences = (int) (sentences * this.trainingRatio);

        TaggedFileReader reader = file.reader();
        for (int i = 0; i < trainSentences && reader.hasNext(); ++i) {
            List<TaggedWord> line = reader.next();
            addTaggedWords(line, this.trainingWords);
            addTaggedWords(line, this.allWords);
        }
        // Whatever is left is held-out data: it counts toward the totals only.
        while (reader.hasNext()) {
            addTaggedWords(reader.next(), this.allWords);
        }
    }

    /**
     * Reads a test file, adding every sentence to {@link #allWords} only.
     *
     * @param file the test file
     */
    void countTestTags(TaggedFileRecord file) throws IOException {
        for (List<TaggedWord> line : file.reader()) {
            addTaggedWords(line, this.allWords);
        }
    }

    /**
     * Prints one line per tag to standard output, in sorted tag order:
     * {@code tag numTrainingWords numTotalWords}. With {@link #printWords} set, the
     * training words follow, and then, after a {@code *}, the words that were not
     * seen in training. Finally prints the list of tags whose training word set is
     * non-empty and as large as the overall word set.
     *
     * <p>Tags named in {@link #closedTags} are reported even when they never
     * occurred in any file; such tags have empty word sets.
     */
    void report() {
        List<String> successfulTags = new ArrayList<String>();
        Set<String> tags = new TreeSet<String>();
        tags.addAll(this.allWords.keySet());
        tags.addAll(this.trainingWords.keySet());
        if (this.closedTags != null) {
            tags.addAll(this.closedTags);
        }

        for (String tag : tags) {
            // Either set may be absent (e.g. a closed tag that never occurred), so
            // both are normalized to an empty set instead of being dereferenced as null.
            Set<String> trainingSet = wordsFor(this.trainingWords, tag);
            Set<String> allSet = wordsFor(this.allWords, tag);
            int numTraining = trainingSet.size();
            int numTotal = allSet.size();

            if (numTraining == numTotal && numTraining > 0) {
                successfulTags.add(tag);
            }
            System.out.println(tag + " " + numTraining + " " + numTotal);
            if (!this.printWords) {
                continue;
            }

            for (String word : trainingSet) {
                System.out.print(" " + word);
            }
            if (numTraining < numTotal) {
                System.out.println();
                System.out.print(" *");
                for (String word : allSet) {
                    if (!trainingSet.contains(word)) {
                        System.out.print(" " + word);
                    }
                }
            }
            System.out.println();
        }
        System.out.println(successfulTags);
    }

    /** Returns the word set recorded for {@code tag}, or an empty set if there is none. */
    private static Set<String> wordsFor(Map<String, Set<String>> tagWordMap, String tag) {
        Set<String> words = tagWordMap.get(tag);
        return (words != null) ? words : Collections.<String>emptySet();
    }

    /**
     * Logs {@code error} (when non-empty) and terminates the JVM with exit status 2.
     *
     * @param error message to log, may be {@code null} or empty
     */
    private static void help(String error) {
        if (error != null && !error.isEmpty()) {
            log.info(error);
        }
        System.exit(2);
    }

    /**
     * Validates the command-line properties: a train file must be given and every
     * property name must be one of {@link #knownArgs}. Exits via
     * {@link #help(String)} on the first violation.
     *
     * @param props the parsed command-line properties
     */
    private static void checkArgs(Properties props) {
        if (!props.containsKey(TRAIN_FILE_PROPERTY)) {
            help("No trainFile specified");
        }
        for (String arg : props.stringPropertyNames()) {
            if (!knownArgs.contains(arg)) {
                help("Unknown arg " + arg);
            }
        }
    }

    /**
     * Entry point: switches stdout/stderr to UTF-8, parses and checks the arguments,
     * counts the train files and optional test files, and prints the report.
     *
     * @param args command-line arguments, converted with {@code StringUtils.argsToProperties}
     */
    public static void main(String[] args) throws Exception {
        System.setOut(new PrintStream(System.out, true, "UTF-8"));
        System.setErr(new PrintStream(System.err, true, "UTF-8"));

        Properties config = StringUtils.argsToProperties(args);
        checkArgs(config);
        CountClosedTags cct = new CountClosedTags(config);

        String trainFiles = config.getProperty(TRAIN_FILE_PROPERTY);
        String testFiles = config.getProperty(TEST_FILE_PROPERTY);

        for (TaggedFileRecord file : TaggedFileRecord.createRecords(config, trainFiles)) {
            cct.countTrainingTags(file);
        }
        if (testFiles != null) {
            for (TaggedFileRecord file : TaggedFileRecord.createRecords(config, testFiles)) {
                cct.countTestTags(file);
            }
        }
        cct.report();
    }
}

