/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.parser.lexparser;

import edu.stanford.nlp.fsm.FastExactAutomatonMinimizer;
import edu.stanford.nlp.fsm.QuasiDeterminizer;
import edu.stanford.nlp.fsm.TransducerGraph;
import edu.stanford.nlp.io.NumberRangeFileFilter;
import edu.stanford.nlp.parser.lexparser.BiLexPCFGParser;
import edu.stanford.nlp.parser.lexparser.BinaryGrammar;
import edu.stanford.nlp.parser.lexparser.BinaryRule;
import edu.stanford.nlp.parser.lexparser.EvaluateTreebank;
import edu.stanford.nlp.parser.lexparser.ExactGrammarCompactor;
import edu.stanford.nlp.parser.lexparser.ExhaustiveDependencyParser;
import edu.stanford.nlp.parser.lexparser.ExhaustivePCFGParser;
import edu.stanford.nlp.parser.lexparser.GrammarCompactor;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.parser.lexparser.Options;
import edu.stanford.nlp.parser.lexparser.ParentAnnotationStats;
import edu.stanford.nlp.parser.lexparser.PathExtractor;
import edu.stanford.nlp.parser.lexparser.Scorer;
import edu.stanford.nlp.parser.lexparser.TreeAnnotator;
import edu.stanford.nlp.parser.lexparser.UnaryGrammar;
import edu.stanford.nlp.parser.lexparser.UnaryRule;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.trees.HeadFinder;
import edu.stanford.nlp.trees.LeftHeadFinder;
import edu.stanford.nlp.trees.MemoryTreebank;
import edu.stanford.nlp.trees.PennTreebankLanguagePack;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.Treebank;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.Index;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.Timing;
import edu.stanford.nlp.util.Triple;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.FileFilter;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Date;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class GrammarCompactionTester {
    /** Redwood logging channel for this class. */
    private static final Redwood.RedwoodChannels log = Redwood.channels(GrammarCompactionTester.class);

    // Parser and scorer slots. They are declared here but never assigned or read
    // anywhere else in this class.
    ExhaustivePCFGParser parser = null;
    ExhaustiveDependencyParser dparser = null;
    BiLexPCFGParser bparser = null;
    Scorer scorer = null;

    /**
     * Parser options. runTest() forwards unrecognised flags to it and reads
     * trainOptions/tlpParams from it, and extractPaths() dereferences it as well,
     * so it must never be null; it therefore starts out as a default Options.
     */
    Options op = new Options();

    /** Compactor selected by runTest(); when null, testGrammarCompaction() skips the compaction step. */
    GrammarCompactor compactor = null;

    /** Paths produced by extractPaths() for the test file range. */
    Map<String, List<List<String>>> allTestPaths = Generics.newHashMap();
    /** Paths produced by extractPaths() for the training file range. */
    Map<String, List<List<String>>> allTrainPaths = Generics.newHashMap();

    /** Value of -saveToAscii; when non-null the trained parser is saved as text to this location. */
    String asciiOutputPath = null;
    /** Value of -path; the treebank location handed to the treebank loaders. */
    String path = "/u/nlp/stuff/corpora/Treebank3/parsed/mrg/wsj";

    // Bounds given to NumberRangeFileFilter for the training (-train) and test (-test) files.
    int trainLow = 200;
    int trainHigh = 2199;
    int testLow = 2200;
    int testHigh = 2219;

    // Raw command-line values. runTest() passes most of them, still as strings, to the
    // reflectively constructed compactors; they are not interpreted in this class.
    String suffixOrderString = null;
    String minArcNumString = null;
    /** Value of -maxMergeCost; stored by runTest() but not passed to any compactor in this class. */
    String maxMergeCostString = null;
    String sizeCutoffString = null;
    String minPortionArcsString = null;
    String ignoreUnsupportedSuffixesString = "false";
    String splitParamString = null;
    String costModelString = null;
    String verboseString = null;
    String minArcCostString = null;
    String trainThresholdString = null;
    String heldoutThresholdString = null;
    /** Value of -markovOrder; when >= 0 runTest() copies it into op.trainOptions and disables hSelSplit. */
    int markovOrder = -1;
    String smoothParamString = null;
    String scoringData = null;
    /** Value of -allowEpsilons; stored by runTest() but not read elsewhere in this class. */
    String allowEpsilonsString = null;
    /** Set by -saveGraphs; forwarded to the ExactGrammarCompactor constructor. */
    boolean saveGraphs = false;

    // Set by -index; not read anywhere else in this class.
    private int indexRangeLow;
    private int indexRangeHigh;
    // Set by -outputFile / -inputFile; not read anywhere else in this class.
    private String outputFile = null;
    private String inputFile = null;
    /** Set by -toy; makes runTest() call buildAndCompactToyGrammars() instead of testGrammarCompaction(). */
    private boolean toy = false;

    /**
     * Loads a treebank from {@code path}, optionally annotates every tree, and
     * runs a {@link PathExtractor} over the resulting trees.
     *
     * <p>Side effects on {@code op.trainOptions}: when {@code selectiveSplit} is
     * set, {@code splitters} is recomputed from the loaded treebank; when
     * {@code selectivePostSplit} is set, {@code postSplitters} is recomputed from
     * an annotated copy of it.
     *
     * @param path     location handed to {@code MemoryTreebank.loadPath}
     * @param low      lower bound given to the {@link NumberRangeFileFilter}
     * @param high     upper bound given to the {@link NumberRangeFileFilter}
     * @param annotate whether each tree is passed through a {@link TreeAnnotator}
     *                 before path extraction
     * @return the value produced by {@code PathExtractor.extract}
     */
    public Map<String, List<List<String>>> extractPaths(String path, int low, int high, boolean annotate) {
        MemoryTreebank trainTreebank = this.op.tlpParams.memoryTreebank();
        trainTreebank.loadPath(path, (FileFilter)new NumberRangeFileFilter(low, high, true));
        if (this.op.trainOptions.selectiveSplit) {
            this.op.trainOptions.splitters = ParentAnnotationStats.getSplitCategories(trainTreebank, this.op.trainOptions.selectiveSplitCutOff, this.op.tlpParams.treebankLanguagePack());
        }
        if (this.op.trainOptions.selectivePostSplit) {
            TreeAnnotator myTransformer = new TreeAnnotator(this.op.tlpParams.headFinder(), this.op.tlpParams, this.op);
            Treebank annotatedTB = ((Treebank)trainTreebank).transform(myTransformer);
            this.op.trainOptions.postSplitters = ParentAnnotationStats.getSplitCategories(annotatedTB, this.op.trainOptions.selectivePostSplitCutOff, this.op.tlpParams.treebankLanguagePack());
        }
        // The same head finder drives both the annotator and the path extractor.
        HeadFinder hf = this.op.trainOptions.leftToRight ? new LeftHeadFinder() : this.op.tlpParams.headFinder();
        TreeAnnotator annotator = new TreeAnnotator(hf, this.op.tlpParams, this.op);
        List<Tree> trainTrees = new ArrayList<Tree>();
        for (Tree tree : trainTreebank) {
            trainTrees.add(annotate ? annotator.transformTree(tree) : tree);
        }
        PathExtractor pExtractor = new PathExtractor(hf, this.op);
        // The decompiled call site only shows an erased Map; the declared return type
        // of this method is the element type the callers rely on.
        @SuppressWarnings("unchecked")
        Map<String, List<List<String>>> allPaths = (Map<String, List<List<String>>>)pExtractor.extract(trainTrees);
        return allPaths;
    }

    /**
     * Command-line entry point: creates a tester and hands it the arguments.
     *
     * @param args command-line arguments, forwarded unchanged to {@link #runTest(String[])}
     */
    public static void main(String[] args) {
        GrammarCompactionTester tester = new GrammarCompactionTester();
        tester.runTest(args);
    }

    /**
     * Parses the command line, picks a grammar compactor according to
     * {@code op.trainOptions.compactGrammar()}, and then runs either the toy
     * experiment ({@code -toy}) or the full train/compact/evaluate experiment.
     *
     * <p>Flags are matched case-insensitively. {@code -train}, {@code -test} and
     * {@code -index} take two integers; {@code -saveGraphs} and {@code -toy} take
     * no value; every other flag handled here takes one value. Any flag that is
     * not recognised, or that is not followed by enough arguments, is handed to
     * {@code op.setOptionOrWarn}.
     *
     * <p>NOTE(review): this method dereferences {@code op}, so {@code op} must be
     * non-null when it is called.
     *
     * @param args command-line arguments; parsing stops at the first flag position
     *             whose argument does not start with "-"
     * @throws NumberFormatException if a numeric flag value is not an integer
     * @throws RuntimeException if the category-merging compactor cannot be created
     */
    public void runTest(String[] args) {
        System.out.println("Currently " + new Date());
        System.out.print("Invoked with arguments:");
        for (String arg : args) {
            System.out.print(" " + arg);
        }
        System.out.println();
        this.parseArguments(args);
        this.op.trainOptions.sisterSplitters = Generics.newHashSet(Arrays.asList(this.op.tlpParams.sisterSplitters()));
        this.instantiateCompactor();
        if (this.markovOrder >= 0) {
            this.op.trainOptions.markovOrder = this.markovOrder;
            this.op.trainOptions.hSelSplit = false;
        }
        if (this.toy) {
            this.buildAndCompactToyGrammars();
        } else {
            this.testGrammarCompaction();
        }
    }

    /** Walks the leading flags of {@code args} and stores their values in the matching fields. */
    private void parseArguments(String[] args) {
        int i = 0;
        while (i < args.length && args[i].startsWith("-")) {
            String flag = args[i];
            // Number of arguments that follow the flag itself.
            int remaining = args.length - i - 1;
            if (flag.equalsIgnoreCase("-saveGraphs")) {
                this.saveGraphs = true;
                ++i;
            } else if (flag.equalsIgnoreCase("-toy")) {
                this.toy = true;
                ++i;
            } else if (remaining >= 2 && this.setRangeOption(flag, args[i + 1], args[i + 2])) {
                i += 3;
            } else if (remaining >= 1 && this.setValueOption(flag, args[i + 1])) {
                i += 2;
            } else {
                // Unknown flag, or a known flag without enough values: let Options deal with it.
                i = this.op.setOptionOrWarn(args, i);
            }
        }
    }

    /** Handles the two-integer flags; returns false when {@code flag} is not one of them. */
    private boolean setRangeOption(String flag, String low, String high) {
        if (flag.equalsIgnoreCase("-train")) {
            this.trainLow = Integer.parseInt(low);
            this.trainHigh = Integer.parseInt(high);
        } else if (flag.equalsIgnoreCase("-test")) {
            this.testLow = Integer.parseInt(low);
            this.testHigh = Integer.parseInt(high);
        } else if (flag.equalsIgnoreCase("-index")) {
            this.indexRangeLow = Integer.parseInt(low);
            this.indexRangeHigh = Integer.parseInt(high);
        } else {
            return false;
        }
        return true;
    }

    /** Handles the single-value flags; returns false when {@code flag} is not one of them. */
    private boolean setValueOption(String flag, String value) {
        if (flag.equalsIgnoreCase("-path")) {
            this.path = value;
        } else if (flag.equalsIgnoreCase("-saveToAscii")) {
            this.asciiOutputPath = value;
        } else if (flag.equalsIgnoreCase("-outputFile")) {
            this.outputFile = value;
        } else if (flag.equalsIgnoreCase("-inputFile")) {
            this.inputFile = value;
        } else if (flag.equalsIgnoreCase("-suffixOrder")) {
            this.suffixOrderString = value;
        } else if (flag.equalsIgnoreCase("-minArcNum")) {
            this.minArcNumString = value;
        } else if (flag.equalsIgnoreCase("-maxMergeCost")) {
            this.maxMergeCostString = value;
        } else if (flag.equalsIgnoreCase("-sizeCutoff")) {
            this.sizeCutoffString = value;
        } else if (flag.equalsIgnoreCase("-minPortionArcs")) {
            this.minPortionArcsString = value;
        } else if (flag.equalsIgnoreCase("-ignoreUnsupportedSuffixes")) {
            this.ignoreUnsupportedSuffixesString = value;
        } else if (flag.equalsIgnoreCase("-trainThreshold")) {
            this.trainThresholdString = value;
        } else if (flag.equalsIgnoreCase("-heldoutThreshold")) {
            this.heldoutThresholdString = value;
        } else if (flag.equalsIgnoreCase("-minArcCost")) {
            this.minArcCostString = value;
        } else if (flag.equalsIgnoreCase("-splitParam")) {
            this.splitParamString = value;
        } else if (flag.equalsIgnoreCase("-costModel")) {
            this.costModelString = value;
        } else if (flag.equalsIgnoreCase("-scoringData")) {
            this.scoringData = value;
        } else if (flag.equalsIgnoreCase("-verbose")) {
            this.verboseString = value;
        } else if (flag.equalsIgnoreCase("-allowEpsilons")) {
            this.allowEpsilonsString = value;
        } else if (flag.equalsIgnoreCase("-markovOrder")) {
            this.markovOrder = Integer.parseInt(value);
        } else if (flag.equalsIgnoreCase("-smoothParam")) {
            this.smoothParamString = value;
        } else {
            return false;
        }
        return true;
    }

    /** Sets {@code compactor} for compaction modes 3, 4 and 5; other modes leave it untouched. */
    private void instantiateCompactor() {
        int mode = this.op.trainOptions.compactGrammar();
        if (mode == 4) {
            System.out.println("Instantiating fsm.LossyGrammarCompactor");
            try {
                this.compactor = GrammarCompactionTester.newStringArgCompactor("fsm.LossyGrammarCompactor", this.suffixOrderString, this.minArcNumString, this.trainThresholdString, this.heldoutThresholdString, this.sizeCutoffString, this.minPortionArcsString, this.splitParamString, this.ignoreUnsupportedSuffixesString, this.minArcCostString, this.smoothParamString, this.costModelString, this.scoringData, this.verboseString);
            }
            catch (Exception e) {
                // Best effort: the run continues without a compactor in this mode.
                log.info("Couldn't instantiate GrammarCompactor: " + e);
                e.printStackTrace();
            }
        } else if (mode == 5) {
            System.out.println("Instantiating fsm.CategoryMergingGrammarCompactor");
            try {
                this.compactor = GrammarCompactionTester.newStringArgCompactor("fsm.CategoryMergingGrammarCompactor", this.splitParamString, this.trainThresholdString, this.heldoutThresholdString, this.minArcCostString, this.ignoreUnsupportedSuffixesString, this.smoothParamString);
            }
            catch (Exception e) {
                // Keep the original exception as the cause so its stack trace is not lost.
                throw new RuntimeException("Couldn't instantiate CategoryMergingGrammarCompactor." + e, e);
            }
        } else if (mode == 3) {
            System.out.println("Instantiating fsm.ExactGrammarCompactor");
            this.compactor = new ExactGrammarCompactor(this.op, this.saveGraphs, true);
        }
    }

    /**
     * Reflectively creates a compactor through the constructor of {@code className}
     * that takes one String parameter per element of {@code ctorArgs}.
     */
    private static GrammarCompactor newStringArgCompactor(String className, String... ctorArgs) throws Exception {
        Class<?>[] argTypes = new Class<?>[ctorArgs.length];
        Arrays.fill(argTypes, String.class);
        return (GrammarCompactor)Class.forName(className).getConstructor(argTypes).newInstance((Object[])ctorArgs);
    }

    /**
     * Re-expresses a grammar in terms of {@code newIndex} and rebuilds it from
     * its rules in sorted order.
     *
     * <p>The rule objects of the input grammars are modified in place: their
     * state numbers are rewritten via {@code translate} and the same rule
     * objects are then added to the new grammars.
     *
     * @param grammar  the unary and binary grammar to translate
     * @param oldIndex index the rules' state numbers currently refer to
     * @param newIndex index the translated state numbers refer to; states are
     *                 added to it through {@code addToIndex}
     * @return a pair holding the new unary grammar (after {@code purgeRules()})
     *         and the new binary grammar (after {@code splitRules()})
     */
    public Pair<UnaryGrammar, BinaryGrammar> translateAndSort(Pair<UnaryGrammar, BinaryGrammar> grammar, Index<String> oldIndex, Index<String> newIndex) {
        System.out.println("oldIndex.size()" + oldIndex.size() + " newIndex.size()" + newIndex.size());
        UnaryGrammar ug = grammar.first;
        List<UnaryRule> unaryRules = new ArrayList<UnaryRule>();
        for (UnaryRule rule : ug.rules()) {
            rule.parent = GrammarCompactionTester.translate(rule.parent, oldIndex, newIndex);
            rule.child = GrammarCompactionTester.translate(rule.child, oldIndex, newIndex);
            unaryRules.add(rule);
        }
        Collections.sort(unaryRules);
        UnaryGrammar newUG = new UnaryGrammar(newIndex);
        for (UnaryRule rule : unaryRules) {
            newUG.addRule(rule);
        }
        newUG.purgeRules();
        BinaryGrammar bg = grammar.second;
        List<BinaryRule> binaryRules = new ArrayList<BinaryRule>();
        for (BinaryRule rule : bg.rules()) {
            rule.parent = GrammarCompactionTester.translate(rule.parent, oldIndex, newIndex);
            rule.leftChild = GrammarCompactionTester.translate(rule.leftChild, oldIndex, newIndex);
            rule.rightChild = GrammarCompactionTester.translate(rule.rightChild, oldIndex, newIndex);
            binaryRules.add(rule);
        }
        // Sort the binary rules; previously the unary list was sorted a second time here.
        Collections.sort(binaryRules);
        BinaryGrammar newBG = new BinaryGrammar(newIndex);
        for (BinaryRule rule : binaryRules) {
            newBG.addRule(rule);
        }
        newBG.splitRules();
        return Generics.newPair(newUG, newBG);
    }

    /**
     * Maps a state number from one index to another by looking up the entry
     * stored at {@code i} in {@code oldIndex} and passing it to
     * {@code newIndex.addToIndex}.
     *
     * @return the value returned by {@code newIndex.addToIndex}
     */
    private static int translate(int i, Index<String> oldIndex, Index<String> newIndex) {
        String symbol = oldIndex.get(i);
        return newIndex.addToIndex(symbol);
    }

    /**
     * Returns {@code i} unless the entry stored at {@code i} in {@code n} is
     * "NP^PP"; in that case prints "changed" and returns the result of adding
     * "NP-987928374" to the index.
     *
     * @param i position to inspect in {@code n}
     * @param n index that is read and, on a match, extended
     * @return {@code i}, or the value returned by {@code addToIndex} on a match
     */
    public int changeIfNecessary(int i, Index<String> n) {
        String label = n.get(i);
        if (!label.equals("NP^PP")) {
            return i;
        }
        System.out.println("changed");
        return n.addToIndex("NP-987928374");
    }

    /**
     * Checks that every rule of {@code l1} has an equal rule with the same score
     * in {@code l2}, printing each discrepancy to standard output.
     *
     * <p>Rules of {@code l2} that match nothing in {@code l1} are printed as
     * "left over" but do not change the result.
     * NOTE(review): confirm whether left-over rules should make the lists unequal.
     *
     * @param l1 rules to look up
     * @param l2 rules to look them up in
     * @return {@code false} if some rule of {@code l1} is missing from {@code l2}
     *         or has a different score there, {@code true} otherwise
     */
    public boolean equalsBinary(List<BinaryRule> l1, List<BinaryRule> l2) {
        // Map each rule to itself so the stored instance can be retrieved by equality.
        Map<BinaryRule, BinaryRule> map1 = Generics.newHashMap();
        for (BinaryRule rule : l1) {
            map1.put(rule, rule);
        }
        Map<BinaryRule, BinaryRule> map2 = Generics.newHashMap();
        for (BinaryRule rule : l2) {
            map2.put(rule, rule);
        }
        boolean isEqual = true;
        for (BinaryRule rule1 : map1.keySet()) {
            BinaryRule rule2 = map2.get(rule1);
            if (rule2 == null) {
                System.out.println("no rule for " + rule1);
                isEqual = false;
                continue;
            }
            map2.remove(rule2);
            if (rule1.score != rule2.score) {
                System.out.println(rule1 + " and " + rule2 + " have diff scores");
                isEqual = false;
            }
        }
        System.out.println("left over: " + map2.keySet());
        return isEqual;
    }

    /**
     * Checks that every rule of {@code l1} has an equal rule with the same score
     * in {@code l2}, printing each discrepancy to standard output.
     *
     * <p>Rules of {@code l2} that match nothing in {@code l1} are printed as
     * "left over" but do not change the result.
     * NOTE(review): confirm whether left-over rules should make the lists unequal.
     *
     * @param l1 rules to look up
     * @param l2 rules to look them up in
     * @return {@code false} if some rule of {@code l1} is missing from {@code l2}
     *         or has a different score there, {@code true} otherwise
     */
    public boolean equalsUnary(List<UnaryRule> l1, List<UnaryRule> l2) {
        // Map each rule to itself so the stored instance can be retrieved by equality.
        Map<UnaryRule, UnaryRule> map1 = Generics.newHashMap();
        for (UnaryRule rule : l1) {
            map1.put(rule, rule);
        }
        Map<UnaryRule, UnaryRule> map2 = Generics.newHashMap();
        for (UnaryRule rule : l2) {
            map2.put(rule, rule);
        }
        boolean isEqual = true;
        for (UnaryRule rule1 : map1.keySet()) {
            UnaryRule rule2 = map2.get(rule1);
            if (rule2 == null) {
                System.out.println("no rule for " + rule1);
                isEqual = false;
                continue;
            }
            map2.remove(rule2);
            if (rule1.score != rule2.score) {
                System.out.println(rule1 + " and " + rule2 + " have diff scores");
                isEqual = false;
            }
        }
        System.out.println("left over: " + map2.keySet());
        return isEqual;
    }

    /**
     * Compares two sets and prints how they differ: a size mismatch, the
     * elements found only in {@code set1}, and the elements found only in
     * {@code set2}. Neither argument is modified.
     *
     * @param set1 first set; any {@link Set} implementation
     * @param set2 second set; any {@link Set} implementation
     * @return {@code true} if the sizes match and neither set has an element the
     *         other lacks
     */
    private static <T> boolean equalSets(Set<T> set1, Set<T> set2) {
        boolean isEqual = true;
        if (set1.size() != set2.size()) {
            System.out.println("sizes different: " + set1.size() + " vs. " + set2.size());
            isEqual = false;
        }
        // Copy instead of casting to HashSet and cloning, so every Set implementation works.
        Set<T> onlyInSet1 = new HashSet<T>(set1);
        onlyInSet1.removeAll(set2);
        if (!onlyInSet1.isEmpty()) {
            isEqual = false;
            System.out.println("set1 left with: " + onlyInSet1);
        }
        Set<T> onlyInSet2 = new HashSet<T>(set2);
        onlyInSet2.removeAll(set1);
        if (!onlyInSet2.isEmpty()) {
            isEqual = false;
            System.out.println("set2 left with: " + onlyInSet2);
        }
        return isEqual;
    }

    /**
     * Adds up the sizes of all the lists contained in {@code paths}.
     *
     * @param paths list of paths
     * @return the sum of {@code size()} over every element of {@code paths}
     */
    private static <T> int numTokens(List<List<T>> paths) {
        int total = 0;
        java.util.Iterator<List<T>> it = paths.iterator();
        while (it.hasNext()) {
            total = total + it.next().size();
        }
        return total;
    }

    /**
     * Toy experiment: extracts the training paths, and for every key builds a
     * transducer graph from the paths that occur at least twice, writes it out,
     * runs it through normalisation, quasi-determinisation and exact
     * minimisation, and writes the minimised graph out as well.
     *
     * <p>Side effects: replaces {@code allTrainPaths}, removes the low-count
     * paths from each of its lists, prints progress to standard output, and
     * writes graphs through {@code GrammarCompactor.writeFile} under the names
     * "unminimized" and "exactminimized". Keys whose graph has no arcs are skipped.
     */
    public void buildAndCompactToyGrammars() {
        System.out.print("Extracting other paths...");
        this.allTrainPaths = this.extractPaths(this.path, this.trainLow, this.trainHigh, true);
        TransducerGraph.SetToStringNodeProcessor ntsp = new TransducerGraph.SetToStringNodeProcessor(new PennTreebankLanguagePack());
        TransducerGraph.InputSplittingProcessor isp = new TransducerGraph.InputSplittingProcessor();
        TransducerGraph.OutputCombiningProcessor ocp = new TransducerGraph.OutputCombiningProcessor();
        TransducerGraph.NormalizingGraphProcessor normalizer = new TransducerGraph.NormalizingGraphProcessor(false);
        QuasiDeterminizer quasiDeterminizer = new QuasiDeterminizer();
        FastExactAutomatonMinimizer exactMinimizer = new FastExactAutomatonMinimizer();
        for (Map.Entry<String, List<List<String>>> entry : this.allTrainPaths.entrySet()) {
            String key = entry.getKey();
            List<List<String>> paths = entry.getValue();
            System.out.println("creating graph for " + key);
            ClassicCounter<List<String>> pathCounter = new ClassicCounter<List<String>>();
            for (List<String> onePath : paths) {
                pathCounter.incrementCount(onePath);
            }
            ClassicCounter<List<String>> newPathCounter = GrammarCompactionTester.removeLowCountPaths(pathCounter, 2.0);
            // Drop the low-count paths from the stored list as well.
            paths.retainAll(newPathCounter.keySet());
            TransducerGraph result = TransducerGraph.createGraphFromPaths(newPathCounter, 1000);
            int numArcs = result.getArcs().size();
            int numNodes = result.getNodes().size();
            if (numArcs == 0) {
                continue;
            }
            System.out.println("initial graph has " + numArcs + " arcs and " + numNodes + " nodes.");
            GrammarCompactor.writeFile(result, "unminimized", key);
            // The order of the following processing steps is significant.
            result = normalizer.processGraph(result);
            result = quasiDeterminizer.processGraph(result);
            result = new TransducerGraph(result, ocp);
            result = exactMinimizer.minimizeFA(result);
            result = new TransducerGraph(result, ntsp);
            result = new TransducerGraph(result, isp);
            numArcs = result.getArcs().size();
            numNodes = result.getNodes().size();
            System.out.println("after exact minimization graph has " + numArcs + " arcs and " + numNodes + " nodes.");
            GrammarCompactor.writeFile(result, "exactminimized", key);
        }
    }

    /**
     * Builds a new counter holding only the paths whose count in {@code paths}
     * is at least {@code thresh}, and prints how many paths were kept. The input
     * counter is not modified.
     *
     * @param paths  counter of paths
     * @param thresh minimum count a path needs in order to be kept
     * @return a new counter with the kept paths and their original counts
     */
    private static ClassicCounter<List<String>> removeLowCountPaths(ClassicCounter<List<String>> paths, double thresh) {
        ClassicCounter<List<String>> kept = new ClassicCounter<List<String>>();
        int keptCount = 0;
        for (List<String> candidate : paths.keySet()) {
            double occurrences = paths.getCount(candidate);
            if (occurrences >= thresh) {
                kept.setCount(candidate, occurrences);
                keptCount++;
            }
        }
        System.out.println("retained " + keptCount);
        return kept;
    }

    /**
     * Full experiment: trains a {@link LexicalizedParser} from the training
     * files, optionally compacts its grammar, optionally saves it as text, and
     * evaluates it on the test files.
     *
     * <p>Existing {@code op} settings are kept; a default {@link Options} is
     * created only when {@code op} is null. When {@code compactor} is non-null,
     * {@code allTrainPaths} and {@code allTestPaths} are recomputed and the
     * parser's {@code stateIndex}, {@code ug} and {@code bg} are replaced by the
     * compacted versions. When {@code asciiOutputPath} is non-null, the parser
     * is written there with {@code saveParserToTextFile}.
     */
    public void testGrammarCompaction() {
        // Do not throw away options that were already configured on this tester.
        if (this.op == null) {
            this.op = new Options();
        }
        LexicalizedParser lp = LexicalizedParser.trainFromTreebank(this.path, new NumberRangeFileFilter(this.trainLow, this.trainHigh, true), this.op);
        if (this.compactor != null) {
            Timing.startTime();
            System.out.print("Extracting other paths...");
            this.allTrainPaths = this.extractPaths(this.path, this.trainLow, this.trainHigh, true);
            this.allTestPaths = this.extractPaths(this.path, this.testLow, this.testHigh, true);
            Timing.tick("done");
            Timing.startTime();
            System.out.print("Compacting grammars...");
            Pair<UnaryGrammar, BinaryGrammar> grammar = Generics.newPair(lp.ug, lp.bg);
            Triple<Index<String>, UnaryGrammar, BinaryGrammar> compactedGrammar = this.compactor.compactGrammar(grammar, this.allTrainPaths, this.allTestPaths, lp.stateIndex);
            lp.stateIndex = compactedGrammar.first();
            lp.ug = compactedGrammar.second();
            lp.bg = compactedGrammar.third();
            Timing.tick("done.");
        }
        if (this.asciiOutputPath != null) {
            lp.saveParserToTextFile(this.asciiOutputPath);
        }
        MemoryTreebank testTreebank = this.op.tlpParams.testMemoryTreebank();
        testTreebank.loadPath(this.path, (FileFilter)new NumberRangeFileFilter(this.testLow, this.testHigh, true));
        System.out.println("Currently " + new Date());
        EvaluateTreebank evaluator = new EvaluateTreebank(lp);
        evaluator.testOnTreebank(testTreebank);
        System.out.println("Currently " + new Date());
    }
}

