/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.patterns;

import edu.stanford.nlp.classify.Classifier;
import edu.stanford.nlp.classify.Dataset;
import edu.stanford.nlp.classify.GeneralDataset;
import edu.stanford.nlp.classify.LinearClassifier;
import edu.stanford.nlp.classify.LinearClassifierFactory;
import edu.stanford.nlp.classify.LogPrior;
import edu.stanford.nlp.classify.LogisticClassifier;
import edu.stanford.nlp.classify.LogisticClassifierFactory;
import edu.stanford.nlp.classify.MultinomialLogisticClassifier;
import edu.stanford.nlp.classify.RVFDataset;
import edu.stanford.nlp.classify.SVMLightClassifier;
import edu.stanford.nlp.classify.SVMLightClassifierFactory;
import edu.stanford.nlp.classify.ShiftParamsLogisticClassifierFactory;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.BasicDatum;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.ling.RVFDatum;
import edu.stanford.nlp.patterns.CandidatePhrase;
import edu.stanford.nlp.patterns.ConstantsAndVariables;
import edu.stanford.nlp.patterns.Data;
import edu.stanford.nlp.patterns.DataInstance;
import edu.stanford.nlp.patterns.GetPatternsFromDataMultiClass;
import edu.stanford.nlp.patterns.Pattern;
import edu.stanford.nlp.patterns.PatternFactory;
import edu.stanford.nlp.patterns.PatternsAnnotations;
import edu.stanford.nlp.patterns.PhraseScorer;
import edu.stanford.nlp.patterns.dep.DataInstanceDep;
import edu.stanford.nlp.patterns.dep.ExtractPhraseFromPattern;
import edu.stanford.nlp.patterns.dep.ExtractedPhrase;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counter;
import edu.stanford.nlp.stats.Counters;
import edu.stanford.nlp.stats.TwoDimensionalCounter;
import edu.stanford.nlp.util.ArgumentParser;
import edu.stanford.nlp.util.ArrayUtils;
import edu.stanford.nlp.util.BinaryHeapPriorityQueue;
import edu.stanford.nlp.util.CollectionUtils;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.IntPair;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.Quintuple;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.TypesafeMap;
import edu.stanford.nlp.util.concurrent.AtomicDouble;
import edu.stanford.nlp.util.concurrent.ConcurrentHashCounter;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.BufferedWriter;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.function.Predicate;
import java.util.stream.Collectors;

public class ScorePhrasesLearnFeatWt<E extends Pattern>
extends PhraseScorer<E> {
    // Selects which branch of learnClassifier is used to train the phrase classifier;
    // the constructor initializes it to ClassifierType.LR.
    @ArgumentParser.Option(name="scoreClassifierType")
    private ClassifierType scoreClassifierType;
    // Word -> vector, shared by all instances. Filled once by the constructor from
    // constVars.wordVectorFile; stays null when word-vector similarity is not enabled.
    private static Map<String, double[]> wordVectors = null;
    // Raw per-phrase feature scores; cleared at the start of learnClassifier and dumped to a file there.
    public TwoDimensionalCounter<CandidatePhrase, ConstantsAndVariables.ScorePhraseMeasures> phraseScoresRaw;
    // Cache of wordClass(...) results, keyed by phrase.
    ConcurrentHashMap<CandidatePhrase, Counter<Integer>> wordClassClustersForPhrase;
    // Both counters start as null and are created on the first call to choosedatums.
    Counter<CandidatePhrase> closeToPositivesFirstIter;
    Counter<CandidatePhrase> closeToNegativesFirstIter;
    // Cache of pairwise similarities computed from the word vectors, shared by all instances.
    static Counter<PhrasePair> cacheSimilarities = new ConcurrentHashCounter<PhrasePair>();
    // phrase text -> label -> statistics array indexed by PhraseScorer.Similarities ordinals.
    static Map<String, Map<String, double[]>> similaritiesWithLabeledPhrases = new ConcurrentHashMap<String, Map<String, double[]>>();

    /**
     * Creates the scorer and, when word-vector similarity is enabled and the vectors
     * have not been loaded yet, reads them from {@code constVars.wordVectorFile} into
     * the static {@code wordVectors} map.
     *
     * <p>A vector is kept only for a word that occurs in {@code Data.rawFreq}, is a stop
     * word, is in the English word list, or is reported by
     * {@code constvar.hasSeedWordOrOtherSem}. For every other word the temporary
     * {@link CandidatePhrase} created for the lookup is deleted again.
     *
     * @param constvar shared configuration and state; passed on to the superclass
     */
    public ScorePhrasesLearnFeatWt(ConstantsAndVariables constvar) {
        // The superclass call has to be the first statement of a constructor.
        super(constvar);
        this.scoreClassifierType = ClassifierType.LR;
        this.phraseScoresRaw = new TwoDimensionalCounter<CandidatePhrase, ConstantsAndVariables.ScorePhraseMeasures>();
        this.wordClassClustersForPhrase = new ConcurrentHashMap<CandidatePhrase, Counter<Integer>>();
        this.closeToPositivesFirstIter = null;
        this.closeToNegativesFirstIter = null;

        boolean similarityConsumerEnabled = constvar.subsampleUnkAsNegUsingSim
                || constvar.expandPositivesWhenSampling
                || constvar.expandNegativesWhenSampling
                || this.constVars.usePhraseEvalWordVector;
        if (constvar.useWordVectorsToComputeSim && similarityConsumerEnabled && wordVectors == null) {
            if (Data.rawFreq == null) {
                Data.rawFreq = new ClassicCounter<CandidatePhrase>();
                Data.computeRawFreqIfNull(PatternFactory.numWordsCompoundMax, constvar.batchProcessSents);
            }
            Redwood.log(Redwood.DBG, "Reading word vectors");
            // Fill a local map first so the shared static field never exposes a half-read
            // table, and stays null (allowing a retry) if reading fails part-way.
            Map<String, double[]> loaded = new HashMap<String, double[]>();
            for (String line : IOUtils.readLines(this.constVars.wordVectorFile)) {
                String[] tok = line.split("\\s+");
                String word = tok[0];
                CandidatePhrase p = CandidatePhrase.createOrGet(word);
                boolean keep = Data.rawFreq.containsKey(p)
                        || constvar.getStopWords().contains(p)
                        || constvar.getEnglishWords().contains(word)
                        || constvar.hasSeedWordOrOtherSem(p);
                if (!keep) {
                    CandidatePhrase.deletePhrase(p);
                    continue;
                }
                // First token is the word, the remaining tokens are the vector components.
                double[] d = new double[tok.length - 1];
                for (int i = 1; i < tok.length; ++i) {
                    d[i - 1] = Double.parseDouble(tok[i]);
                }
                loaded.put(word, d);
            }
            wordVectors = loaded;
            Redwood.log(Redwood.DBG, "Read " + wordVectors.size() + " word vectors");
        }
        this.OOVExternalFeatWt = 0.0;
        this.OOVdictOdds = 0.0;
        this.OOVDomainNgramScore = 0.0;
        this.OOVGoogleNgramScore = 0.0;
    }

    /**
     * Trains a classifier for {@code label} on the dataset produced by
     * {@code choosedatums}, using the algorithm selected by {@code scoreClassifierType}.
     *
     * <p>Side effects: clears {@code phraseScoresRaw} and {@code learnedScores}, loads the
     * domain n-grams when {@code Data.domainNGramsFile} is set, creates
     * {@code Data.rawFreq} when it is null, logs up to 600 feature weights, and writes
     * the contents of {@code phraseScoresRaw} to {@code tempscorestrainer.txt} in the
     * working directory.
     *
     * @param label               label the classifier is trained for
     * @param forLearningPatterns forwarded to {@code choosedatums}
     * @param wordsPatExtracted   forwarded to {@code choosedatums}; callers in this class may pass null
     * @param allSelectedPatterns forwarded to {@code choosedatums}; callers in this class may pass null
     * @return the trained classifier
     * @throws IOException      if the dataset cannot be built or the score dump cannot be written
     * @throws RuntimeException if {@code scoreClassifierType} is not LR, SVM, SHIFTLR or LINEAR
     */
    public Classifier learnClassifier(String label, boolean forLearningPatterns, TwoDimensionalCounter<CandidatePhrase, E> wordsPatExtracted, Counter<E> allSelectedPatterns) throws IOException, ClassNotFoundException {
        Set<String> labels;
        List<Pair<ConstantsAndVariables.ScorePhraseMeasures, Double>> wtd;
        Counter<ConstantsAndVariables.ScorePhraseMeasures> weights;
        Classifier<String, ConstantsAndVariables.ScorePhraseMeasures> classifier;
        this.phraseScoresRaw.clear();
        this.learnedScores.clear();
        if (Data.domainNGramsFile != null) {
            Data.loadDomainNGrams();
        }
        // Raw frequencies are computed while sampling datums only if nobody computed them before.
        boolean computeRawFreq = false;
        if (Data.rawFreq == null) {
            Data.rawFreq = new ClassicCounter<CandidatePhrase>();
            computeRawFreq = true;
        }
        GeneralDataset<String, ConstantsAndVariables.ScorePhraseMeasures> dataset = this.choosedatums(forLearningPatterns, label, wordsPatExtracted, allSelectedPatterns, computeRawFreq);
        if (this.scoreClassifierType.equals(ClassifierType.LR)) {
            LogisticClassifierFactory<String, ConstantsAndVariables.ScorePhraseMeasures> logfactory = new LogisticClassifierFactory<String, ConstantsAndVariables.ScorePhraseMeasures>();
            LogPrior lprior = new LogPrior();
            lprior.setSigma(this.constVars.LRSigma);
            LogisticClassifier<String, ConstantsAndVariables.ScorePhraseMeasures> logisticClassifier = classifier = logfactory.trainClassifier(dataset, lprior, false);
            String l = logisticClassifier.getLabelForInternalPositiveClass();
            weights = logisticClassifier.weightsAsCounter();
            // Flip the signs when the internal positive class is "false" so the logged
            // weights always read as evidence for "true".
            if (l.equals(Boolean.FALSE.toString())) {
                Counters.multiplyInPlace(weights, -1.0);
            }
            wtd = Counters.toDescendingMagnitudeSortedListWithCounts(weights);
            Redwood.log(ConstantsAndVariables.minimaldebug, "The weights are " + StringUtils.join(wtd.subList(0, Math.min(wtd.size(), 600)), "\n"));
        } else if (this.scoreClassifierType.equals(ClassifierType.SVM)) {
            SVMLightClassifierFactory svmcf = new SVMLightClassifierFactory(true);
            classifier = svmcf.trainClassifier((GeneralDataset)dataset);
            labels = Generics.newHashSet(Arrays.asList("true"));
            List list = ((SVMLightClassifier)classifier).getTopFeatures(labels, 0.0, true, 600, true);
            Redwood.log(ConstantsAndVariables.minimaldebug, "The weights are " + StringUtils.join(list, "\n"));
        } else if (this.scoreClassifierType.equals(ClassifierType.SHIFTLR)) {
            // This factory is trained on BasicDatum instances, so the real-valued datums
            // are converted to their feature collections first.
            Dataset<String, ConstantsAndVariables.ScorePhraseMeasures> newdataset = new Dataset<String, ConstantsAndVariables.ScorePhraseMeasures>();
            for (RVFDatum<String, ConstantsAndVariables.ScorePhraseMeasures> rVFDatum : dataset) {
                ((GeneralDataset)newdataset).add(new BasicDatum<String, ConstantsAndVariables.ScorePhraseMeasures>(rVFDatum.asFeatures(), rVFDatum.label()));
            }
            ShiftParamsLogisticClassifierFactory shiftParamsLogisticClassifierFactory = new ShiftParamsLogisticClassifierFactory();
            classifier = shiftParamsLogisticClassifierFactory.trainClassifier((GeneralDataset)newdataset);
            MultinomialLogisticClassifier logcl = (MultinomialLogisticClassifier)classifier;
            weights = logcl.weightsAsGenericCounter().get("true");
            wtd = Counters.toDescendingMagnitudeSortedListWithCounts(weights);
            Redwood.log(ConstantsAndVariables.minimaldebug, "The weights are " + StringUtils.join(wtd.subList(0, Math.min(wtd.size(), 600)), "\n"));
        } else if (this.scoreClassifierType.equals(ClassifierType.LINEAR)) {
            LinearClassifierFactory lcf = new LinearClassifierFactory();
            classifier = lcf.trainClassifier((GeneralDataset)dataset);
            labels = Generics.newHashSet(Arrays.asList("true"));
            List list = ((LinearClassifier)classifier).getTopFeatures(labels, 0.0, true, 600, true);
            Redwood.log(ConstantsAndVariables.minimaldebug, "The weights are " + StringUtils.join(list, "\n"));
        } else {
            throw new RuntimeException("cannot identify classifier " + this.scoreClassifierType);
        }
        // try-with-resources guarantees the dump file is closed even when a write fails.
        try (BufferedWriter w = new BufferedWriter(new FileWriter("tempscorestrainer.txt"))) {
            System.out.println("size of learned scores is " + this.phraseScoresRaw.size());
            for (CandidatePhrase candidatePhrase : this.phraseScoresRaw.firstKeySet()) {
                w.write(candidatePhrase + "\t" + this.phraseScoresRaw.getCounter(candidatePhrase) + "\n");
            }
        }
        return classifier;
    }

    /**
     * Logs, at debug level, one line per selected phrase: the phrase text, its score in
     * {@code phrases}, and the raw feature scores recorded for it.
     *
     * @param phrases the selected phrases with their scores
     */
    @Override
    public void printReasonForChoosing(Counter<CandidatePhrase> phrases) {
        Redwood.log(Redwood.DBG, "Features of selected phrases");
        for (Map.Entry<CandidatePhrase, Double> selected : phrases.entrySet()) {
            CandidatePhrase phrase = selected.getKey();
            String line = phrase.getPhrase() + "\t" + selected.getValue() + "\t" + this.phraseScoresRaw.getCounter(phrase);
            Redwood.log(Redwood.DBG, line);
        }
    }

    /**
     * Trains a classifier for {@code label} and scores every phrase in {@code terms}
     * with it. Phrases whose score is NaN or infinite are logged and left out.
     *
     * @param label                  label being scored
     * @param terms                  candidate phrases with the patterns that extracted them
     * @param wordsPatExtracted      forwarded to {@code learnClassifier}
     * @param allSelectedPatterns    forwarded to {@code learnClassifier} and the scorer
     * @param alreadyIdentifiedWords not used by this implementation
     * @param forLearningPatterns    forwarded to {@code learnClassifier} and the scorer
     * @return finite classifier scores keyed by phrase
     */
    @Override
    public Counter<CandidatePhrase> scorePhrases(String label, TwoDimensionalCounter<CandidatePhrase, E> terms, TwoDimensionalCounter<CandidatePhrase, E> wordsPatExtracted, Counter<E> allSelectedPatterns, Set<CandidatePhrase> alreadyIdentifiedWords, boolean forLearningPatterns) throws IOException, ClassNotFoundException {
        this.getAllLabeledWordsCluster();
        ClassicCounter<CandidatePhrase> finiteScores = new ClassicCounter<CandidatePhrase>();
        Classifier model = this.learnClassifier(label, forLearningPatterns, wordsPatExtracted, allSelectedPatterns);
        for (Map.Entry<CandidatePhrase, ClassicCounter<E>> termEntry : terms.entrySet()) {
            CandidatePhrase phrase = termEntry.getKey();
            double value = this.scoreUsingClassifer(model, phrase, label, forLearningPatterns, termEntry.getValue(), allSelectedPatterns);
            if (Double.isNaN(value) || Double.isInfinite(value)) {
                Redwood.log(Redwood.DBG, "Ignoring " + phrase + " because score is " + value);
            } else {
                finiteScores.setCount(phrase, value);
            }
        }
        return finiteScores;
    }

    /**
     * Trains a classifier for {@code label} (without pattern information) and scores
     * every phrase in {@code terms} with it. Unlike the pattern-aware overload, every
     * score is stored, whatever its value.
     *
     * @param label               label being scored
     * @param terms               phrases to score
     * @param forLearningPatterns forwarded to {@code learnClassifier} and the scorer
     * @return classifier scores keyed by phrase
     */
    @Override
    public Counter<CandidatePhrase> scorePhrases(String label, Set<CandidatePhrase> terms, boolean forLearningPatterns) throws IOException, ClassNotFoundException {
        this.getAllLabeledWordsCluster();
        ClassicCounter<CandidatePhrase> result = new ClassicCounter<CandidatePhrase>();
        Classifier model = this.learnClassifier(label, forLearningPatterns, null, null);
        for (CandidatePhrase phrase : terms) {
            result.setCount(phrase, this.scoreUsingClassifer(model, phrase, label, forLearningPatterns, null, null));
        }
        return result;
    }

    /**
     * Draws one float from {@code random} and reports whether it is below {@code p}.
     *
     * @param random source of randomness; exactly one {@code nextFloat()} is consumed
     * @param p      threshold the draw is compared against
     * @return {@code true} when the drawn value is strictly less than {@code p}
     */
    public static boolean getRandomBoolean(Random random, double p) {
        float draw = random.nextFloat();
        return draw < p;
    }

    /**
     * Standard logistic (sigmoid) function, {@code 1 / (1 + e^-d)}.
     *
     * @param d input value
     * @return a value between 0 and 1
     */
    static double logistic(double d) {
        double decay = Math.exp(-d);
        return 1.0 / (1.0 + decay);
    }

    /**
     * Counts the word-class cluster ids of the tokens of a phrase.
     *
     * <p>For each whitespace-separated token the cluster is looked up, in order, by the
     * token, by the lemma at the same position, by the lower-cased token, and by the
     * lower-cased lemma. The first hit wins; tokens with no hit are skipped.
     *
     * @param phrase      phrase text, split on whitespace
     * @param phraseLemma lemmatized phrase, or {@code null}; a lemma with fewer tokens
     *                    than the phrase is tolerated (the missing positions simply have no lemma)
     * @return counter from cluster id to the number of tokens mapped to it
     */
    Counter<Integer> wordClass(String phrase, String phraseLemma) {
        Map<String, Integer> clusters = this.constVars.getWordClassClusters();
        String[] words = phrase.split("\\s+");
        String[] lemmas = phraseLemma == null ? null : phraseLemma.split("\\s+");
        ClassicCounter<Integer> cl = new ClassicCounter<Integer>();
        for (int i = 0; i < words.length; ++i) {
            String w = words[i];
            // Phrase and lemma may tokenize to different lengths; never index past the lemma.
            String lemma = lemmas != null && i < lemmas.length ? lemmas[i] : null;
            Integer cluster = clusters.get(w);
            if (cluster == null && lemma != null) {
                cluster = clusters.get(lemma);
            }
            if (cluster == null) {
                cluster = clusters.get(w.toLowerCase());
            }
            if (cluster == null && lemma != null) {
                cluster = clusters.get(lemma.toLowerCase());
            }
            if (cluster != null) {
                cl.incrementCount(cluster);
            }
        }
        return cl;
    }

    /**
     * Computes the word-class counter of every learned word and every seed-dictionary
     * phrase of every label, and stores it in {@code wordClassClustersForPhrase}.
     */
    void getAllLabeledWordsCluster() {
        for (String label : this.constVars.getLabels()) {
            for (CandidatePhrase learned : this.constVars.getLearnedWords(label).keySet()) {
                Counter<Integer> clusters = this.wordClass(learned.getPhrase(), learned.getPhraseLemma());
                this.wordClassClustersForPhrase.put(learned, clusters);
            }
            for (CandidatePhrase seed : this.constVars.getSeedLabelDictionary().get(label)) {
                Counter<Integer> clusters = this.wordClass(seed.getPhrase(), seed.getPhraseLemma());
                this.wordClassClustersForPhrase.put(seed, clusters);
            }
        }
    }

    /**
     * Scores each candidate phrase by its word-vector similarity to a set of other
     * phrases, and updates the running per-label similarity statistics kept in the
     * static {@code similaritiesWithLabeledPhrases} map.
     *
     * <p>For a candidate that has a vector (and is not excluded by the ignore regex), the
     * score is the mean priority of the entries left in a bounded priority queue of the
     * other phrases. A candidate that is itself one of {@code otherPhrases} gets no entry
     * in the result. A candidate without a usable vector is given
     * {@code Double.MIN_VALUE}.
     *
     * @param candidatePhrases phrases to score
     * @param otherPhrases     phrases the candidates are compared with
     * @param ignoreWordRegex  when true, candidates matching {@code PatternFactory.ignoreWordRegex}
     *                         are treated like candidates without a vector
     * @param label            key under which the running statistics are stored
     * @return similarity score per candidate phrase
     */
    private Counter<CandidatePhrase> computeSimWithWordVectors(Collection<CandidatePhrase> candidatePhrases, Collection<CandidatePhrase> otherPhrases, boolean ignoreWordRegex, String label) {
        ClassicCounter<CandidatePhrase> sims = new ClassicCounter<CandidatePhrase>(candidatePhrases.size());
        for (CandidatePhrase p : candidatePhrases) {
            double[] simsAvgMax;
            // Fetch (or create) the statistics array for this phrase and label; it is
            // indexed by PhraseScorer.Similarities ordinals.
            Map<String, double[]> simsAvgMaxAllLabels = similaritiesWithLabeledPhrases.get(p.getPhrase());
            if (simsAvgMaxAllLabels == null) {
                simsAvgMaxAllLabels = new HashMap<String, double[]>();
            }
            if ((simsAvgMax = simsAvgMaxAllLabels.get(label)) == null) {
                simsAvgMax = new double[PhraseScorer.Similarities.values().length];
            }
            if (!(!wordVectors.containsKey(p.getPhrase()) || ignoreWordRegex && PatternFactory.ignoreWordRegex.matcher(p.getPhrase()).matches())) {
                double[] d1 = wordVectors.get(p.getPhrase());
                BinaryHeapPriorityQueue<CandidatePhrase> topSimPhs = new BinaryHeapPriorityQueue<CandidatePhrase>(this.constVars.expandPhrasesNumTopSimilar);
                double allsum = 0.0;
                // NOTE(review): Double.MIN_VALUE is the smallest positive double, so a
                // negative similarity can never become the maximum here — confirm intended.
                double max = Double.MIN_VALUE;
                boolean donotuse = false;
                for (CandidatePhrase other : otherPhrases) {
                    double sim;
                    // A candidate that is already among the other phrases is not scored;
                    // the comparison loop stops at that point.
                    if (p.equals(other)) {
                        donotuse = true;
                        break;
                    }
                    if (!wordVectors.containsKey(other.getPhrase())) continue;
                    PhrasePair pair = new PhrasePair(p.getPhrase(), other.getPhrase());
                    if (cacheSimilarities.containsKey(pair)) {
                        sim = cacheSimilarities.getCount(pair);
                    } else {
                        // Cosine similarity: dot product divided by the product of the norms.
                        double[] d2 = wordVectors.get(other.getPhrase());
                        double sum = 0.0;
                        double d1sq = 0.0;
                        double d2sq = 0.0;
                        for (int i = 0; i < d1.length; ++i) {
                            sum += d1[i] * d2[i];
                            d1sq += d1[i] * d1[i];
                            d2sq += d2[i] * d2[i];
                        }
                        sim = sum / (Math.sqrt(d1sq) * Math.sqrt(d2sq));
                        cacheSimilarities.setCount(pair, sim);
                    }
                    // Keep the queue bounded at expandPhrasesNumTopSimilar entries.
                    // NOTE(review): presumably removeLastEntry drops the lowest-priority entry — verify.
                    topSimPhs.add(other, sim);
                    if (topSimPhs.size() > this.constVars.expandPhrasesNumTopSimilar) {
                        topSimPhs.removeLastEntry();
                    }
                    allsum += sim;
                    if (!(sim > max)) continue;
                    max = sim;
                }
                // Average the priorities of whatever is left in the queue.
                // NOTE(review): when the queue is empty numEl is 0 and the result is NaN.
                double finalSimScore = 0.0;
                int numEl = 0;
                while (topSimPhs.hasNext()) {
                    finalSimScore += topSimPhs.getPriority();
                    topSimPhs.next();
                    ++numEl;
                }
                finalSimScore /= (double)numEl;
                // Fold this batch into the running statistics. The item count grows by the
                // full size of otherPhrases, including phrases that were skipped above.
                double prevNumItems = simsAvgMax[PhraseScorer.Similarities.NUMITEMS.ordinal()];
                double prevAvg = simsAvgMax[PhraseScorer.Similarities.AVGSIM.ordinal()];
                double prevMax = simsAvgMax[PhraseScorer.Similarities.MAXSIM.ordinal()];
                double newNumItems = prevNumItems + (double)otherPhrases.size();
                double newAvg = (prevAvg * prevNumItems + allsum) / newNumItems;
                double newMax = prevMax > max ? prevMax : max;
                simsAvgMax[PhraseScorer.Similarities.NUMITEMS.ordinal()] = newNumItems;
                simsAvgMax[PhraseScorer.Similarities.AVGSIM.ordinal()] = newAvg;
                simsAvgMax[PhraseScorer.Similarities.MAXSIM.ordinal()] = newMax;
                if (!donotuse) {
                    sims.setCount(p, finalSimScore);
                }
            } else {
                // No usable vector: record the smallest positive double as a placeholder score.
                sims.setCount(p, Double.MIN_VALUE);
            }
            // Store the (possibly new) statistics back into the shared map.
            simsAvgMaxAllLabels.put(label, simsAvgMax);
            similaritiesWithLabeledPhrases.put(p.getPhrase(), simsAvgMaxAllLabels);
        }
        return sims;
    }

    /**
     * Computes word-vector similarity of the candidates to the positive phrases and to
     * every group of possible negative phrases, then drops from the positive counter
     * every candidate whose negative similarity is greater than its positive similarity.
     *
     * @param candidatePhrases           phrases to score
     * @param positivePhrases            phrases of the target label
     * @param allPossibleNegativePhrases negative phrase groups keyed by group label
     * @param label                      target label
     * @return pair of (filtered positive similarities, summed negative similarities)
     */
    private Pair<Counter<CandidatePhrase>, Counter<CandidatePhrase>> computeSimWithWordVectors(List<CandidatePhrase> candidatePhrases, Collection<CandidatePhrase> positivePhrases, Map<String, Collection<CandidatePhrase>> allPossibleNegativePhrases, String label) {
        assert (wordVectors != null) : "Why are word vectors null?";
        final Counter<CandidatePhrase> towardPositives = this.computeSimWithWordVectors(candidatePhrases, positivePhrases, true, label);
        final ClassicCounter<CandidatePhrase> towardNegatives = new ClassicCounter<CandidatePhrase>();
        for (Map.Entry<String, Collection<CandidatePhrase>> group : allPossibleNegativePhrases.entrySet()) {
            Counter<CandidatePhrase> groupSims = this.computeSimWithWordVectors(candidatePhrases, group.getValue(), true, group.getKey());
            towardNegatives.addAll(groupSims);
        }
        // Written as a negated "greater than" on purpose: a NaN on either side keeps the phrase.
        Predicate<CandidatePhrase> notCloserToNegatives = phrase -> {
            double negative = towardNegatives.getCount(phrase);
            double positive = towardPositives.getCount(phrase);
            return !(negative > positive);
        };
        Counters.retainKeys(towardPositives, notCloserToNegatives);
        return new Pair<Counter<CandidatePhrase>, Counter<CandidatePhrase>>(towardPositives, towardNegatives);
    }

    /**
     * Scores each candidate by the Jaccard overlap between its word-class cluster counter
     * and those of the positive phrases.
     *
     * <p>The score is the sum of the finite Jaccard coefficients against all positive
     * phrases other than the candidate itself, divided by the total number of positive
     * phrases. Candidates with no cluster information score 0. Cluster counters are
     * cached in {@code wordClassClustersForPhrase}.
     *
     * @param candidatePhrases phrases to score
     * @param positivePhrases  phrases of the target label
     * @param allMaxSim        raised to the largest score seen, if larger than its current value
     * @return pair whose first element holds the scores; the second element is always {@code null}
     */
    Pair<Counter<CandidatePhrase>, Counter<CandidatePhrase>> computeSimWithWordCluster(Collection<CandidatePhrase> candidatePhrases, Collection<CandidatePhrase> positivePhrases, AtomicDouble allMaxSim) {
        ClassicCounter<CandidatePhrase> sims = new ClassicCounter<CandidatePhrase>(candidatePhrases.size());
        for (CandidatePhrase p : candidatePhrases) {
            Counter<Integer> feat = this.wordClassClustersForPhrase.get(p);
            if (feat == null) {
                feat = this.wordClass(p.getPhrase(), p.getPhraseLemma());
                this.wordClassClustersForPhrase.put(p, feat);
            }
            double avgSim = 0.0;
            if (feat.size() > 0) {
                for (CandidatePhrase pos : positivePhrases) {
                    if (p.equals(pos)) {
                        continue;
                    }
                    Counter<Integer> posfeat = this.wordClassClustersForPhrase.get(pos);
                    if (posfeat == null) {
                        posfeat = this.wordClass(pos.getPhrase(), pos.getPhraseLemma());
                        // Cache the positive phrase's own counter; storing the candidate's
                        // counter under this key would corrupt every later lookup of pos.
                        this.wordClassClustersForPhrase.put(pos, posfeat);
                    }
                    if (posfeat.size() <= 0) {
                        continue;
                    }
                    double j = Counters.jaccardCoefficient(posfeat, feat);
                    if (Double.isInfinite(j) || Double.isNaN(j)) {
                        continue;
                    }
                    avgSim += j;
                }
                avgSim /= (double)positivePhrases.size();
            }
            sims.setCount(p, avgSim);
            if (allMaxSim.get() < avgSim) {
                allMaxSim.set(avgSim);
            }
        }
        return new Pair<Counter<CandidatePhrase>, Counter<CandidatePhrase>>(sims, null);
    }

    /**
     * Picks, from unlabeled candidate phrases, those to use as negative examples: the
     * candidates whose similarity to the positive phrases stays below
     * {@code constVars.positiveSimilarityThresholdLowPrecision}.
     *
     * <p>Similarities are computed in parallel by {@code ComputeSim} tasks, one per
     * batch of candidates. When {@code logFile} is given and word vectors are loaded,
     * each removed phrase that has a vector is written to it together with its vector.
     *
     * @param candidatePhrases     unlabeled phrases to consider
     * @param label                target label
     * @param positivePhrases      phrases of the target label
     * @param knownNegativePhrases known negative phrase groups, passed to the tasks
     * @param logFile              optional writer for the removed phrases; may be {@code null}
     * @return the phrases that remain after the threshold filter
     * @throws IOException      if writing to {@code logFile} fails
     * @throws RuntimeException if a similarity task fails or the wait is interrupted
     */
    Set<CandidatePhrase> chooseUnknownAsNegatives(Set<CandidatePhrase> candidatePhrases, String label, Collection<CandidatePhrase> positivePhrases, Map<String, Collection<CandidatePhrase>> knownNegativePhrases, BufferedWriter logFile) throws IOException {
        List<List<CandidatePhrase>> threadedCandidates = GetPatternsFromDataMultiClass.getThreadBatches(CollectionUtils.toList(candidatePhrases), this.constVars.numThreads);
        Counter<CandidatePhrase> sims = new ClassicCounter<CandidatePhrase>();
        AtomicDouble allMaxSim = new AtomicDouble(Double.MIN_VALUE);
        ExecutorService executor = Executors.newFixedThreadPool(this.constVars.numThreads);
        try {
            List<Future<Pair<Counter<CandidatePhrase>, Counter<CandidatePhrase>>>> pending = new ArrayList<Future<Pair<Counter<CandidatePhrase>, Counter<CandidatePhrase>>>>();
            for (List<CandidatePhrase> batch : threadedCandidates) {
                ComputeSim task = new ComputeSim(label, batch, allMaxSim, positivePhrases, knownNegativePhrases);
                Future<Pair<Counter<CandidatePhrase>, Counter<CandidatePhrase>>> submitted = executor.submit(task);
                pending.add(submitted);
            }
            for (Future<Pair<Counter<CandidatePhrase>, Counter<CandidatePhrase>>> future : pending) {
                try {
                    sims.addAll(future.get().first());
                }
                catch (InterruptedException e) {
                    // Keep the interrupt visible to callers further up the stack.
                    Thread.currentThread().interrupt();
                    executor.shutdownNow();
                    throw new RuntimeException(e);
                }
                catch (Exception e) {
                    executor.shutdownNow();
                    throw new RuntimeException(e);
                }
            }
        } finally {
            // Always release the pool threads, including when task submission itself fails.
            executor.shutdown();
        }
        if (allMaxSim.get() == Double.MIN_VALUE) {
            Redwood.log(Redwood.DBG, "No similarity recorded between the positives and the unknown!");
        }
        CandidatePhrase k = Counters.argmax(sims);
        System.out.println("Maximum similarity was " + sims.getCount(k) + " for word " + k);
        // retainBelow mutates sims and hands back the entries it removed.
        Counter<CandidatePhrase> removed = Counters.retainBelow(sims, this.constVars.positiveSimilarityThresholdLowPrecision);
        System.out.println("removing phrases as negative phrases that were higher that positive similarity threshold of " + this.constVars.positiveSimilarityThresholdLowPrecision + removed);
        if (logFile != null && wordVectors != null) {
            for (Map.Entry<CandidatePhrase, Double> en : removed.entrySet()) {
                double[] vector = wordVectors.get(en.getKey().getPhrase());
                if (!wordVectors.containsKey(en.getKey().getPhrase())) {
                    continue;
                }
                logFile.write(en.getKey() + "-PN " + ArrayUtils.toString(vector, " ") + "\n");
            }
        }
        return sims.keySet();
    }

    /**
     * Samples phrases from a sentence that are not labeled with {@code label}, to be used
     * as "unknown" examples.
     *
     * <p>The phrase length is drawn once, with a fixed-seed generator, from
     * {@code 1..PatternFactory.numWordsCompoundMapped.get(label)}. For dependency
     * patterns, head tokens are sampled without replacement and phrases are extracted
     * from the sentence graph around each accepted head. For surface patterns, each token
     * is selected with probability {@code perSelect} and a window of the drawn length
     * around it is used, unless the window touches a token labeled {@code label}, is
     * empty, or is a function word.
     *
     * @param sent          sentence to sample from
     * @param random        generator used for selecting tokens
     * @param perSelect     fraction/probability of tokens to select
     * @param positiveClass annotation key holding the token's label
     * @param label         label that marks a token as positive
     * @param maxNum        0 returns an empty set; otherwise caps the heads sampled in the dependency case
     * @return the sampled phrases
     * @throws RuntimeException if the pattern type is neither DEP nor SURFACE
     */
    Set<CandidatePhrase> chooseUnknownPhrases(DataInstance sent, Random random, double perSelect, Class positiveClass, String label, int maxNum) {
        HashSet<CandidatePhrase> picked = new HashSet<CandidatePhrase>();
        if (maxNum == 0) {
            return picked;
        }
        Predicate<CoreLabel> acceptWord = token -> {
            if (token.get(positiveClass).equals(label)) {
                return false;
            }
            return !this.constVars.functionWords.contains(token.word());
        };
        // Fixed seed: the same phrase length is drawn for a given label on every call.
        Random lengthPicker = new Random(0L);
        ArrayList<Integer> lengths = new ArrayList<Integer>();
        int longest = PatternFactory.numWordsCompoundMapped.get(label);
        for (int len = 1; len <= longest; len++) {
            lengths.add(len);
        }
        int length = (Integer)CollectionUtils.sample(lengths, lengthPicker);
        if (this.constVars.patternType.equals(PatternFactory.PatternType.DEP)) {
            ExtractPhraseFromPattern extractor = new ExtractPhraseFromPattern(true, length);
            SemanticGraph graph = ((DataInstanceDep)sent).getGraph();
            int headCount = Math.min(maxNum, (int)(perSelect * (double)sent.getTokens().size()));
            Collection<CoreLabel> heads = CollectionUtils.sampleWithoutReplacement(sent.getTokens(), headCount, random);
            List<String> words = sent.getTokens().stream().map(CoreLabel::word).collect(Collectors.toList());
            for (CoreLabel head : heads) {
                if (!acceptWord.test(head)) {
                    continue;
                }
                IndexedWord node = graph.getNodeByIndex(head.index());
                ArrayList<String> outputPhrases = new ArrayList<String>();
                ArrayList<ExtractedPhrase> found = new ArrayList<ExtractedPhrase>();
                ArrayList<IntPair> outputIndices = new ArrayList<IntPair>();
                extractor.printSubGraph(graph, node, new ArrayList<String>(), words, outputPhrases, outputIndices, new ArrayList<IndexedWord>(), new ArrayList<IndexedWord>(), false, found, null, acceptWord);
                for (ExtractedPhrase phrase : found) {
                    picked.add(CandidatePhrase.createOrGet(phrase.getValue(), null, phrase.getFeatures()));
                }
            }
        } else if (this.constVars.patternType.equals(PatternFactory.PatternType.SURFACE)) {
            CoreLabel[] tokens = sent.getTokens().toArray(new CoreLabel[0]);
            // Window of `length` tokens around the selected one.
            int left = (int)((double)(length - 1) / 2.0);
            int right = length - 1 - left;
            for (int center = 0; center < tokens.length; center++) {
                if (!(random.nextDouble() < perSelect)) {
                    continue;
                }
                StringBuilder window = new StringBuilder();
                boolean touchesPositive = false;
                int last = Math.min(tokens.length - 1, center + right);
                for (int j = Math.max(0, center - left); j <= last; j++) {
                    if (tokens[j].get(positiveClass).equals(label)) {
                        touchesPositive = true;
                        break;
                    }
                    window.append(" ").append(tokens[j].word());
                }
                String ph = window.toString().trim();
                if (touchesPositive || ph.isEmpty() || this.constVars.functionWords.contains(ph)) {
                    continue;
                }
                picked.add(CandidatePhrase.createOrGet(ph));
            }
        } else {
            throw new RuntimeException("not yet implemented");
        }
        return picked;
    }

    /**
     * Reports whether {@code value} occurs in the collection of any key other than
     * {@code ignoreLabel}.
     *
     * @param values      collections keyed by label
     * @param value       element to look for
     * @param ignoreLabel key whose collection is not searched
     * @return {@code true} if some other key's collection contains {@code value}
     */
    private static <E, F> boolean hasElement(Map<E, Collection<F>> values, F value, E ignoreLabel) {
        return values.entrySet().stream()
                .anyMatch(entry -> !entry.getKey().equals(ignoreLabel) && entry.getValue().contains(value));
    }

    /**
     * Counts, over all sentence batches, how many tokens carry each label: for every
     * token and every (label, annotation key) pair in {@code constVars.getAnswerClass()},
     * the label's count goes up by one when the token's annotation equals the label.
     *
     * @return number of labeled tokens per label
     */
    Counter<String> numLabeledTokens() {
        ClassicCounter<String> perLabel = new ClassicCounter<String>();
        ConstantsAndVariables.DataSentsIterator batches = new ConstantsAndVariables.DataSentsIterator(this.constVars.batchProcessSents);
        while (batches.hasNext()) {
            Map sentences = (Map)((Pair)batches.next()).first();
            for (Object instance : sentences.values()) {
                for (CoreLabel token : ((DataInstance)instance).getTokens()) {
                    for (Map.Entry<String, Class<? extends TypesafeMap.Key<String>>> answer : this.constVars.getAnswerClass().entrySet()) {
                        String expected = answer.getKey();
                        String actual = (String)token.get(answer.getValue());
                        if (actual.equals(expected)) {
                            perLabel.incrementCount(expected);
                        }
                    }
                }
            }
        }
        return perLabel;
    }

    /**
     * Collects every group of phrases that can serve as negatives for {@code answerLabel}.
     *
     * <p>The result maps {@code "NEGATIVE"} to stop words, function words and English
     * words; each other label to its learned words (when the label has an entry in
     * {@code getLearnedWordsEachIter()}) plus its seed dictionary; and {@code "OTHERSEM"}
     * to the other-semantic-class words.
     *
     * @param answerLabel label whose own phrases are excluded
     * @return negative phrase groups keyed by group name
     */
    Map<String, Collection<CandidatePhrase>> getAllPossibleNegativePhrases(String answerLabel) {
        HashMap<String, Collection<CandidatePhrase>> groups = new HashMap<String, Collection<CandidatePhrase>>();

        HashSet<CandidatePhrase> generalNegatives = new HashSet<CandidatePhrase>();
        generalNegatives.addAll(this.constVars.getStopWords());
        generalNegatives.addAll(CandidatePhrase.convertStringPhrases(this.constVars.functionWords));
        generalNegatives.addAll(CandidatePhrase.convertStringPhrases(this.constVars.getEnglishWords()));
        groups.put("NEGATIVE", generalNegatives);

        for (String label : this.constVars.getLabels()) {
            if (!label.equals(answerLabel)) {
                Collection<CandidatePhrase> ofOtherLabel = new HashSet<CandidatePhrase>();
                groups.put(label, ofOtherLabel);
                if (this.constVars.getLearnedWordsEachIter().containsKey(label)) {
                    ofOtherLabel.addAll(this.constVars.getLearnedWords(label).keySet());
                }
                ofOtherLabel.addAll(this.constVars.getSeedLabelDictionary().get(label));
            }
        }

        groups.put("OTHERSEM", this.constVars.getOtherSemanticClassesWords());
        return groups;
    }

    /**
     * Assembles the binary ("true"/"false") training set used to learn phrase-scoring
     * feature weights for {@code answerLabel}.
     *
     * <p>Steps, in order:
     * <ol>
     *   <li>On the call that first creates the "close to positives/negatives" counters,
     *       decide whether expansion is requested and (when word vectors are not used)
     *       count the word-class clusters of the known positive phrases.</li>
     *   <li>Run one {@link ChooseDatumsThread} per sentence-id batch and merge the sampled
     *       positive, negative and unknown phrases (plus expansion candidates).</li>
     *   <li>Optionally expand positives/negatives, pick unknowns as negatives, and cap the
     *       negatives at the number of positives.</li>
     *   <li>Featurise every chosen phrase, add it to the dataset and apply the feature
     *       count threshold.</li>
     * </ol>
     *
     * <p>If {@code constVars.logFileVectorSimilarity} is set, word vectors and features of the
     * chosen phrases are written to that file and to the same path suffixed with
     * {@code _feat}. Both writers are closed even when an exception interrupts the method.
     *
     * @param forLearningPattern if true, features come from
     *        {@link #getPhraseFeaturesForPattern}; otherwise from {@link #getFeatures}
     * @param answerLabel the label for which datums are chosen
     * @param wordsPatExtracted for each phrase, the patterns that extracted it
     * @param allSelectedPatterns the patterns selected so far
     * @param computeRawFreq if true, raw frequencies are computed for each sentence batch
     *        via {@code Data.computeRawFreqIfNull}
     * @return the dataset after {@code applyFeatureCountThreshold}
     * @throws IOException if a similarity/feature log file cannot be opened, written or closed
     */
    public GeneralDataset<String, ConstantsAndVariables.ScorePhraseMeasures> choosedatums(boolean forLearningPattern, String answerLabel, TwoDimensionalCounter<CandidatePhrase, E> wordsPatExtracted, Counter<E> allSelectedPatterns, boolean computeRawFreq) throws IOException {
        // Expansion is only requested on the call that creates the corresponding counter.
        boolean expandNeg = false;
        if (this.closeToNegativesFirstIter == null) {
            this.closeToNegativesFirstIter = new ClassicCounter<CandidatePhrase>();
            if (this.constVars.expandNegativesWhenSampling) {
                expandNeg = true;
            }
        }
        boolean expandPos = false;
        if (this.closeToPositivesFirstIter == null) {
            this.closeToPositivesFirstIter = new ClassicCounter<CandidatePhrase>();
            if (this.constVars.expandPositivesWhenSampling) {
                expandPos = true;
            }
        }

        // Cluster ids of the known positive phrases; only needed for cluster-based similarity.
        ClassicCounter<Integer> distSimClustersOfPositive = new ClassicCounter<Integer>();
        if ((expandPos || expandNeg) && !this.constVars.useWordVectorsToComputeSim) {
            for (CandidatePhrase s : CollectionUtils.union(this.constVars.getLearnedWords(answerLabel).keySet(), this.constVars.getSeedLabelDictionary().get(answerLabel))) {
                Integer num = this.constVars.getWordClassClusters().get(s.getPhrase());
                if (num == null) {
                    num = this.constVars.getWordClassClusters().get(s.getPhrase().toLowerCase());
                }
                if (num != null) {
                    distSimClustersOfPositive.incrementCount(num);
                    continue;
                }
                // No cluster for the whole phrase: fall back to the clusters of its tokens.
                for (String tok : s.getPhrase().split("\\s+")) {
                    Integer toknum = this.constVars.getWordClassClusters().get(tok);
                    if (toknum == null) {
                        toknum = this.constVars.getWordClassClusters().get(tok.toLowerCase());
                    }
                    if (toknum != null) {
                        distSimClustersOfPositive.incrementCount(toknum);
                    }
                }
            }
        }

        Map<String, Collection<CandidatePhrase>> allPossibleNegativePhrases = this.getAllPossibleNegativePhrases(answerLabel);
        RVFDataset<String, ConstantsAndVariables.ScorePhraseMeasures> dataset = new RVFDataset<String, ConstantsAndVariables.ScorePhraseMeasures>();
        int numpos = 0;
        Set<CandidatePhrase> allNegativePhrases = new HashSet<CandidatePhrase>();
        Set<CandidatePhrase> allUnknownPhrases = new HashSet<CandidatePhrase>();
        Set<CandidatePhrase> allPositivePhrases = new HashSet<CandidatePhrase>();

        // Sample phrases from every sentence batch, one worker per batch of sentence ids.
        ConstantsAndVariables.DataSentsIterator sentsIter = new ConstantsAndVariables.DataSentsIterator(this.constVars.batchProcessSents);
        while (sentsIter.hasNext()) {
            Pair<?, ?> sentsf = (Pair<?, ?>) sentsIter.next();
            @SuppressWarnings("unchecked")
            Map<String, DataInstance> sents = (Map<String, DataInstance>) sentsf.first();
            Redwood.log(Redwood.DBG, "Sampling datums from " + sentsf.second());
            if (computeRawFreq) {
                Data.computeRawFreqIfNull(sents, PatternFactory.numWordsCompoundMax);
            }
            List<List<String>> threadedSentIds = GetPatternsFromDataMultiClass.getThreadBatches(new ArrayList<String>(sents.keySet()), this.constVars.numThreads);
            ExecutorService executor = Executors.newFixedThreadPool(this.constVars.numThreads);
            List<Future<?>> pending = new ArrayList<Future<?>>();
            for (List<String> sentIds : threadedSentIds) {
                ChooseDatumsThread task = new ChooseDatumsThread(answerLabel, sents, sentIds, wordsPatExtracted, allSelectedPatterns, distSimClustersOfPositive, allPossibleNegativePhrases, expandPos, expandNeg);
                pending.add(executor.submit(task));
            }
            for (Future<?> future : pending) {
                try {
                    @SuppressWarnings("unchecked")
                    Quintuple<Set<CandidatePhrase>, Set<CandidatePhrase>, Set<CandidatePhrase>, Counter<CandidatePhrase>, Counter<CandidatePhrase>> result = (Quintuple<Set<CandidatePhrase>, Set<CandidatePhrase>, Set<CandidatePhrase>, Counter<CandidatePhrase>, Counter<CandidatePhrase>>) future.get();
                    allPositivePhrases.addAll(result.first());
                    allNegativePhrases.addAll(result.second());
                    allUnknownPhrases.addAll(result.third());
                    // setCount (not add): a phrase seen by several workers keeps the last similarity.
                    if (expandPos) {
                        for (Map.Entry<CandidatePhrase, Double> en : result.fourth().entrySet()) {
                            this.closeToPositivesFirstIter.setCount(en.getKey(), en.getValue());
                        }
                    }
                    if (expandNeg) {
                        for (Map.Entry<CandidatePhrase, Double> en : result.fifth().entrySet()) {
                            this.closeToNegativesFirstIter.setCount(en.getKey(), en.getValue());
                        }
                    }
                }
                catch (Exception e) {
                    executor.shutdownNow();
                    throw new RuntimeException(e);
                }
            }
            executor.shutdown();
        }
        allPositivePhrases.addAll(this.constVars.getLearnedWords(answerLabel).keySet());

        BufferedWriter logFile = null;
        BufferedWriter logFileFeat = null;
        try {
            if (this.constVars.logFileVectorSimilarity != null) {
                logFile = new BufferedWriter(new FileWriter(this.constVars.logFileVectorSimilarity));
                logFileFeat = new BufferedWriter(new FileWriter(this.constVars.logFileVectorSimilarity + "_feat"));
                if (wordVectors != null) {
                    for (CandidatePhrase p : allPositivePhrases) {
                        if (wordVectors.containsKey(p.getPhrase())) {
                            logFile.write(p.getPhrase() + "-P " + ArrayUtils.toString(wordVectors.get(p.getPhrase()), " ") + "\n");
                        }
                    }
                }
            }

            if (this.constVars.expandPositivesWhenSampling) {
                Redwood.log("Expanding positives by adding " + Counters.toSortedString(this.closeToPositivesFirstIter, this.closeToPositivesFirstIter.size(), "%1$s:%2$f", "\t") + " phrases");
                allPositivePhrases.addAll(this.closeToPositivesFirstIter.keySet());
                // Guarded by expandPos: this block logs the positive expansion. The earlier
                // version tested expandNeg here, so "-PP" lines were silently dropped whenever
                // only positive expansion was enabled.
                if (logFile != null && wordVectors != null && expandPos) {
                    for (CandidatePhrase p : this.closeToPositivesFirstIter.keySet()) {
                        if (wordVectors.containsKey(p.getPhrase())) {
                            logFile.write(p.getPhrase() + "-PP " + ArrayUtils.toString(wordVectors.get(p.getPhrase()), " ") + "\n");
                        }
                    }
                }
            }
            if (this.constVars.expandNegativesWhenSampling) {
                Redwood.log("Expanding negatives by adding " + Counters.toSortedString(this.closeToNegativesFirstIter, this.closeToNegativesFirstIter.size(), "%1$s:%2$f", "\t") + " phrases");
                allNegativePhrases.addAll(this.closeToNegativesFirstIter.keySet());
                if (logFile != null && wordVectors != null && expandNeg) {
                    for (CandidatePhrase p : this.closeToNegativesFirstIter.keySet()) {
                        if (wordVectors.containsKey(p.getPhrase())) {
                            logFile.write(p.getPhrase() + "-NN " + ArrayUtils.toString(wordVectors.get(p.getPhrase()), " ") + "\n");
                        }
                    }
                }
            }

            // Positive datums.
            System.out.println("all positive phrases of size " + allPositivePhrases.size() + " are  " + allPositivePhrases);
            for (CandidatePhrase candidate : allPositivePhrases) {
                Counter<ConstantsAndVariables.ScorePhraseMeasures> feat = forLearningPattern ? this.getPhraseFeaturesForPattern(answerLabel, candidate) : this.getFeatures(answerLabel, candidate, wordsPatExtracted.getCounter(candidate), allSelectedPatterns);
                dataset.add(new RVFDatum<String, ConstantsAndVariables.ScorePhraseMeasures>(feat, "true"));
                ++numpos;
                if (logFileFeat != null) {
                    logFileFeat.write("POSITIVE " + candidate.getPhrase() + "\t" + Counters.toSortedByKeysString(feat, "%1$s:%2$.0f", ";", "%s") + "\n");
                }
            }

            Redwood.log(Redwood.DBG, "Number of pure negative phrases is " + allNegativePhrases.size());
            Redwood.log(Redwood.DBG, "Number of unknown phrases is " + allUnknownPhrases.size());
            if (this.constVars.subsampleUnkAsNegUsingSim) {
                Set<CandidatePhrase> chosenUnknown = this.chooseUnknownAsNegatives(allUnknownPhrases, answerLabel, allPositivePhrases, allPossibleNegativePhrases, logFile);
                Redwood.log(Redwood.DBG, "Choosing " + chosenUnknown.size() + " unknowns as negative based to their similarity to the positive phrases");
                allNegativePhrases.addAll(chosenUnknown);
            } else {
                allNegativePhrases.addAll(allUnknownPhrases);
            }

            // Keep the classes balanced: never more negatives than positives.
            if (allNegativePhrases.size() > numpos) {
                Redwood.log(Redwood.WARN, "Num of negative (" + allNegativePhrases.size() + ") is higher than number of positive phrases (" + numpos + ") = " + (double)allNegativePhrases.size() / (double)numpos + ". Capping the number by taking the first numPositives as negative. Consider decreasing perSelectRand");
                int i = 0;
                Set<CandidatePhrase> selectedNegPhrases = new HashSet<CandidatePhrase>();
                for (CandidatePhrase p : allNegativePhrases) {
                    if (i >= numpos) break;
                    selectedNegPhrases.add(p);
                    ++i;
                }
                allNegativePhrases.clear();
                allNegativePhrases = selectedNegPhrases;
            }

            // Negative datums.
            System.out.println("all negative phrases are " + allNegativePhrases);
            for (CandidatePhrase negative : allNegativePhrases) {
                Counter<ConstantsAndVariables.ScorePhraseMeasures> feat = forLearningPattern ? this.getPhraseFeaturesForPattern(answerLabel, negative) : this.getFeatures(answerLabel, negative, wordsPatExtracted.getCounter(negative), allSelectedPatterns);
                dataset.add(new RVFDatum<String, ConstantsAndVariables.ScorePhraseMeasures>(feat, "false"));
                if (logFile != null && wordVectors != null && wordVectors.containsKey(negative.getPhrase())) {
                    logFile.write(negative.getPhrase() + "-N " + ArrayUtils.toString(wordVectors.get(negative.getPhrase()), " ") + "\n");
                }
                if (logFileFeat != null) {
                    logFileFeat.write("NEGATIVE " + negative.getPhrase() + "\t" + Counters.toSortedByKeysString(feat, "%1$s:%2$.0f", ";", "%s") + "\n");
                }
            }
        } finally {
            // Always release both log files, even if featurisation or writing failed.
            try {
                if (logFile != null) {
                    logFile.close();
                }
            } finally {
                if (logFileFeat != null) {
                    logFileFeat.close();
                }
            }
        }

        System.out.println("Before feature count threshold, dataset stats are ");
        dataset.summaryStatistics();
        dataset.applyFeatureCountThreshold(this.constVars.featureCountThreshold);
        System.out.println("AFTER feature count threshold of " + this.constVars.featureCountThreshold + ", dataset stats are ");
        dataset.summaryStatistics();
        Redwood.log(Redwood.DBG, "Eventually, number of positive datums:  " + numpos + " and number of negative datums: " + allNegativePhrases.size());
        return dataset;
    }

    /**
     * Looks up the similarity statistics stored for a phrase.
     *
     * @param phrase the phrase text used as the lookup key
     * @return the entry of {@code similaritiesWithLabeledPhrases} for {@code phrase};
     *         callers in this file treat {@code null} as "not computed yet"
     */
    private static Map<String, double[]> getSimilarities(String phrase) {
        final Map<String, double[]> storedSimilarities = similaritiesWithLabeledPhrases.get(phrase);
        return storedSimilarities;
    }

    /**
     * Builds (or fetches from the {@code phraseScoresRaw} cache) the feature vector of a
     * phrase for the pattern-evaluation setting.
     *
     * <p>The phrase's own features, if any, are copied first; after that each
     * {@code constVars.usePatternEval*} switch contributes its feature(s). The finished
     * counter is stored in {@code phraseScoresRaw} before it is returned.
     *
     * @param label the label the phrase is scored against
     * @param word the candidate phrase
     * @return the feature counter for {@code word}
     * @throws RuntimeException if the Google or domain n-gram score is infinite or NaN
     */
    Counter<ConstantsAndVariables.ScorePhraseMeasures> getPhraseFeaturesForPattern(String label, CandidatePhrase word) {
        // Cache hit: the features of this phrase were already computed.
        if (this.phraseScoresRaw.containsFirstKey(word)) {
            return this.phraseScoresRaw.getCounter(word);
        }

        ClassicCounter<ConstantsAndVariables.ScorePhraseMeasures> features = new ClassicCounter<ConstantsAndVariables.ScorePhraseMeasures>();

        // Features already attached to the phrase are carried over under the same names.
        if (word.getFeatures() == null) {
            Redwood.log(ConstantsAndVariables.extremedebug, "features are null for " + word);
        } else {
            features.addAll(Counters.transform(word.getFeatures(), name -> ConstantsAndVariables.ScorePhraseMeasures.create(name)));
        }

        if (this.constVars.usePatternEvalSemanticOdds) {
            features.setCount(ConstantsAndVariables.ScorePhraseMeasures.SEMANTICODDS, this.getDictOddsScore(word, label, 0.0));
        }

        if (this.constVars.usePatternEvalGoogleNgram) {
            Double googleScore = ScorePhrasesLearnFeatWt.getGoogleNgramScore(word);
            if (googleScore.isInfinite() || googleScore.isNaN()) {
                throw new RuntimeException("how is the google ngrams score " + googleScore + " for " + word);
            }
            features.setCount(ConstantsAndVariables.ScorePhraseMeasures.GOOGLENGRAM, googleScore);
        }

        if (this.constVars.usePatternEvalDomainNgram) {
            Double domainScore = this.getDomainNgramScore(word.getPhrase());
            if (domainScore.isInfinite() || domainScore.isNaN()) {
                throw new RuntimeException("how is the domain ngrams score " + domainScore + " for " + word + " when domain raw freq is " + Data.domainNGramRawFreq.getCount(word) + " and raw freq is " + Data.rawFreq.getCount(word));
            }
            features.setCount(ConstantsAndVariables.ScorePhraseMeasures.DOMAINNGRAM, domainScore);
        }

        if (this.constVars.usePatternEvalWordClass) {
            // Exact-case lookup first, lower-cased phrase as fallback; a miss yields "...-null".
            Integer cluster = this.constVars.getWordClassClusters().get(word.getPhrase());
            if (cluster == null) {
                cluster = this.constVars.getWordClassClusters().get(word.getPhrase().toLowerCase());
            }
            String clusterFeature = ConstantsAndVariables.ScorePhraseMeasures.DISTSIM.toString() + "-" + cluster;
            features.setCount(ConstantsAndVariables.ScorePhraseMeasures.create(clusterFeature), 1.0);
        }

        if (this.constVars.usePatternEvalEditDistSame) {
            double sameClassDist = this.constVars.getEditDistanceScoresThisClass(label, word.getPhrase());
            assert (sameClassDist <= 1.0) : " how come edit distance from the true class is " + sameClassDist + " for word " + word;
            features.setCount(ConstantsAndVariables.ScorePhraseMeasures.EDITDISTSAME, sameClassDist);
        }

        if (this.constVars.usePatternEvalEditDistOther) {
            double otherClassDist = this.constVars.getEditDistanceScoresOtherClass(label, word.getPhrase());
            assert (otherClassDist <= 1.0) : " how come edit distance from the true class is " + otherClassDist + " for word " + word;
            features.setCount(ConstantsAndVariables.ScorePhraseMeasures.EDITDISTOTHER, otherClassDist);
        }

        if (this.constVars.usePatternEvalWordShape) {
            features.setCount(ConstantsAndVariables.ScorePhraseMeasures.WORDSHAPE, this.getWordShapeScore(word.getPhrase(), label));
        }

        if (this.constVars.usePatternEvalWordShapeStr) {
            String shapeFeature = ConstantsAndVariables.ScorePhraseMeasures.WORDSHAPESTR + "-" + this.wordShape(word.getPhrase());
            features.setCount(ConstantsAndVariables.ScorePhraseMeasures.create(shapeFeature), 1.0);
        }

        if (this.constVars.usePatternEvalFirstCapital) {
            double capitalised = StringUtils.isCapitalized(word.getPhrase()) ? 1.0 : 0.0;
            features.setCount(ConstantsAndVariables.ScorePhraseMeasures.ISFIRSTCAPITAL, capitalised);
        }

        if (this.constVars.usePatternEvalBOW) {
            // One indicator feature per whitespace-separated token.
            String[] tokens = word.getPhrase().split("\\s+");
            for (String token : tokens) {
                features.setCount(ConstantsAndVariables.ScorePhraseMeasures.create(ConstantsAndVariables.ScorePhraseMeasures.BOW + "-" + token), 1.0);
            }
        }

        this.phraseScoresRaw.setCounter(word, features);
        return features;
    }

    /**
     * Scores a phrase with the trained classifier, memoising the result in
     * {@code learnedScores}.
     *
     * <p>How the score is obtained depends on {@code scoreClassifierType}:
     * <ul>
     *   <li>{@code LR}: {@code classifier} is cast to {@link LogisticClassifier} and the score
     *       is {@code probabilityOf} a datum labelled {@code "true"};</li>
     *   <li>{@code SHIFTLR}: {@code classifier} is cast to
     *       {@link MultinomialLogisticClassifier}; the datum carries only the feature keys and
     *       the score is the {@code "true"} entry of {@code probabilityOf};</li>
     *   <li>{@code DT}, {@code SVM}, {@code RF}, {@code LINEAR}: the score is the
     *       {@code "true"} entry of {@code classifier.scoresOf}.</li>
     * </ul>
     *
     * @param classifier the trained classifier
     * @param word the phrase to score
     * @param label the label the phrase is scored for
     * @param forLearningPatterns if true, features come from
     *        {@link #getPhraseFeaturesForPattern}; otherwise from {@link #getFeatures}
     * @param patternsThatExtractedPat patterns that extracted {@code word}
     *        (only used when {@code forLearningPatterns} is false)
     * @param allSelectedPatterns the patterns selected so far
     *        (only used when {@code forLearningPatterns} is false)
     * @return the score of {@code word}
     * @throws RuntimeException if {@code scoreClassifierType} is not one of the handled types
     */
    public double scoreUsingClassifer(Classifier classifier, CandidatePhrase word, String label, boolean forLearningPatterns, Counter<E> patternsThatExtractedPat, Counter<E> allSelectedPatterns) {
        if (this.learnedScores.containsKey(word)) {
            return this.learnedScores.getCount(word);
        }

        // Classify the classifier type first so an unknown type still fails before any
        // features are computed (and cached).
        boolean isLogistic = this.scoreClassifierType.equals(ClassifierType.LR);
        boolean isShiftLogistic = this.scoreClassifierType.equals(ClassifierType.SHIFTLR);
        boolean usesScoresOf = this.scoreClassifierType.equals(ClassifierType.DT)
                || this.scoreClassifierType.equals(ClassifierType.SVM)
                || this.scoreClassifierType.equals(ClassifierType.RF)
                || this.scoreClassifierType.equals(ClassifierType.LINEAR);
        if (!isLogistic && !isShiftLogistic && !usesScoresOf) {
            throw new RuntimeException("cannot identify classifier " + this.scoreClassifierType);
        }

        // Feature extraction is identical for every classifier type.
        Counter<ConstantsAndVariables.ScorePhraseMeasures> feat = forLearningPatterns
                ? this.getPhraseFeaturesForPattern(label, word)
                : this.getFeatures(label, word, patternsThatExtractedPat, allSelectedPatterns);

        double score;
        if (isLogistic) {
            LogisticClassifier logcl = (LogisticClassifier)classifier;
            RVFDatum<String, ConstantsAndVariables.ScorePhraseMeasures> d = new RVFDatum<String, ConstantsAndVariables.ScorePhraseMeasures>(feat, Boolean.TRUE.toString());
            score = logcl.probabilityOf(d);
        } else if (isShiftLogistic) {
            BasicDatum<String, ConstantsAndVariables.ScorePhraseMeasures> d = new BasicDatum<String, ConstantsAndVariables.ScorePhraseMeasures>(feat.keySet(), Boolean.FALSE.toString());
            Counter<String> sc = ((MultinomialLogisticClassifier)classifier).probabilityOf(d);
            score = sc.getCount(Boolean.TRUE.toString());
        } else {
            // DT, SVM, RF and LINEAR all read the "true" score out of scoresOf().
            RVFDatum<String, ConstantsAndVariables.ScorePhraseMeasures> d = new RVFDatum<String, ConstantsAndVariables.ScorePhraseMeasures>(feat, Boolean.FALSE.toString());
            Counter<String> sc = classifier.scoresOf(d);
            score = sc.getCount(Boolean.TRUE.toString());
        }

        this.learnedScores.setCount(word, score);
        return score;
    }

    /**
     * Builds (or fetches from the {@code phraseScoresRaw} cache) the feature vector of a
     * phrase for the phrase-evaluation setting.
     *
     * <p>The phrase's own features, if any, are copied first; after that each
     * {@code constVars.usePhraseEval*} switch contributes its feature(s). The finished
     * counter is stored in {@code phraseScoresRaw} before it is returned.
     *
     * <p>Word-vector features: the similarity to {@code label} is reported as
     * {@code WORDVECPOSSIMAVG}/{@code WORDVECPOSSIMMAX}; the similarities to every other
     * label are aggregated into {@code WORDVECNEGSIMAVG} (item-weighted mean) and
     * {@code WORDVECNEGSIMMAX} (largest per-label maximum). When there is nothing to
     * aggregate both negative features are {@code 0.0}.
     *
     * @param label the label the phrase is scored against
     * @param word the candidate phrase
     * @param patThatExtractedWord patterns that extracted {@code word}
     * @param allSelectedPatterns the patterns selected so far
     * @return the feature counter for {@code word}
     * @throws RuntimeException if the Google or domain n-gram score is infinite or NaN
     */
    Counter<ConstantsAndVariables.ScorePhraseMeasures> getFeatures(String label, CandidatePhrase word, Counter<E> patThatExtractedWord, Counter<E> allSelectedPatterns) {
        // Cache hit: the features of this phrase were already computed.
        if (this.phraseScoresRaw.containsFirstKey(word)) {
            return this.phraseScoresRaw.getCounter(word);
        }
        ClassicCounter<ConstantsAndVariables.ScorePhraseMeasures> scoreslist = new ClassicCounter<ConstantsAndVariables.ScorePhraseMeasures>();
        if (word.getFeatures() != null) {
            scoreslist.addAll(Counters.transform(word.getFeatures(), x -> ConstantsAndVariables.ScorePhraseMeasures.create(x)));
        } else {
            Redwood.log(ConstantsAndVariables.extremedebug, "features are null for " + word);
        }
        if (this.constVars.usePhraseEvalPatWtByFreq) {
            double tfscore = this.getPatTFIDFScore(word, patThatExtractedWord, allSelectedPatterns);
            scoreslist.setCount(ConstantsAndVariables.ScorePhraseMeasures.PATWTBYFREQ, tfscore);
        }
        if (this.constVars.usePhraseEvalSemanticOdds) {
            double dscore = this.getDictOddsScore(word, label, 0.0);
            scoreslist.setCount(ConstantsAndVariables.ScorePhraseMeasures.SEMANTICODDS, dscore);
        }
        if (this.constVars.usePhraseEvalGoogleNgram) {
            Double gscore = ScorePhrasesLearnFeatWt.getGoogleNgramScore(word);
            if (gscore.isInfinite() || gscore.isNaN()) {
                throw new RuntimeException("how is the google ngrams score " + gscore + " for " + word);
            }
            scoreslist.setCount(ConstantsAndVariables.ScorePhraseMeasures.GOOGLENGRAM, gscore);
        }
        if (this.constVars.usePhraseEvalDomainNgram) {
            Double gscore = this.getDomainNgramScore(word.getPhrase());
            if (gscore.isInfinite() || gscore.isNaN()) {
                throw new RuntimeException("how is the domain ngrams score " + gscore + " for " + word + " when domain raw freq is " + Data.domainNGramRawFreq.getCount(word) + " and raw freq is " + Data.rawFreq.getCount(word));
            }
            scoreslist.setCount(ConstantsAndVariables.ScorePhraseMeasures.DOMAINNGRAM, gscore);
        }
        if (this.constVars.usePhraseEvalWordClass) {
            // Exact-case lookup first, lower-cased phrase as fallback; a miss yields "...-null".
            Integer wordclass = this.constVars.getWordClassClusters().get(word.getPhrase());
            if (wordclass == null) {
                wordclass = this.constVars.getWordClassClusters().get(word.getPhrase().toLowerCase());
            }
            scoreslist.setCount(ConstantsAndVariables.ScorePhraseMeasures.create(ConstantsAndVariables.ScorePhraseMeasures.DISTSIM.toString() + "-" + wordclass), 1.0);
        }
        if (this.constVars.usePhraseEvalWordVector) {
            Map<String, double[]> sims = ScorePhrasesLearnFeatWt.getSimilarities(word.getPhrase());
            if (sims == null) {
                // Not computed yet: compute the similarities for this single phrase, then re-read.
                Map<String, Collection<CandidatePhrase>> allPossibleNegativePhrases = this.getAllPossibleNegativePhrases(label);
                Set<CandidatePhrase> knownPositivePhrases = CollectionUtils.unionAsSet(this.constVars.getLearnedWords(label).keySet(), (Collection)this.constVars.getSeedLabelDictionary().get(label));
                this.computeSimWithWordVectors(Arrays.asList(word), knownPositivePhrases, allPossibleNegativePhrases, label);
                sims = ScorePhrasesLearnFeatWt.getSimilarities(word.getPhrase());
            }
            assert (sims != null) : " Why are there no similarities for " + word;
            double avgPosSim = sims.get(label)[PhraseScorer.Similarities.AVGSIM.ordinal()];
            double maxPosSim = sims.get(label)[PhraseScorer.Similarities.MAXSIM.ordinal()];
            double sumNeg = 0.0;
            // Start below every real value. Double.MIN_VALUE is the smallest POSITIVE double,
            // so seeding with it would hide every negative similarity.
            double maxNeg = Double.NEGATIVE_INFINITY;
            double allNumItems = 0.0;
            for (Map.Entry<String, double[]> simEn : sims.entrySet()) {
                if (simEn.getKey().equals(label)) continue;
                double numItems = simEn.getValue()[PhraseScorer.Similarities.NUMITEMS.ordinal()];
                sumNeg += simEn.getValue()[PhraseScorer.Similarities.AVGSIM.ordinal()] * numItems;
                allNumItems += numItems;
                double maxNegLabel = simEn.getValue()[PhraseScorer.Similarities.MAXSIM.ordinal()];
                if (maxNeg < maxNegLabel) {
                    maxNeg = maxNegLabel;
                }
            }
            // Nothing to aggregate: report 0.0 instead of NaN (0/0) or an infinite feature.
            double avgNegSim = allNumItems > 0.0 ? sumNeg / allNumItems : 0.0;
            if (maxNeg == Double.NEGATIVE_INFINITY) {
                maxNeg = 0.0;
            }
            scoreslist.setCount(ConstantsAndVariables.ScorePhraseMeasures.WORDVECPOSSIMAVG, avgPosSim);
            scoreslist.setCount(ConstantsAndVariables.ScorePhraseMeasures.WORDVECPOSSIMMAX, maxPosSim);
            scoreslist.setCount(ConstantsAndVariables.ScorePhraseMeasures.WORDVECNEGSIMAVG, avgNegSim);
            // Stored under the MAX key. It used to be written to WORDVECNEGSIMAVG, which
            // overwrote the average set on the previous line.
            // NOTE(review): assumes ScorePhraseMeasures declares WORDVECNEGSIMMAX alongside
            // WORDVECPOSSIMMAX - confirm in ConstantsAndVariables.
            scoreslist.setCount(ConstantsAndVariables.ScorePhraseMeasures.WORDVECNEGSIMMAX, maxNeg);
        }
        if (this.constVars.usePhraseEvalEditDistSame) {
            double ed = this.constVars.getEditDistanceScoresThisClass(label, word.getPhrase());
            assert (ed <= 1.0) : " how come edit distance from the true class is " + ed + " for word " + word;
            scoreslist.setCount(ConstantsAndVariables.ScorePhraseMeasures.EDITDISTSAME, ed);
        }
        if (this.constVars.usePhraseEvalEditDistOther) {
            double ed = this.constVars.getEditDistanceScoresOtherClass(label, word.getPhrase());
            assert (ed <= 1.0) : " how come edit distance from the true class is " + ed + " for word " + word;
            scoreslist.setCount(ConstantsAndVariables.ScorePhraseMeasures.EDITDISTOTHER, ed);
        }
        if (this.constVars.usePhraseEvalWordShape) {
            scoreslist.setCount(ConstantsAndVariables.ScorePhraseMeasures.WORDSHAPE, this.getWordShapeScore(word.getPhrase(), label));
        }
        if (this.constVars.usePhraseEvalWordShapeStr) {
            scoreslist.setCount(ConstantsAndVariables.ScorePhraseMeasures.create(ConstantsAndVariables.ScorePhraseMeasures.WORDSHAPESTR + "-" + this.wordShape(word.getPhrase())), 1.0);
        }
        if (this.constVars.usePhraseEvalFirstCapital) {
            scoreslist.setCount(ConstantsAndVariables.ScorePhraseMeasures.ISFIRSTCAPITAL, StringUtils.isCapitalized(word.getPhrase()) ? 1.0 : 0.0);
        }
        if (this.constVars.usePhraseEvalBOW) {
            // One indicator feature per whitespace-separated token.
            for (String s : word.getPhrase().split("\\s+")) {
                scoreslist.setCount(ConstantsAndVariables.ScorePhraseMeasures.create(ConstantsAndVariables.ScorePhraseMeasures.BOW + "-" + s), 1.0);
            }
        }
        this.phraseScoresRaw.setCounter(word, scoreslist);
        return scoreslist;
    }

    /**
     * An unordered pair of phrases: {@code ("a", "b")} and {@code ("b", "a")} are equal and
     * share a hash code. The two strings are stored in {@link String#compareTo} order.
     */
    private static class PhrasePair {
        /** The phrase that sorts first (or either one when both compare equal). */
        final String p1;
        /** The phrase that sorts second. */
        final String p2;
        /** Hash code computed once at construction time. */
        final int hashCode;

        /**
         * @param p1 one phrase of the pair; must not be null
         * @param p2 the other phrase of the pair; must not be null
         */
        public PhrasePair(String p1, String p2) {
            final boolean alreadyOrdered = p1.compareTo(p2) <= 0;
            this.p1 = alreadyOrdered ? p1 : p2;
            this.p2 = alreadyOrdered ? p2 : p1;
            // A plain sum is order-independent, so it agrees with the unordered equals().
            this.hashCode = 331 + p1.hashCode() + p2.hashCode();
        }

        @Override
        public int hashCode() {
            return hashCode;
        }

        @Override
        public boolean equals(Object o) {
            if (o instanceof PhrasePair) {
                PhrasePair other = (PhrasePair) o;
                boolean firstMatches = other.getPhrase1().equals(getPhrase1());
                return firstMatches && other.getPhrase2().equals(getPhrase2());
            }
            return false;
        }

        /** @return the phrase stored first */
        public String getPhrase1() {
            return p1;
        }

        /** @return the phrase stored second */
        public String getPhrase2() {
            return p2;
        }
    }

    /**
     * Worker that scans the sentences identified by {@code keys} and samples phrases for the
     * training set of one label. {@link #call()} returns five collections: positive phrases,
     * negative phrases, unknown phrases, phrases close to the positives and phrases close to
     * the negatives (the last two only populated when the matching expand flag is set).
     */
    public class ChooseDatumsThread
    implements Callable {
        // Ids of the sentences (keys into sents) this worker processes.
        Collection<String> keys;
        // Sentence id -> sentence data for the current batch.
        Map<String, DataInstance> sents;
        // Annotation key holding a token's label for answerLabel (looked up from constVars.getAnswerClass()).
        Class answerClass;
        // The label for which phrases are being sampled.
        String answerLabel;
        // Stored from the constructor; not read inside this class.
        TwoDimensionalCounter<CandidatePhrase, E> wordsPatExtracted;
        // Stored from the constructor; not read inside this class.
        Counter<E> allSelectedPatterns;
        // Stored from the constructor; not read inside this class.
        Counter<Integer> wordClassClustersOfPositive;
        // Passed to hasElement(...) and computeSimWithWordVectors(...); the caller in this file
        // supplies the result of getAllPossibleNegativePhrases(answerLabel).
        Map<String, Collection<CandidatePhrase>> allPossiblePhrases;
        // Whether phrases similar to the known positives should be collected.
        boolean expandPos;
        // Whether phrases similar to the known negatives should be collected.
        boolean expandNeg;

        /**
         * Stores the arguments and resolves {@code answerClass} for {@code label} from
         * {@code constVars.getAnswerClass()}.
         */
        public ChooseDatumsThread(String label, Map<String, DataInstance> sents, Collection<String> keys, TwoDimensionalCounter<CandidatePhrase, E> wordsPatExtracted, Counter<E> allSelectedPatterns, Counter<Integer> wordClassClustersOfPositive, Map<String, Collection<CandidatePhrase>> allPossiblePhrases, boolean expandPos, boolean expandNeg) {
            this.answerLabel = label;
            this.sents = sents;
            this.keys = keys;
            this.wordsPatExtracted = wordsPatExtracted;
            this.allSelectedPatterns = allSelectedPatterns;
            this.wordClassClustersOfPositive = wordClassClustersOfPositive;
            this.allPossiblePhrases = allPossiblePhrases;
            this.answerClass = ScorePhrasesLearnFeatWt.this.constVars.getAnswerClass().get(this.answerLabel);
            this.expandNeg = expandNeg;
            this.expandPos = expandPos;
        }

        /**
         * Walks every token of every assigned sentence and classifies its phrase.
         *
         * @return (positives, negatives, unknowns, close-to-positive similarities,
         *         close-to-negative similarities)
         * @throws RuntimeException if a token labelled {@code answerLabel} has no longest
         *         matched phrase recorded for that label
         */
        public Quintuple<Set<CandidatePhrase>, Set<CandidatePhrase>, Set<CandidatePhrase>, Counter<CandidatePhrase>, Counter<CandidatePhrase>> call() throws Exception {
            // Both generators use the fixed seed 10, so each worker draws the same sequence on every run.
            // r is handed to chooseUnknownPhrases; rneg drives the perSelectNeg sampling below.
            Random r = new Random(10L);
            Random rneg = new Random(10L);
            HashSet<CandidatePhrase> allPositivePhrases = new HashSet<CandidatePhrase>();
            HashSet<CandidatePhrase> allNegativePhrases = new HashSet<CandidatePhrase>();
            HashSet<CandidatePhrase> allUnknownPhrases = new HashSet<CandidatePhrase>();
            ClassicCounter<CandidatePhrase> allCloseToPositivePhrases = new ClassicCounter<CandidatePhrase>();
            ClassicCounter<CandidatePhrase> allCloseToNegativePhrases = new ClassicCounter<CandidatePhrase>();
            // Learned words plus seed dictionary entries for this label.
            Set<CandidatePhrase> knownPositivePhrases = CollectionUtils.unionAsSet(ScorePhrasesLearnFeatWt.this.constVars.getLearnedWords(this.answerLabel).keySet(), (Collection)ScorePhrasesLearnFeatWt.this.constVars.getSeedLabelDictionary().get(this.answerLabel));
            // Phrases whose similarity has already been computed by this worker.
            HashSet<CandidatePhrase> allConsideredPhrases = new HashSet<CandidatePhrase>();
            Map<Class, Object> otherIgnoreClasses = ScorePhrasesLearnFeatWt.this.constVars.getIgnoreWordswithClassesDuringSelection().get(this.answerLabel);
            // NOTE(review): numlabeled is incremented below but never read in this class.
            int numlabeled = 0;
            for (String sentid : this.keys) {
                DataInstance sentInst = this.sents.get(sentid);
                List<CoreLabel> value = sentInst.getTokens();
                CoreLabel[] sent = value.toArray(new CoreLabel[value.size()]);
                for (int i = 0; i < sent.length; ++i) {
                    double simneg;
                    double sim;
                    CoreLabel l = sent[i];
                    // --- Token labelled with answerLabel: candidate positive phrase. ---
                    if (l.get(this.answerClass).equals(this.answerLabel)) {
                        ++numlabeled;
                        CandidatePhrase candidate = (CandidatePhrase)((Map)l.get(PatternsAnnotations.LongestMatchedPhraseForEachLabel.class)).get(this.answerLabel);
                        if (candidate == null) {
                            throw new RuntimeException("for sentence id " + sentid + " and token id " + i + " candidate is null for " + l.word() + " and longest matching" + l.get(PatternsAnnotations.LongestMatchedPhraseForEachLabel.class) + " and matched phrases are " + l.get(PatternsAnnotations.MatchedPhrases.class));
                        }
                        // Fall back to the single token when the matched phrase has no raw-frequency entry.
                        if (!Data.rawFreq.containsKey(candidate)) {
                            candidate = CandidatePhrase.createOrGet(l.word());
                        }
                        // Skip phrases for which hasElement(...) is true or that match the ignore-word regex.
                        if (ScorePhrasesLearnFeatWt.hasElement(this.allPossiblePhrases, candidate, this.answerLabel) || PatternFactory.ignoreWordRegex.matcher(candidate.getPhrase()).matches()) continue;
                        allPositivePhrases.add(candidate);
                        continue;
                    }
                    // --- Token not labelled with answerLabel. ---
                    Map longestMatching = (Map)l.get(PatternsAnnotations.LongestMatchedPhraseForEachLabel.class);
                    boolean ignoreclass = false;
                    CandidatePhrase candidate = CandidatePhrase.createOrGet(l.word());
                    // The token is "ignored" if any of the configured ignore-class annotations is true on it;
                    // in that case prefer the phrase recorded under "OTHERSEM", if there is one.
                    for (Class cl : otherIgnoreClasses.keySet()) {
                        if (!((Boolean)l.get(cl)).booleanValue()) continue;
                        ignoreclass = true;
                        candidate = longestMatching.containsKey("OTHERSEM") ? (CandidatePhrase)longestMatching.get("OTHERSEM") : candidate;
                        break;
                    }
                    // Function words are treated like ignore-class tokens.
                    if (!ignoreclass) {
                        ignoreclass = ScorePhrasesLearnFeatWt.this.constVars.functionWords.contains(l.word());
                    }
                    boolean negative = false;
                    boolean add = false;
                    // A non-null longest match for any other label makes the token a negative;
                    // the matched phrase replaces the candidate only when it has a raw-frequency entry.
                    for (Map.Entry lo : longestMatching.entrySet()) {
                        if (((String)lo.getKey()).equals(this.answerLabel) || lo.getValue() == null) continue;
                        negative = true;
                        add = true;
                        if (!Data.rawFreq.containsKey((CandidatePhrase)lo.getValue())) continue;
                        candidate = (CandidatePhrase)lo.getValue();
                    }
                    if (!negative && ignoreclass) {
                        add = true;
                    }
                    // Sub-sample negatives: rneg is only advanced when add is true (short-circuit &&).
                    if (add && rneg.nextDouble() < ScorePhrasesLearnFeatWt.this.constVars.perSelectNeg) {
                        assert (!candidate.getPhrase().isEmpty());
                        allNegativePhrases.add(candidate);
                    }
                    // Similarity-based expansion is attempted only for tokens that are neither negative nor
                    // ignored, when at least one expand flag is set, and only once per phrase.
                    if (negative || ignoreclass || !this.expandPos && !this.expandNeg || ScorePhrasesLearnFeatWt.hasElement(this.allPossiblePhrases, candidate, this.answerLabel) || PatternFactory.ignoreWordRegex.matcher(candidate.getPhrase()).matches() || allConsideredPhrases.contains(candidate)) continue;
                    // NOTE(review): candidate was already dereferenced above, so this assert cannot fire here.
                    assert (candidate != null);
                    // first() is read as the similarity to positives, second() as the similarity to negatives.
                    Pair sims = ScorePhrasesLearnFeatWt.this.constVars.useWordVectorsToComputeSim ? ScorePhrasesLearnFeatWt.this.computeSimWithWordVectors(Arrays.asList(candidate), (Collection<CandidatePhrase>)knownPositivePhrases, this.allPossiblePhrases, this.answerLabel) : ScorePhrasesLearnFeatWt.this.computeSimWithWordCluster(Arrays.asList(candidate), knownPositivePhrases, new AtomicDouble());
                    boolean addedAsPos = false;
                    if (this.expandPos && (sim = ((Counter)sims.first()).getCount(candidate)) > ScorePhrasesLearnFeatWt.this.constVars.similarityThresholdHighPrecision) {
                        allCloseToPositivePhrases.setCount(candidate, sim);
                        addedAsPos = true;
                    }
                    // A phrase already taken as close-to-positive is never also taken as close-to-negative.
                    if (this.expandNeg && !addedAsPos && (simneg = ((Counter)sims.second()).getCount(candidate)) > ScorePhrasesLearnFeatWt.this.constVars.similarityThresholdHighPrecision) {
                        allCloseToNegativePhrases.setCount(candidate, simneg);
                    }
                    allConsideredPhrases.add(candidate);
                }
                // NOTE(review): Math.max(0, Integer.MAX_VALUE) always evaluates to Integer.MAX_VALUE.
                allUnknownPhrases.addAll(ScorePhrasesLearnFeatWt.this.chooseUnknownPhrases(sentInst, r, ScorePhrasesLearnFeatWt.this.constVars.perSelectRand, ScorePhrasesLearnFeatWt.this.constVars.getAnswerClass().get(this.answerLabel), this.answerLabel, Math.max(0, Integer.MAX_VALUE)));
            }
            return new Quintuple<Set<CandidatePhrase>, Set<CandidatePhrase>, Set<CandidatePhrase>, Counter<CandidatePhrase>, Counter<CandidatePhrase>>(allPositivePhrases, allNegativePhrases, allUnknownPhrases, allCloseToPositivePhrases, allCloseToNegativePhrases);
        }
    }

    /**
     * Task that computes, for a list of candidate phrases, a pair of similarity counters by
     * delegating either to {@code computeSimWithWordVectors} or to
     * {@code computeSimWithWordCluster}, depending on
     * {@code constVars.useWordVectorsToComputeSim}.
     */
    class ComputeSim
    implements Callable<Pair<Counter<CandidatePhrase>, Counter<CandidatePhrase>>> {
        /** Phrases whose similarities are computed. */
        List<CandidatePhrase> candidatePhrases;
        /** Label passed to the word-vector computation. */
        String label;
        /** Accumulator passed to the word-cluster computation. */
        AtomicDouble allMaxSim;
        /** Known positive phrases. */
        Collection<CandidatePhrase> positivePhrases;
        /** Known negative phrases, keyed by string; only used by the word-vector computation. */
        Map<String, Collection<CandidatePhrase>> knownNegativePhrases;

        public ComputeSim(String label, List<CandidatePhrase> candidatePhrases, AtomicDouble allMaxSim, Collection<CandidatePhrase> positivePhrases, Map<String, Collection<CandidatePhrase>> knownNegativePhrases) {
            this.candidatePhrases = candidatePhrases;
            this.positivePhrases = positivePhrases;
            this.knownNegativePhrases = knownNegativePhrases;
            this.allMaxSim = allMaxSim;
            this.label = label;
        }

        /**
         * @return the pair of counters produced by the selected similarity computation
         */
        @Override
        public Pair<Counter<CandidatePhrase>, Counter<CandidatePhrase>> call() throws Exception {
            // Cluster-based similarity is the fallback when word vectors are switched off.
            if (!ScorePhrasesLearnFeatWt.this.constVars.useWordVectorsToComputeSim) {
                return ScorePhrasesLearnFeatWt.this.computeSimWithWordCluster(this.candidatePhrases, this.positivePhrases, this.allMaxSim);
            }
            Pair vectorSimilarities = ScorePhrasesLearnFeatWt.this.computeSimWithWordVectors(this.candidatePhrases, (Collection<CandidatePhrase>)this.positivePhrases, this.knownNegativePhrases, this.label);
            Redwood.log(Redwood.DBG, "Computed similarities with positive and negative phrases");
            return vectorSimilarities;
        }
    }

    /**
     * Kinds of classifier that can be used to score phrases. The constant order is part of
     * the type's contract ({@code ordinal()}/{@code values()}) and must not change.
     *
     * <p>In this file, {@code LR} is used with {@code LogisticClassifier}, {@code SHIFTLR}
     * with {@code MultinomialLogisticClassifier}, and {@code DT}, {@code RF}, {@code SVM} and
     * {@code LINEAR} with the generic {@code Classifier.scoresOf} API.
     */
    public enum ClassifierType {
        DT, LR, RF, SVM, SHIFTLR, LINEAR
    }
}

