/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.coref.statistical;

import edu.stanford.nlp.coref.CorefDocumentProcessor;
import edu.stanford.nlp.coref.CorefUtils;
import edu.stanford.nlp.coref.data.Document;
import edu.stanford.nlp.coref.statistical.StatisticalCorefTrainer;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.util.Pair;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.stream.Collectors;

/**
 * Collects the labeled mention pairs of each processed document, optionally
 * sub-sampling them, and writes the accumulated map to
 * {@code StatisticalCorefTrainer.datasetFile} when {@link #finish()} is called.
 *
 * <p>Two sub-sampling steps are applied per document, in this order:
 * <ol>
 *   <li>if the fraction of positively labeled pairs is below
 *       {@code minClassImbalancedPerDocument}, randomly chosen negative pairs are
 *       dropped until the fraction is approximately that minimum;</li>
 *   <li>if more than {@code maxExamplesPerDocument} pairs remain, randomly chosen
 *       mentions (the second element of a pair) are dropped together with all of
 *       their pairs until the document fits under the cap.</li>
 * </ol>
 *
 * <p>All random choices are drawn from a single {@link Random} seeded with
 * {@code 0}, so the sampling is driven by one fixed-seed generator.
 */
public class DatasetBuilder
implements CorefDocumentProcessor {
    /** Upper bound on the number of labeled pairs kept for one document. */
    private final int maxExamplesPerDocument;
    /** Minimum fraction of positive pairs required before negatives are dropped. */
    private final double minClassImbalancedPerDocument;
    /** Document id -> (mention pair -> label) for every document processed so far. */
    private final Map<Integer, Map<Pair<Integer, Integer>, Boolean>> mentionPairs;
    /** Fixed-seed generator used for every sampling decision. */
    private final Random random;

    /**
     * Creates a builder that performs no sub-sampling: minimum positive fraction
     * {@code 0.0} and a cap of {@link Integer#MAX_VALUE} pairs per document.
     */
    public DatasetBuilder() {
        this(0.0, Integer.MAX_VALUE);
    }

    /**
     * Creates a builder with the given sub-sampling settings.
     *
     * @param minClassImbalancedPerDocument minimum fraction of positive pairs per
     *        document; negatives are dropped when the actual fraction is lower
     * @param maxExamplesPerDocument maximum number of pairs kept per document
     */
    public DatasetBuilder(double minClassImbalancedPerDocument, int maxExamplesPerDocument) {
        this.maxExamplesPerDocument = maxExamplesPerDocument;
        this.minClassImbalancedPerDocument = minClassImbalancedPerDocument;
        this.mentionPairs = new HashMap<>();
        this.random = new Random(0L);
    }

    /**
     * Fetches the labeled mention pairs of {@code document}, applies both
     * sub-sampling steps to that map in place, and stores it under {@code id}.
     *
     * @param id key under which the document's pairs are stored
     * @param document document whose labeled mention pairs are collected
     */
    @Override
    public void process(int id, Document document) {
        Map<Pair<Integer, Integer>, Boolean> labeledPairs = CorefUtils.getLabeledMentionPairs(document);
        dropSurplusNegatives(labeledPairs);
        enforceExampleCap(labeledPairs);
        this.mentionPairs.put(id, labeledPairs);
    }

    /**
     * Removes randomly chosen negative pairs when the positive fraction of
     * {@code labeledPairs} is below {@code minClassImbalancedPerDocument}.
     */
    private void dropSurplusNegatives(Map<Pair<Integer, Integer>, Boolean> labeledPairs) {
        long positiveCount = labeledPairs.values().stream().filter(label -> label).count();
        List<Pair<Integer, Integer>> negatives = labeledPairs.entrySet().stream()
                .filter(entry -> !entry.getValue())
                .map(Map.Entry::getKey)
                .collect(Collectors.toList());

        long total = positiveCount + negatives.size();
        if (total == 0L) {
            // No pairs at all: the ratio is undefined and there is nothing to drop.
            return;
        }

        // Computed in double so the comparison against the double threshold is not
        // skewed by float rounding.
        double positiveFraction = (double) positiveCount / (double) total;
        if (positiveFraction < this.minClassImbalancedPerDocument) {
            // Number of negatives that yields exactly the minimum positive fraction.
            // Clamped so a threshold above 1 (negative result) or floating-point
            // rounding can never produce an out-of-range index.
            double target = (double) positiveCount / this.minClassImbalancedPerDocument - (double) positiveCount;
            int keep = (int) Math.min((double) negatives.size(), Math.max(0.0, target));

            // Shuffle with the seeded generator, not the JDK default source, so the
            // choice of dropped negatives comes from the same fixed-seed stream.
            Collections.shuffle(negatives, this.random);
            for (Pair<Integer, Integer> surplus : negatives.subList(keep, negatives.size())) {
                labeledPairs.remove(surplus);
            }
        }
    }

    /**
     * Removes randomly chosen mentions, with all pairs whose second element is that
     * mention, until at most {@code maxExamplesPerDocument} pairs remain.
     */
    private void enforceExampleCap(Map<Pair<Integer, Integer>, Boolean> labeledPairs) {
        if (labeledPairs.size() <= this.maxExamplesPerDocument) {
            // Already under the cap; skip building the index.
            return;
        }

        // Index: mention (pair.second) -> candidate antecedents (pair.first).
        Map<Integer, List<Integer>> antecedentsByMention = new HashMap<>();
        for (Pair<Integer, Integer> pair : labeledPairs.keySet()) {
            antecedentsByMention.computeIfAbsent(pair.second, key -> new ArrayList<>()).add(pair.first);
        }

        List<Integer> mentions = new ArrayList<>(antecedentsByMention.keySet());
        // The emptiness check guards Random.nextInt(0), which would otherwise be
        // reached when the cap is negative.
        while (labeledPairs.size() > this.maxExamplesPerDocument && !mentions.isEmpty()) {
            Integer mention = mentions.remove(this.random.nextInt(mentions.size()));
            for (Integer candidateAntecedent : antecedentsByMention.get(mention)) {
                labeledPairs.remove(new Pair<Integer, Integer>(candidateAntecedent, mention));
            }
        }
    }

    /**
     * Writes the accumulated per-document pair map to
     * {@code StatisticalCorefTrainer.datasetFile}.
     *
     * @throws Exception if {@code IOUtils.writeObjectToFile} fails
     */
    @Override
    public void finish() throws Exception {
        IOUtils.writeObjectToFile(this.mentionPairs, StatisticalCorefTrainer.datasetFile);
    }
}

