/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.quoteattribution.Sieves.training;

import edu.stanford.nlp.classify.Classifier;
import edu.stanford.nlp.classify.GeneralDataset;
import edu.stanford.nlp.classify.RVFDataset;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.RVFDatum;
import edu.stanford.nlp.paragraphs.ParagraphAnnotator;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.QuoteAttributionAnnotator;
import edu.stanford.nlp.quoteattribution.ChapterAnnotator;
import edu.stanford.nlp.quoteattribution.ExtractQuotesClassifier;
import edu.stanford.nlp.quoteattribution.ExtractQuotesUtil;
import edu.stanford.nlp.quoteattribution.Person;
import edu.stanford.nlp.quoteattribution.QuoteAttributionUtils;
import edu.stanford.nlp.quoteattribution.Sieves.Sieve;
import edu.stanford.nlp.quoteattribution.XMLToAnnotation;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.StringUtils;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Set;

public class SupervisedSieveTraining {
    // Sieve instance rebuilt at the start of every featurize() call and used by that call's feature extraction.
    private static Sieve sieve;
    // Token words that produce a "punctuationPresence:" feature when found between a mention and a quote.
    public static final Set<String> punctuation;
    // Lemmas that produce "prevWordPunct:"/"nextWordPunct:" features for the tokens adjacent to a mention.
    public static final Set<String> punctuationForFeatures;

    /**
     * Finds the begin-token index of the paragraph that contains {@code sentence}.
     *
     * <p>Scans the sentences preceding {@code sentence} and, for as long as they carry the same
     * paragraph index, keeps moving the result back to their {@code TokenBeginAnnotation}.
     *
     * @param sentence  a sentence of the paragraph of interest
     * @param sentences all sentences of the document, indexed by {@code SentenceIndexAnnotation}
     * @return the {@code TokenBeginAnnotation} of the earliest contiguous sentence sharing the paragraph index
     */
    private static int getParagraphBeginToken(CoreMap sentence, List<CoreMap> sentences) {
        int targetParagraph = sentence.get(CoreAnnotations.ParagraphIndexAnnotation.class);
        int beginToken = sentence.get(CoreAnnotations.TokenBeginAnnotation.class);
        int idx = sentence.get(CoreAnnotations.SentenceIndexAnnotation.class) - 1;
        while (idx >= 0) {
            CoreMap earlier = sentences.get(idx);
            if (earlier.get(CoreAnnotations.ParagraphIndexAnnotation.class) != targetParagraph) {
                // Left the paragraph: the last recorded begin token is the paragraph start.
                break;
            }
            beginToken = earlier.get(CoreAnnotations.TokenBeginAnnotation.class);
            idx--;
        }
        return beginToken;
    }

    /**
     * Finds the index of the last token of the paragraph that contains {@code sentence}.
     *
     * <p>Scans forward starting at {@code sentence} itself and, for as long as the sentences carry
     * the same paragraph index, keeps moving the result to their {@code TokenEndAnnotation - 1}.
     *
     * @param sentence  a sentence of the paragraph of interest
     * @param sentences all sentences of the document, indexed by {@code SentenceIndexAnnotation}
     * @return {@code TokenEndAnnotation - 1} of the last contiguous sentence sharing the paragraph index
     */
    private static int getParagraphEndToken(CoreMap sentence, List<CoreMap> sentences) {
        int targetParagraph = sentence.get(CoreAnnotations.ParagraphIndexAnnotation.class);
        int endToken = sentence.get(CoreAnnotations.TokenEndAnnotation.class) - 1;
        int idx = sentence.get(CoreAnnotations.SentenceIndexAnnotation.class);
        while (idx < sentences.size()) {
            CoreMap later = sentences.get(idx);
            if (later.get(CoreAnnotations.ParagraphIndexAnnotation.class) != targetParagraph) {
                // Entered the next paragraph: the last recorded end token is the paragraph end.
                break;
            }
            endToken = later.get(CoreAnnotations.TokenEndAnnotation.class) - 1;
            idx++;
        }
        return endToken;
    }

    /**
     * Groups the document's quotes by the paragraph index of each quote's first sentence.
     *
     * @param doc annotation providing {@code QuotationsAnnotation} and {@code SentencesAnnotation}
     * @return map from paragraph index to the quotes starting in that paragraph, each list in
     *         the order the quotes appear in {@code QuotationsAnnotation}
     */
    private static Map<Integer, List<CoreMap>> getQuotesInParagraph(Annotation doc) {
        List<CoreMap> quotes = doc.get(CoreAnnotations.QuotationsAnnotation.class);
        List<CoreMap> sentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
        Map<Integer, List<CoreMap>> paragraphToQuotes = new HashMap<>();
        for (CoreMap quote : quotes) {
            CoreMap firstSentence = sentences.get(quote.get(CoreAnnotations.SentenceBeginAnnotation.class));
            Integer paragraphIdx = firstSentence.get(CoreAnnotations.ParagraphIndexAnnotation.class);
            // Create the bucket only on first use, then append in a single lookup.
            paragraphToQuotes.computeIfAbsent(paragraphIdx, k -> new ArrayList<>()).add(quote);
        }
        return paragraphToQuotes;
    }

    /**
     * Returns the parts of {@code originalRange} that are not covered by the ranges in
     * {@code exclusionList}. All ranges are inclusive token index pairs.
     *
     * <p>NOTE(review): the walk assumes {@code exclusionList} is sorted by start index and
     * non-overlapping; confirm against the caller.
     *
     * @param originalRange inclusive range to carve up
     * @param exclusionList inclusive ranges to remove from {@code originalRange}
     * @return leftover ranges in ascending order; a gap before an exclusion is kept when it holds
     *         at least one token, the tail after the last exclusion only when it holds at least two
     */
    private static List<Pair<Integer, Integer>> getRangeExclusion(Pair<Integer, Integer> originalRange, List<Pair<Integer, Integer>> exclusionList) {
        List<Pair<Integer, Integer>> leftoverRanges = new ArrayList<>();
        final int rangeEnd = originalRange.second;
        int remainingStart = originalRange.first;
        for (Pair<Integer, Integer> exRange : exclusionList) {
            int gapEnd = exRange.first - 1;
            if (gapEnd >= remainingStart) {
                leftoverRanges.add(new Pair<Integer, Integer>(remainingStart, gapEnd));
            }
            // Unbox before comparing: boxed Integers above the cache range are distinct objects,
            // so a reference comparison would miss equal values.
            int exclusionEnd = exRange.second;
            // Always step past the exclusion, so an exclusion that reaches the end of the range
            // leaves nothing behind instead of re-emitting the pre-exclusion range.
            remainingStart = exclusionEnd + 1;
            if (exclusionEnd == rangeEnd) {
                break;
            }
        }
        // Strict comparison kept on purpose: a single-token tail is not reported.
        if (remainingStart < rangeEnd) {
            leftoverRanges.add(new Pair<Integer, Integer>(remainingStart, rangeEnd));
        }
        return leftoverRanges;
    }

    /**
     * Builds one feature vector per (quote, candidate mention) pair.
     *
     * <p>For every quote the candidate mentions are:
     * <ul>
     *   <li>the de-duplicated mentions found backward in the span from the first token of the
     *       paragraph preceding the quote's paragraph up to the token before the quote, and</li>
     *   <li>the mentions found forward in the span from the token after the quote up to the end
     *       of the quote's paragraph.</li>
     * </ul>
     * Each candidate becomes an {@code RVFDatum} labelled {@code "isMention"} /
     * {@code "isNotMention"} when training (depending on whether the gold mention range contains
     * the candidate) or {@code "none"} otherwise. The datum ID and the key in
     * {@code mapDatumToMention} are both the datum's index in the dataset.
     *
     * <p>Side effect: replaces the class-level {@code sieve} with one built from {@code sd}.
     *
     * @param sd         document plus character map, pronoun coreference map and animacy list
     * @param goldList   gold quote info, index-aligned with the document's quotes; only read when
     *                   {@code isTraining} is true
     * @param isTraining whether to label datums from {@code goldList}; quotes whose gold speaker
     *                   is the empty string are skipped entirely in that case
     * @return the dataset, the datum-index to mention map, and for each processed quote the
     *         inclusive datum index range it produced
     * @throws RuntimeException if training and the gold list size differs from the number of
     *                          quotes, or if a quote cannot be found among its paragraph's quotes
     */
    public static FeaturesData featurize(SieveData sd, List<XMLToAnnotation.GoldQuoteInfo> goldList, boolean isTraining) {
        Annotation doc = sd.doc;
        sieve = new Sieve(doc, sd.characterMap, sd.pronounCorefMap, sd.animacyList);
        List<CoreMap> quotes = doc.get(CoreAnnotations.QuotationsAnnotation.class);
        List<CoreMap> sentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
        List<CoreLabel> tokens = doc.get(CoreAnnotations.TokensAnnotation.class);
        Map<Integer, List<CoreMap>> paragraphToQuotes = SupervisedSieveTraining.getQuotesInParagraph(doc);
        RVFDataset<String, String> dataset = new RVFDataset<String, String>();
        Map<Integer, Pair<Integer, Integer>> mapQuoteToDataRange = new HashMap<Integer, Pair<Integer, Integer>>();
        Map<Integer, Sieve.MentionData> mapDatumToMention = new HashMap<Integer, Sieve.MentionData>();
        if (isTraining && goldList.size() != quotes.size()) {
            throw new RuntimeException("Gold Quote List size doesn't match quote list size!");
        }
        for (int quoteIdx = 0; quoteIdx < quotes.size(); ++quoteIdx) {
            int initialSize = dataset.size();
            CoreMap quote = quotes.get(quoteIdx);
            XMLToAnnotation.GoldQuoteInfo gold = null;
            if (isTraining) {
                gold = goldList.get(quoteIdx);
                // Value comparison: a reference check against "" only matches interned literals.
                if ("".equals(gold.speaker)) {
                    continue;
                }
            }
            CoreMap quoteFirstSentence = sentences.get(quote.get(CoreAnnotations.SentenceBeginAnnotation.class));
            Pair<Integer, Integer> quoteRun = new Pair<Integer, Integer>(quote.get(CoreAnnotations.TokenBeginAnnotation.class), quote.get(CoreAnnotations.TokenEndAnnotation.class));
            int quoteParagraphIdx = quoteFirstSentence.get(CoreAnnotations.ParagraphIndexAnnotation.class);

            // Backward span: from the start of the preceding paragraph to the token before the quote.
            int rightValue = quoteRun.first - 1;
            int leftValue = quoteRun.first - 1;
            for (int sentIdx = quote.get(CoreAnnotations.SentenceBeginAnnotation.class); sentIdx >= 0; --sentIdx) {
                CoreMap sentence = sentences.get(sentIdx);
                int sentenceParagraphIdx = sentence.get(CoreAnnotations.ParagraphIndexAnnotation.class);
                if (sentenceParagraphIdx == quoteParagraphIdx) {
                    continue;
                }
                if (sentenceParagraphIdx != quoteParagraphIdx - 1) {
                    break;
                }
                leftValue = sentence.get(CoreAnnotations.TokenBeginAnnotation.class);
            }
            List<Sieve.MentionData> mentionsInPreviousParagraph = new ArrayList<Sieve.MentionData>();
            if (leftValue > -1 && rightValue > -1) {
                mentionsInPreviousParagraph = SupervisedSieveTraining.eliminateDuplicates(sieve.findClosestMentionsInSpanBackward(new Pair<Integer, Integer>(leftValue, rightValue)));
            }

            // Forward span: from the token after the quote to the end of the quote's paragraph.
            leftValue = quoteRun.second + 1;
            rightValue = quoteRun.second + 1;
            for (int sentIdx = quote.get(CoreAnnotations.SentenceEndAnnotation.class); sentIdx < sentences.size(); ++sentIdx) {
                CoreMap sentence = sentences.get(sentIdx);
                if (sentence.get(CoreAnnotations.ParagraphIndexAnnotation.class) != quoteParagraphIdx) {
                    break;
                }
                rightValue = sentence.get(CoreAnnotations.TokenEndAnnotation.class) - 1;
            }
            List<Sieve.MentionData> mentionsInNextParagraph = new ArrayList<Sieve.MentionData>();
            if (leftValue < tokens.size() && rightValue < tokens.size()) {
                mentionsInNextParagraph = sieve.findClosestMentionsInSpanForward(new Pair<Integer, Integer>(leftValue, rightValue));
            }

            List<Sieve.MentionData> candidateMentions = new ArrayList<Sieve.MentionData>();
            candidateMentions.addAll(mentionsInPreviousParagraph);
            candidateMentions.addAll(mentionsInNextParagraph);
            int rankedDistance = 1;
            int numBackwards = mentionsInPreviousParagraph.size();
            for (Sieve.MentionData mentionData : candidateMentions) {
                ClassicCounter<String> features = new ClassicCounter<String>();

                // Token distance between mention and quote; candidates overlapping the quote are skipped.
                boolean isLeft = true;
                int distance = quoteRun.first - mentionData.end;
                if (distance < 0) {
                    isLeft = false;
                    distance = mentionData.begin - quoteRun.second;
                }
                if (distance < 0) {
                    continue;
                }
                features.setCount("wordDistance", distance);
                List<CoreLabel> betweenTokens = isLeft ? tokens.subList(mentionData.end + 1, quoteRun.first) : tokens.subList(quoteRun.second + 1, mentionData.begin);
                for (CoreLabel token : betweenTokens) {
                    if (!punctuation.contains(token.word())) {
                        continue;
                    }
                    features.setCount("punctuationPresence:" + token.word(), 1.0);
                }
                features.setCount("rankedDistance", rankedDistance);
                if (++rankedDistance == numBackwards) {
                    rankedDistance = 1;
                }

                // Locate the mention's paragraph relative to the quote's paragraph.
                int mentionParagraphIdx = -1;
                CoreMap sentenceInMentionParagraph = null;
                int quoteParagraphBeginToken = SupervisedSieveTraining.getParagraphBeginToken(quoteFirstSentence, sentences);
                int quoteParagraphEndToken = SupervisedSieveTraining.getParagraphEndToken(quoteFirstSentence, sentences);
                boolean mentionInQuoteParagraph = quoteParagraphBeginToken <= mentionData.begin && mentionData.end <= quoteParagraphEndToken;
                if (isLeft) {
                    if (mentionInQuoteParagraph) {
                        features.setCount("leftParagraphDistance", 0.0);
                        mentionParagraphIdx = quoteParagraphIdx;
                        sentenceInMentionParagraph = quoteFirstSentence;
                    } else {
                        int paragraphDistance = 1;
                        CoreMap currSentence = quoteFirstSentence;
                        int currSentenceIdx = currSentence.get(CoreAnnotations.SentenceIndexAnnotation.class);
                        for (int currParagraphIdx = quoteParagraphIdx - paragraphDistance; currParagraphIdx >= 0; --currParagraphIdx) {
                            // Walk back to some sentence of the paragraph being examined.
                            while (currSentence.get(CoreAnnotations.ParagraphIndexAnnotation.class) != currParagraphIdx) {
                                currSentence = sentences.get(--currSentenceIdx);
                            }
                            int prevParagraphBegin = SupervisedSieveTraining.getParagraphBeginToken(currSentence, sentences);
                            int prevParagraphEnd = SupervisedSieveTraining.getParagraphEndToken(currSentence, sentences);
                            if (prevParagraphBegin <= mentionData.begin && mentionData.end <= prevParagraphEnd) {
                                mentionParagraphIdx = currParagraphIdx;
                                sentenceInMentionParagraph = currSentence;
                                features.setCount("leftParagraphDistance", paragraphDistance);
                                if (paragraphDistance % 2 == 0) {
                                    features.setCount("leftParagraphDistanceEven", 1.0);
                                }
                                break;
                            }
                            ++paragraphDistance;
                        }
                    }
                } else if (mentionInQuoteParagraph) {
                    features.setCount("rightParagraphDistance", 0.0);
                    sentenceInMentionParagraph = quoteFirstSentence;
                    mentionParagraphIdx = quoteParagraphIdx;
                } else {
                    int paragraphDistance = 1;
                    int nextParagraphIndex = quoteParagraphIdx + paragraphDistance;
                    CoreMap currSentence = quoteFirstSentence;
                    int currSentenceIdx = currSentence.get(CoreAnnotations.SentenceIndexAnnotation.class);
                    while (currSentenceIdx < sentences.size()) {
                        // Walk forward to some sentence of the paragraph being examined.
                        while (currSentence.get(CoreAnnotations.ParagraphIndexAnnotation.class) != nextParagraphIndex) {
                            currSentence = sentences.get(++currSentenceIdx);
                        }
                        int nextParagraphBegin = SupervisedSieveTraining.getParagraphBeginToken(currSentence, sentences);
                        int nextParagraphEnd = SupervisedSieveTraining.getParagraphEndToken(currSentence, sentences);
                        if (nextParagraphBegin <= mentionData.begin && mentionData.end <= nextParagraphEnd) {
                            // Record the paragraph index like the other branches do, so the
                            // per-paragraph quote lookup below uses the right key.
                            mentionParagraphIdx = nextParagraphIndex;
                            sentenceInMentionParagraph = currSentence;
                            features.setCount("rightParagraphDistance", paragraphDistance);
                            break;
                        }
                        ++paragraphDistance;
                        ++nextParagraphIndex;
                    }
                }

                // Features describing the mention's paragraph, only when it differs from the quote's.
                if (sentenceInMentionParagraph != null) {
                    int mentionParagraphBegin = SupervisedSieveTraining.getParagraphBeginToken(sentenceInMentionParagraph, sentences);
                    int mentionParagraphEnd = SupervisedSieveTraining.getParagraphEndToken(sentenceInMentionParagraph, sentences);
                    if (mentionParagraphBegin != quoteParagraphBeginToken || mentionParagraphEnd != quoteParagraphEndToken) {
                        List<CoreMap> quotesInMentionParagraph = paragraphToQuotes.getOrDefault(mentionParagraphIdx, new ArrayList<CoreMap>());
                        Pair<ArrayList<String>, ArrayList<Pair<Integer, Integer>>> namesInMentionParagraph = sieve.scanForNames(new Pair<Integer, Integer>(mentionParagraphBegin, mentionParagraphEnd));
                        features.setCount("quotesInMentionParagraph", quotesInMentionParagraph.size());
                        features.setCount("wordsInMentionParagraph", mentionParagraphEnd - mentionParagraphBegin + 1);
                        features.setCount("namesInMentionParagraph", namesInMentionParagraph.first.size());
                        Pair<Integer, Integer> mentionRange = new Pair<Integer, Integer>(mentionData.begin, mentionData.end);
                        for (int i = 0; i < namesInMentionParagraph.second.size(); ++i) {
                            if (!ExtractQuotesUtil.rangeContains(mentionRange, namesInMentionParagraph.second.get(i))) {
                                continue;
                            }
                            features.setCount("orderInParagraph", i);
                        }
                        if (quotesInMentionParagraph.size() == 1) {
                            CoreMap onlyQuote = quotesInMentionParagraph.get(0);
                            boolean quoteSpansParagraph = onlyQuote.get(CoreAnnotations.TokenBeginAnnotation.class) == mentionParagraphBegin
                                    && onlyQuote.get(CoreAnnotations.TokenEndAnnotation.class) - 1 == mentionParagraphEnd;
                            features.setCount("mentionParagraphIsInConversation", quoteSpansParagraph ? 1.0 : -1.0);
                        }
                        for (CoreMap quoteIMP : quotesInMentionParagraph) {
                            Pair<Integer, Integer> quoteIMPRange = new Pair<Integer, Integer>(quoteIMP.get(CoreAnnotations.TokenBeginAnnotation.class), quoteIMP.get(CoreAnnotations.TokenEndAnnotation.class) - 1);
                            if (!ExtractQuotesUtil.rangeContains(quoteIMPRange, new Pair<Integer, Integer>(mentionData.begin, mentionData.end))) {
                                continue;
                            }
                            features.setCount("mentionInQuote", 1.0);
                        }
                        if (features.getCount("mentionInQuote") != 1.0) {
                            features.setCount("mentionNotInQuote", 1.0);
                        }
                    }
                }

                // Part-of-speech and punctuation of the tokens adjacent to the mention.
                if (mentionData.begin > 0) {
                    CoreLabel prevWord = tokens.get(mentionData.begin - 1);
                    features.setCount("prevWordType:" + prevWord.tag(), 1.0);
                    if (punctuationForFeatures.contains(prevWord.lemma())) {
                        features.setCount("prevWordPunct:" + prevWord.lemma(), 1.0);
                    }
                }
                if (mentionData.end + 1 < tokens.size()) {
                    CoreLabel nextWord = tokens.get(mentionData.end + 1);
                    features.setCount("nextWordType:" + nextWord.tag(), 1.0);
                    if (punctuationForFeatures.contains(nextWord.lemma())) {
                        features.setCount("nextWordPunct:" + nextWord.lemma(), 1.0);
                    }
                }

                // Features describing the quote and its paragraph.
                List<CoreMap> quotesInQuoteParagraph = paragraphToQuotes.get(quoteParagraphIdx);
                features.setCount("QuotesInQuoteParagraph", quotesInQuoteParagraph.size());
                features.setCount("WordsInQuoteParagraph", quoteParagraphEndToken - quoteParagraphBeginToken + 1);
                features.setCount("NamesInQuoteParagraph", sieve.scanForNames(new Pair<Integer, Integer>(quoteParagraphBeginToken, quoteParagraphEndToken)).first.size());
                features.setCount("quoteLength", quote.get(CoreAnnotations.TokenEndAnnotation.class) - quote.get(CoreAnnotations.TokenBeginAnnotation.class) + 1);
                for (int i = 0; i < quotesInQuoteParagraph.size(); ++i) {
                    if (!quotesInQuoteParagraph.get(i).equals(quote)) {
                        continue;
                    }
                    features.setCount("quotePosition", i + 1);
                }
                if (features.getCount("quotePosition") == 0.0) {
                    throw new RuntimeException("Check this (equality not working)");
                }
                Pair<ArrayList<String>, ArrayList<Pair<Integer, Integer>>> namesData = sieve.scanForNames(quoteRun);
                for (String name : namesData.first) {
                    features.setCount("charactersInQuote:" + sd.characterMap.get(name).get(0).name, 1.0);
                }
                if (quote.get(CoreAnnotations.TokenBeginAnnotation.class) == quoteParagraphBeginToken && quote.get(CoreAnnotations.TokenEndAnnotation.class) == quoteParagraphEndToken) {
                    features.setCount("isImplicitSpeaker", 1.0);
                } else {
                    features.setCount("isImplicitSpeaker", -1.0);
                }

                // Name mentions: does the same character appear in this quote or in the previous paragraph?
                if (mentionData.type.equals("name")) {
                    List<Person> pList = sd.characterMap.get(sieve.tokenRangeToString(new Pair<Integer, Integer>(mentionData.begin, mentionData.end)));
                    Person p = null;
                    if (pList != null) {
                        p = pList.get(0);
                    } else {
                        // Fall back to the first name the sieve recognises inside the mention span.
                        Pair<ArrayList<String>, ArrayList<Pair<Integer, Integer>>> scanForNamesResultPair = sieve.scanForNames(new Pair<Integer, Integer>(mentionData.begin, mentionData.end));
                        if (scanForNamesResultPair.first.size() != 0) {
                            String scanForNamesResultString = scanForNamesResultPair.first.get(0);
                            if (scanForNamesResultString != null && sd.characterMap.containsKey(scanForNamesResultString)) {
                                p = sd.characterMap.get(scanForNamesResultString).get(0);
                            }
                        }
                    }
                    if (p != null) {
                        for (String name : namesData.first) {
                            if (!p.aliases.contains(name)) {
                                continue;
                            }
                            features.setCount("nameInQuote", 1.0);
                        }
                        if (quoteParagraphIdx > 0) {
                            List<CoreMap> quotesInPrevParagraph = paragraphToQuotes.getOrDefault(quoteParagraphIdx - 1, new ArrayList<CoreMap>());
                            List<Pair<Integer, Integer>> exclusionList = new ArrayList<Pair<Integer, Integer>>();
                            for (CoreMap quoteIPP : quotesInPrevParagraph) {
                                Pair<Integer, Integer> quoteRange = new Pair<Integer, Integer>(quoteIPP.get(CoreAnnotations.TokenBeginAnnotation.class), quoteIPP.get(CoreAnnotations.TokenEndAnnotation.class));
                                exclusionList.add(quoteRange);
                                for (String name : sieve.scanForNames(quoteRange).first) {
                                    if (!p.aliases.contains(name)) {
                                        continue;
                                    }
                                    features.setCount("nameInPrevParagraphQuote", 1.0);
                                }
                            }
                            // Find any sentence of the previous paragraph to anchor its token bounds.
                            int sentenceIdx = quoteFirstSentence.get(CoreAnnotations.SentenceIndexAnnotation.class);
                            CoreMap sentenceInPrevParagraph = null;
                            for (int i = sentenceIdx - 1; i >= 0; --i) {
                                CoreMap candidate = sentences.get(i);
                                if (candidate.get(CoreAnnotations.ParagraphIndexAnnotation.class) != quoteParagraphIdx - 1) {
                                    continue;
                                }
                                sentenceInPrevParagraph = candidate;
                                break;
                            }
                            int prevParagraphBegin = SupervisedSieveTraining.getParagraphBeginToken(sentenceInPrevParagraph, sentences);
                            int prevParagraphEnd = SupervisedSieveTraining.getParagraphEndToken(sentenceInPrevParagraph, sentences);
                            List<Pair<Integer, Integer>> prevParagraphNonQuoteRuns = SupervisedSieveTraining.getRangeExclusion(new Pair<Integer, Integer>(prevParagraphBegin, prevParagraphEnd), exclusionList);
                            for (Pair<Integer, Integer> nonQuoteRange : prevParagraphNonQuoteRuns) {
                                for (String name : sieve.scanForNames(nonQuoteRange).first) {
                                    if (!p.aliases.contains(name)) {
                                        continue;
                                    }
                                    features.setCount("nameInPrevParagraphNonQuote", 1.0);
                                }
                            }
                        }
                    }
                }

                // Label the datum and register it under its dataset index.
                String label;
                if (!isTraining) {
                    label = "none";
                } else if (QuoteAttributionUtils.rangeContains(new Pair<Integer, Integer>(gold.mentionStartTokenIndex, gold.mentionEndTokenIndex), new Pair<Integer, Integer>(mentionData.begin, mentionData.end))) {
                    label = "isMention";
                } else {
                    label = "isNotMention";
                }
                RVFDatum<String, String> datum = new RVFDatum<String, String>(features, label);
                // Capture the index before adding so the datum ID and the map key always agree.
                int datumIdx = dataset.size();
                datum.setID(Integer.toString(datumIdx));
                mapDatumToMention.put(datumIdx, mentionData);
                dataset.add(datum);
            }
            mapQuoteToDataRange.put(quoteIdx, new Pair<Integer, Integer>(initialSize, dataset.size() - 1));
        }
        return new FeaturesData(mapQuoteToDataRange, mapDatumToMention, dataset);
    }

    /**
     * Filters a candidate list so that each distinct mention text appears once, keeping the first
     * occurrence. Candidates whose type equals {@code "Pronoun"} are always kept.
     *
     * <p>NOTE(review): the type is matched against capitalised {@code "Pronoun"} while other code
     * in this class tests for lower-case {@code "name"}; confirm the casing Sieve actually emits.
     *
     * @param mentionCandidates candidates in priority order
     * @return a new list with the retained candidates in their original order
     */
    private static List<Sieve.MentionData> eliminateDuplicates(List<Sieve.MentionData> mentionCandidates) {
        List<Sieve.MentionData> kept = new ArrayList<>();
        Set<String> textsSoFar = new HashSet<>();
        for (Sieve.MentionData candidate : mentionCandidates) {
            // add() reports whether the text is new, which replaces the separate contains() probe.
            boolean firstOccurrence = textsSoFar.add(candidate.text);
            if (firstOccurrence || candidate.type.equals("Pronoun")) {
                kept.add(candidate);
            }
        }
        return kept;
    }

    /**
     * Serializes {@code clf} to {@code fileName} using Java object serialization.
     *
     * <p>I/O failures (including an unopenable file) are reported with a stack trace on standard
     * error and otherwise ignored; no exception reaches the caller.
     *
     * @param fileName path of the file to create or overwrite
     * @param clf      classifier to write
     */
    public static void outputModel(String fileName, Classifier<String, String> clf) {
        // try-with-resources closes both streams even when writeObject or the
        // ObjectOutputStream constructor throws.
        try (FileOutputStream fileOut = new FileOutputStream(fileName);
             ObjectOutputStream objectOut = new ObjectOutputStream(fileOut)) {
            objectOut.writeObject(clf);
            objectOut.flush();
        } catch (IOException e) {
            // FileNotFoundException is an IOException, so this covers both failure modes.
            e.printStackTrace();
        }
    }

    /**
     * Trains the quote-to-mention classifier on {@code data} and writes it to disk.
     *
     * <p>Reads the properties {@code charactersPath}, {@code booknlpCoref} and {@code modelPath}
     * from {@code props}; the animacy list comes from
     * {@code QuoteAttributionAnnotator.ANIMACY_WORD_LIST}.
     *
     * @param data  annotated document together with its gold quote list
     * @param props configuration holding the three property keys named above
     */
    public static void train(XMLToAnnotation.Data data, Properties props) {
        String charactersPath = props.getProperty("charactersPath");
        Map<String, List<Person>> people = QuoteAttributionUtils.readPersonMap(charactersPath);

        String corefPath = props.getProperty("booknlpCoref");
        Map<Integer, String> pronounToCharacter = QuoteAttributionUtils.setupCoref(corefPath, people, data.doc);

        Set<String> animateWords = QuoteAttributionUtils.readAnimacyList(QuoteAttributionAnnotator.ANIMACY_WORD_LIST);

        SieveData sieveInput = new SieveData(data.doc, people, pronounToCharacter, animateWords);
        FeaturesData featurized = SupervisedSieveTraining.featurize(sieveInput, data.goldList, true);

        ExtractQuotesClassifier trainer = new ExtractQuotesClassifier(featurized.dataset);
        SupervisedSieveTraining.outputModel(props.getProperty("modelPath"), trainer.getClassifier());
    }

    /**
     * Command-line entry point: loads a properties file, annotates the referenced XML document
     * (paragraphs, quote attribution, chapters) and trains the model.
     *
     * @param args optional: {@code args[0]} base directory (used as a string prefix, so it should
     *             end with a path separator), {@code args[1]} properties file name inside
     *             {@code ExtractQuotesXMLScripts/}
     * @throws Exception propagated from property loading, XML reading or annotation
     */
    public static void main(String[] args) throws Exception {
        String baseDir = args.length >= 1 ? args[0] : "/home/mjfang/action_grammars/";
        String propsFileName = args.length >= 2 ? args[1] : "1PPDevUncollapsed.props";
        System.out.println("Base directory: " + baseDir);

        Properties props = StringUtils.propFileToProperties(baseDir + "ExtractQuotesXMLScripts/" + propsFileName);
        XMLToAnnotation.Data data = XMLToAnnotation.readXMLFormat(props.getProperty("file"));

        // Paragraph indices must exist before quote attribution runs.
        Properties paragraphProps = new Properties();
        paragraphProps.setProperty("paragraphBreak", "one");
        new ParagraphAnnotator(paragraphProps, false).annotate(data.doc);

        // Forward only the settings the attribution annotator and the trainer read.
        Properties annotatorProps = new Properties();
        for (String key : new String[] {"charactersPath", "booknlpCoref", "modelPath"}) {
            annotatorProps.setProperty(key, props.getProperty(key));
        }
        new QuoteAttributionAnnotator(annotatorProps).annotate(data.doc);

        new ChapterAnnotator().annotate(data.doc);

        SupervisedSieveTraining.train(data, annotatorProps);
    }
    static {
        punctuation = new HashSet<String>(Arrays.asList(",", ".", "\"", "\n"));
        punctuationForFeatures = new HashSet<String>(Arrays.asList(",", ".", "!", "?"));
    }

    /**
     * Bundle of the inputs {@code featurize} needs to construct a {@code Sieve}: the document and
     * its supporting lookup structures. Fields are stored as given, without copying.
     */
    public static class SieveData {
        /** Document to extract features from. */
        Annotation doc;
        /** Name string to the persons registered under it. */
        Map<String, List<Person>> characterMap;
        /** Integer-keyed pronoun coreference entries, passed through to the Sieve. */
        Map<Integer, String> pronounCorefMap;
        /** Animacy word list, passed through to the Sieve. */
        Set<String> animacyList;

        /**
         * @param doc             document to extract features from
         * @param characterMap    name string to the persons registered under it
         * @param pronounCorefMap pronoun coreference entries
         * @param animacyList     animacy word list
         */
        public SieveData(Annotation doc, Map<String, List<Person>> characterMap, Map<Integer, String> pronounCorefMap, Set<String> animacyList) {
            this.animacyList = animacyList;
            this.pronounCorefMap = pronounCorefMap;
            this.characterMap = characterMap;
            this.doc = doc;
        }
    }

    /**
     * Result of {@code featurize}: the dataset plus the index maps that tie datums back to quotes
     * and mentions. Fields are stored as given, without copying.
     */
    public static class FeaturesData {
        /** One datum per (quote, candidate mention) pair. */
        public GeneralDataset<String, String> dataset;
        /** Quote index to the inclusive range of datum indices produced for that quote. */
        public Map<Integer, Pair<Integer, Integer>> mapQuoteToDataRange;
        /** Datum index to the candidate mention it describes. */
        public Map<Integer, Sieve.MentionData> mapDatumToMention;

        /**
         * @param mapQuoteToDataRange quote index to inclusive datum index range
         * @param mapDatumToMention   datum index to candidate mention
         * @param dataset             the feature dataset
         */
        public FeaturesData(Map<Integer, Pair<Integer, Integer>> mapQuoteToDataRange, Map<Integer, Sieve.MentionData> mapDatumToMention, GeneralDataset<String, String> dataset) {
            this.dataset = dataset;
            this.mapDatumToMention = mapDatumToMention;
            this.mapQuoteToDataRange = mapQuoteToDataRange;
        }
    }
}

