/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.sequences;

import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.io.RuntimeIOException;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.objectbank.ObjectBank;
import edu.stanford.nlp.sequences.DocumentReaderAndWriter;
import edu.stanford.nlp.sequences.IOBUtils;
import edu.stanford.nlp.sequences.SeqClassifierFlags;
import edu.stanford.nlp.stats.ClassicCounter;
import edu.stanford.nlp.stats.Counter;
import edu.stanford.nlp.stats.Counters;
import edu.stanford.nlp.util.AbstractIterator;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class CoNLLDocumentReaderAndWriter
implements DocumentReaderAndWriter<CoreLabel> {
    private static final long serialVersionUID = 6281374154299530460L;
    public static final String BOUNDARY = "*BOUNDARY*";
    private static final boolean TREAT_FILE_AS_ONE_DOCUMENT = false;
    private static final Pattern docPattern = Pattern.compile("^\\s*-DOCSTART-\\s");
    private static final Pattern white = Pattern.compile("^\\s*$");
    private SeqClassifierFlags flags;

    @Override
    public void init(SeqClassifierFlags flags) {
        this.flags = flags;
    }

    public String toString() {
        return "CoNLLDocumentReaderAndWriter[entitySubclassification: " + this.flags.entitySubclassification + ", intern: " + this.flags.intern + ']';
    }

    @Override
    public Iterator<List<CoreLabel>> getIterator(Reader r) {
        return new CoNLLIterator(r);
    }

    private static Iterator<String> splitIntoDocs(Reader r) {
        ArrayList<String> docs = new ArrayList<String>();
        ObjectBank<String> ob = ObjectBank.getLineIterator(r);
        StringBuilder current = new StringBuilder();
        Matcher matcher = docPattern.matcher("");
        for (String line : ob) {
            if (matcher.reset(line).lookingAt() && current.length() > 0) {
                docs.add(current.toString());
                current.setLength(0);
            }
            current.append(line).append('\n');
        }
        if (current.length() > 0) {
            docs.add(current.toString());
        }
        return docs.iterator();
    }

    private List<CoreLabel> processDocument(String doc) {
        String[] lines;
        ArrayList<CoreLabel> list = new ArrayList<CoreLabel>();
        for (String line : lines = doc.split("\n")) {
            if (this.flags.deleteBlankLines && white.matcher(line).matches()) continue;
            list.add(this.makeCoreLabel(line));
        }
        IOBUtils.entitySubclassify(list, CoreAnnotations.AnswerAnnotation.class, this.flags.backgroundSymbol, this.flags.entitySubclassification, this.flags.intern);
        return list;
    }

    private CoreLabel makeCoreLabel(String line) {
        CoreLabel wi = new CoreLabel();
        String[] bits = line.split("\\s+");
        switch (bits.length) {
            case 0: 
            case 1: {
                wi.setWord(BOUNDARY);
                wi.set(CoreAnnotations.AnswerAnnotation.class, this.flags.backgroundSymbol);
                break;
            }
            case 2: {
                wi.setWord(bits[0]);
                wi.set(CoreAnnotations.AnswerAnnotation.class, bits[1]);
                break;
            }
            case 3: {
                wi.setWord(bits[0]);
                wi.setTag(bits[1]);
                wi.set(CoreAnnotations.AnswerAnnotation.class, bits[2]);
                break;
            }
            case 4: {
                wi.setWord(bits[0]);
                wi.setTag(bits[1]);
                wi.set(CoreAnnotations.ChunkAnnotation.class, bits[2]);
                wi.set(CoreAnnotations.AnswerAnnotation.class, bits[3]);
                break;
            }
            case 5: {
                if (this.flags.useLemmaAsWord) {
                    wi.setWord(bits[1]);
                } else {
                    wi.setWord(bits[0]);
                }
                wi.set(CoreAnnotations.LemmaAnnotation.class, bits[1]);
                wi.setTag(bits[2]);
                wi.set(CoreAnnotations.ChunkAnnotation.class, bits[3]);
                wi.set(CoreAnnotations.AnswerAnnotation.class, bits[4]);
                break;
            }
            default: {
                throw new RuntimeIOException("Unexpected input (many fields): " + line);
            }
        }
        wi.set(CoreAnnotations.ValueAnnotation.class, wi.word());
        wi.set(CoreAnnotations.GoldAnswerAnnotation.class, (String)wi.get(CoreAnnotations.AnswerAnnotation.class));
        return wi;
    }

    private void deEndify(List<CoreLabel> tokens) {
        if (this.flags.retainEntitySubclassification) {
            return;
        }
        IOBUtils.entitySubclassify(tokens, CoreAnnotations.AnswerAnnotation.class, this.flags.backgroundSymbol, "iob1", this.flags.intern);
    }

    @Override
    public void printAnswers(List<CoreLabel> doc, PrintWriter out2) {
        if (!"iob1".equalsIgnoreCase(this.flags.entitySubclassification)) {
            this.deEndify(doc);
        }
        for (CoreLabel fl : doc) {
            String word = fl.word();
            if (word == BOUNDARY) {
                out2.println();
                continue;
            }
            String gold = fl.getString(CoreAnnotations.GoldAnswerAnnotation.class);
            String guess = (String)fl.get(CoreAnnotations.AnswerAnnotation.class);
            String pos = fl.getString(CoreAnnotations.PartOfSpeechAnnotation.class);
            String chunk = fl.getString(CoreAnnotations.ChunkAnnotation.class);
            out2.println(fl.word() + '\t' + pos + '\t' + chunk + '\t' + gold + '\t' + guess);
        }
    }

    private static StringBuilder maybeIncrementCounter(StringBuilder inProgressMisc, Counter<String> miscCounter) {
        if (inProgressMisc.length() > 0) {
            miscCounter.incrementCount(inProgressMisc.toString());
            inProgressMisc = new StringBuilder();
        }
        return inProgressMisc;
    }

    public static void main(String[] args) throws IOException, ClassNotFoundException {
        CoNLLDocumentReaderAndWriter rw = new CoNLLDocumentReaderAndWriter();
        rw.init(new SeqClassifierFlags());
        int numDocs = 0;
        int numTokens = 0;
        int numEntities = 0;
        String lastAnsBase = "";
        ClassicCounter<String> miscCounter = new ClassicCounter<String>();
        StringBuilder inProgressMisc = new StringBuilder();
        Iterator<List<CoreLabel>> it = rw.getIterator(IOUtils.readerFromString(args[0]));
        while (it.hasNext()) {
            List<CoreLabel> doc = it.next();
            ++numDocs;
            for (CoreLabel fl : doc) {
                String ansPrefix;
                String ansBase;
                String word = fl.word();
                if (word.equals(BOUNDARY)) continue;
                String ans = (String)fl.get(CoreAnnotations.AnswerAnnotation.class);
                String[] bits = ans.split("-");
                if (bits.length == 1) {
                    ansBase = bits[0];
                    ansPrefix = "";
                } else {
                    ansBase = bits[1];
                    ansPrefix = bits[0];
                }
                ++numTokens;
                if (!ansBase.equals("O")) {
                    if (ansBase.equals(lastAnsBase)) {
                        if (ansPrefix.equals("B")) {
                            ++numEntities;
                            inProgressMisc = CoNLLDocumentReaderAndWriter.maybeIncrementCounter(inProgressMisc, miscCounter);
                        }
                    } else {
                        ++numEntities;
                        inProgressMisc = CoNLLDocumentReaderAndWriter.maybeIncrementCounter(inProgressMisc, miscCounter);
                    }
                    if (ansBase.equals("MISC")) {
                        if (inProgressMisc.length() > 0) {
                            inProgressMisc.append(' ');
                        }
                        inProgressMisc.append(word);
                    }
                } else {
                    inProgressMisc = CoNLLDocumentReaderAndWriter.maybeIncrementCounter(inProgressMisc, miscCounter);
                }
                lastAnsBase = ansBase;
            }
        }
        System.out.println("File " + args[0] + " has " + numDocs + " documents, " + numTokens + " (non-blank line) tokens and " + numEntities + " entities.");
        System.out.printf("Here are the %.0f MISC items with counts:%n", miscCounter.totalCount());
        System.out.println(Counters.toVerticalString(miscCounter, "%.0f\t%s"));
    }

    private class CoNLLIterator
    extends AbstractIterator<List<CoreLabel>> {
        private Iterator<String> stringIter;

        public CoNLLIterator(Reader r) {
            this.stringIter = CoNLLDocumentReaderAndWriter.splitIntoDocs(r);
        }

        @Override
        public boolean hasNext() {
            return this.stringIter.hasNext();
        }

        @Override
        public List<CoreLabel> next() {
            return CoNLLDocumentReaderAndWriter.this.processDocument(this.stringIter.next());
        }
    }
}

