/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.ie.machinereading.domains.roth;

import edu.stanford.nlp.ie.machinereading.GenericDataSetReader;
import edu.stanford.nlp.ie.machinereading.structure.AnnotationUtils;
import edu.stanford.nlp.ie.machinereading.structure.EntityMention;
import edu.stanford.nlp.ie.machinereading.structure.ExtractionObject;
import edu.stanford.nlp.ie.machinereading.structure.MachineReadingAnnotations;
import edu.stanford.nlp.ie.machinereading.structure.RelationMention;
import edu.stanford.nlp.ie.machinereading.structure.Span;
import edu.stanford.nlp.io.IOUtils;
import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.StringUtils;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Reads a corpus in the Roth/Yih CoNLL-04 entity-and-relation format into an {@link Annotation}.
 *
 * <p>The file is consumed line by line; every line is split into pieces and dispatched on the
 * number of pieces:
 * <ul>
 *   <li>1 piece: counted as a blank separator line; a sentence ends once two of these were seen;</li>
 *   <li>3 pieces: a relation line ({@code arg1Index arg2Index relationType});</li>
 *   <li>9 pieces: a token line (piece 0 = sentence id, piece 1 = NER tag, piece 2 = entity index,
 *       piece 5 = the word, or several words joined by {@code '/'});</li>
 *   <li>any other number of pieces: the line is ignored.</li>
 * </ul>
 */
public class RothCONLL04Reader extends GenericDataSetReader {
    /** Becomes true once the "unknown NER tag" warning was logged, so it is logged at most once. */
    private boolean warnedNER;

    /**
     * Creates a reader whose logger is named after this class and only reports
     * {@link Level#SEVERE} messages by default.
     */
    public RothCONLL04Reader() {
        super(null, true, true, true);
        // Replace the inherited logger with one from our own namespace and keep it quiet.
        this.logger = Logger.getLogger(RothCONLL04Reader.class.getName());
        this.logger.setLevel(Level.SEVERE);
    }

    /**
     * Reads the whole file at {@code path} and returns a document annotation holding one
     * sentence annotation per sentence block found in the file.
     *
     * @param path location of the corpus file; also used as the document id of every sentence
     * @return the document annotation with all sentences added
     * @throws IOException declared by the overridden method
     */
    @Override
    public Annotation read(String path) throws IOException {
        Annotation doc = new Annotation("");
        this.logger.info("Reading file: " + path);
        // Each readSentence call consumes the lines of exactly one sentence block.
        for (Iterator<String> lineIterator = IOUtils.readLines(path).iterator(); lineIterator.hasNext(); ) {
            AnnotationUtils.addSentence(doc, this.readSentence(path, lineIterator));
        }
        return doc;
    }

    /**
     * Maps a dataset NER tag (compared case-insensitively) to the tag name used in the annotations.
     *
     * @param ner tag as it appears in the corpus: O, Peop, Loc, Org or Other
     * @return "O", "PERSON", "LOCATION", "ORGANIZATION" or "OTHER" respectively
     * @throws RuntimeException if the tag is none of the known ones (a warning is logged the
     *         first time this happens)
     */
    private String getNormalizedNERTag(String ner) {
        if (ner.equalsIgnoreCase("O")) {
            return "O";
        }
        if (ner.equalsIgnoreCase("Peop")) {
            return "PERSON";
        }
        if (ner.equalsIgnoreCase("Loc")) {
            return "LOCATION";
        }
        if (ner.equalsIgnoreCase("Org")) {
            return "ORGANIZATION";
        }
        if (ner.equalsIgnoreCase("Other")) {
            return "OTHER";
        }
        if (!this.warnedNER) {
            this.warnedNER = true;
            this.logger.warning("This file contains NER tags not in the original Roth/Yih dataset, e.g.: " + ner);
        }
        throw new RuntimeException("Cannot normalize ner tag " + ner);
    }

    /**
     * Consumes lines from {@code lineIterator} until two separator lines were seen (or the input
     * is exhausted) and builds the sentence annotation for them. The two separator lines need not
     * be adjacent: the counter is never reset.
     *
     * @param docId value stored as the sentence's document id
     * @param lineIterator iterator over the remaining lines of the file; advanced by this call
     * @return sentence annotation carrying text, tokens, sentence id, entity mentions and
     *         relation mentions
     * @throws RuntimeException if a token line carries an unknown NER tag, or a relation line
     *         refers to an entity index that has no entity mention in this sentence
     */
    private Annotation readSentence(String docId, Iterator<String> lineIterator) {
        Annotation sentence = new Annotation("");
        sentence.set(CoreAnnotations.DocIDAnnotation.class, docId);
        sentence.set(MachineReadingAnnotations.EntityMentionsAnnotation.class, new ArrayList<EntityMention>());

        // Text, tokens and sentence id can only be stored once the whole block was read.
        StringBuilder textContent = new StringBuilder();
        List<CoreLabel> tokens = new ArrayList<CoreLabel>();
        int tokenCount = 0;
        int numBlankLinesSeen = 0;
        String sentenceID = null;
        // Entity index (kept as a string, it is only a lookup key) -> mention, for relation lines.
        HashMap<String, EntityMention> indexToEntityMention = new HashMap<String, EntityMention>();

        while (lineIterator.hasNext() && numBlankLinesSeen < 2) {
            String currentLine = lineIterator.next().replace("COMMA", ",");
            List<String> pieces = StringUtils.split(currentLine);
            switch (pieces.size()) {
                case 1: {
                    // Separator line. Presumably an empty line splits into one empty piece;
                    // TODO confirm against StringUtils.split.
                    ++numBlankLinesSeen;
                    break;
                }
                case 3: {
                    this.addRelation(sentence, pieces, indexToEntityMention, currentLine);
                    break;
                }
                case 9: {
                    // An entity may span several words joined by '/'; split them into tokens.
                    List<String> words = StringUtils.split(pieces.get(5), "/");
                    String text = StringUtils.join(words, " ");
                    String identifier = "entity" + pieces.get(0) + '-' + pieces.get(2);
                    String nerTag = this.getNormalizedNERTag(pieces.get(1));
                    if (sentenceID == null) {
                        // The first token line of the block determines the sentence id.
                        sentenceID = pieces.get(0);
                    }
                    if (!nerTag.equals("O")) {
                        Span extentSpan = new Span(tokenCount, tokenCount + words.size());
                        // The extent span doubles as the initial head span.
                        EntityMention entity = new EntityMention(identifier, sentence, extentSpan, extentSpan, nerTag, null, null);
                        AnnotationUtils.addEntityMention(sentence, entity);
                        indexToEntityMention.put(pieces.get(2), entity);
                    }
                    for (String word : words) {
                        CoreLabel label = new CoreLabel();
                        label.setWord(word);
                        label.set(CoreAnnotations.TextAnnotation.class, word);
                        label.set(CoreAnnotations.ValueAnnotation.class, word);
                        tokens.add(label);
                    }
                    textContent.append(text);
                    textContent.append(' ');
                    tokenCount += words.size();
                    break;
                }
            }
        }

        String sentenceText = textContent.toString();
        sentence.set(CoreAnnotations.TextAnnotation.class, sentenceText);
        sentence.set(CoreAnnotations.ValueAnnotation.class, sentenceText);
        sentence.set(CoreAnnotations.TokensAnnotation.class, tokens);
        sentence.set(CoreAnnotations.SentenceIDAnnotation.class, sentenceID);
        return sentence;
    }

    /**
     * Builds the relation mention described by a three-piece relation line and adds it to the sentence.
     *
     * @param sentence sentence the relation belongs to
     * @param pieces the three pieces of the line: first argument index, second argument index, type
     * @param indexToEntityMention entity mentions read so far in this sentence, keyed by entity index
     * @param line the relation line, used only for error reporting
     * @throws RuntimeException if either argument index has no entity mention
     */
    private void addRelation(Annotation sentence, List<String> pieces, HashMap<String, EntityMention> indexToEntityMention, String line) {
        String type = pieces.get(2);
        EntityMention entity1 = RothCONLL04Reader.requireEntity(indexToEntityMention, pieces.get(0), line);
        EntityMention entity2 = RothCONLL04Reader.requireEntity(indexToEntityMention, pieces.get(1), line);
        List<ExtractionObject> args = new ArrayList<ExtractionObject>();
        args.add(entity1);
        args.add(entity2);
        // NOTE(review): if the second argument precedes the first in the sentence, this start
        // will be greater than the end -- confirm that consumers of the span tolerate that.
        Span span = new Span(entity1.getExtentTokenStart(), entity2.getExtentTokenEnd());
        String identifier = RelationMention.makeUniqueId();
        RelationMention relationMention = new RelationMention(identifier, sentence, span, type, null, args);
        AnnotationUtils.addRelationMention(sentence, relationMention);
    }

    /**
     * Looks up the entity mention for a relation argument, failing with a descriptive message
     * instead of a bare null dereference when the index is unknown.
     */
    private static EntityMention requireEntity(HashMap<String, EntityMention> indexToEntityMention, String index, String line) {
        EntityMention entity = indexToEntityMention.get(index);
        if (entity == null) {
            throw new RuntimeException("Relation line \"" + line + "\" refers to entity index " + index
                    + ", but no entity mention with that index was read in this sentence");
        }
        return entity;
    }

    /**
     * Returns the position of the element that is the very same object as {@code obj}
     * (reference comparison, not {@code equals}), or -1 if there is none.
     */
    private static <X> int getIndexByObjectEquality(List<X> list, X obj) {
        int sz = list.size();
        for (int i = 0; i < sz; ++i) {
            if (list.get(i) == obj) {
                return i;
            }
        }
        return -1;
    }

    /**
     * Determines the head token of {@code entity} from the parse {@code tree} and stores it as
     * the entity's head token position and one-token head span.
     *
     * <p>The head is first looked up under the node joining the leaves at the extent start and
     * extent end. If the leaf at the extent end is punctuation and that head lies outside the
     * extent, the lookup is retried with the end moved one leaf to the left. If the head still
     * lies outside the extent, the extent's words are parsed on their own and the head of that
     * parse is used.
     *
     * <p>This method is not called from within this class. NOTE(review): the extent end is used
     * here as an inclusive leaf index, while readSentence builds extents whose end is
     * {@code tokenCount + words.size()} -- confirm which convention is intended before using it.
     *
     * @param entity mention whose head is to be set
     * @param tree parse tree of the sentence containing the mention
     */
    private void setHeadWord(EntityMention entity, Tree tree) {
        List<Tree> leaves = tree.getLeaves();
        Tree argRoot = tree.joinNode(leaves.get(entity.getExtentTokenStart()), leaves.get(entity.getExtentTokenEnd()));
        Tree headWordNode = argRoot.headTerminal(this.headFinder);
        int headWordIndex = RothCONLL04Reader.getIndexByObjectEquality(leaves, headWordNode);

        if (StringUtils.isPunct(leaves.get(entity.getExtentTokenEnd()).label().value().trim())
                && (headWordIndex >= entity.getExtentTokenEnd() || headWordIndex < entity.getExtentTokenStart())) {
            // Trailing punctuation pulled the head out of the extent: retry without the last leaf.
            argRoot = tree.joinNode(leaves.get(entity.getExtentTokenStart()), leaves.get(entity.getExtentTokenEnd() - 1));
            headWordNode = argRoot.headTerminal(this.headFinder);
            headWordIndex = RothCONLL04Reader.getIndexByObjectEquality(leaves, headWordNode);
            if (headWordIndex >= entity.getExtentTokenStart() && headWordIndex <= entity.getExtentTokenEnd() - 1) {
                RothCONLL04Reader.assignHead(entity, headWordIndex);
            }
        }

        if (headWordIndex >= entity.getExtentTokenStart() && headWordIndex <= entity.getExtentTokenEnd()) {
            RothCONLL04Reader.assignHead(entity, headWordIndex);
        } else {
            // The head is still outside the extent: parse the extent's words by themselves.
            List<String> argWords = new ArrayList<String>();
            for (int i = entity.getExtentTokenStart(); i <= entity.getExtentTokenEnd(); ++i) {
                argWords.add(leaves.get(i).label().value());
            }
            if (StringUtils.isPunct(argWords.get(argWords.size() - 1))) {
                argWords.remove(argWords.size() - 1);
            }
            Tree argTree = this.parseStrings(argWords);
            headWordNode = argTree.headTerminal(this.headFinder);
            // Index within the sub-parse, shifted back into sentence coordinates.
            headWordIndex = RothCONLL04Reader.getIndexByObjectEquality(argTree.getLeaves(), headWordNode) + entity.getExtentTokenStart();
            RothCONLL04Reader.assignHead(entity, headWordIndex);
        }
    }

    /** Stores {@code headWordIndex} as the head position and as a one-token head span. */
    private static void assignHead(EntityMention entity, int headWordIndex) {
        entity.setHeadTokenPosition(headWordIndex);
        entity.setHeadTokenSpan(new Span(headWordIndex, headWordIndex + 1));
    }

    /**
     * Parses the corpus at a hard-coded path with a pipeline configured from the command-line
     * arguments and prints the resulting dataset to standard output.
     *
     * @param args command-line arguments, converted to the properties given to the pipeline
     * @throws Exception if reading or processing fails
     */
    public static void main(String[] args) throws Exception {
        Properties props = StringUtils.argsToProperties(args);
        RothCONLL04Reader reader = new RothCONLL04Reader();
        reader.setLoggerLevel(Level.INFO);
        reader.setProcessor(new StanfordCoreNLP(props));
        Annotation doc = reader.parse("/u/nlp/data/RothCONLL04/conll04.corp");
        System.out.println(AnnotationUtils.datasetToString(doc));
    }
}

