/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.pipeline;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.Annotator;
import edu.stanford.nlp.pipeline.CleanXmlAnnotator;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.pipeline.TokenizerAnnotator;
import edu.stanford.nlp.pipeline.WordsToSentencesAnnotator;
import edu.stanford.nlp.util.CoreMap;
import edu.stanford.nlp.util.PropertiesUtils;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Properties;
import java.util.stream.Collectors;
import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

public class CleanXmlAnnotatorTest {
    // Shared annotators. Each one is created at most once by setUp() (guarded by a
    // null check) and then reused by the tests in this class.
    /** Tokenizer built with the options "invertible,ptb3Escaping=true". */
    private static Annotator ptbInvertible;
    /** Tokenizer built with the options "invertible=false,ptb3Escaping=true". */
    private static Annotator ptbNotInvertible;
    /** CleanXmlAnnotator built with (".*", "", "", false). */
    private static Annotator cleanXmlAllTags;
    /** CleanXmlAnnotator built with ("p", "", "", false). */
    private static Annotator cleanXmlSomeTags;
    /** CleanXmlAnnotator built with (".*", "p", "", false); tests expect it to end a sentence at a closing p tag. */
    private static Annotator cleanXmlEndSentences;
    /** CleanXmlAnnotator built with (".*", "", "", true); tests expect it to accept unbalanced markup. */
    private static Annotator cleanXmlWithFlaws;
    /** WordsToSentencesAnnotator built with the argument false; used as the sentence splitter. */
    private static Annotator wtsSplitter;

    /**
     * Creates the shared annotators the first time a test runs.
     *
     * <p>Each static field is only assigned while it is still {@code null}, so later
     * invocations reuse the instances built earlier. The whole initialization runs
     * while holding the monitor of {@code CleanXmlAnnotatorTest.class}.
     *
     * @throws Exception declared for compatibility; nothing in this method throws a
     *     checked exception directly
     */
    @Before
    public void setUp() throws Exception {
        synchronized (CleanXmlAnnotatorTest.class) {
            if (ptbInvertible == null) {
                ptbInvertible = new TokenizerAnnotator(false, "en", "invertible,ptb3Escaping=true");
            }
            if (ptbNotInvertible == null) {
                ptbNotInvertible = new TokenizerAnnotator(false, "en", "invertible=false,ptb3Escaping=true");
            }
            if (cleanXmlAllTags == null) {
                cleanXmlAllTags = new CleanXmlAnnotator(".*", "", "", false);
            }
            if (cleanXmlSomeTags == null) {
                cleanXmlSomeTags = new CleanXmlAnnotator("p", "", "", false);
            }
            if (cleanXmlEndSentences == null) {
                cleanXmlEndSentences = new CleanXmlAnnotator(".*", "p", "", false);
            }
            if (cleanXmlWithFlaws == null) {
                cleanXmlWithFlaws = new CleanXmlAnnotator(".*", "", "", true);
            }
            if (wtsSplitter == null) {
                wtsSplitter = new WordsToSentencesAnnotator(false);
            }
        }
    }

    /**
     * Wraps {@code text} in a new {@link Annotation} and runs the given stages over it.
     *
     * @param text the raw text to annotate
     * @param tokenizer the annotator that is always run first; must not be {@code null}
     * @param xmlRemover optional second stage; skipped when {@code null}
     * @param splitter optional third stage; skipped when {@code null}
     * @return the annotation after all supplied stages have been applied
     */
    public static Annotation annotate(String text, Annotator tokenizer, Annotator xmlRemover, Annotator splitter) {
        Annotation document = new Annotation(text);
        tokenizer.annotate(document);
        // The remaining stages are optional and run in this fixed order.
        for (Annotator optionalStage : new Annotator[] {xmlRemover, splitter}) {
            if (optionalStage == null) {
                continue;
            }
            optionalStage.annotate(document);
        }
        return document;
    }

    /**
     * Asserts that {@code annotation} contains the same token words as the gold text.
     *
     * <p>Each gold string is tokenized on its own with {@code ptbInvertible} (no XML
     * cleaning, no sentence splitting) and the resulting tokens are concatenated. The
     * token count and every token's {@code word()} must match. If the annotation
     * carries sentences, there must be exactly one sentence per gold string.
     *
     * @param annotation the annotation produced by the pipeline under test
     * @param gold the expected text, one string per expected sentence
     */
    private static void checkResult(Annotation annotation, String ... gold) {
        List<CoreLabel> goldTokens = new ArrayList<>();
        for (String goldText : gold) {
            Annotation goldAnnotation = CleanXmlAnnotatorTest.annotate(goldText, ptbInvertible, null, null);
            goldTokens.addAll(goldAnnotation.get(CoreAnnotations.TokensAnnotation.class));
        }
        List<CoreLabel> annotationLabels = annotation.get(CoreAnnotations.TokensAnnotation.class);
        if (goldTokens.size() != annotationLabels.size()) {
            // Dump both token sequences (actual first, then gold) to help diagnose the mismatch.
            printWords(annotationLabels);
            printWords(goldTokens);
        }
        Assert.assertEquals("Token count mismatch (gold vs: actual)", goldTokens.size(), annotationLabels.size());
        for (int i = 0; i < annotationLabels.size(); ++i) {
            Assert.assertEquals(goldTokens.get(i).word(), annotationLabels.get(i).word());
        }
        List<CoreMap> sentences = annotation.get(CoreAnnotations.SentencesAnnotation.class);
        if (sentences != null) {
            Assert.assertEquals("Sentence count mismatch", gold.length, sentences.size());
        }
    }

    /** Prints the word of every token to {@code System.err}, each followed by a space, then a newline. */
    private static void printWords(List<CoreLabel> tokens) {
        for (CoreLabel token : tokens) {
            System.err.print(token.word());
            System.err.print(' ');
        }
        System.err.println();
    }

    /**
     * Asserts that the original text can be rebuilt from the token annotations.
     *
     * <p>The text is rebuilt by concatenating, for every token, its
     * {@code BeforeAnnotation} and {@code OriginalTextAnnotation}, followed by the
     * {@code AfterAnnotation} of the last token. The result must equal {@code gold}.
     *
     * @param annotation the annotation whose tokens are inspected; it must contain at
     *     least one token, because the last token is read unconditionally
     * @param gold the exact text the rebuilt string must equal
     */
    private static void checkInvert(Annotation annotation, String gold) {
        List<CoreLabel> annotationLabels = annotation.get(CoreAnnotations.TokensAnnotation.class);
        StringBuilder original = new StringBuilder();
        for (CoreLabel label : annotationLabels) {
            original.append(label.get(CoreAnnotations.BeforeAnnotation.class));
            original.append(label.get(CoreAnnotations.OriginalTextAnnotation.class));
        }
        CoreLabel lastLabel = annotationLabels.get(annotationLabels.size() - 1);
        original.append(lastLabel.get(CoreAnnotations.AfterAnnotation.class));
        Assert.assertEquals(gold, original.toString());
    }

    /**
     * Asserts that the token's {@code XmlContextAnnotation} equals the expected tag list.
     *
     * @param label the token to inspect
     * @param expectedContext the expected context entries, in the order they must
     *     appear in the annotation
     */
    private static void checkContext(CoreLabel label, String ... expectedContext) {
        List<String> xmlContext = label.get(CoreAnnotations.XmlContextAnnotation.class);
        Assert.assertEquals(expectedContext.length, xmlContext.size());
        for (int i = 0; i < expectedContext.length; ++i) {
            Assert.assertEquals(expectedContext[i], xmlContext.get(i));
        }
    }

    /** A single wrapping tag is removed and only the enclosed sentence remains. */
    @Test
    public void testRemoveXML() {
        String input = "<xml>This is a test string.</xml>";
        Annotation cleaned = annotate(input, ptbInvertible, cleanXmlAllTags, wtsSplitter);
        checkResult(cleaned, "This is a test string.");
    }

    /** With {@code cleanXmlSomeTags}, only the text inside the p element is expected to survive. */
    @Test
    public void testExtractSpecificTag() {
        String input = "<p>This is a test string.</p><foo>This should not be found</foo>";
        Annotation cleaned = annotate(input, ptbInvertible, cleanXmlSomeTags, wtsSplitter);
        checkResult(cleaned, "This is a test string.");
    }

    /**
     * The same input yields one sentence with {@code cleanXmlAllTags} and two sentences
     * with {@code cleanXmlEndSentences}.
     */
    @Test
    public void testSentenceSplitting() {
        String input = "<p>This sentence is split</p><foo>over two tags</foo>";

        Annotation joined = annotate(input, ptbInvertible, cleanXmlAllTags, wtsSplitter);
        checkResult(joined, "This sentence is split over two tags");

        Annotation split = annotate(input, ptbInvertible, cleanXmlEndSentences, wtsSplitter);
        checkResult(split, "This sentence is split", "over two tags");
    }

    /**
     * Nested p elements: one sentence with {@code cleanXmlAllTags}, two sentences with
     * {@code cleanXmlEndSentences}.
     */
    @Test
    public void testNestedTags() {
        String input = "<p><p>This text is in a</p>nested tag</p>";

        Annotation joined = annotate(input, ptbInvertible, cleanXmlAllTags, wtsSplitter);
        checkResult(joined, "This text is in a nested tag");

        Annotation split = annotate(input, ptbInvertible, cleanXmlEndSentences, wtsSplitter);
        checkResult(split, "This text is in a", "nested tag");
    }

    /**
     * Unclosed inner tags: {@code cleanXmlWithFlaws} must still produce the text, while
     * {@code cleanXmlAllTags} must reject the input with an IllegalArgumentException.
     */
    @Test
    public void testMissingCloseTags() {
        String input = "<text><p>This text <p>has closing tags wrong</text>";
        String expected = "This text has closing tags wrong";

        checkResult(annotate(input, ptbInvertible, cleanXmlWithFlaws, wtsSplitter), expected);

        boolean rejected = false;
        try {
            checkResult(annotate(input, ptbInvertible, cleanXmlAllTags, wtsSplitter), expected);
        }
        catch (IllegalArgumentException expectedFailure) {
            // This is the outcome the test is looking for.
            rejected = true;
        }
        if (!rejected) {
            throw new RuntimeException("it was supposed to barf");
        }
    }

    /**
     * Text that ends with a tag still open: {@code cleanXmlWithFlaws} must still produce
     * the text, while {@code cleanXmlAllTags} must reject the input with an
     * IllegalArgumentException.
     */
    @Test
    public void testEarlyEnd() {
        String input = "<text>This text ends before all tags closed";
        String expected = "This text ends before all tags closed";

        checkResult(annotate(input, ptbInvertible, cleanXmlWithFlaws, wtsSplitter), expected);

        boolean rejected = false;
        try {
            checkResult(annotate(input, ptbInvertible, cleanXmlAllTags, wtsSplitter), expected);
        }
        catch (IllegalArgumentException expectedFailure) {
            // This is the outcome the test is looking for.
            rejected = true;
        }
        if (!rejected) {
            throw new RuntimeException("it was supposed to barf");
        }
    }

    /**
     * For inputs with no tags, some tags and many tags, the cleaned tokens must match
     * the plain sentence and the original input must be recoverable from the tokens.
     */
    @Test
    public void testInvertible() {
        String testNoTags = "This sentence should be invertible.";
        String testTags = "  <xml>  This sentence should  be  invertible.  </xml>  ";
        String testManyTags = " <xml>   <foo>       <bar>This sentence should     </bar>be invertible.   </foo>   </xml> ";

        for (String input : new String[] {testNoTags, testTags, testManyTags}) {
            Annotation cleaned = annotate(input, ptbInvertible, cleanXmlAllTags, wtsSplitter);
            // The tokens always match the tag-free sentence ...
            checkResult(cleaned, testNoTags);
            // ... while inversion must give back exactly what was fed in.
            checkInvert(cleaned, input);
        }
    }

    /**
     * Checks the XML context recorded on tokens after cleaning: tokens 0-2 must carry
     * the context xml/foo/bar and tokens 3-4 the context xml/foo.
     */
    @Test
    public void testContext() {
        String testManyTags = " <xml>   <foo>       <bar>This sentence should     </bar>be invertible.   </foo>   </xml> ";
        Annotation annotation = CleanXmlAnnotatorTest.annotate(testManyTags, ptbInvertible, cleanXmlAllTags, wtsSplitter);
        List<CoreLabel> annotationLabels = annotation.get(CoreAnnotations.TokensAnnotation.class);
        for (int i = 0; i < 3; ++i) {
            CleanXmlAnnotatorTest.checkContext(annotationLabels.get(i), "xml", "foo", "bar");
        }
        for (int i = 3; i < 5; ++i) {
            CleanXmlAnnotatorTest.checkContext(annotationLabels.get(i), "xml", "foo");
        }
    }

    /**
     * Checks that the first surviving token keeps character offsets 6 (begin) and
     * 10 (end) after the two leading p tags are removed.
     */
    @Test
    public void testOffsets() {
        String testString = "<p><p>This text is in a</p>nested tag</p>";
        Annotation annotation = CleanXmlAnnotatorTest.annotate(testString, ptbInvertible, cleanXmlAllTags, wtsSplitter);
        CleanXmlAnnotatorTest.checkResult(annotation, "This text is in a nested tag");
        List<CoreLabel> labels = annotation.get(CoreAnnotations.TokensAnnotation.class);
        CoreLabel firstToken = labels.get(0);
        // intValue() keeps overload resolution on assertEquals(long, long).
        Assert.assertEquals(6, firstToken.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class).intValue());
        Assert.assertEquals(10, firstToken.get(CoreAnnotations.CharacterOffsetEndAnnotation.class).intValue());
    }

    /** A tag carrying an attribute is removed like any other tag. */
    @Test
    public void testAttributes() {
        String input = "<p a=\"b\">This text has an attribute</p>";
        Annotation cleaned = annotate(input, ptbInvertible, cleanXmlAllTags, wtsSplitter);
        checkResult(cleaned, "This text has an attribute");
    }

    /**
     * Runs the XML cleaner through a {@link StanfordCoreNLP} pipeline configured from
     * properties, then checks that the input is recoverable from the tokens and that
     * tokens 0-2 carry the context xml/foo/bar and tokens 3-4 the context xml/foo.
     */
    @Test
    public void testViaCoreNlp() {
        String testManyTags = " <xml>   <foo>       <bar>This sentence should     </bar>be invertible.   </foo>   </xml> ";
        Annotation anno = new Annotation(testManyTags);
        Properties props = PropertiesUtils.asProperties(
                "annotators", "tokenize, ssplit, cleanxml",
                "tokenizer.options", "invertible,ptb3Escaping=true",
                "cleanxml.xmltags", ".*",
                "cleanxml.sentenceendingtags", "p",
                "cleanxml.datetags", "",
                "cleanxml.allowflawedxml", "false");
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
        pipeline.annotate(anno);
        CleanXmlAnnotatorTest.checkInvert(anno, testManyTags);
        List<CoreLabel> annotationLabels = anno.get(CoreAnnotations.TokensAnnotation.class);
        for (int i = 0; i < 3; ++i) {
            CleanXmlAnnotatorTest.checkContext(annotationLabels.get(i), "xml", "foo", "bar");
        }
        for (int i = 3; i < 5; ++i) {
            CleanXmlAnnotatorTest.checkContext(annotationLabels.get(i), "xml", "foo");
        }
    }

    /**
     * Runs a Spanish discussion-forum document through a tokenize/cleanxml/ssplit
     * pipeline and checks the sections it produces.
     *
     * <p>For every section the test compares the author, the section date and a
     * rendering of the section text against the expected table. The rendering has one
     * line per sentence: the token words joined by single spaces, prefixed with
     * {@code "(QUOTING: author) "} when the sentence is marked as quoted.
     */
    @Test
    public void testKbpSectionMatching() {
        Properties props = PropertiesUtils.asProperties(
                "annotators", "tokenize,cleanxml,ssplit",
                "tokenize.language", "es",
                "tokenize.options", "tokenizeNLs,ptb3Escaping=true",
                "ssplit.newlineIsSentenceBreak", "two",
                "ssplit.tokenPatternsToDiscard", "\\n,\\*NL\\*",
                "ssplit.boundaryMultiTokenRegex", "/\\*NL\\*/ /\\p{Lu}[-\\p{L}]+/+ /,/ ( /[-\\p{L}]+/+ /,/ )? /[1-3]?[0-9]/ /\\p{Ll}{3,5}/ /=LRB=/ /\\p{Lu}\\p{L}+/ /=RRB=/ /--/",
                "clean.xmltags", "headline|text|post",
                "clean.singlesentencetags", "HEADLINE|AUTHOR",
                "clean.sentenceendingtags", "TEXT|POST|QUOTE",
                "clean.turntags", "POST|QUOTE",
                "clean.speakertags", "AUTHOR",
                "clean.datetags", "DATE_TIME",
                "clean.doctypetags", "DOC",
                "clean.docAnnotations", "docID=doc[id]",
                "clean.sectiontags", "HEADLINE|POST",
                "clean.sectionAnnotations", "sectionID=post[id],sectionDate=post[datetime],author=post[author]",
                "clean.quotetags", "quote",
                "clean.quoteauthorattributes", "orig_author",
                "clean.tokenAnnotations", "link=a[href],speaker=post[author],speaker=quote[orig_author]");
        String document = "<doc id=\"SPA_DF_000389_20090909_G00A09SM4\">\n<headline>\nProblema para Activar Restaurar Sistema En Win Ue\n</headline>\n<post author=\"mysecondskin\" datetime=\"2009-09-09T00:00:00\" id=\"p1\">\nhola portalianos tengo un problemita,mi vieja tiene un pc en su casa y no tiene activado restaurar sistema ya que el pc tiene el xp ue v5,he tratado de arregl\u00e1rselo pero no he podido dar con la soluci\u00f3n y no he querido formatearle el pc porque tiene un sin numero de programas que me da paja reinstalar\nojala alguien me pueda ayudar\nvale socios\n</post>\n<post author=\"pajenri\" datetime=\"2009-09-09T00:00:00\" id=\"p2\">\n<quote orig_author=\"mysecondskin\">\nhola portalianos tengo un problemita,mi vieja tiene un pc en su casa y no tiene activado restaurar sistema ya que el pc tiene el xp ue v5,he tratado de arregl\u00e1rselo pero no he podido dar con la soluci\u00f3n y no he querido formatearle el pc porque tiene un sin numero de programas que me da paja reinstalar\nojala alguien me pueda ayudar\nvale socios\n</quote>\n\npor lo que tengo entendido esa opcion en los win ue vienen eliminadas no desactivadas, asi que para activarla habria que reinstalar un xp limpio no tuneado. como dato es tipico en sistemas tuneados comos el win ue que suceda esto. el restaurador salva mas de lo que se cree. si toy equibocado con la info que alguien me corrija\n</post>\n<post author=\"UnknownCnR\" datetime=\"2009-09-09T00:00:00\" id=\"p3\">\n<a href=\"http://www.sendspace.com/file/54pxbl\">http://www.sendspace.com/file/54pxbl</a>\n\nCon este registro podras activarlo ;)\n</post>\n<post author=\"mysecondskin\" datetime=\"2009-09-11T00:00:00\" id=\"p4\">\ngracias pero de verdad esa solucion no sirve\n</post>\n</doc>\n";
        // One row per expected section: {author, section date, rendered sentences}.
        String[][] sections = new String[][]{
                {null, null, "Problema para Activar Restaurar Sistema En Win Ue\n"},
                {"mysecondskin", "2009-09-09T00:00:00", "hola portalianos tengo un problemita , mi vieja tiene un pc en su casa y no tiene activado restaurar sistema ya que el pc tiene el xp ue v5 , he tratado de arregl\u00e1rselo pero no he podido dar con la soluci\u00f3n y no he querido formatearle el pc porque tiene un sin numero de programas que me da paja reinstalar ojala alguien me pueda ayudar vale socios\n"},
                {"pajenri", "2009-09-09T00:00:00", "(QUOTING: mysecondskin) hola portalianos tengo un problemita , mi vieja tiene un pc en su casa y no tiene activado restaurar sistema ya que el pc tiene el xp ue v5 , he tratado de arregl\u00e1rselo pero no he podido dar con la soluci\u00f3n y no he querido formatearle el pc porque tiene un sin numero de programas que me da paja reinstalar ojala alguien me pueda ayudar vale socios\npor lo que tengo entendido esa opcion en los win ue vienen eliminadas no desactivadas , asi que para activarla habria que reinstalar un xp limpio no tuneado .\ncomo dato es tipico en sistemas tuneados comos el win ue que suceda esto .\nel restaurador salva mas de lo que se cree .\nsi toy equibocado con la info que alguien me corrija\n"},
                {"UnknownCnR", "2009-09-09T00:00:00", "http://www.sendspace.com/file/54pxbl\nCon este registro podras activarlo ;=RRB=\n"},
                {"mysecondskin", "2009-09-11T00:00:00", "gracias pero de verdad esa solucion no sirve\n"}};
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
        Annotation testDocument = new Annotation(document);
        pipeline.annotate(testDocument);
        int num = 0;
        List<CoreMap> discussionForumPosts = testDocument.get(CoreAnnotations.SectionsAnnotation.class);
        for (CoreMap discussionForumPost : discussionForumPosts) {
            // Fail with a readable message instead of an array index error on surplus sections.
            Assert.assertTrue("Too many sections", num < sections.length);
            Assert.assertEquals(sections[num][0], discussionForumPost.get(CoreAnnotations.AuthorAnnotation.class));
            Assert.assertEquals(sections[num][1], discussionForumPost.get(CoreAnnotations.SectionDateAnnotation.class));
            StringBuilder sb = new StringBuilder();
            List<CoreMap> sentences = discussionForumPost.get(CoreAnnotations.SentencesAnnotation.class);
            for (CoreMap sentence : sentences) {
                // A missing QuotedAnnotation counts as "not quoted".
                boolean sentenceQuoted = Boolean.TRUE.equals(sentence.get(CoreAnnotations.QuotedAnnotation.class));
                System.err.println("Sentence " + sentence + " quoted=" + sentenceQuoted);
                String sentenceAuthor = sentence.get(CoreAnnotations.AuthorAnnotation.class);
                String potentialQuoteText = sentenceQuoted ? "(QUOTING: " + sentenceAuthor + ") " : "";
                sb.append(potentialQuoteText);
                List<CoreLabel> sentenceTokens = sentence.get(CoreAnnotations.TokensAnnotation.class);
                sb.append(sentenceTokens.stream().map(CoreLabel::word).collect(Collectors.joining(" ")));
                sb.append('\n');
            }
            Assert.assertEquals(sections[num][2], sb.toString());
            ++num;
        }
        Assert.assertEquals("Too few sections", sections.length, num);
    }
}

