/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.process;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.Annotator;
import edu.stanford.nlp.pipeline.TokenizerAnnotator;
import edu.stanford.nlp.process.WordToSentenceProcessor;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.PropertiesUtils;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import junit.framework.TestCase;

/**
 * JUnit 3 style tests for {@link WordToSentenceProcessor}.
 *
 * <p>Each test tokenizes an input string, runs a sentence splitter over the
 * resulting tokens and compares the sentences it produces, token by token,
 * against gold strings that are tokenized separately.
 */
public class WordToSentenceProcessorTest
extends TestCase {
    /** English tokenizer created without an option string; default tokenizer for gold sentences. */
    private static final Annotator ptb = new TokenizerAnnotator(false, "en");
    /** English tokenizer whose options include {@code tokenizeNLs=true}; always used for the test input. */
    private static final Annotator ptbNL = new TokenizerAnnotator(false, "en", "invertible,ptb3Escaping=true,tokenizeNLs=true");
    /** Tokenizer configured with {@code tokenize.whitespace=true} and {@code tokenizeNLs=true}; gold tokenizer in {@link #testChinese()}. */
    private static final Annotator wsNL = new TokenizerAnnotator(false, PropertiesUtils.asProperties("tokenize.whitespace", "true", "invertible", "true", "tokenizeNLs", "true"));
    /** Splitter built with the no-argument constructor. */
    private static final WordToSentenceProcessor<CoreLabel> wts = new WordToSentenceProcessor<CoreLabel>();
    /** Splitter built with the boolean flag set; {@link #testNullSplitter()} expects it to return the input as one sentence. */
    private static final WordToSentenceProcessor<CoreLabel> wtsNull = new WordToSentenceProcessor<CoreLabel>(true);
    /**
     * Splitter whose boundary pattern also matches the ideographic full stop (U+3002) and the
     * full-width exclamation and question marks (U+FF01, U+FF1F), built with the
     * {@code TWO_CONSECUTIVE} newline strategy.
     */
    private static final WordToSentenceProcessor<CoreLabel> cwts = new WordToSentenceProcessor<CoreLabel>("[.\u3002]|[!?\uff01\uff1f]+", WordToSentenceProcessor.NewlineIsSentenceBreak.TWO_CONSECUTIVE, false);

    /**
     * Runs {@code tokenizer} over {@code text} and returns the tokens it attached.
     *
     * @param tokenizer annotator that sets {@code TokensAnnotation} on the annotation
     * @param text raw text to tokenize
     * @return the value stored under {@code TokensAnnotation} after annotating
     */
    private static List<CoreLabel> tokenize(Annotator tokenizer, String text) {
        Annotation annotation = new Annotation(text);
        tokenizer.annotate(annotation);
        return annotation.get(CoreAnnotations.TokensAnnotation.class);
    }

    /**
     * Checks a splitter against gold sentences that are tokenized with {@link #ptb}.
     *
     * @param wts splitter under test
     * @param testSentence raw input text
     * @param gold expected sentences, in order
     */
    private static void checkResult(WordToSentenceProcessor<CoreLabel> wts, String testSentence, String ... gold) {
        checkResult(wts, ptb, testSentence, gold);
    }

    /**
     * Tokenizes {@code testSentence}, splits it with {@code wts} and asserts that the result has
     * the same number of sentences as {@code gold} and that each sentence has the same words as
     * the corresponding tokenized gold string.
     *
     * <p>The input is always tokenized with {@link #ptbNL}; {@code tokenizer} is applied to the
     * gold strings only.
     * NOTE(review): presumably deliberate, so the splitter always receives newline tokens while
     * gold sentences can be tokenized without them - confirm before changing.
     *
     * @param wts splitter under test
     * @param tokenizer annotator used to tokenize each gold sentence
     * @param testSentence raw input text
     * @param gold expected sentences, in order
     */
    private static void checkResult(WordToSentenceProcessor<CoreLabel> wts, Annotator tokenizer, String testSentence, String ... gold) {
        List<CoreLabel> tokens = tokenize(ptbNL, testSentence);
        List<List<CoreLabel>> sentences = wts.process(tokens);
        assertEquals("Output number of sentences didn't match:\n" + Arrays.toString(gold) + " vs. \n" + sentences + '\n', gold.length, sentences.size());
        for (int i = 0; i < gold.length; ++i) {
            List<CoreLabel> goldTokens = tokenize(tokenizer, gold[i]);
            List<CoreLabel> testTokens = sentences.get(i);
            int goldTokensSize = goldTokens.size();
            assertEquals("Sentence lengths didn't match:\n" + goldTokens + " vs. \n" + testTokens + '\n', goldTokensSize, testTokens.size());
            for (int j = 0; j < goldTokensSize; ++j) {
                // Only the word text is compared; identify the position so a failure is easy to locate.
                assertEquals("Token " + j + " of sentence " + i + " didn't match", goldTokens.get(j).word(), testTokens.get(j).word());
            }
        }
    }

    /** A single terminated sentence is returned unchanged. */
    public void testNoSplitting() {
        checkResult(wts, "This should only be one sentence.", "This should only be one sentence.");
    }

    /** Period, exclamation mark, question mark and a run of mixed marks each end a sentence. */
    public void testTwoSentences() {
        checkResult(wts, "This should be two sentences.  There is a split.", "This should be two sentences.", "There is a split.");
        checkResult(wts, "This should be two sentences!  There is a split.", "This should be two sentences!", "There is a split.");
        checkResult(wts, "This should be two sentences?  There is a split.", "This should be two sentences?", "There is a split.");
        checkResult(wts, "This should be two sentences!!!?!!  There is a split.", "This should be two sentences!!!?!!", "There is a split.");
    }

    /** Unterminated sentences, closing brackets and quotes, blank input and a single newline. */
    public void testEdgeCases() {
        checkResult(wts, "This should be two sentences.  Second one incomplete", "This should be two sentences.", "Second one incomplete");
        checkResult(wts, "One incomplete sentence", "One incomplete sentence");
        checkResult(wts, "(Break after a parenthesis.)  (Or after \"quoted stuff!\")", "(Break after a parenthesis.)", "(Or after \"quoted stuff!\")");
        // Whitespace-only input is expected to yield no sentences at all.
        checkResult(wts, "  ", new String[0]);
        checkResult(wts, "This should be\n one sentence.", "This should be one sentence.");
        checkResult(wts, "'') Funny stuff joined on.", "'') Funny stuff joined on.");
    }

    /** The period of the abbreviation "Mr." must not produce a split. */
    public void testMr() {
        checkResult(wts, "Mr. White got a loaf of bread", "Mr. White got a loaf of bread");
    }

    /** {@link #wtsNull} keeps two sentences together as one. */
    public void testNullSplitter() {
        checkResult(wtsNull, "This should be one sentence.  There is no split.", "This should be one sentence.  There is no split.");
    }

    /** Exercises the {@code NEVER}, {@code ALWAYS} and {@code TWO_CONSECUTIVE} newline strategies. */
    public void testParagraphStrategies() {
        WordToSentenceProcessor<CoreLabel> wtsNever = new WordToSentenceProcessor<CoreLabel>(WordToSentenceProcessor.NewlineIsSentenceBreak.NEVER);
        WordToSentenceProcessor<CoreLabel> wtsAlways = new WordToSentenceProcessor<CoreLabel>(WordToSentenceProcessor.NewlineIsSentenceBreak.ALWAYS);
        WordToSentenceProcessor<CoreLabel> wtsTwo = new WordToSentenceProcessor<CoreLabel>(WordToSentenceProcessor.NewlineIsSentenceBreak.TWO_CONSECUTIVE);

        // input1 contains a blank line (two consecutive newlines); input2 has single newlines only.
        String input1 = "Depending on the options,\nthis could be all sorts of things,\n\n as I like chocolate. And cookies.";
        String input2 = "Depending on the options,\nthis could be all sorts of things,\n as I like chocolate. And cookies.";

        checkResult(wtsNever, input1, "Depending on the options,\nthis could be all sorts of things,\n\nas I like chocolate.", "And cookies.");
        checkResult(wtsAlways, input1, "Depending on the options,", "this could be all sorts of things,", "as I like chocolate.", "And cookies.");
        checkResult(wtsTwo, input1, "Depending on the options, this could be all sorts of things,", "as I like chocolate.", "And cookies.");

        checkResult(wtsNever, input2, "Depending on the options,\nthis could be all sorts of things,\nas I like chocolate.", "And cookies.");
        checkResult(wtsAlways, input2, "Depending on the options,", "this could be all sorts of things,", "as I like chocolate.", "And cookies.");
        checkResult(wtsTwo, input2, "Depending on the options,\nthis could be all sorts of things,\nas I like chocolate.", "And cookies.");

        String input3 = "Specific descriptions are absent.\n\n''Mossy Head Industrial Park'' it says.";
        checkResult(wtsTwo, input3, "Specific descriptions are absent.", "''Mossy Head Industrial Park'' it says.");
    }

    /** A splitter given the element names "p" and "chapter" is expected to break sentences at those XML elements. */
    public void testXmlElements() {
        WordToSentenceProcessor<CoreLabel> wtsXml = new WordToSentenceProcessor<CoreLabel>(null, null, null, Generics.newHashSet(Arrays.asList("p", "chapter")), WordToSentenceProcessor.NewlineIsSentenceBreak.NEVER, null, null);
        String input1 = "<chapter>Chapter 1</chapter><p>This is text. So is this.</p> <p>One without end</p><p>Another</p><p>And another</p>";
        checkResult(wtsXml, input1, "Chapter 1", "This is text.", "So is this.", "One without end", "Another", "And another");
    }

    /**
     * A splitter given the region pattern {@code chapter|preface} is expected to return only the
     * text inside those elements; the title and coda text must not appear in the output.
     */
    public void testRegion() {
        WordToSentenceProcessor<CoreLabel> wtsRegion = new WordToSentenceProcessor<CoreLabel>("[.\u3002]|[!?\uff01\uff1f]+", "[\\p{Pe}\\p{Pf}\"'>\uff02\uff07\uff1e]|''|-R[CRS]B-", WordToSentenceProcessor.DEFAULT_SENTENCE_BOUNDARIES_TO_DISCARD, Generics.newHashSet(Collections.singletonList("p")), "chapter|preface", WordToSentenceProcessor.NewlineIsSentenceBreak.NEVER, null, null, false, false);
        String input1 = "<title>Chris rules!</title><preface><p>Para one</p><p>Para two</p></preface><chapter><p>Text we like. Two sentences \n\n in it.</p></chapter><coda>Some more text here</coda>";
        checkResult(wtsRegion, input1, "Para one", "Para two", "Text we like.", "Two sentences in it.");
    }

    /**
     * A splitter built from only a set of boundary tokens to discard is expected to treat every
     * line as its own sentence, producing an empty sentence for a blank line.
     */
    public void testBlankLines() {
        WordToSentenceProcessor<CoreLabel> wtsLines = new WordToSentenceProcessor<CoreLabel>(Generics.newHashSet(WordToSentenceProcessor.DEFAULT_SENTENCE_BOUNDARIES_TO_DISCARD));
        String input1 = "Depending on the options,\nthis could be all sorts of things,\n\n as I like chocolate. And cookies.";
        checkResult(wtsLines, input1, "Depending on the options,", "this could be all sorts of things,", "", "as I like chocolate. And cookies.");
        // One trailing newline adds nothing; two trailing newlines add a final empty sentence.
        String input2 = "Depending on the options,\nthis could be all sorts of things,\n\n as I like chocolate. And cookies.\n";
        checkResult(wtsLines, input2, "Depending on the options,", "this could be all sorts of things,", "", "as I like chocolate. And cookies.");
        String input3 = "Depending on the options,\nthis could be all sorts of things,\n\n as I like chocolate. And cookies.\n\n";
        checkResult(wtsLines, input3, "Depending on the options,", "this could be all sorts of things,", "", "as I like chocolate. And cookies.", "");
    }

    /** Tokenizer-level check: a double exclamation mark must come out as a single token. */
    public void testExclamationPoint() {
        List<CoreLabel> list = tokenize(ptb, "Foo!!");
        assertEquals("Wrong double bang", "[Foo, !!]", list.toString());
    }

    /** Full-width sentence punctuation splits sentences, with following closing brackets kept attached. */
    public void testChinese() {
        checkResult(cwts, wsNL, "\u5df4\u62c9\u7279 \u8bf4 \uff1a \u300c \u6211\u4eec \u672a \u518d \u83b7\u5f97 \u4efb\u4f55 \u7ed3\u679c \u3002 \u300d \uff1c \u91d1\u878d\u65f6\u62a5 \uff1f \uff1e \u300a \u91d1\u878d\u65f6\u62a5 \u300b \u5468\u4e09", "\u5df4\u62c9\u7279 \u8bf4 \uff1a \u300c \u6211\u4eec \u672a \u518d \u83b7\u5f97 \u4efb\u4f55 \u7ed3\u679c \u3002 \u300d", "\uff1c \u91d1\u878d\u65f6\u62a5 \uff1f \uff1e", "\u300a \u91d1\u878d\u65f6\u62a5 \u300b \u5468\u4e09");
    }

    /** The Unicode paragraph separator (U+2029) is expected to break sentences with the default splitter. */
    public void testParagraphSeparator() {
        checkResult(wts, "Hello\u2029World.", "Hello", "World.");
        checkResult(wts, "Hello.\u2029World.", "Hello.", "World.");
        checkResult(wts, "Hello  \u2029World.", "Hello", "World.");
    }
}

