/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.pipeline;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.pipeline.TokenizerAnnotator;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
import junit.framework.TestCase;

/**
 * Tests for {@link TokenizerAnnotator}, exercised both directly and through a
 * {@link StanfordCoreNLP} pipeline whose only annotator is "tokenize".
 */
public class TokenizerAnnotatorTest
extends TestCase {
    private static final String TEXT = "She'll prove it ain't so.";
    private static final List<String> TOKEN_WORDS = Arrays.asList("She", "'ll", "prove", "it", "ai", "n't", "so", ".");

    /**
     * Annotates a fixed English sentence directly with a {@link TokenizerAnnotator}
     * and checks the resulting tokens against the expected word list.
     */
    public void testNewVersion() {
        Annotation ann = new Annotation(TEXT);
        TokenizerAnnotator annotator = new TokenizerAnnotator("en");
        annotator.annotate(ann);
        TokenizerAnnotatorTest.assertTokens(TOKEN_WORDS, ann.get(CoreAnnotations.TokensAnnotation.class));
    }

    /**
     * Building a pipeline with an unknown {@code tokenize.language} value must be
     * rejected with an {@link IllegalArgumentException}.
     */
    public void testBadLanguage() {
        Properties props = TokenizerAnnotatorTest.tokenizeOnlyProperties();
        props.setProperty("tokenize.language", "notalanguage");
        try {
            new StanfordCoreNLP(props);
            // Report as a JUnit failure (AssertionFailedError) rather than a generic error;
            // it is not an IllegalArgumentException, so the catch below will not swallow it.
            TokenizerAnnotatorTest.fail("Should have failed");
        }
        catch (IllegalArgumentException expected) {
            // Expected: the bogus language was rejected.
        }
    }

    /**
     * With default options, text containing newlines is expected to tokenize into
     * words only, with no tokens produced for the newlines themselves.
     */
    public void testDefaultNoNLsPipeline() {
        String t = "Text with \n\n a new \nline.";
        List<String> tWords = Arrays.asList("Text", "with", "a", "new", "line", ".");
        List<CoreLabel> tokens = TokenizerAnnotatorTest.tokenize(TokenizerAnnotatorTest.tokenizeOnlyProperties(), t);
        TokenizerAnnotatorTest.assertTokens(tWords, tokens);
    }

    /**
     * Compares token counts for the same hyphen-heavy sentence with default options
     * and with {@code splitHyphenated=true}.
     */
    public void testHyphens() {
        String test = "Hyphen-ated words should be split except when school-aged-children eat anti-disestablishmentariansm for breakfast at the o-kay choral infront of some explor-o-toriums.";

        List<CoreLabel> toks = TokenizerAnnotatorTest.tokenize(TokenizerAnnotatorTest.tokenizeOnlyProperties(), test);
        TokenizerAnnotatorTest.assertEquals(21, toks.size());

        Properties props2 = TokenizerAnnotatorTest.tokenizeOnlyProperties();
        props2.setProperty("tokenize.options", "splitHyphenated=true");
        List<CoreLabel> toks2 = TokenizerAnnotatorTest.tokenize(props2, test);
        TokenizerAnnotatorTest.assertEquals(27, toks2.size());
    }

    /** Returns fresh properties that request only the "tokenize" annotator. */
    private static Properties tokenizeOnlyProperties() {
        Properties props = new Properties();
        props.setProperty("annotators", "tokenize");
        return props;
    }

    /** Runs a pipeline built from {@code props} over {@code text} and returns its tokens. */
    private static List<CoreLabel> tokenize(Properties props, String text) {
        Annotation ann = new Annotation(text);
        StanfordCoreNLP pipeline = new StanfordCoreNLP(props);
        pipeline.annotate(ann);
        return ann.get(CoreAnnotations.TokensAnnotation.class);
    }

    /**
     * Asserts that {@code actual} matches {@code expected} word for word, checking both
     * {@link CoreLabel#word()} and the {@link CoreAnnotations.TextAnnotation} value of
     * every token, and that neither list is longer than the other.
     */
    private static void assertTokens(List<String> expected, List<CoreLabel> actual) {
        Iterator<String> it = expected.iterator();
        for (CoreLabel word : actual) {
            // Guard before next() so surplus tokens yield a readable failure
            // instead of a NoSuchElementException.
            TokenizerAnnotatorTest.assertTrue("Too many tokens in new CoreLabel usage", it.hasNext());
            String expectedWord = it.next();
            TokenizerAnnotatorTest.assertEquals("Bung token in new CoreLabel usage", expectedWord, word.word());
            TokenizerAnnotatorTest.assertEquals("Bung token in new CoreLabel usage", expectedWord, word.get(CoreAnnotations.TextAnnotation.class));
        }
        TokenizerAnnotatorTest.assertFalse("Too few tokens in new CoreLabel usage", it.hasNext());
    }
}

