/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.international.arabic.process;

import edu.stanford.nlp.international.arabic.pipeline.DefaultLexicalMapper;
import edu.stanford.nlp.international.arabic.process.ArabicTokenizer;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.SentenceUtils;
import edu.stanford.nlp.process.TokenizerFactory;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.io.UnsupportedEncodingException;
import java.util.Arrays;
import java.util.List;

/**
 * Command-line sanity check that compares the output of {@link ArabicTokenizer}
 * against the output of {@link DefaultLexicalMapper} on the same input file.
 *
 * <p>Each line of the input file is (a) tokenized with a tokenizer built from
 * {@code ArabicTokenizer.factory()} configured with the user-supplied options,
 * and (b) split on whitespace with every piece passed through the lexical
 * mapper. The tokenized line is printed to standard output; any disagreement
 * between the two token sequences (different length, or different token at
 * the same position) is reported on standard error.
 *
 * <p>Usage: {@code java ArabicTokenizerTester OPTS filename}
 */
public class ArabicTokenizerTester {
    private static final Redwood.RedwoodChannels log = Redwood.channels(ArabicTokenizerTester.class);

    /**
     * Runs the comparison.
     *
     * @param args exactly two arguments: the tokenizer option string and the
     *             path of the UTF-8 encoded file to read
     */
    public static void main(String[] args) {
        if (args.length != 2) {
            System.out.printf("Usage: java %s OPTS filename%n", ArabicTokenizerTester.class.getName());
            System.exit(-1);
        }
        String tokOptions = args[0];
        File path = new File(args[1]);
        log.info("Reading from: " + path.getPath());

        // try-with-resources guarantees the underlying file handle is released
        // even when reading or tokenizing throws.
        try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(path), "UTF-8"))) {
            TokenizerFactory<CoreLabel> tf = ArabicTokenizer.factory();
            tf.setOptions(tokOptions);

            DefaultLexicalMapper lexMapper = new DefaultLexicalMapper();
            lexMapper.setup(null, "StripSegMarkersInUTF8", "StripMorphMarkersInUTF8");

            int lineId = 0;
            String line;
            while ((line = br.readLine()) != null) {
                line = line.trim();

                // Path 1: the tokenizer under test.
                List<CoreLabel> tokenizedLine = tf.getTokenizer(new StringReader(line)).tokenize();
                System.out.println(SentenceUtils.listToString(tokenizedLine));

                // Path 2: whitespace split + lexical mapping of each piece.
                StringBuilder sb = new StringBuilder();
                for (String tok : line.split("\\s+")) {
                    sb.append(lexMapper.map(null, tok)).append(" ");
                }

                // The mapped pieces are re-joined and re-split so that a piece the
                // mapper turned into several (or zero) whitespace-separated tokens
                // is counted accordingly.
                String mappedLine = sb.toString().trim();
                List<String> mappedToks;
                if (mappedLine.isEmpty()) {
                    // "".split(...) yields a single empty string, which would be
                    // miscounted as one token; an empty mapped line has no tokens.
                    mappedToks = Arrays.asList();
                } else {
                    mappedToks = Arrays.asList(mappedLine.split("\\s+"));
                }

                if (mappedToks.size() != tokenizedLine.size()) {
                    System.err.printf("Line length mismatch:%norig: %s%ntok: %s%nmap: %s%n%n", line, SentenceUtils.listToString(tokenizedLine), SentenceUtils.listToString(mappedToks));
                } else {
                    boolean printLines = false;
                    for (int i = 0; i < mappedToks.size(); ++i) {
                        String mappedTok = mappedToks.get(i);
                        String tokenizedTok = tokenizedLine.get(i).word();
                        if (!mappedTok.equals(tokenizedTok)) {
                            System.err.printf("Token mismatch:%nmap: %s%ntok: %s%n", mappedTok, tokenizedTok);
                            printLines = true;
                        }
                    }
                    // Show the full line context once, after all per-token reports.
                    if (printLines) {
                        System.err.printf("orig: %s%ntok: %s%nmap: %s%n%n", line, SentenceUtils.listToString(tokenizedLine), SentenceUtils.listToString(mappedToks));
                    }
                }
                ++lineId;
            }
            System.err.printf("Read %d lines.%n", lineId);
        }
        catch (IOException e) {
            // Covers FileNotFoundException and UnsupportedEncodingException as
            // well, both of which are IOException subclasses.
            e.printStackTrace();
        }
    }
}

