/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.international.arabic.pipeline;

import edu.stanford.nlp.international.arabic.pipeline.ATBArabicDataset;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.util.logging.Redwood;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.io.UnsupportedEncodingException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * ATB dataset variant whose tree visitor replaces the word under every
 * preterminal with a segmentation label derived from hyphen markers on the
 * raw token:
 * <ul>
 *   <li>{@code XSEG} - tag is {@code PUNC}, or the token has no leading or trailing hyphen</li>
 *   <li>{@code SEGC} - the token has both a leading and a trailing hyphen</li>
 *   <li>{@code SEGL} - the token has only a trailing hyphen</li>
 *   <li>{@code SEGR} - the token has only a leading hyphen</li>
 * </ul>
 */
public class LabeledATBDataset
extends ATBArabicDataset {
    private static final Redwood.RedwoodChannels log = Redwood.channels(LabeledATBDataset.class);

    /** Matches a token that starts with a hyphen. Compiled once and shared. */
    private static final Pattern LEFT_CLITIC = Pattern.compile("^-");

    /** Matches a token that ends with a hyphen. Compiled once and shared. */
    private static final Pattern RIGHT_CLITIC = Pattern.compile("-$");

    /**
     * Loads the trees from every configured path into the treebank, then
     * applies a {@link LabelingTreeNormalizer} that writes to the output file
     * and, when {@code makeFlatFile} is set, to the flat file as well.
     *
     * <p>Failures to open an output file are reported on {@code System.err}
     * and are not rethrown; the writers that were opened are always closed.
     */
    @Override
    public void build() {
        for (File path : this.pathsToData) {
            int sizeBefore = this.treebank.size();
            if (this.splitFilter == null) {
                this.treebank.loadPath(path, this.treeFileExtension, false);
            } else {
                this.treebank.loadPath(path, this.splitFilter);
            }
            this.toStringBuffer.append(String.format(" Loaded %d trees from %s\n", this.treebank.size() - sizeBefore, path.getPath()));
        }

        PrintWriter outfile = null;
        PrintWriter flatFile = null;
        // Tracks the file currently being opened so that an open failure is
        // reported against the right name (it may be the flat file).
        String openTarget = this.outFileName;
        try {
            outfile = openUtf8Writer(openTarget);
            if (this.makeFlatFile) {
                openTarget = this.flatFileName;
                flatFile = openUtf8Writer(openTarget);
            }
            LabelingTreeNormalizer tv = new LabelingTreeNormalizer(outfile, flatFile);
            this.treebank.apply(tv);
            this.outputFileList.add(this.outFileName);
            if (this.makeFlatFile) {
                this.outputFileList.add(this.flatFileName);
                this.toStringBuffer.append(" Made flat files\n");
            }
        }
        catch (UnsupportedEncodingException e) {
            System.err.printf("%s: Filesystem does not support UTF-8 output\n", this.getClass().getName());
            e.printStackTrace();
        }
        catch (FileNotFoundException e) {
            System.err.printf("%s: Could not open %s for writing\n", this.getClass().getName(), openTarget);
        }
        finally {
            if (outfile != null) {
                outfile.close();
            }
            if (flatFile != null) {
                flatFile.close();
            }
        }
    }

    /**
     * Opens {@code fileName} for writing as buffered UTF-8 text.
     *
     * @param fileName path of the file to create or truncate
     * @return a writer over the file
     * @throws FileNotFoundException if the file cannot be opened for writing
     * @throws UnsupportedEncodingException if the UTF-8 charset is unavailable
     */
    private static PrintWriter openUtf8Writer(String fileName) throws FileNotFoundException, UnsupportedEncodingException {
        return new PrintWriter(new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fileName), "UTF-8")));
    }

    /**
     * Tree normalizer that overwrites each preterminal's word with one of the
     * segmentation labels {@code XSEG}, {@code SEGC}, {@code SEGL} or
     * {@code SEGR}.
     */
    protected class LabelingTreeNormalizer
    extends ATBArabicDataset.ArabicRawTreeNormalizer {

        /**
         * @param outFile  writer handed to the superclass normalizer
         * @param flatFile second writer handed to the superclass normalizer;
         *                 {@code build()} passes {@code null} when no flat file is requested
         */
        public LabelingTreeNormalizer(PrintWriter outFile, PrintWriter flatFile) {
            // The enclosing LabeledATBDataset instance is passed to the inner
            // superclass implicitly; the decompiled source had spelled it out
            // as an explicit first argument.
            super(outFile, flatFile);
        }

        /**
         * Replaces the value of the preterminal's first child with a
         * segmentation label. Preterminals tagged {@code -NONE-} are left
         * untouched.
         *
         * @param node the preterminal whose child word is relabeled
         */
        @Override
        protected void processPreterminal(Tree node) {
            String rawTag = node.value();
            if (rawTag.equals("-NONE-")) {
                return;
            }
            Tree wordNode = node.firstChild();
            String rawWord = wordNode.value().trim();
            boolean hasLeft = LEFT_CLITIC.matcher(rawWord).find();
            boolean hasRight = RIGHT_CLITIC.matcher(rawWord).find();

            String label;
            if (rawTag.equals("PUNC") || (!hasLeft && !hasRight)) {
                label = "XSEG";
            } else if (hasLeft && hasRight) {
                label = "SEGC";
            } else if (hasRight) {
                label = "SEGL";
            } else {
                // Only hasLeft can be true here: every other combination of
                // the two flags is handled by the branches above.
                label = "SEGR";
            }
            wordNode.setValue(label);
        }
    }
}

