/*
 * Decompiled with CFR 0.152.
 */
package edu.stanford.nlp.parser.lexparser;

import edu.stanford.nlp.international.arabic.ArabicMorphoFeatureSpecification;
import edu.stanford.nlp.international.morph.MorphoFeatureSpecification;
import edu.stanford.nlp.international.morph.MorphoFeatures;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.ling.HasTag;
import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.Label;
import edu.stanford.nlp.ling.SentenceUtils;
import edu.stanford.nlp.parser.lexparser.AbstractTreebankParserParams;
import edu.stanford.nlp.parser.lexparser.BaseLexicon;
import edu.stanford.nlp.parser.lexparser.FactoredLexicon;
import edu.stanford.nlp.parser.lexparser.Lexicon;
import edu.stanford.nlp.parser.lexparser.Options;
import edu.stanford.nlp.parser.lexparser.TreeCollinizer;
import edu.stanford.nlp.process.SerializableFunction;
import edu.stanford.nlp.trees.DiskTreebank;
import edu.stanford.nlp.trees.HeadFinder;
import edu.stanford.nlp.trees.LabeledScoredTreeFactory;
import edu.stanford.nlp.trees.MemoryTreebank;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreeFactory;
import edu.stanford.nlp.trees.TreeReaderFactory;
import edu.stanford.nlp.trees.TreeTransformer;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.trees.international.arabic.ArabicHeadFinder;
import edu.stanford.nlp.trees.international.arabic.ArabicTreeReaderFactory;
import edu.stanford.nlp.trees.international.arabic.ArabicTreebankLanguagePack;
import edu.stanford.nlp.trees.tregex.TregexMatcher;
import edu.stanford.nlp.trees.tregex.TregexParseException;
import edu.stanford.nlp.trees.tregex.TregexPattern;
import edu.stanford.nlp.trees.tregex.TregexPatternCompiler;
import edu.stanford.nlp.util.Generics;
import edu.stanford.nlp.util.Index;
import edu.stanford.nlp.util.Pair;
import edu.stanford.nlp.util.StringUtils;
import edu.stanford.nlp.util.logging.Redwood;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.regex.PatternSyntaxException;

public class ArabicTreebankParserParams
extends AbstractTreebankParserParams {
    // Logging channel used by display(), setOptionFlag() and pattern initialization.
    private static final Redwood.RedwoodChannels log = Redwood.channels(ArabicTreebankParserParams.class);
    private static final long serialVersionUID = 8853426784197984653L;
    // Running, human-readable record of the options applied so far; logged by display().
    private final StringBuilder optionsString;
    // The next five flags are handed to ArabicTreeReaderFactory by treeReaderFactory();
    // each one is switched on by an option handled in setOptionFlag().
    private boolean retainNPTmp = false;
    private boolean retainNPSbj = false;
    private boolean retainPRD = false;
    private boolean retainPPClr = false;
    private boolean changeNoLabels = false;
    // collinizer() passes the negation of this flag to TreeCollinizer.
    private boolean collinizerRetainsPunctuation = false;
    // Also forwarded to ArabicTreeReaderFactory by treeReaderFactory().
    private boolean discardX = false;
    // Created lazily by headFinder(); replaced by setHeadFinder().
    private HeadFinder headFinder;
    // Every known annotation option, keyed by its command-line flag. Each value pairs the
    // pattern that selects nodes with the function that produces the label suffix.
    private final Map<String, Pair<TregexPattern, Function<TregexMatcher, String>>> annotationPatterns;
    // Annotations that transformTree() applies, in the order they were activated.
    private final List<Pair<TregexPattern, Function<TregexMatcher, String>>> activeAnnotations;
    // Set by setupMorphoFeatures(); when non-null, lex() returns a FactoredLexicon.
    private MorphoFeatureSpecification morphoSpec = null;
    // Specification used by transformTree() to rewrite pre-terminal tags; the static
    // initializer of this class activates the NGEN feature on it.
    private static final MorphoFeatureSpecification tagSpec = new ArabicMorphoFeatureSpecification();
    // Flags of the baseline annotation set; filled in by the constructor.
    private final List<String> baselineFeatures = new ArrayList<String>();
    // Annotation flags outside the baseline set that setOptionFlag() has activated.
    private final List<String> additionalFeatures;

    /**
     * Creates parameters backed by an {@link ArabicTreebankLanguagePack}.
     *
     * <p>Registers the baseline annotation flags, starts the options summary, obtains the
     * head finder and compiles every annotation pattern. No annotation is active yet; flags
     * are activated through {@link #setOptionFlag(String[], int)}.
     */
    public ArabicTreebankParserParams() {
        super(new ArabicTreebankLanguagePack());

        // Order is preserved: it is the order in which "-arabicFactored" activates them.
        Collections.addAll(this.baselineFeatures,
                "-markNounNPargTakers",
                "-genitiveMark",
                "-splitPUNC",
                "-markContainsVerb",
                "-markStrictBaseNP",
                "-markOneLevelIdafa",
                "-splitIN",
                "-markMasdarVP",
                "-containsSVO",
                "-splitCC",
                "-markFem",
                "-mwe",
                "-mweContainsVerb");

        this.additionalFeatures = new ArrayList<>();
        this.optionsString = new StringBuilder("ArabicTreebankParserParams\n");
        this.annotationPatterns = Generics.newHashMap();
        this.activeAnnotations = new ArrayList<>();

        // The pattern compiler needs a head finder, so resolve it before compiling patterns.
        this.headFinder = this.headFinder();
        this.initializeAnnotationPatterns();
    }

    /**
     * Builds a tree reader factory configured from the current option flags.
     *
     * @return a new {@link ArabicTreeReaderFactory}; its sixth constructor argument is always
     *         {@code false} (its meaning is defined by that class, not visible here)
     */
    @Override
    public TreeReaderFactory treeReaderFactory() {
        final TreeReaderFactory factory = new ArabicTreeReaderFactory(
                this.retainNPTmp,
                this.retainPRD,
                this.changeNoLabels,
                this.discardX,
                this.retainNPSbj,
                false,
                this.retainPPClr);
        return factory;
    }

    /**
     * Creates a {@link MemoryTreebank} that reads trees with {@link #treeReaderFactory()} and
     * the configured input encoding.
     *
     * @return a new, empty memory treebank
     */
    @Override
    public MemoryTreebank memoryTreebank() {
        final TreeReaderFactory readerFactory = this.treeReaderFactory();
        return new MemoryTreebank(readerFactory, this.inputEncoding);
    }

    /**
     * Creates a {@link DiskTreebank} that reads trees with {@link #treeReaderFactory()} and
     * the configured input encoding.
     *
     * @return a new disk treebank with no paths loaded
     */
    @Override
    public DiskTreebank diskTreebank() {
        final TreeReaderFactory readerFactory = this.treeReaderFactory();
        return new DiskTreebank(readerFactory, this.inputEncoding);
    }

    /**
     * Returns the head finder in use, creating an {@link ArabicHeadFinder} for this
     * instance's language pack on first access.
     *
     * @return the current head finder, never {@code null}
     */
    @Override
    public HeadFinder headFinder() {
        HeadFinder current = this.headFinder;
        if (current == null) {
            current = new ArabicHeadFinder(this.treebankLanguagePack());
            this.headFinder = current;
        }
        return current;
    }

    /**
     * Returns the head finder for typed dependencies, which is the same object that
     * {@link #headFinder()} returns.
     *
     * @return the current head finder
     */
    @Override
    public HeadFinder typedDependencyHeadFinder() {
        final HeadFinder shared = this.headFinder();
        return shared;
    }

    /**
     * Creates the lexicon for the given options and indices.
     *
     * <p>Side effect: when {@code op.lexOptions.uwModelTrainer} is unset, it is set to the
     * Arabic unknown-word model trainer class name.
     *
     * @param op parser options; may be modified as described above
     * @param wordIndex index of words
     * @param tagIndex index of tags
     * @return a {@link FactoredLexicon} when a morphological feature specification has been
     *         configured, otherwise a {@link BaseLexicon}
     */
    @Override
    public Lexicon lex(Options op, Index<String> wordIndex, Index<String> tagIndex) {
        if (op.lexOptions.uwModelTrainer == null) {
            op.lexOptions.uwModelTrainer = "edu.stanford.nlp.parser.lexparser.ArabicUnknownWordModelTrainer";
        }

        if (this.morphoSpec == null) {
            return new BaseLexicon(op, wordIndex, tagIndex);
        }
        return new FactoredLexicon(op, this.morphoSpec, wordIndex, tagIndex);
    }

    /**
     * Returns a fixed six-token Arabic sentence (five words and a final period) as a word list.
     *
     * @return the default test sentence
     */
    @Override
    public List<? extends HasWord> defaultTestSentence() {
        final String[] tokens = {
            "\u0647\u0648",
            "\u0627\u0633\u062a\u0646\u0643\u0631",
            "\u0627\u0644\u062d\u0643\u0648\u0645\u0629",
            "\u064a\u0648\u0645",
            "\u0627\u0645\u0633",
            "."
        };
        return SentenceUtils.toWordList(tokens);
    }

    /**
     * Returns a transformer for removing subcategory annotations from trees.
     *
     * @return a new {@code ArabicSubcategoryStripper}
     */
    @Override
    public TreeTransformer subcategoryStripper() {
        final TreeTransformer stripper = new ArabicSubcategoryStripper();
        return stripper;
    }

    /**
     * Builds the tree collinizer used for evaluation.
     *
     * @return a new {@link TreeCollinizer} for this language pack; its second argument is
     *         {@code true} unless the {@code -collinizerRetainsPunctuation} option was given,
     *         and its third argument is always {@code false}
     */
    @Override
    public TreeTransformer collinizer() {
        final boolean notRetainingPunctuation = !this.collinizerRetainsPunctuation;
        return new TreeCollinizer(this.tlp, notRetainingPunctuation, false);
    }

    /**
     * Returns the collinizer for evalb-style evaluation; this delegates to
     * {@link #collinizer()} without any change.
     *
     * @return the result of {@link #collinizer()}
     */
    @Override
    public TreeTransformer collinizerEvalb() {
        final TreeTransformer sameAsCollinizer = this.collinizer();
        return sameAsCollinizer;
    }

    /**
     * Returns the sister splitters; none are defined here.
     *
     * @return the shared empty string array {@code StringUtils.EMPTY_STRING_ARRAY}
     */
    @Override
    public String[] sisterSplitters() {
        final String[] noSplitters = StringUtils.EMPTY_STRING_ARRAY;
        return noSplitters;
    }

    /**
     * Re-labels the node {@code t} in place with morphological and annotation information.
     *
     * <p>The new label is the base category (for pre-terminals, first rewritten through the
     * tag specification using the morphological string stored as the word label's original
     * text) followed by the suffix of every active annotation whose pattern matches at
     * {@code t} within {@code root}. For pre-terminals whose label implements
     * {@link HasTag}, the tag is set to the same string.
     *
     * @param t the node to annotate; modified and returned
     * @param root the root of the tree containing {@code t}, used as the matching context
     * @return {@code t}
     * @throws RuntimeException if {@code t} is a pre-terminal whose word label is not a
     *         {@link CoreLabel} or has no original text
     */
    @Override
    public Tree transformTree(Tree t, Tree root) {
        String category = t.value();

        // Gather the suffixes of all active annotations that match at this node.
        final StringBuilder suffixes = new StringBuilder();
        for (Pair<TregexPattern, Function<TregexMatcher, String>> annotation : this.activeAnnotations) {
            final TregexMatcher matcher = annotation.first().matcher(root);
            if (matcher.matchesAt(t)) {
                suffixes.append(annotation.second().apply(matcher));
            }
        }

        final boolean preTerminal = t.isPreTerminal();
        if (preTerminal && tagSpec != null) {
            // The morphological analysis is carried in the word label's original text.
            final Label wordLabel = t.firstChild().label();
            final String morphoStr = wordLabel instanceof CoreLabel
                    ? ((CoreLabel) wordLabel).originalText()
                    : null;
            if (morphoStr == null) {
                throw new RuntimeException(String.format("%s: Term lacks morpho analysis: %s", this.getClass().getName(), t.toString()));
            }
            category = tagSpec.strToFeatures(morphoStr).getTag(category);
        }

        final String annotatedCategory = category + suffixes;
        t.setValue(annotatedCategory);
        if (preTerminal && t.label() instanceof HasTag) {
            ((HasTag) t.label()).setTag(annotatedCategory);
        }
        return t;
    }

    /**
     * Compiles every supported annotation pattern and stores it in {@code annotationPatterns}
     * under its command-line flag.
     *
     * <p>Most patterns are compiled with a {@link TregexPatternCompiler} bound to the current
     * head finder; a few are compiled with the static {@link TregexPattern#compile(String)}.
     * Existing entries with the same flag are overwritten, so the method can be called again
     * after the head finder changes.
     *
     * @throws TregexParseException if any pattern fails to compile; the failure is logged
     *         and the exception rethrown
     */
    private void initializeAnnotationPatterns() {
        // Shared by -genitiveMark and -markGenitiveParent so the two cannot drift apart.
        final String genitiveNodeTregexString = "@NP > @NP $- /^N/";
        final TregexPatternCompiler compiler = new TregexPatternCompiler(this.headFinder());
        try {
            // The diamond operator lets each Pair take the map's value type
            // (Pair<TregexPattern, Function<TregexMatcher, String>>). Spelling out the concrete
            // function class as the second type argument is rejected because generics are invariant.

            // Flags that the constructor lists in baselineFeatures.
            this.annotationPatterns.put("-genitiveMark", new Pair<>(TregexPattern.compile(genitiveNodeTregexString), new SimpleStringFunction("-genitive")));
            this.annotationPatterns.put("-markStrictBaseNP", new Pair<>(compiler.compile("@NP !< (__ < (__ < __))"), new SimpleStringFunction("-base")));
            this.annotationPatterns.put("-markOneLevelIdafa", new Pair<>(compiler.compile("@NP < (@NP < (__ < __)) !< (/^[^N]/ < (__ < __)) !< (__ < (__ < (__ < __)))"), new SimpleStringFunction("-idafa1")));
            this.annotationPatterns.put("-markNounNPargTakers", new Pair<>(compiler.compile("@NN|NNS|NNP|NNPS|DTNN|DTNNS|DTNNP|DTNNPS ># (@NP < @NP)"), new SimpleStringFunction("-NounNParg")));
            this.annotationPatterns.put("-markContainsVerb", new Pair<>(compiler.compile("__ << (/^[CIP]?V/ < (__ !< __))"), new SimpleStringFunction("-withV")));
            this.annotationPatterns.put("-splitIN", new Pair<>(compiler.compile("@IN < __=word"), new AddRelativeNodeFunction("-", "word", false)));
            this.annotationPatterns.put("-splitPUNC", new Pair<>(compiler.compile("@PUNC < __=term"), new AnnotatePunctuationFunction2()));
            this.annotationPatterns.put("-markMasdarVP", new Pair<>(compiler.compile("@VP|MWVP < /VBG|VN/"), new SimpleStringFunction("-masdar")));
            this.annotationPatterns.put("-containsSVO", new Pair<>(compiler.compile("__ << (@S < (@NP . @VP|MWVP))"), new SimpleStringFunction("-hasSVO")));
            this.annotationPatterns.put("-splitCC", new Pair<>(compiler.compile("@CC|CONJ . __=term , __"), new AddEquivalencedConjNode("-", "term")));
            this.annotationPatterns.put("-markFem", new Pair<>(compiler.compile("__ < /\u0629$/"), new SimpleStringFunction("-fem")));
            this.annotationPatterns.put("-mwe", new Pair<>(compiler.compile("__ > /MW/=tag"), new AddRelativeNodeFunction("-", "tag", true)));
            this.annotationPatterns.put("-mweContainsVerb", new Pair<>(compiler.compile("__ << @MWVP"), new SimpleStringFunction("-withV")));

            // Remaining flags are optional and must be requested individually.
            this.annotationPatterns.put("-splitPUNC2", new Pair<>(compiler.compile("@PUNC < __=punc"), new AbstractTreebankParserParams.AnnotatePunctuationFunction("-", "punc")));
            this.annotationPatterns.put("-tagPAar", new Pair<>(compiler.compile("!@PUNC < (__ !< __) > __=parent"), new AddRelativeNodeFunction("-", "parent", true)));
            this.annotationPatterns.put("-splitCC1", new Pair<>(compiler.compile("@CC|CONJ < __=term"), new AddRelativeNodeRegexFunction("-", "term", "-*([^-].*)")));
            this.annotationPatterns.put("-splitCC2", new Pair<>(compiler.compile("@CC . __=term , __"), new AddRelativeNodeFunction("-", "term", true)));
            this.annotationPatterns.put("-idafaJJ1", new Pair<>(compiler.compile("@NP <, (@NN $+ @NP) <+(@NP) @ADJP"), new SimpleStringFunction("-idafaJJ")));
            this.annotationPatterns.put("-idafaJJ2", new Pair<>(compiler.compile("@NP <, (@NN $+ @NP) <+(@NP) @ADJP !<< @SBAR"), new SimpleStringFunction("-idafaJJ")));
            this.annotationPatterns.put("-properBaseNP", new Pair<>(compiler.compile("@NP !<< @NP < /NNP/ !< @PUNC|CD"), new SimpleStringFunction("-prop")));
            this.annotationPatterns.put("-interrog", new Pair<>(compiler.compile("__ << \u0647\u0644|\u0645\u0627\u0630\u0627|\u0644\u0645\u0627\u0630\u0627|\u0627\u064a\u0646|\u0645\u062a\u0649"), new SimpleStringFunction("-inter")));
            this.annotationPatterns.put("-splitPseudo", new Pair<>(compiler.compile("@NN < \u0645\u0639|\u0628\u0639\u062f|\u0628\u064a\u0646"), new SimpleStringFunction("-pseudo")));
            this.annotationPatterns.put("-nPseudo", new Pair<>(compiler.compile("@NP < (@NN < \u0645\u0639|\u0628\u0639\u062f|\u0628\u064a\u0646)"), new SimpleStringFunction("-npseudo")));
            this.annotationPatterns.put("-pseudoArg", new Pair<>(compiler.compile("@NP < @NP $, (@NN < \u0645\u0639|\u0628\u0639\u062f|\u0628\u064a\u0646)"), new SimpleStringFunction("-pseudoArg")));
            this.annotationPatterns.put("-eqL1", new Pair<>(compiler.compile("__ < (@S !< @VP|S)"), new SimpleStringFunction("-haseq")));
            this.annotationPatterns.put("-eqL1L2", new Pair<>(compiler.compile("__ < (__ < (@S !< @VP|S)) | < (@S !< @VP|S)"), new SimpleStringFunction("-haseq")));
            this.annotationPatterns.put("-fullQuote", new Pair<>(compiler.compile("__ < ((@PUNC < \") $ (@PUNC < \"))"), new SimpleStringFunction("-fq")));
            this.annotationPatterns.put("-brokeQuote", new Pair<>(compiler.compile("__ < ((@PUNC < \") !$ (@PUNC < \"))"), new SimpleStringFunction("-bq")));
            this.annotationPatterns.put("-splitVP", new Pair<>(compiler.compile("@VP <# __=term1"), new AddRelativeNodeFunction("-", "term1", true)));
            this.annotationPatterns.put("-markFemP", new Pair<>(compiler.compile("@NP|ADJP < (__ < /\u0629$/)"), new SimpleStringFunction("-femP")));
            this.annotationPatterns.put("-embedSBAR", new Pair<>(compiler.compile("@NP|PP <+(@NP|PP) @SBAR"), new SimpleStringFunction("-embedSBAR")));
            this.annotationPatterns.put("-complexVP", new Pair<>(compiler.compile("__ << (@VP < (@NP $ @NP)) > __"), new SimpleStringFunction("-complexVP")));
            this.annotationPatterns.put("-containsJJ", new Pair<>(compiler.compile("@NP <+(@NP) /JJ/"), new SimpleStringFunction("-hasJJ")));
            this.annotationPatterns.put("-markMasdarVP2", new Pair<>(compiler.compile("__ << @VN|VBG"), new SimpleStringFunction("-masdar")));
            this.annotationPatterns.put("-coordNP", new Pair<>(compiler.compile("@NP|ADJP <+(@NP|ADJP) (@CC|PUNC $- __ $+ __)"), new SimpleStringFunction("-coordNP")));
            this.annotationPatterns.put("-coordWa", new Pair<>(compiler.compile("__ << (@CC , __ < \u0648-)"), new SimpleStringFunction("-coordWA")));
            this.annotationPatterns.put("-NPhasADJP", new Pair<>(compiler.compile("@NP <+(@NP) @ADJP"), new SimpleStringFunction("-NPhasADJP")));
            this.annotationPatterns.put("-NPADJP", new Pair<>(compiler.compile("@NP < @ADJP"), new SimpleStringFunction("-npadj")));
            this.annotationPatterns.put("-NPJJ", new Pair<>(compiler.compile("@NP < /JJ/"), new SimpleStringFunction("-npjj")));
            this.annotationPatterns.put("-NPCC", new Pair<>(compiler.compile("@NP <+(@NP) @CC"), new SimpleStringFunction("-npcc")));
            this.annotationPatterns.put("-NPCD", new Pair<>(compiler.compile("@NP < @CD"), new SimpleStringFunction("-npcd")));
            this.annotationPatterns.put("-NPNNP", new Pair<>(compiler.compile("@NP < /NNP/"), new SimpleStringFunction("-npnnp")));
            this.annotationPatterns.put("-SVO", new Pair<>(compiler.compile("@S < (@NP . @VP)"), new SimpleStringFunction("-svo")));
            this.annotationPatterns.put("-containsSBAR", new Pair<>(compiler.compile("__ << @SBAR"), new SimpleStringFunction("-hasSBAR")));
            this.annotationPatterns.put("-markGappedVP", new Pair<>(TregexPattern.compile("@VP > @VP $- __ $ /^(?:CC|CONJ)/ !< /^V/"), new SimpleStringFunction("-gappedVP")));
            this.annotationPatterns.put("-markGappedVPConjoiners", new Pair<>(TregexPattern.compile("/^(?:CC|CONJ)/ $ (@VP > @VP $- __ !< /^V/)"), new SimpleStringFunction("-gappedVP")));
            this.annotationPatterns.put("-markGenitiveParent", new Pair<>(TregexPattern.compile("@NP < (" + genitiveNodeTregexString + ")"), new SimpleStringFunction("-genitiveParent")));
            this.annotationPatterns.put("-maSdrMark", new Pair<>(compiler.compile("/^N/ <<# (/^[t\\u062a].+[y\\u064a].$/ > @NN|NOUN|DTNN)"), new SimpleStringFunction("-maSdr")));
            this.annotationPatterns.put("-maSdrMark2", new Pair<>(compiler.compile("/^N/ <<# (/^(?:[t\\u062a].+[y\\u064a].|<.{3,}|A.{3,})$/ > @NN|NOUN|DTNN)"), new SimpleStringFunction("-maSdr")));
            this.annotationPatterns.put("-maSdrMark3", new Pair<>(compiler.compile("/^N/ <<# (/^(?:[t\\u062a<A].{3,})$/ > @NN|NOUN|DTNN)"), new SimpleStringFunction("-maSdr")));
            this.annotationPatterns.put("-maSdrMark4", new Pair<>(compiler.compile("/^N/ <<# (/^(?:[t\\u062a<A].{3,})$/ > (@NN|NOUN|DTNN > (@NP < @NP)))"), new SimpleStringFunction("-maSdr")));
            this.annotationPatterns.put("-maSdrMark5", new Pair<>(compiler.compile("/^N/ <<# (__ > (@NN|NOUN|DTNN > (@NP < @NP)))"), new SimpleStringFunction("-maSdr")));
            this.annotationPatterns.put("-mjjMark", new Pair<>(compiler.compile("@JJ|DTJJ < /^m/ $+ @PP ># @ADJP "), new SimpleStringFunction("-mjj")));
            this.annotationPatterns.put("-markNPwithSdescendant", new Pair<>(compiler.compile("__ !< @S << @S [ >> @NP | == @NP ]"), new SimpleStringFunction("-inNPdominatesS")));
            this.annotationPatterns.put("-markRightRecursiveNP", new Pair<>(compiler.compile("__ <<- @NP [>>- @NP | == @NP]"), new SimpleStringFunction("-rrNP")));
            this.annotationPatterns.put("-markBaseNP", new Pair<>(compiler.compile("@NP !< @NP !< @VP !< @SBAR !< @ADJP !< @ADVP !< @S !< @QP !< @UCP !< @PP"), new SimpleStringFunction("-base")));
            this.annotationPatterns.put("-markBaseNPplusIdafa", new Pair<>(compiler.compile("@NP !< (/^[^N]/ < (__ < __)) !< (__ < (__ < (__ < __)))"), new SimpleStringFunction("-base")));
            this.annotationPatterns.put("-markTwoLevelIdafa", new Pair<>(compiler.compile("@NP < (@NP < (@NP < (__ < __)) !< (/^[^N]/ < (__ < __))) !< (/^[^N]/ < (__ < __)) !< (__ < (__ < (__ < (__ < __))))"), new SimpleStringFunction("-idafa2")));
            this.annotationPatterns.put("-markDefiniteIdafa", new Pair<>(compiler.compile("@NP < (/^(?:NN|NOUN)/ !$,, /^[^AP]/) <+(/^NP/) (@NP < /^DT/)"), new SimpleStringFunction("-defIdafa")));
            this.annotationPatterns.put("-markDefiniteIdafa1", new Pair<>(compiler.compile("@NP < (/^(?:NN|NOUN)/ !$,, /^[^AP]/) < (@NP < /^DT/) !< (/^[^N]/ < (__ < __)) !< (__ < (__ < (__ < __)))"), new SimpleStringFunction("-defIdafa1")));
            this.annotationPatterns.put("-markContainsSBAR", new Pair<>(compiler.compile("__ << @SBAR"), new SimpleStringFunction("-withSBAR")));
            this.annotationPatterns.put("-markPhrasalNodesDominatedBySBAR", new Pair<>(compiler.compile("__ < (__ < __) >> @SBAR"), new SimpleStringFunction("-domBySBAR")));
            this.annotationPatterns.put("-markCoordinateNPs", new Pair<>(compiler.compile("@NP < @CC|CONJ"), new SimpleStringFunction("-coord")));
            this.annotationPatterns.put("-markNounAdjVPheads", new Pair<>(compiler.compile("@NN|NNS|NNP|NNPS|JJ|DTJJ|DTNN|DTNNS|DTNNP|DTNNPS ># @VP"), new SimpleStringFunction("-VHead")));
            this.annotationPatterns.put("-markPronominalNP", new Pair<>(compiler.compile("@NP < @PRP"), new SimpleStringFunction("-PRP")));
            this.annotationPatterns.put("-markMultiCC", new Pair<>(compiler.compile("__ < (@CC $.. @CC)"), new SimpleStringFunction("-multiCC")));
            this.annotationPatterns.put("-markHasCCdaughter", new Pair<>(compiler.compile("__ < @CC"), new SimpleStringFunction("-CCdtr")));
            this.annotationPatterns.put("-markAcronymNP", new Pair<>(compiler.compile("@NP !<  (__ < (__ < __)) < (/^NN/ < /^.$/ $ (/^NN/ < /^.$/)) !< (__ < /../)"), new SimpleStringFunction("-acro")));
            this.annotationPatterns.put("-markAcronymNN", new Pair<>(compiler.compile("/^NN/ < /^.$/ $ (/^NN/ < /^.$/) > (@NP !<  (__ < (__ < __)) !< (__ < /../))"), new SimpleStringFunction("-acro")));
            this.annotationPatterns.put("-markPPwithPPdescendant", new Pair<>(compiler.compile("__ !< @PP << @PP [ >> @PP | == @PP ]"), new SimpleStringFunction("-inPPdominatesPP")));
            this.annotationPatterns.put("-gpAnnotatePrepositions", new Pair<>(TregexPattern.compile("/^(?:IN|PREP)$/ > (__ > __=gp)"), new AddRelativeNodeFunction("^^", "gp", false)));
            this.annotationPatterns.put("-gpEquivalencePrepositions", new Pair<>(TregexPattern.compile("/^(?:IN|PREP)$/ > (@PP >+(/^PP/) __=gp)"), new AddEquivalencedNodeFunction("^^", "gp")));
            this.annotationPatterns.put("-gpEquivalencePrepositionsVar", new Pair<>(TregexPattern.compile("/^(?:IN|PREP)$/ > (@PP >+(/^PP/) __=gp)"), new AddEquivalencedNodeFunctionVar("^^", "gp")));
            this.annotationPatterns.put("-markPPParent", new Pair<>(compiler.compile("@PP=max !< @PP"), new AddRelativeNodeRegexFunction("^^", "max", "^(\\w)")));
            this.annotationPatterns.put("-whPP", new Pair<>(compiler.compile("@PP <- (@SBAR <, /^WH/)"), new SimpleStringFunction("-whPP")));
            this.annotationPatterns.put("-deflateMin", new Pair<>(compiler.compile("__ < (__ < \u0645\u0646)"), new SimpleStringFunction("-min")));
            this.annotationPatterns.put("-v2MarkovIN", new Pair<>(compiler.compile("@IN > (@__=p1 > @__=p2)"), new AddRelativeNodeFunction("^", "p1", "p2", false)));
            this.annotationPatterns.put("-pleonasticMin", new Pair<>(compiler.compile("@PP <, (IN < \u0645\u0646) > @S"), new SimpleStringFunction("-pleo")));
            this.annotationPatterns.put("-v2MarkovPP", new Pair<>(compiler.compile("@PP > (@__=p1 > @__=p2)"), new AddRelativeNodeFunction("^", "p1", "p2", false)));
        }
        catch (TregexParseException e) {
            // Patterns are stored in order, so on the first initialization the map size gives
            // the position of the failing pattern. When this method is re-run (setHeadFinder),
            // the map is already populated and the position is not meaningful.
            final int nth = this.annotationPatterns.size() + 1;
            final int lastTwoDigits = nth % 100;
            final String suffix;
            if (lastTwoDigits >= 11 && lastTwoDigits <= 13) {
                suffix = "th";
            } else {
                switch (nth % 10) {
                    case 1:
                        suffix = "st";
                        break;
                    case 2:
                        suffix = "nd";
                        break;
                    case 3:
                        suffix = "rd";
                        break;
                    default:
                        suffix = "th";
                        break;
                }
            }
            log.info("Parse exception on " + nth + suffix + " annotation pattern initialization:" + e);
            throw e;
        }
    }

    /**
     * Installs a new head finder and rebuilds everything that depends on it.
     *
     * <p>All annotation patterns are recompiled, then the active annotation list is rebuilt
     * from the recompiled patterns: first every baseline feature, then every additional
     * feature, each in list order.
     *
     * @param hf the head finder to use
     * @throws IllegalArgumentException if {@code hf} is {@code null}
     */
    private void setHeadFinder(HeadFinder hf) {
        if (hf == null) {
            throw new IllegalArgumentException();
        }

        this.headFinder = hf;
        // Patterns compiled with the previous head finder are replaced.
        this.initializeAnnotationPatterns();

        this.activeAnnotations.clear();
        this.baselineFeatures.forEach(
                flag -> this.activeAnnotations.add(this.annotationPatterns.get(flag)));
        this.additionalFeatures.forEach(
                flag -> this.activeAnnotations.add(this.annotationPatterns.get(flag)));
    }

    /**
     * Configures the morphological feature specification used by the factored lexicon.
     *
     * @param activeFeats comma-separated names of {@code MorphoFeatureType} constants;
     *        whitespace around each name is trimmed
     * @return the string form of the resulting specification
     * @throws IllegalArgumentException if a name is not a {@code MorphoFeatureType} constant
     *         (thrown by {@code valueOf})
     */
    private String setupMorphoFeatures(String activeFeats) {
        final String[] requestedNames = activeFeats.split(",");
        final MorphoFeatureSpecification spec = this.tlp.morphFeatureSpec();
        this.morphoSpec = spec;
        for (String rawName : requestedNames) {
            spec.activate(MorphoFeatureSpecification.MorphoFeatureType.valueOf(rawName.trim()));
        }
        return spec.toString();
    }

    /**
     * Drops a feature from the baseline set and from the active annotations.
     * Does nothing when {@code featName} is not a baseline feature.
     *
     * @param featName the annotation flag to remove
     */
    private void removeBaselineFeature(String featName) {
        // List.remove reports whether the element was present, so no separate lookup is needed.
        if (!this.baselineFeatures.remove(featName)) {
            return;
        }
        this.activeAnnotations.remove(this.annotationPatterns.get(featName));
    }

    /**
     * Logs the accumulated description of the options applied so far.
     */
    @Override
    public void display() {
        final String summary = this.optionsString.toString();
        log.info(summary);
    }

    /**
     * Processes the option at {@code args[i]}, consuming its argument when it takes one.
     *
     * <p>Annotation flags are checked first: the annotation is appended to the active list
     * (and recorded as an additional feature if it is not a baseline one). Otherwise the
     * flag is matched against the reader, collinizer, head-finder and lexicon options.
     * {@code -headFinder} is matched case-insensitively; it and {@code -factlex} are only
     * recognised when an argument follows. A head finder that cannot be instantiated is
     * logged and otherwise ignored, but its two arguments are still consumed.
     *
     * @param args the full argument array
     * @param i index of the option to process
     * @return the index of the next unprocessed argument: {@code i + 1} for a plain flag,
     *         {@code i + 2} for a flag with an argument, or {@code i} if the option was not
     *         recognised
     */
    @Override
    public int setOptionFlag(String[] args, int i) {
        final String flag = args[i];

        if (this.annotationPatterns.containsKey(flag)) {
            if (!this.baselineFeatures.contains(flag)) {
                this.additionalFeatures.add(flag);
            }
            final Pair<TregexPattern, Function<TregexMatcher, String>> annotation = this.annotationPatterns.get(flag);
            this.activeAnnotations.add(annotation);
            this.optionsString
                    .append("Option ").append(flag)
                    .append(" added annotation pattern ").append(annotation.first())
                    .append(" with annotation ").append(annotation.second())
                    .append('\n');
            return i + 1;
        }

        switch (flag) {
            case "-retainNPTmp":
                this.optionsString.append("Retaining NP-TMP marking.\n");
                this.retainNPTmp = true;
                return i + 1;
            case "-retainNPSbj":
                this.optionsString.append("Retaining NP-SBJ dash tag.\n");
                this.retainNPSbj = true;
                return i + 1;
            case "-retainPPClr":
                this.optionsString.append("Retaining PP-CLR dash tag.\n");
                this.retainPPClr = true;
                return i + 1;
            case "-discardX":
                this.optionsString.append("Discarding X trees.\n");
                this.discardX = true;
                return i + 1;
            case "-changeNoLabels":
                this.optionsString.append("Change no labels.\n");
                this.changeNoLabels = true;
                return i + 1;
            case "-markPRDverbs":
                this.optionsString.append("Mark PRD.\n");
                this.retainPRD = true;
                return i + 1;
            case "-collinizerRetainsPunctuation":
                this.optionsString.append("Collinizer retains punctuation.\n");
                this.collinizerRetainsPunctuation = true;
                return i + 1;
            case "-arabicFactored":
                // Activate each baseline annotation as if it had been passed on its own.
                for (String baselineFlag : this.baselineFeatures) {
                    this.setOptionFlag(new String[]{baselineFlag}, 0);
                }
                return i + 1;
            case "-noFeatures":
                this.activeAnnotations.clear();
                this.optionsString.append("Removed all manual features.\n");
                return i + 1;
            default:
                break;
        }

        // The remaining options need a following argument to be recognised at all.
        final boolean hasArgument = i + 1 < args.length;
        if (hasArgument && flag.equalsIgnoreCase("-headFinder")) {
            final String className = args[i + 1];
            try {
                final HeadFinder hf = (HeadFinder) Class.forName(className).getDeclaredConstructor().newInstance();
                this.setHeadFinder(hf);
                this.optionsString.append("HeadFinder: ").append(className).append("\n");
            }
            catch (Exception e) {
                log.info(e);
                log.info(this.getClass().getName() + ": Could not load head finder " + className);
            }
            return i + 2;
        }
        if (hasArgument && flag.equals("-factlex")) {
            final String activeFeats = this.setupMorphoFeatures(args[i + 1]);
            this.optionsString.append("Factored Lexicon: active features: ").append(activeFeats);
            return i + 2;
        }

        return i;
    }

    /**
     * Command-line entry point: loads the {@code txt} files under the given path as a
     * treebank, applies the baseline annotations ({@code -arabicFactored}) to every subtree
     * of every tree, and prints each transformed tree to standard output.
     *
     * <p>Exits with status {@code -1} unless exactly one argument is supplied.
     *
     * @param args a single element: the treebank path to load
     */
    public static void main(String[] args) {
        if (args.length != 1) {
            System.exit(-1);
        }

        final ArabicTreebankParserParams params = new ArabicTreebankParserParams();
        params.setOptionFlag(new String[]{"-arabicFactored"}, 0);

        final DiskTreebank treebank = params.diskTreebank();
        treebank.loadPath(args[0], "txt", false);

        for (Tree sentence : treebank) {
            // Each node is annotated in place, with the whole sentence as matching context.
            for (Tree node : sentence) {
                params.transformTree(node, sentence);
            }
            System.out.println(sentence);
        }
    }

    static {
        tagSpec.activate(MorphoFeatureSpecification.MorphoFeatureType.NGEN);
    }

    /**
     * Annotation function that appends, after a fixed mark, the equivalence class of a named
     * node's basic category: {@code noun}, {@code adj} or {@code vb} for tags in the
     * corresponding tag sets, and the basic category itself for anything else.
     */
    private static class AddEquivalencedConjNode implements SerializableFunction<TregexMatcher, String> {
        private static final long serialVersionUID = 1L;

        private static final String nnTags = "DTNN DTNNP DTNNPS DTNNS NN NNP NNS NNPS";
        private static final String jjTags = "ADJ_NUM DTJJ DTJJR JJ JJR";
        private static final String vbTags = "VBD VBP";

        private static final Set<String> nnTagClass = tagClass(nnTags);
        private static final Set<String> jjTagClass = tagClass(jjTags);
        private static final Set<String> vbTagClass = tagClass(vbTags);

        private static final TreebankLanguagePack tlp = new ArabicTreebankLanguagePack();

        private String annotationMark;
        private String key;

        /**
         * @param annotationMark prefix placed before the equivalence class
         * @param key name of the matched node whose category is classified
         */
        public AddEquivalencedConjNode(String annotationMark, String key) {
            this.annotationMark = annotationMark;
            this.key = key;
        }

        /** Splits a whitespace-separated tag list into an unmodifiable set. */
        private static Set<String> tagClass(String whitespaceSeparatedTags) {
            return Collections.unmodifiableSet(Generics.newHashSet(Arrays.asList(whitespaceSeparatedTags.split("\\s+"))));
        }

        /** Maps a basic category to its equivalence class, or returns it unchanged. */
        private static String equivalenceClass(String basicCategory) {
            if (nnTagClass.contains(basicCategory)) {
                return "noun";
            }
            if (jjTagClass.contains(basicCategory)) {
                return "adj";
            }
            if (vbTagClass.contains(basicCategory)) {
                return "vb";
            }
            return basicCategory;
        }

        /**
         * @param m a matcher positioned on a match that binds the node named by {@code key}
         * @return the annotation mark followed by the node's equivalence class
         */
        @Override
        public String apply(TregexMatcher m) {
            final String nodeValue = m.getNode(this.key).value();
            final String basicCategory = tlp.basicCategory(nodeValue);
            return this.annotationMark + equivalenceClass(basicCategory);
        }

        @Override
        public String toString() {
            return String.format("AddEquivalencedConjNode[%s,%s]", this.annotationMark, this.key);
        }
    }

    /**
     * Annotation function that maps the value of the matcher node named {@code "term"}
     * to a fixed punctuation suffix, or to the empty string when the value is not one
     * of the recognized symbols.
     */
    private static class AnnotatePunctuationFunction2 implements SerializableFunction<TregexMatcher, String> {
        private static final long serialVersionUID = 1L;

        static final String key = "term";
        private static final Pattern quote = Pattern.compile("^\"$");

        private AnnotatePunctuationFunction2() {
        }

        /**
         * @param m matcher holding a node registered under {@code "term"}
         * @return the suffix for the node's value, or {@code ""} if none applies
         */
        @Override
        public String apply(TregexMatcher m) {
            String punc = m.getNode(key).value();
            switch (punc) {
                case ".":
                    return "-fs";
                case "?":
                    return "-quest";
                case ",":
                    return "-comma";
                case ":":
                case ";":
                    return "-colon";
                case "(":
                case "-LRB-":
                    return "-lrb";
                case ")":
                case "-RRB-":
                    return "-rrb";
                case "-PLUS-":
                    return "-plus";
                case "-":
                    return "-dash";
                default:
                    // Everything else is either a lone double quote or unannotated.
                    return quote.matcher(punc).matches() ? "-quote" : "";
            }
        }

        @Override
        public String toString() {
            return "AnnotatePunctuationFunction2";
        }
    }

    /**
     * Annotation function that emits {@code annotationMark + "VSA"} when the label of
     * the node bound to {@code key} starts with {@code S}, {@code V} or {@code A},
     * and the empty string otherwise.
     */
    private static class AddEquivalencedNodeFunctionVar implements SerializableFunction<TregexMatcher, String> {
        private static final long serialVersionUID = 1L;

        private String annotationMark;
        private String key;

        /**
         * @param annotationMark prefix placed in front of {@code "VSA"}
         * @param key name of the matcher node whose label is inspected
         */
        public AddEquivalencedNodeFunctionVar(String annotationMark, String key) {
            this.annotationMark = annotationMark;
            this.key = key;
        }

        /**
         * @param m matcher holding a node registered under {@code key}
         * @return the annotation, or {@code ""} if the label has none of the three prefixes
         */
        @Override
        public String apply(TregexMatcher m) {
            String label = m.getNode(key).label().value();
            boolean inClass = label.startsWith("S") || label.startsWith("V") || label.startsWith("A");
            return inClass ? annotationMark + "VSA" : "";
        }

        @Override
        public String toString() {
            return "AddEquivalencedNodeFunctionVar[" + annotationMark + ',' + key + ']';
        }
    }

    /**
     * Annotation function that emits {@code annotationMark} followed by {@code S} or
     * {@code V}, depending on which of those prefixes the label of the node bound to
     * {@code key} starts with; yields the empty string for any other label.
     */
    private static class AddEquivalencedNodeFunction implements SerializableFunction<TregexMatcher, String> {
        private static final long serialVersionUID = 1L;

        private String annotationMark;
        private String key;

        /**
         * @param annotationMark prefix placed in front of the class letter
         * @param key name of the matcher node whose label is inspected
         */
        public AddEquivalencedNodeFunction(String annotationMark, String key) {
            this.annotationMark = annotationMark;
            this.key = key;
        }

        /**
         * @param m matcher holding a node registered under {@code key}
         * @return the annotation, or {@code ""} if the label starts with neither prefix
         */
        @Override
        public String apply(TregexMatcher m) {
            String label = m.getNode(key).label().value();
            String classLetter;
            if (label.startsWith("S")) {
                classLetter = "S";
            } else if (label.startsWith("V")) {
                classLetter = "V";
            } else {
                return "";
            }
            return annotationMark + classLetter;
        }

        @Override
        public String toString() {
            return "AddEquivalencedNodeFunction[" + annotationMark + ',' + key + ']';
        }
    }

    /**
     * Annotation function that emits {@code annotationMark} followed by the label of
     * the node bound to {@code key}, replaced by capture group 1 of the configured
     * regex when that regex is found in the label.
     * <p>
     * A second key/pattern pair is supported by {@link #apply} but nothing visible in
     * this class ever assigns it, so that branch is inactive as written.
     */
    private static class AddRelativeNodeRegexFunction implements SerializableFunction<TregexMatcher, String> {
        private static final long serialVersionUID = 1L;

        private String annotationMark;
        private String key;
        private Pattern pattern;
        private String key2 = null;
        private Pattern pattern2;

        /**
         * @param annotationMark prefix (and separator) used in the produced annotation
         * @param key name of the matcher node whose label is read
         * @param regex expression whose group 1 replaces the label when it is found
         * @throws IllegalArgumentException wrapping the {@code PatternSyntaxException}
         *         when {@code regex} does not compile
         */
        public AddRelativeNodeRegexFunction(String annotationMark, String key, String regex) {
            this.annotationMark = annotationMark;
            this.key = key;
            try {
                pattern = Pattern.compile(regex);
            } catch (PatternSyntaxException pse) {
                log.info("Bad pattern: " + regex);
                pattern = null;
                throw new IllegalArgumentException(pse);
            }
        }

        /**
         * @param m matcher holding the node(s) registered under the configured key(s)
         * @return the annotation mark followed by the (possibly regex-reduced) label
         */
        @Override
        public String apply(TregexMatcher m) {
            String val = m.getNode(key).label().value();
            if (pattern != null) {
                Matcher primary = pattern.matcher(val);
                if (primary.find()) {
                    val = primary.group(1);
                }
            }
            if (key2 != null && pattern2 != null) {
                String secondLabel = m.getNode(key2).label().value();
                Matcher secondary = pattern2.matcher(secondLabel);
                // Fall back to the raw second label when its pattern is not found.
                String secondPart = secondary.find() ? secondary.group(1) : secondLabel;
                val = val + annotationMark + secondPart;
            }
            return annotationMark + val;
        }

        @Override
        public String toString() {
            return "AddRelativeNodeRegexFunction[" + annotationMark + ',' + key + ',' + pattern + ']';
        }
    }

    /**
     * Annotation function that emits {@code annotationMark} followed by the label of
     * one matcher node, or of two matcher nodes (each preceded by the mark) when a
     * second key is configured. Labels are optionally reduced with
     * {@code basicCategory} first.
     */
    private static class AddRelativeNodeFunction implements SerializableFunction<TregexMatcher, String> {
        private static final long serialVersionUID = 1L;
        private static final TreebankLanguagePack tlp = new ArabicTreebankLanguagePack();

        private String annotationMark;
        private String key;
        private String key2;
        private boolean doBasicCat = false;

        /**
         * @param annotationMark prefix placed in front of each label
         * @param key name of the matcher node whose label is used
         * @param basicCategory whether labels are passed through {@code basicCategory}
         */
        public AddRelativeNodeFunction(String annotationMark, String key, boolean basicCategory) {
            this.annotationMark = annotationMark;
            this.key = key;
            this.key2 = null;
            this.doBasicCat = basicCategory;
        }

        /**
         * @param annotationMark prefix placed in front of each label
         * @param key1 name of the first matcher node
         * @param key2 name of the second matcher node
         * @param basicCategory whether labels are passed through {@code basicCategory}
         */
        public AddRelativeNodeFunction(String annotationMark, String key1, String key2, boolean basicCategory) {
            this(annotationMark, key1, basicCategory);
            this.key2 = key2;
        }

        /** Reads the label of the named node, reducing it to its basic category if configured. */
        private String labelFor(TregexMatcher m, String nodeName) {
            String value = m.getNode(nodeName).label().value();
            return doBasicCat ? tlp.basicCategory(value) : value;
        }

        /**
         * @param m matcher holding the node(s) registered under the configured key(s)
         * @return the mark plus first label, followed by mark plus second label if a
         *         second key is set
         */
        @Override
        public String apply(TregexMatcher m) {
            String first = labelFor(m, key);
            if (key2 == null) {
                return annotationMark + first;
            }
            String second = labelFor(m, key2);
            return annotationMark + first + annotationMark + second;
        }

        @Override
        public String toString() {
            String keys = (key2 == null) ? key : key + ',' + key2;
            return "AddRelativeNodeFunction[" + annotationMark + ',' + keys + ']';
        }
    }

    /**
     * Annotation function that ignores the matcher and always yields the string it
     * was constructed with.
     */
    private static class SimpleStringFunction implements SerializableFunction<TregexMatcher, String> {
        private static final long serialVersionUID = 1L;

        private String result;

        /**
         * @param result the constant annotation to return from {@link #apply}
         */
        public SimpleStringFunction(String result) {
            this.result = result;
        }

        /**
         * @param tregexMatcher unused
         * @return the constant supplied at construction
         */
        @Override
        public String apply(TregexMatcher tregexMatcher) {
            return result;
        }

        @Override
        public String toString() {
            StringBuilder text = new StringBuilder("SimpleStringFunction[");
            text.append(result).append(']');
            return text.toString();
        }
    }

    /**
     * Tree transformer that rebuilds a tree with each node's label value reduced via
     * the enclosing instance's {@code tlp.basicCategory}, except for the labels the
     * enclosing instance's {@code retainNPTmp}, {@code retainNPSbj} and
     * {@code retainPRD} flags ask to keep.
     */
    protected class ArabicSubcategoryStripper
    implements TreeTransformer {
        protected final TreeFactory tf = new LabeledScoredTreeFactory();

        // Compiled once per stripper: String.matches would recompile this regex for
        // every phrasal node visited by the recursive transform.
        private final Pattern prdVerbLabel = Pattern.compile("VB[^P].*PRD.*");

        protected ArabicSubcategoryStripper() {
        }

        /**
         * Builds a transformed copy of {@code tree}.
         * <p>
         * A leaf becomes a new leaf created from the same label with the same score.
         * Any other node is rebuilt from its recursively transformed children; its
         * value (and tag, when the label implements {@code HasTag}) is set to the
         * stripped category and its score is copied over.
         *
         * @param tree the node to transform
         * @return the newly built node
         */
        @Override
        public Tree transformTree(Tree tree) {
            Label lab = tree.label();
            String s = lab.value();
            if (tree.isLeaf()) {
                Tree leaf = this.tf.newLeaf(lab);
                leaf.setScore(tree.score());
                return leaf;
            }
            if (tree.isPhrasal()) {
                if (ArabicTreebankParserParams.this.retainNPTmp && s.startsWith("NP-TMP")) {
                    s = "NP-TMP";
                } else if (ArabicTreebankParserParams.this.retainNPSbj && s.startsWith("NP-SBJ")) {
                    s = "NP-SBJ";
                } else if (ArabicTreebankParserParams.this.retainPRD && this.prdVerbLabel.matcher(s).matches()) {
                    // Keep the PRD marker on top of the basic category.
                    s = ArabicTreebankParserParams.this.tlp.basicCategory(s);
                    s = s + "-PRD";
                } else {
                    s = ArabicTreebankParserParams.this.tlp.basicCategory(s);
                }
            } else if (tree.isPreTerminal()) {
                s = ArabicTreebankParserParams.this.tlp.basicCategory(s);
            } else {
                // Neither leaf, phrasal nor pre-terminal: report it and strip it anyway.
                System.err.printf("Encountered a non-leaf/phrasal/pre-terminal node %s\n", s);
                s = ArabicTreebankParserParams.this.tlp.basicCategory(s);
            }
            ArrayList<Tree> children = new ArrayList<Tree>(tree.numChildren());
            for (Tree child : tree.getChildrenAsList()) {
                children.add(this.transformTree(child));
            }
            Tree node = this.tf.newTreeNode(lab, children);
            node.setValue(s);
            node.setScore(tree.score());
            if (node.label() instanceof HasTag) {
                ((HasTag) node.label()).setTag(s);
            }
            return node;
        }
    }
}

