/*
 * Decompiled with CFR 0.152.
 */
package org.apache.kylin.engine.spark;

import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.kylin.common.util.AbstractApplication;
import org.apache.kylin.common.util.Bytes;
import org.apache.kylin.common.util.HadoopUtil;
import org.apache.kylin.common.util.OptionsHelper;
import org.apache.kylin.engine.spark.KylinSparkJobListener;
import org.apache.kylin.engine.spark.SparkUtil;
import org.apache.kylin.measure.hllc.HLLCounter;
import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFlatMapFunction;
import org.apache.spark.api.java.function.PairFunction;
import org.apache.spark.scheduler.SparkListenerInterface;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple2;

/**
 * Spark application that estimates the number of distinct values per column position of a table.
 *
 * <p>Rows are read through {@link SparkUtil#hiveRecordInputRDD}, every field is fed into one
 * {@link HLLCounter} per column index, the per-partition counters are merged by column index and
 * the resulting estimates are written to the output path as {@code (columnIndex, estimate)} pairs
 * sorted by column index.
 *
 * <p>When the job yields no pairs at all, a zero estimate is written for each of the
 * {@code column} indexes so that the output always exists.
 */
public class SparkColumnCardinality extends AbstractApplication implements Serializable {
    protected static final Logger logger = LoggerFactory.getLogger(SparkColumnCardinality.class);

    /** Required option {@code tableName}: the table whose rows are read. */
    public static final Option OPTION_TABLE_NAME = requiredOption("tableName", "Table Name");
    /** Required option {@code output}: the path the estimates are written to. */
    public static final Option OPTION_OUTPUT = requiredOption("output", "Output");
    /**
     * Required option {@code project}. It is registered so that the command line validates it,
     * but {@link #execute(OptionsHelper)} does not read its value.
     */
    public static final Option OPTION_PRJ = requiredOption("project", "Project name");
    /** Required option {@code column}: the number of columns, used only for the empty-result output. */
    public static final Option OPTION_COLUMN_COUNT = requiredOption("column", "column count");

    private final Options options = new Options();

    /** Registers the four command line options accepted by this application. */
    public SparkColumnCardinality() {
        this.options.addOption(OPTION_TABLE_NAME);
        this.options.addOption(OPTION_OUTPUT);
        this.options.addOption(OPTION_PRJ);
        this.options.addOption(OPTION_COLUMN_COUNT);
    }

    /**
     * Builds a required, single-argument option whose argument name equals the option name.
     *
     * @param name        option name, also used as the argument name
     * @param description help text of the option
     * @return the configured option
     */
    private static Option requiredOption(String name, String description) {
        // OptionBuilder accumulates its settings in static state until create() is called.
        OptionBuilder.withArgName(name);
        OptionBuilder.hasArg();
        OptionBuilder.isRequired(true);
        OptionBuilder.withDescription(description);
        return OptionBuilder.create(name);
    }

    @Override
    protected Options getOptions() {
        return this.options;
    }

    /**
     * Runs the cardinality job.
     *
     * @param optionsHelper parsed command line holding the table name, output path and column count
     * @throws Exception if a Kryo class cannot be loaded, the column count is not an integer,
     *                   or the Spark job fails
     */
    @Override
    protected void execute(OptionsHelper optionsHelper) throws Exception {
        String tableName = optionsHelper.getOptionValue(OPTION_TABLE_NAME);
        String output = optionsHelper.getOptionValue(OPTION_OUTPUT);
        int columnCnt = Integer.parseInt(optionsHelper.getOptionValue(OPTION_COLUMN_COUNT));

        // Kryo registration is mandatory below, so these classes are registered explicitly;
        // they are looked up by name rather than referenced as class literals.
        Class<?>[] kryoClassArray = new Class<?>[]{
                Class.forName("scala.reflect.ClassTag$$anon$1"),
                Class.forName("org.apache.kylin.engine.mr.steps.SelfDefineSortableKey")};

        SparkConf conf = new SparkConf().setAppName("Calculate table:" + tableName);
        conf.set("spark.sql.catalogImplementation", "hive");
        conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer");
        conf.set("spark.kryo.registrator", "org.apache.kylin.engine.spark.KylinKryoRegistrator");
        conf.set("spark.kryo.registrationRequired", "true").registerKryoClasses(kryoClassArray);

        KylinSparkJobListener jobListener = new KylinSparkJobListener();
        try (JavaSparkContext sc = new JavaSparkContext(conf)) {
            sc.sc().addSparkListener(jobListener);
            // Remove any previous output before the job writes to the same path.
            HadoopUtil.deletePath(sc.hadoopConfiguration(), new Path(output));

            JavaRDD<String[]> recordRDD = SparkUtil.hiveRecordInputRDD(false, sc, null, tableName);
            JavaPairRDD<Integer, Long> resultRdd = recordRDD
                    .mapPartitionsToPair(new BuildHllCounter())
                    // Merge the per-partition counters of the same column into the left one.
                    .reduceByKey((left, right) -> {
                        left.merge(right);
                        return left;
                    })
                    .mapToPair(record -> new Tuple2<>(record._1(), record._2().getCountEstimate()))
                    .sortByKey(true, 1)
                    // The RDD is evaluated twice (count, then save), so keep it cached.
                    .cache();

            if (resultRdd.count() == 0L) {
                // Nothing was counted: emit a zero estimate for every column index instead.
                ArrayList<Tuple2<Integer, Long>> list = new ArrayList<>();
                for (int i = 0; i < columnCnt; ++i) {
                    list.add(new Tuple2<>(i, 0L));
                }
                JavaPairRDD<Integer, Long> nullRdd = sc.parallelizePairs(list).repartition(1);
                nullRdd.saveAsNewAPIHadoopFile(output, IntWritable.class, LongWritable.class, TextOutputFormat.class);
            } else {
                resultRdd.saveAsNewAPIHadoopFile(output, IntWritable.class, LongWritable.class, TextOutputFormat.class);
            }
        }
    }

    /**
     * Partition function that folds all rows of one partition into one {@link HLLCounter} per
     * column index and emits a {@code (columnIndex, counter)} pair for each column seen.
     */
    static class BuildHllCounter
            implements PairFlatMapFunction<Iterator<String[]>, Integer, HLLCounter> {
        public BuildHllCounter() {
            logger.info("BuildHllCounter init here.");
        }

        /**
         * Consumes the rows of a partition and returns the counters built from them.
         *
         * @param iterator rows of the partition; each row is an array of field values
         * @return one pair per column index that occurred in at least one row
         */
        @Override
        public Iterator<Tuple2<Integer, HLLCounter>> call(Iterator<String[]> iterator) throws Exception {
            HashMap<Integer, HLLCounter> hllmap = new HashMap<>();
            while (iterator.hasNext()) {
                String[] values = iterator.next();
                for (int m = 0; m < values.length; ++m) {
                    // A null field is counted as the literal text "NULL".
                    String fieldValue = values[m] == null ? "NULL" : values[m];
                    this.getHllc(hllmap, m).add(Bytes.toBytes(fieldValue));
                }
            }
            ArrayList<Tuple2<Integer, HLLCounter>> result = new ArrayList<>(hllmap.size());
            for (Map.Entry<Integer, HLLCounter> entry : hllmap.entrySet()) {
                result.add(new Tuple2<>(entry.getKey(), entry.getValue()));
            }
            return result.iterator();
        }

        /**
         * Returns the counter of the given column index, creating it on first use.
         *
         * @param hllcMap counters keyed by column index
         * @param key     column index
         * @return the counter stored under {@code key}
         */
        private HLLCounter getHllc(HashMap<Integer, HLLCounter> hllcMap, Integer key) {
            return hllcMap.computeIfAbsent(key, columnIndex -> new HLLCounter());
        }
    }
}

