/*
 * Decompiled with CFR 0.152.
 */
package org.apache.kylin.engine.mr.common;

import java.io.BufferedWriter;
import java.io.Closeable;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.SequenceFile;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.kylin.common.KylinConfig;
import org.apache.kylin.common.persistence.RawResource;
import org.apache.kylin.common.persistence.ResourceStore;
import org.apache.kylin.common.util.ByteArray;
import org.apache.kylin.common.util.Bytes;
import org.apache.kylin.common.util.HadoopUtil;
import org.apache.kylin.common.util.SumHelper;
import org.apache.kylin.cube.CubeInstance;
import org.apache.kylin.cube.CubeManager;
import org.apache.kylin.cube.CubeSegment;
import org.apache.kylin.cube.cuboid.Cuboid;
import org.apache.kylin.cube.cuboid.CuboidScheduler;
import org.apache.kylin.cube.kv.CubeDimEncMap;
import org.apache.kylin.cube.kv.RowKeyEncoder;
import org.apache.kylin.cube.model.CubeDesc;
import org.apache.kylin.measure.hllc.HLLCounter;
import org.apache.kylin.metadata.datatype.DataType;
import org.apache.kylin.metadata.model.MeasureDesc;
import org.apache.kylin.metadata.model.SegmentStatusEnum;
import org.apache.kylin.metadata.model.Segments;
import org.apache.kylin.metadata.model.TblColRef;
import org.apache.kylin.shaded.com.google.common.collect.Lists;
import org.apache.kylin.shaded.com.google.common.collect.Maps;
import org.apache.kylin.tool.shaded.org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class CubeStatsReader {
    private static final Logger logger = LoggerFactory.getLogger(CubeStatsReader.class);
    final CubeSegment seg;
    final int samplingPercentage;
    final int mapperNumberOfFirstBuild;
    final double mapperOverlapRatioOfFirstBuild;
    final Map<Long, HLLCounter> cuboidRowEstimatesHLL;
    final CuboidScheduler cuboidScheduler;
    final long sourceRowCount;

    /**
     * Reads the statistics of {@code cubeSegment} from the resource store, using the
     * cuboid scheduler returned by {@code cubeSegment.getCuboidScheduler()}.
     *
     * @param cubeSegment segment whose statistics are loaded
     * @param kylinConfig configuration used to locate the resource store and the HLL precision
     * @throws IOException if the statistics cannot be read
     */
    public CubeStatsReader(CubeSegment cubeSegment, KylinConfig kylinConfig) throws IOException {
        this(cubeSegment, cubeSegment.getCuboidScheduler(), kylinConfig);
    }

    /**
     * Reads the statistics of {@code cubeSegment} from the resource store.
     *
     * <p>The stored resource is first copied to a local temporary sequence file, parsed from
     * there, and the temporary file is removed again whether or not parsing succeeds.
     *
     * @param cubeSegment     segment whose statistics are loaded
     * @param cuboidScheduler scheduler used by the layer/tree oriented methods; may be {@code null},
     *                        in which case those methods throw {@link UnsupportedOperationException}
     * @param kylinConfig     configuration used to locate the resource store and the HLL precision
     * @throws IOException           if the statistics cannot be copied, parsed or cleaned up
     * @throws IllegalStateException if the segment has no statistics resource in the store
     */
    public CubeStatsReader(CubeSegment cubeSegment, CuboidScheduler cuboidScheduler, KylinConfig kylinConfig) throws IOException {
        ResourceStore store = ResourceStore.getStore(kylinConfig);
        String statsKey = cubeSegment.getStatisticsResourcePath();
        RawResource resource = store.getResource(statsKey);
        if (resource == null) {
            throw new IllegalStateException("Missing resource at " + statsKey);
        }
        File tmpSeqFile = this.writeTmpSeqFile(resource.content());
        CubeStatsResult cubeStatsResult;
        try {
            Path path = new Path(HadoopUtil.fixWindowsPath("file://" + tmpSeqFile.getAbsolutePath()));
            cubeStatsResult = new CubeStatsResult(path, kylinConfig.getCubeStatsHLLPrecision());
        } catch (IOException | RuntimeException e) {
            // Do not leave the temporary file behind when parsing fails; keep the parse
            // failure as the primary exception and attach any cleanup problem to it.
            try {
                Files.deleteIfExists(tmpSeqFile.toPath());
            } catch (IOException cleanupFailure) {
                e.addSuppressed(cleanupFailure);
            }
            throw e;
        }
        Files.deleteIfExists(tmpSeqFile.toPath());
        this.seg = cubeSegment;
        this.cuboidScheduler = cuboidScheduler;
        this.samplingPercentage = cubeStatsResult.getPercentage();
        this.mapperNumberOfFirstBuild = cubeStatsResult.getMapperNumber();
        this.mapperOverlapRatioOfFirstBuild = cubeStatsResult.getMapperOverlapRatio();
        this.cuboidRowEstimatesHLL = cubeStatsResult.getCounterMap();
        this.sourceRowCount = cubeStatsResult.getSourceRecordCount();
    }

    public CubeStatsReader(CubeSegment cubeSegment, CuboidScheduler cuboidScheduler, KylinConfig kylinConfig, Path path) throws IOException {
        CubeStatsResult cubeStatsResult = new CubeStatsResult(path, kylinConfig.getCubeStatsHLLPrecision());
        this.seg = cubeSegment;
        this.cuboidScheduler = cuboidScheduler;
        this.samplingPercentage = cubeStatsResult.getPercentage();
        this.mapperNumberOfFirstBuild = cubeStatsResult.getMapperNumber();
        this.mapperOverlapRatioOfFirstBuild = cubeStatsResult.getMapperOverlapRatio();
        this.cuboidRowEstimatesHLL = cubeStatsResult.getCounterMap();
        this.sourceRowCount = cubeStatsResult.getSourceRecordCount();
    }

    /**
     * Copies {@code inputStream} into a newly created temporary file named
     * {@code kylin_stats_tmp*.seq} and returns that file.
     *
     * <p>The input stream is always closed (quietly) before this method returns. The output
     * stream is closed by try-with-resources so that a failed flush/close is reported instead
     * of silently producing a truncated file. If anything fails after the temporary file was
     * created, the file is removed on a best-effort basis.
     *
     * @param inputStream content to copy; closed by this method
     * @return the temporary file holding the copied content; the caller is responsible for deleting it
     * @throws IOException if the temporary file cannot be created or written
     */
    private File writeTmpSeqFile(InputStream inputStream) throws IOException {
        File tempFile = null;
        boolean copied = false;
        try {
            tempFile = File.createTempFile("kylin_stats_tmp", ".seq");
            try (FileOutputStream out = new FileOutputStream(tempFile)) {
                org.apache.kylin.tool.shaded.org.apache.commons.io.IOUtils.copy(inputStream, (OutputStream)out);
            }
            copied = true;
            return tempFile;
        } finally {
            IOUtils.closeStream((Closeable)inputStream);
            if (!copied && tempFile != null) {
                // File.delete() is used on purpose: it never throws, so it cannot mask the
                // exception that is already propagating.
                tempFile.delete();
            }
        }
    }

    /**
     * @return the HLL counters loaded from the statistics, keyed by cuboid id
     */
    public Map<Long, HLLCounter> getCuboidRowHLLCounters() {
        return cuboidRowEstimatesHLL;
    }

    /**
     * @return the sampling percentage recorded in the statistics
     */
    public int getSamplingPercentage() {
        return samplingPercentage;
    }

    /**
     * @return the source record count recorded in the statistics
     */
    public long getSourceRowCount() {
        return sourceRowCount;
    }

    /**
     * Converts the loaded HLL counters into estimated row counts.
     *
     * @return a new map from cuboid id to estimated row count
     */
    public Map<Long, Long> getCuboidRowEstimatesHLL() {
        return getCuboidRowCountMapFromSampling(cuboidRowEstimatesHLL, samplingPercentage);
    }

    /**
     * Estimates the size of every cuboid, allowing the optional history-based adjustment
     * (equivalent to {@code getCuboidSizeMap(false)}).
     *
     * @return a map from cuboid id to estimated size in MB
     */
    public Map<Long, Double> getCuboidSizeMap() {
        final boolean origin = false;
        return getCuboidSizeMap(origin);
    }

    /**
     * Estimates the size of every cuboid from the estimated row counts.
     *
     * @param origin when {@code true} the raw estimate is returned; when {@code false} the
     *               estimate may additionally be adjusted if the segment configuration
     *               enables cuboid size optimization
     * @return a map from cuboid id to estimated size in MB
     */
    public Map<Long, Double> getCuboidSizeMap(boolean origin) {
        Map<Long, Long> estimatedRows = getCuboidRowEstimatesHLL();
        return getCuboidSizeMapFromRowCount(seg, estimatedRows, sourceRowCount, origin);
    }

    /**
     * @return the sum of all estimated cuboid sizes, in MB
     */
    public double estimateCubeSize() {
        Map<Long, Double> cuboidSizes = getCuboidSizeMap();
        return SumHelper.sumDouble(cuboidSizes.values());
    }

    /**
     * @return the mapper number recorded in the statistics
     */
    public int getMapperNumberOfFirstBuild() {
        return mapperNumberOfFirstBuild;
    }

    /**
     * @return the mapper overlap ratio recorded in the statistics
     */
    public double getMapperOverlapRatioOfFirstBuild() {
        return mapperOverlapRatioOfFirstBuild;
    }

    /**
     * Builds a map of estimated row counts from a map of HLL counters.
     *
     * @param hllcMap            HLL counters keyed by cuboid id
     * @param samplingPercentage accepted for API compatibility; this implementation does not
     *                           use it and reports each counter's estimate unchanged
     * @return a new mutable map from cuboid id to {@code HLLCounter.getCountEstimate()}
     */
    public static Map<Long, Long> getCuboidRowCountMapFromSampling(Map<Long, HLLCounter> hllcMap, int samplingPercentage) {
        Map<Long, Long> estimatedRows = new HashMap<>();
        for (Map.Entry<Long, HLLCounter> counterEntry : hllcMap.entrySet()) {
            Long cuboidId = counterEntry.getKey();
            HLLCounter counter = counterEntry.getValue();
            estimatedRows.put(cuboidId, counter.getCountEstimate());
        }
        return estimatedRows;
    }

    /**
     * Estimates cuboid sizes from row counts without applying the history-based adjustment
     * (the private overload is invoked with {@code origin = true}).
     *
     * @param cubeSegment    segment supplying the cube description, encodings and configuration
     * @param rowCountMap    estimated row count per cuboid id
     * @param sourceRowCount number of source records
     * @return a map from cuboid id to estimated size in MB
     */
    public static Map<Long, Double> getCuboidSizeMapFromRowCount(CubeSegment cubeSegment, Map<Long, Long> rowCountMap, long sourceRowCount) {
        final boolean origin = true;
        return getCuboidSizeMapFromRowCount(cubeSegment, rowCountMap, sourceRowCount, origin);
    }

    /**
     * Estimates the storage size of every cuboid in {@code rowCountMap}.
     *
     * <p>The encoded length of each base-cuboid column is collected once and handed to
     * {@code estimateCuboidStorageSize} for every cuboid. When {@code origin} is {@code false}
     * and the segment configuration enables cuboid size optimization, the result is then
     * adjusted by {@code optimizeSizeMap}.
     *
     * @param cubeSegment    segment supplying the cube description, encodings and configuration
     * @param rowCountMap    estimated row count per cuboid id
     * @param sourceRowCount number of source records
     * @param origin         {@code true} to skip the history-based adjustment
     * @return a new mutable map from cuboid id to estimated size in MB
     */
    private static Map<Long, Double> getCuboidSizeMapFromRowCount(CubeSegment cubeSegment, Map<Long, Long> rowCountMap, long sourceRowCount, boolean origin) {
        CubeDesc cubeDesc = cubeSegment.getCubeDesc();
        Cuboid baseCuboid = Cuboid.getBaseCuboid(cubeDesc);
        List<TblColRef> baseColumns = baseCuboid.getColumns();
        CubeDimEncMap encodings = cubeSegment.getDimensionEncodingMap();
        // Kept boxed: it is only unboxed when a cuboid is actually estimated below.
        Long baseRowCount = rowCountMap.get(baseCuboid.getId());

        List<Integer> columnLengths = new ArrayList<>();
        for (TblColRef column : baseColumns) {
            columnLengths.add(encodings.get(column).getLengthOfEncoding());
        }

        Map<Long, Double> estimatedSizes = new HashMap<>();
        for (Map.Entry<Long, Long> rowCountEntry : rowCountMap.entrySet()) {
            Long cuboidId = rowCountEntry.getKey();
            double estimate = estimateCuboidStorageSize(cubeSegment, cuboidId, rowCountEntry.getValue(), baseCuboid.getId(), baseRowCount, columnLengths, sourceRowCount);
            estimatedSizes.put(cuboidId, estimate);
        }

        if (origin) {
            return estimatedSizes;
        }
        if (cubeSegment.getConfig().enableJobCuboidSizeOptimize()) {
            optimizeSizeMap(estimatedSizes, cubeSegment);
        }
        return estimatedSizes;
    }

    /**
     * Computes the harmonic mean of {@code data}.
     *
     * @param data values to average; may be {@code null} or empty
     * @return {@code 1.0} when {@code data} is {@code null} or empty, or when the sum of the
     *         reciprocals is zero; otherwise {@code data.size()} divided by that sum
     */
    private static Double harmonicMean(List<Double> data) {
        if (data == null || data.isEmpty()) {
            return 1.0;
        }
        double reciprocalSum = 0.0;
        for (double value : data) {
            reciprocalSum += 1.0 / value;
        }
        return reciprocalSum == 0.0 ? 1.0 : data.size() / reciprocalSum;
    }

    /**
     * Derives one correction ratio per build level from the estimate ratios stored on the
     * READY segments of {@code cubeInstance}.
     *
     * <p>Only segments whose merged flag equals that of {@code cubeSegment} and that carry an
     * estimate-ratio list are considered. For each level, the strictly positive ratios of all
     * such segments are combined with {@code harmonicMean}; a level with no usable ratio gets
     * {@code 1.0}. Levels missing from a segment's list (a list shorter than
     * {@code totalLevels + 1}) and {@code null} entries are skipped rather than failing.
     *
     * @param cubeSegment  segment being estimated; only its merged flag is read
     * @param cubeInstance cube whose READY segments supply the historical ratios
     * @param totalLevels  highest build level (inclusive)
     * @return a list with {@code totalLevels + 1} ratios indexed by level, or {@code null} when
     *         no segment provided any usable ratio
     */
    private static List<Double> getHistoricalRating(CubeSegment cubeSegment, CubeInstance cubeInstance, int totalLevels) {
        boolean isMerged = cubeSegment.isMerged();
        Map<Integer, List<Double>> layerRatio = new HashMap<>();
        for (CubeSegment seg : cubeInstance.getSegments(SegmentStatusEnum.READY)) {
            if (seg.isMerged() != isMerged) {
                continue;
            }
            List<Double> ratios = seg.getEstimateRatio();
            if (ratios == null) {
                continue;
            }
            logger.info("get ratio from {} with: {}", seg.getName(), StringUtils.join(ratios, ","));
            // A segment may have recorded fewer levels than the current scheduler exposes.
            int lastUsableLevel = Math.min(totalLevels, ratios.size() - 1);
            for (int level = 0; level <= lastUsableLevel; ++level) {
                Double ratio = ratios.get(level);
                if (ratio == null || ratio <= 0.0) {
                    continue;
                }
                List<Double> samples = layerRatio.get(level);
                if (samples == null) {
                    samples = new ArrayList<>();
                    layerRatio.put(level, samples);
                }
                samples.add(ratio);
            }
        }
        if (layerRatio.isEmpty()) {
            logger.info("Fail to get historical rating.");
            return null;
        }
        List<Double> result = new ArrayList<>();
        for (int level = 0; level <= totalLevels; ++level) {
            List<Double> samples = layerRatio.get(level);
            logger.debug("level {}: {}", level, StringUtils.join(samples, ","));
            // harmonicMean returns 1.0 for a level without samples, i.e. "no correction".
            result.add(CubeStatsReader.harmonicMean(samples));
        }
        logger.info("Finally estimate ratio is {}", StringUtils.join(result, ","));
        return result;
    }

    /**
     * Scales the entries of {@code sizeMap} in place by the per-level ratios obtained from
     * {@code getHistoricalRating}.
     *
     * <p>If no historical rating is available the map is left untouched. Otherwise every
     * cuboid listed in a layer of the cube's scheduler is written back as its current size
     * (or {@code 0.0} if it has no entry) multiplied by that layer's ratio.
     *
     * @param sizeMap     cuboid id to estimated size; modified in place
     * @param cubeSegment segment whose cube instance supplies the layers and the history
     */
    private static void optimizeSizeMap(Map<Long, Double> sizeMap, CubeSegment cubeSegment) {
        CubeInstance cube = cubeSegment.getCubeInstance();
        int maxLevel = cube.getCuboidScheduler().getBuildLevel();
        List<List<Long>> cuboidsByLayer = cube.getCuboidScheduler().getCuboidsByLayer();
        logger.info("cube size is {} before optimize", (Object)SumHelper.sumDouble(sizeMap.values()));

        List<Double> ratings = getHistoricalRating(cubeSegment, cube, maxLevel);
        if (ratings == null) {
            logger.info("Fail to optimize, use origin.");
            return;
        }

        int level = 0;
        while (level <= maxLevel) {
            Double layerRate = ratings.get(level);
            for (Long cuboidId : cuboidsByLayer.get(level)) {
                Double current = sizeMap.get(cuboidId);
                double baseline = current == null ? 0.0 : current;
                sizeMap.put(cuboidId, baseline * layerRate);
            }
            ++level;
        }
        logger.info("cube size is {} after optimize", (Object)SumHelper.sumDouble(sizeMap.values()));
    }

    /**
     * Estimates the storage size, in MB, of a single cuboid.
     *
     * <p>The row key length is the segment's preamble size plus the encoded length of every
     * base-cuboid column whose bit is set in {@code cuboidId}; bits are visited from the
     * highest set bit of {@code baseCuboidId} downwards and paired with
     * {@code rowKeyColumnLength} by position. Measure space is accumulated in four buckets
     * (COUNT_DISTINCT, PERCENTILE_APPROX, TOP_N and everything else), each multiplied by the
     * row count and, except for the percentile bucket, by a ratio from the configuration.
     *
     * @param cubeSegment        segment supplying preamble size, measures and configuration
     * @param cuboidId           id of the cuboid being estimated
     * @param rowCount           estimated number of rows of the cuboid
     * @param baseCuboidId       id of the base cuboid
     * @param baseCuboidCount    estimated number of rows of the base cuboid
     * @param rowKeyColumnLength encoded length of each base-cuboid column, in column order
     * @param sourceRowCount     number of source records
     * @return the estimated size in MB (bytes divided by 1048576)
     */
    private static double estimateCuboidStorageSize(CubeSegment cubeSegment, long cuboidId, long rowCount, long baseCuboidId, long baseCuboidCount, List<Integer> rowKeyColumnLength, long sourceRowCount) {
        int rowkeyLength = cubeSegment.getRowKeyPreambleSize();
        KylinConfig kylinConf = cubeSegment.getConfig();

        long columnBit = Long.highestOneBit(baseCuboidId);
        long baseCuboidBitLength = 64L - Long.numberOfLeadingZeros(baseCuboidId);
        for (int column = 0; column < baseCuboidBitLength; ++column) {
            if ((columnBit & cuboidId) > 0L) {
                rowkeyLength += rowKeyColumnLength.get(column);
            }
            columnBit >>= 1;
        }

        int normalSpace = rowkeyLength;
        int countDistinctSpace = 0;
        double percentileSpace = 0.0;
        int topNSpace = 0;
        for (MeasureDesc measure : cubeSegment.getCubeDesc().getMeasures()) {
            if (rowCount == 0L) {
                // Nothing to estimate per row; also avoids dividing by zero below.
                break;
            }
            DataType returnType = measure.getFunction().getReturnDataType();
            String expression = measure.getFunction().getExpression();
            if (expression.equals("COUNT_DISTINCT")) {
                long sourceRowsPerRow = sourceRowCount / rowCount;
                long estimateDistinctCount = sourceRowsPerRow == 0L ? 1L : sourceRowsPerRow;
                // int += double narrows back to int after every measure, as before.
                countDistinctSpace += returnType.getStorageBytesEstimate(estimateDistinctCount);
            } else if (expression.equals("PERCENTILE_APPROX")) {
                percentileSpace += returnType.getStorageBytesEstimate((double)baseCuboidCount * 1.0 / (double)rowCount);
            } else if (expression.equals("TOP_N")) {
                long sourceRowsPerRow = sourceRowCount / rowCount;
                long estimateTopNCount = sourceRowsPerRow == 0L ? 1L : sourceRowsPerRow;
                topNSpace += returnType.getStorageBytesEstimate(estimateTopNCount);
            } else {
                normalSpace += returnType.getStorageBytesEstimate();
            }
        }

        double cuboidSizeRatio = kylinConf.getJobCuboidSizeRatio();
        double cuboidSizeMemHungryRatio = kylinConf.getJobCuboidSizeCountDistinctRatio();
        double cuboidSizeTopNRatio = kylinConf.getJobCuboidSizeTopNRatio();

        double normalBytes = 1.0 * (double)normalSpace * (double)rowCount * cuboidSizeRatio;
        double countDistinctBytes = 1.0 * (double)countDistinctSpace * (double)rowCount * cuboidSizeMemHungryRatio;
        double percentileBytes = 1.0 * percentileSpace * (double)rowCount;
        double topNBytes = 1.0 * (double)topNSpace * (double)rowCount * cuboidSizeTopNRatio;
        return (normalBytes + countDistinctBytes + percentileBytes + topNBytes) / 1048576.0;
    }

    /**
     * Writes a human-readable report of this segment's statistics to {@code out}: summary
     * figures, the encoded length of each dimension, and the cuboid tree with per-cuboid
     * row and size estimates.
     *
     * <p>The HLL precision is read from the first loaded counter, so this method throws
     * {@link java.util.NoSuchElementException} when no counter was loaded.
     *
     * @param out destination of the report; not flushed or closed by this method
     */
    private void print(PrintWriter out) {
        Map<Long, Long> cuboidRows = this.getCuboidRowEstimatesHLL();
        Map<Long, Double> cuboidSizes = this.getCuboidSizeMap();
        out.println("============================================================================");
        out.println("Statistics of " + this.seg);
        out.println();
        out.println("Cube statistics hll precision: " + this.cuboidRowEstimatesHLL.values().iterator().next().getPrecision());
        out.println("Total cuboids: " + cuboidRows.size());
        out.println("Total estimated rows: " + SumHelper.sumLong(cuboidRows.values()));
        out.println("Total estimated size(MB): " + SumHelper.sumDouble(cuboidSizes.values()));
        out.println("Sampling percentage:  " + this.samplingPercentage);
        out.println("Mapper overlap ratio: " + this.mapperOverlapRatioOfFirstBuild);
        out.println("Mapper number: " + this.mapperNumberOfFirstBuild);
        this.printKVInfo(out);
        this.printCuboidInfoTreeEntry(cuboidRows, cuboidSizes, out);
        out.println("----------------------------------------------------------------------------");
    }

    /**
     * Sums the estimated sizes of all cuboids in one layer of the cuboid scheduler.
     * Cuboids without a size estimate contribute {@code 0.0}.
     *
     * @param level index of the layer in {@code cuboidScheduler.getCuboidsByLayer()}
     * @return the total estimated size of the layer, in MB
     * @throws UnsupportedOperationException if this reader has no cuboid scheduler
     */
    public double estimateLayerSize(int level) {
        if (cuboidScheduler == null) {
            throw new UnsupportedOperationException("cuboid scheduler is null");
        }
        List<Long> layerCuboids = cuboidScheduler.getCuboidsByLayer().get(level);
        Map<Long, Double> sizes = getCuboidSizeMap();

        double layerSize = 0.0;
        for (Long cuboidId : layerCuboids) {
            Double cuboidSize = sizes.get(cuboidId);
            layerSize += cuboidSize == null ? 0.0 : cuboidSize;
        }
        logger.info("Estimating size for layer {}, all cuboids are {}, total size is {}", level, StringUtils.join((Collection)layerCuboids, ","), layerSize);
        return layerSize;
    }

    /**
     * Returns the cuboid ids of one layer as reported by the cuboid scheduler.
     *
     * @param level index of the layer in {@code cuboidScheduler.getCuboidsByLayer()}
     * @return the cuboid ids of that layer
     * @throws UnsupportedOperationException if this reader has no cuboid scheduler
     */
    public List<Long> getCuboidsByLayer(int level) {
        if (cuboidScheduler == null) {
            throw new UnsupportedOperationException("cuboid scheduler is null");
        }
        return cuboidScheduler.getCuboidsByLayer().get(level);
    }

    /**
     * Prints the cuboid tree starting at the base cuboid of this segment's cube description.
     * The base cuboid is printed with parent id {@code -1} and depth {@code 0}.
     *
     * @param cuboidRows  estimated row count per cuboid id
     * @param cuboidSizes estimated size per cuboid id
     * @param out         destination of the output
     * @throws UnsupportedOperationException if this reader has no cuboid scheduler
     */
    private void printCuboidInfoTreeEntry(Map<Long, Long> cuboidRows, Map<Long, Double> cuboidSizes, PrintWriter out) {
        if (cuboidScheduler == null) {
            throw new UnsupportedOperationException("cuboid scheduler is null");
        }
        final long noParent = -1L;
        final int rootDepth = 0;
        long baseCuboidId = Cuboid.getBaseCuboidId(seg.getCubeDesc());
        // One dimension per bit set in the base cuboid id.
        int dimensions = Long.bitCount(baseCuboidId);
        printCuboidInfoTree(noParent, baseCuboidId, cuboidScheduler, cuboidRows, cuboidSizes, dimensions, rootDepth, out);
    }

    /**
     * Prints one line per base-cuboid column with the column length reported by a
     * {@code RowKeyEncoder} built for this segment and its base cuboid.
     *
     * @param writer destination of the output
     */
    private void printKVInfo(PrintWriter writer) {
        Cuboid baseCuboid = Cuboid.getBaseCuboid(seg.getCubeDesc());
        RowKeyEncoder rowKeyEncoder = new RowKeyEncoder(seg, baseCuboid);
        for (TblColRef column : baseCuboid.getColumns()) {
            String line = "Length of dimension " + column + " is " + rowKeyEncoder.getColumnLength(column);
            writer.println(line);
        }
    }

    /**
     * Prints {@code cuboidID} and then, recursively and in ascending id order, every cuboid
     * the scheduler reports as spanning from it.
     *
     * <p>The children are sorted on a private copy so that the list handed out by the
     * scheduler is never reordered by this reporting code.
     *
     * @param parent         id of the parent cuboid, or {@code -1} for the root
     * @param cuboidID       id of the cuboid to print
     * @param scheduler      supplies the children of each cuboid
     * @param cuboidRows     estimated row count per cuboid id
     * @param cuboidSizes    estimated size per cuboid id
     * @param dimensionCount number of dimensions, used for the cuboid display name
     * @param depth          nesting depth of {@code cuboidID}; controls indentation
     * @param out            destination of the output
     */
    private static void printCuboidInfoTree(long parent, long cuboidID, CuboidScheduler scheduler, Map<Long, Long> cuboidRows, Map<Long, Double> cuboidSizes, int dimensionCount, int depth, PrintWriter out) {
        CubeStatsReader.printOneCuboidInfo(parent, cuboidID, cuboidRows, cuboidSizes, dimensionCount, depth, out);
        List<Long> children = new ArrayList<>(scheduler.getSpanningCuboid(cuboidID));
        Collections.sort(children);
        for (Long child : children) {
            CubeStatsReader.printCuboidInfoTree(cuboidID, child, scheduler, cuboidRows, cuboidSizes, dimensionCount, depth + 1, out);
        }
    }

    /**
     * Prints a single line describing one cuboid: four spaces of indentation per depth level,
     * the display name, the estimated rows and size, and (unless {@code parent} is
     * {@code -1}) the row count as a percentage of the parent's row count.
     *
     * <p>Both maps must contain {@code cuboidID}, and {@code cuboidRows} must contain
     * {@code parent} when it is not {@code -1}; a missing entry fails on unboxing.
     *
     * @param parent         id of the parent cuboid, or {@code -1} for the root
     * @param cuboidID       id of the cuboid to print
     * @param cuboidRows     estimated row count per cuboid id
     * @param cuboidSizes    estimated size per cuboid id
     * @param dimensionCount number of dimensions, used for the cuboid display name
     * @param depth          nesting depth; controls indentation
     * @param out            destination of the output
     */
    private static void printOneCuboidInfo(long parent, long cuboidID, Map<Long, Long> cuboidRows, Map<Long, Double> cuboidSizes, int dimensionCount, int depth, PrintWriter out) {
        StringBuilder line = new StringBuilder();
        int indentLevel = 0;
        while (indentLevel < depth) {
            line.append("    ");
            ++indentLevel;
        }
        line.append("|---- Cuboid ");
        line.append(Cuboid.getDisplayName(cuboidID, dimensionCount));

        long estimatedRows = cuboidRows.get(cuboidID);
        double estimatedMb = cuboidSizes.get(cuboidID);
        line.append(", est row: ").append(estimatedRows);
        line.append(", est MB: ").append(formatDouble(estimatedMb));

        boolean hasParent = parent != -1L;
        if (hasParent) {
            long parentRows = cuboidRows.get(parent);
            double shrinkPercent = 100.0 * (double)estimatedRows / (double)parentRows;
            line.append(", shrink: ").append(formatDouble(shrinkPercent)).append("%");
        }
        out.println(line.toString());
    }

    /**
     * Formats {@code input} with the pattern {@code #.##} (at most two fraction digits)
     * using the {@link Locale#ROOT} decimal symbols.
     *
     * @param input value to format
     * @return the formatted text
     */
    private static String formatDouble(double input) {
        DecimalFormatSymbols rootSymbols = DecimalFormatSymbols.getInstance(Locale.ROOT);
        DecimalFormat twoFractionDigits = new DecimalFormat("#.##", rootSymbols);
        return twoFractionDigits.format(input);
    }

    /**
     * Command line entry point: prints the statistics of every segment of the cube named by
     * {@code args[0]} to standard output (UTF-8).
     *
     * <p>A segment whose statistics cannot be read or printed is skipped; the failure,
     * including its cause, is logged so that it can be diagnosed.
     *
     * @param args {@code args[0]} is the cube name
     * @throws IOException              declared for compatibility with existing callers
     * @throws IllegalArgumentException if no cube name is given or the cube does not exist
     */
    public static void main(String[] args) throws IOException {
        logger.info("CubeStatsReader is used to read cube statistic saved in metadata store");
        if (args == null || args.length < 1) {
            throw new IllegalArgumentException("Usage: CubeStatsReader <cubeName>");
        }
        KylinConfig config = KylinConfig.getInstanceFromEnv();
        CubeInstance cube = CubeManager.getInstance(config).getCube(args[0]);
        if (cube == null) {
            throw new IllegalArgumentException("Cube not found: " + args[0]);
        }
        Segments<CubeSegment> segments = cube.getSegments();
        PrintWriter out = new PrintWriter(new BufferedWriter(new OutputStreamWriter((OutputStream)System.out, StandardCharsets.UTF_8)));
        for (CubeSegment seg : segments) {
            try {
                new CubeStatsReader(seg, config).print(out);
            } catch (Exception e) {
                // Best effort: keep going with the remaining segments, but pass the exception
                // as the last argument so SLF4J records the stack trace.
                logger.warn("CubeStatsReader for Segment {} failed, skip it.", seg.getName(), e);
            }
        }
        out.flush();
    }

    /**
     * Parsed content of a cube statistics sequence file.
     *
     * <p>Records are dispatched on their key: {@code 0} carries the sampling percentage,
     * {@code -1} the mapper overlap ratio, {@code -2} the mapper number, {@code -3} the source
     * record count, and every positive key is a cuboid id whose value holds HLL registers.
     * Any other negative key is ignored.
     */
    public static class CubeStatsResult {
        private int percentage = 100;
        private double mapperOverlapRatio = 0.0;
        private long sourceRecordCount = 0L;
        private int mapperNumber = 0;
        private Map<Long, HLLCounter> counterMap = Maps.newHashMap();

        /**
         * Reads the whole sequence file at {@code path}.
         *
         * @param path      location of the statistics sequence file
         * @param precision precision used to create each {@code HLLCounter}
         * @throws IOException if the sequence file cannot be opened or read
         */
        public CubeStatsResult(Path path, int precision) throws IOException {
            Configuration hadoopConf = HadoopUtil.getCurrentConfiguration();
            SequenceFile.Reader.Option fileOption = SequenceFile.Reader.file(path);
            try (SequenceFile.Reader reader = new SequenceFile.Reader(hadoopConf, fileOption)) {
                LongWritable key = (LongWritable) ReflectionUtils.newInstance((Class) reader.getKeyClass(), hadoopConf);
                BytesWritable value = (BytesWritable) ReflectionUtils.newInstance((Class) reader.getValueClass(), hadoopConf);
                while (reader.next(key, value)) {
                    long recordKey = key.get();
                    if (recordKey == 0L) {
                        percentage = Bytes.toInt(value.getBytes());
                    } else if (recordKey == -1L) {
                        mapperOverlapRatio = Bytes.toDouble(value.getBytes());
                    } else if (recordKey == -2L) {
                        mapperNumber = Bytes.toInt(value.getBytes());
                    } else if (recordKey == -3L) {
                        sourceRecordCount = Bytes.toLong(value.getBytes());
                    } else if (recordKey > 0L) {
                        // Positive keys are cuboid ids; the value holds that cuboid's HLL registers.
                        HLLCounter counter = new HLLCounter(precision);
                        ByteArray registers = new ByteArray(value.getBytes());
                        counter.readRegisters(registers.asBuffer());
                        counterMap.put(recordKey, counter);
                    }
                }
            }
        }

        /**
         * @return the sampling percentage; {@code 100} if the file had no such record
         */
        public int getPercentage() {
            return percentage;
        }

        /**
         * @return the mapper overlap ratio; {@code 0.0} if the file had no such record
         */
        public double getMapperOverlapRatio() {
            return mapperOverlapRatio;
        }

        /**
         * @return the mapper number; {@code 0} if the file had no such record
         */
        public int getMapperNumber() {
            return mapperNumber;
        }

        /**
         * @return an unmodifiable view of the HLL counters keyed by cuboid id
         */
        public Map<Long, HLLCounter> getCounterMap() {
            return Collections.unmodifiableMap(counterMap);
        }

        /**
         * @return the source record count; {@code 0} if the file had no such record
         */
        public long getSourceRecordCount() {
            return sourceRecordCount;
        }
    }
}

