/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.cf.taste.hadoop.similarity.item;

import com.google.common.base.Preconditions;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.SequenceFileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import org.apache.mahout.cf.taste.hadoop.EntityEntityWritable;
import org.apache.mahout.cf.taste.hadoop.TasteHadoopUtils;
import org.apache.mahout.cf.taste.hadoop.preparation.PreparePreferenceMatrixJob;
import org.apache.mahout.cf.taste.hadoop.similarity.item.TopSimilarItemsQueue;
import org.apache.mahout.cf.taste.similarity.precompute.SimilarItem;
import org.apache.mahout.common.AbstractJob;
import org.apache.mahout.common.HadoopUtil;
import org.apache.mahout.math.Vector;
import org.apache.mahout.math.VectorWritable;
import org.apache.mahout.math.hadoop.similarity.cooccurrence.RowSimilarityJob;
import org.apache.mahout.math.hadoop.similarity.cooccurrence.measures.VectorSimilarityMeasures;
import org.apache.mahout.math.map.OpenIntLongHashMap;

public final class ItemSimilarityJob
extends AbstractJob {
    public static final String ITEM_ID_INDEX_PATH_STR = ItemSimilarityJob.class.getName() + ".itemIDIndexPathStr";
    public static final String MAX_SIMILARITIES_PER_ITEM = ItemSimilarityJob.class.getName() + ".maxSimilarItemsPerItem";
    private static final int DEFAULT_MAX_SIMILAR_ITEMS_PER_ITEM = 100;
    private static final int DEFAULT_MAX_PREFS = 500;
    private static final int DEFAULT_MIN_PREFS_PER_USER = 1;

    public static void main(String[] args) throws Exception {
        ToolRunner.run((Tool)new ItemSimilarityJob(), (String[])args);
    }

    public int run(String[] args) throws Exception {
        this.addInputOption();
        this.addOutputOption();
        this.addOption("similarityClassname", "s", "Name of distributed similarity measures class to instantiate, alternatively use one of the predefined similarities (" + VectorSimilarityMeasures.list() + ')');
        this.addOption("maxSimilaritiesPerItem", "m", "try to cap the number of similar items per item to this number (default: 100)", String.valueOf(100));
        this.addOption("maxPrefs", "mppu", "max number of preferences to consider per user or item, users or items with more preferences will be sampled down (default: 500)", String.valueOf(500));
        this.addOption("minPrefsPerUser", "mp", "ignore users with less preferences than this (default: 1)", String.valueOf(1));
        this.addOption("booleanData", "b", "Treat input as without pref values", String.valueOf(Boolean.FALSE));
        this.addOption("threshold", "tr", "discard item pairs with a similarity value below this", false);
        this.addOption("randomSeed", null, "use this seed for sampling", false);
        Map<String, List<String>> parsedArgs = this.parseArguments(args);
        if (parsedArgs == null) {
            return -1;
        }
        String similarityClassName = this.getOption("similarityClassname");
        int maxSimilarItemsPerItem = Integer.parseInt(this.getOption("maxSimilaritiesPerItem"));
        int maxPrefs = Integer.parseInt(this.getOption("maxPrefs"));
        int minPrefsPerUser = Integer.parseInt(this.getOption("minPrefsPerUser"));
        boolean booleanData = Boolean.valueOf(this.getOption("booleanData"));
        double threshold = this.hasOption("threshold") ? Double.parseDouble(this.getOption("threshold")) : Double.MIN_VALUE;
        long randomSeed = this.hasOption("randomSeed") ? Long.parseLong(this.getOption("randomSeed")) : Long.MIN_VALUE;
        Path similarityMatrixPath = this.getTempPath("similarityMatrix");
        Path prepPath = this.getTempPath("prepareRatingMatrix");
        AtomicInteger currentPhase = new AtomicInteger();
        if (ItemSimilarityJob.shouldRunNextPhase(parsedArgs, currentPhase)) {
            ToolRunner.run((Configuration)this.getConf(), (Tool)new PreparePreferenceMatrixJob(), (String[])new String[]{"--input", this.getInputPath().toString(), "--output", prepPath.toString(), "--minPrefsPerUser", String.valueOf(minPrefsPerUser), "--booleanData", String.valueOf(booleanData), "--tempDir", this.getTempPath().toString()});
        }
        if (ItemSimilarityJob.shouldRunNextPhase(parsedArgs, currentPhase)) {
            int numberOfUsers = HadoopUtil.readInt(new Path(prepPath, "numUsers.bin"), this.getConf());
            ToolRunner.run((Configuration)this.getConf(), (Tool)new RowSimilarityJob(), (String[])new String[]{"--input", new Path(prepPath, "ratingMatrix").toString(), "--output", similarityMatrixPath.toString(), "--numberOfColumns", String.valueOf(numberOfUsers), "--similarityClassname", similarityClassName, "--maxObservationsPerRow", String.valueOf(maxPrefs), "--maxObservationsPerColumn", String.valueOf(maxPrefs), "--maxSimilaritiesPerRow", String.valueOf(maxSimilarItemsPerItem), "--excludeSelfSimilarity", String.valueOf(Boolean.TRUE), "--threshold", String.valueOf(threshold), "--randomSeed", String.valueOf(randomSeed), "--tempDir", this.getTempPath().toString()});
        }
        if (ItemSimilarityJob.shouldRunNextPhase(parsedArgs, currentPhase)) {
            Job mostSimilarItems = this.prepareJob(similarityMatrixPath, this.getOutputPath(), SequenceFileInputFormat.class, MostSimilarItemPairsMapper.class, EntityEntityWritable.class, DoubleWritable.class, MostSimilarItemPairsReducer.class, EntityEntityWritable.class, DoubleWritable.class, TextOutputFormat.class);
            Configuration mostSimilarItemsConf = mostSimilarItems.getConfiguration();
            mostSimilarItemsConf.set(ITEM_ID_INDEX_PATH_STR, new Path(prepPath, "itemIDIndex").toString());
            mostSimilarItemsConf.setInt(MAX_SIMILARITIES_PER_ITEM, maxSimilarItemsPerItem);
            boolean succeeded = mostSimilarItems.waitForCompletion(true);
            if (!succeeded) {
                return -1;
            }
        }
        return 0;
    }

    public static class MostSimilarItemPairsReducer
    extends Reducer<EntityEntityWritable, DoubleWritable, EntityEntityWritable, DoubleWritable> {
        protected void reduce(EntityEntityWritable pair, Iterable<DoubleWritable> values, Reducer.Context ctx) throws IOException, InterruptedException {
            ctx.write((Object)pair, (Object)values.iterator().next());
        }
    }

    public static class MostSimilarItemPairsMapper
    extends Mapper<IntWritable, VectorWritable, EntityEntityWritable, DoubleWritable> {
        private OpenIntLongHashMap indexItemIDMap;
        private int maxSimilarItemsPerItem;

        protected void setup(Mapper.Context ctx) {
            Configuration conf = ctx.getConfiguration();
            this.maxSimilarItemsPerItem = conf.getInt(MAX_SIMILARITIES_PER_ITEM, -1);
            this.indexItemIDMap = TasteHadoopUtils.readIDIndexMap(conf.get(ITEM_ID_INDEX_PATH_STR), conf);
            Preconditions.checkArgument(this.maxSimilarItemsPerItem > 0, "maxSimilarItemsPerItem must be greater then 0!");
        }

        protected void map(IntWritable itemIDIndexWritable, VectorWritable similarityVector, Mapper.Context ctx) throws IOException, InterruptedException {
            int itemIDIndex = itemIDIndexWritable.get();
            TopSimilarItemsQueue topKMostSimilarItems = new TopSimilarItemsQueue(this.maxSimilarItemsPerItem);
            for (Vector.Element element : similarityVector.get().nonZeroes()) {
                SimilarItem top = (SimilarItem)topKMostSimilarItems.top();
                double candidateSimilarity = element.get();
                if (!(candidateSimilarity > top.getSimilarity())) continue;
                top.set(this.indexItemIDMap.get(element.index()), candidateSimilarity);
                topKMostSimilarItems.updateTop();
            }
            long itemID = this.indexItemIDMap.get(itemIDIndex);
            for (SimilarItem similarItem : topKMostSimilarItems.getTopItems()) {
                long otherItemID = similarItem.getItemID();
                if (itemID < otherItemID) {
                    ctx.write((Object)new EntityEntityWritable(itemID, otherItemID), (Object)new DoubleWritable(similarItem.getSimilarity()));
                    continue;
                }
                ctx.write((Object)new EntityEntityWritable(otherItemID, itemID), (Object)new DoubleWritable(similarItem.getSimilarity()));
            }
        }
    }
}

