/*
 * Decompiled with CFR 0.152.
 */
package org.apache.mahout.vectorizer.encoders;

import com.google.common.base.Charsets;
import com.google.common.collect.HashMultiset;
import com.google.common.collect.Multiset;
import org.apache.mahout.math.Vector;
import org.apache.mahout.vectorizer.encoders.WordValueEncoder;

/**
 * Word encoder that keeps a running tally of every word passed to
 * {@link #addToVector(String, double, Vector)} and derives each word's weight
 * from that tally as a smoothed negative log relative frequency.
 */
public class AdaptiveWordValueEncoder extends WordValueEncoder {
    /** Occurrence counts of the words seen so far by this encoder. */
    private final Multiset<String> dictionary;

    /**
     * Creates an encoder with an empty dictionary.
     *
     * @param name the name handed to the superclass constructor
     */
    public AdaptiveWordValueEncoder(String name) {
        super(name);
        dictionary = HashMultiset.create();
    }

    /**
     * Counts one more occurrence of {@code originalForm} in the dictionary and
     * then hands the call on to the superclass implementation.
     *
     * @param originalForm the word being encoded
     * @param weight       the caller-supplied weight, passed through unchanged
     * @param data         the vector passed through to the superclass
     */
    @Override
    public void addToVector(String originalForm, double weight, Vector data) {
        // The count is updated first, so it already includes this occurrence
        // by the time the superclass runs.
        dictionary.add(originalForm);
        super.addToVector(originalForm, weight, data);
    }

    /**
     * Scales the caller-supplied weight by the adaptive weight of the word.
     *
     * @param originalForm UTF-8 bytes of the word
     * @param w            the caller-supplied weight
     * @return {@code w} multiplied by {@link #weight(byte[])} of the word
     */
    @Override
    protected double getWeight(byte[] originalForm, double w) {
        double adaptiveWeight = weight(originalForm);
        return w * adaptiveWeight;
    }

    /**
     * Computes the negative natural log of the word's smoothed relative frequency.
     *
     * <p>The word's own count is increased by 0.5. The total is the number of
     * occurrences recorded so far, plus 0.5 for each distinct word, plus one
     * further 0.5. With a dictionary holding a single occurrence of a single
     * word, that word's weight is therefore {@code -log(1.5 / 2)}.
     *
     * @param originalForm UTF-8 bytes of the word
     * @return {@code -log(smoothedCount / smoothedTotal)}
     */
    @Override
    protected double weight(byte[] originalForm) {
        String word = new String(originalForm, Charsets.UTF_8);
        double smoothedCount = dictionary.count(word) + 0.5;
        double smoothedTotal = dictionary.size() + dictionary.elementSet().size() * 0.5 + 0.5;
        return -Math.log(smoothedCount / smoothedTotal);
    }

    /**
     * Returns the multiset of word counts held by this encoder.
     *
     * @return the encoder's own dictionary instance (not a copy)
     */
    public Multiset<String> getDictionary() {
        return dictionary;
    }
}

