/*
 * 17/06/2009, 16:30.
 *
 * Simuquiz - http://www.simuquiz.com.br
 */

package br.com.simuquiz.antispam.classifiers;

import java.util.Set;

import br.com.simuquiz.antispam.Category;
import br.com.simuquiz.antispam.Feature;
import java.util.Map;

/**
 * Classifier that scores every known category as the category's share of
 * the training documents multiplied by the smoothed probability of each
 * document feature within that category, and returns the category with the
 * highest score.
 * <p>
 * Scores are accumulated as sums of logarithms rather than products of raw
 * probabilities. Multiplying many values below one underflows to
 * {@code 0.0} for documents with many features, which would make every
 * category look equally impossible; the logarithm is monotonic, so the
 * ranking of categories is unchanged while the underflow is avoided.
 *
 * @author Thiago Henrique Coraini
 * @author Israel Lacerra
 * @author Victor Williams Stafusa da Silva
 * @author Pedro Lopes de Souza
 */
public class NaiveClassifier extends Classifier {

    /** Weight given to the assumed probability when smoothing a feature estimate. */
    private static final double ASSUMED_PROB_WEIGHT = 1.0;

    /** Probability used for a feature that has no recorded events at all. */
    private static final double ASSUMED_PROB = 0.5;

    public NaiveClassifier(Map<String, Category> categories,
            Map<String, Feature> features, int totalDocs) {
        super(categories, features, totalDocs);
    }

    /**
     * Picks the category with the highest score for the given document.
     *
     * @param document the document, passed on to {@code getFeatures}
     * @return the best-scoring category, or {@code null} when no category
     *         has a score above zero probability (for example when there
     *         are no categories or none of them has any documents)
     */
    @Override
    public Category classify(Set<String> document) {
        // Log of a zero probability is negative infinity, so starting here
        // keeps the rule "only strictly positive probabilities can win".
        double bestLogProb = Double.NEGATIVE_INFINITY;
        Category best = null;

        Set<Feature> docFeatures = getFeatures(document);

        for (Category category : categories.values()) {
            double logProb = categoryLogProbGivenDocument(category, docFeatures);
            if (logProb > bestLogProb) {
                bestLogProb = logProb;
                best = category;
            }
        }
        return best;
    }

    /**
     * Computes the logarithm of (category share of all documents) times
     * (probability of the document's features within the category).
     *
     * @return the log score; {@code Double.NEGATIVE_INFINITY} when the
     *         category has no documents
     */
    private double categoryLogProbGivenDocument(Category category, Set<Feature> document) {
        if (category.getDocs() == 0) {
            // Zero prior: the category can never be selected.
            return Double.NEGATIVE_INFINITY;
        }
        double categoryProb = ((double) category.getDocs()) / totalDocs;
        return Math.log(categoryProb) + documentLogProb(category, document);
    }

    /**
     * Sums the logarithms of the smoothed probabilities of every feature of
     * the document within the category; an empty feature set yields
     * {@code 0.0}, i.e. a probability of one.
     */
    private double documentLogProb(Category category, Set<Feature> document) {
        double logProb = 0.0;

        for (Feature feature : document) {
            logProb += Math.log(featureProbGivenCategory(feature, category,
                    ASSUMED_PROB_WEIGHT, ASSUMED_PROB));
        }
        return logProb;
    }

    /**
     * Computes the probability of a feature within a category, blended with
     * an assumed probability so that features with few recorded events are
     * not given extreme estimates.
     *
     * @param feature     the feature being evaluated
     * @param category    the category the feature is evaluated against
     * @param weight      how strongly {@code assumedProb} counts relative to
     *                    the feature's total number of events
     * @param assumedProb the probability used when the feature has no events
     * @return the weighted probability, or {@code 0} when the category has
     *         no documents
     */
    private double featureProbGivenCategory(Feature feature, Category category,
            double weight, double assumedProb) {

        if (category.getDocs() == 0) {
            return 0;
        }
        double basicProb = ((double) feature.getEvents(category))
                / category.getDocs();

        int totalEvents = feature.getTotalEvents();

        // Weighted average of the assumed probability and the observed one;
        // the observed value dominates as totalEvents grows.
        return ((weight * assumedProb) + (totalEvents * basicProb))
                / (weight + totalEvents);
    }
}
