/*
 * 17/06/2009, 16:30.
 *
 * Simuquiz - http://www.simuquiz.com.br
 */

package br.com.simuquiz.antispam.classifiers;

import java.util.HashMap;
import java.util.Set;

import br.com.simuquiz.antispam.Category;
import br.com.simuquiz.antispam.Feature;
import br.com.simuquiz.antispam.Trainer;
import java.util.Map;

/**
 * Classifier that scores each known category for a document using Fisher's
 * method: the per-feature probabilities are combined as
 * {@code -2 * sum(ln(p))} and that score is fed to an inverse chi-square
 * function with two degrees of freedom per feature.
 *
 * <p>All intermediate products are kept in logarithmic form so that documents
 * with many features do not underflow {@code double} arithmetic.
 *
 * @author Thiago Henrique Coraini
 * @author Israel Lacerra
 * @author Victor Williams Stafusa da Silva
 * @author Pedro Lopes de Souza
 */
public class FischerClassifier extends Classifier {

    /** Weight given to {@link #ASSUMED_PROB} when smoothing a feature probability. */
    private static final double ASSUMED_PROB_WEIGHT = 1.0;

    /** Probability assumed for a feature before any of its events are counted. */
    private static final double ASSUMED_PROB = 0.5;

    /**
     * Builds a classifier from the categories, features and document total
     * currently held by the given trainer.
     *
     * @param trainer source of the training data
     */
    public FischerClassifier(Trainer trainer) {
        this(trainer.getCategories(), trainer.getFeatures(), trainer.getTotalDocs());
    }

    /**
     * Builds a classifier from explicit training data; the arguments are
     * handed unchanged to the superclass constructor.
     *
     * @param categories known categories, by name
     * @param features known features, by name
     * @param totalDocs total number of documents
     */
    public FischerClassifier(Map<String, Category> categories,
            Map<String, Feature> features, int totalDocs) {
        super(categories, features, totalDocs);
    }

    /**
     * Returns the category with the highest Fisher score for the document,
     * considering only categories whose score is strictly greater than the
     * value returned by their own {@code getMin()} (and greater than zero).
     *
     * @param document the document, as a set of raw feature strings
     * @return the best-scoring category
     * @throws IllegalStateException if no category passes its minimum
     */
    @Override
    public Category classify(Set<String> document) {
        Set<Feature> docFeatures = getFeatures(document);

        Category best = null;
        double max = 0.0;

        for (Category category : categories.values()) {
            double prob = documentFisherProb(category, docFeatures);
            if (prob > category.getMin() && prob > max) {
                best = category;
                max = prob;
            }
        }

        if (best == null) {
            throw new IllegalStateException(
                    "No category scored above its minimum threshold for the given document");
        }
        return best;
    }

    /**
     * Computes the Fisher score of a document for one category.
     *
     * @param category category being scored
     * @param document features of the document
     * @return a value in {@code [0, 1]}; {@code 1.0} for an empty document
     */
    private double documentFisherProb(Category category, Set<Feature> document) {
        // Sum the logarithms instead of multiplying the probabilities: the
        // plain product underflows to 0 for long documents, which would turn
        // the score into +Infinity and the final result into NaN.
        double logProbSum = 0.0;

        for (Feature feature : document) {
            logProbSum += Math.log(categoryProbGivenFeature(feature, category));
        }

        double fscore = -2.0 * logProbSum;

        return invChi2(fscore, document.size() * 2);
    }

    /**
     * Estimates the probability of the category given the feature, smoothed
     * towards {@link #ASSUMED_PROB} so that features with few recorded events
     * do not produce extreme values.
     *
     * @param feature feature being evaluated
     * @param category category being evaluated
     * @return the weighted probability
     */
    private double categoryProbGivenFeature(Feature feature, Category category) {
        double basicProb;

        double featureCatFreq = featureProbGivenCategory(feature, category);
        if (featureCatFreq == 0) {
            // Also avoids 0/0 when the feature was never seen in any category.
            basicProb = 0.0;
        } else {
            double featureAllCatFreq = featureProbInAllCategories(feature);
            basicProb = featureCatFreq / featureAllCatFreq;
        }

        int totalEvents = feature.getTotalEvents();

        return ((ASSUMED_PROB_WEIGHT * ASSUMED_PROB) + (totalEvents * basicProb))
                / (ASSUMED_PROB_WEIGHT + totalEvents);
    }

    /**
     * Sums the per-category frequency of the feature over every known category.
     *
     * @param feature feature being evaluated
     * @return the sum of {@link #featureProbGivenCategory} over all categories
     */
    private double featureProbInAllCategories(Feature feature) {
        double sum = 0.0;

        for (Category category : categories.values()) {
            sum += featureProbGivenCategory(feature, category);
        }

        return sum;
    }

    /**
     * Returns the feature's event count in the category divided by the
     * category's document count, or {@code 0} when the category has no
     * documents.
     *
     * @param feature feature being evaluated
     * @param category category being evaluated
     * @return the frequency of the feature within the category
     */
    private double featureProbGivenCategory(Feature feature, Category category) {
        if (category.getDocs() == 0) {
            return 0;
        }

        return ((double) (feature.getEvents(category))) / (double) category.getDocs();
    }

    /**
     * Inverse chi-square function for an even number of degrees of freedom:
     * {@code exp(-m) * sum_{i=0}^{df/2 - 1} m^i / i!} with {@code m = chi / 2},
     * capped at {@code 1.0}.
     *
     * @param chi the chi-square statistic
     * @param df degrees of freedom
     * @return the resulting probability, at most {@code 1.0}
     */
    private double invChi2(double chi, int df) {
        double m = chi / 2.0;

        if (m <= 0.0) {
            // The series collapses to its first term, exp(0) = 1.
            return 1.0;
        }
        if (m == Double.POSITIVE_INFINITY) {
            // Limit of the series; computing it directly would yield NaN.
            return 0.0;
        }

        // Each term is carried as its logarithm so that exp(-m) underflowing
        // to 0 for large m does not wipe out the later, larger terms.
        double logM = Math.log(m);
        double logTerm = -m;
        double sum = Math.exp(logTerm);

        int terms = df / 2;
        for (int i = 1; i < terms; i++) {
            logTerm += logM - Math.log(i);
            sum += Math.exp(logTerm);
        }

        return Math.min(sum, 1.0);
    }
}