/* fit_test.c
 *
 * Copyright (C) 2005, 2006, 2007 Stephane Germain
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or (at
 * your option) any later version.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

/**
   \file
   \brief Log likelihood ratio test for goodness of fit.
   \author Stephane Germain <germste@gmail.com>
*/

#include "libirt.h"
#if HAVE_CONFIG_H
#  include <config.h>
#endif
#include <math.h>
#include <gsl/gsl_cdf.h>

/**
   \brief Compute the log likelihood ratio fit statistic of each item.

   The quadrature classes are grouped, in order, into at most nbr_inter
   intervals, each one taking classes until it holds at least
   (total expected subjects / nbr_inter) expected subjects.  For every
   interval the expected number of successes is compared with the number
   predicted by the weighted average of the response function over the
   interval.  Intervals with no subjects, no successes or no failures
   are skipped and do not count in the degrees of freedom.

   @param[in] quad_sizes A vector(classes) with the expected number
   of subjects in each class.
   @param[in] quad_freqs A matrix(items x classes) with the expected number of
   subjects in the class having a success at the item.
   @param[in] quad_weights A vector(classes) with the normal weights
   of each class.
   @param[in] probs A matrix(items x classes) with the response functions.
   @param[in] nbr_inter The number of intervals to use.
   @param[out] chi2 A vector(items+1) with the statistic of each item,
   followed by the statistic of the overall fit (the sum over the items).
   @param[out] df A vector(items+1) with the degrees of freedom, i.e. the
   number of intervals actually used (summed over the items for the
   overall fit).
   @param[out] p_value A vector(items+1) with the upper tail p-values.
   It is set to 1 when there are no degrees of freedom.

   \warning The memory for the outputs should be allocated before.
*/
void
llk_ratio_fit_test (gsl_vector *quad_sizes, gsl_matrix *quad_freqs,
		    gsl_vector *quad_weights, gsl_matrix *probs,
		    int nbr_inter, gsl_vector *chi2, gsl_vector_int *df, 
		    gsl_vector *p_value)
{
  int nbr_quad, nbr_item, i, j, k, d, tot_d;
  double nbr_subject, target, stat, freq, size, weight, prob, tot;
  nbr_quad = probs->size2;
  nbr_item = probs->size1;

  /* count the number of subject */
  nbr_subject = 0;
  for (k = 0; k < nbr_quad; k++)
    {
      nbr_subject += gsl_vector_get(quad_sizes, k);
    }

  /* expected number of subjects wanted in each interval; only used
     when nbr_inter > 0, so never divide by zero */
  target = (nbr_inter > 0) ? nbr_subject / nbr_inter : 0;

  tot = 0;
  tot_d = 0;
  for (i = 0; i < nbr_item; i++)
    {
      stat = 0;
      d = 0;
      k = 0;
      for (j = 0; j < nbr_inter; j++)
	{
	  freq = 0;
	  size = 0;
	  weight = 0;
	  prob = 0;
	  /* make each interval with the same number of subject */
	  while (k < nbr_quad && size < target)
	    {
	      size += gsl_vector_get(quad_sizes, k);
	      freq += gsl_matrix_get(quad_freqs, i, k);
	      weight += gsl_vector_get(quad_weights, k);
	      prob += gsl_vector_get(quad_weights, k)
		* gsl_matrix_get(probs, i, k);
	      k++;
	    }
	  /* an empty cell would give 0*log(0), skip the interval */
	  if (size == 0 || freq == 0 || size-freq == 0)
	    {
	      continue;
	    }
	  /* weighted average of the response function on the interval */
	  prob /= weight;
	  d += 1;
	  stat += 2 * ( freq*log(freq/(size*prob))
			+ (size-freq)*log((size-freq)/(size*(1-prob))) );
	}
      gsl_vector_set(chi2, i, stat);
      gsl_vector_int_set(df, i, d);
      /* use the upper tail directly: 1-P cancels to 0 for large
	 statistics; without any interval the statistic is 0 and p is 1 */
      gsl_vector_set(p_value, i,
		     (d > 0) ? gsl_cdf_chisq_Q(stat, d) : 1.0);
      tot += stat;
      tot_d += d;
    }

  /* compute the overall fit */
  gsl_vector_set(chi2, nbr_item, tot);
  gsl_vector_int_set(df, nbr_item, tot_d);
  gsl_vector_set(p_value, nbr_item,
		 (tot_d > 0) ? gsl_cdf_chisq_Q(tot, tot_d) : 1.0);
}

/**
   \brief Compute the log likelihood ratio fit statistic of each multiple
   choice item.

   The quadrature classes are grouped, in order, into at most nbr_inter
   intervals, each one taking classes until it holds at least
   (total expected subjects / nbr_inter) expected subjects.  Inside an
   interval, the options with an expected frequency of zero or equal to
   the interval size are ignored, and the statistic is computed on the
   remaining options after renormalizing their frequencies and
   probabilities.  An interval with no usable option is skipped.

   @param[in] quad_sizes A vector(classes) with the expected number
   of subjects in each class.
   @param[in] quad_freqs A matrix(options x classes) with the expected number of
   subjects in the class having a success at the option.
   @param[in] quad_weights A vector(classes) with the normal weights
   of each class.
   @param[in] probs A matrix(options x classes) with the response functions.
   @param[in] nbr_options A vector(items) with the number of options of
   each item.
   @param[in] items_pos A vector(items) with the position of the first
   option of each item.
   @param[in] nbr_inter The number of intervals to use.
   @param[out] chi2 A vector(items+1) with the statistic of each item,
   followed by the statistic of the overall fit (the sum over the items).
   @param[out] df A vector(items+1) with the degrees of freedom, i.e. the
   sum over the used intervals of (number of usable options - 1), summed
   over the items for the overall fit.
   @param[out] p_value A vector(items+1) with the upper tail p-values.
   It is set to 1 when there are no degrees of freedom.

   \warning The memory for the outputs should be allocated before.
*/
void
llk_ratio_fit_test_mc (gsl_vector *quad_sizes, gsl_matrix *quad_freqs,
		       gsl_vector *quad_weights, gsl_matrix *probs,
		       gsl_vector_int *nbr_options, gsl_vector_int *items_pos,
		       int nbr_inter, gsl_vector *chi2, gsl_vector_int *df, 
		       gsl_vector *p_value)
{
  int nbr_quad, nbr_item, i, j, k, from, to, o, nbr_option, pos, 
    d_opt, d, tot_d;
  double nbr_subject, target, stat, freq, size, weight, prob, tot,
    probtot, freqtot;
  nbr_quad = probs->size2;
  nbr_item = nbr_options->size;

  /* count the number of subject */
  nbr_subject = 0;
  for (k = 0; k < nbr_quad; k++)
    {
      nbr_subject += gsl_vector_get(quad_sizes, k);
    }

  /* expected number of subjects wanted in each interval; only used
     when nbr_inter > 0, so never divide by zero */
  target = (nbr_inter > 0) ? nbr_subject / nbr_inter : 0;

  tot = 0;
  tot_d = 0;
  for (i = 0; i < nbr_item; i++)
    {
      nbr_option = gsl_vector_int_get(nbr_options, i);
      pos = gsl_vector_int_get(items_pos, i);

      stat = 0;
      d = 0;
      to = 0;
      for (j = 0; j < nbr_inter; j++)
	{
	  size = 0;
	  weight = 0;
	  from = to;
	  /* make each interval with the same number of subject */
	  while (to < nbr_quad && size < target)
	    {
	      size += gsl_vector_get(quad_sizes, to);
	      weight += gsl_vector_get(quad_weights, to);
	      to++;
	    }
	  if (size == 0)
	    {
	      continue;
	    }
	  d_opt = 0;
	  probtot = 0;
	  freqtot = 0;
	  for (o = 0; o < nbr_option; o++)
	    {
	      /* pool the classes [from, to) of the interval */
	      freq = 0;
	      prob = 0;
	      for (k = from; k < to; k++)
		{
		  freq += gsl_matrix_get(quad_freqs, pos+o, k);
		  prob += gsl_vector_get(quad_weights, k)
		    * gsl_matrix_get(probs, pos+o, k);
		}
	      /* an empty or saturated option is not usable */
	      if (freq == 0 || size-freq == 0)
		{
		  continue;
		}
	      /* weighted average of the response function on the interval */
	      prob /= weight;
	      probtot += prob;
	      freqtot += freq;
	      d_opt += 1;
	      stat += 2 * freq*log(freq/(size*prob));
	    }
	  /* without any usable option freqtot and probtot are both 0:
	     the correction below would be 0*log(0/0) = NaN and the
	     degrees of freedom would decrease, so skip the interval */
	  if (d_opt == 0)
	    {
	      continue;
	    }
	  d += d_opt - 1;
	  /* renormalize on the options actually used */
	  stat += 2 * freqtot * log(probtot*size/freqtot);
	}
      gsl_vector_set(chi2, i, stat);
      gsl_vector_int_set(df, i, d);
      /* use the upper tail directly: 1-P cancels to 0 for large
	 statistics; without degrees of freedom p is set to 1 */
      gsl_vector_set(p_value, i,
		     (d > 0) ? gsl_cdf_chisq_Q(stat, d) : 1.0);
      tot += stat;
      tot_d += d;
    }

  /* compute the overall fit */
  gsl_vector_set(chi2, nbr_item, tot);
  gsl_vector_int_set(df, nbr_item, tot_d);
  gsl_vector_set(p_value, nbr_item,
		 (tot_d > 0) ? gsl_cdf_chisq_Q(tot, tot_d) : 1.0);
}

