#include <R.h>
#include <Rmath.h>
#include <R_ext/Utils.h>


#define NODEBUG

void tripropscal(
  int *Iarg,
  int *Jarg,
  int *Karg,
  double *p_ij,
  double *p_ik,
  double *p_jk,
  double *p_ijk,
  double *A_ij,
  double *B_ik,
  double *C_jk,
  double *eps,
  int *maxiter,
  int *converged,
  int *iterations,
  int *verbose
) {
  int iter;
  int i,j,k;
  int I = *Iarg;
  int J = *Jarg;
  int K = *Karg;
  int IJ = I*J;
  int IK = I*K;
  int JK = J*K;
  int IJK = I*J*K;

/*  double *last_p_ijk = Calloc(IJK,double);
  double *q_ij = Calloc(IJ,double);
  double *q_ik = Calloc(IK,double);
  double *q_jk = Calloc(JK,double);
  double *r_ij = Calloc(IJ,double);
  double *r_ik = Calloc(IK,double);
  double *r_jk = Calloc(JK,double);*/
  
  double *last_p_ijk = (double*)R_alloc(IJK,sizeof(double));
  double *q_ij = (double*)R_alloc(IJ,sizeof(double));
  double *q_ik = (double*)R_alloc(IK,sizeof(double));
  double *q_jk = (double*)R_alloc(JK,sizeof(double));
  double *r_ij = (double*)R_alloc(IJ,sizeof(double));
  double *r_ik = (double*)R_alloc(IK,sizeof(double));
  double *r_jk = (double*)R_alloc(JK,sizeof(double));
                            
#ifdef DEBUG
  Rprintf("\ninit p_ijk[0] %g\n",p_ijk[0]);
#endif
                            
  for(iter = 0; iter < *maxiter; iter++){
#ifdef DEBUG
    Rprintf("\n========================================");
    Rprintf("\nStarting iteration %d",iter);
#endif
  
    R_CheckUserInterrupt();
    memcpy(last_p_ijk,p_ijk,sizeof(double)*IJK);
#ifdef DEBUG
      Rprintf("\nFirst round");
#endif
    for(i = 0; i < I ; i++){
      for(j = 0; j < J; j++){     
        q_ij[i+j*I] = 0;
        
        for(k = 0; k < K; k++)
          q_ij[i+j*I] += p_ijk[i+j*I+k*IJ];
/*#ifdef DEBUG
          for(k = 0; k < K; k++)
            Rprintf("\np_ijk[%d,%d,%d]=%g",i,j,k,p_ijk[i+j*I+k*IJ]);
#endif*/ 

        if(q_ij[i+j*I] > 0) r_ij[i+j*I] = p_ij[i+j*I]/q_ij[i+j*I];
        else r_ij[i+j*I] = 0;
          
        A_ij[i+j*I] *= r_ij[i+j*I];
        for(k = 0; k < K; k++)
          p_ijk[i+j*I+k*IJ]*=r_ij[i+j*I];
      }
    }
    
#ifdef DEBUG
      Rprintf("\np_ijk[0] = %g ",p_ijk[0]); 
      Rprintf("\np_ij[0] = %g ",p_ij[0]);
      Rprintf("q_ij[0] = %g ",q_ij[0]);
      Rprintf("r_ij[0] = %g ",r_ij[0]);
      Rprintf("A_ij[0] = %g ",A_ij[0]);
#endif
    
    for(i = 0; i < I ; i++){
      for(k = 0; k < K; k++){
        q_ik[i+k*I] = 0;
        
        for(j = 0; j < J; j++)
          q_ik[i+k*I] += p_ijk[i+j*I+k*IJ];
        
        if(q_ik[i+k*I] > 0) r_ik[i+k*I] = p_ik[i+k*I]/q_ik[i+k*I];
        else r_ik[i+k*I] = 0;
        B_ik[i+k*I] *= r_ik[i+k*I];
        for(j = 0; j < J; j++)
          p_ijk[i+j*I+k*IJ]*=r_ik[i+k*I];
      }
    }
      
#ifdef DEBUG
      Rprintf("\nSecond round");
      Rprintf("\np_ijk[0] = %g ",p_ijk[0]);
      Rprintf("\np_ik[0] = %g ",p_ik[0]);
      Rprintf("q_ik[0] = %g ",q_ik[0]);
      Rprintf("r_ik[0] = %g ",r_ik[0]);
      Rprintf("B_ik[0] = %g ",B_ik[0]);
#endif
    
    for(j = 0; j < J ; j++){
      for(k = 0; k < K; k++){
        q_jk[j+k*J] = 0;
          
        for(i = 0; i < I; i++)
          q_jk[j+k*J] += p_ijk[i+j*I+k*IJ];
        
        if(q_jk[j+k*J] > 0) r_jk[j+k*J] = p_jk[j+k*J]/q_jk[j+k*J];
        else r_jk[j+k*J] = 0;
        C_jk[j+k*J] *= r_jk[j+k*J];
        for(i = 0; i < I; i++)
          p_ijk[i+j*I+k*IJ]*=r_jk[j+k*J];
      }
    }

#ifdef DEBUG
      Rprintf("\nThird round");
      Rprintf("\np_ijk[0] = %g ",p_ijk[0]);
      Rprintf("\np_jk[0] = %g ",p_jk[0]);
      Rprintf("q_jk[0] = %g ",q_jk[0]);
      Rprintf("r_jk[0] = %g ",r_jk[0]);
      Rprintf("C_jk[0] = %g ",C_jk[0]);
#endif

    *converged = 1;
    for(i = 0; i < I; i++)
      for(j = 0; j < J; j++)
        for(k = 0; k < K; k++)
          *converged *= (IJK*fabs(p_ijk[i+j*I+k*IJ]-last_p_ijk[i+j*I+k*IJ])<*eps);
    if(*verbose){
      Rprintf("\nIteration %d",iter);
      Rprintf("\tCriterion: %g ",p_ijk[0]-last_p_ijk[0]);
    }
    
    if(*converged){
      if(*verbose)
        Rprintf("\nConverged.\n");
      break;
    }
  }
  *iterations = iter;
  /*
  Because R_alloc is used, the pointers are
  freed after return from .C
  Free(last_p_ijk);
  Free(q_ij);
  Free(q_ik);
  Free(q_jk);
  Free(r_ij);
  Free(r_ik);
  Free(r_jk);*/ 
}
