/*
  Copyright (C) 2019 Alessandro Languasco
*/
#define _GNU_SOURCE
#include <time.h>
#include <fftw3.h> 
#include <stdio.h>
#include <stdlib.h>  
#include <math.h>

#define REAL 0
#define IMAG 1


void getSvalues(long double* Svalues, ptrdiff_t dim) {
  FILE *myFile;
  myFile = fopen("precomp_S-DIF.res", "r");
  int i;
  int dummy;
  
  if (myFile == NULL){
    printf("File precomp_S-DIF.res doesn't exist\n");
    exit (0);
  }
  fscanf(myFile, "%d", &dummy);
  
  for (i = 0; i < dim; i++){
    fscanf(myFile, "%Lf", &Svalues[i]);
  }
  fclose(myFile); 
}
  

void prntc(fftwl_complex* result, ptrdiff_t dim) { /* for checking results */
  int i;
  for (i = 0; i < dim; ++i) {
    printf("%Lf\n", result[i][REAL]);
    printf("%Lf\n", result[i][IMAG]);
  }
}
void prntr(long double* result,ptrdiff_t dim) { /* for checking results */
  int i;
  for (i = 0; i < dim; ++i) {
    printf("%Lf\n", result[i]);
  }
}


int main() {
  ptrdiff_t a;  
  long double c; 
  long double d; 
  ptrdiff_t i;
  ptrdiff_t j;
  ptrdiff_t k;
  ptrdiff_t m;
  ptrdiff_t q;  
  ptrdiff_t qcheck; 
  ptrdiff_t primroot; 
  ptrdiff_t NUM_POINTS;
  ptrdiff_t nhalf;
  ptrdiff_t nquarter; 
  long double oddresult;
  long double evenresult;
  long double res;
  long double Eulergamma;
  long double pi;  
  Eulergamma=0.57721566490153286060651209008240243104;
  pi=3.1415926535897932384626433832795028841971693993751;

 
  printf("********** A. LANGUASCO  *********** \n");
  printf("Computation of the max_{chi neq chi_0} |L'/L(1,chi)| with the S function. [LONG DOUBLE PRECISION - GURU64]\n");
  printf("Acquiring q from file: q = ");
  FILE *myFile;
  myFile = fopen("precomp_S-DIF.res", "r");
  
  if (myFile == NULL){
    printf("File precomp_S-DIF.res doesn't exist\n");
    exit (0);
  }
  fscanf(myFile, "%tu", &q);
  printf("%td\n",q);
  fclose(myFile);
  
  NUM_POINTS=q-1;
  nhalf=NUM_POINTS/2;  
  nquarter=nhalf/2;  

  /*BEGIN GURU 64 PLAN PREPARATION */
  printf("%s", "Start FFT plans preparation"); 
  clock_t startplan = clock();  /* start clock */
   
  /***********  LOGGAMMA PLAN PREPARATION ***********/
  /* long double *lngammavalues;
  lngammavalues = (long double*) fftwl_malloc(sizeof(long double) * NUM_POINTS); 

  fftwl_complex *lngammatrasf;
  lngammatrasf = (fftwl_complex*) fftwl_malloc(sizeof(fftwl_complex) * (nhalf+1));*/
  
  /***** for an INPLACE TRANSFORM *****/
  long double *lngammavalues;
  lngammavalues = (long double*) fftwl_malloc(sizeof(long double) * NUM_POINTS+2);  /* for an INPLACE TRANSFORM; lngammavalues should be of the same dim of the output ; 2*(nhalf+1) */
  /* the FFT of a real sequence with length = NUM_POINTS has nhalf + 1 complex positions = NUM_POINTS+2 long double positions*/
  fftwl_complex *lngammatrasf;
  lngammatrasf = (fftwl_complex*)lngammavalues;  

  int rank=1;
  fftw_iodim64 *dims=malloc(rank*sizeof(fftw_iodim64));
  dims[0].n= NUM_POINTS;  /* length of the trasform */
  dims[0].is=1;
  dims[0].os=1;   
  int howmany_rank=1;
  fftw_iodim64 *howmany_dims=malloc(howmany_rank*sizeof(fftw_iodim64)); 
  howmany_dims[0].n=1;
  howmany_dims[0].is=1;
  howmany_dims[0].os=1;  
  fftwl_plan plan_lngamma = fftwl_plan_guru64_dft_r2c(rank, /*rank=1 means monodimensional vector*/
              dims, /* NUM_POINTS */
              howmany_rank,
              howmany_dims,
              lngammavalues,  /*input*/
              lngammatrasf,  /*output IN PLACE*/  
              FFTW_ESTIMATE); /*flags*/
              
  if (plan_lngamma ==NULL){fprintf(stderr,"loggamma plan creation failed\n");exit(1);}
  
  /********** ak PLAN PREPARATION ***********/
  
    /***** for an INPLACE TRANSFORM *****/
  /*fftwl_complex *akvalues;
  akvalues = (fftwl_complex*) fftwl_malloc(sizeof(fftwl_complex) * nhalf); */
  fftwl_complex *akvalues_trasf;
   /* the FFT of a complex sequence with length = nhalf has nhalf complex positions*/
  akvalues_trasf = (fftwl_complex*) fftwl_malloc(sizeof(fftwl_complex) * nhalf);
  
  fftw_iodim64 *ak_dims=malloc(rank*sizeof(fftw_iodim64));
  ak_dims[0].n= nhalf;   /* length of the trasform */
  ak_dims[0].is=1;
  ak_dims[0].os=1;  
  fftwl_plan plan_ak = fftwl_plan_guru64_dft(rank, /*rank=1 means monodimensional vector*/
              ak_dims, /* nhalf */
              howmany_rank,
              howmany_dims,
              akvalues_trasf,  /*input for INPLACE transforms*/ 
              akvalues_trasf,  /*output; for INPLACE transforms*/ 
              FFTW_FORWARD, /* -1 because it is the sign of FORWARD transform */
              FFTW_ESTIMATE); /*flags*/
              
  if (plan_ak ==NULL){fprintf(stderr,"ak plan creation failed\n");exit(1);}
  
  /*********** S PLAN PREPARATION ***********/
  
    /***** for an INPLACE TRANSFORM *****/
  /*long double *Svalues;
  Svalues = (long double*) fftwl_malloc(sizeof(long double) * nhalf); 
  fftwl_complex *Strasf;
  Strasf = (fftwl_complex*) fftwl_malloc(sizeof(fftwl_complex) * (nquarter+1));*/
  
  long double *Svalues;
  Svalues = (long double*) fftwl_malloc(sizeof(long double) * nhalf+2);  /* for an INPLACE TRANSFORM; Svalues should be of the same dim of the output ; 2*(nquarter+1) */
   /* the FFT of a real sequence with length = nhalf has nquarter + 1 complex positions = nhalf+2 long double positions*/
  fftwl_complex *Strasf;
  Strasf = (fftwl_complex*)Svalues;  /* for an INPLACE TRANSFORM */
  
  fftw_iodim64 *S_dims=malloc(rank*sizeof(fftw_iodim64));
  S_dims[0].n= nhalf;   /* length of the trasform */
  S_dims[0].is=1;
  S_dims[0].os=1; 
  fftwl_plan plan_S = fftwl_plan_guru64_dft_r2c(rank, /*rank=1 means monodimensional vector*/
              S_dims, /* nhalf */
              howmany_rank,
              howmany_dims,
              Svalues,  /*input*/
              Strasf,  /*output*/ 
              FFTW_ESTIMATE); /*flags*/
              
  if (plan_S ==NULL){fprintf(stderr,"S plan creation failed\n");exit(1);}
  
  clock_t endplan = clock(); /* stop clock */
  printf("%s\n", ". Done"); 
  
  /*END GURU 64 PLAN PREPARATION */
  
  
  /*********** acquire the values to generate sequence g^k%q ***********/
  
  printf("%s", "Generating log(gamma(a/q)) and g^k%q [decimated in frequency]"); 
  myFile = fopen("primroot.res", "r");
  
  if (myFile == NULL){
    printf("\n ******  ERROR: File primroot.res doesn't exist\n");
    exit (0);
  }
  /*checking if it is the correct q */
  fscanf(myFile, "%tu", &qcheck);

  if (qcheck != q) {printf("\n%s", "******* ERROR: These is the generator  for q = ");
        printf("%td\n",qcheck); 
        exit (0);
        };
  /*getting the value of the primitive root g mod q used in the precomputation */       
  fscanf(myFile, "%tu", &primroot);
  
  /*printf("%lu",primroot);*/
  
  /******** initialisation for log gamma and a_k **********/
  
  /*  COMMENT: the speediness of the DIF for a_k can be improved
  by replacing the nhalf calls to sin and cos with the summation
  formulae for the sin and cos functions (so using just one call of
  sin and cos and about nhalf sums and products). This  works nicely in
  quadruple precision but in the long double precision it introduces
  some computation errors in the final value of the euler-kronecker constants.
  So we are using here the slower but more precise version of the DIF
  for the a_k sequence.
  */
     clock_t startinitseq = clock();  /* start clock */
  long double sinvalue;
  long double cosvalue;
  long double logpi = logl(pi);
  long double twist = 0.0L;
  long double angle = (long double) pi/nhalf;
  a = 1; 
  for (k = 0; k <nhalf ; k++){  
     c = (long double) (2*a-q); /*g^k -R g^(k+m) \equiv g^k  -R (q-1)g^k \equiv g^k -R (-g^k) \equiv g^k -R (q -g^k) = 2*g^k -R q */
     /* sincosl(twist, &sinvalue, &cosvalue);  works in LINUX; not in C standard; slightly faster */
     cosvalue = cosl(twist);
     sinvalue = sinl(twist); 
     akvalues_trasf[k][REAL]= c * cosvalue;  
     akvalues_trasf[k][IMAG]= (-1.0L) * c * sinvalue; /* depends on the sign = -1 of the transform */
     d = (long double) a/q ;
     lngammavalues[k]=lgammal(d);  /* values of log(gamma(a_k/q)*/
     lngammavalues[k+nhalf] = -lngammavalues[k] + logpi - logl(sinl(pi*d));/* values of log(gamma(a_(k+m)/q) via the reflection formula*/
     a = (a*primroot)%q;  
     twist = twist + angle;
  } 
       clock_t endinitseq = clock();  /* end clock */
  printf("%s\n", ". Done"); 
  
  printf("%s", "Start first FFT transform"); 

       clock_t start = clock();  /* start clock */
  /* transform loggamma */
  fftwl_execute(plan_lngamma);
  
  /* lngammatrasf contains the DFT of loggamma; due to the simmetries the output has half elements than the input */
  /* lngammatrasf[i][REAL]=lngammatrasf[NUM_POINTS-i][REAL];  lngammatrasf[i][IMAG]=-lngammatrasf[NUM_POINTS-i][IMAG] */
  
  clock_t end = clock(); /* stop clock */
  printf("%s\n", " -- End first FFT transform."); 
  
  /* destroy the FFTW plans */  
  fftwl_destroy_plan(plan_lngamma);  /* DO NOT free lngammatrasf; we need it later */
  
 
  printf("%s", "Start second FFT transform"); 
    clock_t start1 = clock();  /* start clock */
  /* transform ak */
  fftwl_execute(plan_ak);

  /*akvalues_trasf contains the DFT of ak; it's a complex to complex transform */
  
  clock_t end1 = clock(); /* stop clock */

  printf("%s\n", " -- End second FFT transform."); 
  
  /* destroy the ak plan */ 
  fftwl_destroy_plan(plan_ak);
  
  printf("%s", "Computing the max over odd characters"); 
  
  clock_t startcompodd = clock(); /* start clock */
  long double M=0;/* odd characters */
  long double num=0;
  long double num1=0;
  long double den=0;
  long double quot=0;   
  long double aux=0; 
  long double corr= logl(2*pi)+Eulergamma;   
  for (i=1;  i <= nhalf; ++i) { /* computes |(a+ib)/(c+id)|= |a+ib|/|c+id| */
    j=2*i-1;
    m= i-1;
    k=NUM_POINTS-j;
    if ( j <= nhalf )
    {num = q*(lngammatrasf[j][REAL] * akvalues_trasf[m][REAL] + lngammatrasf[j][IMAG]*akvalues_trasf[m][IMAG]);
          den =akvalues_trasf[m][REAL]*akvalues_trasf[m][REAL]+akvalues_trasf[m][IMAG]*akvalues_trasf[m][IMAG];
          num1 = q*(lngammatrasf[j][IMAG] * akvalues_trasf[m][REAL] - lngammatrasf[j][REAL]*akvalues_trasf[m][IMAG]);
          aux = (corr+num/den)*(corr+num/den) + (num1/den)*(num1/den);
          quot = sqrtl(aux);
          if (quot> M) {M=quot;};
    }
   else
   {num = q*(lngammatrasf[k][REAL] * akvalues_trasf[m][REAL] - lngammatrasf[k][IMAG]*akvalues_trasf[m][IMAG]);
          den =akvalues_trasf[m][REAL]*akvalues_trasf[m][REAL]+akvalues_trasf[m][IMAG]*akvalues_trasf[m][IMAG];
          num1 = q*(-lngammatrasf[k][IMAG] * akvalues_trasf[m][REAL] - lngammatrasf[k][REAL]*akvalues_trasf[m][IMAG]);
          aux = (corr+num/den)*(corr+num/den) + (num1/den)*(num1/den);
          quot = sqrtl(aux);
          if (quot> M) {M=quot;};
    }
    /*printf("%Lf\n", M);*/
  }
  oddresult=M; /* max for odd characters */
  
  clock_t endcompodd = clock(); /* stop clock */
  printf("%s\n", ". Done");
  
  /* freeing the fftw_malloc*/ 
  fftwl_free(akvalues_trasf);
  
  /* Third TRANSFORM: S (decimated in frequency) */
  
  printf("%s", "Acquiring precomputed values for S(a/q) [decimated in frequency]");
  clock_t startreadonfile = clock(); /* start clock */  
  
  getSvalues(Svalues,nhalf);  /* Svalues has just nhalf meaningful entries; the remaining two are for the inplace transform*/

  clock_t endreadonfile = clock(); /* end clock */
  
  printf("%s\n", ". Done");
  
  printf("%s", "Start third FFT transform"); 
  clock_t start2 = clock();  /* start clock */
  
  fftwl_execute(plan_S);
  /* prnt(result,nhalf);  result contains the DFT of S */
  
  clock_t end2 = clock(); /* stop clock */
  printf("%s\n", " -- End third FFT transform."); 
  
   /* destroy the S plan */
  fftwl_destroy_plan(plan_S);
  
  /* Computing max_{chi\ne chi_0} \vert L'/L(1,\chi) \vert =: M(q) */
  /* sum starts from i=1 to avoid the trivial character contribution */
  
  /* Strasf contains the DFT of S; due to the simmetries the output has half elements than the input */
  /* Strasf[i][REAL]=result[nhalf-i][REAL];  Strasf[i][IMAG]=-result[nhalf-i][IMAG] */

  printf("%s", "Computing the max over even characters"); 
  clock_t startcompeven = clock(); /* start clock */
  
  M=0;/* even characters */
  num=0;
  num1=0;
  den=0;
  quot=0;   
  aux=0;  
  for (i=1;  i <= nhalf-1; ++i) { 
        j=2*i;
        k=NUM_POINTS-j;
        m=nhalf-i;
        if ( i <= nquarter )
        {num = (Strasf[i][REAL] * lngammatrasf[j][REAL] +  Strasf[i][IMAG]*lngammatrasf[j][IMAG])/2;
          den =lngammatrasf[j][REAL] *lngammatrasf[j][REAL] + lngammatrasf[j][IMAG]*lngammatrasf[j][IMAG] ;
          num1 = (Strasf[i][IMAG] * lngammatrasf[j][REAL] - Strasf[i][REAL]*lngammatrasf[j][IMAG])/2;
          aux = (corr+num/den)*(corr+num/den) + (num1/den)*(num1/den);
          quot = sqrtl(aux); 
          if (quot> M) {M=quot;};
          }
       else
       {num = (Strasf[m][REAL] * lngammatrasf[k][REAL] +  Strasf[m][IMAG]*lngammatrasf[k][IMAG])/2;
          den =lngammatrasf[k][REAL] *lngammatrasf[k][REAL] + lngammatrasf[k][IMAG]*lngammatrasf[k][IMAG] ;
          num1 = (Strasf[m][IMAG] * lngammatrasf[k][REAL] - Strasf[m][REAL]*lngammatrasf[k][IMAG])/2;
          aux = (corr+num/den)*(corr+num/den) + (num1/den)*(num1/den);
          quot = sqrtl(aux); 
          if (quot> M) {M=quot;};
          }    
       /*printf("%Lf\n", M);*/
        }
  evenresult=M; /* max for even characters */
  clock_t endcompeven = clock(); /* stop clock */
  printf("%s\n\n", ". Done");
  
  /* freeing the fftw_malloc*/ 
  fftwl_free(lngammatrasf);  fftwl_free(Strasf);
  /* clean the fftw setting */ 
  fftwl_cleanup();
 
  
  /******************** output of the results *********************/
  printf("%s\n\n", "*** RESULTS:");
  
   /* print max_q */
  printf("%s", "max_even(");
  printf("%td",q);
  printf("%s", ") = ");
  printf("%Lf", evenresult);
  printf("\n\n");
  
      /* print max_q */
  printf("%s", "max_odd(");
  printf("%td",q);
  printf("%s", ") = ");
  printf("%Lf", oddresult);
  printf("\n\n");
  
    /* print max_q */
  printf("%s", "max_{chi neq chi_0}|L'/L(1,chi)|(");
  printf("%td",q);
  printf("%s", ") = ");
  printf("%Lf", fmaxl(evenresult,oddresult));
  printf("\n\n");
   
  
  /********************* output of the computation times *********************/
  
  printf("%s\n", "*** TIMES:");
  double time_elapsed_in_sec = (end - start+ end1 - start1 + end2 -start2)/(double)CLOCKS_PER_SEC; 
  
  /* output of the FFT total computation time */
  double min=floor(time_elapsed_in_sec/60);
  double sec=floor(time_elapsed_in_sec-min*60);
  double msec=(time_elapsed_in_sec-min*60-sec)*1000;
  printf("%s", "FFT - total computation time: ");
  printf("%f", min);
  printf("%s", " min. ");
  printf("%f", sec);
  printf("%s", " sec. ");
  printf("%f", msec);
  printf("%s\n", " msec. "); 
  
  /* output of the PLAN FFT total computation time */
  time_elapsed_in_sec = (endplan - startplan)/(double)CLOCKS_PER_SEC; 
  
  min=floor(time_elapsed_in_sec/60);
  sec=floor(time_elapsed_in_sec-min*60);
  msec=(time_elapsed_in_sec-min*60-sec)*1000;
  printf("%s", "PLAN FFT - total computation time: ");
  printf("%f", min);
  printf("%s", " min. ");
  printf("%f", sec);
  printf("%s", " sec. ");
  printf("%f", msec);
  printf("%s\n", " msec. ");
  
  /* output of the total computation time */
  time_elapsed_in_sec = (endcompeven - startcompeven + endcompodd - startcompodd)/(double)CLOCKS_PER_SEC; 
  
  min=floor(time_elapsed_in_sec/60);
  sec=floor(time_elapsed_in_sec-min*60);
  msec=(time_elapsed_in_sec-min*60-sec)*1000;
  printf("%s", "Final comp maxq - total computation time: ");
  printf("%f", min);
  printf("%s", " min. ");
  printf("%f", sec);
  printf("%s", " sec. ");
  printf("%f", msec);
  printf("%s\n", " msec. "); 
  
  /* output of the I/O time */
  time_elapsed_in_sec = (endreadonfile - startreadonfile)/(double)CLOCKS_PER_SEC; 
  
  min=floor(time_elapsed_in_sec/60);
  sec=floor(time_elapsed_in_sec-min*60);
  msec=(time_elapsed_in_sec-min*60-sec)*1000;
  printf("%s", "Input data from file - time: ");
  printf("%f", min);
  printf("%s", " min. ");
  printf("%f", sec);
  printf("%s", " sec. ");
  printf("%f", msec);
  printf("%s\n", " msec. "); 
  
    /* output initialisation time */
  time_elapsed_in_sec = (endinitseq - startinitseq)/(double)CLOCKS_PER_SEC; 
  
  min=floor(time_elapsed_in_sec/60);
  sec=floor(time_elapsed_in_sec-min*60);
  msec=(time_elapsed_in_sec-min*60-sec)*1000;
  printf("%s", "Initialisation - time: ");
  printf("%f", min);
  printf("%s", " min. ");
  printf("%f", sec);
  printf("%s", " sec. ");
  printf("%f", msec);
  printf("%s\n", " msec. "); 
  
  
  /* total time */
  time_elapsed_in_sec = (endinitseq - startinitseq + end - start+ end1 - start1 + end2 -start2+ endplan - startplan + endcompeven - startcompeven + endcompodd - startcompodd+ endreadonfile - startreadonfile)/(double)CLOCKS_PER_SEC; 
  
  min=floor(time_elapsed_in_sec/60);
  sec=floor(time_elapsed_in_sec-min*60);
  msec=(time_elapsed_in_sec-min*60-sec)*1000;
  printf("%s", "Total elapsed time: ");
  printf("%f", min);
  printf("%s", " min. ");
  printf("%f", sec);
  printf("%s", " sec. ");
  printf("%f", msec);
  printf("%s\n", " msec. "); 
  printf("********** END PROGRAM  *********** \n");
  return 0;
}
 
/******  to compile
 Compilation instructions vary by platform. On the Mac and other UNIX-like systems, you should just be able to type:
 
 clang -o  Max-S-fftwl.exe  Max-S-fftwl.c -lfftw3l -lm  using long double precision (on mac ; fftw installed using homebrew)
 
 clang -o Max-S-fftwl.exe  Max-S-fftwl.c -lfftw3l -lm (on the optiplex linux box at the dept) 

 ******************/