# load a set of common macros instructions
#
#   Copyright (C) 2024 Alessandro Languasco
#
import sys
import pandas as pd
import numpy as np 
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.animation as animation
import scipy as sp
from scipy.stats import norm
import sympy as sy
import humanfriendly as hu
from humanfriendly import format_timespan
import time 
from datetime import datetime
now = datetime.now()
# dd/mm/YY H:M:S
dt_string = now.strftime("%d/%m/%Y %H:%M:%S") 


# use Latex for the captions and titles of matplotlib
plt.rcParams['text.usetex'] = True


plt.rcParams.update({
    "text.usetex": True,
    "font.family": "mathptmx"
})

plt.rc('text.latex', preamble=r'\usepackage{amssymb}')
 
# prints 20 digits in pandas dataframes
pd.set_option("display.precision", 20)

print('--------------------------------------------------------------------------------')
print('             Author of the Script: Alessandro Languasco; (C) 2023')
print('--------------------------------------------------------------------------------')
print('    Versions: ')
print('python: {}'.format(sys.version))
print('matplotlib: {}'.format(mpl.__version__))
print('pandas: {}'.format(pd.__version__))
print('numpy: {}'.format(np.__version__))
print('scipy: {}'.format(sp.__version__))
print('sympy: {}'.format(sy.__version__))
print('humanfriendly: {}'.format(hu.__version__))
print('--------------------')
print("date and time =", dt_string)	
print('--------------------')

# function to Find indexes of an element in pandas dataframe
# from https://thispointer.com/python-find-indexes-of-an-element-in-pandas-dataframe/
# to be used at the bottom of the script to detect when  mineven == minodd  and when there are small values

def getIndexes(dfObj, value):
    ''' Get index positions of value in dataframe i.e. dfObj.'''
 
    listOfPos = list()
    # Get bool dataframe with True at positions where the given value exists
    result = dfObj.isin([value])
    # Get list of columns that contains the value
    seriesObj = result.any()
    columnNames = list(seriesObj[seriesObj == True].index)
    # Iterate over list of columns and fetch the rows indexes where value exists
    for col in columnNames:
        rows = list(result[col][result[col] == True].index)
        for row in rows:
            listOfPos.append((row, col))
    # Return a list of tuples indicating the positions of value in the dataframe
    return listOfPos


# functions needed to create the normalised values

def sqrt(x): 
    y =  np.sqrt(x)
    return y

def exp(x): 
    y =  np.exp(x)
    return y    

def ln(x): 
    y = np.log(x)
    return y

def lnln(x): 
    y =  np.log(np.log(x))
    return y           
    
def twothirdpow(x): 
	expo = 2/(3.)
	y = np.power(x,expo)
	return y   

def threequarterpow(x): 
	expo = 3/(4.)
	y = np.power(x,expo)
	return y 	 
 

start_time = time.monotonic() 

data=pd.read_csv(r"data/global-EK.csv", sep=';')
data.head(4)
df = pd.DataFrame(data, columns= ['q','EK', 'EKplus', 'EKdiff'])
#df = df.astype('float128')
df = df.astype('float64')
df = df.astype({"q": int})

# create a new column with normalised values
# \kappa(q) : = (EK(q)^+- EK(q) ) /log q  is EKdiffnorm
df['EKdiffnorm'] = - (df['EKdiff'] / ln(df['q']) )


#print (df)

# reads the list of Sophie Germain primes;first element
data=pd.read_csv(r"data/sophie_germain_primes_first.csv", sep=';')
data.head(1)
dfaux = pd.DataFrame(data, columns= ['q'])
#df = df.astype('float128')
dfaux = dfaux.astype('int')


dfgermain_first= df.merge(dfaux, on='q')
del(dfgermain_first['EK'])
del(dfgermain_first['EKplus'])
del (dfaux)
   
# reads the list of p such that (p,2p-1) are both primes
data=pd.read_csv(r"data/twiceprimeminusone_first.csv", sep=';')
data.head(1)
dfaux = pd.DataFrame(data, columns= ['q'])
#df = df.astype('float128')
dfaux = dfaux.astype('int')

dftwicepminusone_first = df.merge(dfaux, on='q')
del(dftwicepminusone_first['EK'])
del(dftwicepminusone_first['EKplus'])
del(dfaux)

# reads the list of p such that neither (p,2p+1) and (p,2p-1) are primes
data=pd.read_csv(r"data/altri.csv", sep=';')
data.head(1)
dfaux = pd.DataFrame(data, columns= ['q'])
#df = df.astype('float128')
dfaux = dfaux.astype('int')

dfothers = df.merge(dfaux, on='q')
del(dfothers['EK'])
del(dfothers['EKplus'])
del(dfaux)

# reads the list of p such that neither (p,2p+1), (p,2p-1), (p,4p+1), (p,4p-1) are primes
data=pd.read_csv(r"data/altri_altri.csv", sep=';')
data.head(1)
dfaux = pd.DataFrame(data, columns= ['q'])
#df = df.astype('float128')
dfaux = dfaux.astype('int')

dfothers_others = df.merge(dfaux, on='q')
del(dfothers_others['EK'])
del(dfothers_others['EKplus'])
del(dfaux)
   
# reads the list of p such that (p,4p-1) are both primes
data=pd.read_csv(r"data/fourtimesprimeminusone_first.csv", sep=';')
data.head(1)
dfaux = pd.DataFrame(data, columns= ['q'])
#df = df.astype('float128')
dfaux = dfaux.astype('int')

dffourtimespminusone_first = df.merge(dfaux, on='q')
del(dffourtimespminusone_first['EK'])
del(dffourtimespminusone_first['EKplus'])
del(dfaux)

# reads the list of p such that (p,4p-1) are both primes
data=pd.read_csv(r"data/fourtimesprimeplusone_first.csv", sep=';')
data.head(1)
dfaux = pd.DataFrame(data, columns= ['q'])
#df = df.astype('float128')
dfaux = dfaux.astype('int')

dffourtimespplusone_first = df.merge(dfaux, on='q')
del(dffourtimespplusone_first['EK'])
del(dffourtimespplusone_first['EKplus'])
del(dfaux)

   
#FINDING MAX and second MAX; MIN and second MIN; and where they are attained
print(' *****   Statistics for EK(q)-EK(q)^+  *****') 
print('name source fileanalysis = global-EK.csv')

print(' ----------------------- ') 
print(' *****   sums of | \kappa(q)|  *****')  

df['absEKdiffnorm'] = np.abs(df['EKdiffnorm'])

tentofive = 10**5
for jj in range(1, 101):
	partiallist = df['absEKdiffnorm'].loc[df.q < jj*10**5] 
	S= partiallist.sum()
	numoddprimes = len(partiallist)
	print('Value of sum  |\kappa(q)| , q <= ', jj, '*10^5 = ',S)
	print('the same over pi_odd(', jj, '*10^5) = ',S/numoddprimes)
	print(' ----- ') 
	#endfor
	 

del(df['absEKdiffnorm'])
print(' ----------------------- ') 
print(' *****   sum of EK(q)-EK(q)^+   *****')  
weight = np.log(10 ** 7)
print('log(10^7) = ',weight)
print(' -- ') 
S = df['EKdiff'].sum()
print('Value of sum (EK(q)-EK(q)^+)  = ',S)
print('the same over log(10^7) = ',S/weight)
print(' -- ') 
df['absEKdiff'] = np.abs(df['EKdiff'])
S = df['absEKdiff'].sum()
print('Value of sum  | EK(q)-EK(q)^+ |  = ',S)
print('the same over log(10^7) = ',S/weight)
del(df['absEKdiff'])

print(' -- ') 
S = df['EK'].sum()
print('Value of sum EK(q) = ',S)
print('the same over log(10^7) = ',S/weight)
print(' -- ') 
S = df['EKplus'].sum()
print('Value of sum EK(q)^+ = ',S)
print('the same over log(10^7) = ',S/weight)
print(' -- ') 

# number of  values >0 for EKdiff
total=float(len(df))
greater0 = len(df['EKdiff'].loc[df.EKdiff > 0])
percent=float(greater0)/total
print('number of EK(q) > EK(q)^+ :', greater0,'percentage =', percent*100)
print(' -- ') 

# number of  values <0 for EKdiffnorm
less0 = len(df['EKdiff'].loc[df.EKdiff < 0])
percent=float(less0)/total
print('number of EK(q) < EK(q)^+ :',less0,'percentage =', percent*100)

print('total number (less0+greater0) =', less0+greater0)
print('total number (number of rows) =', int(total)) 
print(' -- ') 
eps = 10**(-6)
dfclose = df['EKdiff'].loc[ np.abs(df.EKdiff) < eps ]

close = len(dfclose)
percent = float(close)/total
print('number of | EK(q) - EK(q)^+ | < eps = 10^(-6) :', close,'percentage =', percent*100)
print(' -- ') 
print('list of such values:')
print(dfclose)
del(dfclose)

eps = 10**(-5)
dfclose = df['EKdiff'].loc[ np.abs(df.EKdiff) < eps ]

close = len(dfclose)
percent = float(close)/total
print('number of | EK(q) - EK(q)^+ | < eps = 10^(-5) :', close,'percentage =', percent*100)
print('list of such values:')
print(dfclose)
del(dfclose)
print(' -- ') 
eps = 10**(-4)
dfclose = df['EKdiff'].loc[ np.abs(df.EKdiff) < eps ]

close = len(dfclose)
percent = float(close)/total
print('number of | EK(q) - EK(q)^+ | < eps = 10^(-4) :', close,'percentage =', percent*100)
print('list of such values:')
print(dfclose)
del(dfclose)

print('-----------------------------------------------------')
print('Bump analysis on normalised values kappa(q) :')

print('-------------')
print('Central bump:')
dfclose = df['EKdiffnorm'].loc[ np.abs(df.EKdiffnorm) < 0.125]

close = len(dfclose)
central_bump = close
percent = float(close)/total
print('number of | kappa(q)  | < 1/8 :', close,'; percentage =', percent*100)
#print('list of such values:')
#print(dfclose)
del(dfclose)

print('-------------')
print('Central bump:')
dfclose = df['EKdiffnorm'].loc[ np.abs(df.EKdiffnorm) < 0.1]

close = len(dfclose)
central_bump = close
percent = float(close)/total
print('number of | kappa(q)  | < 1/10 :', close,'; percentage =', percent*100)
#print('list of such values:')
#print(dfclose)
del(dfclose)

print('-------------')
print('Bump in 0.125:')

dfclose = df['EKdiffnorm'].loc[ (np.abs(df.EKdiffnorm - 0.125) < 0.1) ]

close = len(dfclose)
right_secondary_bump = close
percent = float(close)/total
print('number of | kappa(q)   - 0.125 | < 1/10 :', close,'; percentage =', percent*100)
#print('list of such values:')
#print(dfclose)
del(dfclose)

print('-------------')
print('Bump in -0.125:')

dfclose = df['EKdiffnorm'].loc[ (np.abs(df.EKdiffnorm + 0.125) < 0.1) ]

close = len(dfclose)
left_secondary_bump = close
percent = float(close)/total
print('number of | kappa(q)   + 0.125 | < 1/10 :', close,'; percentage =', percent*100)
#print('list of such values:')
#print(dfclose)
del(dfclose)

print('-------------')
print('Bump in 0.25:')

dfclose = df['EKdiffnorm'].loc[ (np.abs(df.EKdiffnorm - 0.25) < 0.1) ]

close = len(dfclose)
right_secondary_bump = close
percent = float(close)/total
print('number of | kappa(q)   - 0.25 | < 1/10 :', close,'; percentage =', percent*100)
#print('list of such values:')
#print(dfclose)
del(dfclose)

print('-------------')
print('Bump in -0.25:')

dfclose = df['EKdiffnorm'].loc[ (np.abs(df.EKdiffnorm + 0.25) < 0.1) ]

close = len(dfclose)
left_secondary_bump = close
percent = float(close)/total
print('number of | kappa(q)   + 0.25 | < 1/10 :', close,'; percentage =', percent*100)
#print('list of such values:')
#print(dfclose)
del(dfclose)

print('-------------')
print('Right tail:')

dfclose = df['EKdiffnorm'].loc[df.EKdiffnorm >= 0.35 ]

close = len(dfclose)
right_tail = close
percent = float(close)/total
print('number of kappa(q)   >= 1/4+1/10 :', close,'; percentage =', percent*100)
#print('list of such values:')
#print(dfclose)
del(dfclose)

print('-------------')
print('Left tail:')

dfclose = df['EKdiffnorm'].loc[ df.EKdiffnorm <= -0.35 ]

close = len(dfclose)
left_tail = close
percent = float(close)/total
print('number of kappa(q)   <= -1/4-1/10 :', close,'; percentage =', percent*100)
#print('list of such values:')
#print(dfclose)
del(dfclose)


print('-------------')
print('Bumps analysis kappa(q)  (summary):')

print('number of left sec bump:', left_secondary_bump,'; percentage =', float(left_secondary_bump)/total*100)
print('number of right sec bump:', right_secondary_bump,'; percentage =', float(right_secondary_bump)/total*100)

print('left tail:', central_bump,'; percentage =', float(left_tail)/total*100)
print('right tail:', central_bump,'; percentage =', float(right_tail)/total*100)
print('secondary bumps:', left_secondary_bump+right_secondary_bump,'; percentage =', float(left_secondary_bump+right_secondary_bump)/total*100)
print('central bump:', central_bump,'; percentage =', float(central_bump)/total*100)


print('-----------------------------------------------------')

print(' -- ') 
print(' ----------------------- ') 
print(' *****   MAX kappa(q)   *****')  

maxima =df.nlargest(10, 'EKdiffnorm', keep = 'all') 
M=maxima['EKdiffnorm'].iloc[0] 
argM=maxima['q'].iloc[0] 
M1=maxima['EKdiffnorm'].iloc[1]
argM1=maxima['q'].iloc[1]
print('maximal value on column kappa(q)  =',M,'attained at q =',argM)
print('second maximal value on column kappa(q)  =',M1,'attained at q =',argM1)
print(maxima)
del maxima

print(' *****   MIN kappa(q)   *****') 
minima =df.nsmallest(10, 'EKdiffnorm', keep = 'all') 
m=minima['EKdiffnorm'].iloc[0] 
argm=minima['q'].iloc[0] 
m1=minima['EKdiffnorm'].iloc[1]
argm1=minima['q'].iloc[1]
print('minimal value on column kappa(q)  =',m,'attained at q =',argm)
print('second minimal value on column kappa(q)  =',m1,'attained at q =',argm1)
print(minima)
del minima


print(' *****   STATS kappa(q)   *****')
# number of  values >0 for EKdiffnorm
total=float(len(df))
greater0 = len(df['EKdiffnorm'].loc[df.EKdiffnorm > 0])
percent=float(greater0)/total
print('number of kappa(q) >0 :', greater0,'percentage =', percent*100)


# number of  values <0 for EKdiffnorm
less0 = len(df['EKdiffnorm'].loc[df.EKdiffnorm < 0])
percent=float(less0)/total
print('number of kappa(q) <0 :',less0,'percentage =', percent*100)

print('total number (less0+greater0) =', less0+greater0)
print('total number (number of rows) =', int(total)) 


# number of EKdiffnorm > 0.6
greater2= len(df.loc[df.EKdiffnorm > 0.6]) 
if greater2 > 0 :
	print(' *****  Possible wrong datas *******') 
	print('number of EKdiffnorm > 0.6 :',greater2,'percentage =', percent*100)
	
	# number of  EKdiffnorm <- 0.6
greater2= len(df.loc[df.EKdiffnorm < -0.6]) 
if greater2 > 0 :
	print(' *****  Possible wrong datas *******') 
	print('number of EKdiffnorm < -0.6 :',greater2,'percentage =', percent*100)


print('*******  MEAN, STANDARD DEVIATION, VARIANCE ********')

mean = df['EKdiffnorm'].mean()
devst = df['EKdiffnorm'].std()
variance = df.var()['EKdiffnorm']
print('mean (kappaq) = ',mean)
print('standard deviation (kappaq) = ',devst)
print('variance (kappaq) = ',variance)
print('*******')

print('********* plotting histogram and normal curves *********')

#df = df.astype('float128')
#mean = np.float128(mean) 
#devst = np.float128(devst) 
mean = np.float64(mean) 
devst = np.float64(devst) 
 
num_bins = 100

plt.grid(True, linestyle='--', axis = 'y')
plt.hist(df['EKdiffnorm'], num_bins, range=[-0.5, 0.5], color = "skyblue", histtype='bar', ec='black', density="True")
plt.yticks([]) 


# Plot the Probability Density Function.

#xmin, xmax = plt.xlim() 
xmin, xmax = -0.5, 0.5
x = np.linspace(xmin, xmax, num_bins)
bin_width = (x.max() - x.min()) / num_bins 
mass_correction = total * bin_width
mass_correction = 1
print('******** blue normal function data ******* ')
mass_correction_blue = mass_correction 
devst_blue = 1.13*devst
print('total data ', total)
print('max x = ', x.max())
print('min x = ', x.min())
print('bin_width = ', bin_width)
print('num bins = ', num_bins)
print('mean = ', mean)
print('standard deviation  = ', devst)
print('standard deviation blue (1.13*devst) = ', devst_blue)
print('mass correction = ', mass_correction)
print('mass correction blue (mass_correction) = ', mass_correction_blue)

# Save a version without the density curves
#title = "$\kappa(q); q\le 10^7$: $\sigma =$ %.3f, $\mathcal{M} =$ %d" % (devst, mass_correction)  
title = "$\kappa(q); q\le 10^7$: $\sigma =$ %.3f" % (devst)  
plt.title(title)
caption = " \par "
caption = caption + " \par "
caption = caption + " "
plt.xlabel(caption)


min_ylim, max_ylim = plt.ylim()
plt.axvline(mean, color='r', linestyle='dashed', linewidth=1)
plt.text(mean*1.2, max_ylim*0.9675, '$\mu$ = {:.6f}'.format(mean)) 
plt.axvline(0.25, color='r', linestyle='dashed', linewidth=1)
plt.text(0.25, max_ylim*0.9675, '$0.25$')
plt.axvline(-0.25, color='r', linestyle='dashed', linewidth=1)
plt.text(-0.25, max_ylim*0.9675, '$-0.25$')   
plt.savefig("plots_results/histo-kappaq-nocurves.png", dpi=600, bbox_inches='tight')
print('********* saved in histo-kappaq-nocurves.png   *********') 


# scatter plot
print('********* saved in histo-EKdiffnorm.png   *********')
print('*******  MEAN, STANDARD DEVIATION, VARIANCE for the bumps ********')

mean = np.float64(mean) 
devst = np.float64(devst) 

print('*******  MEAN, STANDARD DEVIATION, VARIANCE ********')
#print(df.describe())

mean = df['EKdiffnorm'].mean()
devst = df['EKdiffnorm'].std()
variance = df.var()['EKdiffnorm']
print('mean (kappaq) = ',mean)
print('standard deviation (kappaq) = ',devst)
print('variance (kappaq) = ',variance)
print('*******')

print('*******  highlitgthed contribution of the bumps ********')
#xmin, xmax = plt.xlim() 
xmin, xmax = -0.5, 0.5
x = np.linspace(xmin, xmax, num_bins)
bin_width = (x.max() - x.min()) / num_bins 
mass_correction = total * bin_width
mass_correction = 1

blackcurve = mass_correction * norm.pdf(x, mean, devst)
plt.plot(x, blackcurve, 'k', linewidth=1)


hist0 = plt.hist(df['EKdiffnorm'], num_bins, range=[-0.5, 0.5], color = "skyblue", histtype='bar', ec='black')
hist1 = plt.hist(dfgermain_first['EKdiffnorm'], num_bins, range=[-0.5, 0.5], color = "green", histtype='bar', ec='black')
hist2 = plt.hist(dftwicepminusone_first['EKdiffnorm'], num_bins, range=[-0.5, 0.5], color = "yellow", histtype='bar', ec='black')


min_ylim, max_ylim = plt.ylim()
plt.axvline(mean, color='r', linestyle='dashed', linewidth=1)
shift = 0.1
plt.text(mean*1.2-shift, max_ylim*0.9675, '$\mu$ = {:.6f}'.format(mean))  
plt.axvline(0.25, color='r', linestyle='dashed', linewidth=1)
plt.text(0.25, max_ylim*0.9675, '$0.25$')
plt.axvline(-0.25, color='r', linestyle='dashed', linewidth=1)
plt.text(-0.25-shift+0.02, max_ylim*0.9675, '$-0.25$')

plt.yticks([])  
#title = "$\kappa(q);  q\le 10^7$; $\sigma =$ %.3f, $\mathcal{M} =$ %d" % (devst, mass_correction)  
title = "$\kappa(q);  q\le 10^7$; $\sigma =$ %.3f" % (devst)  
plt.title(title)


#plt.show()  
caption = ""
caption = caption + "yellow bars: $\kappa(q); q$ s.t. $q\ge 5$ and $q,2q-1$ are both primes; \par "
caption = caption + "green bars: $\kappa(q)$ s.t. $q\ge 5$ and  $q,2q+1$ are both primes; \par "
caption = caption + "cerulean bars: $\kappa(q)$"
plt.xlabel(caption)
plt.title(title)
plt.savefig("plots_results/histo-kappaq-highlighted-bumps1.png", dpi=600, bbox_inches='tight')
print('********* saved in histo-kappaq-highlighted-bumps1.png   *********') 
plt.cla()

print('*******  MEAN, STANDARD DEVIATION, VARIANCE for the bumps ********')

mean = np.float64(mean) 
devst = np.float64(devst) 

print('*******  MEAN, STANDARD DEVIATION, VARIANCE ********')
#print(df.describe())

mean = df['EKdiffnorm'].mean()
devst = df['EKdiffnorm'].std()
variance = df.var()['EKdiffnorm']
print('mean (kappaq) = ',mean)
print('standard deviation (kappaq) = ',devst)
print('variance (kappaq) = ',variance)
print('*******')

print('*******  highlitgthed contribution of the bumps ********')
#xmin, xmax = plt.xlim() 
xmin, xmax = -0.5, 0.5
x = np.linspace(xmin, xmax, num_bins)
bin_width = (x.max() - x.min()) / num_bins 
mass_correction = total * bin_width
mass_correction = 1

blackcurve = mass_correction * norm.pdf(x, mean, devst)
plt.plot(x, blackcurve, 'k', linewidth=1)


hist0 = plt.hist(df['EKdiffnorm'], num_bins, range=[-0.5, 0.5], color = "skyblue", histtype='bar', ec='black')
#hist1 = plt.hist(dfgermain_first['EKdiffnorm'], num_bins, range=[-0.5, 0.5], color = "green", histtype='bar', ec='black')
#hist2 = plt.hist(dftwicepminusone_first['EKdiffnorm'], num_bins, range=[-0.5, 0.5], color = "yellow", histtype='bar', ec='black')
hist3 = plt.hist(dffourtimespminusone_first['EKdiffnorm'], num_bins, range=[-0.5, 0.5], color = "magenta", histtype='bar', ec='black')
hist4 = plt.hist(dffourtimespplusone_first['EKdiffnorm'], num_bins, range=[-0.5, 0.5], color = "brown", histtype='bar', ec='black')


min_ylim, max_ylim = plt.ylim()
plt.axvline(mean, color='r', linestyle='dashed', linewidth=1)
shift = 0.1
plt.text(mean*1.2-shift, max_ylim*0.9675, '$\mu$ = {:.6f}'.format(mean))  

#plt.axvline(0.25, color='r', linestyle='dashed', linewidth=1)
#plt.text(0.25, max_ylim*0.9675, '$0.25$')
#plt.axvline(-0.25, color='r', linestyle='dashed', linewidth=1)
#plt.text(-0.25-shift+0.02, max_ylim*0.9675, '$-0.25$')
plt.axvline(0.125, color='b', linestyle='dashed', linewidth=1)
plt.text(0.125, max_ylim*0.9675, '$0.125$')
plt.axvline(-0.125, color='b', linestyle='dashed', linewidth=1)
plt.text(-0.125-shift, max_ylim*0.9675, '$-0.125$')

plt.yticks([])  
#title = "$\kappa(q);  q\le 10^7$; $\sigma =$ %.3f, $\mathcal{M} =$ %d" % (devst, mass_correction)  
title = "$\kappa(q);  q\le 10^7$; $\sigma =$ %.3f" % (devst)  
plt.title(title)


#plt.show()  
caption = ""
#caption = caption + "yellow bars: $\kappa(q); q$ s.t. $q\ge 5$ and $q,2q-1$ are both primes; \par "
#caption = caption + "green bars: $\kappa(q)$ s.t. $q\ge 5$ and  $q,2q+1$ are both primes; \par "
caption = caption + "magenta bars: $\kappa(q); q$ s.t. $q\ge 5$ and $q,4q-1$ are both primes; \par "
caption = caption + "brown bars: $\kappa(q)$ s.t. $q\ge 5$ and  $q,4q+1$ are both primes; \par "
caption = caption + "cerulean bars: $\kappa(q)$"
plt.xlabel(caption)
plt.title(title)
plt.savefig("plots_results/histo-kappaq-highlighted-bumps2.png", dpi=600, bbox_inches='tight')
print('********* saved in histo-kappaq-highlighted-bumps2.png   *********') 
plt.cla()


print('*******  MEAN, STANDARD DEVIATION, VARIANCE: removed bumps ********')

mean = np.float64(mean) 
devst = np.float64(devst) 

#print(df.describe())
#df['nobumps']= df['logKummer"data/]-dfgermain_first['logKummer"data/]-dftwicepminusone_first['logKummer"data/]

total_nobumps=float(len(dfothers))
xmin, xmax = -0.5, 0.5
x = np.linspace(xmin, xmax, num_bins)
bin_width = (x.max() - x.min()) / num_bins 
#mass_correction = total_nobumps * bin_width
mass_correction = 1

mean =  dfothers['EKdiffnorm'].mean()
devst =  dfothers['EKdiffnorm'].std()
variance = dfothers.var()['EKdiffnorm']

print('total data (no bumps) ', total_nobumps)
print('max x = ', x.max())
print('min x = ', x.min())
print('bin_width = ', bin_width)
print('num bins = ', num_bins)

print('mean (kappaq; no bumps) = ',mean)
print('standard deviation (kappaq; no bumps) = ',devst)
print('variance (kappaq; no bumps) = ',variance)
print('*******')

blackcurve = mass_correction * norm.pdf(x, mean, devst)
plt.plot(x, blackcurve, 'k', linewidth=1)


plt.hist(dfothers['EKdiffnorm'], num_bins, range=[-0.5, 0.5], color = "skyblue", histtype='bar', ec='black', density="True")

min_ylim, max_ylim = plt.ylim()
plt.axvline(mean, color='r', linestyle='dashed', linewidth=1)
plt.text(mean*1.2, max_ylim*0.9675, '$\mu$ = {:.6f}'.format(mean))  

blackcurve = mass_correction * norm.pdf(x, mean, devst)
plt.plot(x, blackcurve, 'k', linewidth=1)


plt.yticks([])  
#title = "$\kappa(q);  q\le 10^7$; $\sigma =$ %.3f, $\mathcal{M} =$ %d" % (devst, mass_correction)  
title = "$\kappa(q);  q\le 10^7$; $\sigma =$ %.3f" % (devst)  
plt.title(title)

#plt.show()  
caption = "$\kappa(q); q$ s.t.  $2q + 1$, $2q-1$ are not primes \par "
#caption = caption + "black curve : $\mathcal{M}\cdot \mathcal{N}(x,\mu,\sigma)$"
caption = caption + "black curve : $\mathcal{N}(x,\mu,\sigma)$"
#caption = caption + "green bars: $\log\, r(q); q$ s.t. $q,2q+1$ are both primes; \par "
#caption = caption + "cerulean bars: $\log\, r(q)$"
plt.xlabel(caption)
plt.title(title)
plt.savefig("plots_results/histo-kappaq-no-bumps1.png", dpi=600, bbox_inches='tight')
print('********* saved in histo-kappaq-no-bumps1.png   *********') 
plt.cla()


print('*******  MEAN, STANDARD DEVIATION, VARIANCE: removed bumps ********')

mean = np.float64(mean) 
devst = np.float64(devst) 

#print(df.describe())
#df['nobumps']= df['logKummer"data/]-dfgermain_first['logKummer"data/]-dftwicepminusone_first['logKummer"data/]

total_nobumps=float(len(dfothers_others))
xmin, xmax = -0.5, 0.5
x = np.linspace(xmin, xmax, num_bins)
bin_width = (x.max() - x.min()) / num_bins 
#mass_correction = total_nobumps * bin_width
mass_correction = 1

mean =  dfothers_others['EKdiffnorm'].mean()
devst =  dfothers_others['EKdiffnorm'].std()
variance = dfothers_others.var()['EKdiffnorm']

print('total data (no bumps) ', total_nobumps)
print('max x = ', x.max())
print('min x = ', x.min())
print('bin_width = ', bin_width)
print('num bins = ', num_bins)

print('mean (kappaq; no bumps) = ',mean)
print('standard deviation (kappaq; no bumps) = ',devst)
print('variance (kappaq; no bumps) = ',variance)
print('*******')

blackcurve = mass_correction * norm.pdf(x, mean, devst)
plt.plot(x, blackcurve, 'k', linewidth=1)


plt.hist(dfothers_others['EKdiffnorm'], num_bins, range=[-0.5, 0.5], color = "skyblue", histtype='bar', ec='black', density="True")

min_ylim, max_ylim = plt.ylim()
plt.axvline(mean, color='r', linestyle='dashed', linewidth=1)
plt.text(mean*1.2, max_ylim*0.9675, '$\mu$ = {:.6f}'.format(mean))  

blackcurve = mass_correction * norm.pdf(x, mean, devst)
plt.plot(x, blackcurve, 'k', linewidth=1)


plt.yticks([])  
#title = "$\kappa(q);  q\le 10^7$; $\sigma =$ %.3f, $\mathcal{M} =$ %d" % (devst, mass_correction)  
title = "$\kappa(q);  q\le 10^7$; $\sigma =$ %.3f" % (devst)  
plt.title(title)

#plt.show()  
caption = "$\kappa(q); q$ s.t.  $2q + 1$, $2q-1$, $4q + 1$, $4q-1$ are not primes \par "
#caption = caption + "black curve : $\mathcal{M}\cdot \mathcal{N}(x,\mu,\sigma)$"
caption = caption + "black curve : $\mathcal{N}(x,\mu,\sigma)$"
#caption = caption + "green bars: $\log\, r(q); q$ s.t. $q,2q+1$ are both primes; \par "
#caption = caption + "cerulean bars: $\log\, r(q)$"
plt.xlabel(caption)
plt.title(title)
plt.savefig("plots_results/histo-kappaq-no-bumps2.png", dpi=600, bbox_inches='tight')
print('********* saved in histo-kappaq-no-bumps2.png   *********') 
plt.cla()


# scatter plot
print('********')
print('******** Print scatter EKdiffnorm ******* ')

#setting dimentions
plt.figure(figsize=(11, 8))

# setting ticks
million=1000000 # parameter
ax = plt.axes()
ax.set_xticks([0,million,2*million, 3*million, 4*million, 5*million, 6*million, 7*million, 8*million , 9*million, 10*million ])
ax.set_xticklabels(['$0$', '$10^6$','$2\cdot 10^6$','$3\cdot 10^6$','$4\cdot 10^6$','$5\cdot 10^6$','$6\cdot 10^6$','$7\cdot 10^6$','$8\cdot 10^6$','$9\cdot 10^6$','$10^7$'])

avg = df['EKdiffnorm'].mean() 
M = df['EKdiffnorm'].max() 
m = df['EKdiffnorm'].min() 

ax.set_yticks([m,-0.5,-0.4,-0.3,-0.2, -0.1, avg, 0.1, 0.2,0.3, 0.4, 0.5, M])
ax.set_yticklabels(['{0:.3f}'.format(m), '$-0.5$','$-0.4$','$-0.3$','$-0.2$', '$-0.1$', '{0:.6f}'.format(avg), '$0.1$', '$0.2$', '$0.3$', '$0.4$', '$0.5$', '{0:.3f}'.format(M)])

# setting bounds
plt.xlim(0,10*million) 
plt.ylim(-0.63, 0.67) 

# building the grid - horizontal lines
count=8
for n in range(count):
    ax.axhline(y=-0.1*n, linewidth = 1, color='k', linestyle='dotted')
    ax.axhline(y=0.1*n, linewidth = 1, color='k', linestyle='dotted')
## red line in y=0    
ax.axhline(y=0.0, linewidth = 1, color='r', linestyle='dotted') 
    
# building the grid - vertical lines
count=10
for n in range(count): 
    ax.axvline(x=million*n,  linewidth = 1, color='k', linestyle='dotted')


# plotting the scatter plot
plt.scatter(df['q'], df['EKdiffnorm'], s=1, c = 'blue', marker = '.', label = "$\kappa(q)$")

# plotting max and min
maxekdiffnorm = df['EKdiffnorm'].max()
minekdiffnorm = df['EKdiffnorm'].min()
plt.plot(argM, maxekdiffnorm, marker='o', markersize=4, color="red",label=" max ", linestyle="None")
plt.plot(argm, minekdiffnorm, marker='o', markersize=4, color="black",label=" min ", linestyle="None")

ax.axhline(y=maxekdiffnorm, linewidth = 1, color='r', linestyle='solid')  
ax.axhline(y=minekdiffnorm, linewidth = 1, color='k', linestyle='solid')  

ax.axhline(y=avg, linewidth = 1, color='r', linestyle='dashed', label ="$\mu$ (mean value)")

ax.legend(loc="upper right", title="", frameon=True, ncol = len(ax.lines))
# inserting a title and a caption
title = "$\kappa(q)$: scatter plot"
plt.title(title)
caption = ""
plt.xlabel(caption) 

# save plot on file
plt.savefig("plots_results/kappaq-scat.png", dpi=600, bbox_inches='tight', orientation='landscape')
plt.savefig("plots_results/kappaq-scat-300dpi.png", dpi=300, bbox_inches='tight', orientation='landscape')
# clear plot
plt.cla()

print('********* saved in kappaq-scat.png   *********')
print('*******')


end_time = time.monotonic()
elapsed_time =  end_time - start_time
print("Total Execution time : ", format_timespan(elapsed_time)) 
print('------------------------------------------')


print(' *****   End PYTHON script   *****')