# load a set of common macros instructions # # Copyright (C) 2024 Alessandro Languasco # import sys import pandas as pd import numpy as np import matplotlib as mpl import matplotlib.pyplot as plt import matplotlib.animation as animation import scipy as sp from scipy.stats import norm import sympy as sy import humanfriendly as hu from humanfriendly import format_timespan import time from datetime import datetime now = datetime.now() # dd/mm/YY H:M:S dt_string = now.strftime("%d/%m/%Y %H:%M:%S") # use Latex for the captions and titles of matplotlib plt.rcParams['text.usetex'] = True plt.rcParams.update({ "text.usetex": True, "font.family": "mathptmx" }) plt.rc('text.latex', preamble=r'\usepackage{amssymb}') # prints 20 digits in pandas dataframes pd.set_option("display.precision", 20) print('--------------------------------------------------------------------------------') print(' Author of the Script: Alessandro Languasco; (C) 2023') print('--------------------------------------------------------------------------------') print(' Versions: ') print('python: {}'.format(sys.version)) print('matplotlib: {}'.format(mpl.__version__)) print('pandas: {}'.format(pd.__version__)) print('numpy: {}'.format(np.__version__)) print('scipy: {}'.format(sp.__version__)) print('sympy: {}'.format(sy.__version__)) print('humanfriendly: {}'.format(hu.__version__)) print('--------------------') print("date and time =", dt_string) print('--------------------') # function to Find indexes of an element in pandas dataframe # from https://thispointer.com/python-find-indexes-of-an-element-in-pandas-dataframe/ # to be used at the bottom of the script to detect when mineven == minodd and when there are small values def getIndexes(dfObj, value): ''' Get index positions of value in dataframe i.e. dfObj.''' listOfPos = list() # Get bool dataframe with True at positions where the given value exists result = dfObj.isin([value]) # Get list of columns that contains the value seriesObj = result.any() columnNames = list(seriesObj[seriesObj == True].index) # Iterate over list of columns and fetch the rows indexes where value exists for col in columnNames: rows = list(result[col][result[col] == True].index) for row in rows: listOfPos.append((row, col)) # Return a list of tuples indicating the positions of value in the dataframe return listOfPos # functions needed to create the normalised values def sqrt(x): y = np.sqrt(x) return y def exp(x): y = np.exp(x) return y def ln(x): y = np.log(x) return y def lnln(x): y = np.log(np.log(x)) return y def twothirdpow(x): expo = 2/(3.) y = np.power(x,expo) return y def threequarterpow(x): expo = 3/(4.) y = np.power(x,expo) return y start_time = time.monotonic() data=pd.read_csv(r"data/global-EK.csv", sep=';') data.head(4) df = pd.DataFrame(data, columns= ['q','EK', 'EKplus', 'EKdiff']) #df = df.astype('float128') df = df.astype('float64') df = df.astype({"q": int}) # create a new column with normalised values # \kappa(q) : = (EK(q)^+- EK(q) ) /log q is EKdiffnorm df['EKdiffnorm'] = - (df['EKdiff'] / ln(df['q']) ) #print (df) # reads the list of Sophie Germain primes;first element data=pd.read_csv(r"data/sophie_germain_primes_first.csv", sep=';') data.head(1) dfaux = pd.DataFrame(data, columns= ['q']) #df = df.astype('float128') dfaux = dfaux.astype('int') dfgermain_first= df.merge(dfaux, on='q') del(dfgermain_first['EK']) del(dfgermain_first['EKplus']) del (dfaux) # reads the list of p such that (p,2p-1) are both primes data=pd.read_csv(r"data/twiceprimeminusone_first.csv", sep=';') data.head(1) dfaux = pd.DataFrame(data, columns= ['q']) #df = df.astype('float128') dfaux = dfaux.astype('int') dftwicepminusone_first = df.merge(dfaux, on='q') del(dftwicepminusone_first['EK']) del(dftwicepminusone_first['EKplus']) del(dfaux) # reads the list of p such that neither (p,2p+1) and (p,2p-1) are primes data=pd.read_csv(r"data/altri.csv", sep=';') data.head(1) dfaux = pd.DataFrame(data, columns= ['q']) #df = df.astype('float128') dfaux = dfaux.astype('int') dfothers = df.merge(dfaux, on='q') del(dfothers['EK']) del(dfothers['EKplus']) del(dfaux) # reads the list of p such that neither (p,2p+1), (p,2p-1), (p,4p+1), (p,4p-1) are primes data=pd.read_csv(r"data/altri_altri.csv", sep=';') data.head(1) dfaux = pd.DataFrame(data, columns= ['q']) #df = df.astype('float128') dfaux = dfaux.astype('int') dfothers_others = df.merge(dfaux, on='q') del(dfothers_others['EK']) del(dfothers_others['EKplus']) del(dfaux) # reads the list of p such that (p,4p-1) are both primes data=pd.read_csv(r"data/fourtimesprimeminusone_first.csv", sep=';') data.head(1) dfaux = pd.DataFrame(data, columns= ['q']) #df = df.astype('float128') dfaux = dfaux.astype('int') dffourtimespminusone_first = df.merge(dfaux, on='q') del(dffourtimespminusone_first['EK']) del(dffourtimespminusone_first['EKplus']) del(dfaux) # reads the list of p such that (p,4p-1) are both primes data=pd.read_csv(r"data/fourtimesprimeplusone_first.csv", sep=';') data.head(1) dfaux = pd.DataFrame(data, columns= ['q']) #df = df.astype('float128') dfaux = dfaux.astype('int') dffourtimespplusone_first = df.merge(dfaux, on='q') del(dffourtimespplusone_first['EK']) del(dffourtimespplusone_first['EKplus']) del(dfaux) #FINDING MAX and second MAX; MIN and second MIN; and where they are attained print(' ***** Statistics for EK(q)-EK(q)^+ *****') print('name source fileanalysis = global-EK.csv') print(' ----------------------- ') print(' ***** sums of | \kappa(q)| *****') df['absEKdiffnorm'] = np.abs(df['EKdiffnorm']) tentofive = 10**5 for jj in range(1, 101): partiallist = df['absEKdiffnorm'].loc[df.q < jj*10**5] S= partiallist.sum() numoddprimes = len(partiallist) print('Value of sum |\kappa(q)| , q <= ', jj, '*10^5 = ',S) print('the same over pi_odd(', jj, '*10^5) = ',S/numoddprimes) print(' ----- ') #endfor del(df['absEKdiffnorm']) print(' ----------------------- ') print(' ***** sum of EK(q)-EK(q)^+ *****') weight = np.log(10 ** 7) print('log(10^7) = ',weight) print(' -- ') S = df['EKdiff'].sum() print('Value of sum (EK(q)-EK(q)^+) = ',S) print('the same over log(10^7) = ',S/weight) print(' -- ') df['absEKdiff'] = np.abs(df['EKdiff']) S = df['absEKdiff'].sum() print('Value of sum | EK(q)-EK(q)^+ | = ',S) print('the same over log(10^7) = ',S/weight) del(df['absEKdiff']) print(' -- ') S = df['EK'].sum() print('Value of sum EK(q) = ',S) print('the same over log(10^7) = ',S/weight) print(' -- ') S = df['EKplus'].sum() print('Value of sum EK(q)^+ = ',S) print('the same over log(10^7) = ',S/weight) print(' -- ') # number of values >0 for EKdiff total=float(len(df)) greater0 = len(df['EKdiff'].loc[df.EKdiff > 0]) percent=float(greater0)/total print('number of EK(q) > EK(q)^+ :', greater0,'percentage =', percent*100) print(' -- ') # number of values <0 for EKdiffnorm less0 = len(df['EKdiff'].loc[df.EKdiff < 0]) percent=float(less0)/total print('number of EK(q) < EK(q)^+ :',less0,'percentage =', percent*100) print('total number (less0+greater0) =', less0+greater0) print('total number (number of rows) =', int(total)) print(' -- ') eps = 10**(-6) dfclose = df['EKdiff'].loc[ np.abs(df.EKdiff) < eps ] close = len(dfclose) percent = float(close)/total print('number of | EK(q) - EK(q)^+ | < eps = 10^(-6) :', close,'percentage =', percent*100) print(' -- ') print('list of such values:') print(dfclose) del(dfclose) eps = 10**(-5) dfclose = df['EKdiff'].loc[ np.abs(df.EKdiff) < eps ] close = len(dfclose) percent = float(close)/total print('number of | EK(q) - EK(q)^+ | < eps = 10^(-5) :', close,'percentage =', percent*100) print('list of such values:') print(dfclose) del(dfclose) print(' -- ') eps = 10**(-4) dfclose = df['EKdiff'].loc[ np.abs(df.EKdiff) < eps ] close = len(dfclose) percent = float(close)/total print('number of | EK(q) - EK(q)^+ | < eps = 10^(-4) :', close,'percentage =', percent*100) print('list of such values:') print(dfclose) del(dfclose) print('-----------------------------------------------------') print('Bump analysis on normalised values kappa(q) :') print('-------------') print('Central bump:') dfclose = df['EKdiffnorm'].loc[ np.abs(df.EKdiffnorm) < 0.125] close = len(dfclose) central_bump = close percent = float(close)/total print('number of | kappa(q) | < 1/8 :', close,'; percentage =', percent*100) #print('list of such values:') #print(dfclose) del(dfclose) print('-------------') print('Central bump:') dfclose = df['EKdiffnorm'].loc[ np.abs(df.EKdiffnorm) < 0.1] close = len(dfclose) central_bump = close percent = float(close)/total print('number of | kappa(q) | < 1/10 :', close,'; percentage =', percent*100) #print('list of such values:') #print(dfclose) del(dfclose) print('-------------') print('Bump in 0.125:') dfclose = df['EKdiffnorm'].loc[ (np.abs(df.EKdiffnorm - 0.125) < 0.1) ] close = len(dfclose) right_secondary_bump = close percent = float(close)/total print('number of | kappa(q) - 0.125 | < 1/10 :', close,'; percentage =', percent*100) #print('list of such values:') #print(dfclose) del(dfclose) print('-------------') print('Bump in -0.125:') dfclose = df['EKdiffnorm'].loc[ (np.abs(df.EKdiffnorm + 0.125) < 0.1) ] close = len(dfclose) left_secondary_bump = close percent = float(close)/total print('number of | kappa(q) + 0.125 | < 1/10 :', close,'; percentage =', percent*100) #print('list of such values:') #print(dfclose) del(dfclose) print('-------------') print('Bump in 0.25:') dfclose = df['EKdiffnorm'].loc[ (np.abs(df.EKdiffnorm - 0.25) < 0.1) ] close = len(dfclose) right_secondary_bump = close percent = float(close)/total print('number of | kappa(q) - 0.25 | < 1/10 :', close,'; percentage =', percent*100) #print('list of such values:') #print(dfclose) del(dfclose) print('-------------') print('Bump in -0.25:') dfclose = df['EKdiffnorm'].loc[ (np.abs(df.EKdiffnorm + 0.25) < 0.1) ] close = len(dfclose) left_secondary_bump = close percent = float(close)/total print('number of | kappa(q) + 0.25 | < 1/10 :', close,'; percentage =', percent*100) #print('list of such values:') #print(dfclose) del(dfclose) print('-------------') print('Right tail:') dfclose = df['EKdiffnorm'].loc[df.EKdiffnorm >= 0.35 ] close = len(dfclose) right_tail = close percent = float(close)/total print('number of kappa(q) >= 1/4+1/10 :', close,'; percentage =', percent*100) #print('list of such values:') #print(dfclose) del(dfclose) print('-------------') print('Left tail:') dfclose = df['EKdiffnorm'].loc[ df.EKdiffnorm <= -0.35 ] close = len(dfclose) left_tail = close percent = float(close)/total print('number of kappa(q) <= -1/4-1/10 :', close,'; percentage =', percent*100) #print('list of such values:') #print(dfclose) del(dfclose) print('-------------') print('Bumps analysis kappa(q) (summary):') print('number of left sec bump:', left_secondary_bump,'; percentage =', float(left_secondary_bump)/total*100) print('number of right sec bump:', right_secondary_bump,'; percentage =', float(right_secondary_bump)/total*100) print('left tail:', central_bump,'; percentage =', float(left_tail)/total*100) print('right tail:', central_bump,'; percentage =', float(right_tail)/total*100) print('secondary bumps:', left_secondary_bump+right_secondary_bump,'; percentage =', float(left_secondary_bump+right_secondary_bump)/total*100) print('central bump:', central_bump,'; percentage =', float(central_bump)/total*100) print('-----------------------------------------------------') print(' -- ') print(' ----------------------- ') print(' ***** MAX kappa(q) *****') maxima =df.nlargest(10, 'EKdiffnorm', keep = 'all') M=maxima['EKdiffnorm'].iloc[0] argM=maxima['q'].iloc[0] M1=maxima['EKdiffnorm'].iloc[1] argM1=maxima['q'].iloc[1] print('maximal value on column kappa(q) =',M,'attained at q =',argM) print('second maximal value on column kappa(q) =',M1,'attained at q =',argM1) print(maxima) del maxima print(' ***** MIN kappa(q) *****') minima =df.nsmallest(10, 'EKdiffnorm', keep = 'all') m=minima['EKdiffnorm'].iloc[0] argm=minima['q'].iloc[0] m1=minima['EKdiffnorm'].iloc[1] argm1=minima['q'].iloc[1] print('minimal value on column kappa(q) =',m,'attained at q =',argm) print('second minimal value on column kappa(q) =',m1,'attained at q =',argm1) print(minima) del minima print(' ***** STATS kappa(q) *****') # number of values >0 for EKdiffnorm total=float(len(df)) greater0 = len(df['EKdiffnorm'].loc[df.EKdiffnorm > 0]) percent=float(greater0)/total print('number of kappa(q) >0 :', greater0,'percentage =', percent*100) # number of values <0 for EKdiffnorm less0 = len(df['EKdiffnorm'].loc[df.EKdiffnorm < 0]) percent=float(less0)/total print('number of kappa(q) <0 :',less0,'percentage =', percent*100) print('total number (less0+greater0) =', less0+greater0) print('total number (number of rows) =', int(total)) # number of EKdiffnorm > 0.6 greater2= len(df.loc[df.EKdiffnorm > 0.6]) if greater2 > 0 : print(' ***** Possible wrong datas *******') print('number of EKdiffnorm > 0.6 :',greater2,'percentage =', percent*100) # number of EKdiffnorm <- 0.6 greater2= len(df.loc[df.EKdiffnorm < -0.6]) if greater2 > 0 : print(' ***** Possible wrong datas *******') print('number of EKdiffnorm < -0.6 :',greater2,'percentage =', percent*100) print('******* MEAN, STANDARD DEVIATION, VARIANCE ********') mean = df['EKdiffnorm'].mean() devst = df['EKdiffnorm'].std() variance = df.var()['EKdiffnorm'] print('mean (kappaq) = ',mean) print('standard deviation (kappaq) = ',devst) print('variance (kappaq) = ',variance) print('*******') print('********* plotting histogram and normal curves *********') #df = df.astype('float128') #mean = np.float128(mean) #devst = np.float128(devst) mean = np.float64(mean) devst = np.float64(devst) num_bins = 100 plt.grid(True, linestyle='--', axis = 'y') plt.hist(df['EKdiffnorm'], num_bins, range=[-0.5, 0.5], color = "skyblue", histtype='bar', ec='black', density="True") plt.yticks([]) # Plot the Probability Density Function. #xmin, xmax = plt.xlim() xmin, xmax = -0.5, 0.5 x = np.linspace(xmin, xmax, num_bins) bin_width = (x.max() - x.min()) / num_bins mass_correction = total * bin_width mass_correction = 1 print('******** blue normal function data ******* ') mass_correction_blue = mass_correction devst_blue = 1.13*devst print('total data ', total) print('max x = ', x.max()) print('min x = ', x.min()) print('bin_width = ', bin_width) print('num bins = ', num_bins) print('mean = ', mean) print('standard deviation = ', devst) print('standard deviation blue (1.13*devst) = ', devst_blue) print('mass correction = ', mass_correction) print('mass correction blue (mass_correction) = ', mass_correction_blue) # Save a version without the density curves #title = "$\kappa(q); q\le 10^7$: $\sigma =$ %.3f, $\mathcal{M} =$ %d" % (devst, mass_correction) title = "$\kappa(q); q\le 10^7$: $\sigma =$ %.3f" % (devst) plt.title(title) caption = " \par " caption = caption + " \par " caption = caption + " " plt.xlabel(caption) min_ylim, max_ylim = plt.ylim() plt.axvline(mean, color='r', linestyle='dashed', linewidth=1) plt.text(mean*1.2, max_ylim*0.9675, '$\mu$ = {:.6f}'.format(mean)) plt.axvline(0.25, color='r', linestyle='dashed', linewidth=1) plt.text(0.25, max_ylim*0.9675, '$0.25$') plt.axvline(-0.25, color='r', linestyle='dashed', linewidth=1) plt.text(-0.25, max_ylim*0.9675, '$-0.25$') plt.savefig("plots_results/histo-kappaq-nocurves.png", dpi=600, bbox_inches='tight') print('********* saved in histo-kappaq-nocurves.png *********') # scatter plot print('********* saved in histo-EKdiffnorm.png *********') print('******* MEAN, STANDARD DEVIATION, VARIANCE for the bumps ********') mean = np.float64(mean) devst = np.float64(devst) print('******* MEAN, STANDARD DEVIATION, VARIANCE ********') #print(df.describe()) mean = df['EKdiffnorm'].mean() devst = df['EKdiffnorm'].std() variance = df.var()['EKdiffnorm'] print('mean (kappaq) = ',mean) print('standard deviation (kappaq) = ',devst) print('variance (kappaq) = ',variance) print('*******') print('******* highlitgthed contribution of the bumps ********') #xmin, xmax = plt.xlim() xmin, xmax = -0.5, 0.5 x = np.linspace(xmin, xmax, num_bins) bin_width = (x.max() - x.min()) / num_bins mass_correction = total * bin_width mass_correction = 1 blackcurve = mass_correction * norm.pdf(x, mean, devst) plt.plot(x, blackcurve, 'k', linewidth=1) hist0 = plt.hist(df['EKdiffnorm'], num_bins, range=[-0.5, 0.5], color = "skyblue", histtype='bar', ec='black') hist1 = plt.hist(dfgermain_first['EKdiffnorm'], num_bins, range=[-0.5, 0.5], color = "green", histtype='bar', ec='black') hist2 = plt.hist(dftwicepminusone_first['EKdiffnorm'], num_bins, range=[-0.5, 0.5], color = "yellow", histtype='bar', ec='black') min_ylim, max_ylim = plt.ylim() plt.axvline(mean, color='r', linestyle='dashed', linewidth=1) shift = 0.1 plt.text(mean*1.2-shift, max_ylim*0.9675, '$\mu$ = {:.6f}'.format(mean)) plt.axvline(0.25, color='r', linestyle='dashed', linewidth=1) plt.text(0.25, max_ylim*0.9675, '$0.25$') plt.axvline(-0.25, color='r', linestyle='dashed', linewidth=1) plt.text(-0.25-shift+0.02, max_ylim*0.9675, '$-0.25$') plt.yticks([]) #title = "$\kappa(q); q\le 10^7$; $\sigma =$ %.3f, $\mathcal{M} =$ %d" % (devst, mass_correction) title = "$\kappa(q); q\le 10^7$; $\sigma =$ %.3f" % (devst) plt.title(title) #plt.show() caption = "" caption = caption + "yellow bars: $\kappa(q); q$ s.t. $q\ge 5$ and $q,2q-1$ are both primes; \par " caption = caption + "green bars: $\kappa(q)$ s.t. $q\ge 5$ and $q,2q+1$ are both primes; \par " caption = caption + "cerulean bars: $\kappa(q)$" plt.xlabel(caption) plt.title(title) plt.savefig("plots_results/histo-kappaq-highlighted-bumps1.png", dpi=600, bbox_inches='tight') print('********* saved in histo-kappaq-highlighted-bumps1.png *********') plt.cla() print('******* MEAN, STANDARD DEVIATION, VARIANCE for the bumps ********') mean = np.float64(mean) devst = np.float64(devst) print('******* MEAN, STANDARD DEVIATION, VARIANCE ********') #print(df.describe()) mean = df['EKdiffnorm'].mean() devst = df['EKdiffnorm'].std() variance = df.var()['EKdiffnorm'] print('mean (kappaq) = ',mean) print('standard deviation (kappaq) = ',devst) print('variance (kappaq) = ',variance) print('*******') print('******* highlitgthed contribution of the bumps ********') #xmin, xmax = plt.xlim() xmin, xmax = -0.5, 0.5 x = np.linspace(xmin, xmax, num_bins) bin_width = (x.max() - x.min()) / num_bins mass_correction = total * bin_width mass_correction = 1 blackcurve = mass_correction * norm.pdf(x, mean, devst) plt.plot(x, blackcurve, 'k', linewidth=1) hist0 = plt.hist(df['EKdiffnorm'], num_bins, range=[-0.5, 0.5], color = "skyblue", histtype='bar', ec='black') #hist1 = plt.hist(dfgermain_first['EKdiffnorm'], num_bins, range=[-0.5, 0.5], color = "green", histtype='bar', ec='black') #hist2 = plt.hist(dftwicepminusone_first['EKdiffnorm'], num_bins, range=[-0.5, 0.5], color = "yellow", histtype='bar', ec='black') hist3 = plt.hist(dffourtimespminusone_first['EKdiffnorm'], num_bins, range=[-0.5, 0.5], color = "magenta", histtype='bar', ec='black') hist4 = plt.hist(dffourtimespplusone_first['EKdiffnorm'], num_bins, range=[-0.5, 0.5], color = "brown", histtype='bar', ec='black') min_ylim, max_ylim = plt.ylim() plt.axvline(mean, color='r', linestyle='dashed', linewidth=1) shift = 0.1 plt.text(mean*1.2-shift, max_ylim*0.9675, '$\mu$ = {:.6f}'.format(mean)) #plt.axvline(0.25, color='r', linestyle='dashed', linewidth=1) #plt.text(0.25, max_ylim*0.9675, '$0.25$') #plt.axvline(-0.25, color='r', linestyle='dashed', linewidth=1) #plt.text(-0.25-shift+0.02, max_ylim*0.9675, '$-0.25$') plt.axvline(0.125, color='b', linestyle='dashed', linewidth=1) plt.text(0.125, max_ylim*0.9675, '$0.125$') plt.axvline(-0.125, color='b', linestyle='dashed', linewidth=1) plt.text(-0.125-shift, max_ylim*0.9675, '$-0.125$') plt.yticks([]) #title = "$\kappa(q); q\le 10^7$; $\sigma =$ %.3f, $\mathcal{M} =$ %d" % (devst, mass_correction) title = "$\kappa(q); q\le 10^7$; $\sigma =$ %.3f" % (devst) plt.title(title) #plt.show() caption = "" #caption = caption + "yellow bars: $\kappa(q); q$ s.t. $q\ge 5$ and $q,2q-1$ are both primes; \par " #caption = caption + "green bars: $\kappa(q)$ s.t. $q\ge 5$ and $q,2q+1$ are both primes; \par " caption = caption + "magenta bars: $\kappa(q); q$ s.t. $q\ge 5$ and $q,4q-1$ are both primes; \par " caption = caption + "brown bars: $\kappa(q)$ s.t. $q\ge 5$ and $q,4q+1$ are both primes; \par " caption = caption + "cerulean bars: $\kappa(q)$" plt.xlabel(caption) plt.title(title) plt.savefig("plots_results/histo-kappaq-highlighted-bumps2.png", dpi=600, bbox_inches='tight') print('********* saved in histo-kappaq-highlighted-bumps2.png *********') plt.cla() print('******* MEAN, STANDARD DEVIATION, VARIANCE: removed bumps ********') mean = np.float64(mean) devst = np.float64(devst) #print(df.describe()) #df['nobumps']= df['logKummer"data/]-dfgermain_first['logKummer"data/]-dftwicepminusone_first['logKummer"data/] total_nobumps=float(len(dfothers)) xmin, xmax = -0.5, 0.5 x = np.linspace(xmin, xmax, num_bins) bin_width = (x.max() - x.min()) / num_bins #mass_correction = total_nobumps * bin_width mass_correction = 1 mean = dfothers['EKdiffnorm'].mean() devst = dfothers['EKdiffnorm'].std() variance = dfothers.var()['EKdiffnorm'] print('total data (no bumps) ', total_nobumps) print('max x = ', x.max()) print('min x = ', x.min()) print('bin_width = ', bin_width) print('num bins = ', num_bins) print('mean (kappaq; no bumps) = ',mean) print('standard deviation (kappaq; no bumps) = ',devst) print('variance (kappaq; no bumps) = ',variance) print('*******') blackcurve = mass_correction * norm.pdf(x, mean, devst) plt.plot(x, blackcurve, 'k', linewidth=1) plt.hist(dfothers['EKdiffnorm'], num_bins, range=[-0.5, 0.5], color = "skyblue", histtype='bar', ec='black', density="True") min_ylim, max_ylim = plt.ylim() plt.axvline(mean, color='r', linestyle='dashed', linewidth=1) plt.text(mean*1.2, max_ylim*0.9675, '$\mu$ = {:.6f}'.format(mean)) blackcurve = mass_correction * norm.pdf(x, mean, devst) plt.plot(x, blackcurve, 'k', linewidth=1) plt.yticks([]) #title = "$\kappa(q); q\le 10^7$; $\sigma =$ %.3f, $\mathcal{M} =$ %d" % (devst, mass_correction) title = "$\kappa(q); q\le 10^7$; $\sigma =$ %.3f" % (devst) plt.title(title) #plt.show() caption = "$\kappa(q); q$ s.t. $2q + 1$, $2q-1$ are not primes \par " #caption = caption + "black curve : $\mathcal{M}\cdot \mathcal{N}(x,\mu,\sigma)$" caption = caption + "black curve : $\mathcal{N}(x,\mu,\sigma)$" #caption = caption + "green bars: $\log\, r(q); q$ s.t. $q,2q+1$ are both primes; \par " #caption = caption + "cerulean bars: $\log\, r(q)$" plt.xlabel(caption) plt.title(title) plt.savefig("plots_results/histo-kappaq-no-bumps1.png", dpi=600, bbox_inches='tight') print('********* saved in histo-kappaq-no-bumps1.png *********') plt.cla() print('******* MEAN, STANDARD DEVIATION, VARIANCE: removed bumps ********') mean = np.float64(mean) devst = np.float64(devst) #print(df.describe()) #df['nobumps']= df['logKummer"data/]-dfgermain_first['logKummer"data/]-dftwicepminusone_first['logKummer"data/] total_nobumps=float(len(dfothers_others)) xmin, xmax = -0.5, 0.5 x = np.linspace(xmin, xmax, num_bins) bin_width = (x.max() - x.min()) / num_bins #mass_correction = total_nobumps * bin_width mass_correction = 1 mean = dfothers_others['EKdiffnorm'].mean() devst = dfothers_others['EKdiffnorm'].std() variance = dfothers_others.var()['EKdiffnorm'] print('total data (no bumps) ', total_nobumps) print('max x = ', x.max()) print('min x = ', x.min()) print('bin_width = ', bin_width) print('num bins = ', num_bins) print('mean (kappaq; no bumps) = ',mean) print('standard deviation (kappaq; no bumps) = ',devst) print('variance (kappaq; no bumps) = ',variance) print('*******') blackcurve = mass_correction * norm.pdf(x, mean, devst) plt.plot(x, blackcurve, 'k', linewidth=1) plt.hist(dfothers_others['EKdiffnorm'], num_bins, range=[-0.5, 0.5], color = "skyblue", histtype='bar', ec='black', density="True") min_ylim, max_ylim = plt.ylim() plt.axvline(mean, color='r', linestyle='dashed', linewidth=1) plt.text(mean*1.2, max_ylim*0.9675, '$\mu$ = {:.6f}'.format(mean)) blackcurve = mass_correction * norm.pdf(x, mean, devst) plt.plot(x, blackcurve, 'k', linewidth=1) plt.yticks([]) #title = "$\kappa(q); q\le 10^7$; $\sigma =$ %.3f, $\mathcal{M} =$ %d" % (devst, mass_correction) title = "$\kappa(q); q\le 10^7$; $\sigma =$ %.3f" % (devst) plt.title(title) #plt.show() caption = "$\kappa(q); q$ s.t. $2q + 1$, $2q-1$, $4q + 1$, $4q-1$ are not primes \par " #caption = caption + "black curve : $\mathcal{M}\cdot \mathcal{N}(x,\mu,\sigma)$" caption = caption + "black curve : $\mathcal{N}(x,\mu,\sigma)$" #caption = caption + "green bars: $\log\, r(q); q$ s.t. $q,2q+1$ are both primes; \par " #caption = caption + "cerulean bars: $\log\, r(q)$" plt.xlabel(caption) plt.title(title) plt.savefig("plots_results/histo-kappaq-no-bumps2.png", dpi=600, bbox_inches='tight') print('********* saved in histo-kappaq-no-bumps2.png *********') plt.cla() # scatter plot print('********') print('******** Print scatter EKdiffnorm ******* ') #setting dimentions plt.figure(figsize=(11, 8)) # setting ticks million=1000000 # parameter ax = plt.axes() ax.set_xticks([0,million,2*million, 3*million, 4*million, 5*million, 6*million, 7*million, 8*million , 9*million, 10*million ]) ax.set_xticklabels(['$0$', '$10^6$','$2\cdot 10^6$','$3\cdot 10^6$','$4\cdot 10^6$','$5\cdot 10^6$','$6\cdot 10^6$','$7\cdot 10^6$','$8\cdot 10^6$','$9\cdot 10^6$','$10^7$']) avg = df['EKdiffnorm'].mean() M = df['EKdiffnorm'].max() m = df['EKdiffnorm'].min() ax.set_yticks([m,-0.5,-0.4,-0.3,-0.2, -0.1, avg, 0.1, 0.2,0.3, 0.4, 0.5, M]) ax.set_yticklabels(['{0:.3f}'.format(m), '$-0.5$','$-0.4$','$-0.3$','$-0.2$', '$-0.1$', '{0:.6f}'.format(avg), '$0.1$', '$0.2$', '$0.3$', '$0.4$', '$0.5$', '{0:.3f}'.format(M)]) # setting bounds plt.xlim(0,10*million) plt.ylim(-0.63, 0.67) # building the grid - horizontal lines count=8 for n in range(count): ax.axhline(y=-0.1*n, linewidth = 1, color='k', linestyle='dotted') ax.axhline(y=0.1*n, linewidth = 1, color='k', linestyle='dotted') ## red line in y=0 ax.axhline(y=0.0, linewidth = 1, color='r', linestyle='dotted') # building the grid - vertical lines count=10 for n in range(count): ax.axvline(x=million*n, linewidth = 1, color='k', linestyle='dotted') # plotting the scatter plot plt.scatter(df['q'], df['EKdiffnorm'], s=1, c = 'blue', marker = '.', label = "$\kappa(q)$") # plotting max and min maxekdiffnorm = df['EKdiffnorm'].max() minekdiffnorm = df['EKdiffnorm'].min() plt.plot(argM, maxekdiffnorm, marker='o', markersize=4, color="red",label=" max ", linestyle="None") plt.plot(argm, minekdiffnorm, marker='o', markersize=4, color="black",label=" min ", linestyle="None") ax.axhline(y=maxekdiffnorm, linewidth = 1, color='r', linestyle='solid') ax.axhline(y=minekdiffnorm, linewidth = 1, color='k', linestyle='solid') ax.axhline(y=avg, linewidth = 1, color='r', linestyle='dashed', label ="$\mu$ (mean value)") ax.legend(loc="upper right", title="", frameon=True, ncol = len(ax.lines)) # inserting a title and a caption title = "$\kappa(q)$: scatter plot" plt.title(title) caption = "" plt.xlabel(caption) # save plot on file plt.savefig("plots_results/kappaq-scat.png", dpi=600, bbox_inches='tight', orientation='landscape') plt.savefig("plots_results/kappaq-scat-300dpi.png", dpi=300, bbox_inches='tight', orientation='landscape') # clear plot plt.cla() print('********* saved in kappaq-scat.png *********') print('*******') end_time = time.monotonic() elapsed_time = end_time - start_time print("Total Execution time : ", format_timespan(elapsed_time)) print('------------------------------------------') print(' ***** End PYTHON script *****')