Skip to content
Snippets Groups Projects
Commit c26bd007 authored by Doak, Peter W.'s avatar Doak, Peter W.
Browse files

two new scripts checking the normality and doing bootstrap analysis

parent a5ee9925
No related branches found
No related tags found
No related merge requests found
# // Copyright (C) 2022 UT-Battelle, LLC
# // All rights reserved.
# //
# // See LICENSE for terms of usage.
# // See CITATION.md for citation guidelines, if DCA++ is used for scientific publications.
# //
# // Author: Peter Doak (doakpw@ornl.gov)
# //
# This script reads a single-measurement G DCA++ dataset and performs a bootstrap
# analysis (see do_bootstrap for the resample count and confidence level used) to look at the
# confidence interval of the reduced G_k_w. What I see is that this results in what is, to me, quite a low level of error,
# so at least for single-band Hubbard single-site CTINT runs, nothing like 1 million samples is needed to
# get a good result with a converged g0.
# The standard deviation for a particular sample is quite wide. I don't think "error" is the correct interpretation of the
# spread of the standard deviation: the range of values for random vertexes is not "error" but the correct evaluation of
# particular diagram solutions.
import CTINT
import numpy as np
import adios2
from mpi4py import MPI
import re
import matplotlib as mpl
import matplotlib.pyplot as plt
from Adios2DCA import readDCARun, readSampleG_k_w
import statsmodels.api as sm
import os.path
from scipy.stats import bootstrap
from multiprocessing import Pool
import pickle
comm = MPI.COMM_WORLD
def do_bootstrap(freq_data, conf_level=0.9999, n_resamples=1000, rng=None):
    """Bootstrap a confidence interval for the mean of one frequency's samples.

    Parameters
    ----------
    freq_data : 1-D array-like
        Sample values for a single frequency.
    conf_level : float, optional
        Confidence level passed to scipy.stats.bootstrap (default 0.9999,
        matching the original hard-coded value).
    n_resamples : int, optional
        Number of bootstrap resamples (default 1000).
    rng : int, numpy.random.Generator, or None, optional
        Seed or generator for reproducible resampling. Default None keeps the
        original nondeterministic behavior.

    Returns
    -------
    tuple
        (ci_low, sample_mean, ci_high)
    """
    freq_mean = np.mean(freq_data)
    # scipy.stats.bootstrap expects a sequence of sample arrays, hence the 1-tuple.
    res = bootstrap((freq_data,), np.mean, n_resamples=n_resamples,
                    method="basic", confidence_level=conf_level,
                    random_state=rng)
    return res.confidence_interval.low, freq_mean, res.confidence_interval.high
def read_and_std(file_name, n_ranks=10, n_samples=1000, n_freq=1024,
                 freq_window=(451, 712)):
    """Read every per-sample G_k_w from an adios2 archive and return the
    per-frequency standard deviation over all samples.

    Parameters
    ----------
    file_name : str
        Path of the adios2 BP archive to read.
    n_ranks : int, optional
        Number of MPI ranks that wrote samples (default 10).
    n_samples : int, optional
        Number of samples per rank (default 1000).
    n_freq : int, optional
        Number of frequency points per sample (default 1024).
    freq_window : tuple of int, optional
        Half-open (start, stop) frequency-index window to reduce over
        (default (451, 712), the window used throughout this script).

    Returns
    -------
    numpy.ndarray
        Standard deviation across samples for each frequency in the window.
    """
    with adios2.open(file_name, "r", comm) as adios_fh:
        # One column per measurement: the value at each frequency index.
        frequency_histogram_data = np.zeros([n_freq, n_ranks * n_samples])
        meas_index = 0
        for rank in range(n_ranks):
            for samp in range(n_samples):
                # Samples are 1-indexed in the archive, hence samp + 1.
                G_k_w = readSampleG_k_w(adios_fh, rank, 0, samp + 1)
                frequency_histogram_data[:, meas_index] = G_k_w[:, 0, 0, 0, 0, 0]
                meas_index += 1
    # Restrict to the frequency window of interest before reducing.
    freq_actual = frequency_histogram_data[freq_window[0]:freq_window[1], :]
    return np.std(freq_actual, axis=1)
def read_and_bootstrap(file_name, pickle_name='bootstrap_results.pickle'):
    """Read all per-sample G_k_w data from an adios2 archive, bootstrap a
    confidence interval of the mean at each frequency, cache the result as a
    pickle, and return it.

    Parameters
    ----------
    file_name : str
        Path of the adios2 BP archive to read.
    pickle_name : str, optional
        Where to cache the bootstrap results (default
        'bootstrap_results.pickle', matching the cache checked in __main__).

    Returns
    -------
    list of tuple
        One (ci_low, mean, ci_high) triple per frequency in the 451:712 window.
    """
    with adios2.open(file_name, "r", comm) as adios_fh:
        dca_globals = readDCARun(adios_fh)
        # Echo every parameter entry for the record.
        parameters = [(key, item) for key, item in dca_globals.items()
                      if re.search("parameters", key)]
        for param in parameters:
            print("param", param)
        # One column per measurement: the value at each frequency index.
        frequency_histogram_data = np.zeros([1024, 10000])
        meas_index = 0
        for rank in range(10):
            for samp in range(1000):
                # Samples are 1-indexed in the archive, hence samp + 1.
                G_k_w = readSampleG_k_w(adios_fh, rank, 0, samp + 1)
                frequency_histogram_data[:, meas_index] = G_k_w[:, 0, 0, 0, 0, 0]
                meas_index += 1
    freq_actual = frequency_histogram_data[451:712, :]
    # Bootstrap each frequency row independently; this is the expensive step,
    # so fan it out over a process pool.
    with Pool(8) as pool:
        interval_mean = pool.map(do_bootstrap, freq_actual, chunksize=4)
    # Cache the result so __main__ can skip this whole pass next time.
    with open(pickle_name, 'wb') as the_pick:
        pickle.dump(interval_mean, the_pick, pickle.HIGHEST_PROTOCOL)
    return interval_mean
if __name__ == '__main__':
    file_name = "single_meas_G_100B_10k.bp"
    pickle_name = 'bootstrap_results.pickle'
    # Reuse cached bootstrap results when present; they are expensive to compute.
    if os.path.isfile(pickle_name):
        with open(pickle_name, 'rb') as the_pick:
            interval_mean = pickle.load(the_pick)
    else:
        interval_mean = read_and_bootstrap(file_name)
    interval_mean = np.array(interval_mean)
    fig = plt.figure(figsize=(10, 10), constrained_layout=True)
    axs = fig.subplots(1, 1)
    with adios2.open(file_name, "r", comm) as adios_fh:
        dca_globals = readDCARun(adios_fh)
        cluster_g_k_w = dca_globals['cluster_greens_function_G_k_w']
        wn = dca_globals['frequency_domain_elements']
    freq_std = read_and_std(file_name)
    # Plot only the 451:712 frequency window, the same window reduced above.
    window = slice(451, 712)
    g_window = cluster_g_k_w[window, 0, 0, 0, 0, 0]
    up_std = g_window + freq_std
    dn_std = g_window - freq_std
    axs.plot(wn[window], g_window, label="accumulated cluster G_k_w")
    axs.plot(wn[window], up_std, label="up std")
    axs.plot(wn[window], dn_std, label="dwn_std")
    # Column 0 is the CI low bound, column 1 the mean, column 2 the high bound
    # (see do_bootstrap's return order). The low-bound label previously said
    # "high", duplicating the high-bound label.
    axs.scatter(wn[window], interval_mean[:, 0], s=3, marker='.',
                label="99.99% bootstrap low")
    #axs.scatter(wn[window], interval_mean[:, 1], s=1.5, marker=".", label="mean samples")
    axs.scatter(wn[window], interval_mean[:, 2], s=3, marker=".",
                label="99.99% bootstrap high")
    axs.legend()
    plt.show()
# // Copyright (C) 2022 UT-Battelle, LLC
# // All rights reserved.
# //
# // See LICENSE for terms of usage.
# // See CITATION.md for citation guidelines, if DCA++ is used for scientific publications.
# //
# // Author: Peter Doak (doakpw@ornl.gov)
# //
# This script allows quick examination of many samples and comparison between the
# python miniapp single configuration G_k_w and that written from DCA++
# it is set up to read from an adios2 BP4 archive set as filename
import CTINT
import numpy as np
import adios2
from mpi4py import MPI
import re
import matplotlib as mpl
import matplotlib.pyplot as plt
from Adios2DCA import readDCARun, readSampleG_k_w
import statsmodels.api as sm
from scipy.stats import norm
comm = MPI.COMM_WORLD
if __name__ == '__main__':
    file_name = "single_meas_G_100B_10k.bp"
    with adios2.open(file_name, "r", comm) as adios_fh:
        dca_globals = readDCARun(adios_fh)
        # Echo every parameter entry for the record.
        parameters = [(key, item) for key, item in dca_globals.items()
                      if re.search("parameters", key)]
        for param in parameters:
            print("param", param)
        # One row per measurement, one column per frequency index.
        frequency_histogram_data = np.zeros([10000, 1024])
        meas_index = 0
        for rank in range(10):
            for samp in range(1000):
                # Samples are 1-indexed in the archive, hence samp + 1.
                G_k_w = readSampleG_k_w(adios_fh, rank, 0, samp + 1)
                frequency_histogram_data[meas_index, :] = G_k_w[:, 0, 0, 0, 0, 0]
                meas_index += 1
    fig, axs = plt.subplots(3, 1)
    fig.suptitle("QQ plot for normality of values for G_k_w frequencies")
    # QQ-plot three representative frequencies (centered on their means) to
    # eyeball whether the per-frequency sample distributions look normal.
    for ip, freq in enumerate([512, 550, 600]):
        freq_data = frequency_histogram_data[:, freq]
        freq_data = freq_data - np.mean(freq_data)
        sm.qqplot(freq_data, line='s', fit=True, ax=axs[ip])
        axs[ip].set_title("Freq {}".format(freq))
    plt.show()
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment