Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
D
dca_mini_apps
Manage
Activity
Members
Labels
Plan
Issues
0
Issue boards
Milestones
Iterations
Wiki
Code
Merge requests
0
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Locked files
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Code review analytics
Issue analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
NDIP
Tool Sources
DCA
dca_mini_apps
Commits
c26bd007
Commit
c26bd007
authored
2 years ago
by
Doak, Peter W.
Browse files
Options
Downloads
Patches
Plain Diff
two new scripts checking the normalcy and doing bootstrap analysis
parent
a5ee9925
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
CTINT_bootstrap.py
+153
-0
153 additions, 0 deletions
CTINT_bootstrap.py
CTINT_check_normalicy.py
+69
-0
69 additions, 0 deletions
CTINT_check_normalicy.py
with
222 additions
and
0 deletions
CTINT_bootstrap.py
0 → 100644
+
153
−
0
View file @
c26bd007
# // Copyright (C) 2022 UT-Battelle, LLC
# // All rights reserved.
# //
# // See LICENSE for terms of usage.
# // See CITATION.md for citation guidelines, if DCA++ is used for scientific publications.
# //
# // Author: Peter Doak (doakpw@ornl.gov)
# //
# This script allows reading a single measure G dca++ dataset and does a bootstrap
# analysis taking the default number of scipy.stats.bootstrap (9999 I believe) to look at the 95%
# reduced G_k_w. What I see is that this results in what is, to me, quite a low level of error,
# so at least for single band Hubbard single site CTINT runs nothing like 1million samples is needed to
# get a good result with a converged g0,
# The std deviation for a particular sample is quite wide. I don't think error is the correct interpretation of the
# spread of the std::deviation. The range of values for random vertexes is not "error" but the correct evaluation of
# particular diagram solutions.
import
CTINT
import
numpy
as
np
import
adios2
from
mpi4py
import
MPI
import
re
import
matplotlib
as
mpl
import
matplotlib.pyplot
as
plt
from
Adios2DCA
import
readDCARun
,
readSampleG_k_w
import
statsmodels.api
as
sm
import
os.path
from
scipy.stats
import
bootstrap
from
multiprocessing
import
Pool
import
pickle
# Global MPI communicator; adios2.open below is collective and needs it.
comm = MPI.COMM_WORLD
def do_bootstrap(freq_data, conf_level=0.9999, resamples=1000):
    """Bootstrap the mean of the sampled G values at one frequency.

    Parameters
    ----------
    freq_data : array_like
        1-D collection of measured G values at a single Matsubara frequency.
    conf_level : float, optional
        Confidence level for the interval (default 0.9999, i.e. a 99.99%
        interval, matching the plot labels in ``__main__``).
    resamples : int, optional
        Number of bootstrap resamples (default 1000).

    Returns
    -------
    tuple of (float, float, float)
        (lower confidence bound, sample mean, upper confidence bound).
    """
    freq_mean = np.mean(freq_data)
    # scipy.stats.bootstrap expects a sequence of samples, hence the 1-tuple.
    res = bootstrap((freq_data,), np.mean,
                    n_resamples=resamples,
                    method="basic",
                    confidence_level=conf_level)
    return res.confidence_interval.low, freq_mean, res.confidence_interval.high
def read_and_std(file_name, n_ranks=10, n_samples=1000, n_freq=1024,
                 freq_window=slice(451, 712)):
    """Read every per-sample G_k_w from an adios2 archive and return the
    per-frequency standard deviation across all samples.

    Parameters
    ----------
    file_name : str
        Path to the adios2 BP archive containing the single-measure G data.
    n_ranks : int, optional
        Number of MPI ranks that wrote samples (default 10).
    n_samples : int, optional
        Number of samples written per rank (default 1000).
    n_freq : int, optional
        Number of Matsubara frequencies in each G_k_w sample (default 1024).
    freq_window : slice, optional
        Frequency indices to analyze (default slice(451, 712)).

    Returns
    -------
    numpy.ndarray
        Standard deviation over samples for each frequency in ``freq_window``.
    """
    with adios2.open(file_name, "r", comm) as adios_fh:
        # Collect every sample as a column: one histogram of values per frequency.
        frequency_histogram_data = np.zeros([n_freq, n_ranks * n_samples])
        meas_index = 0
        for rank in range(n_ranks):
            for samp in range(n_samples):
                # readSampleG_k_w appears 1-based in the sample index, hence samp + 1.
                G_k_w = readSampleG_k_w(adios_fh, rank, 0, samp + 1)
                # Keep only the first component of the trailing indices
                # (presumably band/spin/k — TODO confirm against Adios2DCA).
                frequency_histogram_data[:, meas_index] = G_k_w[:, 0, 0, 0, 0, 0]
                meas_index += 1
        freq_actual = frequency_histogram_data[freq_window, :]
        # Spread across samples at each frequency; per the file header this is
        # the natural spread of diagram evaluations, not an "error" estimate.
        return np.std(freq_actual, 1)
def read_and_bootstrap(file_name, pickle_name='bootstrap_results.pickle',
                       n_ranks=10, n_samples=1000, n_freq=1024,
                       freq_window=slice(451, 712)):
    """Read all per-sample G_k_w data, bootstrap each frequency, and cache
    the resulting confidence intervals to a pickle file.

    Parameters
    ----------
    file_name : str
        Path to the adios2 BP archive containing the single-measure G data.
    pickle_name : str, optional
        Where to cache the bootstrap results (default
        'bootstrap_results.pickle', matching what ``__main__`` reloads).
    n_ranks : int, optional
        Number of MPI ranks that wrote samples (default 10).
    n_samples : int, optional
        Number of samples written per rank (default 1000).
    n_freq : int, optional
        Number of Matsubara frequencies in each G_k_w sample (default 1024).
    freq_window : slice, optional
        Frequency indices to bootstrap (default slice(451, 712)).

    Returns
    -------
    list of tuple
        One (low, mean, high) tuple per frequency in ``freq_window``.
    """
    with adios2.open(file_name, "r", comm) as adios_fh:
        dca_globals = readDCARun(adios_fh)
        # Echo every '*parameters*' entry so the run configuration is visible.
        parameters = [(key, item) for key, item in dca_globals.items()
                      if re.search("parameters", key)]
        for param in parameters:
            print("param", param)
        # Collect every sample as a column: one histogram of values per frequency.
        frequency_histogram_data = np.zeros([n_freq, n_ranks * n_samples])
        meas_index = 0
        for rank in range(n_ranks):
            for samp in range(n_samples):
                # readSampleG_k_w appears 1-based in the sample index, hence samp + 1.
                G_k_w = readSampleG_k_w(adios_fh, rank, 0, samp + 1)
                frequency_histogram_data[:, meas_index] = G_k_w[:, 0, 0, 0, 0, 0]
                meas_index += 1
        freq_actual = frequency_histogram_data[freq_window, :]
        # Bootstrap each frequency row in parallel; do_bootstrap yields
        # (low, mean, high) per frequency.
        with Pool(8) as pool:
            interval_mean = pool.map(do_bootstrap, freq_actual, chunksize=4)
        # Cache the results so repeat runs can skip the slow bootstrap pass.
        with open(pickle_name, 'wb') as the_pick:
            pickle.dump(interval_mean, the_pick, pickle.HIGHEST_PROTOCOL)
        return interval_mean
if __name__ == '__main__':
    # Input adios2 archive holding per-sample G_k_w data.
    file_name = "single_meas_G_100B_10k.bp"
    pickle_name = 'bootstrap_results.pickle'

    # Reuse cached bootstrap intervals when available; the bootstrap is slow.
    if os.path.isfile(pickle_name):
        with open(pickle_name, 'rb') as the_pick:
            interval_mean = pickle.load(the_pick)
    else:
        interval_mean = read_and_bootstrap(file_name)
    # Rows: frequencies in the analyzed window; columns: (low, mean, high).
    interval_mean = np.array(interval_mean)

    fig = plt.figure(figsize=(10, 10), constrained_layout=True)
    axs = fig.subplots(1, 1)

    # Pull the accumulated cluster G_k_w and frequency grid for plotting.
    with adios2.open(file_name, "r", comm) as adios_fh:
        dca_globals = readDCARun(adios_fh)
        cluster_g_k_w = dca_globals['cluster_greens_function_G_k_w']
        wn = dca_globals['frequency_domain_elements']

    # Per-frequency std-dev band around the accumulated G_k_w.
    freq_std = read_and_std(file_name)
    up_std = cluster_g_k_w[451:712, 0, 0, 0, 0, 0] + freq_std
    dn_std = cluster_g_k_w[451:712, 0, 0, 0, 0, 0] - freq_std

    axs.plot(wn[451:712], cluster_g_k_w[451:712, 0, 0, 0, 0, 0],
             label="accumulated cluster G_k_w")
    axs.plot(wn[451:712], up_std, label="up std")
    axs.plot(wn[451:712], dn_std, label="dwn_std")
    # Fix: this scatter plots interval_mean[:, 0] — the LOW bound — but was
    # previously labeled "99.99% bootstrap high", duplicating the label below.
    axs.scatter(wn[451:712], interval_mean[:, 0], s=3, marker='.',
                label="99.99% bootstrap low")
    #axs.scatter(wn[451:712], interval_mean[:,1], s=1.5, marker=".", label="mean samples")
    axs.scatter(wn[451:712], interval_mean[:, 2], s=3, marker=".",
                label="99.99% bootstrap high")
    axs.legend()
    plt.show()
This diff is collapsed.
Click to expand it.
CTINT_check_normalicy.py
0 → 100644
+
69
−
0
View file @
c26bd007
# // Copyright (C) 2022 UT-Battelle, LLC
# // All rights reserved.
# //
# // See LICENSE for terms of usage.
# // See CITATION.md for citation guidelines, if DCA++ is used for scientific publications.
# //
# // Author: Peter Doak (doakpw@ornl.gov)
# //
# This script allows quick examination of many samples and comparison between the
# python miniapp single configuration G_k_w and that written from DCA++
# it is set up to read from an adios2 BP4 archive set as filename
import
CTINT
import
numpy
as
np
import
adios2
from
mpi4py
import
MPI
import
re
import
matplotlib
as
mpl
import
matplotlib.pyplot
as
plt
from
Adios2DCA
import
readDCARun
,
readSampleG_k_w
import
statsmodels.api
as
sm
from
scipy.stats
import
norm
# Global MPI communicator; adios2.open below is collective and needs it.
comm = MPI.COMM_WORLD
if __name__ == '__main__':
    # Input adios2 BP archive holding per-sample G_k_w data.
    file_name = "single_meas_G_100B_10k.bp"

    with adios2.open(file_name, "r", comm) as adios_fh:
        dca_globals = readDCARun(adios_fh)
        # Echo every '*parameters*' entry so the run configuration is visible.
        parameters = [(key, item) for key, item in dca_globals.items()
                      if re.search("parameters", key)]
        for param in parameters:
            print("param", param)

        # Collect every sample as a row: one histogram of values per
        # frequency column (10 ranks x 1000 samples, 1024 frequencies).
        frequency_histogram_data = np.zeros([10000, 1024])
        meas_index = 0
        for rank in range(10):
            for samp in range(1000):
                # readSampleG_k_w appears 1-based in the sample index, hence samp + 1.
                G_k_w = readSampleG_k_w(adios_fh, rank, 0, samp + 1)
                frequency_histogram_data[meas_index, :] = G_k_w[:, 0, 0, 0, 0, 0]
                meas_index += 1

    fig, axs = plt.subplots(3, 1)
    fig.suptitle("QQ plot for normality of values for G_k_w frequencies")
    # Three representative frequencies inside the physical window.
    for ip, freq in enumerate([512, 550, 600]):
        freq_data = frequency_histogram_data[:, freq]
        # Center on the sample mean before the normality check.
        freq_data = freq_data - np.mean(freq_data)
        # qqplot draws into the supplied axis; discard its returned figure
        # instead of clobbering `fig` as the original did.
        sm.qqplot(freq_data, line='s', fit=True, ax=axs[ip])
        axs[ip].set_title("Freq {}".format(freq))
    plt.show()
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment