Commit 72974151 authored by Godoy, William

Merge branch 'filesize-per-step' into 'master'

Filesize per step

See merge request !5
parents 3a66012a 6b1edd14
......@@ -8,6 +8,8 @@ Glob = "c27321d9-0574-5035-807b-f59d2c89b15c"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
CSV = "336ed68f-0bac-5ca0-87d4-7b16caf5d00b"
DataStructures = "864edb3b-99cc-5e75-8d2d-829cb0a9cfe8"
[compat]
julia = "1"
......@@ -20,6 +22,3 @@ JuliaFormatter = "98e50ef6-434e-11e9-1051-2b60c6c9e899"
[targets]
test = ["Test"]
......@@ -6,17 +6,20 @@ import Exio
function main()
#### Configuration section ######
## exio_init requires three inputs and returns a handler of type exioH
# only supported: AmrexCastro
app::String = "AmrexCastro"
# location where several run outputs are located they must have a common prefix
outputPrefix::String = "/home/wgodoy/workspace/Castro/Exec/hydro_tests/Sedov/case"
# Local
# outputPrefix::String = "/home/wgodoy/workspace/Castro/Exec/hydro_tests/Sedov/case"
# Summit location
outputPrefix::String = "/gpfs/alpine/proj-shared/csc310/wgodoy/amrex/Castro/Exec/hydro_tests/Sedov/case"
# file name inside outputs directories
logFile::String = "run.log"
#################################
# init Exio (can create as many entities as needed)
exio = Exio.exio_init(app, outputPrefix, logFile)
# create a linear model based on provided runs (the larger, the better)
......
plots_cells,plots_size,plots_size_L0,plots_size_L1,plots_size_L2,caseID,caseDir
4096,472612.0,461951.0,0.0,0.0,case12,plt_3d_00000
8192,945273.0,923914.0,0.0,0.0,case12,plt_3d_00002
12288,1.417935e6,1.385877e6,0.0,0.0,case12,plt_3d_00004
16384,1.890595e6,1.84784e6,0.0,0.0,case12,plt_3d_00006
20480,2.363255e6,2.309803e6,0.0,0.0,case12,plt_3d_00008
24576,2.835917e6,2.771766e6,0.0,0.0,case12,plt_3d_00010
This diff is collapsed.
......@@ -11,9 +11,13 @@ Pkg.add("DataFrames")
Pkg.add("GLM")
Pkg.add("Plots")
Pkg.add("PackageCompiler")
Pkg.add("Flux")
Pkg.add("CSV")
Pkg.add("DataStructures")
using PackageCompiler
create_sysimage([:Glob, :Plots, :GLM, :DataFrames], sysimage_path = "jexio_deps.so")
create_sysimage([:Glob, :Plots, :GLM, :DataFrames, :Flux, :CSV, :DataStructures],
sysimage_path = "jexio_deps.so")
exit()
......@@ -11,7 +11,14 @@ mutable struct Amrex <: AbstractAmrex
Amrex(outputPrefix::String, runlogFile::String) = new(
"Amrex",
outputPrefix,
["max_step", "amr.check_int", "amr.plot_int", "amr.n_cell", "amr.max_level", "caseID"],
[
"max_step",
"amr.check_int",
"amr.plot_int",
"amr.n_cell",
"amr.max_level",
"caseID",
],
["plots_size", "checkpoints_size"],
runlogFile,
)
......@@ -109,8 +116,8 @@ function _get_independent_variables(extractor::AbstractAmrex, parameters)::Dict{
maxLevel = get(parameters, "amr.max_level", "")
independentVariables["amr.max_level"] = maxLevel == "" ? 1 : parse(Int32, maxLevel)
regrid = get(parameters, "amr.regrid_int", "")
independentVariables["amr.regrid_int"] = regrid == "" ? 1 : parse(Int32, regrid)
regrid = prod(parse.(Int64, split(get(parameters, "amr.regrid_int", ["0"]))))
independentVariables["amr.regrid_int"] = regrid
return independentVariables
......
......@@ -3,6 +3,9 @@ include("../helper/helperSystem.jl")
import DataFrames
import GLM
import Plots
import CSV
import DataStructures
struct AmrexCastro <: AbstractAmrex
app::String
......@@ -44,7 +47,8 @@ function run_linear_models(extractor::AmrexCastro)
for key in keys(extractor.outputs)
if (key == "plots_size")
_run_linear_model_plots_size(extractor, X)
#_run_linear_model_plots_size(extractor, X)
_run_linear_models_plots_size_step(extractor, X)
end
end
end
......@@ -89,7 +93,7 @@ function _get_linear_model_X(extractor::AmrexCastro)::DataFrames.DataFrame
# get outputDir name
independentVariables["caseID"] = helper_get_relative_path(outputDir)
if !isXInit
if isXInit == false
X = DataFrames.DataFrame(independentVariables)
isXInit = true
else
......@@ -102,63 +106,213 @@ function _get_linear_model_X(extractor::AmrexCastro)::DataFrames.DataFrame
end
function _run_linear_model_plots_size(extractor::AmrexCastro, X::DataFrames.DataFrame)
outputDirs = helper_get_prefix_directories(extractor.outputPrefix)
# single column DataFrame Y in the linear model
plotsSizesData = DataFrames.DataFrame(plots_sizes = Int64[])
function _run_linear_models_plots_size_step(extractor::AmrexCastro, X::DataFrames.DataFrame)
# Append the next cumulative size to `data`: the size of `directory` on its own
# when `data` is still empty, otherwise the last stored value plus that size.
function _push_datasize!(directory::String, data::Array{Int64,1})
    runningTotal = isempty(data) ? 0 : last(data)
    push!(data, runningTotal + helper_get_directory_size(directory))
end
function _find_max_nlevels(outputDirs::Array{String})::Int64
max_nlevels::Int64 = 0
for outputDir in outputDirs
inputFile::String = _get_input_file(extractor, outputDir)
parameters = _input_parser(extractor, inputFile)
rootPlotName = get(parameters, "amr.plot_file", "")
# TODO refactor this later
inputFile::String = _get_input_file(extractor, outputDir)
parameters = _input_parser(extractor, inputFile)
rootPlotName = get(parameters, "amr.plot_file", "")
# find all directories with rootPlotName and get its size
plotFileDirs = helper_get_prefix_directories(string(outputDir, "/", rootPlotName))
plotsSize::Int64 = 0
for plotFileDir in plotFileDirs
sizeDir::Int64 = helper_get_directory_size(plotFileDir)
plotsSize += sizeDir
# find all directories with rootPlotName and get its size
# root of _plt directories
plotFileDirs = helper_get_prefix_directories(string(outputDir, "/", rootPlotName))
for plotFileDir in plotFileDirs
levelDirs = helper_get_prefix_directories(string(plotFileDir,"/Level_"))
nlevels = size(levelDirs,1)
if max_nlevels < nlevels
max_nlevels = nlevels
end
push!(plotsSizesData, [plotsSize])
end
end
return max_nlevels
end
function _find_max_nranks(outputDirs::Array{String})::Int64
max_nranks::Int64 = 0
for outputDir in outputDirs
inputFile::String = _get_input_file(extractor, outputDir)
parameters = _input_parser(extractor, inputFile)
rootPlotName = get(parameters, "amr.plot_file", "")
println(plotsSizesData)
# find all directories with rootPlotName and get its size
# root of _plt directories
plotFileDirs = helper_get_prefix_directories(string(outputDir, "/", rootPlotName))
for plotFileDir in plotFileDirs
levelDirs = helper_get_prefix_directories(string(plotFileDir,"/Level_"))
for levelDir in levelDirs
rankDirs = helper_get_prefix_directories(string(levelDir,"/Cell_D_"))
nranks = size(rankDirs,1)
if max_nranks < nranks
max_nranks = nranks
end
end
end
end
return max_nranks
end
# output directories
outputDirs = helper_get_prefix_directories(extractor.outputPrefix)
# input with total_cells * output_number
Xd::Array{Int64,1} = []
# output cumulative data size, per timestep, level, rank
Yd_Timesteps::Array{Int64,1} = []
Yd_Levels::DataStructures.SortedDict{ String,Array{Int64,1} } = Dict()
Yd_Ranks::DataStructures.SortedDict{ String,Array{Int64,1} } = Dict()
nlevels::Int64 = _find_max_nlevels(outputDirs)
nranks::Int64 = _find_max_nranks(outputDirs)
for level in (1:nlevels)
levelKey::String = string("Level_",level-1)
push!(Yd_Levels, levelKey => [])
println("Yd_Levels", Yd_Levels, " ", level)
# Prepare the X independent variables in the linear model
XNames::Array{String} = extractor.outputs["plots_size"]
for rank in (1:nranks)
rankKey::String = string(levelKey,"/Cell_D_", lpad(rank-1,5,"0") )
push!(Yd_Ranks, rankKey => [])
end
end
println("Yd_Ranks: ", Yd_Ranks)
caseIDs::Array{String,1} = []
caseDirs::Array{String,1} = []
col1 = Symbol("amr.nplot_files")
col2 = Symbol("amr.ncells")
insert!(plotsSizesData, 2 ,X[!,col1] .* X[!,col2], :new_data)
#plotsSizesData[!,col1] =
for (index,outputDir) in enumerate(outputDirs)
#for XName in XNames
#columnName = Symbol(XName)
# this syntax [!, symbol] does not make a copy, use [:, symbol] for copies
#plotsSizesData[!, columnName] = X[!, columnName]
#end
# formula from https://discourse.julialang.org/t/glm-jl-with-unknown-column-names/20692/5
response = Symbol(names(plotsSizesData)[1])
predictors = Symbol.(names(plotsSizesData)[2:end])
println(plotsSizesData)
f = @eval(GLM.@formula($response ~ (+)( $(predictors...))))
inputFile::String = _get_input_file(extractor, outputDir)
parameters = _input_parser(extractor, inputFile)
rootPlotName = get(parameters, "amr.plot_file", "")
# find all directories with rootPlotName and get its size
# root of _plt directories
plotFileDirs = helper_get_prefix_directories(string(outputDir, "/", rootPlotName))
ols = GLM.lm(f, plotsSizesData)
println("I/O linear model formula:")
println(ols)
#import Plots
#display(Plots.plot(X, Y))
counter::Int32 = 1
# extract ncells
ncells = X[!,"amr.ncells"][index]
# caseID
caseID = helper_get_relative_path(outputDir)
for (plotIndex,plotFileDir) in enumerate(plotFileDirs)
# Xd
push!( Xd, counter*ncells)
# Yd timesteps cumulative
if plotIndex == 1
push!(Yd_Timesteps, helper_get_directory_size(plotFileDir) )
else
push!(Yd_Timesteps, last(Yd_Timesteps) + helper_get_directory_size(plotFileDir) )
end
levelDirs = helper_get_prefix_directories(string(plotFileDir,"/Level_"))
currentLevels = helper_get_relative_path.(levelDirs)
#println("Current levels: ", currentLevels)
# Levels
for (levelKey,levelSizeValues) in Yd_Levels
levelDir = string(plotFileDir,"/",levelKey)
foundArray = findall( x -> x == levelKey, currentLevels)
if size(foundArray,1) == 1
# bug _push_datasize!(levelDir, Yd_Levels[levelKey])
if plotIndex == 1
push!( Yd_Levels[levelKey], helper_get_directory_size(levelDir) )
else
push!( Yd_Levels[levelKey], last(Yd_Levels[levelKey]) + helper_get_directory_size(levelDir) )
end
else
push!(Yd_Levels[levelKey],0)
end
# Level Rank data
# add ranks
rankFullFiles = helper_get_prefix_directories(string(levelDir,"/Cell_D_"))
rankFiles = helper_get_relative_path.(rankFullFiles)
for rankID in 1:nranks
rankKey::String = string(levelKey,"/Cell_D_", lpad(rankID-1,5,"0") )
# check if file exist
checkFileName = string(levelDir,"/Cell_D_", lpad(rankID-1,5,"0") )
if isfile( checkFileName )
rankFileSize = filesize(checkFileName)
# bug push!(Yd_Ranks[rankKey], rankFileSize)
if plotIndex == 1
push!(Yd_Ranks[rankKey], rankFileSize)
# push!( Yd_Levels[levelKey], helper_get_directory_size(levelDir) )
else
push!( Yd_Ranks[rankKey], last(Yd_Ranks[rankKey]) + rankFileSize )
end
else
push!(Yd_Ranks[rankKey],0)
end
end
end
# caseID
push!(caseIDs, caseID )
# caseDir
push!(caseDirs, helper_get_relative_path(plotFileDir) )
counter = counter + 1
end
end
df::DataFrames.DataFrame =
DataFrames.DataFrame( plots_cells = Xd,
plots_size = Yd_Timesteps)
for (key,Yd_Level) in Yd_Levels
df[!,key] = Yd_Level
end
for (key,Yd_Rank) in Yd_Ranks
df[!,key] = Yd_Rank
end
df[!,"caseID"] = caseIDs
df[!,"caseDir"] = caseDirs
#println(df)
CSV.write("plot_size.csv", df, header=true)
#display(Plots.scatter(Xd, Yd))
#readline()
end
import Flux
import Statistics
using Plots;
pyplot();
using LaTeXStrings
"""
    FluxTrain(nepocs::Int32, X::Array{Float64,1}, Y::Array{Float64,1})

Fit a one-input, one-output `Flux.Dense` layer to the samples `(X, Y)` by
minimizing the mean squared error with ADAM for `nepocs` training passes.

The data points, the prediction before training and the prediction after
training are drawn on the same plot. The initial parameters, the data, the
initial prediction, the trained model, its parameters and the final
prediction are printed to standard output.

# Arguments
- `nepocs`: number of calls to `Flux.train!` over the full data set.
- `X`: independent variable samples.
- `Y`: dependent variable samples, one per entry of `X`.
"""
function FluxTrain(nepocs::Int32, X::Array{Float64,1}, Y::Array{Float64,1})
    # starting guess for the weight and bias of the linear unit
    W = [500.0]
    b = [10000.0]
    model = Flux.Dense(W, b)
    parameters = Flux.params(model)
    println("Initial parameter: ", parameters)

    # set up data: each vector becomes a single-row matrix (one column per sample)
    Xd = reduce(hcat, X)
    Yd = reduce(hcat, Y)
    data = [(Xd, Yd)]
    println("Data", data)

    # alternatives tried: Flux.Descent(1), Flux.ADAM()
    optimizer = Flux.ADAM(1, (0.99, 0.999))
    loss(x, y) = Statistics.mean((model(x) .- y) .^ 2)

    # prediction before any training, kept for the comparison plot
    Yd_0 = model(Xd)
    println("Initial solution: ", Yd_0)
    plot(X, Y, st = :scatter, label = L"y")

    for iter = 1:nepocs
        Flux.train!(loss, parameters, data, optimizer)
    end

    # Evaluate the trained model BEFORE reporting it; previously the value
    # printed as "Final solution" had been computed ahead of the training loop.
    Yd_nE = model(Xd)

    println("Module: ", model)
    println("Parameters_nE: ", parameters)
    println("Final solution: ", Yd_nE)

    plot!(Xd', Yd_0', lc = :green, label = L"y_0")
    plot!(Xd', Yd_nE', lc = :red, label = L"y_{n_\mathrm{E}}")
    plot!(xlabel = L"x", ylabel = L"y", title = "Data for linear model")
end
"""
    test()

Run `FluxTrain` on a small hard-coded sample of five `(x, y)` pairs for
100000 training passes.
"""
function test()
    epochs = Int32(100000)
    samplesX = Float64[256, 4096, 512, 512, 8192]
    samplesY = Float64[435867, 2959963, 1475489, 1485569, 4518030]
    FluxTrain(epochs, samplesX, samplesY)
end
# Top-level call: runs the hard-coded example fit defined in test().
test()
import CSV
import DataFrames
using Plots
"""
    plot_CSV_3col( filename::String )

Read the CSV file `filename` and show two scatter plots, grouped by the
`caseID` column, waiting for a line on standard input after each one.

The first figure plots column `plots_size` against `plots_cells` on log-log
axes. The second figure plots the 3rd, 4th and 5th columns of the table
(labelled L0, L1 and L2) against `plots_cells`.
"""
function plot_CSV_3col( filename::String )
    table = DataFrames.DataFrame(CSV.File(filename))

    cells = table[!,"plots_cells"]
    totalBytes = table[!,"plots_size"]
    caseIDs = table[!,"caseID"]

    # axis labels and figure size are common to both figures
    cellsLabel = "cumulative output ncells ( output_counter x ncells )"
    bytesLabel = "cumulative output data size (bytes)"
    figureSize = (1200,850)

    # Figure 1: total cumulative size, one marker shape/color per case
    totalFigure = Plots.scatter(
        cells,
        totalBytes,
        groups = caseIDs,
        xaxis = :log,
        yaxis = :log,
        legendfontsize = 7,
        legend = :outertopleft,
        # [shapes], size, alpha
        marker = ([:x :+ :star4 :vline :square :circle], 7, 1.0),
        markercolor = [:blue :green :red :orange :pink],
        xlabel = cellsLabel,
        ylabel = bytesLabel,
        title = "Amrex Castro hydro_test Sedov 2d.cyl_in_cartcoords output size",
        size = figureSize,
        reuse = false,
    )
    Plots.gui(totalFigure)
    #Plots.png(totalFigure)
    readline()

    # Figure 2: per-level series taken by column position
    level0Bytes = table[!,3]
    level1Bytes = table[!,4]
    level2Bytes = table[!,5]

    levelFigure = Plots.scatter(
        cells,
        level0Bytes,
        groups = caseIDs,
        label = "L0",
        marker = ([:x], 10, 1.0),
        markercolor = [:blue :green :red :orange :pink],
        legendfontsize = 9,
        legend = :outertopleft,
        xlabel = cellsLabel,
        ylabel = bytesLabel,
        title = "Amrex Castro hydro_test Sedov 2d.cyl_in_cartcoords output size per Level",
        size = figureSize,
        reuse = true,
    )
    Plots.scatter!(
        cells,
        level1Bytes,
        groups = caseIDs,
        label = "L1",
        marker = ([:+], 10, 1.0),
        markercolor = [:blue :green :red :orange :pink],
    )
    Plots.scatter!(
        cells,
        level2Bytes,
        groups = caseIDs,
        label = "L2",
        marker = ([:star4], 10, 1.0),
        markercolor = [:blue :green :red :orange :pink],
    )
    Plots.gui(levelFigure)
    #Plots.png(levelFigure)
    readline()
end
"""
    main()

Plot the output-size CSV found at the hard-coded path below.
"""
function main()
    # modify this to the file path
    # alternative input: "/home/wgodoy/hdd_home/work/proxy_io/AmrexCastro/plot_size_case4.csv"
    csvPath = "/home/wgodoy/hdd_home/work/proxy_io/AmrexCastro/plot_size.csv"
    plot_CSV_3col(csvPath)
end
# Top-level call: runs main(), which plots the CSV at its hard-coded path.
main()
......@@ -57,6 +57,10 @@ end
function helper_get_directory_size(directory::String)::Int64
size::Int64 = 0
if !isdir(directory)
return size
end
for (root, dirs, files) in walkdir(directory)
size += sum(map(filesize, joinpath.(root, files)))
end
......
CheckPointVersion_1.0
3
0
0
0
(0 (0,0,0)(93750000,93750000,93750000) 1)
(RealBox 0 1500000000 0 1500000000 0 1500000000 )((0,0,0) (15,15,15) (0,0,0))P(0,0,0)
0.00036333937718578777
0.00036333937718578777
1
0
0
0
(0 (0,0,0)(93750000,93750000,93750000) 1)
(RealBox 0 1500000000 0 1500000000 0 1500000000 )((0,0,0) (15,15,15) (0,0,0))P(0,0,0)
(4 0
((0,0,0) (15,7,7) (0,0,0))
((0,0,8) (15,7,15) (0,0,0))
((0,8,0) (15,15,7) (0,0,0))
((0,8,8) (15,15,15) (0,0,0))
)4
((0,0,0) (15,15,15) (0,0,0))
(4 0
((0,0,0) (15,7,7) (0,0,0))
((0,0,8) (15,7,15) (0,0,0))
((0,8,0) (15,15,7) (0,0,0))
((0,8,8) (15,15,15) (0,0,0))
)-0.00036333937718578777
-0.00036333937718578777
0
0
1
Level_0/SD_0_New_MF
((0,0,0) (15,15,15) (0,0,0))
(4 0