include("../helper/helperSystem.jl")

import DataFrames
import GLM
import Plots
import CSV
import DataStructures

"""
    AmrexCastro(outputPrefix::String, runlogFile::String)

Extractor configuration for AMReX Castro runs.

# Fields
- `app`: fixed application name, always `"AmrexCastro"`.
- `outputPrefix`: prefix used to locate the run output directories.
- `inputs`: names of the input-file parameters of interest.
- `outputs`: maps an output name (`"plots_size"`, `"checks_size"`) to a list of
  parameter names (presumably the independent variables of that model; confirm
  against `_get_independent_variables`).
- `runlogFile`: run log file name, stored as given.
"""
struct AmrexCastro <: AbstractAmrex
    app::String
    outputPrefix::String
    inputs::Array{String}
    outputs::Dict{String,Array{String}}
    runlogFile::String

    AmrexCastro(outputPrefix::String, runlogFile::String) = new(
        "AmrexCastro",
        outputPrefix,
        [
            "max_step",
            "amr.check_int",
            "amr.plot_int",
            "amr.n_cell",
            "amr.max_level",
            "castro.cfl",
            "castro.max_grid_size",
        ],
        Dict{String,Array{String}}(
            "plots_size" => ["amr.nplot_files", "amr.ncells"],
            "checks_size" => [
                "amr.check_int",
                "amr.ncheck_files",
                "amr.ncells",
                "amr.max_level",
                "amr.regrid_int",
            ],
        ),
        runlogFile,
    )
end

"""
    run_linear_models(extractor::AmrexCastro)

Build the table of independent variables once and run the model associated with each
entry of `extractor.outputs`. Only `"plots_size"` is handled at the moment (through the
per-step variant); every other key is ignored.
"""
function run_linear_models(extractor::AmrexCastro)
    X::DataFrames.DataFrame = _get_linear_model_X(extractor)

    for key in keys(extractor.outputs)
        if key == "plots_size"
            _run_linear_models_plots_size_step(extractor, X)
        end
    end
    return nothing
end

"""
    _get_input_file(extractor::AmrexCastro, outputDir::String)::String

Return the single file in `outputDir` whose name starts with `"input"`.

# Throws
- `ArgumentError` if no such file is found.
- `DimensionMismatch` if more than one such file is found.
"""
function _get_input_file(extractor::AmrexCastro, outputDir::String)::String
    # hardcoded input...find out if this is always the case with Castro
    inputFiles::Array{String} = helper_get_prefix_files("input", outputDir)

    if isempty(inputFiles)
        throw(ArgumentError(string(
            "jexio AmrexCastro: Can't find Castro input files in directory: ",
            outputDir,
        )))
    elseif length(inputFiles) != 1
        # length (not size) so that a plain count is reported instead of a tuple
        print("Number of files: ", length(inputFiles))
        throw(DimensionMismatch("jexio AmrexCastro: found more than one input file"))
    end

    return inputFiles[1]
end

"""
    _get_linear_model_X(extractor::AmrexCastro)::DataFrames.DataFrame

Collect the independent variables of every run directory matching
`extractor.outputPrefix` into a `DataFrame`, one row per run, in the order the
directories are returned by `helper_get_prefix_directories`. A `caseID` column holds the
relative name of each run directory. Returns an empty `DataFrame` when no directory
matches.
"""
function _get_linear_model_X(extractor::AmrexCastro)::DataFrames.DataFrame
    # get directories runs output location
    outputDirs = helper_get_prefix_directories(extractor.outputPrefix)
    println(outputDirs)

    X::DataFrames.DataFrame = DataFrames.DataFrame()
    isXInit::Bool = false

    for outputDir in outputDirs
        inputFile::String = _get_input_file(extractor, outputDir)
        # get relevant input parameters
        parameters = _input_parser(extractor, inputFile)
        independentVariables = _get_independent_variables(extractor, parameters)
        # get outputDir name
        independentVariables["caseID"] = helper_get_relative_path(outputDir)

        if !isXInit
            # the first run defines the columns of the table
            X = DataFrames.DataFrame(independentVariables)
            isXInit = true
        else
            DataFrames.push!(X, independentVariables)
        end
    end

    println(X)
    return X
end

"""
    _run_linear_models_plots_size_step(extractor::AmrexCastro, X::DataFrames.DataFrame)

Gather, for every plot-file directory of every run, the on-disk sizes per step, per
`Level_*` directory and per `Level_*/Cell_D_*` file, and write them to `plot_size.csv`
in the current working directory.

`X` must hold one row per run directory, in the same order as
`helper_get_prefix_directories(extractor.outputPrefix)`, with an `"amr.ncells"` column
(this is how `_get_linear_model_X` builds it).

Columns written: `plots_cells` (step counter within the run times `amr.ncells`),
`plots_size`, one column per level, one column per level/rank file, `caseID` and
`caseDir`.
"""
function _run_linear_models_plots_size_step(extractor::AmrexCastro, X::DataFrames.DataFrame)

    # Append the size of `directory` to `data` as a running total on top of the last
    # entry (or as the plain size when `data` is empty).
    # NOTE(review): the total is never reset between runs, and it restarts from zero
    # after a 0 placeholder was pushed for a missing level - confirm this is intended.
    function _push_datasize!(directory::String, data::Array{Int64,1})
        previous = isempty(data) ? 0 : last(data)
        return push!(data, previous + helper_get_directory_size(directory))
    end

    # Plot-file directories of one run: entries under `outputDir` whose name starts with
    # the "amr.plot_file" value of the run's input file (empty prefix when absent).
    function _plot_file_dirs(outputDir)
        inputFile::String = _get_input_file(extractor, outputDir)
        parameters = _input_parser(extractor, inputFile)
        rootPlotName = get(parameters, "amr.plot_file", "")
        return helper_get_prefix_directories(string(outputDir, "/", rootPlotName))
    end

    # Largest number of "Level_*" entries found in any plot-file directory of any run.
    # No local here may be called `nlevels`: that name belongs to the enclosing
    # function and would be captured (and overwritten) by this closure.
    function _find_max_nlevels(outputDirs::Array{String})::Int64
        max_nlevels::Int64 = 0
        for outputDir in outputDirs, plotFileDir in _plot_file_dirs(outputDir)
            levelDirs = helper_get_prefix_directories(string(plotFileDir, "/Level_"))
            max_nlevels = max(max_nlevels, length(levelDirs))
        end
        return max_nlevels
    end

    # Largest number of "Cell_D_*" entries found in any level directory of any run.
    # NOTE(review): the main loop below treats Cell_D_* as files (isfile/filesize);
    # confirm helper_get_prefix_directories also returns plain files.
    function _find_max_nranks(outputDirs::Array{String})::Int64
        max_nranks::Int64 = 0
        for outputDir in outputDirs, plotFileDir in _plot_file_dirs(outputDir)
            levelDirs = helper_get_prefix_directories(string(plotFileDir, "/Level_"))
            for levelDir in levelDirs
                rankDirs = helper_get_prefix_directories(string(levelDir, "/Cell_D_"))
                max_nranks = max(max_nranks, length(rankDirs))
            end
        end
        return max_nranks
    end

    # output directories
    outputDirs = helper_get_prefix_directories(extractor.outputPrefix)

    # input with total_cells * output_number
    Xd::Array{Int64,1} = Int64[]
    # output data size, per timestep, level, rank
    Yd_Timesteps::Array{Int64,1} = Int64[]
    Yd_Levels = DataStructures.SortedDict{String,Array{Int64,1}}()
    Yd_Ranks = DataStructures.SortedDict{String,Array{Int64,1}}()

    nlevels::Int64 = _find_max_nlevels(outputDirs)
    nranks::Int64 = _find_max_nranks(outputDirs)

    # One (initially empty) column per level and per level/rank pair, so that every
    # plot-file directory contributes exactly one entry to every column.
    for level in 1:nlevels
        levelKey::String = string("Level_", level - 1)
        Yd_Levels[levelKey] = Int64[]
        println("Yd_Levels", Yd_Levels, " ", level)

        for rank in 1:nranks
            rankKey::String = string(levelKey, "/Cell_D_", lpad(rank - 1, 5, "0"))
            Yd_Ranks[rankKey] = Int64[]
        end
    end
    println("Yd_Ranks: ", Yd_Ranks)

    caseIDs::Array{String,1} = String[]
    caseDirs::Array{String,1} = String[]

    for (index, outputDir) in enumerate(outputDirs)
        # find all directories with rootPlotName and get its size
        plotFileDirs = _plot_file_dirs(outputDir)
        # extract ncells; row `index` of X corresponds to outputDirs[index]
        ncells = X[!, "amr.ncells"][index]
        caseID = helper_get_relative_path(outputDir)

        # counter restarts at 1 for every run
        for (counter, plotFileDir) in enumerate(plotFileDirs)
            push!(Xd, counter * ncells)
            _push_datasize!(plotFileDir, Yd_Timesteps)

            levelDirs = helper_get_prefix_directories(string(plotFileDir, "/Level_"))
            currentLevels = helper_get_relative_path.(levelDirs)
            println("Current levels: ", currentLevels)

            for (levelKey, levelSizes) in Yd_Levels
                levelDir = string(plotFileDir, "/", levelKey)

                # levels absent from this plot file get a 0 placeholder
                if count(==(levelKey), currentLevels) == 1
                    _push_datasize!(levelDir, levelSizes)
                else
                    push!(levelSizes, 0)
                end

                # per-rank file sizes (not cumulative); missing files count as 0
                for rankID in 1:nranks
                    rankName = string("Cell_D_", lpad(rankID - 1, 5, "0"))
                    rankKey = string(levelKey, "/", rankName)
                    rankFile = string(levelDir, "/", rankName)
                    push!(Yd_Ranks[rankKey], isfile(rankFile) ? filesize(rankFile) : 0)
                end
            end

            push!(caseIDs, caseID)
            push!(caseDirs, helper_get_relative_path(plotFileDir))
        end
    end

    df::DataFrames.DataFrame =
        DataFrames.DataFrame(plots_cells = Xd, plots_size = Yd_Timesteps)

    for (key, Yd_Level) in Yd_Levels
        df[!, key] = Yd_Level
    end

    for (key, Yd_Rank) in Yd_Ranks
        df[!, key] = Yd_Rank
    end

    df[!, "caseID"] = caseIDs
    df[!, "caseDir"] = caseDirs

    println(df)
    return CSV.write("plot_size.csv", df, header=true)
end