include("../helper/helperSystem.jl")

import DataFrames
import GLM

"""
    AmrexCastro(outputPrefix::String, runlogFile::String)

Extractor description for the "AmrexCastro" application.

# Fields
- `app`: application label, always set to `"AmrexCastro"` by the constructor.
- `outputPrefix`: prefix handed to `helper_get_prefix_directories` to locate run
  output directories.
- `inputs`: names of the input parameters of interest.
- `outputs`: maps each modelled output name to the names of the independent
  variables associated with it.
- `runlogFile`: run log file, stored as given by the caller.
"""
struct AmrexCastro <: AbstractAmrex
    app::String
    outputPrefix::String
    inputs::Array{String}
    outputs::Dict{String,Array{String}}
    runlogFile::String

    AmrexCastro(outputPrefix::String, runlogFile::String) = new(
        "AmrexCastro",
        outputPrefix,
        [
            "max_step",
            "amr.check_int",
            "amr.plot_int",
            "amr.n_cell",
            "amr.max_level",
            "castro.cfl",
            "castro.max_grid_size",
        ],
        Dict{String,Array{String}}(
            "plots_size" => ["amr.nplot_files", "amr.ncells"],
            "checks_size" => [
                "amr.check_int",
                "amr.ncheck_files",
                "amr.ncells",
                "amr.max_level",
                "amr.regrid_int",
            ],
        ),
        runlogFile,
    )
end

"""
    run_linear_models(extractor::AmrexCastro)

Build the independent-variable table once and run the linear model for each
supported entry of `extractor.outputs`.

Only `"plots_size"` has a model in this file; other keys are ignored.
"""
function run_linear_models(extractor::AmrexCastro)
    X::DataFrames.DataFrame = _get_linear_model_X(extractor)

    # Dict keys are unique, so a membership test is equivalent to scanning the keys.
    if haskey(extractor.outputs, "plots_size")
        _run_linear_model_plots_size(extractor, X)
    end
    return nothing
end

"""
    _get_input_file(extractor::AmrexCastro, outputDir::String)::String

Return the single file in `outputDir` reported by
`helper_get_prefix_files("input", outputDir)`.

# Throws
- `ArgumentError` if no such file is reported for `outputDir`.
- `DimensionMismatch` if more than one file is reported.
"""
function _get_input_file(extractor::AmrexCastro, outputDir::String)::String
    # hardcoded "input" prefix...find out if this is always the case with Castro
    inputFiles::Array{String} = helper_get_prefix_files("input", outputDir)

    if isempty(inputFiles)
        throw(ArgumentError(string(
            "jexio AmrexCastro: Can't find Castro input files in directory: ",
            outputDir,
        )))
    end

    # use length instead of size, as size returns a tuple
    if length(inputFiles) != 1
        println("Number of files: ", length(inputFiles))
        throw(DimensionMismatch("jexio AmrexCastro: found more than one input file"))
    end

    return inputFiles[1]
end

"""
    _get_linear_model_X(extractor::AmrexCastro)::DataFrames.DataFrame

Assemble one row of independent variables per run output directory.

For every directory returned by
`helper_get_prefix_directories(extractor.outputPrefix)` the input file is parsed
with `_input_parser`, reduced with `_get_independent_variables`, and tagged with
a `"caseID"` entry taken from `helper_get_relative_path(outputDir)`. Returns an
empty `DataFrame` when no directory is found.
"""
function _get_linear_model_X(extractor::AmrexCastro)::DataFrames.DataFrame
    # get directories runs output location
    outputDirs = helper_get_prefix_directories(extractor.outputPrefix)
    println(outputDirs)

    X::DataFrames.DataFrame = DataFrames.DataFrame()
    isXInit::Bool = false

    for outputDir in outputDirs
        inputFile::String = _get_input_file(extractor, outputDir)
        # get relevant input parameters
        parameters = _input_parser(extractor, inputFile)
        independentVariables = _get_independent_variables(extractor, parameters)
        # get outputDir name
        independentVariables["caseID"] = helper_get_relative_path(outputDir)

        if isXInit
            DataFrames.push!(X, independentVariables)
        else
            # The first row defines the columns of the table.
            X = DataFrames.DataFrame(independentVariables)
            isXInit = true
        end
    end

    println(X)
    return X
end

"""
    _run_linear_model_plots_size(extractor::AmrexCastro, X::DataFrames.DataFrame)

Fit and print an ordinary least squares model of the total plot-file size per
run against the product of the `amr.nplot_files` and `amr.ncells` columns of `X`.

`X` must have one row per directory returned by
`helper_get_prefix_directories(extractor.outputPrefix)`, in the same order, and
must contain both columns.
"""
function _run_linear_model_plots_size(extractor::AmrexCastro, X::DataFrames.DataFrame)
    outputDirs = helper_get_prefix_directories(extractor.outputPrefix)

    # single column DataFrame: Y in the linear model
    plotsSizesData = DataFrames.DataFrame(plots_sizes = Int64[])

    for outputDir in outputDirs
        # TODO refactor this later: the input file is parsed again here
        inputFile::String = _get_input_file(extractor, outputDir)
        parameters = _input_parser(extractor, inputFile)
        rootPlotName = get(parameters, "amr.plot_file", "")

        # find all directories with rootPlotName and accumulate their sizes
        plotFileDirs = helper_get_prefix_directories(string(outputDir, "/", rootPlotName))
        plotsSize::Int64 = 0
        for plotFileDir in plotFileDirs
            sizeDir::Int64 = helper_get_directory_size(plotFileDir)
            plotsSize += sizeDir
        end
        push!(plotsSizesData, [plotsSize])
    end
    println(plotsSizesData)

    # Prepare the X independent variable in the linear model
    col1 = Symbol("amr.nplot_files")
    col2 = Symbol("amr.ncells")
    # `insertcols!` replaces the removed four-argument `insert!(df, idx, col, name)`;
    # it is qualified because the file uses `import DataFrames`.
    DataFrames.insertcols!(plotsSizesData, 2, :new_data => X[!, col1] .* X[!, col2])

    # formula from
    # https://discourse.julialang.org/t/glm-jl-with-unknown-column-names/20692/5
    response = Symbol(names(plotsSizesData)[1])
    predictors = Symbol.(names(plotsSizesData)[2:end])
    println(plotsSizesData)

    # NOTE(review): `@eval` runs at global scope; kept because the column names are
    # only known at runtime.
    f = @eval(GLM.@formula($response ~ (+)($(predictors...))))
    ols = GLM.lm(f, plotsSizesData)

    println("I/O linear model formula:")
    println(ols)
    return nothing
end