Commit c0fed3e1 authored by Godoy, William's avatar Godoy, William

Merge branch 'linear-model' into 'master'

Linear model

See merge request !3
parents 0cd6a38c c03dbf3d
.buildpath
.project
*.so
......@@ -5,6 +5,9 @@ version = "0.0.1"
[deps]
Glob = "c27321d9-0574-5035-807b-f59d2c89b15c"
DataFrames = "a93c6f00-e57d-5684-b7b6-d8193f3e46c0"
GLM = "38e38edf-8417-5370-95a0-9cbb8c7f171a"
Plots = "91a5bcdd-55d7-5caf-9e0b-520d859cae80"
[compat]
julia = "1"
......
......@@ -9,12 +9,16 @@ Supported:
For simple usage see tests:
1. Install requirements (run once):
`$ julia scripts/requirements.jl`
1. Install requirements and precompile dependencies (run once). This takes a while the first time
and generates `jexio_deps.so`:
- `$ julia scripts/requirements.jl`
2. Run tests:
`$ julia --project=. test/runtests.jl`
- `$ julia -Jjexio_deps.so --project=. test/runtests.jl`
Optionally, run tests from the Julia REPL for preloading libraries,
as it results in faster runtimes after the first time.
- `$ julia -Jjexio_deps.so --project=.`
- `julia> include("test/runtests.jl")`
Formatting the code:
......
import Exio
function main()
    # ---- Configuration -------------------------------------------------
    # Exio.exio_init takes three inputs and returns a handle (ExioH).

    # application name; "AmrexCastro" is the only one supported
    application = "AmrexCastro"
    # common path prefix shared by the output directories of all runs
    runsPrefix = "/home/wgodoy/workspace/Castro/Exec/hydro_tests/Sedov/case"
    # name of the log file inside each run's output directory
    runLog = "run.log"
    # --------------------------------------------------------------------

    # create the handle (as many handles as needed can be created)
    handle = Exio.exio_init(application, runsPrefix, runLog)

    # fit the linear models from the available runs (the more runs, the better);
    # the formula for dependent and independent variables is reported
    return Exio.run_linear_models(handle)
end
main()
......@@ -7,5 +7,13 @@ using Pkg
# One-time environment setup: install the packages this project relies on, then build a
# custom system image so later Julia sessions start with them preloaded.

# Development tooling and runtime dependencies.
Pkg.add("JuliaFormatter")
Pkg.add("Glob")
Pkg.add("DataFrames")
Pkg.add("GLM")
Pkg.add("Plots")
# PackageCompiler is only needed here, to build the system image below.
Pkg.add("PackageCompiler")
using PackageCompiler
# Bundle the listed packages into `jexio_deps.so`; the README passes this file to Julia
# with `-Jjexio_deps.so`.
create_sysimage([:Glob, :Plots, :GLM, :DataFrames], sysimage_path = "jexio_deps.so")
# Terminate the Julia process once setup is complete.
exit()
module Exio
export ExioH, exio_init, input_parser
export ExioH, exio_init
include("extractor/Extractor.jl")
"""
    ExioH

Handle type returned by `exio_init`. It carries the application-specific extractor
that the rest of the module dispatches on.
"""
mutable struct ExioH
    # application extractor; the zero-argument constructor leaves it unassigned
    extractor::AbstractExtractor

    function ExioH()
        return new()
    end
end
function exio_init(app::String, outputPrefix::String)::ExioH
function exio_init(app::String, outputPrefix::String, runlogFile::String)::ExioH
exioH = ExioH()
if app == "AmrexCastro"
println("Hello AmrexCastro")
exioH.extractor = AmrexCastro()
init!(exioH.extractor, outputPrefix)
exioH.extractor = AmrexCastro(outputPrefix, runlogFile)
end
return exioH
end
# Handle-level entry point: forwards to the `run_linear_models` method of the
# extractor stored in the handle and returns whatever that method returns.
run_linear_models(exioH::ExioH) = run_linear_models(exioH.extractor)
end
......@@ -4,23 +4,19 @@ abstract type AbstractAmrex <: AbstractExtractor end
mutable struct Amrex <: AbstractAmrex
app::String
outputPrefix::String
degreesOfFreedom::Any
Amrex() = new()
inputs::Array{String}
outputs::Array{String}
runlogFile::String
Amrex(outputPrefix::String, runlogFile::String) = new(
"Amrex",
outputPrefix,
["max_step", "amr.check_int", "amr.plot_int", "amr.n_cell", "amr.max_level"],
["plots_size", "checkpoints_size"],
runlogFile,
)
end
"""
initialize members of the extractor::Amrex type
Using bang convention as init modifies the extractor::Amrex
https://docs.julialang.org/en/v1/manual/style-guide/index.html#bang-convention-1
"""
function init!(extractor::Amrex, outputPrefix::String)
    # input-file keys treated as degrees of freedom for this extractor
    parameterNames =
        ["max_step", "amr.check_int", "amr.plot_int", "amr.n_cell", "amr.max_level"]

    extractor.app = "Amrex"
    extractor.outputPrefix = outputPrefix
    extractor.degreesOfFreedom = parameterNames
    # the list of keys is also the value handed back to the caller
    return parameterNames
end
"""
Parses an input file with entries key = value, returns all entries in a Dict
......@@ -28,7 +24,7 @@ end
- `extractor::AbstractAmrex` : input type extending AbstractAmrex
- `inputFile::String` : input file to be parsed, absolute path is preferred
"""
function input_parser(extractor::AbstractAmrex, inputFile::String)::Dict{String,String}
function _input_parser(extractor::AbstractAmrex, inputFile::String)::Dict{String,String}
parameters = Dict{String,String}()
# get file contents in a single iterable type, \n newline is removed
......@@ -64,24 +60,25 @@ end
from input parameters
# Arguments
- `extractor::AbstractAmrex` : input type extending AbstractAmrex
- `parameters::Dict{String,String}` : input file to be parsed, absolute path is preferred
- `parameters::Dict{Any,Any}` : key/value entries parsed from the input file (as returned by `_input_parser`)
"""
function get_independent_variables(extractor::AbstractAmrex, parameters)::Dict
function _get_independent_variables(extractor::AbstractAmrex, parameters)::Dict{String,Any}
independentVariables = Dict()
independentVariables = Dict{String,Any}()
# Number of output events,
# get is the safe way. 3rd argument is a default fallback if key not found
maxStepStr = get(parameters, "max_step", "")
if maxStepStr != ""
# safe string conversion to a type
# safe string conversion to a type
maxStep = parse(Int64, maxStepStr)
plotIntStr = get(parameters, "amr.plot_int", "")
if plotIntStr != ""
plotFrequency = parse(Int64, plotIntStr)
independentVariables["amr.plot_int"] = plotFrequency
independentVariables["amr.nplot_files"] = floor(Int32, maxStep / plotFrequency)
else
throw(NoSuchFieldException("jexio Amrex: Can't find amr.plot_int"))
......@@ -90,7 +87,9 @@ function get_independent_variables(extractor::AbstractAmrex, parameters)::Dict
checkIntStr = get(parameters, "amr.check_int", "")
if checkIntStr != ""
checkpointFrequency = parse(Int64, checkIntStr)
independentVariables["amr.ncheck_files"] = floor(Int32, maxStep / checkpointFrequency)
independentVariables["amr.check_int"] = checkpointFrequency
independentVariables["amr.ncheck_files"] =
floor(Int32, maxStep / checkpointFrequency)
else
throw(NoSuchFieldException("jexio Amrex: Can't find amr.check_int"))
end
......@@ -109,10 +108,15 @@ function get_independent_variables(extractor::AbstractAmrex, parameters)::Dict
maxLevel = get(parameters, "amr.max_level", "")
independentVariables["amr.max_level"] = maxLevel == "" ? 1 : parse(Int32, maxLevel)
regrid = get(parameters, "amr.regrid_int", "")
independentVariables["amr.regrid_int"] = regrid == "" ? 1 : parse(Int32, regrid)
return independentVariables
end
# Placeholder with no implementation yet: accepts any AbstractAmrex extractor and
# returns `nothing`.
_get_inputs_X(extractor::AbstractAmrex) = nothing
include("../helper/helperSystem.jl")
mutable struct AmrexCastro <: AbstractAmrex
import DataFrames
import GLM
struct AmrexCastro <: AbstractAmrex
app::String
degreesOfFreedom::Any
outputPrefix::String
inputs::Array{String}
outputs::Dict{String,Array{String}}
runlogFile::String
AmrexCastro() = new()
AmrexCastro(outputPrefix::String, runlogFile::String) = new(
"AmrexCastro",
outputPrefix,
[
"max_step",
"amr.check_int",
"amr.plot_int",
"amr.n_cell",
"amr.max_level",
"castro.cfl",
"castro.max_grid_size",
],
Dict{String,Array{String}}(
"plots_size" => ["amr.nplot_files", "amr.ncells", "amr.max_level"],
"checks_size" => [
"amr.check_int",
"amr.ncheck_files",
"amr.ncells",
"amr.max_level",
"amr.regrid_int",
],
),
runlogFile,
)
end
"""
init!
initialize members of the extractor::AmrexCastro type
Using bang convention as init modifies the extractor::Amrex
https://docs.julialang.org/en/v1/manual/style-guide/index.html#bang-convention-1
"""
function init!(extractor::AmrexCastro, outputPrefix::String)
extractor.app = "AmrexCastro"
extractor.degreesOfFreedom = [
"max_step",
"amr.check_int",
"amr.plot_int",
"amr.n_cell",
"amr.max_level",
"castro.cfl",
"castro.max_grid_size",
]
extractor.outputPrefix = outputPrefix
# Runs a linear model for each supported entry in `extractor.outputs`.
# The table of independent variables is built once and shared by the models.
function run_linear_models(extractor::AmrexCastro)
    predictorTable::DataFrames.DataFrame = _get_linear_model_X(extractor)

    # "plots_size" is the only output with a model implementation; any other
    # entry in `outputs` is ignored
    if haskey(extractor.outputs, "plots_size")
        _run_linear_model_plots_size(extractor, predictorTable)
    end
    return nothing
end
"""
    _get_input_file(extractor::AmrexCastro, outputDir::String)::String

Return the single Castro input file found in `outputDir`.

Candidate files are looked up with `helper_get_prefix_files` using the hard-coded
file-name prefix `"input"`.

# Arguments
- `extractor::AmrexCastro` : extractor the lookup is done for (used for dispatch only)
- `outputDir::String` : output directory of one run

# Throws
- `NoSuchFieldException` : no file with the `"input"` prefix exists in `outputDir`
- `DimensionMismatch` : more than one such file exists in `outputDir`
"""
function _get_input_file(extractor::AmrexCastro, outputDir::String)::String
    # hardcoded input...find out if this is always the case with Castro
    inputFiles::Array{String} = helper_get_prefix_files("input", outputDir)

    if isempty(inputFiles)
        # NOTE(review): NoSuchFieldException is not a Julia Base exception type;
        # presumably it is declared elsewhere in this project - confirm.
        # A single message string is passed, matching the other call sites, and the
        # message names the directory that was actually searched.
        throw(NoSuchFieldException(
            "jexio AmrexCastro: Can't find Castro input files in directory: $outputDir",
        ))
    end

    # use length, instead of size as it returns tuples
    if length(inputFiles) != 1
        print("Number of files: ", length(inputFiles))
        throw(DimensionMismatch("jexio AmrexCastro: found more than one input file"))
    end

    return inputFiles[1]
end
function _get_linear_model_X(extractor::AmrexCastro)::DataFrames.DataFrame
# get directories runs output location
outputDirs = helper_get_prefix_directories(extractor.outputPrefix)
# println(outputDirs)
X::DataFrames.DataFrame = DataFrames.DataFrame()
isXInit::Bool = false
for outputDir in outputDirs
inputFile::String = _get_input_file(extractor, outputDir)
# get relevant input parameters
parameters = _input_parser(extractor, inputFile)
independent_variables = _get_independent_variables(extractor, parameters)
if !isXInit
X = DataFrames.DataFrame(independent_variables)
isXInit = true
else
DataFrames.push!(X, independent_variables)
end
end
function get_dependent_variable_datasize(extractor::AmrexCastro)
outputDirs = helper_get_prefix_directories(extractor.outputPrefix)
println(outputDirs)
println(X)
return X
end
"""
    _run_linear_model_plots_size(extractor::AmrexCastro, X::DataFrames.DataFrame)

Fit a linear model (`GLM.lm`) of the total plot-file size of each run against the
predictor columns named in `extractor.outputs["plots_size"]`, and print the assembled
data table and the fitted model.

# Arguments
- `extractor::AmrexCastro` : provides `outputPrefix` (where runs are located) and `outputs`
- `X::DataFrames.DataFrame` : independent variables; must contain one column per predictor name

Returns `nothing` (the last expression is a `println`).
"""
function _run_linear_model_plots_size(extractor::AmrexCastro, X::DataFrames.DataFrame)
outputDirs = helper_get_prefix_directories(extractor.outputPrefix)
# single column DataFrame Y in the linear model
plotsSizesData = DataFrames.DataFrame(plots_sizes = Int64[])
# one row per run directory: accumulated size of all its plot-file directories
for outputDir in outputDirs
# TODO refactor this later
inputFile::String = _get_input_file(extractor, outputDir)
parameters = _input_parser(extractor, inputFile)
# NOTE(review): when "amr.plot_file" is absent the fallback is "", so the prefix below
# ends in "/" and the helper's glob pattern becomes "*" (every entry in outputDir) -
# confirm this is intended
rootPlotName = get(parameters, "amr.plot_file", "")
# find all directories with rootPlotName and get its size
plotFileDirs = helper_get_prefix_directories(string(outputDir, "/", rootPlotName))
plotsSize::Int64 = 0
for plotFileDir in plotFileDirs
sizeDir::Int64 = helper_get_directory_size(plotFileDir)
plotsSize += sizeDir
end
push!(plotsSizesData, [plotsSize])
end
# Prepare the X independent variables in the linear model
# NOTE(review): columns are attached row-for-row, so this assumes X was built by
# visiting the run directories in the same order as the loop above - confirm
XNames::Array{String} = extractor.outputs["plots_size"]
for XName in XNames
columnName = Symbol(XName)
# this syntax [!, symbol] does not make a copy, use [:, symbol] for copies
plotsSizesData[!, columnName] = X[!, columnName]
end
# formula from https://discourse.julialang.org/t/glm-jl-with-unknown-column-names/20692/5
# first column is the response, every remaining column is a predictor
response = Symbol(names(plotsSizesData)[1])
predictors = Symbol.(names(plotsSizesData)[2:end])
println(plotsSizesData)
# the column names are only known at run time, so the symbols are spliced into the
# @formula expression and the result is evaluated with @eval (module global scope)
f = @eval(GLM.@formula($response ~ (+)(1, $(predictors...))))
ols = GLM.lm(f, plotsSizesData)
println("I/O linear model formula:")
println(ols)
#import Plots
#display(Plots.plot(X, Y))
end
import Glob
"""
Gets the list of directories with a certain absolute prefix
Example:
/my/absolute/path/prefix
returns:
/my/absolute/path/prefix.1
/my/absolute/path/prefix.2
/my/absolute/path/prefix.3
"""
function helper_get_prefix_directories(prefix::String)::Array{String}
findDelimiter = findlast("/",prefix)
searchDirectory = prefix[ 1: findDelimiter[1]-1 ]
print(searchDirectory)
pattern = string(prefix[ findDelimiter[1]+1:end],"*")
outputDirs = Glob.glob( pattern, searchDirectory )
return outputDirs
end
\ No newline at end of file
findDelimiter = findlast("/", prefix)
searchDirectory::String = prefix[1:findDelimiter[1]-1]
#print(searchDirectory)
pattern::String = string(prefix[findDelimiter[1]+1:end], "*")
outputDirs = Glob.glob(pattern, searchDirectory)
return outputDirs
end
"""
Gets the list of files inside a path that meet the prefix criteria.
Example:
path/prefix.*
path/
prefix.1
prefix.2
prefix.3
output: [ prefix.1, prefix.2, prefix.3 ]
"""
function helper_get_prefix_files(prefix::String, path::String)::Array{String}
    # glob for "<prefix>*" inside `path`; the matches are returned as given by Glob.glob
    return Glob.glob(string(prefix, "*"), path)
end
# Total size of all files found under `directory`, visiting subdirectories through
# `walkdir` and adding up `filesize` for every file entry.
function helper_get_directory_size(directory::String)::Int64
    totalBytes::Int64 = 0
    for (root, _, files) in walkdir(directory)
        for file in files
            totalBytes += filesize(joinpath(root, file))
        end
    end
    return totalBytes
end
......@@ -8,5 +8,5 @@ import Exio
end;
@testset "test_Exio.input_parser_docstring" begin
@test println(@doc Exio.input_parser) === nothing
@test println(@doc Exio._input_parser) === nothing
end;
......@@ -16,18 +16,23 @@ function test_value(
end
function test_AmrexCastro()
exio = Exio.exio_init("AmrexCastro", "./test/data/AmrexCastro/hydro_tests/Sedov/2d.cyl_in_cartcoords/case")
exio = Exio.exio_init(
"AmrexCastro",
"./test/data/AmrexCastro/hydro_tests/Sedov/2d.cyl_in_cartcoords/case",
"run.log",
)
@test typeof(exio) == Exio.ExioH
println("Current directory: ", Filesystem.pwd())
inputFile = string(
Filesystem.pwd(), "/",
inputFile::String = string(
Filesystem.pwd(),
"/",
"test/data/AmrexCastro/test_data_AmrexCastro_inputs.2d.cyl_in_cartcoords",
)
println("Input file: ", inputFile)
parameters = Exio.input_parser(exio.extractor, inputFile)
parameters = Exio._input_parser(exio.extractor, inputFile)
# keys counter
counter = 0
......@@ -62,10 +67,10 @@ function test_AmrexCastro()
@test counter == 27
independent_variables = Exio.get_independent_variables(exio.extractor, parameters)
independent_variables = Exio._get_independent_variables(exio.extractor, parameters)
println(independent_variables)
Exio.get_dependent_variable_datasize(exio.extractor)
Exio.run_linear_models(exio.extractor)
end
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment