Commit a7515fb3 authored by Maiterth, Matthias
Browse files

Merge branch 'rl3' into 'develop'

RL framework (try 3)

See merge request !117
parents 15aacee4 c41b265e
Loading
Loading
Loading
Loading
+8 −3
Original line number Diff line number Diff line
@@ -62,16 +62,21 @@ For MIT Supercloud
    python -m raps.dataloaders.mit_supercloud.cli download --start 2021-05-21T13:00 --end 2021-05-21T14:00

    # Load data and run simulation - will save data as part-cpu.npz and part-gpu.npz files
    raps run-parts -x mit_supercloud -f $DPATH --system mit_supercloud --start 2021-05-21T13:00 --end 2021-05-21T14:00
    raps run-parts -x mit_supercloud -f $DPATH --start 2021-05-21T13:00 --end 2021-05-21T14:00
    # or simply
    raps run-parts experiments/mit-replay-25hrs.yaml
    # Note: if no start and end dates are provided, the run defaults to the 24 hours
    # from 2021-05-21T00:00 to 2021-05-22T00:00, as set by the defaults in raps/dataloaders/mit_supercloud/utils.py

    # Re-run simulation using npz files (much faster load)
    raps run-parts -x mit_supercloud -f part-*.npz --system mit_supercloud
    raps run-parts -x mit_supercloud -f part-*.npz

    # Synthetic tests for verification studies:
    raps run-parts -x mit_supercloud -w multitenant

    # Reinforcement learning test case
    python main.py train-rl --system mit_supercloud/part-cpu -f /opt/data/mit_supercloud/202201

For Lumi

    # Synthetic test for Lumi:
+1 −1
Original line number Diff line number Diff line
@@ -36,7 +36,7 @@ power:
  power_cost: 0.094
scheduler:
  multitenant: true
  job_arrival_time: 900
  job_arrival_time: 1
  mtbf: 11
  trace_quanta: 10
  min_wall_time: 3600
+2 −2
Original line number Diff line number Diff line
system: frontier
replay:
  - ~/data/frontier/slurm/joblive/date=2024-01-18
  - ~/data/frontier/jobprofile/date=2024-01-18
  - /opt/data/frontier/slurm/joblive/date=2024-01-18
  - /opt/data/frontier/jobprofile/date=2024-01-18
+1 −1
Original line number Diff line number Diff line
system: gcloudv2
replay:
  - ~/data/gcloud/v2/google_cluster_data_2011_sample
  - /opt/data/gcloud/v2/google_cluster_data_2011_sample
ff: 600
+1 −1
Original line number Diff line number Diff line
system: marconi100
replay:
  - ~/data/marconi100/job_table.parquet
  - /opt/data/marconi100/job_table.parquet
Loading