Commit ae2cfd2f authored by Tsaris, Aristeidis's avatar Tsaris, Aristeidis
Browse files

change allocation

parent 15b1c139
#!/bin/bash
# Begin LSF directives
#BSUB -P stf011
#BSUB -J sc21
#BSUB -o logs/sc21.o%J
#BSUB -J olcfTut21
#BSUB -o olcfTut21.o%J
#BSUB -W 0:10
#BSUB -nnodes 1
#BSUB -alloc_flags "nvme smt4"
......
#!/bin/bash
# Begin LSF directives
#BSUB -P stf011
#BSUB -J sc21
#BSUB -o logs/sc21.o%J
#BSUB -J olcfTut21
#BSUB -o olcfTut21.o%J
#BSUB -W 0:10
#BSUB -nnodes 1
#BSUB -alloc_flags "nvme smt4"
......
#!/bin/bash
# Begin LSF directives
#BSUB -P stf011
#BSUB -J sc21
#BSUB -o logs/sc21.o%J
#BSUB -J olcfTut21
#BSUB -o olcfTut21.o%J
#BSUB -W 0:30
#BSUB -nnodes 1
#BSUB -alloc_flags "nvme smt4"
......
DLLL {"timestamp": "1635187919.430034", "datetime": "2021-10-25 14:51:59.430034", "elapsedtime": "1.2e-05", "type": "LOG", "step": "PARAMETER", "data": {"data": "/gpfs/alpine/world-shared/stf011/junqi/choco_env/dl_code/data/ILSVRC", "data_backend": "pytorch", "interpolation": "bilinear", "arch": "resnet50", "workers": 28, "epochs": 1, "run_epochs": -1, "early_stopping_patience": -1, "image_size": null, "batch_size": 128, "optimizer_batch_size": -1, "lr": 0.1, "lr_schedule": "step", "end_lr": 0, "warmup": 0, "label_smoothing": 0.0, "mixup": 0.0, "optimizer": "sgd", "momentum": 0.9, "weight_decay": 0.0001, "bn_weight_decay": false, "noDDP": false, "dtLdTime": false, "rmsprop_alpha": 0.9, "rmsprop_eps": 0.001, "nesterov": false, "print_freq": 10, "resume": null, "static_loss_scale": 1, "dynamic_loss_scale": false, "prof": 100, "amp": true, "seed": null, "gather_checkpoints": false, "raport_file": "summit_logs//data.1GPU.json", "evaluate": false, "training_only": true, "save_checkpoints": false, "checkpoint_filename": "checkpoint.pth.tar", "workspace": "./", "memory_format": "nhwc", "use_ema": null, "augmentation": null, "num_classes": null, "use_benchy": false, "distributed": false, "local_rank": 0, "gpu": 0, "world_size": 1}}
DLLL {"timestamp": "1635187919.430324", "datetime": "2021-10-25 14:51:59.430324", "elapsedtime": "0.000302", "type": "LOG", "step": "PARAMETER", "data": {"model.num_classes": 1000, "model.last_bn_0_init": false, "model.conv_init": "fan_in", "model.trt": false, "model.pretrained_from_file": null, "model.pretrained": false}}
DLLL {"timestamp": "1635187919.431721", "elapsedtime": "0.001699", "datetime": "2021-10-25 14:51:59.431721", "type": "METADATA", "metric": "lr", "metadata": {}}
DLLL {"timestamp": "1635187919.43229", "elapsedtime": "0.002268", "datetime": "2021-10-25 14:51:59.432290", "type": "METADATA", "metric": "train.loss", "metadata": {"format": ":.5f"}}
DLLL {"timestamp": "1635187919.432331", "elapsedtime": "0.002309", "datetime": "2021-10-25 14:51:59.432331", "type": "METADATA", "metric": "train.compute_ips", "metadata": {"unit": "img/s", "format": ":.2f"}}
DLLL {"timestamp": "1635187919.432366", "elapsedtime": "0.002344", "datetime": "2021-10-25 14:51:59.432366", "type": "METADATA", "metric": "train.total_ips", "metadata": {"unit": "img/s", "format": ":.2f"}}
DLLL {"timestamp": "1635187919.432398", "elapsedtime": "0.002376", "datetime": "2021-10-25 14:51:59.432398", "type": "METADATA", "metric": "train.data_time", "metadata": {"unit": "s", "format": ":.5f"}}
DLLL {"timestamp": "1635187919.43243", "elapsedtime": "0.002408", "datetime": "2021-10-25 14:51:59.432430", "type": "METADATA", "metric": "train.compute_time", "metadata": {"unit": "s", "format": ":.5f"}}
DLLL {"timestamp": "1635187962.405548", "datetime": "2021-10-25 14:52:42.405548", "elapsedtime": "42.975526", "type": "LOG", "step": [0, 10], "data": {"train.loss": 8.837985229492187, "train.total_ips": 848.501690959409}}
DLLL {"timestamp": "1635187962.405914", "datetime": "2021-10-25 14:52:42.405914", "elapsedtime": "42.975892", "type": "LOG", "step": [0, 10], "data": {"train.compute_ips": 860.5512960898008, "train.data_time": 1.6314417123794556, "train.compute_time": 2.665672039985657}}
DLLL {"timestamp": "1635187963.773231", "datetime": "2021-10-25 14:52:43.773231", "elapsedtime": "44.343209", "type": "LOG", "step": [0, 20], "data": {"train.loss": 8.969688415527344, "train.total_ips": 938.5739595949368}}
DLLL {"timestamp": "1635187963.773454", "datetime": "2021-10-25 14:52:43.773454", "elapsedtime": "44.343432", "type": "LOG", "step": [0, 20], "data": {"train.compute_ips": 954.104251941669, "train.data_time": 0.0021953821182250977, "train.compute_time": 0.13447554111480714}}
DLLL {"timestamp": "1635187965.882774", "datetime": "2021-10-25 14:52:45.882774", "elapsedtime": "46.452752", "type": "LOG", "step": [0, 30], "data": {"train.loss": 7.8730224609375, "train.total_ips": 660.767066819101}}
DLLL {"timestamp": "1635187965.88301", "datetime": "2021-10-25 14:52:45.883010", "elapsedtime": "46.452988", "type": "LOG", "step": [0, 30], "data": {"train.compute_ips": 688.8596923042858, "train.data_time": 0.00916118621826172, "train.compute_time": 0.20168330669403076}}
DLLL {"timestamp": "1635187968.849219", "datetime": "2021-10-25 14:52:48.849219", "elapsedtime": "49.419197", "type": "LOG", "step": [0, 40], "data": {"train.loss": 7.453359985351563, "train.total_ips": 459.08878229078437}}
DLLL {"timestamp": "1635187968.849466", "datetime": "2021-10-25 14:52:48.849466", "elapsedtime": "49.419444", "type": "LOG", "step": [0, 40], "data": {"train.compute_ips": 472.9648322789538, "train.data_time": 0.008267092704772949, "train.compute_time": 0.2862657308578491}}
DLLL {"timestamp": "1635187972.933293", "datetime": "2021-10-25 14:52:52.933293", "elapsedtime": "53.503271", "type": "LOG", "step": [0, 50], "data": {"train.loss": 7.120611572265625, "train.total_ips": 343.0502854365943}}
DLLL {"timestamp": "1635187972.933529", "datetime": "2021-10-25 14:52:52.933529", "elapsedtime": "53.503507", "type": "LOG", "step": [0, 50], "data": {"train.compute_ips": 350.45682446431255, "train.data_time": 0.01117405891418457, "train.compute_time": 0.3971038103103638}}
DLLL {"timestamp": "1635187978.853392", "datetime": "2021-10-25 14:52:58.853392", "elapsedtime": "59.42337", "type": "LOG", "step": [0, 60], "data": {"train.loss": 7.051701354980469, "train.total_ips": 220.82079478262222}}
DLLL {"timestamp": "1635187978.853626", "datetime": "2021-10-25 14:52:58.853626", "elapsedtime": "59.423604", "type": "LOG", "step": [0, 60], "data": {"train.compute_ips": 226.8180312927629, "train.data_time": 0.015385174751281738, "train.compute_time": 0.575557279586792}}
DLLL {"timestamp": "1635187983.074842", "datetime": "2021-10-25 14:53:03.074842", "elapsedtime": "63.64482", "type": "LOG", "step": [0, 70], "data": {"train.loss": 6.970297241210938, "train.total_ips": 358.48050745266085}}
DLLL {"timestamp": "1635187983.075084", "datetime": "2021-10-25 14:53:03.075084", "elapsedtime": "63.645062", "type": "LOG", "step": [0, 70], "data": {"train.compute_ips": 378.7963574549109, "train.data_time": 0.03083939552307129, "train.compute_time": 0.38971552848815916}}
DLLL {"timestamp": "1635187987.317353", "datetime": "2021-10-25 14:53:07.317353", "elapsedtime": "67.887331", "type": "LOG", "step": [0, 80], "data": {"train.loss": 6.963623046875, "train.total_ips": 361.1750918431663}}
DLLL {"timestamp": "1635187987.31762", "datetime": "2021-10-25 14:53:07.317620", "elapsedtime": "67.887598", "type": "LOG", "step": [0, 80], "data": {"train.compute_ips": 370.50542951749276, "train.data_time": 0.016566824913024903, "train.compute_time": 0.4075549364089966}}
DLLL {"timestamp": "1635187992.927826", "datetime": "2021-10-25 14:53:12.927826", "elapsedtime": "73.497804", "type": "LOG", "step": [0, 90], "data": {"train.loss": 6.91949462890625, "train.total_ips": 235.87855539383796}}
DLLL {"timestamp": "1635187992.928075", "datetime": "2021-10-25 14:53:12.928075", "elapsedtime": "73.498053", "type": "LOG", "step": [0, 90], "data": {"train.compute_ips": 244.13381123501986, "train.data_time": 0.01773548126220703, "train.compute_time": 0.5421025514602661}}
DLLL {"timestamp": "1635188002.188873", "datetime": "2021-10-25 14:53:22.188873", "elapsedtime": "82.758851", "type": "LOG", "step": [0], "data": {"train.loss": 7.508066711425781, "train.total_ips": 481.02649890034786}}
DLLL {"timestamp": "1635188002.189135", "datetime": "2021-10-25 14:53:22.189135", "elapsedtime": "82.759113", "type": "LOG", "step": [0], "data": {"lr": 0.1, "train.compute_ips": 495.41542348743474, "train.data_time": 0.17698683261871337, "train.compute_time": 0.599780821800232}}
DLLL {"timestamp": "1635188002.189462", "datetime": "2021-10-25 14:53:22.189462", "elapsedtime": "82.75944", "type": "LOG", "step": [], "data": {"train.loss": 7.508066711425781, "train.total_ips": 481.02649890034786}}
DLLL {"timestamp": "1635188002.189628", "datetime": "2021-10-25 14:53:22.189628", "elapsedtime": "82.759606", "type": "LOG", "step": [], "data": {"lr": 0.1, "train.compute_ips": 495.41542348743474, "train.data_time": 0.17698683261871337, "train.compute_time": 0.599780821800232}}
DLLL {"timestamp": "1635352115.187432", "datetime": "2021-10-27 12:28:35.187432", "elapsedtime": "1.1e-05", "type": "LOG", "step": "PARAMETER", "data": {"data": "/gpfs/alpine/world-shared/stf011/junqi/choco_env/dl_code/data/ILSVRC", "data_backend": "pytorch", "interpolation": "bilinear", "arch": "resnet50", "workers": 28, "epochs": 1, "run_epochs": -1, "early_stopping_patience": -1, "image_size": null, "batch_size": 128, "optimizer_batch_size": -1, "lr": 0.1, "lr_schedule": "step", "end_lr": 0, "warmup": 0, "label_smoothing": 0.0, "mixup": 0.0, "optimizer": "sgd", "momentum": 0.9, "weight_decay": 0.0001, "bn_weight_decay": false, "noDDP": false, "dtLdTime": false, "rmsprop_alpha": 0.9, "rmsprop_eps": 0.001, "nesterov": false, "print_freq": 10, "resume": null, "static_loss_scale": 1, "dynamic_loss_scale": false, "prof": 100, "amp": true, "seed": null, "gather_checkpoints": false, "raport_file": "summit_logs//data.1GPU.json", "evaluate": false, "training_only": true, "save_checkpoints": false, "checkpoint_filename": "checkpoint.pth.tar", "workspace": "./", "memory_format": "nhwc", "use_ema": null, "augmentation": null, "num_classes": null, "use_benchy": false, "distributed": false, "local_rank": 0, "gpu": 0, "world_size": 1}}
DLLL {"timestamp": "1635352115.187737", "datetime": "2021-10-27 12:28:35.187737", "elapsedtime": "0.000316", "type": "LOG", "step": "PARAMETER", "data": {"model.num_classes": 1000, "model.last_bn_0_init": false, "model.conv_init": "fan_in", "model.trt": false, "model.pretrained_from_file": null, "model.pretrained": false}}
DLLL {"timestamp": "1635352115.189152", "elapsedtime": "0.001731", "datetime": "2021-10-27 12:28:35.189152", "type": "METADATA", "metric": "lr", "metadata": {}}
DLLL {"timestamp": "1635352115.189729", "elapsedtime": "0.002308", "datetime": "2021-10-27 12:28:35.189729", "type": "METADATA", "metric": "train.loss", "metadata": {"format": ":.5f"}}
DLLL {"timestamp": "1635352115.189771", "elapsedtime": "0.00235", "datetime": "2021-10-27 12:28:35.189771", "type": "METADATA", "metric": "train.compute_ips", "metadata": {"unit": "img/s", "format": ":.2f"}}
DLLL {"timestamp": "1635352115.189805", "elapsedtime": "0.002384", "datetime": "2021-10-27 12:28:35.189805", "type": "METADATA", "metric": "train.total_ips", "metadata": {"unit": "img/s", "format": ":.2f"}}
DLLL {"timestamp": "1635352115.189837", "elapsedtime": "0.002416", "datetime": "2021-10-27 12:28:35.189837", "type": "METADATA", "metric": "train.data_time", "metadata": {"unit": "s", "format": ":.5f"}}
DLLL {"timestamp": "1635352115.189869", "elapsedtime": "0.002448", "datetime": "2021-10-27 12:28:35.189869", "type": "METADATA", "metric": "train.compute_time", "metadata": {"unit": "s", "format": ":.5f"}}
DLLL {"timestamp": "1635352225.758581", "datetime": "2021-10-27 12:30:25.758581", "elapsedtime": "110.57116", "type": "LOG", "step": [0, 10], "data": {"train.loss": 8.575665283203126, "train.total_ips": 836.9713869577757}}
DLLL {"timestamp": "1635352225.758955", "datetime": "2021-10-27 12:30:25.758955", "elapsedtime": "110.571534", "type": "LOG", "step": [0, 10], "data": {"train.compute_ips": 847.8470374882257, "train.data_time": 1.615895938873291, "train.compute_time": 9.440788316726685}}
DLLL {"timestamp": "1635352227.184875", "datetime": "2021-10-27 12:30:27.184875", "elapsedtime": "111.997454", "type": "LOG", "step": [0, 20], "data": {"train.loss": 8.894097900390625, "train.total_ips": 904.5958178875837}}
DLLL {"timestamp": "1635352227.185137", "datetime": "2021-10-27 12:30:27.185137", "elapsedtime": "111.997716", "type": "LOG", "step": [0, 20], "data": {"train.compute_ips": 916.0009168304957, "train.data_time": 0.001762247085571289, "train.compute_time": 0.14076616764068603}}
DLLL {"timestamp": "1635352228.974289", "datetime": "2021-10-27 12:30:28.974289", "elapsedtime": "113.786868", "type": "LOG", "step": [0, 30], "data": {"train.loss": 7.819781494140625, "train.total_ips": 747.062368729455}}
DLLL {"timestamp": "1635352228.974528", "datetime": "2021-10-27 12:30:28.974528", "elapsedtime": "113.787107", "type": "LOG", "step": [0, 30], "data": {"train.compute_ips": 764.392551643349, "train.data_time": 0.0034996747970581056, "train.compute_time": 0.1753300666809082}}
DLLL {"timestamp": "1635352230.845195", "datetime": "2021-10-27 12:30:30.845195", "elapsedtime": "115.657774", "type": "LOG", "step": [0, 40], "data": {"train.loss": 7.27269287109375, "train.total_ips": 709.5770979561217}}
DLLL {"timestamp": "1635352230.845436", "datetime": "2021-10-27 12:30:30.845436", "elapsedtime": "115.658015", "type": "LOG", "step": [0, 40], "data": {"train.compute_ips": 722.0636206317358, "train.data_time": 0.0031224489212036133, "train.compute_time": 0.18385138511657714}}
DLLL {"timestamp": "1635352233.556233", "datetime": "2021-10-27 12:30:33.556233", "elapsedtime": "118.368812", "type": "LOG", "step": [0, 50], "data": {"train.loss": 7.170654296875, "train.total_ips": 495.6966926549556}}
DLLL {"timestamp": "1635352233.556479", "datetime": "2021-10-27 12:30:33.556479", "elapsedtime": "118.369058", "type": "LOG", "step": [0, 50], "data": {"train.compute_ips": 503.5406239587889, "train.data_time": 0.004349184036254883, "train.compute_time": 0.26663575172424314}}
DLLL {"timestamp": "1635352240.380984", "datetime": "2021-10-27 12:30:40.380984", "elapsedtime": "125.193563", "type": "LOG", "step": [0, 60], "data": {"train.loss": 6.9960174560546875, "train.total_ips": 347.64120109570894}}
DLLL {"timestamp": "1635352240.381221", "datetime": "2021-10-27 12:30:40.381221", "elapsedtime": "125.1938", "type": "LOG", "step": [0, 60], "data": {"train.compute_ips": 376.0279876609608, "train.data_time": 0.30582776069641116, "train.compute_time": 0.37652204036712644}}
DLLL {"timestamp": "1635352245.612022", "datetime": "2021-10-27 12:30:45.612022", "elapsedtime": "130.424601", "type": "LOG", "step": [0, 70], "data": {"train.loss": 6.9576263427734375, "train.total_ips": 267.2641106634795}}
DLLL {"timestamp": "1635352245.612282", "datetime": "2021-10-27 12:30:45.612282", "elapsedtime": "130.424861", "type": "LOG", "step": [0, 70], "data": {"train.compute_ips": 286.05238615351817, "train.data_time": 0.02798898220062256, "train.compute_time": 0.49405386447906496}}
DLLL {"timestamp": "1635352248.313067", "datetime": "2021-10-27 12:30:48.313067", "elapsedtime": "133.125646", "type": "LOG", "step": [0, 80], "data": {"train.loss": 6.94117431640625, "train.total_ips": 539.1389231678104}}
DLLL {"timestamp": "1635352248.313318", "datetime": "2021-10-27 12:30:48.313318", "elapsedtime": "133.125897", "type": "LOG", "step": [0, 80], "data": {"train.compute_ips": 549.4614488634396, "train.data_time": 0.005727076530456543, "train.compute_time": 0.26426191329956056}}
DLLL {"timestamp": "1635352256.536951", "datetime": "2021-10-27 12:30:56.536951", "elapsedtime": "141.34953", "type": "LOG", "step": [0, 90], "data": {"train.loss": 6.931619262695312, "train.total_ips": 244.1741433324426}}
DLLL {"timestamp": "1635352256.537194", "datetime": "2021-10-27 12:30:56.537194", "elapsedtime": "141.349773", "type": "LOG", "step": [0, 90], "data": {"train.compute_ips": 300.16410507057867, "train.data_time": 0.37331602573394773, "train.compute_time": 0.4489408493041992}}
DLLL {"timestamp": "1635352265.797752", "datetime": "2021-10-27 12:31:05.797752", "elapsedtime": "150.610331", "type": "LOG", "step": [0], "data": {"train.loss": 7.448251647949219, "train.total_ips": 544.7861574579425}}
DLLL {"timestamp": "1635352265.797997", "datetime": "2021-10-27 12:31:05.797997", "elapsedtime": "150.610576", "type": "LOG", "step": [0], "data": {"lr": 0.1, "train.compute_ips": 562.8258784168959, "train.data_time": 0.235234055519104, "train.compute_time": 1.2205868768692016}}
DLLL {"timestamp": "1635352265.79826", "datetime": "2021-10-27 12:31:05.798260", "elapsedtime": "150.610839", "type": "LOG", "step": [], "data": {"train.loss": 7.448251647949219, "train.total_ips": 544.7861574579425}}
DLLL {"timestamp": "1635352265.798401", "datetime": "2021-10-27 12:31:05.798401", "elapsedtime": "150.61098", "type": "LOG", "step": [], "data": {"lr": 0.1, "train.compute_ips": 562.8258784168959, "train.data_time": 0.235234055519104, "train.compute_time": 1.2205868768692016}}
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment