Skip to content
GitLab
Projects
Groups
Snippets
/
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Rogers, David
launchad
Commits
91f9dc30
Commit
91f9dc30
authored
Jun 17, 2020
by
David M. Rogers
Browse files
Version used for gigadock.
parent
1ca85933
Changes
3
Hide whitespace changes
Inline
Side-by-side
loadem.py
View file @
91f9dc30
...
...
@@ -6,12 +6,12 @@ from datetime import datetime
test
=
False
testone
=
False
hopper
=
True
hopper
=
False
conn_retries
=
0
def
stamp
():
return
datetime
.
now
().
strftime
(
"%Y-%m-%d %H:%M:%S.%f"
)
+
" v1.1"
return
datetime
.
now
().
strftime
(
"%Y-%m-%d %H:%M:%S.%f"
)
+
" v1.2"
def
run_redis
(
host
,
fn
):
global
conn_retries
...
...
@@ -68,6 +68,8 @@ def main(argv):
# redis DB contains 4 sets of shard-IDs
n
=
0
errors
=
0
consecutive_errors
=
0
while
True
:
shard
=
get_shard
(
host
)
if
shard
is
None
:
# graceful shutdown
...
...
@@ -83,8 +85,16 @@ def main(argv):
if
ret
:
ofile
.
write
(
"%s %s ERR\n"
%
(
stamp
(),
shard
))
newset
=
'errors'
consecutive_errors
+=
1
errors
+=
1
if
consecutive_errors
>=
10
:
print
(
"%s Host %04x quitting due to %d consecutive errors."
%
(
stamp
(),
rank
,
consecutive_errors
))
break
if
consecutive_errors
>=
2
:
time
.
sleep
(
60
)
else
:
ofile
.
write
(
"%s %s OK\n"
%
(
stamp
(),
shard
))
consecutive_errors
=
0
run_redis
(
host
,
lambda
r
:
r
.
smove
(
'doing'
,
newset
,
shard
))
n
+=
1
...
...
@@ -96,7 +106,7 @@ def main(argv):
ofile
.
close
()
print
(
"%s Host %04x completed (%d decishards processed, %d conn retries)."
%
(
stamp
(),
rank
,
n
,
conn_retries
))
print
(
"%s Host %04x completed (%d decishards processed, %d errors, %d conn retries)."
%
(
stamp
(),
rank
,
n
,
errors
,
conn_retries
))
ret
=
subprocess
.
call
(
"rm -fr /mnt/bb/%s/%d"
%
(
username
,
rank
),
shell
=
True
)
if
__name__
==
"__main__"
:
...
...
run_ad.sh
View file @
91f9dc30
...
...
@@ -9,7 +9,7 @@
export
OMP_NUM_THREADS
=
7
set
-e
version
=
"run_ad.sh v1.1"
version
=
"run_ad.sh v1.2"
if
[
$#
-ne
2
]
;
then
echo
"Usage: $0 shard_name shard_segment"
...
...
run_docking.lsf
View file @
91f9dc30
#BSUB -nnodes 10
#BSUB -W 1:20
#BSUB -nnodes 4600
#BSUB -W 24:00
#BSUB -q batch
#BSUB -P BIF128
#BSUB -J ADv1.1
#BSUB -J ADv1.0
#BSUB -o %J.out
#BSUB -alloc_flags "NVME"
...
...
@@ -10,7 +10,7 @@ source /ccs/proj/bif128/venvs/env.sh
PROJ=/gpfs/alpine/bif128/proj-shared/redis
gpus=$(( (LSB_MAX_NUM_PROCESSORS-1)/7 ))
echo "Starting $((gpus/6)) node run of ADv1.1 at " `date`
echo "Starting $((gpus/6)) node run of ADv1.0 at " `date`
[ -s $PROJ/shards.rdb ]
REMAKE=$?
...
...
@@ -27,7 +27,6 @@ for((i=0;i<120;i++)); do
[ $? -eq 0 ] && break
sleep 1
done
query set hopper $((gpus*2))
echo "$memb initial members at " `date`
...
...
Write
Preview
Supports
Markdown
0%
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment