Skip to content
GitLab
Menu
Projects
Groups
Snippets
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Sign in
Toggle navigation
Menu
Open sidebar
Rogers, David
launchad
Commits
4125d09c
Commit
4125d09c
authored
Jun 10, 2020
by
David M. Rogers
Browse files
Ready to test docking steps.
parent
57f1e9a6
Changes
2
Hide whitespace changes
Inline
Side-by-side
loadem.py
View file @
4125d09c
...
...
@@ -3,13 +3,13 @@
import
os
,
subprocess
import
redis
,
time
,
random
test
=
True
test
=
False
testone
=
True
conn_retries
=
0
def
get_shard
(
host
):
def
run_redis
(
fn
):
global
conn_retries
for
i
in
range
(
120
):
try
:
r
=
redis
.
StrictRedis
(
host
=
host
,
port
=
6379
,
password
=
"Z1908840168_2_T1"
,
db
=
0
)
...
...
@@ -19,11 +19,15 @@ def get_shard(host):
time
.
sleep
(
random
.
random
()
*
0.2
)
else
:
raise
redis
.
exceptions
.
ConnectionError
u
=
fn
(
r
)
shard
=
r
.
spop
(
'shards'
)
r
.
connection_pool
.
disconnect
()
del
r
return
u
def get_shard(host):
    """Pop one member from the Redis set 'shards' and return it as text.

    The pop is performed through ``run_redis``, which is handed a callable
    that receives the Redis client and issues ``spop('shards')``.

    ``host`` is accepted for the caller's convenience but is not read in
    this function body.

    Returns the popped value decoded as UTF-8, or ``None`` unchanged when
    ``run_redis`` hands back ``None``.
    """
    raw = run_redis(lambda conn: conn.spop('shards'))
    return raw if raw is None else raw.decode('utf8')
...
...
@@ -33,9 +37,10 @@ def main(argv):
assert
len
(
argv
)
==
2
,
"Usage: %s <redis host>"
host
=
argv
[
1
]
me
=
int
(
os
.
environ
[
'OMPI_COMM_WORLD_RANK'
])
ofile
=
open
(
'/gpfs/alpine/world-shared/bif128/docked/logs/rank%04x.log'
%
me
,
"w"
)
time
.
sleep
(
me
*
0.0001
)
# 10k connections per second at startup
rank
=
int
(
os
.
environ
[
'OMPI_COMM_WORLD_RANK'
])
username
=
os
.
environ
[
'USER'
]
ofile
=
open
(
'/gpfs/alpine/world-shared/bif128/docked/logs/rank%04x.log'
%
rank
,
"w"
)
time
.
sleep
(
rank
*
0.0001
)
# 10k connections per second at startup
n
=
0
while
True
:
...
...
@@ -49,18 +54,21 @@ def main(argv):
ret
=
subprocess
.
call
(
cmd
)
if
ret
:
ofile
.
write
(
"%s ERR"
%
shard
)
#
r.sadd('errors', shard)
# FIXME - make this better.
run_redis
(
lambda
r
:
r
.
sadd
(
'errors'
,
shard
)
)
else
:
ofile
.
write
(
"%s OK
\n
"
%
shard
)
n
+=
1
if
n
%
10
==
0
:
# 13k of these messages.
ofile
.
flush
()
print
(
"Host %04x processed %d shards."
%
(
me
,
n
))
print
(
"Host %04x processed %d decishards."
%
(
rank
,
n
))
if
testone
:
break
ofile
.
close
()
print
(
"Host %04x completed (%d shards processed)."
%
(
me
,
n
))
print
(
"%d connection retries"
%
conn_retries
)
print
(
"Host %04x completed (%d decishards processed)."
%
(
rank
,
n
))
print
(
"Host %04x %d connection retries"
%
(
rank
,
conn_retries
))
ret
=
subprocess
.
call
(
"rm -fr /mnt/bb/%s/%d"
%
(
user
,
rank
),
shell
=
True
)
if
__name__
==
"__main__"
:
import
sys
...
...
run_docking.lsf
0 → 100644
View file @
4125d09c
#BSUB -nnodes 60
#BSUB -W 30
#BSUB -q debug
#BSUB -P BIF128
#BSUB -J ADv1
#BSUB -o %J.out
# LSF batch script: start a job-local redis-server, populate the shard set
# when no saved database is found, run loadem.py through jsrun, then report
# what is left in the 'shards' and 'errors' sets.
# NOTE(review): `query` is not defined in this script; presumably env.sh
# provides it as a redis client wrapper -- confirm.
source /ccs/proj/bif128/venvs/env.sh

PROJ=/gpfs/alpine/bif128/proj-shared/redis

# Number of resource sets handed to jsrun (-n); each takes 7 cores (-c7),
# and the node count printed below is gpus/6 (matching -r6).
gpus=$(( (LSB_MAX_NUM_PROCESSORS-1)/7 ))
echo "Starting $((gpus/6)) node run at " $(date)

[ -s "$PROJ/shards.rdb" ]
REMAKE=$?
# REMAKE == 0 if file exists and has nonzero size

# Send stdout to the log first and only then duplicate stderr onto it; the
# reverse order ("2>&1 >file") leaves stderr out of the log file.
redis-server shards.conf >"$PROJ/shard_$LSB_JOBID.log" 2>&1 &

if [ "$REMAKE" -eq 1 ]; then
    python setdb.py $(hostname) /gpfs/alpine/bif128/world-shared/ligand_shards.txt
fi

# Poll for up to 120 attempts until the query succeeds.  The exit status is
# tested directly on the query (an assignment from a command substitution
# returns that command's status) instead of on the `sleep` that followed it.
for ((i=0; i<120; i++)); do
    memb=$(query scard shards) && break
    sleep 1
done
echo "$memb initial members at " $(date)

jsrun -X 0 \
    -n $gpus -r6 -a1 -g1 -c7 -d cyclic -b packed:7 \
    python loadem.py $(hostname)

memb=$(query scard shards)
echo "$memb members remain at " $(date)
memb=$(query scard errors)
echo "$memb errors"
# Default to 0 so an empty reply does not make the numeric test error out.
if [ "${memb:-0}" -gt 0 ]; then
    query smembers errors
fi

# Stop the current background job (the redis-server started above).
kill %
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment