Commit a2b74a61 authored by Abraham, Subil's avatar Abraham, Subil
Browse files

Merge branch 'main' of code.ornl.gov:76a/olcfbaseimages into main

parents cd28ceea b57f26d3
Loading
Loading
Loading
Loading
+2 −4
Original line number Diff line number Diff line
# CentOS 8, CUDA 11.0.3, TensorFlow 2.5.0, JAX
# NOTE(review): the base image is referenced through the mutable :latest tag,
# so rebuilds are not reproducible; pin a version tag or digest when one is
# available in the registry.
FROM code.ornl.gov:4567/76a/olcfbaseimages/mpiimage-centos-cuda:latest

# Prefix of the MPI installation whose bin/mpicc and bin/mpicxx wrappers are
# used later in this file to compile horovod. Supplied at build time with
# `--build-arg mpi_root=<path>`; there is no default, so an unset value makes
# those compiler paths resolve to /bin/mpicc and /bin/mpicxx.
ARG mpi_root

# dnf-plugins-core provides `dnf config-manager`, which is used to enable the
# powertools repository. The dnf cache is cleaned in the same layer so that it
# is not stored in the image.
RUN dnf install -y dnf-plugins-core \
	&& dnf config-manager --set-enabled powertools \
	&& dnf clean all
@@ -113,9 +114,6 @@ RUN pip install /tmp/tensorflow_pkg/tensorflow-2.5.0-cp39-cp39-linux_ppc64le.whl
# Horovod installation. The commented-out lines below are earlier attempts
# (plain pip install, a Spectrum MPI LD_LIBRARY_PATH, a dnf-installed openmpi)
# that are kept for reference only.
#RUN pip install horovod
#ENV LD_LIBRARY_PATH=/sw/peak/spack-envs/base/opt/linux-rhel8-ppc64le/gcc-9.1.0/spectrum-mpi-10.4.0.3-20210112-6jbupg3thjwhsabgevk6xmwhd2bbyxdc/lib:/usr/local/cuda/lib64:/usr/local/cuda/compat:/usr/local/nvidia/lib:/usr/local/nvidia/lib64
#RUN dnf install -y --allowerasing openmpi 
# Append the OpenMPI 4.0.3rc4 tree under /usr/mpi/gcc to the executable (PATH),
# header (CPATH) and shared-library (LD_LIBRARY_PATH) search paths.
ENV PATH=$PATH:/usr/mpi/gcc/openmpi-4.0.3rc4/bin
ENV CPATH=$CPATH:/usr/mpi/gcc/openmpi-4.0.3rc4/include
ENV LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/mpi/gcc/openmpi-4.0.3rc4/lib64
# Build horovod from source (--no-binary horovod) with the system gcc/g++.
# HOROVOD_WITH_MPI=1 asks the horovod build to require MPI support.
RUN HOROVOD_WITH_MPI=1 CXX=/usr/bin/g++ CC=/usr/bin/gcc pip install --no-cache-dir --no-binary horovod horovod
# Same source build, but compiled with the MPI wrapper compilers found under
# the mpi_root build argument.
# NOTE(review): if the RUN above is also present in the real Dockerfile, horovod
# is already installed when this step runs and pip will presumably skip it, so
# the mpicc/mpicxx build would never happen. This view looks like a diff with
# old and new lines interleaved -- confirm which of the two RUN lines is current.
RUN HOROVOD_WITH_MPI=1 CC=$mpi_root/bin/mpicc CXX=$mpi_root/bin/mpicxx pip install --no-cache-dir --no-binary horovod horovod
#
#ENV LD_LIBRARY_PATH="/opt/conda/lib:$LD_LIBRARY_PATH"
+19 −0
Original line number Diff line number Diff line
#!/bin/bash
#
# Build the TensorFlow 2.5.0 container image from Dockerfile.mpiimagebase_peak
# with podman, export it as a docker archive, and convert that archive into a
# Singularity image (.sif).
#
# Requirements: the `module` command, podman and singularity on PATH, and an
# MPI_ROOT environment variable pointing at the MPI installation after the
# modules below are loaded.

module purge
module load DefApps
module load gcc/9.1.0

# MPI_ROOT is handed to the build as the mpi_root build argument and is bind
# mounted into the build container. Abort with a clear message when it is
# missing instead of letting podman fail on an empty path.
: "${MPI_ROOT:?MPI_ROOT is not set; load an MPI module before running this script}"

scratch_dir=/gpfs/alpine/stf007/world-shared/subil/tmp
image=code.ornl.gov:4567/76a/olcfbaseimages/tensorflowimage-mpiimagebase-centos-cuda:latest
archive=tensorflow_v2.5.0_mpiimagebase.tar
sif=tensorflow_v2.5.0_mpiimagebase.sif

export XDG_CACHE_HOME="${scratch_dir}/podman-cache"

# This is needed because a dnf install step during the container build can fail
# saying that it cannot create a file. That file may be left over from a
# previous build attempt, so it is cleared here. Best effort: a failure of this
# cleanup does not stop the script.
rm -rf /var/cache/dnf

# Raise the open-file limit for this shell; the same limit is passed to the
# build through --ulimit below. Best effort as well.
ulimit -n 262144

# Each bind mount gets its own -v flag so the command does not depend on the
# option parser splitting a comma-separated list.
# Stop on failure so that a failed build is not followed by saving and
# converting a stale image with the same tag from an earlier run.
podman build \
    --build-arg "mpi_root=${MPI_ROOT}" \
    -v "${MPI_ROOT}:${MPI_ROOT}" \
    -v "${scratch_dir}:/tmp" \
    --ulimit nofile=262144:262144 \
    -f Dockerfile.mpiimagebase_peak \
    -t "${image}" \
    . || exit 1

podman save -o "${archive}" "${image}" || exit 1

singularity build "${sif}" "docker-archive://${archive}" || exit 1