# CentOS 8 (ppc64le) image: CUDA 11.0, NCCL 2.11.4, Miniconda (Python 3.9),
# Bazel 3.7.2, TensorFlow 2.5.0 built from source, and Horovod built against
# NCCL with Gloo and without MPI.
# The JAX source build further down is currently commented out.
# NOTE(review): the base image is referenced by the mutable tag "latest", so
# rebuilds are not reproducible; consider pinning a tag or digest.
FROM code.ornl.gov:4567/76a/olcfbaseimages/baseimage-centos-cuda:latest

# "dnf config-manager" comes from dnf-plugins-core; the powertools repo is
# enabled before the main package installation below.
RUN dnf install -y dnf-plugins-core \
    && dnf config-manager --set-enabled powertools

# System toolchain and libraries needed by the source builds below.
RUN dnf -y install \
        kernel-devel \
        cuda-command-line-tools-11-0 \
        java-11-openjdk-devel \
        zip unzip \
        python3-devel \
        tzdata \
        gpg \
        perl lsof numactl-libs pciutils tk libnl3 python36 tcsh gcc-gfortran tcl \
        tar wget git openssh \
        gcc gcc-c++ libevent libevent-devel \
        glibc-devel \
        hdf5-devel \
    && dnf -y clean all

# install cmake 3.15
# The tarball and the build tree are removed in the same layer that created
# them so they do not stay in the image.
RUN wget -q -P /tmp https://cmake.org/files/v3.15/cmake-3.15.7.tar.gz \
    && cd /tmp \
    && tar -xzf cmake-3.15.7.tar.gz \
    && cd cmake-3.15.7 \
    && ./bootstrap \
    && make -j"$(nproc)" \
    && make install \
    && cd /tmp \
    && rm -rf /tmp/cmake-3.15.7 /tmp/cmake-3.15.7.tar.gz

# NCCL 2.11.4 for CUDA 11.0 from the NVIDIA rhel8/ppc64le repository.
# The repo definition is fetched over HTTPS rather than plain HTTP.
RUN dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/ppc64le/cuda-rhel8.repo
RUN dnf -y install \
        libnccl-2.11.4-1+cuda11.0 \
        libnccl-devel-2.11.4-1+cuda11.0 \
        libnccl-static-2.11.4-1+cuda11.0 \
    && dnf -y clean all

# Miniconda (Python 3.9) installed into ${CONDA_HOME}.
ENV CONDA_HOME="/opt/conda"
# WORKDIR creates /code when it does not exist, so no separate mkdir is needed.
WORKDIR /code
ENV _CONDA_INSTALLER="Miniconda3-py39_4.9.2-Linux-ppc64le.sh"
RUN wget -q -P /code "https://repo.anaconda.com/miniconda/${_CONDA_INSTALLER}" \
    && bash "/code/${_CONDA_INSTALLER}" -b -p "${CONDA_HOME}" \
    && rm "/code/${_CONDA_INSTALLER}"
# Put the conda binaries first on PATH so python3/pip/conda resolve to them.
ENV PATH="${CONDA_HOME}/bin:${PATH}"

# Python dependencies: the channel in CONDA_REPO is prepended to the conda
# channel list, then the pinned packages are installed from conda-forge and pip.
ARG CONDA_REPO=https://public.dhe.ibm.com/ibmdl/export/pub/software/server/ibm-ai/conda/
RUN conda config --prepend channels "${CONDA_REPO}" \
    && conda install -y -c conda-forge numpy==1.19.5 scipy==1.7.0 six wheel pip \
    && conda install -y -c conda-forge cudatoolkit==11.0.3 mock \
    && pip install --no-cache-dir \
        biopython==1.79 \
        dm-haiku==0.0.4 \
        immutabledict==2.0.0 \
        absl-py==0.13.0 \
        ml-collections==0.1.0

# TODO: decide whether the conda lib/include directories must be exported.
#ENV LD_LIBRARY_PATH="/opt/conda/lib:$LD_LIBRARY_PATH"
#ENV C_INCLUDE_PATH="/opt/conda/include:$C_INCLUDE_PATH"
#ENV CPLUS_INCLUDE_PATH="/opt/conda/include:$CPLUS_INCLUDE_PATH"

# Provide an unversioned "python" pointing at the system python3.
RUN ln -s /usr/bin/python3 /usr/bin/python

# install bazel
# Bazel 3.7.2 is bootstrapped from the dist archive with the local JDK.
# Download, build and cleanup share one layer so that neither the archive nor
# the build tree is kept in the image.
RUN wget -q -P /tmp https://github.com/bazelbuild/bazel/releases/download/3.7.2/bazel-3.7.2-dist.zip \
    && cd /tmp \
    && unzip -d bazel bazel-3.7.2-dist.zip \
    && rm -f /tmp/bazel-3.7.2-dist.zip \
    && cd /tmp/bazel \
    && env EXTRA_BAZEL_ARGS="--host_javabase=@local_jdk//:jdk" bash ./compile.sh \
    && cp /tmp/bazel/output/bazel /usr/bin \
    && cd /tmp \
    && rm -rf /tmp/bazel

## install jax (disabled)
#RUN cd /tmp && git clone https://github.com/google/jax && cd /tmp/jax && git checkout 1db53b11755a86d69238b4e999ad011d1142e23c && \
#    python build/build.py --bazel_path=/usr/bin/bazel --noenable_mkl_dnn --enable_cuda --cuda_path /usr/local/cuda --cudnn_path /usr --target_cpu=ppc && \
#    pip install dist/*.whl && \
#    rm -rf /tmp/jax
#RUN pip install jax
RUN conda install -y -c conda-forge opt_einsum==3.3.0

# install tensorflow
## weird keras requirement. tensorflow complained so adding this.
RUN pip install keras_preprocessing --no-deps

# Fetch the TensorFlow 2.5.0 release sources into /tmp/tensorflow.
# (Alternative that was considered: git checkout r2.5.)
RUN cd /tmp \
    && wget -q -P /tmp https://github.com/tensorflow/tensorflow/archive/refs/tags/v2.5.0.zip \
    && unzip -d /tmp v2.5.0.zip \
    && rm -f /tmp/v2.5.0.zip \
    && mv /tmp/tensorflow-2.5.0 /tmp/tensorflow

## need this to skip the interactive ./configure for tensorflow
# printf writes one bazelrc line per argument; it does not depend on the
# bash-only $'...' quoting, so it works whichever shell /bin/sh is.
# NOTE(review): the LD_LIBRARY_PATH entry points at a spectrum-mpi directory
# under /sw/peak even though this is the "nompi" image - confirm it is wanted.
RUN printf '%s\n' \
    'build --action_env PYTHON_BIN_PATH="/opt/conda/bin/python3"' \
    'build --action_env PYTHON_LIB_PATH="/opt/conda/lib/python3.9/site-packages"' \
    'build --python_path="/opt/conda/bin/python3"' \
    'build:xla --define with_xla_support=true' \
    'build --config=xla' \
    'build --action_env CUDA_TOOLKIT_PATH="/usr/local/cuda-11.0"' \
    'build --action_env TF_CUDA_COMPUTE_CAPABILITIES="3.5,7.0"' \
    'build --action_env LD_LIBRARY_PATH="/sw/peak/spack-envs/base/opt/linux-rhel8-ppc64le/gcc-9.1.0/spectrum-mpi-10.4.0.3-20210112-b5zk3valzzwupvryqfd7ouwwir73wats/lib:/usr/local/cuda/lib:/usr/local/cuda/lib64"' \
    'build --action_env GCC_HOST_COMPILER_PATH="/usr/bin/gcc"' \
    'build --config=cuda' \
    'build --action_env TF_CONFIGURE_IOS="0"' \
    'build:opt --copt=-mcpu=power9' \
    'build:opt --copt=-mtune=power9' \
    'build:opt --host_copt=-mcpu=power9' \
    'build:opt --define with_default_optimizations=true' \
    'test --flaky_test_attempts=3' \
    'test --test_size_filters=small,medium' \
    'test --test_env=LD_LIBRARY_PATH' \
    'test:v1 --test_tag_filters=-benchmark-test,-no_oss,-no_gpu,-oss_serial' \
    'test:v1 --build_tag_filters=-benchmark-test,-no_oss,-no_gpu' \
    'test:v2 --test_tag_filters=-benchmark-test,-no_oss,-no_gpu,-oss_serial,-v1only' \
    'test:v2 --build_tag_filters=-benchmark-test,-no_oss,-no_gpu,-v1only' \
    > /tmp/tensorflow/.tf_configure.bazelrc

# install hdf5 from source
# install h5py from source
#ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/conda/lib"
#ENV C_INCLUDE_PATH="$C_INCLUDE_PATH:/opt/conda/include"
#ENV CPLUS_INCLUDE_PATH="$CPLUS_INCLUDE_PATH:/opt/conda/include"

# Build the TensorFlow pip package, assemble the wheel, and install it.
# The three steps stay in separate layers so that a failure in a later step
# does not invalidate the cached Bazel build.
# NOTE(review): /tmp/tensorflow, /tmp/tensorflow_pkg and the Bazel output tree
# are never removed and therefore remain in the image - confirm whether a
# cleanup or a multi-stage build is wanted.
RUN cd /tmp/tensorflow \
    && /usr/bin/bazel build --config=v2 //tensorflow/tools/pip_package:build_pip_package
RUN cd /tmp/tensorflow \
    && ./bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg
RUN pip install /tmp/tensorflow_pkg/tensorflow-2.5.0-cp39-cp39-linux_ppc64le.whl

#RUN pip install horovod
#ENV LD_LIBRARY_PATH=/sw/peak/spack-envs/base/opt/linux-rhel8-ppc64le/gcc-9.1.0/spectrum-mpi-10.4.0.3-20210112-6jbupg3thjwhsabgevk6xmwhd2bbyxdc/lib:/usr/local/cuda/lib64:/usr/local/cuda/compat:/usr/local/nvidia/lib:/usr/local/nvidia/lib64
#RUN dnf install -y --allowerasing openmpi

# Horovod is compiled from source (--no-binary) with MPI disabled, Gloo
# enabled, and NCCL used for GPU operations; the NCCL headers and libraries
# are the ones installed from the NVIDIA repository above.
RUN HOROVOD_WITHOUT_MPI=1 \
    HOROVOD_WITH_GLOO=1 \
    HOROVOD_NCCL_INCLUDE=/usr/include \
    HOROVOD_NCCL_LIB=/usr/lib64 \
    HOROVOD_GPU_OPERATIONS=NCCL \
    CXX=/usr/bin/g++ \
    CC=/usr/bin/gcc \
    pip install --no-cache-dir --no-binary horovod horovod
# Variant with MPI left enabled (disabled):
#RUN HOROVOD_NCCL_INCLUDE=/usr/include HOROVOD_NCCL_LIB=/usr/lib64 HOROVOD_GPU_OPERATIONS=NCCL CXX=/usr/bin/g++ CC=/usr/bin/gcc pip install --no-cache-dir --no-binary horovod horovod