Commit cd28ceea authored by Abraham, Subil's avatar Abraham, Subil
Browse files

adding no mpi version

parent 24b2296f
Loading
Loading
Loading
Loading
+136 −0
Original line number Diff line number Diff line
# Base image: CentOS 8 with CUDA 11.0.3. The steps below add a source-built
# TensorFlow 2.5.0 on top of it. JAX is named here as well, but its build
# steps further down are currently commented out.
# NOTE(review): the base is referenced by the mutable `latest` tag, so a
# rebuild may pick up a different base image — consider pinning a tag/digest.
FROM code.ornl.gov:4567/76a/olcfbaseimages/baseimage-centos-cuda:latest


# Enable the PowerTools repository and install the OS-level packages the
# later steps rely on (compilers, JDK, Python headers, HDF5, misc. tools).
# Both dnf invocations share one layer so the final `dnf clean all` keeps the
# package cache out of the image. Each package is listed once, alphabetically.
RUN dnf install -y dnf-plugins-core \
    && dnf config-manager --set-enabled powertools \
    && dnf -y install \
        cuda-command-line-tools-11-0 \
        gcc \
        gcc-c++ \
        gcc-gfortran \
        git \
        glibc-devel \
        gpg \
        hdf5-devel \
        java-11-openjdk-devel \
        kernel-devel \
        libevent \
        libevent-devel \
        libnl3 \
        lsof \
        numactl-libs \
        openssh \
        pciutils \
        perl \
        python3-devel \
        python36 \
        tar \
        tcl \
        tcsh \
        tk \
        tzdata \
        unzip \
        wget \
        zip \
    && dnf -y clean all

	
# Build and install CMake 3.15.7 from the upstream source tarball.
# The tarball and the unpacked source tree are both deleted in this same RUN
# so that neither is left behind in the resulting layer.
RUN wget -q -P /tmp https://cmake.org/files/v3.15/cmake-3.15.7.tar.gz \
    && cd /tmp \
    && tar -xzf cmake-3.15.7.tar.gz \
    && cd cmake-3.15.7 \
    && ./bootstrap \
    && make -j"$(nproc)" \
    && make install \
    && rm -rf /tmp/cmake-3.15.7 /tmp/cmake-3.15.7.tar.gz

# Register NVIDIA's CUDA repository for RHEL 8 / ppc64le and install NCCL
# 2.11.4 built against CUDA 11.0 (runtime, headers and static library).
# The repo definition is fetched over HTTPS, and the install plus cache
# clean-up happen in a single layer.
RUN dnf config-manager --add-repo https://developer.download.nvidia.com/compute/cuda/repos/rhel8/ppc64le/cuda-rhel8.repo \
    && dnf -y install \
        libnccl-2.11.4-1+cuda11.0 \
        libnccl-devel-2.11.4-1+cuda11.0 \
        libnccl-static-2.11.4-1+cuda11.0 \
    && dnf -y clean all



# Install Miniconda (Python 3.9 build for ppc64le) into CONDA_HOME and put
# its bin directory first on PATH.
ENV CONDA_HOME="/opt/conda"
# WORKDIR creates /code if it does not exist, so no separate `mkdir` is
# needed (a plain `mkdir` would fail the build if the directory were present).
WORKDIR /code
ENV _CONDA_INSTALLER="Miniconda3-py39_4.9.2-Linux-ppc64le.sh"

# Download the installer, run it non-interactively (-b) into the prefix given
# by -p, and remove the installer script in the same layer.
RUN wget -q -P /code \
        "https://repo.anaconda.com/miniconda/${_CONDA_INSTALLER}" \
    && bash "/code/${_CONDA_INSTALLER}" -b -p "${CONDA_HOME}" \
    && rm "/code/${_CONDA_INSTALLER}"

ENV PATH="${CONDA_HOME}/bin:$PATH"

# Python dependencies.
# CONDA_REPO is prepended to the conda channel list; the packages themselves
# are requested from conda-forge, followed by a handful of pinned pip packages.
# pip runs with --no-cache-dir and the conda caches are purged at the end so
# that downloaded archives are not kept in this layer.
ARG CONDA_REPO=https://public.dhe.ibm.com/ibmdl/export/pub/software/server/ibm-ai/conda/
RUN conda config --prepend channels "$CONDA_REPO" \
    && conda install -y -c conda-forge numpy==1.19.5 scipy==1.7.0 six wheel pip \
    && conda install -y -c conda-forge cudatoolkit==11.0.3 mock \
    && pip install --no-cache-dir \
         biopython==1.79 \
         dm-haiku==0.0.4 \
         immutabledict==2.0.0 \
         absl-py==0.13.0 \
         ml-collections==0.1.0 \
    && conda clean --all --yes

# TODO
#ENV LD_LIBRARY_PATH="/opt/conda/lib:$LD_LIBRARY_PATH"
#ENV C_INCLUDE_PATH="/opt/conda/include:$C_INCLUDE_PATH"
#ENV CPLUS_INCLUDE_PATH="/opt/conda/include:$CPLUS_INCLUDE_PATH"

# Make the system Python 3 interpreter reachable under the unversioned
# name /usr/bin/python.
RUN ln --symbolic /usr/bin/python3 /usr/bin/python
# Install Bazel 3.7.2 by bootstrapping it from the distribution archive.
# Download, unpack, compile and clean-up all happen in ONE layer: deleting
# /tmp/bazel in a later RUN would not remove it from the earlier layer, and
# the downloaded zip would stay in the image as well.
# EXTRA_BAZEL_ARGS points the bootstrap build at the locally installed JDK.
RUN wget -q -P /tmp https://github.com/bazelbuild/bazel/releases/download/3.7.2/bazel-3.7.2-dist.zip \
    && unzip -q -d /tmp/bazel /tmp/bazel-3.7.2-dist.zip \
    && cd /tmp/bazel \
    && env EXTRA_BAZEL_ARGS="--host_javabase=@local_jdk//:jdk" bash ./compile.sh \
    && cp /tmp/bazel/output/bazel /usr/bin \
    && rm -rf /tmp/bazel /tmp/bazel-3.7.2-dist.zip



## install jax
#RUN cd /tmp && git clone https://github.com/google/jax && cd /tmp/jax && git checkout 1db53b11755a86d69238b4e999ad011d1142e23c && \
#    python build/build.py --bazel_path=/usr/bin/bazel --noenable_mkl_dnn --enable_cuda --cuda_path /usr/local/cuda --cudnn_path /usr --target_cpu=ppc && \
#    pip install dist/*.whl && \
#    rm -rf /tmp/jax 

#RUN pip install jax && \
# Install the pinned opt_einsum release from conda-forge and drop the conda
# download/package caches in the same layer.
RUN conda install -y -c conda-forge opt_einsum==3.3.0 \
    && conda clean --all --yes

# install tensorflow
## keras_preprocessing is installed first, on its own and without pulling in
## its dependencies, because TensorFlow complained when it was missing.
## --no-cache-dir keeps pip's download cache out of the layer.
RUN pip install --no-cache-dir --no-deps keras_preprocessing

# Fetch the TensorFlow v2.5.0 source archive and unpack it to /tmp/tensorflow.
# The zip is deleted in the same layer so it does not remain in the image.
RUN wget -q -P /tmp https://github.com/tensorflow/tensorflow/archive/refs/tags/v2.5.0.zip \
    && unzip -q -d /tmp /tmp/v2.5.0.zip \
    && mv /tmp/tensorflow-2.5.0 /tmp/tensorflow \
    && rm -f /tmp/v2.5.0.zip
#    git checkout r2.5

## Write .tf_configure.bazelrc by hand so the interactive TensorFlow
## ./configure step can be skipped. printf emits each argument below as one
## line of the file, in order.
RUN printf '%s\n' \
    'build --action_env PYTHON_BIN_PATH="/opt/conda/bin/python3"' \
    'build --action_env PYTHON_LIB_PATH="/opt/conda/lib/python3.9/site-packages"' \
    'build --python_path="/opt/conda/bin/python3"' \
    'build:xla --define with_xla_support=true' \
    'build --config=xla' \
    'build --action_env CUDA_TOOLKIT_PATH="/usr/local/cuda-11.0"' \
    'build --action_env TF_CUDA_COMPUTE_CAPABILITIES="3.5,7.0"' \
    'build --action_env LD_LIBRARY_PATH="/sw/peak/spack-envs/base/opt/linux-rhel8-ppc64le/gcc-9.1.0/spectrum-mpi-10.4.0.3-20210112-b5zk3valzzwupvryqfd7ouwwir73wats/lib:/usr/local/cuda/lib:/usr/local/cuda/lib64"' \
    'build --action_env GCC_HOST_COMPILER_PATH="/usr/bin/gcc"' \
    'build --config=cuda' \
    'build --action_env TF_CONFIGURE_IOS="0"' \
    'build:opt --copt=-mcpu=power9' \
    'build:opt --copt=-mtune=power9' \
    'build:opt --host_copt=-mcpu=power9' \
    'build:opt --define with_default_optimizations=true' \
    'test --flaky_test_attempts=3' \
    'test --test_size_filters=small,medium' \
    'test --test_env=LD_LIBRARY_PATH' \
    'test:v1 --test_tag_filters=-benchmark-test,-no_oss,-no_gpu,-oss_serial' \
    'test:v1 --build_tag_filters=-benchmark-test,-no_oss,-no_gpu' \
    'test:v2 --test_tag_filters=-benchmark-test,-no_oss,-no_gpu,-oss_serial,-v1only' \
    'test:v2 --build_tag_filters=-benchmark-test,-no_oss,-no_gpu,-v1only' \
    > /tmp/tensorflow/.tf_configure.bazelrc

# install hdf5 from source


# install h5py from source



#ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/opt/conda/lib"
#ENV C_INCLUDE_PATH="$C_INCLUDE_PATH:/opt/conda/include"
#ENV CPLUS_INCLUDE_PATH="$CPLUS_INCLUDE_PATH:/opt/conda/include"
# Build the TensorFlow pip package with Bazel (v2 config), assemble the wheel
# into /tmp/tensorflow_pkg and install it with pip.
# All of this runs in one layer, followed by clean-up of what the build
# produced (Bazel output tree and cache, the wheel directory, pip's download
# cache), so those intermediates are not stored in the image.
RUN cd /tmp/tensorflow \
    && /usr/bin/bazel build --config=v2 //tensorflow/tools/pip_package:build_pip_package \
    && ./bazel-bin/tensorflow/tools/pip_package/build_pip_package /tmp/tensorflow_pkg \
    && pip install --no-cache-dir /tmp/tensorflow_pkg/tensorflow-2.5.0-cp39-cp39-linux_ppc64le.whl \
    && /usr/bin/bazel clean --expunge \
    && rm -rf /tmp/tensorflow_pkg "${HOME}/.cache/bazel"

#RUN pip install horovod
#ENV LD_LIBRARY_PATH=/sw/peak/spack-envs/base/opt/linux-rhel8-ppc64le/gcc-9.1.0/spectrum-mpi-10.4.0.3-20210112-6jbupg3thjwhsabgevk6xmwhd2bbyxdc/lib:/usr/local/cuda/lib64:/usr/local/cuda/compat:/usr/local/nvidia/lib:/usr/local/nvidia/lib64
#RUN dnf install -y --allowerasing openmpi 

# Build Horovod from source (--no-binary) with MPI disabled, Gloo enabled and
# NCCL used for GPU operations. The NCCL headers and libraries are taken from
# /usr/include and /usr/lib64, and the system gcc/g++ are the compilers.
RUN CC=/usr/bin/gcc \
    CXX=/usr/bin/g++ \
    HOROVOD_WITHOUT_MPI=1 \
    HOROVOD_WITH_GLOO=1 \
    HOROVOD_GPU_OPERATIONS=NCCL \
    HOROVOD_NCCL_INCLUDE=/usr/include \
    HOROVOD_NCCL_LIB=/usr/lib64 \
    pip install --no-cache-dir --no-binary horovod horovod
#RUN  HOROVOD_NCCL_INCLUDE=/usr/include HOROVOD_NCCL_LIB=/usr/lib64 HOROVOD_GPU_OPERATIONS=NCCL CXX=/usr/bin/g++ CC=/usr/bin/gcc pip install --no-cache-dir --no-binary horovod horovod