# pytorchimage-centos-cuda/Dockerfile.v1.10 (new file, mode 100644, +207 lines)
#
# CentOS 8, CUDA 11.0.3, PyTorch 1.10 built from source.
# The Miniconda installer below is the ppc64le build and all GPU code is
# generated for compute capability 7.0 (sm_70).
FROM code.ornl.gov:4567/76a/olcfbaseimages/mpiimage-centos-cuda:latest

# Enable the "powertools" repository before installing the build prerequisites.
RUN dnf install -y dnf-plugins-core \
    && dnf config-manager --set-enabled powertools

RUN dnf -y install \
        kernel-devel \
        cuda-command-line-tools-11-0 \
        java-11-openjdk-devel \
        zip unzip \
        python3-devel \
        python3 \
        python3-pip \
        tzdata \
        gpg \
        patch \
        glibc-devel \
    && dnf -y clean all

ENV CONDA_HOME="/opt/conda"

# WORKDIR creates /code when it is missing, so no separate `mkdir` is needed
# (a plain `mkdir /code` would fail if the base image already ships the directory).
WORKDIR /code

# Install Miniconda (Python 3.9) into ${CONDA_HOME} and remove the installer afterwards.
ENV _CONDA_INSTALLER="Miniconda3-py39_4.9.2-Linux-ppc64le.sh"
RUN wget -q -P /code \
        https://repo.anaconda.com/miniconda/${_CONDA_INSTALLER} \
    && bash /code/${_CONDA_INSTALLER} -b -p ${CONDA_HOME} \
    && rm /code/${_CONDA_INSTALLER}

# Put conda's python/pip ahead of the system ones for every later step.
ENV PATH="${CONDA_HOME}/bin:$PATH"

RUN conda install -y conda-build conda-verify

# All source trees are cloned and built under INSTALL_ROOT.
ENV INSTALL_ROOT=/code
ENV PYTORCH_VER=1.10.0
RUN mkdir -p ${INSTALL_ROOT}

# Download and build NCCL; only lib/ and include/ are kept under ${INSTALL_ROOT}/nccl.
# NOTE(review): CUDA_HOME is not set in this file - presumably provided by the base image; confirm.
RUN cd ${INSTALL_ROOT} \
    && git clone https://github.com/NVIDIA/nccl.git --branch v2.11.4-1 nccl_build \
    && cd nccl_build \
    && make -j10 src.build CUDA_HOME=${CUDA_HOME} NVCC_GENCODE="-gencode=arch=compute_70,code=sm_70" \
    && cd ${INSTALL_ROOT} \
    && mkdir nccl \
    && mv nccl_build/build/lib nccl \
    && mv nccl_build/build/include nccl \
    && rm -rf nccl_build

# Set to pre-installed NCCL and CUDNN paths
ENV NCCL_ROOT=${INSTALL_ROOT}/nccl

# Install magma (built from the release tarball; the conda package is left disabled below).
ENV TORCH_CUDA_ARCH_LIST="7.0"
RUN conda install -y pip astunparse numpy ninja pyyaml setuptools cmake cffi typing_extensions future six requests dataclasses
RUN conda install -y -c conda-forge lapack==3.9.0
# After `make install`, four internal headers are copied into /usr/include by hand.
# The tarball is deleted once extracted so it does not stay in the image layer.
RUN cd /code \
    && wget -q -P /code http://icl.utk.edu/projectsfiles/magma/downloads/magma-2.6.1.tar.gz \
    && tar -xzf magma-2.6.1.tar.gz \
    && rm -f magma-2.6.1.tar.gz \
    && cd magma-2.6.1 \
    && mkdir build \
    && cd build \
    && cmake -DBUILD_SHARED_LIBS=ON -DUSE_FORTRAN=yes -DGPU_TARGET=sm_70 -DMAGMA_ENABLE_CUDA=ON -DCUDA_NVCC_FLAGS="-Xcompiler;-mno-float128" .. \
    && make install \
    && cd .. \
    && cp magmablas/atomics.cuh control/magma_threadsetting.h control/pthread_barrier.h \
          control/magma_internal.h /usr/include
#conda install -y magma-cuda110

## Install PyTorch
# A git identity is configured first; presumably required so the cherry-pick below can commit.
RUN git config --global user.email "you@example.com" \
    && git config --global user.name "Your Name"

# Check out the release tag and apply one extra upstream commit on top of it.
RUN cd ${INSTALL_ROOT} \
    && git clone --recursive --branch v${PYTORCH_VER} https://github.com/pytorch/pytorch.git pytorch \
    && cd pytorch \
    && git cherry-pick 7452b65144340d199e99c5d69849414d970002fa \
    && python setup.py clean

ENV MAX_JOBS=10
# Docker's ENV only performs ${VAR} substitution; it never runs $(...) command
# substitution, so the former `${CONDA_PREFIX:-"$(dirname $(which conda))/../"}`
# produced a literal string instead of a path. The conda prefix is CONDA_HOME.
ENV CMAKE_PREFIX_PATH=${CONDA_HOME}
ENV USE_CUDNN=1
ENV CUDNN_INCLUDE_DIR="/usr/include"
ENV CUDNN_LIB_DIR="/usr/lib"
ENV NCCL_INCLUDE_DIR="${NCCL_ROOT}/include/"
ENV NCCL_LIB_DIR="${NCCL_ROOT}/lib/"
ENV USE_SYSTEM_NCCL=1
ENV USE_NINJA=OFF
ENV USE_BREAKPAD=OFF

RUN cd ${INSTALL_ROOT}/pytorch \
    && python3 setup.py -v install

# Hide libgomp inside env (it has an old one included that causes compatibility issues when importing torch).
# libstdc++ is moved out of the way together with it.
RUN cd ${CONDA_HOME}/lib \
    && mkdir hide \
    && mv libgomp* libstdc++* hide/

# Install DGL
RUN pip install pybind11 pythran scipy
# gklib_patch comes from the build context and is applied to the bundled GKlib sources.
COPY ./gklib_patch /code
# The parallel build is capped at MAX_JOBS; a bare `make -j` places no limit on
# the number of concurrent jobs and can exhaust memory on the build host.
RUN cd ${INSTALL_ROOT} \
    && git clone --recurse-submodules https://github.com/dmlc/dgl.git --branch=0.7.1 \
    && cd dgl \
    && cd third_party/METIS/GKlib \
    && git apply ${INSTALL_ROOT}/gklib_patch \
    && cd ../../../ \
    && mkdir build \
    && cd build \
    && cmake -DUSE_CUDA=ON -DCUDA_ARCH_NAME=Manual -DCUDA_ARCH_BIN="70" -DUSE_AVX=OFF \
             -DUSE_LIBXSMM=OFF -DBUILD_TORCH=ON .. \
    && make -j${MAX_JOBS} \
    && cd ../python \
    && python setup.py install

# Install Apex with its C++ and CUDA extensions.
# NOTE(review): the clone is not pinned to a tag or commit, so rebuilds may pick up different code.
RUN cd ${INSTALL_ROOT} \
    && git clone https://github.com/NVIDIA/apex.git \
    && cd apex \
    && pip install --no-cache-dir --disable-pip-version-check --global-option="--cpp_ext" --global-option="--cuda_ext" .
## Install DALI_deps
# Make libraries installed under /usr/local visible to the linker, the loader and pkg-config.
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib
ENV LD_RUN_PATH=${LD_RUN_PATH}:/usr/local/lib
ENV PKG_CONFIG_PATH=${PKG_CONFIG_PATH}:/usr/local/lib/pkgconfig

# Steps: shallow-clone DALI_deps with its submodules, strip the "cmake" entry from
# build_deps.sh, run libtoolize in each autotools-based third-party tree, then
# run the bundled build script.
RUN cd ${INSTALL_ROOT} \
    && git clone https://github.com/NVIDIA/DALI_deps --depth 1 --branch v1.6.0 \
    && cd DALI_deps \
    && git submodule init \
    && git submodule update --depth 1 --recursive \
    && sed -i 's/"cmake"//' build_scripts/build_deps.sh \
    && dnf install -y libtool clang-devel autogen automake \
    && pip install clang \
    && for dep in protobuf flac ogg vorbis opus libtar; do \
           (cd ${INSTALL_ROOT}/DALI_deps/third_party/${dep} && libtoolize) || exit 1; \
       done \
    && cd ${INSTALL_ROOT}/DALI_deps \
    && build_scripts/build_deps.sh

# Install Torchvision
# NOTE(review): v0.10.0 is built against the PyTorch 1.10 tree; confirm this pairing is intended.
RUN cd ${INSTALL_ROOT} \
    && git clone --depth 1 --branch v0.10.0 https://github.com/pytorch/vision \
    && cd vision \
    && python setup.py install
#&& dnf install -y libjpeg-turbo-devel \

# build opencv (need this to build libquirc.a and libade.a)
# Only the two static libraries are copied out of the build tree; `make install`
# is never run and the source tree is deleted afterwards.
# NOTE(review): the destination directory must already exist - presumably created by the DALI_deps build; verify.
RUN cd ${INSTALL_ROOT} \
    && git clone https://github.com/opencv/opencv --branch 4.5.3 \
    && cd opencv \
    && mkdir build \
    && cd build \
    && cmake -D CMAKE_BUILD_TYPE=RELEASE -D CMAKE_INSTALL_PREFIX=/usr/local -D INSTALL_PYTHON_EXAMPLES=ON -D INSTALL_C_EXAMPLES=ON .. \
    && make -j32 \
    && cp /code/opencv/build/3rdparty/lib/libquirc.a /usr/local/lib64/opencv4/3rdparty/ \
    && cp /code/opencv/build/3rdparty/lib/libade.a /usr/local/lib64/opencv4/3rdparty/ \
    && cd ${INSTALL_ROOT} \
    && rm -rf opencv

# Install horovod
RUN pip install horovod

## Install DALI (not working)
# Everything below is disabled; it records the DALI build attempts that did not succeed.
#COPY ./cmake_patch /code
#RUN cd ${INSTALL_ROOT} \
#    && git clone --recurse-submodules --shallow-submodules https://github.com/NVIDIA/DALI --depth 1 --branch v1.6.0 \
#    && cd DALI \
#    && git apply /code/cmake_patch \
#    && mkdir build \
#    && cd build \
#    && ln -s /usr/lib64/libclang.so /usr/lib64/libclang-11.so \
#    && cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_NVJPEG2K=OFF -DFFMPEG_ROOT_DIR=${CONDA_HOME} -DJPEG_INCLUDE_DIR=/usr/local/include -DJPEG_LIBRARY=/usr/local/lib/libturbojpeg.so -DCUDA_TARGET_ARCHS=70 -DBUILD_PROTOBUF=ON -DProtobuf_LIBRARY=/usr/local/lib/libprotobuf.a -DFFmpeg_Libavcodec=/usr/local/lib/libavcodec.so -DFFmpeg_Libavfilter=/usr/local/lib/libavfilter.so -DFFmpeg_Libavformat=/usr/local/lib/libavformat.so -DFFmpeg_Libavutil=/usr/local/lib/libavutil.so .. \
#    && make -j1 \
#    && make install \
#    && pip install dali/python

### failed attempt 1
#RUN cd ${INSTALL_ROOT} \
#    && git clone --recurse-submodules --shallow-submodules https://github.com/NVIDIA/DALI --depth 1 --branch v1.6.0 \
#    && cd DALI \
#    && mkdir build \
#    && cd build \
#    && dnf install -y clang-devel \
#    && pip install clang \
#    && ln -s /usr/lib64/libclang.so /usr/lib64/libclang-11.so \
#    && conda install -y -c conda-forge opencv libsndfile libtar ffmpeg protobuf libjpeg-turbo \
#    && cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_NVJPEG2K=OFF -DFFMPEG_ROOT_DIR=${CONDA_HOME} -DJPEG_INCLUDE_DIR=${CONDA_HOME}/include -DJPEG_LIBRARY=${CONDA_HOME}/lib/libturbojpeg.so -DCUDA_TARGET_ARCHS=70 -DBUILD_PROTOBUF=ON -DProtobuf_LIBRARY=/opt/conda/lib/libprotobuf.so .. \
#    && make -j4 \
#    && make install \
#    && pip install dali/python

### failed attempt 2
## dnf install -y clang-devel opencv libsndfile libtar protobuf
#    && pip install clang \
#    && ln -s /usr/lib64/libclang.so /usr/lib64/libclang-11.so \
#    && conda install -y -c conda-forge ffmpeg \
#    && cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_NVJPEG2K=OFF -DFFMPEG_ROOT_DIR=${CONDA_HOME} -DCUDA_TARGET_ARCHS=70 -DBUILD_PROTOBUF=ON -DProtobuf_LIBRARY=/usr/lib64/libprotobuf.so .. \

# Install DLProf
# TODO: no install steps follow this heading in the file as shown.

# Diff-viewer note: pytorchimage-centos-cuda/pytorch110build.log was added in the
# same change (new file, mode 100644, +5253 lines); its preview was collapsed.
# pytorchimage-centos-cuda/Dockerfile.v1.10 (new file, mode 100644, +207 lines)
#
# CentOS 8, CUDA 11.0.3, PyTorch 1.10 built from source.
# The Miniconda installer below is the ppc64le build and all GPU code is
# generated for compute capability 7.0 (sm_70).
FROM code.ornl.gov:4567/76a/olcfbaseimages/mpiimage-centos-cuda:latest

# Enable the "powertools" repository before installing the build prerequisites.
RUN dnf install -y dnf-plugins-core \
    && dnf config-manager --set-enabled powertools

RUN dnf -y install \
        kernel-devel \
        cuda-command-line-tools-11-0 \
        java-11-openjdk-devel \
        zip unzip \
        python3-devel \
        python3 \
        python3-pip \
        tzdata \
        gpg \
        patch \
        glibc-devel \
    && dnf -y clean all

ENV CONDA_HOME="/opt/conda"

# WORKDIR creates /code when it is missing, so no separate `mkdir` is needed
# (a plain `mkdir /code` would fail if the base image already ships the directory).
WORKDIR /code

# Install Miniconda (Python 3.9) into ${CONDA_HOME} and remove the installer afterwards.
ENV _CONDA_INSTALLER="Miniconda3-py39_4.9.2-Linux-ppc64le.sh"
RUN wget -q -P /code \
        https://repo.anaconda.com/miniconda/${_CONDA_INSTALLER} \
    && bash /code/${_CONDA_INSTALLER} -b -p ${CONDA_HOME} \
    && rm /code/${_CONDA_INSTALLER}

# Put conda's python/pip ahead of the system ones for every later step.
ENV PATH="${CONDA_HOME}/bin:$PATH"

RUN conda install -y conda-build conda-verify

# All source trees are cloned and built under INSTALL_ROOT.
ENV INSTALL_ROOT=/code
ENV PYTORCH_VER=1.10.0
RUN mkdir -p ${INSTALL_ROOT}

# Download and build NCCL; only lib/ and include/ are kept under ${INSTALL_ROOT}/nccl.
# NOTE(review): CUDA_HOME is not set in this file - presumably provided by the base image; confirm.
RUN cd ${INSTALL_ROOT} \
    && git clone https://github.com/NVIDIA/nccl.git --branch v2.11.4-1 nccl_build \
    && cd nccl_build \
    && make -j10 src.build CUDA_HOME=${CUDA_HOME} NVCC_GENCODE="-gencode=arch=compute_70,code=sm_70" \
    && cd ${INSTALL_ROOT} \
    && mkdir nccl \
    && mv nccl_build/build/lib nccl \
    && mv nccl_build/build/include nccl \
    && rm -rf nccl_build

# Set to pre-installed NCCL and CUDNN paths
ENV NCCL_ROOT=${INSTALL_ROOT}/nccl

# Install magma (built from the release tarball; the conda package is left disabled below).
ENV TORCH_CUDA_ARCH_LIST="7.0"
RUN conda install -y pip astunparse numpy ninja pyyaml setuptools cmake cffi typing_extensions future six requests dataclasses
RUN conda install -y -c conda-forge lapack==3.9.0
# After `make install`, four internal headers are copied into /usr/include by hand.
# The tarball is deleted once extracted so it does not stay in the image layer.
RUN cd /code \
    && wget -q -P /code http://icl.utk.edu/projectsfiles/magma/downloads/magma-2.6.1.tar.gz \
    && tar -xzf magma-2.6.1.tar.gz \
    && rm -f magma-2.6.1.tar.gz \
    && cd magma-2.6.1 \
    && mkdir build \
    && cd build \
    && cmake -DBUILD_SHARED_LIBS=ON -DUSE_FORTRAN=yes -DGPU_TARGET=sm_70 -DMAGMA_ENABLE_CUDA=ON -DCUDA_NVCC_FLAGS="-Xcompiler;-mno-float128" .. \
    && make install \
    && cd .. \
    && cp magmablas/atomics.cuh control/magma_threadsetting.h control/pthread_barrier.h \
          control/magma_internal.h /usr/include
#conda install -y magma-cuda110

## Install PyTorch
# A git identity is configured first; presumably required so the cherry-pick below can commit.
RUN git config --global user.email "you@example.com" \
    && git config --global user.name "Your Name"

# Check out the release tag and apply one extra upstream commit on top of it.
RUN cd ${INSTALL_ROOT} \
    && git clone --recursive --branch v${PYTORCH_VER} https://github.com/pytorch/pytorch.git pytorch \
    && cd pytorch \
    && git cherry-pick 7452b65144340d199e99c5d69849414d970002fa \
    && python setup.py clean

ENV MAX_JOBS=10
# Docker's ENV only performs ${VAR} substitution; it never runs $(...) command
# substitution, so the former `${CONDA_PREFIX:-"$(dirname $(which conda))/../"}`
# produced a literal string instead of a path. The conda prefix is CONDA_HOME.
ENV CMAKE_PREFIX_PATH=${CONDA_HOME}
ENV USE_CUDNN=1
ENV CUDNN_INCLUDE_DIR="/usr/include"
ENV CUDNN_LIB_DIR="/usr/lib"
ENV NCCL_INCLUDE_DIR="${NCCL_ROOT}/include/"
ENV NCCL_LIB_DIR="${NCCL_ROOT}/lib/"
ENV USE_SYSTEM_NCCL=1
ENV USE_NINJA=OFF
ENV USE_BREAKPAD=OFF

RUN cd ${INSTALL_ROOT}/pytorch \
    && python3 setup.py -v install

# Hide libgomp inside env (it has an old one included that causes compatibility issues when importing torch).
# libstdc++ is moved out of the way together with it.
RUN cd ${CONDA_HOME}/lib \
    && mkdir hide \
    && mv libgomp* libstdc++* hide/

# Install DGL
RUN pip install pybind11 pythran scipy
# gklib_patch comes from the build context and is applied to the bundled GKlib sources.
COPY ./gklib_patch /code
# The parallel build is capped at MAX_JOBS; a bare `make -j` places no limit on
# the number of concurrent jobs and can exhaust memory on the build host.
RUN cd ${INSTALL_ROOT} \
    && git clone --recurse-submodules https://github.com/dmlc/dgl.git --branch=0.7.1 \
    && cd dgl \
    && cd third_party/METIS/GKlib \
    && git apply ${INSTALL_ROOT}/gklib_patch \
    && cd ../../../ \
    && mkdir build \
    && cd build \
    && cmake -DUSE_CUDA=ON -DCUDA_ARCH_NAME=Manual -DCUDA_ARCH_BIN="70" -DUSE_AVX=OFF \
             -DUSE_LIBXSMM=OFF -DBUILD_TORCH=ON .. \
    && make -j${MAX_JOBS} \
    && cd ../python \
    && python setup.py install

# Install Apex with its C++ and CUDA extensions.
# NOTE(review): the clone is not pinned to a tag or commit, so rebuilds may pick up different code.
RUN cd ${INSTALL_ROOT} \
    && git clone https://github.com/NVIDIA/apex.git \
    && cd apex \
    && pip install --no-cache-dir --disable-pip-version-check --global-option="--cpp_ext" --global-option="--cuda_ext" .
## Install DALI_deps
# Make libraries installed under /usr/local visible to the linker, the loader and pkg-config.
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH}:/usr/local/lib
ENV LD_RUN_PATH=${LD_RUN_PATH}:/usr/local/lib
ENV PKG_CONFIG_PATH=${PKG_CONFIG_PATH}:/usr/local/lib/pkgconfig

# Steps: shallow-clone DALI_deps with its submodules, strip the "cmake" entry from
# build_deps.sh, run libtoolize in each autotools-based third-party tree, then
# run the bundled build script.
RUN cd ${INSTALL_ROOT} \
    && git clone https://github.com/NVIDIA/DALI_deps --depth 1 --branch v1.6.0 \
    && cd DALI_deps \
    && git submodule init \
    && git submodule update --depth 1 --recursive \
    && sed -i 's/"cmake"//' build_scripts/build_deps.sh \
    && dnf install -y libtool clang-devel autogen automake \
    && pip install clang \
    && for dep in protobuf flac ogg vorbis opus libtar; do \
           (cd ${INSTALL_ROOT}/DALI_deps/third_party/${dep} && libtoolize) || exit 1; \
       done \
    && cd ${INSTALL_ROOT}/DALI_deps \
    && build_scripts/build_deps.sh

# Install Torchvision
# NOTE(review): v0.10.0 is built against the PyTorch 1.10 tree; confirm this pairing is intended.
RUN cd ${INSTALL_ROOT} \
    && git clone --depth 1 --branch v0.10.0 https://github.com/pytorch/vision \
    && cd vision \
    && python setup.py install
#&& dnf install -y libjpeg-turbo-devel \

# build opencv (need this to build libquirc.a and libade.a)
# Only the two static libraries are copied out of the build tree; `make install`
# is never run and the source tree is deleted afterwards.
# NOTE(review): the destination directory must already exist - presumably created by the DALI_deps build; verify.
RUN cd ${INSTALL_ROOT} \
    && git clone https://github.com/opencv/opencv --branch 4.5.3 \
    && cd opencv \
    && mkdir build \
    && cd build \
    && cmake -D CMAKE_BUILD_TYPE=RELEASE -D CMAKE_INSTALL_PREFIX=/usr/local -D INSTALL_PYTHON_EXAMPLES=ON -D INSTALL_C_EXAMPLES=ON .. \
    && make -j32 \
    && cp /code/opencv/build/3rdparty/lib/libquirc.a /usr/local/lib64/opencv4/3rdparty/ \
    && cp /code/opencv/build/3rdparty/lib/libade.a /usr/local/lib64/opencv4/3rdparty/ \
    && cd ${INSTALL_ROOT} \
    && rm -rf opencv

# Install horovod
RUN pip install horovod

## Install DALI (not working)
# Everything below is disabled; it records the DALI build attempts that did not succeed.
#COPY ./cmake_patch /code
#RUN cd ${INSTALL_ROOT} \
#    && git clone --recurse-submodules --shallow-submodules https://github.com/NVIDIA/DALI --depth 1 --branch v1.6.0 \
#    && cd DALI \
#    && git apply /code/cmake_patch \
#    && mkdir build \
#    && cd build \
#    && ln -s /usr/lib64/libclang.so /usr/lib64/libclang-11.so \
#    && cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_NVJPEG2K=OFF -DFFMPEG_ROOT_DIR=${CONDA_HOME} -DJPEG_INCLUDE_DIR=/usr/local/include -DJPEG_LIBRARY=/usr/local/lib/libturbojpeg.so -DCUDA_TARGET_ARCHS=70 -DBUILD_PROTOBUF=ON -DProtobuf_LIBRARY=/usr/local/lib/libprotobuf.a -DFFmpeg_Libavcodec=/usr/local/lib/libavcodec.so -DFFmpeg_Libavfilter=/usr/local/lib/libavfilter.so -DFFmpeg_Libavformat=/usr/local/lib/libavformat.so -DFFmpeg_Libavutil=/usr/local/lib/libavutil.so .. \
#    && make -j1 \
#    && make install \
#    && pip install dali/python

### failed attempt 1
#RUN cd ${INSTALL_ROOT} \
#    && git clone --recurse-submodules --shallow-submodules https://github.com/NVIDIA/DALI --depth 1 --branch v1.6.0 \
#    && cd DALI \
#    && mkdir build \
#    && cd build \
#    && dnf install -y clang-devel \
#    && pip install clang \
#    && ln -s /usr/lib64/libclang.so /usr/lib64/libclang-11.so \
#    && conda install -y -c conda-forge opencv libsndfile libtar ffmpeg protobuf libjpeg-turbo \
#    && cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_NVJPEG2K=OFF -DFFMPEG_ROOT_DIR=${CONDA_HOME} -DJPEG_INCLUDE_DIR=${CONDA_HOME}/include -DJPEG_LIBRARY=${CONDA_HOME}/lib/libturbojpeg.so -DCUDA_TARGET_ARCHS=70 -DBUILD_PROTOBUF=ON -DProtobuf_LIBRARY=/opt/conda/lib/libprotobuf.so .. \
#    && make -j4 \
#    && make install \
#    && pip install dali/python

### failed attempt 2
## dnf install -y clang-devel opencv libsndfile libtar protobuf
#    && pip install clang \
#    && ln -s /usr/lib64/libclang.so /usr/lib64/libclang-11.so \
#    && conda install -y -c conda-forge ffmpeg \
#    && cmake -DCMAKE_BUILD_TYPE=Release -DBUILD_NVJPEG2K=OFF -DFFMPEG_ROOT_DIR=${CONDA_HOME} -DCUDA_TARGET_ARCHS=70 -DBUILD_PROTOBUF=ON -DProtobuf_LIBRARY=/usr/lib64/libprotobuf.so .. \

# Install DLProf
# TODO: no install steps follow this heading in the file as shown.
pytorchimage-centos-cuda/pytorch110build.log 0 → 100644 +5253 −0 File added.Preview size limit exceeded, changes collapsed. Show changes