Commit 7d25d855 authored by Abraham, Subil's avatar Abraham, Subil
Browse files

failed DALI attempts. Giving up for now. Let's just get a regular pytorch container going

parent c91d7c6d
Loading
Loading
Loading
Loading
+61 −5
Original line number Diff line number Diff line
# Centos 8, Cuda 11.0.3, Pytorch 1.9 built from source
# NOTE(review): the base image is referenced by the floating `latest` tag, so the
# versions named above are only as stable as that tag — consider pinning a tag or digest.
FROM code.ornl.gov:4567/76a/olcfbaseimages/mpiimage-centos-cuda:latest

# Install dnf-plugins-core, then use `dnf config-manager` to enable the
# `powertools` repository ahead of the package installs that follow.
RUN dnf install -y dnf-plugins-core \
	&& dnf config-manager --set-enabled powertools
RUN dnf -y install \
	kernel-devel \
	cuda-command-line-tools-11-0 \
@@ -116,25 +118,79 @@ RUN cd ${INSTALL_ROOT} \
	&& cd apex \
	&& pip install --no-cache-dir --disable-pip-version-check --global-option="--cpp_ext" --global-option="--cuda_ext" .



## Install DALI_deps
# Append /usr/local/lib (and its pkgconfig directory) to the library, run-path and
# pkg-config search paths — presumably where the DALI_deps build installs its
# output; confirm against build_scripts/build_deps.sh.
#
# `${VAR:+${VAR}:}` expands to "<current value>:" only when VAR is already set and
# non-empty. The plain `$VAR:/path` form produced a leading ":" whenever the base
# image did not define VAR, i.e. an empty search-path entry (which tools commonly
# interpret as the current working directory).
ENV LD_LIBRARY_PATH=${LD_LIBRARY_PATH:+${LD_LIBRARY_PATH}:}/usr/local/lib
ENV LD_RUN_PATH=${LD_RUN_PATH:+${LD_RUN_PATH}:}/usr/local/lib
ENV PKG_CONFIG_PATH=${PKG_CONFIG_PATH:+${PKG_CONFIG_PATH}:}/usr/local/lib/pkgconfig
# Build the NVIDIA DALI third-party dependencies from the DALI_deps v1.6.0 tag.
# Steps, in order:
#   1. Shallow-clone DALI_deps and fetch its submodules (depth 1).
#   2. Delete the "cmake" entry from build_scripts/build_deps.sh
#      (presumably so the script does not rebuild cmake — TODO confirm).
#   3. Install the autotools/clang build prerequisites; the dnf cache is cleaned
#      and pip's cache disabled in this same layer so neither ends up in the image.
#   4. Run `libtoolize` in each autotools-based submodule; the loop aborts the
#      build on the first failure, like the `&&` chain it replaces.
#   5. Run the DALI_deps build script from the repository root.
RUN cd ${INSTALL_ROOT} \
	&& git clone https://github.com/NVIDIA/DALI_deps --depth 1 --branch v1.6.0 \
	&& cd DALI_deps \
	&& git submodule init \
	&& git submodule update --depth 1 --recursive \
	&& sed -i "s/\"cmake\"//" build_scripts/build_deps.sh \
	&& dnf install -y libtool clang-devel autogen automake \
	&& dnf clean all \
	&& pip install --no-cache-dir clang \
	&& for dep in protobuf flac ogg vorbis opus libtar; do \
		(cd ${INSTALL_ROOT}/DALI_deps/third_party/${dep} && libtoolize) || exit 1; \
	done \
	&& cd ${INSTALL_ROOT}/DALI_deps \
	&& build_scripts/build_deps.sh

# Install Torchvision
# Shallow-clones the torchvision v0.10.0 tag, installs libjpeg-turbo-devel with dnf,
# then installs torchvision via `python setup.py install`.
# NOTE(review): the commented-out line after the RUN repeats the dnf step above;
# it looks like a leftover from moving or disabling that step — confirm which
# variant is intended before relying on the system libjpeg-turbo.
RUN cd ${INSTALL_ROOT} \
	&& git clone --depth 1 --branch v0.10.0 https://github.com/pytorch/vision \
	&& cd vision \
	&& dnf install -y libjpeg-turbo-devel \
	&& python setup.py install
	#&& dnf install -y libjpeg-turbo-devel \

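# Manual notes (not automated yet): build opencv first,

# then copy the bundled third-party static libraries next to the install:
#cp /code/opencv/build/3rdparty/lib/libquirc.a /usr/local/lib64/opencv4/3rdparty/
#cp /code/opencv/build/3rdparty/lib/libade.a /usr/local/lib64/opencv4/3rdparty/
# and finally delete the opencv directory.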
## Install DALI (not working)
#RUN cd ${INSTALL_ROOT} \
#	&& git clone --recurse-submodules --shallow-submodules https://github.com/NVIDIA/DALI --depth 1 --branch v1.5.0 \
#	&& git clone --recurse-submodules --shallow-submodules https://github.com/NVIDIA/DALI --depth 1 --branch v1.6.0 \
#	&& cd DALI \
#	&& mkdir build \
#	&& cd build \
#	&& conda install -y -c conda-forge opencv libsndfile libtar libavformat \
#	&& cmake -D CMAKE_BUILD_TYPE=Release .. \
#	&& ln -s /usr/lib64/libclang.so /usr/lib64/libclang-11.so \
#	&&  cmake  -DCMAKE_BUILD_TYPE=Release -DBUILD_NVJPEG2K=OFF -DFFMPEG_ROOT_DIR=${CONDA_HOME} -DJPEG_INCLUDE_DIR=/usr/local/include -DJPEG_LIBRARY=/usr/local/lib/libturbojpeg.so -DCUDA_TARGET_ARCHS=70 -DBUILD_PROTOBUF=ON -DProtobuf_LIBRARY=/usr/local/lib/libprotobuf.so -DFFmpeg_Libavcodec=/usr/local/lib/libavcodec.so -DFFmpeg_Libavfilter=/usr/local/lib/libavfilter.so -DFFmpeg_Libavformat=/usr/local/lib/libavformat.so -DFFmpeg_Libavutil=/usr/local/lib/libavutil.so .. \
#	&& make -j4 \
#	&& make install \
#	&& pip install dali/python

### failed attempt 1
#RUN cd ${INSTALL_ROOT} \
#	&& git clone --recurse-submodules --shallow-submodules https://github.com/NVIDIA/DALI --depth 1 --branch v1.6.0 \
#	&& cd DALI \
#	&& mkdir build \
#	&& cd build \
#	&& dnf install -y clang-devel \
#	&& pip install clang \
#	&& ln -s /usr/lib64/libclang.so /usr/lib64/libclang-11.so \
#	&& conda install -y -c conda-forge opencv libsndfile libtar ffmpeg protobuf libjpeg-turbo \
#	&& cmake  -DCMAKE_BUILD_TYPE=Release -DBUILD_NVJPEG2K=OFF -DFFMPEG_ROOT_DIR=${CONDA_HOME} -DJPEG_INCLUDE_DIR=${CONDA_HOME}/include -DJPEG_LIBRARY=${CONDA_HOME}/lib/libturbojpeg.so -DCUDA_TARGET_ARCHS=70 -DBUILD_PROTOBUF=ON -DProtobuf_LIBRARY=/opt/conda/lib/libprotobuf.so .. \
#	&& make -j4 \
#	&& make install \
#	&& pip install dali/python
### failed attempt 2
## dnf install -y clang-devel opencv libsndfile libtar protobuf
#	&& pip install clang \
#	&& ln -s /usr/lib64/libclang.so /usr/lib64/libclang-11.so \
#	&& conda install -y -c conda-forge ffmpeg \
#	&& cmake  -DCMAKE_BUILD_TYPE=Release -DBUILD_NVJPEG2K=OFF -DFFMPEG_ROOT_DIR=${CONDA_HOME} -DCUDA_TARGET_ARCHS=70 -DBUILD_PROTOBUF=ON -DProtobuf_LIBRARY=/usr/lib64/libprotobuf.so .. \
# Install DLProf
# Run bash as the container's entry process (exec form, so no wrapping `sh -c`).
ENTRYPOINT ["/bin/bash"]
+2 −1
Original line number Diff line number Diff line
@@ -54,7 +54,8 @@ ENV OCE_MPI="system"
# Build settings for the open-ce builder. OCE_MPI is passed to `open-ce build env`
# in the RUN that follows; presumably the other OCE_* values are consumed the same
# way (the rest of that command is not visible here — confirm).
ENV OCE_CUDA_VERSIONS="11.0"
ENV OCE_BUILD_TYPES="cuda"
ENV OCE_PYTHON_VERSIONS="3.9"
# Environment file to build. The earlier pytorch-env.yaml assignment was dead: it
# was overridden immediately by the dali-env.yaml assignment, so only the effective
# value is set. The previous choice is kept commented out for easy switching.
#ENV OCE_ENV="/code/open-ce/envs/pytorch-env.yaml"
ENV OCE_ENV="/code/open-ce/envs/dali-env.yaml"

RUN cd /code && /code/open-ce-builder/open_ce/open-ce \
  build env --mpi_types "$OCE_MPI" \
+1095 −0

File added.

Preview size limit exceeded, changes collapsed.