Loading pkgs/build-support/setup-hooks/mpi-check-hook/default.nix +4 −0 Original line number Diff line number Diff line Loading @@ -2,4 +2,8 @@ makeSetupHook { name = "mpi-checkPhase-hook"; substitutions = { topology = ./topology.xml; }; } ./mpi-check-hook.sh pkgs/build-support/setup-hooks/mpi-check-hook/mpi-check-hook.sh +11 −0 Original line number Diff line number Diff line Loading @@ -44,6 +44,17 @@ setupMpiCheck() { # Disable CPU pinning export OMPI_MCA_hwloc_base_binding_policy=none export PRTE_MCA_hwloc_default_binding_policy=none # OpenMPI gets confused by the sandbox environment and spews errors like these (both to stdout and stderr): # [hwloc/linux] failed to find sysfs cpu topology directory, aborting linux discovery. # [1729458724.473282] [localhost:78 :0] tcp_iface.c:893 UCX ERROR scandir(/sys/class/net) failed: No such file or directory # These messages contaminate the test output, which makes the difftest fail. # The solution is to use a preset cpu topology file and disable the ucx model. # Disable sysfs cpu topology directory discovery. export PRTE_MCA_hwloc_use_topo_file="@topology@" # Use the network model ob1 instead of ucx. 
export OMPI_MCA_pml=ob1 ;; MPICH) # Fix to make mpich run in a sandbox Loading pkgs/build-support/setup-hooks/mpi-check-hook/topology.xml 0 → 100644 +10 −0 Original line number Diff line number Diff line <?xml version="1.0" encoding="UTF-8"?> <!DOCTYPE topology SYSTEM "hwloc2.dtd"> <topology version="2.0"> <object type="Machine" os_index="0" cpuset="0x00000001" complete_cpuset="0x00000001" allowed_cpuset="0x00000001" nodeset="0x00000001" complete_nodeset="0x00000001" allowed_nodeset="0x00000001" gp_index="1"> <object type="Core" cpuset="0x00000001" complete_cpuset="0x00000001" nodeset="0x00000001" complete_nodeset="0x00000001" gp_index="2"> <object type="NUMANode" os_index="0" cpuset="0x00000001" complete_cpuset="0x00000001" nodeset="0x00000001" complete_nodeset="0x00000001" gp_index="4"/> <object type="PU" os_index="0" cpuset="0x00000001" complete_cpuset="0x00000001" nodeset="0x00000001" complete_nodeset="0x00000001" gp_index="3"/> </object> </object> </topology> Loading
pkgs/build-support/setup-hooks/mpi-check-hook/default.nix +4 −0 Original line number Diff line number Diff line Loading @@ -2,4 +2,8 @@ makeSetupHook { name = "mpi-checkPhase-hook"; substitutions = { topology = ./topology.xml; }; } ./mpi-check-hook.sh
pkgs/build-support/setup-hooks/mpi-check-hook/mpi-check-hook.sh +11 −0 Original line number Diff line number Diff line Loading @@ -44,6 +44,17 @@ setupMpiCheck() { # Disable CPU pinning export OMPI_MCA_hwloc_base_binding_policy=none export PRTE_MCA_hwloc_default_binding_policy=none # OpenMPI gets confused by the sandbox environment and spews errors like these (both to stdout and stderr): # [hwloc/linux] failed to find sysfs cpu topology directory, aborting linux discovery. # [1729458724.473282] [localhost:78 :0] tcp_iface.c:893 UCX ERROR scandir(/sys/class/net) failed: No such file or directory # These messages contaminate the test output, which makes the difftest fail. # The solution is to use a preset cpu topology file and disable the ucx model. # Disable sysfs cpu topology directory discovery. export PRTE_MCA_hwloc_use_topo_file="@topology@" # Use the network model ob1 instead of ucx. export OMPI_MCA_pml=ob1 ;; MPICH) # Fix to make mpich run in a sandbox Loading
pkgs/build-support/setup-hooks/mpi-check-hook/topology.xml 0 → 100644 +10 −0 Original line number Diff line number Diff line <?xml version="1.0" encoding="UTF-8"?> <!DOCTYPE topology SYSTEM "hwloc2.dtd"> <topology version="2.0"> <object type="Machine" os_index="0" cpuset="0x00000001" complete_cpuset="0x00000001" allowed_cpuset="0x00000001" nodeset="0x00000001" complete_nodeset="0x00000001" allowed_nodeset="0x00000001" gp_index="1"> <object type="Core" cpuset="0x00000001" complete_cpuset="0x00000001" nodeset="0x00000001" complete_nodeset="0x00000001" gp_index="2"> <object type="NUMANode" os_index="0" cpuset="0x00000001" complete_cpuset="0x00000001" nodeset="0x00000001" complete_nodeset="0x00000001" gp_index="4"/> <object type="PU" os_index="0" cpuset="0x00000001" complete_cpuset="0x00000001" nodeset="0x00000001" complete_nodeset="0x00000001" gp_index="3"/> </object> </object> </topology>