Strong scaling performance, Free benchmark, Haswell, 8M particles
Performance is reported in millions of particle-timesteps per second per node

Nodes CPU (mpi,hyper) Kokkos/OMP (mpi,hyper,thread) Kokkos/serial (mpi,hyper)
1 310.7 (64,2) 295.3 (32,2,2) 286.8 (64,2)
2 377.2 (64,2) 373.6 (16,2,4) 358.5 (64,2)
4 435.3 (32,1) 423.9 (32,2,2) 420.4 (32,1)
8 432.1 (32,1) 415 (32,2,2) 415.8 (32,1)
16 727.8 (64,2) 607.9 (16,2,4) 663.6 (64,2)
32 620.7 (32,1) 568.5 (32,2,2) 557.9 (32,1)
64 432.1 (32,1) 376.3 (32,2,2) 393.1 (32,1)

Run commands and logfile links for column CPU

1 srun -n 64 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_cpu -v x 128 -v y 80 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu.kind=strong.size=8M.node=1.mpi=64.hyper=2
2 srun -n 128 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_cpu -v x 128 -v y 80 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu.kind=strong.size=8M.node=2.mpi=64.hyper=2
4 srun -n 128 -C haswell --ntasks-per-node 32 --cpu_bind=rank -c 2 ./spa_mutrino_cpu -v x 128 -v y 80 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu.kind=strong.size=8M.node=4.mpi=32.hyper=1
8 srun -n 256 -C haswell --ntasks-per-node 32 --cpu_bind=rank -c 2 ./spa_mutrino_cpu -v x 128 -v y 80 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu.kind=strong.size=8M.node=8.mpi=32.hyper=1
16 srun -n 1024 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_cpu -v x 128 -v y 80 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu.kind=strong.size=8M.node=16.mpi=64.hyper=2
32 srun -n 1024 -C haswell --ntasks-per-node 32 --cpu_bind=rank -c 2 ./spa_mutrino_cpu -v x 128 -v y 80 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu.kind=strong.size=8M.node=32.mpi=32.hyper=1
64 srun -n 2048 -C haswell --ntasks-per-node 32 --cpu_bind=rank -c 2 ./spa_mutrino_cpu -v x 128 -v y 80 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu.kind=strong.size=8M.node=64.mpi=32.hyper=1

Run commands and logfile links for column Kokkos/OMP

1 setenv OMP_NUM_THREADS 2; srun -n 32 -C haswell --ntasks-per-node 32 --cpu_bind=cores -c 2 ./spa_mutrino_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_omp.kind=strong.size=8M.node=1.mpi=32.thread=2.hyper=2
2 setenv OMP_NUM_THREADS 4; srun -n 32 -C haswell --ntasks-per-node 16 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_omp -sf kk -k on t 4 -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_omp.kind=strong.size=8M.node=2.mpi=16.thread=4.hyper=2
4 setenv OMP_NUM_THREADS 2; srun -n 128 -C haswell --ntasks-per-node 32 --cpu_bind=cores -c 2 ./spa_mutrino_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_omp.kind=strong.size=8M.node=4.mpi=32.thread=2.hyper=2
8 setenv OMP_NUM_THREADS 2; srun -n 256 -C haswell --ntasks-per-node 32 --cpu_bind=cores -c 2 ./spa_mutrino_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_omp.kind=strong.size=8M.node=8.mpi=32.thread=2.hyper=2
16 setenv OMP_NUM_THREADS 4; srun -n 256 -C haswell --ntasks-per-node 16 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_omp -sf kk -k on t 4 -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_omp.kind=strong.size=8M.node=16.mpi=16.thread=4.hyper=2
32 setenv OMP_NUM_THREADS 2; srun -n 1024 -C haswell --ntasks-per-node 32 --cpu_bind=cores -c 2 ./spa_mutrino_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_omp.kind=strong.size=8M.node=32.mpi=32.thread=2.hyper=2
64 setenv OMP_NUM_THREADS 2; srun -n 2048 -C haswell --ntasks-per-node 32 --cpu_bind=cores -c 2 ./spa_mutrino_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_omp.kind=strong.size=8M.node=64.mpi=32.thread=2.hyper=2

Run commands and logfile links for column Kokkos/serial

1 srun -n 64 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_serial.kind=strong.size=8M.node=1.mpi=64.hyper=2
2 srun -n 128 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_serial.kind=strong.size=8M.node=2.mpi=64.hyper=2
4 srun -n 128 -C haswell --ntasks-per-node 32 --cpu_bind=rank -c 2 ./spa_mutrino_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_serial.kind=strong.size=8M.node=4.mpi=32.hyper=1
8 srun -n 256 -C haswell --ntasks-per-node 32 --cpu_bind=rank -c 2 ./spa_mutrino_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_serial.kind=strong.size=8M.node=8.mpi=32.hyper=1
16 srun -n 1024 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_serial.kind=strong.size=8M.node=16.mpi=64.hyper=2
32 srun -n 1024 -C haswell --ntasks-per-node 32 --cpu_bind=rank -c 2 ./spa_mutrino_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_serial.kind=strong.size=8M.node=32.mpi=32.hyper=1
64 srun -n 2048 -C haswell --ntasks-per-node 32 --cpu_bind=rank -c 2 ./spa_mutrino_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_serial.kind=strong.size=8M.node=64.mpi=32.hyper=1