Single-node performance, Free benchmark, Haswell
Performance in millions of particle-timesteps per second

Nparticles CPU (mpi,hyper) Kokkos/OMP (mpi,hyper,thread) Kokkos/serial (mpi,hyper)
32000 688.6 (32,1) 491.4 (16,1,2) 553.7 (32,1)
64000 850 (32,1) 655.5 (32,2,2) 700.5 (32,1)
128000 1005 (32,1) 828.2 (32,2,2) 827.1 (32,1)
256000 1199 (64,2) 922.3 (32,2,2) 975.1 (64,2)
512000 1185 (64,2) 911.2 (32,2,2) 976.4 (64,2)
1024000 461.1 (64,2) 462.7 (16,2,4) 464.4 (32,1)
2048000 458.6 (32,1) 445 (32,2,2) 448.8 (32,1)
4096000 390.9 (64,2) 377.3 (32,2,2) 367.1 (64,2)
8192000 309.8 (64,2) 285.4 (32,2,2) 290.8 (64,2)
16384000 268.3 (64,2) 252.8 (32,2,2) 249.3 (64,2)
32768000 247.8 (64,2) 225.8 (16,2,4) 230.6 (64,2)
65536000 238.1 (64,2) 216.1 (32,2,2) 221.3 (64,2)
131072000 232.2 (64,2) 210.6 (32,2,2) 215.8 (64,2)

Run commands and logfile links for column CPU

32000 srun -n 32 -C haswell --ntasks-per-node 32 --cpu_bind=rank -c 2 ./spa_mutrino_cpu -v x 16 -v y 10 -v z 20 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu.kind=node.size=32K.node=1.mpi=32.hyper=1
64000 srun -n 32 -C haswell --ntasks-per-node 32 --cpu_bind=rank -c 2 ./spa_mutrino_cpu -v x 16 -v y 20 -v z 20 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu.kind=node.size=64K.node=1.mpi=32.hyper=1
128000 srun -n 32 -C haswell --ntasks-per-node 32 --cpu_bind=rank -c 2 ./spa_mutrino_cpu -v x 32 -v y 20 -v z 20 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu.kind=node.size=128K.node=1.mpi=32.hyper=1
256000 srun -n 64 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_cpu -v x 32 -v y 20 -v z 40 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu.kind=node.size=256K.node=1.mpi=64.hyper=2
512000 srun -n 64 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_cpu -v x 32 -v y 40 -v z 40 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu.kind=node.size=512K.node=1.mpi=64.hyper=2
1024000 srun -n 64 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_cpu -v x 64 -v y 40 -v z 40 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu.kind=node.size=1M.node=1.mpi=64.hyper=2
2048000 srun -n 32 -C haswell --ntasks-per-node 32 --cpu_bind=rank -c 2 ./spa_mutrino_cpu -v x 64 -v y 40 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu.kind=node.size=2M.node=1.mpi=32.hyper=1
4096000 srun -n 64 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_cpu -v x 64 -v y 80 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu.kind=node.size=4M.node=1.mpi=64.hyper=2
8192000 srun -n 64 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_cpu -v x 128 -v y 80 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu.kind=node.size=8M.node=1.mpi=64.hyper=2
16384000 srun -n 64 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_cpu -v x 128 -v y 80 -v z 160 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu.kind=node.size=16M.node=1.mpi=64.hyper=2
32768000 srun -n 64 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_cpu -v x 128 -v y 160 -v z 160 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu.kind=node.size=32M.node=1.mpi=64.hyper=2
65536000 srun -n 64 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_cpu -v x 256 -v y 160 -v z 160 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu.kind=node.size=64M.node=1.mpi=64.hyper=2
131072000 srun -n 64 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_cpu -v x 256 -v y 160 -v z 320 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu.kind=node.size=128M.node=1.mpi=64.hyper=2

Run commands and logfile links for column Kokkos/OMP

32000 setenv OMP_NUM_THREADS 2; srun -n 16 -C haswell --ntasks-per-node 16 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 16 -v y 10 -v z 20 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_omp.kind=node.size=32K.node=1.mpi=16.thread=2.hyper=1
64000 setenv OMP_NUM_THREADS 2; srun -n 32 -C haswell --ntasks-per-node 32 --cpu_bind=cores -c 2 ./spa_mutrino_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 16 -v y 20 -v z 20 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_omp.kind=node.size=64K.node=1.mpi=32.thread=2.hyper=2
128000 setenv OMP_NUM_THREADS 2; srun -n 32 -C haswell --ntasks-per-node 32 --cpu_bind=cores -c 2 ./spa_mutrino_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 20 -v z 20 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_omp.kind=node.size=128K.node=1.mpi=32.thread=2.hyper=2
256000 setenv OMP_NUM_THREADS 2; srun -n 32 -C haswell --ntasks-per-node 32 --cpu_bind=cores -c 2 ./spa_mutrino_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 20 -v z 40 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_omp.kind=node.size=256K.node=1.mpi=32.thread=2.hyper=2
512000 setenv OMP_NUM_THREADS 2; srun -n 32 -C haswell --ntasks-per-node 32 --cpu_bind=cores -c 2 ./spa_mutrino_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 40 -v z 40 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_omp.kind=node.size=512K.node=1.mpi=32.thread=2.hyper=2
1024000 setenv OMP_NUM_THREADS 4; srun -n 16 -C haswell --ntasks-per-node 16 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_omp -sf kk -k on t 4 -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 40 -v z 40 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_omp.kind=node.size=1M.node=1.mpi=16.thread=4.hyper=2
2048000 setenv OMP_NUM_THREADS 2; srun -n 32 -C haswell --ntasks-per-node 32 --cpu_bind=cores -c 2 ./spa_mutrino_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 40 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_omp.kind=node.size=2M.node=1.mpi=32.thread=2.hyper=2
4096000 setenv OMP_NUM_THREADS 2; srun -n 32 -C haswell --ntasks-per-node 32 --cpu_bind=cores -c 2 ./spa_mutrino_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 80 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_omp.kind=node.size=4M.node=1.mpi=32.thread=2.hyper=2
8192000 setenv OMP_NUM_THREADS 2; srun -n 32 -C haswell --ntasks-per-node 32 --cpu_bind=cores -c 2 ./spa_mutrino_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_omp.kind=node.size=8M.node=1.mpi=32.thread=2.hyper=2
16384000 setenv OMP_NUM_THREADS 2; srun -n 32 -C haswell --ntasks-per-node 32 --cpu_bind=cores -c 2 ./spa_mutrino_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 160 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_omp.kind=node.size=16M.node=1.mpi=32.thread=2.hyper=2
32768000 setenv OMP_NUM_THREADS 4; srun -n 16 -C haswell --ntasks-per-node 16 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_omp -sf kk -k on t 4 -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 160 -v z 160 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_omp.kind=node.size=32M.node=1.mpi=16.thread=4.hyper=2
65536000 setenv OMP_NUM_THREADS 2; srun -n 32 -C haswell --ntasks-per-node 32 --cpu_bind=cores -c 2 ./spa_mutrino_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 160 -v z 160 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_omp.kind=node.size=64M.node=1.mpi=32.thread=2.hyper=2
131072000 setenv OMP_NUM_THREADS 2; srun -n 32 -C haswell --ntasks-per-node 32 --cpu_bind=cores -c 2 ./spa_mutrino_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 160 -v z 320 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_omp.kind=node.size=128M.node=1.mpi=32.thread=2.hyper=2

Run commands and logfile links for column Kokkos/serial

32000 srun -n 32 -C haswell --ntasks-per-node 32 --cpu_bind=rank -c 2 ./spa_mutrino_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 16 -v y 10 -v z 20 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_serial.kind=node.size=32K.node=1.mpi=32.hyper=1
64000 srun -n 32 -C haswell --ntasks-per-node 32 --cpu_bind=rank -c 2 ./spa_mutrino_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 16 -v y 20 -v z 20 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_serial.kind=node.size=64K.node=1.mpi=32.hyper=1
128000 srun -n 32 -C haswell --ntasks-per-node 32 --cpu_bind=rank -c 2 ./spa_mutrino_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 20 -v z 20 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_serial.kind=node.size=128K.node=1.mpi=32.hyper=1
256000 srun -n 64 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 20 -v z 40 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_serial.kind=node.size=256K.node=1.mpi=64.hyper=2
512000 srun -n 64 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 40 -v z 40 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_serial.kind=node.size=512K.node=1.mpi=64.hyper=2
1024000 srun -n 32 -C haswell --ntasks-per-node 32 --cpu_bind=rank -c 2 ./spa_mutrino_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 40 -v z 40 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_serial.kind=node.size=1M.node=1.mpi=32.hyper=1
2048000 srun -n 32 -C haswell --ntasks-per-node 32 --cpu_bind=rank -c 2 ./spa_mutrino_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 40 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_serial.kind=node.size=2M.node=1.mpi=32.hyper=1
4096000 srun -n 64 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 80 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_serial.kind=node.size=4M.node=1.mpi=64.hyper=2
8192000 srun -n 64 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_serial.kind=node.size=8M.node=1.mpi=64.hyper=2
16384000 srun -n 64 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 160 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_serial.kind=node.size=16M.node=1.mpi=64.hyper=2
32768000 srun -n 64 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 160 -v z 160 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_serial.kind=node.size=32M.node=1.mpi=64.hyper=2
65536000 srun -n 64 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 160 -v z 160 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_serial.kind=node.size=64M.node=1.mpi=64.hyper=2
131072000 srun -n 64 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 160 -v z 320 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_serial.kind=node.size=128M.node=1.mpi=64.hyper=2