Single node performance, Sphere benchmark, Broadwell
Performance in millions of particle-timesteps per second

Nparticles CPU (mpi,hyper) Kokkos/OMP (mpi,hyper,thread) Kokkos/serial (mpi,hyper)
32000 208 (32,2) 114.9 (16,2,2) 171.2 (32,2)
64000 327.3 (32,2) 168 (18,1,2) 274.6 (32,1)
128000 446.7 (32,1) 215.2 (32,2,2) 394.1 (32,2)
256000 529.2 (32,1) 303.6 (32,2,2) 484.3 (32,1)
512000 629.1 (72,2) 367.5 (36,2,2) 530 (36,2)
1024000 375.2 (72,2) 283 (36,2,2) 306.6 (72,2)
2048000 299.1 (72,2) 271 (36,2,2) 285.1 (72,2)
4096000 273.4 (72,2) 266.2 (36,2,2) 283.4 (72,2)
8192000 237.3 (72,2) 225 (36,2,2) 247.4 (72,2)
16384000 193 (72,2) 189.1 (36,2,2) 204.4 (72,2)
32768000 163 (72,2) 167.7 (36,2,2) 178.1 (72,2)
65536000 136.7 (72,2) 137.7 (36,2,2) 149.8 (72,2)
131072000 125.2 (72,2) 123.1 (36,2,2) 134.2 (72,2)

Run commands and logfile links for column CPU

32000 mpiexec -np 32 -npernode 32 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 16 -v y 10 -v z 20 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=serrano.pkg=cpu.kind=node.size=32K.node=1.mpi=32.hyper=2
64000 mpiexec -np 32 -npernode 32 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 16 -v y 20 -v z 20 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=serrano.pkg=cpu.kind=node.size=64K.node=1.mpi=32.hyper=2
128000 mpiexec -np 32 -npernode 32 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 32 -v y 20 -v z 20 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=serrano.pkg=cpu.kind=node.size=128K.node=1.mpi=32.hyper=1
256000 mpiexec -np 32 -npernode 32 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 32 -v y 20 -v z 40 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=serrano.pkg=cpu.kind=node.size=256K.node=1.mpi=32.hyper=1
512000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 32 -v y 40 -v z 40 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=serrano.pkg=cpu.kind=node.size=512K.node=1.mpi=72.hyper=2
1024000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 64 -v y 40 -v z 40 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=serrano.pkg=cpu.kind=node.size=1M.node=1.mpi=72.hyper=2
2048000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 64 -v y 40 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=serrano.pkg=cpu.kind=node.size=2M.node=1.mpi=72.hyper=2
4096000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 64 -v y 80 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=serrano.pkg=cpu.kind=node.size=4M.node=1.mpi=72.hyper=2
8192000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 128 -v y 80 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=serrano.pkg=cpu.kind=node.size=8M.node=1.mpi=72.hyper=2
16384000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 128 -v y 80 -v z 160 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=serrano.pkg=cpu.kind=node.size=16M.node=1.mpi=72.hyper=2
32768000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 128 -v y 160 -v z 160 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=serrano.pkg=cpu.kind=node.size=32M.node=1.mpi=72.hyper=2
65536000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 256 -v y 160 -v z 160 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=serrano.pkg=cpu.kind=node.size=64M.node=1.mpi=72.hyper=2
131072000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 256 -v y 160 -v z 320 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=serrano.pkg=cpu.kind=node.size=128M.node=1.mpi=72.hyper=2

Run commands and logfile links for column Kokkos/OMP

32000 mpiexec -np 16 -npernode 16 --oversubscribe --bind-to socket ./spa_serrano_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 16 -v y 10 -v z 20 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=serrano.pkg=kokkos_omp.kind=node.size=32K.node=1.mpi=16.thread=2.hyper=2
64000 mpiexec -np 18 -npernode 18 --oversubscribe --bind-to socket ./spa_serrano_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 16 -v y 20 -v z 20 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=serrano.pkg=kokkos_omp.kind=node.size=64K.node=1.mpi=18.thread=2.hyper=1
128000 mpiexec -np 32 -npernode 32 --oversubscribe --bind-to socket ./spa_serrano_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 20 -v z 20 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=serrano.pkg=kokkos_omp.kind=node.size=128K.node=1.mpi=32.thread=2.hyper=2
256000 mpiexec -np 32 -npernode 32 --oversubscribe --bind-to socket ./spa_serrano_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 20 -v z 40 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=serrano.pkg=kokkos_omp.kind=node.size=256K.node=1.mpi=32.thread=2.hyper=2
512000 mpiexec -np 36 -npernode 36 --oversubscribe --bind-to socket ./spa_serrano_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 40 -v z 40 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=serrano.pkg=kokkos_omp.kind=node.size=512K.node=1.mpi=36.thread=2.hyper=2
1024000 mpiexec -np 36 -npernode 36 --oversubscribe --bind-to socket ./spa_serrano_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 40 -v z 40 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=serrano.pkg=kokkos_omp.kind=node.size=1M.node=1.mpi=36.thread=2.hyper=2
2048000 mpiexec -np 36 -npernode 36 --oversubscribe --bind-to socket ./spa_serrano_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 40 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=serrano.pkg=kokkos_omp.kind=node.size=2M.node=1.mpi=36.thread=2.hyper=2
4096000 mpiexec -np 36 -npernode 36 --oversubscribe --bind-to socket ./spa_serrano_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 80 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=serrano.pkg=kokkos_omp.kind=node.size=4M.node=1.mpi=36.thread=2.hyper=2
8192000 mpiexec -np 36 -npernode 36 --oversubscribe --bind-to socket ./spa_serrano_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=serrano.pkg=kokkos_omp.kind=node.size=8M.node=1.mpi=36.thread=2.hyper=2
16384000 mpiexec -np 36 -npernode 36 --oversubscribe --bind-to socket ./spa_serrano_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 160 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=serrano.pkg=kokkos_omp.kind=node.size=16M.node=1.mpi=36.thread=2.hyper=2
32768000 mpiexec -np 36 -npernode 36 --oversubscribe --bind-to socket ./spa_serrano_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 160 -v z 160 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=serrano.pkg=kokkos_omp.kind=node.size=32M.node=1.mpi=36.thread=2.hyper=2
65536000 mpiexec -np 36 -npernode 36 --oversubscribe --bind-to socket ./spa_serrano_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 160 -v z 160 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=serrano.pkg=kokkos_omp.kind=node.size=64M.node=1.mpi=36.thread=2.hyper=2
131072000 mpiexec -np 36 -npernode 36 --oversubscribe --bind-to socket ./spa_serrano_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 160 -v z 320 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=serrano.pkg=kokkos_omp.kind=node.size=128M.node=1.mpi=36.thread=2.hyper=2

Run commands and logfile links for column Kokkos/serial

32000 mpiexec -np 32 -npernode 32 --oversubscribe --bind-to core ./spa_serrano_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 16 -v y 10 -v z 20 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=serrano.pkg=kokkos_serial.kind=node.size=32K.node=1.mpi=32.hyper=2
64000 mpiexec -np 32 -npernode 32 --oversubscribe --bind-to core ./spa_serrano_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 16 -v y 20 -v z 20 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=serrano.pkg=kokkos_serial.kind=node.size=64K.node=1.mpi=32.hyper=1
128000 mpiexec -np 32 -npernode 32 --oversubscribe --bind-to core ./spa_serrano_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 20 -v z 20 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=serrano.pkg=kokkos_serial.kind=node.size=128K.node=1.mpi=32.hyper=2
256000 mpiexec -np 32 -npernode 32 --oversubscribe --bind-to core ./spa_serrano_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 20 -v z 40 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=serrano.pkg=kokkos_serial.kind=node.size=256K.node=1.mpi=32.hyper=1
512000 mpiexec -np 36 -npernode 36 --oversubscribe --bind-to core ./spa_serrano_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 40 -v z 40 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=serrano.pkg=kokkos_serial.kind=node.size=512K.node=1.mpi=36.hyper=2
1024000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 40 -v z 40 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=serrano.pkg=kokkos_serial.kind=node.size=1M.node=1.mpi=72.hyper=2
2048000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 40 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=serrano.pkg=kokkos_serial.kind=node.size=2M.node=1.mpi=72.hyper=2
4096000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 80 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=serrano.pkg=kokkos_serial.kind=node.size=4M.node=1.mpi=72.hyper=2
8192000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=serrano.pkg=kokkos_serial.kind=node.size=8M.node=1.mpi=72.hyper=2
16384000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 160 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=serrano.pkg=kokkos_serial.kind=node.size=16M.node=1.mpi=72.hyper=2
32768000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 160 -v z 160 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=serrano.pkg=kokkos_serial.kind=node.size=32M.node=1.mpi=72.hyper=2
65536000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 160 -v z 160 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=serrano.pkg=kokkos_serial.kind=node.size=64M.node=1.mpi=72.hyper=2
131072000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 160 -v z 320 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=serrano.pkg=kokkos_serial.kind=node.size=128M.node=1.mpi=72.hyper=2