Single node performance, Collide benchmark, Broadwell
Performance in millions of particle-timesteps / second

Nparticles CPU (mpi,hyper) Kokkos/OMP (mpi,hyper,thread) Kokkos/serial (mpi,hyper)
32000 295.1 (36,2) 144.3 (32,2,2) 261.2 (36,1)
64000 404.8 (36,1) 208.4 (36,2,2) 364.2 (36,1)
128000 486.6 (72,2) 274.3 (36,2,2) 446.9 (72,2)
256000 592.4 (64,2) 335.9 (36,2,2) 548.7 (64,2)
512000 617.9 (72,2) 356.3 (36,2,2) 548.4 (72,2)
1024000 326.8 (72,2) 251.2 (32,2,2) 277.5 (72,2)
2048000 260.2 (72,2) 231.2 (36,2,2) 260 (72,2)
4096000 243.3 (72,2) 226.1 (36,2,2) 243.6 (72,2)
8192000 207.4 (72,2) 195.3 (36,2,2) 207.3 (72,2)
16384000 171.3 (72,2) 169.3 (36,2,2) 178.1 (72,2)
32768000 147.6 (72,2) 149.7 (36,2,2) 157.4 (72,2)
65536000 135.8 (72,2) 136.4 (36,2,2) 143.7 (72,2)
131072000 129 (72,2) 125.3 (36,2,2) 132.3 (72,2)

Run commands and logfile links for column CPU

32000 mpiexec -np 36 -npernode 36 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 16 -v y 10 -v z 20 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=cpu.kind=node.size=32K.node=1.mpi=36.hyper=2
64000 mpiexec -np 36 -npernode 36 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 16 -v y 20 -v z 20 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=cpu.kind=node.size=64K.node=1.mpi=36.hyper=1
128000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 32 -v y 20 -v z 20 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=cpu.kind=node.size=128K.node=1.mpi=72.hyper=2
256000 mpiexec -np 64 -npernode 64 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 32 -v y 20 -v z 40 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=cpu.kind=node.size=256K.node=1.mpi=64.hyper=2
512000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 32 -v y 40 -v z 40 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=cpu.kind=node.size=512K.node=1.mpi=72.hyper=2
1024000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 64 -v y 40 -v z 40 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=cpu.kind=node.size=1M.node=1.mpi=72.hyper=2
2048000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 64 -v y 40 -v z 80 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=cpu.kind=node.size=2M.node=1.mpi=72.hyper=2
4096000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 64 -v y 80 -v z 80 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=cpu.kind=node.size=4M.node=1.mpi=72.hyper=2
8192000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 128 -v y 80 -v z 80 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=cpu.kind=node.size=8M.node=1.mpi=72.hyper=2
16384000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 128 -v y 80 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=cpu.kind=node.size=16M.node=1.mpi=72.hyper=2
32768000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 128 -v y 160 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=cpu.kind=node.size=32M.node=1.mpi=72.hyper=2
65536000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 256 -v y 160 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=cpu.kind=node.size=64M.node=1.mpi=72.hyper=2
131072000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 256 -v y 160 -v z 320 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=cpu.kind=node.size=128M.node=1.mpi=72.hyper=2

Run commands and logfile links for column Kokkos/OMP

32000 mpiexec -np 32 -npernode 32 --oversubscribe --bind-to socket ./spa_serrano_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 16 -v y 10 -v z 20 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=kokkos_omp.kind=node.size=32K.node=1.mpi=32.thread=2.hyper=2
64000 mpiexec -np 36 -npernode 36 --oversubscribe --bind-to socket ./spa_serrano_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 16 -v y 20 -v z 20 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=kokkos_omp.kind=node.size=64K.node=1.mpi=36.thread=2.hyper=2
128000 mpiexec -np 36 -npernode 36 --oversubscribe --bind-to socket ./spa_serrano_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 20 -v z 20 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=kokkos_omp.kind=node.size=128K.node=1.mpi=36.thread=2.hyper=2
256000 mpiexec -np 36 -npernode 36 --oversubscribe --bind-to socket ./spa_serrano_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 20 -v z 40 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=kokkos_omp.kind=node.size=256K.node=1.mpi=36.thread=2.hyper=2
512000 mpiexec -np 36 -npernode 36 --oversubscribe --bind-to socket ./spa_serrano_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 40 -v z 40 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=kokkos_omp.kind=node.size=512K.node=1.mpi=36.thread=2.hyper=2
1024000 mpiexec -np 32 -npernode 32 --oversubscribe --bind-to socket ./spa_serrano_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 40 -v z 40 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=kokkos_omp.kind=node.size=1M.node=1.mpi=32.thread=2.hyper=2
2048000 mpiexec -np 36 -npernode 36 --oversubscribe --bind-to socket ./spa_serrano_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 40 -v z 80 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=kokkos_omp.kind=node.size=2M.node=1.mpi=36.thread=2.hyper=2
4096000 mpiexec -np 36 -npernode 36 --oversubscribe --bind-to socket ./spa_serrano_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 80 -v z 80 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=kokkos_omp.kind=node.size=4M.node=1.mpi=36.thread=2.hyper=2
8192000 mpiexec -np 36 -npernode 36 --oversubscribe --bind-to socket ./spa_serrano_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=kokkos_omp.kind=node.size=8M.node=1.mpi=36.thread=2.hyper=2
16384000 mpiexec -np 36 -npernode 36 --oversubscribe --bind-to socket ./spa_serrano_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=kokkos_omp.kind=node.size=16M.node=1.mpi=36.thread=2.hyper=2
32768000 mpiexec -np 36 -npernode 36 --oversubscribe --bind-to socket ./spa_serrano_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 160 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=kokkos_omp.kind=node.size=32M.node=1.mpi=36.thread=2.hyper=2
65536000 mpiexec -np 36 -npernode 36 --oversubscribe --bind-to socket ./spa_serrano_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 160 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=kokkos_omp.kind=node.size=64M.node=1.mpi=36.thread=2.hyper=2
131072000 mpiexec -np 36 -npernode 36 --oversubscribe --bind-to socket ./spa_serrano_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 160 -v z 320 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=kokkos_omp.kind=node.size=128M.node=1.mpi=36.thread=2.hyper=2

Run commands and logfile links for column Kokkos/serial

32000 mpiexec -np 36 -npernode 36 --oversubscribe --bind-to core ./spa_serrano_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 16 -v y 10 -v z 20 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=kokkos_serial.kind=node.size=32K.node=1.mpi=36.hyper=1
64000 mpiexec -np 36 -npernode 36 --oversubscribe --bind-to core ./spa_serrano_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 16 -v y 20 -v z 20 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=kokkos_serial.kind=node.size=64K.node=1.mpi=36.hyper=1
128000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 20 -v z 20 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=kokkos_serial.kind=node.size=128K.node=1.mpi=72.hyper=2
256000 mpiexec -np 64 -npernode 64 --oversubscribe --bind-to core ./spa_serrano_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 20 -v z 40 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=kokkos_serial.kind=node.size=256K.node=1.mpi=64.hyper=2
512000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 40 -v z 40 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=kokkos_serial.kind=node.size=512K.node=1.mpi=72.hyper=2
1024000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 40 -v z 40 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=kokkos_serial.kind=node.size=1M.node=1.mpi=72.hyper=2
2048000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 40 -v z 80 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=kokkos_serial.kind=node.size=2M.node=1.mpi=72.hyper=2
4096000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 80 -v z 80 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=kokkos_serial.kind=node.size=4M.node=1.mpi=72.hyper=2
8192000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=kokkos_serial.kind=node.size=8M.node=1.mpi=72.hyper=2
16384000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=kokkos_serial.kind=node.size=16M.node=1.mpi=72.hyper=2
32768000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 160 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=kokkos_serial.kind=node.size=32M.node=1.mpi=72.hyper=2
65536000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 160 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=kokkos_serial.kind=node.size=64M.node=1.mpi=72.hyper=2
131072000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 160 -v z 320 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=kokkos_serial.kind=node.size=128M.node=1.mpi=72.hyper=2