Single node performance, Sphere benchmark, SandyBridge
Performance in millions of particle-timesteps / second

Nparticles CPU (mpi) Kokkos/OMP (mpi,thread) Kokkos/serial (mpi)
32000 208.1 (16) 82.08 (8,2) 160.4 (16)
64000 285.2 (16) 97.97 (8,2) 245.6 (16)
128000 306.6 (16) 174.4 (8,2) 297.5 (16)
256000 359.2 (16) 165.5 (8,2) 292.5 (16)
512000 228.6 (16) 128.7 (8,2) 195.9 (16)
1024000 173.1 (16) 87.2 (8,2) 170.5 (16)
2048000 163.1 (16) 104.6 (8,2) 157.9 (16)
4096000 152.9 (16) 74.52 (8,2) 141.4 (16)
8192000 109.4 (16) 87.01 (8,2) 108.8 (16)
16384000 85.54 (16) 50.89 (8,2) 85.72 (16)
32768000 84.4 (16) 35.35 (8,2) 78.52 (16)
65536000 63.7 (16) 44.09 (8,2) 61.13 (16)
131072000 58.58 (16) 30.09 (8,2) 35.14 (16)

Run commands and logfile links for column CPU

32000 mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 16 -v y 10 -v z 20 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=chama.pkg=cpu.kind=node.size=32K.node=1.mpi=16
64000 mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 16 -v y 20 -v z 20 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=chama.pkg=cpu.kind=node.size=64K.node=1.mpi=16
128000 mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 32 -v y 20 -v z 20 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=chama.pkg=cpu.kind=node.size=128K.node=1.mpi=16
256000 mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 32 -v y 20 -v z 40 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=chama.pkg=cpu.kind=node.size=256K.node=1.mpi=16
512000 mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 32 -v y 40 -v z 40 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=chama.pkg=cpu.kind=node.size=512K.node=1.mpi=16
1024000 mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 64 -v y 40 -v z 40 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=chama.pkg=cpu.kind=node.size=1M.node=1.mpi=16
2048000 mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 64 -v y 40 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=chama.pkg=cpu.kind=node.size=2M.node=1.mpi=16
4096000 mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 64 -v y 80 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=chama.pkg=cpu.kind=node.size=4M.node=1.mpi=16
8192000 mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 128 -v y 80 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=chama.pkg=cpu.kind=node.size=8M.node=1.mpi=16
16384000 mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 128 -v y 80 -v z 160 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=chama.pkg=cpu.kind=node.size=16M.node=1.mpi=16
32768000 mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 128 -v y 160 -v z 160 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=chama.pkg=cpu.kind=node.size=32M.node=1.mpi=16
65536000 mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 256 -v y 160 -v z 160 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=chama.pkg=cpu.kind=node.size=64M.node=1.mpi=16
131072000 mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 256 -v y 160 -v z 320 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=chama.pkg=cpu.kind=node.size=128M.node=1.mpi=16

Run commands and logfile links for column Kokkos/OMP

32000 mpirun -n 8 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 16 -v y 10 -v z 20 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=chama.pkg=kokkos_omp.kind=node.size=32K.node=1.mpi=8.thread=2
64000 mpirun -n 8 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 16 -v y 20 -v z 20 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=chama.pkg=kokkos_omp.kind=node.size=64K.node=1.mpi=8.thread=2
128000 mpirun -n 8 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 20 -v z 20 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=chama.pkg=kokkos_omp.kind=node.size=128K.node=1.mpi=8.thread=2
256000 mpirun -n 8 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 20 -v z 40 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=chama.pkg=kokkos_omp.kind=node.size=256K.node=1.mpi=8.thread=2
512000 mpirun -n 8 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 40 -v z 40 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=chama.pkg=kokkos_omp.kind=node.size=512K.node=1.mpi=8.thread=2
1024000 mpirun -n 8 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 40 -v z 40 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=chama.pkg=kokkos_omp.kind=node.size=1M.node=1.mpi=8.thread=2
2048000 mpirun -n 8 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 40 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=chama.pkg=kokkos_omp.kind=node.size=2M.node=1.mpi=8.thread=2
4096000 mpirun -n 8 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 80 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=chama.pkg=kokkos_omp.kind=node.size=4M.node=1.mpi=8.thread=2
8192000 mpirun -n 8 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=chama.pkg=kokkos_omp.kind=node.size=8M.node=1.mpi=8.thread=2
16384000 mpirun -n 8 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 160 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=chama.pkg=kokkos_omp.kind=node.size=16M.node=1.mpi=8.thread=2
32768000 mpirun -n 8 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 160 -v z 160 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=chama.pkg=kokkos_omp.kind=node.size=32M.node=1.mpi=8.thread=2
65536000 mpirun -n 8 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 160 -v z 160 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=chama.pkg=kokkos_omp.kind=node.size=64M.node=1.mpi=8.thread=2
131072000 mpirun -n 8 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 160 -v z 320 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=chama.pkg=kokkos_omp.kind=node.size=128M.node=1.mpi=8.thread=2

Run commands and logfile links for column Kokkos/serial

32000 mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 16 -v y 10 -v z 20 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=chama.pkg=kokkos_serial.kind=node.size=32K.node=1.mpi=16
64000 mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 16 -v y 20 -v z 20 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=chama.pkg=kokkos_serial.kind=node.size=64K.node=1.mpi=16
128000 mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 20 -v z 20 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=chama.pkg=kokkos_serial.kind=node.size=128K.node=1.mpi=16
256000 mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 20 -v z 40 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=chama.pkg=kokkos_serial.kind=node.size=256K.node=1.mpi=16
512000 mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 40 -v z 40 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=chama.pkg=kokkos_serial.kind=node.size=512K.node=1.mpi=16
1024000 mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 40 -v z 40 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=chama.pkg=kokkos_serial.kind=node.size=1M.node=1.mpi=16
2048000 mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 40 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=chama.pkg=kokkos_serial.kind=node.size=2M.node=1.mpi=16
4096000 mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 80 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=chama.pkg=kokkos_serial.kind=node.size=4M.node=1.mpi=16
8192000 mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=chama.pkg=kokkos_serial.kind=node.size=8M.node=1.mpi=16
16384000 mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 160 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=chama.pkg=kokkos_serial.kind=node.size=16M.node=1.mpi=16
32768000 mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 160 -v z 160 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=chama.pkg=kokkos_serial.kind=node.size=32M.node=1.mpi=16
65536000 mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 160 -v z 160 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=chama.pkg=kokkos_serial.kind=node.size=64M.node=1.mpi=16
131072000 mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 160 -v z 320 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=chama.pkg=kokkos_serial.kind=node.size=128M.node=1.mpi=16