Single node performance, Collide benchmark, SandyBridge
Performance in millions of particle-timesteps / second

Nparticles CPU (mpi) Kokkos/OMP (mpi,thread) Kokkos/serial (mpi)
32000 243.3 (16) 127.5 (8,2) 212.6 (16)
64000 276.6 (16) 131.7 (8,2) 250.2 (16)
128000 291.8 (16) 137.6 (8,2) 280.6 (16)
256000 268.2 (16) 127.2 (8,2) 247.2 (16)
512000 159.8 (16) 84.97 (8,2) 150.3 (16)
1024000 133.8 (16) 76.66 (8,2) 141.4 (16)
2048000 118 (16) 70.6 (8,2) 117.4 (16)
4096000 97.1 (16) 71.07 (8,2) 95.4 (16)
8192000 78.51 (16) 56.88 (8,2) 81.03 (16)
16384000 66.36 (16) 57.95 (8,2) 69.04 (16)
32768000 63.06 (16) 35.65 (8,2) 66.75 (16)
65536000 58.02 (16) 54.32 (8,2) 59.63 (16)
131072000 58.42 (16) 54.55 (8,2) 56.04 (16)

Run commands and logfile links for column CPU

32000 mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 16 -v y 10 -v z 20 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=node.size=32K.node=1.mpi=16
64000 mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 16 -v y 20 -v z 20 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=node.size=64K.node=1.mpi=16
128000 mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 32 -v y 20 -v z 20 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=node.size=128K.node=1.mpi=16
256000 mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 32 -v y 20 -v z 40 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=node.size=256K.node=1.mpi=16
512000 mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 32 -v y 40 -v z 40 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=node.size=512K.node=1.mpi=16
1024000 mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 64 -v y 40 -v z 40 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=node.size=1M.node=1.mpi=16
2048000 mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 64 -v y 40 -v z 80 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=node.size=2M.node=1.mpi=16
4096000 mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 64 -v y 80 -v z 80 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=node.size=4M.node=1.mpi=16
8192000 mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 128 -v y 80 -v z 80 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=node.size=8M.node=1.mpi=16
16384000 mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 128 -v y 80 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=node.size=16M.node=1.mpi=16
32768000 mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 128 -v y 160 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=node.size=32M.node=1.mpi=16
65536000 mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 256 -v y 160 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=node.size=64M.node=1.mpi=16
131072000 mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 256 -v y 160 -v z 320 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=node.size=128M.node=1.mpi=16

Run commands and logfile links for column Kokkos/OMP

32000 mpirun -n 8 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 16 -v y 10 -v z 20 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_omp.kind=node.size=32K.node=1.mpi=8.thread=2
64000 mpirun -n 8 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 16 -v y 20 -v z 20 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_omp.kind=node.size=64K.node=1.mpi=8.thread=2
128000 mpirun -n 8 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 20 -v z 20 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_omp.kind=node.size=128K.node=1.mpi=8.thread=2
256000 mpirun -n 8 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 20 -v z 40 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_omp.kind=node.size=256K.node=1.mpi=8.thread=2
512000 mpirun -n 8 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 40 -v z 40 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_omp.kind=node.size=512K.node=1.mpi=8.thread=2
1024000 mpirun -n 8 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 40 -v z 40 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_omp.kind=node.size=1M.node=1.mpi=8.thread=2
2048000 mpirun -n 8 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 40 -v z 80 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_omp.kind=node.size=2M.node=1.mpi=8.thread=2
4096000 mpirun -n 8 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 80 -v z 80 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_omp.kind=node.size=4M.node=1.mpi=8.thread=2
8192000 mpirun -n 8 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_omp.kind=node.size=8M.node=1.mpi=8.thread=2
16384000 mpirun -n 8 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_omp.kind=node.size=16M.node=1.mpi=8.thread=2
32768000 mpirun -n 8 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 160 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_omp.kind=node.size=32M.node=1.mpi=8.thread=2
65536000 mpirun -n 8 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 160 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_omp.kind=node.size=64M.node=1.mpi=8.thread=2
131072000 mpirun -n 8 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 160 -v z 320 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_omp.kind=node.size=128M.node=1.mpi=8.thread=2

Run commands and logfile links for column Kokkos/serial

32000 mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 16 -v y 10 -v z 20 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_serial.kind=node.size=32K.node=1.mpi=16
64000 mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 16 -v y 20 -v z 20 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_serial.kind=node.size=64K.node=1.mpi=16
128000 mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 20 -v z 20 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_serial.kind=node.size=128K.node=1.mpi=16
256000 mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 20 -v z 40 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_serial.kind=node.size=256K.node=1.mpi=16
512000 mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 40 -v z 40 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_serial.kind=node.size=512K.node=1.mpi=16
1024000 mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 40 -v z 40 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_serial.kind=node.size=1M.node=1.mpi=16
2048000 mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 40 -v z 80 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_serial.kind=node.size=2M.node=1.mpi=16
4096000 mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 80 -v z 80 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_serial.kind=node.size=4M.node=1.mpi=16
8192000 mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_serial.kind=node.size=8M.node=1.mpi=16
16384000 mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_serial.kind=node.size=16M.node=1.mpi=16
32768000 mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 160 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_serial.kind=node.size=32M.node=1.mpi=16
65536000 mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 160 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_serial.kind=node.size=64M.node=1.mpi=16
131072000 mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 160 -v z 320 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_serial.kind=node.size=128M.node=1.mpi=16