Strong scaling performance, Sphere benchmark, KNL, 8M particles
Performance in millions of particle-timesteps / second / node

Nodes CPU/KNL (mpi,hyper) Kokkos/KNL (mpi,thread,hyper) Kokkos/serial/KNL (mpi,hyper)
1 271.8 (256,4) 303.1 (32,8,4) 255.2 (256,4)
2 239.3 (128,2) 301.1 (64,4,4) 234.9 (128,2)
4 211.2 (128,2) 272.6 (32,8,4) 203.6 (128,2)
8 176 (128,2) 202 (32,8,4) 165.5 (128,2)
16 134.7 (128,2) 138.5 (32,8,4) 127.6 (128,2)
32 106.6 (64,1) 91.61 (64,4,4) 96.63 (64,1)
64 73.38 (64,1) 62.02 (32,4,2) 62.44 (64,1)

Run commands and logfile links for column CPU/KNL

1 srun -n 256 -C knl --ntasks-per-node 256 --cpu_bind=threads -c 1 ./spa_mutrino_knl -v x 128 -v y 80 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=cpu_knl.kind=strong.size=8M.node=1.mpi=256.hyper=4
2 srun -n 256 -C knl --ntasks-per-node 128 --cpu_bind=threads -c 2 ./spa_mutrino_knl -v x 128 -v y 80 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=cpu_knl.kind=strong.size=8M.node=2.mpi=128.hyper=2
4 srun -n 512 -C knl --ntasks-per-node 128 --cpu_bind=threads -c 2 ./spa_mutrino_knl -v x 128 -v y 80 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=cpu_knl.kind=strong.size=8M.node=4.mpi=128.hyper=2
8 srun -n 1024 -C knl --ntasks-per-node 128 --cpu_bind=threads -c 2 ./spa_mutrino_knl -v x 128 -v y 80 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=cpu_knl.kind=strong.size=8M.node=8.mpi=128.hyper=2
16 srun -n 2048 -C knl --ntasks-per-node 128 --cpu_bind=threads -c 2 ./spa_mutrino_knl -v x 128 -v y 80 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=cpu_knl.kind=strong.size=8M.node=16.mpi=128.hyper=2
32 srun -n 2048 -C knl --ntasks-per-node 64 --cpu_bind=rank -c 4 ./spa_mutrino_knl -v x 128 -v y 80 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=cpu_knl.kind=strong.size=8M.node=32.mpi=64.hyper=1
64 srun -n 4096 -C knl --ntasks-per-node 64 --cpu_bind=rank -c 4 ./spa_mutrino_knl -v x 128 -v y 80 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=cpu_knl.kind=strong.size=8M.node=64.mpi=64.hyper=1

Run commands and logfile links for column Kokkos/KNL

1 setenv OMP_NUM_THREADS 8; srun -n 32 -C knl --ntasks-per-node 32 --cpu_bind=cores -c 8 ./spa_mutrino_kokkos_knl -sf kk -k on t 8 -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=kokkos_knl.kind=strong.size=8M.node=1.mpi=32.thread=8.hyper=4
2 setenv OMP_NUM_THREADS 4; srun -n 128 -C knl --ntasks-per-node 64 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_knl -sf kk -k on t 4 -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=kokkos_knl.kind=strong.size=8M.node=2.mpi=64.thread=4.hyper=4
4 setenv OMP_NUM_THREADS 8; srun -n 128 -C knl --ntasks-per-node 32 --cpu_bind=cores -c 8 ./spa_mutrino_kokkos_knl -sf kk -k on t 8 -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=kokkos_knl.kind=strong.size=8M.node=4.mpi=32.thread=8.hyper=4
8 setenv OMP_NUM_THREADS 8; srun -n 256 -C knl --ntasks-per-node 32 --cpu_bind=cores -c 8 ./spa_mutrino_kokkos_knl -sf kk -k on t 8 -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=kokkos_knl.kind=strong.size=8M.node=8.mpi=32.thread=8.hyper=4
16 setenv OMP_NUM_THREADS 8; srun -n 512 -C knl --ntasks-per-node 32 --cpu_bind=cores -c 8 ./spa_mutrino_kokkos_knl -sf kk -k on t 8 -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=kokkos_knl.kind=strong.size=8M.node=16.mpi=32.thread=8.hyper=4
32 setenv OMP_NUM_THREADS 4; srun -n 2048 -C knl --ntasks-per-node 64 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_knl -sf kk -k on t 4 -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=kokkos_knl.kind=strong.size=8M.node=32.mpi=64.thread=4.hyper=4
64 setenv OMP_NUM_THREADS 4; srun -n 2048 -C knl --ntasks-per-node 32 --cpu_bind=cores -c 8 ./spa_mutrino_kokkos_knl -sf kk -k on t 4 -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=kokkos_knl.kind=strong.size=8M.node=64.mpi=32.thread=4.hyper=2

Run commands and logfile links for column Kokkos/serial/KNL

1 srun -n 256 -C knl --ntasks-per-node 256 --cpu_bind=threads -c 1 ./spa_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=kokkos_serial_knl.kind=strong.size=8M.node=1.mpi=256.hyper=4
2 srun -n 256 -C knl --ntasks-per-node 128 --cpu_bind=threads -c 2 ./spa_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=kokkos_serial_knl.kind=strong.size=8M.node=2.mpi=128.hyper=2
4 srun -n 512 -C knl --ntasks-per-node 128 --cpu_bind=threads -c 2 ./spa_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=kokkos_serial_knl.kind=strong.size=8M.node=4.mpi=128.hyper=2
8 srun -n 1024 -C knl --ntasks-per-node 128 --cpu_bind=threads -c 2 ./spa_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=kokkos_serial_knl.kind=strong.size=8M.node=8.mpi=128.hyper=2
16 srun -n 2048 -C knl --ntasks-per-node 128 --cpu_bind=threads -c 2 ./spa_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=kokkos_serial_knl.kind=strong.size=8M.node=16.mpi=128.hyper=2
32 srun -n 2048 -C knl --ntasks-per-node 64 --cpu_bind=rank -c 4 ./spa_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=kokkos_serial_knl.kind=strong.size=8M.node=32.mpi=64.hyper=1
64 srun -n 4096 -C knl --ntasks-per-node 64 --cpu_bind=rank -c 4 ./spa_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=kokkos_serial_knl.kind=strong.size=8M.node=64.mpi=64.hyper=1