Single-core performance, Free benchmark, KNL
Performance in millions of particle-timesteps per second; the number in parentheses is the MPI task count for that run

Nparticles CPU/KNL (mpi) Kokkos/KNL (mpi) Kokkos/serial/KNL (mpi)
1000 12.49 (1) 7.82 (1) 8.343 (1)
2000 12.79 (1) 9.111 (1) 9.484 (1)
4000 13.04 (1) 9.919 (1) 10.1 (1)
8000 13 (1) 9.449 (1) 9.7 (1)
16000 12.25 (1) 9.582 (1) 9.705 (1)

Run commands and logfile links for column CPU/KNL

1000 srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_knl -v x 4 -v y 5 -v z 5 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu_knl.kind=core.size=1K.node=1.mpi=1.hyper=1
2000 srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_knl -v x 8 -v y 5 -v z 5 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu_knl.kind=core.size=2K.node=1.mpi=1.hyper=1
4000 srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_knl -v x 8 -v y 5 -v z 10 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu_knl.kind=core.size=4K.node=1.mpi=1.hyper=1
8000 srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_knl -v x 8 -v y 10 -v z 10 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu_knl.kind=core.size=8K.node=1.mpi=1.hyper=1
16000 srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_knl -v x 16 -v y 10 -v z 10 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu_knl.kind=core.size=16K.node=1.mpi=1.hyper=1

Run commands and logfile links for column Kokkos/KNL

1000 setenv OMP_NUM_THREADS 1; srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_knl -sf kk -k on t 1 -pk kokkos reduction parallel/reduce comm classic -v x 4 -v y 5 -v z 5 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_knl.kind=core.size=1K.node=1.mpi=1.thread=1.hyper=1
2000 setenv OMP_NUM_THREADS 1; srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_knl -sf kk -k on t 1 -pk kokkos reduction parallel/reduce comm classic -v x 8 -v y 5 -v z 5 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_knl.kind=core.size=2K.node=1.mpi=1.thread=1.hyper=1
4000 setenv OMP_NUM_THREADS 1; srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_knl -sf kk -k on t 1 -pk kokkos reduction parallel/reduce comm classic -v x 8 -v y 5 -v z 10 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_knl.kind=core.size=4K.node=1.mpi=1.thread=1.hyper=1
8000 setenv OMP_NUM_THREADS 1; srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_knl -sf kk -k on t 1 -pk kokkos reduction parallel/reduce comm classic -v x 8 -v y 10 -v z 10 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_knl.kind=core.size=8K.node=1.mpi=1.thread=1.hyper=1
16000 setenv OMP_NUM_THREADS 1; srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_knl -sf kk -k on t 1 -pk kokkos reduction parallel/reduce comm classic -v x 16 -v y 10 -v z 10 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_knl.kind=core.size=16K.node=1.mpi=1.thread=1.hyper=1

Run commands and logfile links for column Kokkos/serial/KNL

1000 srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 4 -v y 5 -v z 5 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_serial_knl.kind=core.size=1K.node=1.mpi=1.hyper=1
2000 srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 8 -v y 5 -v z 5 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_serial_knl.kind=core.size=2K.node=1.mpi=1.hyper=1
4000 srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 8 -v y 5 -v z 10 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_serial_knl.kind=core.size=4K.node=1.mpi=1.hyper=1
8000 srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 8 -v y 10 -v z 10 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_serial_knl.kind=core.size=8K.node=1.mpi=1.hyper=1
16000 srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 16 -v y 10 -v z 10 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_serial_knl.kind=core.size=16K.node=1.mpi=1.hyper=1