Single core performance, Collide benchmark, KNL
Performance in millions of particle-timesteps / second

Nparticles CPU/KNL (mpi) Kokkos/KNL (mpi) Kokkos/serial/KNL (mpi)
1000 6.501 (1) 4.727 (1) 5.269 (1)
2000 6.61 (1) 5.388 (1) 5.836 (1)
4000 6.564 (1) 5.773 (1) 6.152 (1)
8000 6.022 (1) 4.972 (1) 5.285 (1)
16000 5.08 (1) 4.812 (1) 4.888 (1)

Run commands and logfile links for column CPU/KNL

1000 srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_knl -v x 4 -v y 5 -v z 5 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=cpu_knl.kind=core.size=1K.node=1.mpi=1.hyper=1
2000 srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_knl -v x 8 -v y 5 -v z 5 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=cpu_knl.kind=core.size=2K.node=1.mpi=1.hyper=1
4000 srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_knl -v x 8 -v y 5 -v z 10 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=cpu_knl.kind=core.size=4K.node=1.mpi=1.hyper=1
8000 srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_knl -v x 8 -v y 10 -v z 10 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=cpu_knl.kind=core.size=8K.node=1.mpi=1.hyper=1
16000 srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_knl -v x 16 -v y 10 -v z 10 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=cpu_knl.kind=core.size=16K.node=1.mpi=1.hyper=1

Run commands and logfile links for column Kokkos/KNL

1000 setenv OMP_NUM_THREADS 1; srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_knl -sf kk -k on t 1 -pk kokkos reduction parallel/reduce comm classic -v x 4 -v y 5 -v z 5 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_knl.kind=core.size=1K.node=1.mpi=1.thread=1.hyper=1
2000 setenv OMP_NUM_THREADS 1; srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_knl -sf kk -k on t 1 -pk kokkos reduction parallel/reduce comm classic -v x 8 -v y 5 -v z 5 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_knl.kind=core.size=2K.node=1.mpi=1.thread=1.hyper=1
4000 setenv OMP_NUM_THREADS 1; srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_knl -sf kk -k on t 1 -pk kokkos reduction parallel/reduce comm classic -v x 8 -v y 5 -v z 10 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_knl.kind=core.size=4K.node=1.mpi=1.thread=1.hyper=1
8000 setenv OMP_NUM_THREADS 1; srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_knl -sf kk -k on t 1 -pk kokkos reduction parallel/reduce comm classic -v x 8 -v y 10 -v z 10 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_knl.kind=core.size=8K.node=1.mpi=1.thread=1.hyper=1
16000 setenv OMP_NUM_THREADS 1; srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_knl -sf kk -k on t 1 -pk kokkos reduction parallel/reduce comm classic -v x 16 -v y 10 -v z 10 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_knl.kind=core.size=16K.node=1.mpi=1.thread=1.hyper=1

Run commands and logfile links for column Kokkos/serial/KNL

1000 srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 4 -v y 5 -v z 5 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_serial_knl.kind=core.size=1K.node=1.mpi=1.hyper=1
2000 srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 8 -v y 5 -v z 5 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_serial_knl.kind=core.size=2K.node=1.mpi=1.hyper=1
4000 srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 8 -v y 5 -v z 10 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_serial_knl.kind=core.size=4K.node=1.mpi=1.hyper=1
8000 srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 8 -v y 10 -v z 10 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_serial_knl.kind=core.size=8K.node=1.mpi=1.hyper=1
16000 srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 16 -v y 10 -v z 10 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_serial_knl.kind=core.size=16K.node=1.mpi=1.hyper=1