Single core performance, Collide benchmark, Haswell
Performance in millions of particle-timesteps / second

Nparticles CPU (mpi) Kokkos/OMP (mpi) Kokkos/serial (mpi)
1000 27.74 (1) 22.33 (1) 23.91 (1)
2000 28.13 (1) 22.7 (1) 23.35 (1)
4000 26.74 (1) 24.12 (1) 24.46 (1)
8000 25.58 (1) 22.42 (1) 23.56 (1)
16000 23.86 (1) 21.02 (1) 21.14 (1)

Run commands and logfile links for column CPU

1000 srun -n 1 -C haswell --ntasks-per-node 1 --cpu_bind=cores -c 2 ./spa_mutrino_cpu -v x 4 -v y 5 -v z 5 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=cpu.kind=core.size=1K.node=1.mpi=1.hyper=1
2000 srun -n 1 -C haswell --ntasks-per-node 1 --cpu_bind=cores -c 2 ./spa_mutrino_cpu -v x 8 -v y 5 -v z 5 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=cpu.kind=core.size=2K.node=1.mpi=1.hyper=1
4000 srun -n 1 -C haswell --ntasks-per-node 1 --cpu_bind=cores -c 2 ./spa_mutrino_cpu -v x 8 -v y 5 -v z 10 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=cpu.kind=core.size=4K.node=1.mpi=1.hyper=1
8000 srun -n 1 -C haswell --ntasks-per-node 1 --cpu_bind=cores -c 2 ./spa_mutrino_cpu -v x 8 -v y 10 -v z 10 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=cpu.kind=core.size=8K.node=1.mpi=1.hyper=1
16000 srun -n 1 -C haswell --ntasks-per-node 1 --cpu_bind=cores -c 2 ./spa_mutrino_cpu -v x 16 -v y 10 -v z 10 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=cpu.kind=core.size=16K.node=1.mpi=1.hyper=1

Run commands and logfile links for column Kokkos/OMP

1000 setenv OMP_NUM_THREADS 1; srun -n 1 -C haswell --ntasks-per-node 1 --cpu_bind=cores -c 2 ./spa_mutrino_kokkos_omp -sf kk -k on t 1 -pk kokkos reduction parallel/reduce comm classic -v x 4 -v y 5 -v z 5 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_omp.kind=core.size=1K.node=1.mpi=1.thread=1.hyper=1
2000 setenv OMP_NUM_THREADS 1; srun -n 1 -C haswell --ntasks-per-node 1 --cpu_bind=cores -c 2 ./spa_mutrino_kokkos_omp -sf kk -k on t 1 -pk kokkos reduction parallel/reduce comm classic -v x 8 -v y 5 -v z 5 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_omp.kind=core.size=2K.node=1.mpi=1.thread=1.hyper=1
4000 setenv OMP_NUM_THREADS 1; srun -n 1 -C haswell --ntasks-per-node 1 --cpu_bind=cores -c 2 ./spa_mutrino_kokkos_omp -sf kk -k on t 1 -pk kokkos reduction parallel/reduce comm classic -v x 8 -v y 5 -v z 10 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_omp.kind=core.size=4K.node=1.mpi=1.thread=1.hyper=1
8000 setenv OMP_NUM_THREADS 1; srun -n 1 -C haswell --ntasks-per-node 1 --cpu_bind=cores -c 2 ./spa_mutrino_kokkos_omp -sf kk -k on t 1 -pk kokkos reduction parallel/reduce comm classic -v x 8 -v y 10 -v z 10 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_omp.kind=core.size=8K.node=1.mpi=1.thread=1.hyper=1
16000 setenv OMP_NUM_THREADS 1; srun -n 1 -C haswell --ntasks-per-node 1 --cpu_bind=cores -c 2 ./spa_mutrino_kokkos_omp -sf kk -k on t 1 -pk kokkos reduction parallel/reduce comm classic -v x 16 -v y 10 -v z 10 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_omp.kind=core.size=16K.node=1.mpi=1.thread=1.hyper=1

Run commands and logfile links for column Kokkos/serial

1000 srun -n 1 -C haswell --ntasks-per-node 1 --cpu_bind=cores -c 2 ./spa_mutrino_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 4 -v y 5 -v z 5 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_serial.kind=core.size=1K.node=1.mpi=1.hyper=1
2000 srun -n 1 -C haswell --ntasks-per-node 1 --cpu_bind=cores -c 2 ./spa_mutrino_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 8 -v y 5 -v z 5 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_serial.kind=core.size=2K.node=1.mpi=1.hyper=1
4000 srun -n 1 -C haswell --ntasks-per-node 1 --cpu_bind=cores -c 2 ./spa_mutrino_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 8 -v y 5 -v z 10 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_serial.kind=core.size=4K.node=1.mpi=1.hyper=1
8000 srun -n 1 -C haswell --ntasks-per-node 1 --cpu_bind=cores -c 2 ./spa_mutrino_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 8 -v y 10 -v z 10 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_serial.kind=core.size=8K.node=1.mpi=1.hyper=1
16000 srun -n 1 -C haswell --ntasks-per-node 1 --cpu_bind=cores -c 2 ./spa_mutrino_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 16 -v y 10 -v z 10 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_serial.kind=core.size=16K.node=1.mpi=1.hyper=1