Single-core performance, Free benchmark, Haswell
Performance in millions of particle-timesteps / second; the number in parentheses is the MPI task count of the run (the mpi= field of its logfile name)

Nparticles CPU (mpi) Kokkos/OMP (mpi) Kokkos/serial (mpi)
1000 53.28 (1) 38.84 (1) 39.26 (1)
2000 54.38 (1) 39.25 (1) 41.61 (1)
4000 56.8 (1) 42.87 (1) 44.43 (1)
8000 53.84 (1) 45 (1) 44.27 (1)
16000 52.47 (1) 41.85 (1) 41.66 (1)

Run commands and logfile links for column CPU

1000 srun -n 1 -C haswell --ntasks-per-node 1 --cpu_bind=cores -c 2 ./spa_mutrino_cpu -v x 4 -v y 5 -v z 5 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu.kind=core.size=1K.node=1.mpi=1.hyper=1
2000 srun -n 1 -C haswell --ntasks-per-node 1 --cpu_bind=cores -c 2 ./spa_mutrino_cpu -v x 8 -v y 5 -v z 5 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu.kind=core.size=2K.node=1.mpi=1.hyper=1
4000 srun -n 1 -C haswell --ntasks-per-node 1 --cpu_bind=cores -c 2 ./spa_mutrino_cpu -v x 8 -v y 5 -v z 10 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu.kind=core.size=4K.node=1.mpi=1.hyper=1
8000 srun -n 1 -C haswell --ntasks-per-node 1 --cpu_bind=cores -c 2 ./spa_mutrino_cpu -v x 8 -v y 10 -v z 10 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu.kind=core.size=8K.node=1.mpi=1.hyper=1
16000 srun -n 1 -C haswell --ntasks-per-node 1 --cpu_bind=cores -c 2 ./spa_mutrino_cpu -v x 16 -v y 10 -v z 10 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu.kind=core.size=16K.node=1.mpi=1.hyper=1

Run commands and logfile links for column Kokkos/OMP

1000 setenv OMP_NUM_THREADS 1; srun -n 1 -C haswell --ntasks-per-node 1 --cpu_bind=cores -c 2 ./spa_mutrino_kokkos_omp -sf kk -k on t 1 -pk kokkos reduction parallel/reduce comm classic -v x 4 -v y 5 -v z 5 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_omp.kind=core.size=1K.node=1.mpi=1.thread=1.hyper=1
2000 setenv OMP_NUM_THREADS 1; srun -n 1 -C haswell --ntasks-per-node 1 --cpu_bind=cores -c 2 ./spa_mutrino_kokkos_omp -sf kk -k on t 1 -pk kokkos reduction parallel/reduce comm classic -v x 8 -v y 5 -v z 5 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_omp.kind=core.size=2K.node=1.mpi=1.thread=1.hyper=1
4000 setenv OMP_NUM_THREADS 1; srun -n 1 -C haswell --ntasks-per-node 1 --cpu_bind=cores -c 2 ./spa_mutrino_kokkos_omp -sf kk -k on t 1 -pk kokkos reduction parallel/reduce comm classic -v x 8 -v y 5 -v z 10 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_omp.kind=core.size=4K.node=1.mpi=1.thread=1.hyper=1
8000 setenv OMP_NUM_THREADS 1; srun -n 1 -C haswell --ntasks-per-node 1 --cpu_bind=cores -c 2 ./spa_mutrino_kokkos_omp -sf kk -k on t 1 -pk kokkos reduction parallel/reduce comm classic -v x 8 -v y 10 -v z 10 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_omp.kind=core.size=8K.node=1.mpi=1.thread=1.hyper=1
16000 setenv OMP_NUM_THREADS 1; srun -n 1 -C haswell --ntasks-per-node 1 --cpu_bind=cores -c 2 ./spa_mutrino_kokkos_omp -sf kk -k on t 1 -pk kokkos reduction parallel/reduce comm classic -v x 16 -v y 10 -v z 10 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_omp.kind=core.size=16K.node=1.mpi=1.thread=1.hyper=1

Run commands and logfile links for column Kokkos/serial

1000 srun -n 1 -C haswell --ntasks-per-node 1 --cpu_bind=cores -c 2 ./spa_mutrino_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 4 -v y 5 -v z 5 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_serial.kind=core.size=1K.node=1.mpi=1.hyper=1
2000 srun -n 1 -C haswell --ntasks-per-node 1 --cpu_bind=cores -c 2 ./spa_mutrino_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 8 -v y 5 -v z 5 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_serial.kind=core.size=2K.node=1.mpi=1.hyper=1
4000 srun -n 1 -C haswell --ntasks-per-node 1 --cpu_bind=cores -c 2 ./spa_mutrino_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 8 -v y 5 -v z 10 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_serial.kind=core.size=4K.node=1.mpi=1.hyper=1
8000 srun -n 1 -C haswell --ntasks-per-node 1 --cpu_bind=cores -c 2 ./spa_mutrino_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 8 -v y 10 -v z 10 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_serial.kind=core.size=8K.node=1.mpi=1.hyper=1
16000 srun -n 1 -C haswell --ntasks-per-node 1 --cpu_bind=cores -c 2 ./spa_mutrino_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 16 -v y 10 -v z 10 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_serial.kind=core.size=16K.node=1.mpi=1.hyper=1