Single-core performance, Collide benchmark
Performance in millions of particle-timesteps per second

Nparticles SandyBridge Haswell Broadwell KNL
1000 24.79 (CPU,mpi=1) 27.74 (CPU,mpi=1) 25.11 (CPU,mpi=1) 6.501 (CPU/KNL,mpi=1)
2000 26.75 (CPU,mpi=1) 28.13 (CPU,mpi=1) 26.41 (CPU,mpi=1) 6.61 (CPU/KNL,mpi=1)
4000 20.92 (CPU,mpi=1) 26.74 (CPU,mpi=1) 25.13 (CPU,mpi=1) 6.564 (CPU/KNL,mpi=1)
8000 18.96 (Kokkos/OMP,mpi=1) 25.58 (CPU,mpi=1) 24.4 (CPU,mpi=1) 6.022 (CPU/KNL,mpi=1)
16000 18.41 (CPU,mpi=1) 23.86 (CPU,mpi=1) 22.74 (CPU,mpi=1) 5.08 (CPU/KNL,mpi=1)

Run commands and logfile links for column SandyBridge

1000 mpirun -n 1 -N 1 --bind-to core spa_chama_cpu -v x 4 -v y 5 -v z 5 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=core.size=1K.node=1.mpi=1
2000 mpirun -n 1 -N 1 --bind-to core spa_chama_cpu -v x 8 -v y 5 -v z 5 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=core.size=2K.node=1.mpi=1
4000 mpirun -n 1 -N 1 --bind-to core spa_chama_cpu -v x 8 -v y 5 -v z 10 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=core.size=4K.node=1.mpi=1
8000 mpirun -n 1 -N 1 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 1 -pk kokkos reduction parallel/reduce comm classic -v x 8 -v y 10 -v z 10 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_omp.kind=core.size=8K.node=1.mpi=1.thread=1
16000 mpirun -n 1 -N 1 --bind-to core spa_chama_cpu -v x 16 -v y 10 -v z 10 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=core.size=16K.node=1.mpi=1

Run commands and logfile links for column Haswell

1000 srun -n 1 -C haswell --ntasks-per-node 1 --cpu_bind=cores -c 2 ./spa_mutrino_cpu -v x 4 -v y 5 -v z 5 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=cpu.kind=core.size=1K.node=1.mpi=1.hyper=1
2000 srun -n 1 -C haswell --ntasks-per-node 1 --cpu_bind=cores -c 2 ./spa_mutrino_cpu -v x 8 -v y 5 -v z 5 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=cpu.kind=core.size=2K.node=1.mpi=1.hyper=1
4000 srun -n 1 -C haswell --ntasks-per-node 1 --cpu_bind=cores -c 2 ./spa_mutrino_cpu -v x 8 -v y 5 -v z 10 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=cpu.kind=core.size=4K.node=1.mpi=1.hyper=1
8000 srun -n 1 -C haswell --ntasks-per-node 1 --cpu_bind=cores -c 2 ./spa_mutrino_cpu -v x 8 -v y 10 -v z 10 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=cpu.kind=core.size=8K.node=1.mpi=1.hyper=1
16000 srun -n 1 -C haswell --ntasks-per-node 1 --cpu_bind=cores -c 2 ./spa_mutrino_cpu -v x 16 -v y 10 -v z 10 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=cpu.kind=core.size=16K.node=1.mpi=1.hyper=1

Run commands and logfile links for column Broadwell

1000 mpiexec -np 1 -npernode 1 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 4 -v y 5 -v z 5 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=cpu.kind=core.size=1K.node=1.mpi=1.hyper=1
2000 mpiexec -np 1 -npernode 1 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 8 -v y 5 -v z 5 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=cpu.kind=core.size=2K.node=1.mpi=1.hyper=1
4000 mpiexec -np 1 -npernode 1 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 8 -v y 5 -v z 10 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=cpu.kind=core.size=4K.node=1.mpi=1.hyper=1
8000 mpiexec -np 1 -npernode 1 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 8 -v y 10 -v z 10 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=cpu.kind=core.size=8K.node=1.mpi=1.hyper=1
16000 mpiexec -np 1 -npernode 1 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 16 -v y 10 -v z 10 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=cpu.kind=core.size=16K.node=1.mpi=1.hyper=1

Run commands and logfile links for column KNL

1000 srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_knl -v x 4 -v y 5 -v z 5 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=cpu_knl.kind=core.size=1K.node=1.mpi=1.hyper=1
2000 srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_knl -v x 8 -v y 5 -v z 5 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=cpu_knl.kind=core.size=2K.node=1.mpi=1.hyper=1
4000 srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_knl -v x 8 -v y 5 -v z 10 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=cpu_knl.kind=core.size=4K.node=1.mpi=1.hyper=1
8000 srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_knl -v x 8 -v y 10 -v z 10 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=cpu_knl.kind=core.size=8K.node=1.mpi=1.hyper=1
16000 srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_knl -v x 16 -v y 10 -v z 10 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=cpu_knl.kind=core.size=16K.node=1.mpi=1.hyper=1