Single node performance, Sphere benchmark, KNL
Performance in millions of particle-timesteps per second; the tuple after each value is the run configuration named in that column's header

Nparticles CPU/KNL (mpi,hyper) Kokkos/KNL (mpi,thread,hyper) Kokkos/serial/KNL (mpi,hyper)
32000 83.88 (64,1) 51.58 (32,2,1) 65.59 (64,1)
64000 139.5 (64,1) 94.63 (32,2,1) 106.6 (64,1)
128000 191.4 (64,1) 129.2 (32,2,1) 148.1 (64,1)
256000 229 (64,1) 166.3 (32,4,2) 179.1 (64,1)
512000 266.2 (64,1) 226 (64,2,2) 231.8 (64,1)
1024000 291.6 (128,2) 291.3 (64,4,4) 257.7 (64,1)
2048000 309.3 (128,2) 344.5 (64,4,4) 285.1 (128,2)
4096000 282.3 (256,4) 348.1 (32,8,4) 276.3 (128,2)
8192000 271.5 (256,4) 303.1 (32,8,4) 261.8 (256,4)
16384000 251.5 (256,4) 266.2 (64,4,4) 251.6 (256,4)
32768000 235.6 (256,4) 250 (64,4,4) 241.2 (256,4)
65536000 210 (256,4) 209.8 (64,4,4) 213.3 (256,4)
131072000 186.7 (256,4) 188.7 (64,4,4) 189.7 (256,4)

Run commands and logfile links for column CPU/KNL

32000 srun -n 64 -C knl --ntasks-per-node 64 --cpu_bind=rank -c 4 ./spa_mutrino_knl -v x 16 -v y 10 -v z 20 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=cpu_knl.kind=node.size=32K.node=1.mpi=64.hyper=1
64000 srun -n 64 -C knl --ntasks-per-node 64 --cpu_bind=rank -c 4 ./spa_mutrino_knl -v x 16 -v y 20 -v z 20 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=cpu_knl.kind=node.size=64K.node=1.mpi=64.hyper=1
128000 srun -n 64 -C knl --ntasks-per-node 64 --cpu_bind=rank -c 4 ./spa_mutrino_knl -v x 32 -v y 20 -v z 20 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=cpu_knl.kind=node.size=128K.node=1.mpi=64.hyper=1
256000 srun -n 64 -C knl --ntasks-per-node 64 --cpu_bind=rank -c 4 ./spa_mutrino_knl -v x 32 -v y 20 -v z 40 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=cpu_knl.kind=node.size=256K.node=1.mpi=64.hyper=1
512000 srun -n 64 -C knl --ntasks-per-node 64 --cpu_bind=rank -c 4 ./spa_mutrino_knl -v x 32 -v y 40 -v z 40 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=cpu_knl.kind=node.size=512K.node=1.mpi=64.hyper=1
1024000 srun -n 128 -C knl --ntasks-per-node 128 --cpu_bind=threads -c 2 ./spa_mutrino_knl -v x 64 -v y 40 -v z 40 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=cpu_knl.kind=node.size=1M.node=1.mpi=128.hyper=2
2048000 srun -n 128 -C knl --ntasks-per-node 128 --cpu_bind=threads -c 2 ./spa_mutrino_knl -v x 64 -v y 40 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=cpu_knl.kind=node.size=2M.node=1.mpi=128.hyper=2
4096000 srun -n 256 -C knl --ntasks-per-node 256 --cpu_bind=threads -c 1 ./spa_mutrino_knl -v x 64 -v y 80 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=cpu_knl.kind=node.size=4M.node=1.mpi=256.hyper=4
8192000 srun -n 256 -C knl --ntasks-per-node 256 --cpu_bind=threads -c 1 ./spa_mutrino_knl -v x 128 -v y 80 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=cpu_knl.kind=node.size=8M.node=1.mpi=256.hyper=4
16384000 srun -n 256 -C knl --ntasks-per-node 256 --cpu_bind=threads -c 1 ./spa_mutrino_knl -v x 128 -v y 80 -v z 160 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=cpu_knl.kind=node.size=16M.node=1.mpi=256.hyper=4
32768000 srun -n 256 -C knl --ntasks-per-node 256 --cpu_bind=threads -c 1 ./spa_mutrino_knl -v x 128 -v y 160 -v z 160 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=cpu_knl.kind=node.size=32M.node=1.mpi=256.hyper=4
65536000 srun -n 256 -C knl --ntasks-per-node 256 --cpu_bind=threads -c 1 ./spa_mutrino_knl -v x 256 -v y 160 -v z 160 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=cpu_knl.kind=node.size=64M.node=1.mpi=256.hyper=4
131072000 srun -n 256 -C knl --ntasks-per-node 256 --cpu_bind=threads -c 1 ./spa_mutrino_knl -v x 256 -v y 160 -v z 320 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=cpu_knl.kind=node.size=128M.node=1.mpi=256.hyper=4

Run commands and logfile links for column Kokkos/KNL

32000 setenv OMP_NUM_THREADS 2; srun -n 32 -C knl --ntasks-per-node 32 --cpu_bind=cores -c 8 ./spa_mutrino_kokkos_knl -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 16 -v y 10 -v z 20 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=kokkos_knl.kind=node.size=32K.node=1.mpi=32.thread=2.hyper=1
64000 setenv OMP_NUM_THREADS 2; srun -n 32 -C knl --ntasks-per-node 32 --cpu_bind=cores -c 8 ./spa_mutrino_kokkos_knl -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 16 -v y 20 -v z 20 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=kokkos_knl.kind=node.size=64K.node=1.mpi=32.thread=2.hyper=1
128000 setenv OMP_NUM_THREADS 2; srun -n 32 -C knl --ntasks-per-node 32 --cpu_bind=cores -c 8 ./spa_mutrino_kokkos_knl -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 20 -v z 20 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=kokkos_knl.kind=node.size=128K.node=1.mpi=32.thread=2.hyper=1
256000 setenv OMP_NUM_THREADS 4; srun -n 32 -C knl --ntasks-per-node 32 --cpu_bind=cores -c 8 ./spa_mutrino_kokkos_knl -sf kk -k on t 4 -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 20 -v z 40 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=kokkos_knl.kind=node.size=256K.node=1.mpi=32.thread=4.hyper=2
512000 setenv OMP_NUM_THREADS 2; srun -n 64 -C knl --ntasks-per-node 64 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_knl -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 40 -v z 40 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=kokkos_knl.kind=node.size=512K.node=1.mpi=64.thread=2.hyper=2
1024000 setenv OMP_NUM_THREADS 4; srun -n 64 -C knl --ntasks-per-node 64 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_knl -sf kk -k on t 4 -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 40 -v z 40 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=kokkos_knl.kind=node.size=1M.node=1.mpi=64.thread=4.hyper=4
2048000 setenv OMP_NUM_THREADS 4; srun -n 64 -C knl --ntasks-per-node 64 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_knl -sf kk -k on t 4 -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 40 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=kokkos_knl.kind=node.size=2M.node=1.mpi=64.thread=4.hyper=4
4096000 setenv OMP_NUM_THREADS 8; srun -n 32 -C knl --ntasks-per-node 32 --cpu_bind=cores -c 8 ./spa_mutrino_kokkos_knl -sf kk -k on t 8 -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 80 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=kokkos_knl.kind=node.size=4M.node=1.mpi=32.thread=8.hyper=4
8192000 setenv OMP_NUM_THREADS 8; srun -n 32 -C knl --ntasks-per-node 32 --cpu_bind=cores -c 8 ./spa_mutrino_kokkos_knl -sf kk -k on t 8 -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=kokkos_knl.kind=node.size=8M.node=1.mpi=32.thread=8.hyper=4
16384000 setenv OMP_NUM_THREADS 4; srun -n 64 -C knl --ntasks-per-node 64 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_knl -sf kk -k on t 4 -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 160 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=kokkos_knl.kind=node.size=16M.node=1.mpi=64.thread=4.hyper=4
32768000 setenv OMP_NUM_THREADS 4; srun -n 64 -C knl --ntasks-per-node 64 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_knl -sf kk -k on t 4 -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 160 -v z 160 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=kokkos_knl.kind=node.size=32M.node=1.mpi=64.thread=4.hyper=4
65536000 setenv OMP_NUM_THREADS 4; srun -n 64 -C knl --ntasks-per-node 64 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_knl -sf kk -k on t 4 -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 160 -v z 160 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=kokkos_knl.kind=node.size=64M.node=1.mpi=64.thread=4.hyper=4
131072000 setenv OMP_NUM_THREADS 4; srun -n 64 -C knl --ntasks-per-node 64 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_knl -sf kk -k on t 4 -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 160 -v z 320 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=kokkos_knl.kind=node.size=128M.node=1.mpi=64.thread=4.hyper=4

Run commands and logfile links for column Kokkos/serial/KNL

32000 srun -n 64 -C knl --ntasks-per-node 64 --cpu_bind=rank -c 4 ./spa_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 16 -v y 10 -v z 20 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=kokkos_serial_knl.kind=node.size=32K.node=1.mpi=64.hyper=1
64000 srun -n 64 -C knl --ntasks-per-node 64 --cpu_bind=rank -c 4 ./spa_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 16 -v y 20 -v z 20 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=kokkos_serial_knl.kind=node.size=64K.node=1.mpi=64.hyper=1
128000 srun -n 64 -C knl --ntasks-per-node 64 --cpu_bind=rank -c 4 ./spa_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 20 -v z 20 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=kokkos_serial_knl.kind=node.size=128K.node=1.mpi=64.hyper=1
256000 srun -n 64 -C knl --ntasks-per-node 64 --cpu_bind=rank -c 4 ./spa_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 20 -v z 40 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=kokkos_serial_knl.kind=node.size=256K.node=1.mpi=64.hyper=1
512000 srun -n 64 -C knl --ntasks-per-node 64 --cpu_bind=rank -c 4 ./spa_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 40 -v z 40 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=kokkos_serial_knl.kind=node.size=512K.node=1.mpi=64.hyper=1
1024000 srun -n 64 -C knl --ntasks-per-node 64 --cpu_bind=rank -c 4 ./spa_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 40 -v z 40 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=kokkos_serial_knl.kind=node.size=1M.node=1.mpi=64.hyper=1
2048000 srun -n 128 -C knl --ntasks-per-node 128 --cpu_bind=threads -c 2 ./spa_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 40 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=kokkos_serial_knl.kind=node.size=2M.node=1.mpi=128.hyper=2
4096000 srun -n 128 -C knl --ntasks-per-node 128 --cpu_bind=threads -c 2 ./spa_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 80 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=kokkos_serial_knl.kind=node.size=4M.node=1.mpi=128.hyper=2
8192000 srun -n 256 -C knl --ntasks-per-node 256 --cpu_bind=threads -c 1 ./spa_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=kokkos_serial_knl.kind=node.size=8M.node=1.mpi=256.hyper=4
16384000 srun -n 256 -C knl --ntasks-per-node 256 --cpu_bind=threads -c 1 ./spa_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 160 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=kokkos_serial_knl.kind=node.size=16M.node=1.mpi=256.hyper=4
32768000 srun -n 256 -C knl --ntasks-per-node 256 --cpu_bind=threads -c 1 ./spa_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 160 -v z 160 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=kokkos_serial_knl.kind=node.size=32M.node=1.mpi=256.hyper=4
65536000 srun -n 256 -C knl --ntasks-per-node 256 --cpu_bind=threads -c 1 ./spa_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 160 -v z 160 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=kokkos_serial_knl.kind=node.size=64M.node=1.mpi=256.hyper=4
131072000 srun -n 256 -C knl --ntasks-per-node 256 --cpu_bind=threads -c 1 ./spa_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 160 -v z 320 -v t 100 -in in.sphere.steps -log log.sparta.date=23Dec17.model=sphere.machine=mutrino.pkg=kokkos_serial_knl.kind=node.size=128M.node=1.mpi=256.hyper=4