Single node performance, Free benchmark
Performance in millions of particle-timesteps / second

Nparticles SandyBridge Haswell Broadwell KNL K80-1 P100-1
32000 381.9 (CPU,mpi=16) 688.6 (CPU,mpi=32,hyper=1) 376.1 (CPU,mpi=32,hyper=1) 274.5 (CPU/KNL,mpi=64,hyper=1) 26.09 (Kokkos/Cuda,mpi=2) 89.93 (Kokkos/Cuda,mpi=1)
64000 464.7 (CPU,mpi=16) 850 (CPU,mpi=32,hyper=1) 569 (CPU,mpi=32,hyper=1) 375.1 (CPU/KNL,mpi=64,hyper=1) 44.15 (Kokkos/Cuda,mpi=2) 143.9 (Kokkos/Cuda,mpi=1)
128000 547.6 (CPU,mpi=16) 1005 (CPU,mpi=32,hyper=1) 751 (CPU,mpi=32,hyper=2) 460.9 (CPU/KNL,mpi=64,hyper=1) 73.14 (Kokkos/Cuda,mpi=2) 206.4 (Kokkos/Cuda,mpi=1)
256000 540.9 (CPU,mpi=16) 1199 (CPU,mpi=64,hyper=2) 956.4 (CPU,mpi=64,hyper=2) 465.4 (CPU/KNL,mpi=64,hyper=1) 104.8 (Kokkos/Cuda,mpi=2) 256.9 (Kokkos/Cuda,mpi=1)
512000 329.1 (CPU,mpi=16) 1185 (CPU,mpi=64,hyper=2) 1020 (CPU,mpi=72,hyper=2) 528.8 (CPU/KNL,mpi=128,hyper=2) 129.9 (Kokkos/Cuda,mpi=2) 269.5 (Kokkos/Cuda,mpi=1)
1024000 300.6 (CPU,mpi=16) 464.4 (Kokkos/serial,mpi=32,hyper=1) 522.6 (CPU,mpi=36,hyper=2) 545.4 (CPU/KNL,mpi=128,hyper=2) 141.4 (Kokkos/Cuda,mpi=2) 258.2 (Kokkos/Cuda,mpi=1)
2048000 225.9 (CPU,mpi=16) 458.6 (CPU,mpi=32,hyper=1) 452.5 (CPU,mpi=36,hyper=1) 535.5 (Kokkos/KNL,mpi=64,thread=4,hyper=4) 146.2 (Kokkos/Cuda,mpi=2) 247.8 (Kokkos/Cuda,mpi=1)
4096000 184 (CPU,mpi=16) 390.9 (CPU,mpi=64,hyper=2) 419.4 (CPU,mpi=72,hyper=2) 487 (Kokkos/KNL,mpi=64,thread=4,hyper=4) 148.2 (Kokkos/Cuda,mpi=2) 238.1 (Kokkos/Cuda,mpi=1)
8192000 148.1 (CPU,mpi=16) 309.8 (CPU,mpi=64,hyper=2) 331.4 (CPU,mpi=72,hyper=2) 451.5 (Kokkos/KNL,mpi=32,thread=8,hyper=4) 149.7 (Kokkos/Cuda,mpi=2) 232.3 (Kokkos/Cuda,mpi=1)
16384000 141.9 (CPU,mpi=16) 268.3 (CPU,mpi=64,hyper=2) 287.2 (CPU,mpi=72,hyper=2) 427.2 (Kokkos/KNL,mpi=64,thread=4,hyper=4) 150.2 (Kokkos/Cuda,mpi=2) 230.1 (Kokkos/Cuda,mpi=1)
32768000 134.6 (CPU,mpi=16) 247.8 (CPU,mpi=64,hyper=2) 265.4 (CPU,mpi=72,hyper=2) 410.8 (Kokkos/KNL,mpi=64,thread=4,hyper=4) 148.4 (Kokkos/Cuda,mpi=2) 227.9 (Kokkos/Cuda,mpi=1)
65536000 131.7 (CPU,mpi=16) 238.1 (CPU,mpi=64,hyper=2) 253.7 (CPU,mpi=72,hyper=2) 396.5 (Kokkos/KNL,mpi=64,thread=4,hyper=4) 148.2 (Kokkos/Cuda,mpi=2) None
131072000 128.4 (CPU,mpi=16) 232.2 (CPU,mpi=64,hyper=2) 246.7 (CPU,mpi=72,hyper=2) 386.2 (Kokkos/KNL,mpi=64,thread=4,hyper=4) None None

Run commands and logfile links for column SandyBridge

32000 mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 16 -v y 10 -v z 20 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=chama.pkg=cpu.kind=node.size=32K.node=1.mpi=16
64000 mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 16 -v y 20 -v z 20 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=chama.pkg=cpu.kind=node.size=64K.node=1.mpi=16
128000 mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 32 -v y 20 -v z 20 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=chama.pkg=cpu.kind=node.size=128K.node=1.mpi=16
256000 mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 32 -v y 20 -v z 40 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=chama.pkg=cpu.kind=node.size=256K.node=1.mpi=16
512000 mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 32 -v y 40 -v z 40 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=chama.pkg=cpu.kind=node.size=512K.node=1.mpi=16
1024000 mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 64 -v y 40 -v z 40 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=chama.pkg=cpu.kind=node.size=1M.node=1.mpi=16
2048000 mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 64 -v y 40 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=chama.pkg=cpu.kind=node.size=2M.node=1.mpi=16
4096000 mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 64 -v y 80 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=chama.pkg=cpu.kind=node.size=4M.node=1.mpi=16
8192000 mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 128 -v y 80 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=chama.pkg=cpu.kind=node.size=8M.node=1.mpi=16
16384000 mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 128 -v y 80 -v z 160 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=chama.pkg=cpu.kind=node.size=16M.node=1.mpi=16
32768000 mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 128 -v y 160 -v z 160 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=chama.pkg=cpu.kind=node.size=32M.node=1.mpi=16
65536000 mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 256 -v y 160 -v z 160 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=chama.pkg=cpu.kind=node.size=64M.node=1.mpi=16
131072000 mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 256 -v y 160 -v z 320 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=chama.pkg=cpu.kind=node.size=128M.node=1.mpi=16

Run commands and logfile links for column Haswell

32000 srun -n 32 -C haswell --ntasks-per-node 32 --cpu_bind=rank -c 2 ./spa_mutrino_cpu -v x 16 -v y 10 -v z 20 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu.kind=node.size=32K.node=1.mpi=32.hyper=1
64000 srun -n 32 -C haswell --ntasks-per-node 32 --cpu_bind=rank -c 2 ./spa_mutrino_cpu -v x 16 -v y 20 -v z 20 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu.kind=node.size=64K.node=1.mpi=32.hyper=1
128000 srun -n 32 -C haswell --ntasks-per-node 32 --cpu_bind=rank -c 2 ./spa_mutrino_cpu -v x 32 -v y 20 -v z 20 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu.kind=node.size=128K.node=1.mpi=32.hyper=1
256000 srun -n 64 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_cpu -v x 32 -v y 20 -v z 40 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu.kind=node.size=256K.node=1.mpi=64.hyper=2
512000 srun -n 64 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_cpu -v x 32 -v y 40 -v z 40 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu.kind=node.size=512K.node=1.mpi=64.hyper=2
1024000 srun -n 32 -C haswell --ntasks-per-node 32 --cpu_bind=rank -c 2 ./spa_mutrino_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 40 -v z 40 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_serial.kind=node.size=1M.node=1.mpi=32.hyper=1
2048000 srun -n 32 -C haswell --ntasks-per-node 32 --cpu_bind=rank -c 2 ./spa_mutrino_cpu -v x 64 -v y 40 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu.kind=node.size=2M.node=1.mpi=32.hyper=1
4096000 srun -n 64 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_cpu -v x 64 -v y 80 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu.kind=node.size=4M.node=1.mpi=64.hyper=2
8192000 srun -n 64 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_cpu -v x 128 -v y 80 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu.kind=node.size=8M.node=1.mpi=64.hyper=2
16384000 srun -n 64 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_cpu -v x 128 -v y 80 -v z 160 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu.kind=node.size=16M.node=1.mpi=64.hyper=2
32768000 srun -n 64 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_cpu -v x 128 -v y 160 -v z 160 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu.kind=node.size=32M.node=1.mpi=64.hyper=2
65536000 srun -n 64 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_cpu -v x 256 -v y 160 -v z 160 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu.kind=node.size=64M.node=1.mpi=64.hyper=2
131072000 srun -n 64 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_cpu -v x 256 -v y 160 -v z 320 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu.kind=node.size=128M.node=1.mpi=64.hyper=2

Run commands and logfile links for column Broadwell

32000 mpiexec -np 32 -npernode 32 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 16 -v y 10 -v z 20 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=serrano.pkg=cpu.kind=node.size=32K.node=1.mpi=32.hyper=1
64000 mpiexec -np 32 -npernode 32 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 16 -v y 20 -v z 20 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=serrano.pkg=cpu.kind=node.size=64K.node=1.mpi=32.hyper=1
128000 mpiexec -np 32 -npernode 32 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 32 -v y 20 -v z 20 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=serrano.pkg=cpu.kind=node.size=128K.node=1.mpi=32.hyper=2
256000 mpiexec -np 64 -npernode 64 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 32 -v y 20 -v z 40 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=serrano.pkg=cpu.kind=node.size=256K.node=1.mpi=64.hyper=2
512000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 32 -v y 40 -v z 40 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=serrano.pkg=cpu.kind=node.size=512K.node=1.mpi=72.hyper=2
1024000 mpiexec -np 36 -npernode 36 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 64 -v y 40 -v z 40 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=serrano.pkg=cpu.kind=node.size=1M.node=1.mpi=36.hyper=2
2048000 mpiexec -np 36 -npernode 36 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 64 -v y 40 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=serrano.pkg=cpu.kind=node.size=2M.node=1.mpi=36.hyper=1
4096000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 64 -v y 80 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=serrano.pkg=cpu.kind=node.size=4M.node=1.mpi=72.hyper=2
8192000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 128 -v y 80 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=serrano.pkg=cpu.kind=node.size=8M.node=1.mpi=72.hyper=2
16384000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 128 -v y 80 -v z 160 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=serrano.pkg=cpu.kind=node.size=16M.node=1.mpi=72.hyper=2
32768000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 128 -v y 160 -v z 160 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=serrano.pkg=cpu.kind=node.size=32M.node=1.mpi=72.hyper=2
65536000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 256 -v y 160 -v z 160 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=serrano.pkg=cpu.kind=node.size=64M.node=1.mpi=72.hyper=2
131072000 mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 256 -v y 160 -v z 320 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=serrano.pkg=cpu.kind=node.size=128M.node=1.mpi=72.hyper=2

Run commands and logfile links for column KNL

32000 srun -n 64 -C knl --ntasks-per-node 64 --cpu_bind=rank -c 4 ./spa_mutrino_knl -v x 16 -v y 10 -v z 20 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu_knl.kind=node.size=32K.node=1.mpi=64.hyper=1
64000 srun -n 64 -C knl --ntasks-per-node 64 --cpu_bind=rank -c 4 ./spa_mutrino_knl -v x 16 -v y 20 -v z 20 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu_knl.kind=node.size=64K.node=1.mpi=64.hyper=1
128000 srun -n 64 -C knl --ntasks-per-node 64 --cpu_bind=rank -c 4 ./spa_mutrino_knl -v x 32 -v y 20 -v z 20 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu_knl.kind=node.size=128K.node=1.mpi=64.hyper=1
256000 srun -n 64 -C knl --ntasks-per-node 64 --cpu_bind=rank -c 4 ./spa_mutrino_knl -v x 32 -v y 20 -v z 40 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu_knl.kind=node.size=256K.node=1.mpi=64.hyper=1
512000 srun -n 128 -C knl --ntasks-per-node 128 --cpu_bind=threads -c 2 ./spa_mutrino_knl -v x 32 -v y 40 -v z 40 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu_knl.kind=node.size=512K.node=1.mpi=128.hyper=2
1024000 srun -n 128 -C knl --ntasks-per-node 128 --cpu_bind=threads -c 2 ./spa_mutrino_knl -v x 64 -v y 40 -v z 40 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=cpu_knl.kind=node.size=1M.node=1.mpi=128.hyper=2
2048000 setenv OMP_NUM_THREADS 4; srun -n 64 -C knl --ntasks-per-node 64 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_knl -sf kk -k on t 4 -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 40 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_knl.kind=node.size=2M.node=1.mpi=64.thread=4.hyper=4
4096000 setenv OMP_NUM_THREADS 4; srun -n 64 -C knl --ntasks-per-node 64 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_knl -sf kk -k on t 4 -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 80 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_knl.kind=node.size=4M.node=1.mpi=64.thread=4.hyper=4
8192000 setenv OMP_NUM_THREADS 8; srun -n 32 -C knl --ntasks-per-node 32 --cpu_bind=cores -c 8 ./spa_mutrino_kokkos_knl -sf kk -k on t 8 -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_knl.kind=node.size=8M.node=1.mpi=32.thread=8.hyper=4
16384000 setenv OMP_NUM_THREADS 4; srun -n 64 -C knl --ntasks-per-node 64 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_knl -sf kk -k on t 4 -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 160 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_knl.kind=node.size=16M.node=1.mpi=64.thread=4.hyper=4
32768000 setenv OMP_NUM_THREADS 4; srun -n 64 -C knl --ntasks-per-node 64 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_knl -sf kk -k on t 4 -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 160 -v z 160 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_knl.kind=node.size=32M.node=1.mpi=64.thread=4.hyper=4
65536000 setenv OMP_NUM_THREADS 4; srun -n 64 -C knl --ntasks-per-node 64 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_knl -sf kk -k on t 4 -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 160 -v z 160 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_knl.kind=node.size=64M.node=1.mpi=64.thread=4.hyper=4
131072000 setenv OMP_NUM_THREADS 4; srun -n 64 -C knl --ntasks-per-node 64 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_knl -sf kk -k on t 4 -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 160 -v z 320 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=mutrino.pkg=kokkos_knl.kind=node.size=128M.node=1.mpi=64.thread=4.hyper=4

Run commands and logfile links for column K80-1

32000 mpirun -np 2 --npersocket 1 --bind-to core spa_ride80_kokkos_cuda -sf kk -k on g 2 -pk kokkos reduction atomic comm threaded -v x 16 -v y 10 -v z 20 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=ride80.pkg=kokkos_cuda.kind=node.size=32K.node=1.mpi=2.gpu=2
64000 mpirun -np 2 --npersocket 1 --bind-to core spa_ride80_kokkos_cuda -sf kk -k on g 2 -pk kokkos reduction atomic comm threaded -v x 16 -v y 20 -v z 20 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=ride80.pkg=kokkos_cuda.kind=node.size=64K.node=1.mpi=2.gpu=2
128000 mpirun -np 2 --npersocket 1 --bind-to core spa_ride80_kokkos_cuda -sf kk -k on g 2 -pk kokkos reduction atomic comm threaded -v x 32 -v y 20 -v z 20 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=ride80.pkg=kokkos_cuda.kind=node.size=128K.node=1.mpi=2.gpu=2
256000 mpirun -np 2 --npersocket 1 --bind-to core spa_ride80_kokkos_cuda -sf kk -k on g 2 -pk kokkos reduction atomic comm threaded -v x 32 -v y 20 -v z 40 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=ride80.pkg=kokkos_cuda.kind=node.size=256K.node=1.mpi=2.gpu=2
512000 mpirun -np 2 --npersocket 1 --bind-to core spa_ride80_kokkos_cuda -sf kk -k on g 2 -pk kokkos reduction atomic comm threaded -v x 32 -v y 40 -v z 40 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=ride80.pkg=kokkos_cuda.kind=node.size=512K.node=1.mpi=2.gpu=2
1024000 mpirun -np 2 --npersocket 1 --bind-to core spa_ride80_kokkos_cuda -sf kk -k on g 2 -pk kokkos reduction atomic comm threaded -v x 64 -v y 40 -v z 40 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=ride80.pkg=kokkos_cuda.kind=node.size=1M.node=1.mpi=2.gpu=2
2048000 mpirun -np 2 --npersocket 1 --bind-to core spa_ride80_kokkos_cuda -sf kk -k on g 2 -pk kokkos reduction atomic comm threaded -v x 64 -v y 40 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=ride80.pkg=kokkos_cuda.kind=node.size=2M.node=1.mpi=2.gpu=2
4096000 mpirun -np 2 --npersocket 1 --bind-to core spa_ride80_kokkos_cuda -sf kk -k on g 2 -pk kokkos reduction atomic comm threaded -v x 64 -v y 80 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=ride80.pkg=kokkos_cuda.kind=node.size=4M.node=1.mpi=2.gpu=2
8192000 mpirun -np 2 --npersocket 1 --bind-to core spa_ride80_kokkos_cuda -sf kk -k on g 2 -pk kokkos reduction atomic comm threaded -v x 128 -v y 80 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=ride80.pkg=kokkos_cuda.kind=node.size=8M.node=1.mpi=2.gpu=2
16384000 mpirun -np 2 --npersocket 1 --bind-to core spa_ride80_kokkos_cuda -sf kk -k on g 2 -pk kokkos reduction atomic comm threaded -v x 128 -v y 80 -v z 160 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=ride80.pkg=kokkos_cuda.kind=node.size=16M.node=1.mpi=2.gpu=2
32768000 mpirun -np 2 --npersocket 1 --bind-to core spa_ride80_kokkos_cuda -sf kk -k on g 2 -pk kokkos reduction atomic comm threaded -v x 128 -v y 160 -v z 160 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=ride80.pkg=kokkos_cuda.kind=node.size=32M.node=1.mpi=2.gpu=2
65536000 mpirun -np 2 --npersocket 1 --bind-to core spa_ride80_kokkos_cuda -sf kk -k on g 2 -pk kokkos reduction atomic comm threaded -v x 256 -v y 160 -v z 160 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=ride80.pkg=kokkos_cuda.kind=node.size=64M.node=1.mpi=2.gpu=2
131072000 None

Run commands and logfile links for column P100-1

32000 mpirun -np 1 --npernode 1 --bind-to core spa_ride100_kokkos_cuda -sf kk -k on g 1 -pk kokkos reduction atomic comm threaded -v x 16 -v y 10 -v z 20 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=ride100.pkg=kokkos_cuda.kind=node.size=32K.node=1.mpi=1.gpu=1
64000 mpirun -np 1 --npernode 1 --bind-to core spa_ride100_kokkos_cuda -sf kk -k on g 1 -pk kokkos reduction atomic comm threaded -v x 16 -v y 20 -v z 20 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=ride100.pkg=kokkos_cuda.kind=node.size=64K.node=1.mpi=1.gpu=1
128000 mpirun -np 1 --npernode 1 --bind-to core spa_ride100_kokkos_cuda -sf kk -k on g 1 -pk kokkos reduction atomic comm threaded -v x 32 -v y 20 -v z 20 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=ride100.pkg=kokkos_cuda.kind=node.size=128K.node=1.mpi=1.gpu=1
256000 mpirun -np 1 --npernode 1 --bind-to core spa_ride100_kokkos_cuda -sf kk -k on g 1 -pk kokkos reduction atomic comm threaded -v x 32 -v y 20 -v z 40 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=ride100.pkg=kokkos_cuda.kind=node.size=256K.node=1.mpi=1.gpu=1
512000 mpirun -np 1 --npernode 1 --bind-to core spa_ride100_kokkos_cuda -sf kk -k on g 1 -pk kokkos reduction atomic comm threaded -v x 32 -v y 40 -v z 40 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=ride100.pkg=kokkos_cuda.kind=node.size=512K.node=1.mpi=1.gpu=1
1024000 mpirun -np 1 --npernode 1 --bind-to core spa_ride100_kokkos_cuda -sf kk -k on g 1 -pk kokkos reduction atomic comm threaded -v x 64 -v y 40 -v z 40 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=ride100.pkg=kokkos_cuda.kind=node.size=1M.node=1.mpi=1.gpu=1
2048000 mpirun -np 1 --npernode 1 --bind-to core spa_ride100_kokkos_cuda -sf kk -k on g 1 -pk kokkos reduction atomic comm threaded -v x 64 -v y 40 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=ride100.pkg=kokkos_cuda.kind=node.size=2M.node=1.mpi=1.gpu=1
4096000 mpirun -np 1 --npernode 1 --bind-to core spa_ride100_kokkos_cuda -sf kk -k on g 1 -pk kokkos reduction atomic comm threaded -v x 64 -v y 80 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=ride100.pkg=kokkos_cuda.kind=node.size=4M.node=1.mpi=1.gpu=1
8192000 mpirun -np 1 --npernode 1 --bind-to core spa_ride100_kokkos_cuda -sf kk -k on g 1 -pk kokkos reduction atomic comm threaded -v x 128 -v y 80 -v z 80 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=ride100.pkg=kokkos_cuda.kind=node.size=8M.node=1.mpi=1.gpu=1
16384000 mpirun -np 1 --npernode 1 --bind-to core spa_ride100_kokkos_cuda -sf kk -k on g 1 -pk kokkos reduction atomic comm threaded -v x 128 -v y 80 -v z 160 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=ride100.pkg=kokkos_cuda.kind=node.size=16M.node=1.mpi=1.gpu=1
32768000 mpirun -np 1 --npernode 1 --bind-to core spa_ride100_kokkos_cuda -sf kk -k on g 1 -pk kokkos reduction atomic comm threaded -v x 128 -v y 160 -v z 160 -v t 100 -in in.free.steps -log log.sparta.date=23Dec17.model=free.machine=ride100.pkg=kokkos_cuda.kind=node.size=32M.node=1.mpi=1.gpu=1
65536000 None
131072000 None