Single node performance, Collide benchmark
Performance in millions of particle-timesteps per second
Nparticles | SandyBridge | Haswell | Broadwell | KNL | K80-1 | P100-1 | |
32000 | 243.3 (CPU,mpi=16) | 452.3 (CPU,mpi=32,hyper=1) | 295.1 (CPU,mpi=36,hyper=2) | 198 (CPU/KNL,mpi=64,hyper=1) | 20.03 (Kokkos/Cuda,mpi=2) | 53.9 (Kokkos/Cuda,mpi=1) | |
64000 | 276.6 (CPU,mpi=16) | 518.8 (CPU,mpi=64,hyper=2) | 404.8 (CPU,mpi=36,hyper=1) | 249.1 (CPU/KNL,mpi=64,hyper=1) | 34.69 (Kokkos/Cuda,mpi=2) | 92.84 (Kokkos/Cuda,mpi=1) | |
128000 | 291.8 (CPU,mpi=16) | 614 (CPU,mpi=64,hyper=2) | 486.6 (CPU,mpi=72,hyper=2) | 273.3 (CPU/KNL,mpi=64,hyper=1) | 62.77 (Kokkos/Cuda,mpi=2) | 145.2 (Kokkos/Cuda,mpi=1) | |
256000 | 268.2 (CPU,mpi=16) | 675.4 (CPU,mpi=64,hyper=2) | 592.4 (CPU,mpi=64,hyper=2) | 291.5 (CPU/KNL,mpi=128,hyper=2) | 98.92 (Kokkos/Cuda,mpi=2) | 203.4 (Kokkos/Cuda,mpi=1) | |
512000 | 159.8 (CPU,mpi=16) | 616.7 (CPU,mpi=64,hyper=2) | 617.9 (CPU,mpi=72,hyper=2) | 323.3 (CPU/KNL,mpi=128,hyper=2) | 124.5 (Kokkos/Cuda,mpi=2) | 253.3 (Kokkos/Cuda,mpi=1) | |
1024000 | 141.4 (Kokkos/serial,mpi=16) | 288.4 (CPU,mpi=64,hyper=2) | 326.8 (CPU,mpi=72,hyper=2) | 323.9 (Kokkos/KNL,mpi=64,thread=4,hyper=4) | 140.1 (Kokkos/Cuda,mpi=2) | 284 (Kokkos/Cuda,mpi=1) | |
2048000 | 118 (CPU,mpi=16) | 245.3 (Kokkos/serial,mpi=64,hyper=2) | 260.2 (CPU,mpi=72,hyper=2) | 343.2 (Kokkos/KNL,mpi=64,thread=4,hyper=4) | 142.8 (Kokkos/Cuda,mpi=2) | 308.2 (Kokkos/Cuda,mpi=1) | |
4096000 | 97.1 (CPU,mpi=16) | 220.1 (Kokkos/serial,mpi=64,hyper=2) | 243.6 (Kokkos/serial,mpi=72,hyper=2) | 326.6 (Kokkos/KNL,mpi=64,thread=4,hyper=4) | 147.5 (Kokkos/Cuda,mpi=2) | 302.7 (Kokkos/Cuda,mpi=1) | |
8192000 | 81.03 (Kokkos/serial,mpi=16) | 187.7 (Kokkos/serial,mpi=64,hyper=2) | 207.4 (CPU,mpi=72,hyper=2) | 296.7 (Kokkos/KNL,mpi=128,thread=2,hyper=4) | 145.8 (Kokkos/Cuda,mpi=2) | 300.3 (Kokkos/Cuda,mpi=1) | |
16384000 | 69.04 (Kokkos/serial,mpi=16) | 160.9 (Kokkos/serial,mpi=64,hyper=2) | 178.1 (Kokkos/serial,mpi=72,hyper=2) | 281.2 (Kokkos/serial/KNL,mpi=256,hyper=4) | 145.1 (Kokkos/Cuda,mpi=2) | 302 (Kokkos/Cuda,mpi=1) | |
32768000 | 66.75 (Kokkos/serial,mpi=16) | 141.6 (Kokkos/serial,mpi=64,hyper=2) | 157.4 (Kokkos/serial,mpi=72,hyper=2) | 269.6 (Kokkos/serial/KNL,mpi=256,hyper=4) | 143.3 (Kokkos/Cuda,mpi=2) | 303.5 (Kokkos/Cuda,mpi=1) | |
65536000 | 59.63 (Kokkos/serial,mpi=16) | 130.2 (Kokkos/serial,mpi=64,hyper=2) | 143.7 (Kokkos/serial,mpi=72,hyper=2) | 253.3 (Kokkos/serial/KNL,mpi=256,hyper=4) | None | None | |
131072000 | 58.42 (CPU,mpi=16) | 121.2 (Kokkos/serial,mpi=64,hyper=2) | 132.3 (Kokkos/serial,mpi=72,hyper=2) | 226.4 (Kokkos/serial/KNL,mpi=256,hyper=4) | None | None | |
32000 | mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 16 -v y 10 -v z 20 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=node.size=32K.node=1.mpi=16 |
64000 | mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 16 -v y 20 -v z 20 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=node.size=64K.node=1.mpi=16 |
128000 | mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 32 -v y 20 -v z 20 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=node.size=128K.node=1.mpi=16 |
256000 | mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 32 -v y 20 -v z 40 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=node.size=256K.node=1.mpi=16 |
512000 | mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 32 -v y 40 -v z 40 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=node.size=512K.node=1.mpi=16 |
1024000 | mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 40 -v z 40 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_serial.kind=node.size=1M.node=1.mpi=16 |
2048000 | mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 64 -v y 40 -v z 80 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=node.size=2M.node=1.mpi=16 |
4096000 | mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 64 -v y 80 -v z 80 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=node.size=4M.node=1.mpi=16 |
8192000 | mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_serial.kind=node.size=8M.node=1.mpi=16 |
16384000 | mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_serial.kind=node.size=16M.node=1.mpi=16 |
32768000 | mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 160 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_serial.kind=node.size=32M.node=1.mpi=16 |
65536000 | mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 160 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_serial.kind=node.size=64M.node=1.mpi=16 |
131072000 | mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 256 -v y 160 -v z 320 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=node.size=128M.node=1.mpi=16 |
32000 | srun -n 32 -C haswell --ntasks-per-node 32 --cpu_bind=rank -c 2 ./spa_mutrino_cpu -v x 16 -v y 10 -v z 20 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=cpu.kind=node.size=32K.node=1.mpi=32.hyper=1 |
64000 | srun -n 64 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_cpu -v x 16 -v y 20 -v z 20 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=cpu.kind=node.size=64K.node=1.mpi=64.hyper=2 |
128000 | srun -n 64 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_cpu -v x 32 -v y 20 -v z 20 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=cpu.kind=node.size=128K.node=1.mpi=64.hyper=2 |
256000 | srun -n 64 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_cpu -v x 32 -v y 20 -v z 40 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=cpu.kind=node.size=256K.node=1.mpi=64.hyper=2 |
512000 | srun -n 64 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_cpu -v x 32 -v y 40 -v z 40 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=cpu.kind=node.size=512K.node=1.mpi=64.hyper=2 |
1024000 | srun -n 64 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_cpu -v x 64 -v y 40 -v z 40 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=cpu.kind=node.size=1M.node=1.mpi=64.hyper=2 |
2048000 | srun -n 64 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 40 -v z 80 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_serial.kind=node.size=2M.node=1.mpi=64.hyper=2 |
4096000 | srun -n 64 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 80 -v z 80 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_serial.kind=node.size=4M.node=1.mpi=64.hyper=2 |
8192000 | srun -n 64 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_serial.kind=node.size=8M.node=1.mpi=64.hyper=2 |
16384000 | srun -n 64 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_serial.kind=node.size=16M.node=1.mpi=64.hyper=2 |
32768000 | srun -n 64 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 160 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_serial.kind=node.size=32M.node=1.mpi=64.hyper=2 |
65536000 | srun -n 64 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 160 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_serial.kind=node.size=64M.node=1.mpi=64.hyper=2 |
131072000 | srun -n 64 -C haswell --ntasks-per-node 64 --cpu_bind=rank -c 1 ./spa_mutrino_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 160 -v z 320 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_serial.kind=node.size=128M.node=1.mpi=64.hyper=2 |
32000 | mpiexec -np 36 -npernode 36 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 16 -v y 10 -v z 20 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=cpu.kind=node.size=32K.node=1.mpi=36.hyper=2 |
64000 | mpiexec -np 36 -npernode 36 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 16 -v y 20 -v z 20 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=cpu.kind=node.size=64K.node=1.mpi=36.hyper=1 |
128000 | mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 32 -v y 20 -v z 20 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=cpu.kind=node.size=128K.node=1.mpi=72.hyper=2 |
256000 | mpiexec -np 64 -npernode 64 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 32 -v y 20 -v z 40 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=cpu.kind=node.size=256K.node=1.mpi=64.hyper=2 |
512000 | mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 32 -v y 40 -v z 40 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=cpu.kind=node.size=512K.node=1.mpi=72.hyper=2 |
1024000 | mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 64 -v y 40 -v z 40 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=cpu.kind=node.size=1M.node=1.mpi=72.hyper=2 |
2048000 | mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 64 -v y 40 -v z 80 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=cpu.kind=node.size=2M.node=1.mpi=72.hyper=2 |
4096000 | mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 80 -v z 80 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=kokkos_serial.kind=node.size=4M.node=1.mpi=72.hyper=2 |
8192000 | mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_cpu -v x 128 -v y 80 -v z 80 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=cpu.kind=node.size=8M.node=1.mpi=72.hyper=2 |
16384000 | mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=kokkos_serial.kind=node.size=16M.node=1.mpi=72.hyper=2 |
32768000 | mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 160 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=kokkos_serial.kind=node.size=32M.node=1.mpi=72.hyper=2 |
65536000 | mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 160 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=kokkos_serial.kind=node.size=64M.node=1.mpi=72.hyper=2 |
131072000 | mpiexec -np 72 -npernode 72 --oversubscribe --bind-to core ./spa_serrano_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 160 -v z 320 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=serrano.pkg=kokkos_serial.kind=node.size=128M.node=1.mpi=72.hyper=2 |
32000 | srun -n 64 -C knl --ntasks-per-node 64 --cpu_bind=rank -c 4 ./spa_mutrino_knl -v x 16 -v y 10 -v z 20 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=cpu_knl.kind=node.size=32K.node=1.mpi=64.hyper=1 |
64000 | srun -n 64 -C knl --ntasks-per-node 64 --cpu_bind=rank -c 4 ./spa_mutrino_knl -v x 16 -v y 20 -v z 20 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=cpu_knl.kind=node.size=64K.node=1.mpi=64.hyper=1 |
128000 | srun -n 64 -C knl --ntasks-per-node 64 --cpu_bind=rank -c 4 ./spa_mutrino_knl -v x 32 -v y 20 -v z 20 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=cpu_knl.kind=node.size=128K.node=1.mpi=64.hyper=1 |
256000 | srun -n 128 -C knl --ntasks-per-node 128 --cpu_bind=threads -c 2 ./spa_mutrino_knl -v x 32 -v y 20 -v z 40 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=cpu_knl.kind=node.size=256K.node=1.mpi=128.hyper=2 |
512000 | srun -n 128 -C knl --ntasks-per-node 128 --cpu_bind=threads -c 2 ./spa_mutrino_knl -v x 32 -v y 40 -v z 40 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=cpu_knl.kind=node.size=512K.node=1.mpi=128.hyper=2 |
1024000 | setenv OMP_NUM_THREADS 4; srun -n 64 -C knl --ntasks-per-node 64 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_knl -sf kk -k on t 4 -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 40 -v z 40 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_knl.kind=node.size=1M.node=1.mpi=64.thread=4.hyper=4 |
2048000 | setenv OMP_NUM_THREADS 4; srun -n 64 -C knl --ntasks-per-node 64 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_knl -sf kk -k on t 4 -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 40 -v z 80 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_knl.kind=node.size=2M.node=1.mpi=64.thread=4.hyper=4 |
4096000 | setenv OMP_NUM_THREADS 4; srun -n 64 -C knl --ntasks-per-node 64 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_knl -sf kk -k on t 4 -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 80 -v z 80 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_knl.kind=node.size=4M.node=1.mpi=64.thread=4.hyper=4 |
8192000 | setenv OMP_NUM_THREADS 2; srun -n 128 -C knl --ntasks-per-node 128 --cpu_bind=threads -c 2 ./spa_mutrino_kokkos_knl -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_knl.kind=node.size=8M.node=1.mpi=128.thread=2.hyper=4 |
16384000 | srun -n 256 -C knl --ntasks-per-node 256 --cpu_bind=threads -c 1 ./spa_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_serial_knl.kind=node.size=16M.node=1.mpi=256.hyper=4 |
32768000 | srun -n 256 -C knl --ntasks-per-node 256 --cpu_bind=threads -c 1 ./spa_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 160 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_serial_knl.kind=node.size=32M.node=1.mpi=256.hyper=4 |
65536000 | srun -n 256 -C knl --ntasks-per-node 256 --cpu_bind=threads -c 1 ./spa_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 160 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_serial_knl.kind=node.size=64M.node=1.mpi=256.hyper=4 |
131072000 | srun -n 256 -C knl --ntasks-per-node 256 --cpu_bind=threads -c 1 ./spa_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 160 -v z 320 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_serial_knl.kind=node.size=128M.node=1.mpi=256.hyper=4 |
32000 | mpirun -np 2 --npersocket 1 --bind-to core spa_ride80_kokkos_cuda -sf kk -k on g 2 -pk kokkos reduction atomic comm threaded -v x 16 -v y 10 -v z 20 -v t 100 -in in.collide.gpu.steps -log log.sparta.date=23Dec17.model=collide.machine=ride80.pkg=kokkos_cuda.kind=node.size=32K.node=1.mpi=2.gpu=2 |
64000 | mpirun -np 2 --npersocket 1 --bind-to core spa_ride80_kokkos_cuda -sf kk -k on g 2 -pk kokkos reduction atomic comm threaded -v x 16 -v y 20 -v z 20 -v t 100 -in in.collide.gpu.steps -log log.sparta.date=23Dec17.model=collide.machine=ride80.pkg=kokkos_cuda.kind=node.size=64K.node=1.mpi=2.gpu=2 |
128000 | mpirun -np 2 --npersocket 1 --bind-to core spa_ride80_kokkos_cuda -sf kk -k on g 2 -pk kokkos reduction atomic comm threaded -v x 32 -v y 20 -v z 20 -v t 100 -in in.collide.gpu.steps -log log.sparta.date=23Dec17.model=collide.machine=ride80.pkg=kokkos_cuda.kind=node.size=128K.node=1.mpi=2.gpu=2 |
256000 | mpirun -np 2 --npersocket 1 --bind-to core spa_ride80_kokkos_cuda -sf kk -k on g 2 -pk kokkos reduction atomic comm threaded -v x 32 -v y 20 -v z 40 -v t 100 -in in.collide.gpu.steps -log log.sparta.date=23Dec17.model=collide.machine=ride80.pkg=kokkos_cuda.kind=node.size=256K.node=1.mpi=2.gpu=2 |
512000 | mpirun -np 2 --npersocket 1 --bind-to core spa_ride80_kokkos_cuda -sf kk -k on g 2 -pk kokkos reduction atomic comm threaded -v x 32 -v y 40 -v z 40 -v t 100 -in in.collide.gpu.steps -log log.sparta.date=23Dec17.model=collide.machine=ride80.pkg=kokkos_cuda.kind=node.size=512K.node=1.mpi=2.gpu=2 |
1024000 | mpirun -np 2 --npersocket 1 --bind-to core spa_ride80_kokkos_cuda -sf kk -k on g 2 -pk kokkos reduction atomic comm threaded -v x 64 -v y 40 -v z 40 -v t 100 -in in.collide.gpu.steps -log log.sparta.date=23Dec17.model=collide.machine=ride80.pkg=kokkos_cuda.kind=node.size=1M.node=1.mpi=2.gpu=2 |
2048000 | mpirun -np 2 --npersocket 1 --bind-to core spa_ride80_kokkos_cuda -sf kk -k on g 2 -pk kokkos reduction atomic comm threaded -v x 64 -v y 40 -v z 80 -v t 100 -in in.collide.gpu.steps -log log.sparta.date=23Dec17.model=collide.machine=ride80.pkg=kokkos_cuda.kind=node.size=2M.node=1.mpi=2.gpu=2 |
4096000 | mpirun -np 2 --npersocket 1 --bind-to core spa_ride80_kokkos_cuda -sf kk -k on g 2 -pk kokkos reduction atomic comm threaded -v x 64 -v y 80 -v z 80 -v t 100 -in in.collide.gpu.steps -log log.sparta.date=23Dec17.model=collide.machine=ride80.pkg=kokkos_cuda.kind=node.size=4M.node=1.mpi=2.gpu=2 |
8192000 | mpirun -np 2 --npersocket 1 --bind-to core spa_ride80_kokkos_cuda -sf kk -k on g 2 -pk kokkos reduction atomic comm threaded -v x 128 -v y 80 -v z 80 -v t 100 -in in.collide.gpu.steps -log log.sparta.date=23Dec17.model=collide.machine=ride80.pkg=kokkos_cuda.kind=node.size=8M.node=1.mpi=2.gpu=2 |
16384000 | mpirun -np 2 --npersocket 1 --bind-to core spa_ride80_kokkos_cuda -sf kk -k on g 2 -pk kokkos reduction atomic comm threaded -v x 128 -v y 80 -v z 160 -v t 100 -in in.collide.gpu.steps -log log.sparta.date=23Dec17.model=collide.machine=ride80.pkg=kokkos_cuda.kind=node.size=16M.node=1.mpi=2.gpu=2 |
32768000 | mpirun -np 2 --npersocket 1 --bind-to core spa_ride80_kokkos_cuda -sf kk -k on g 2 -pk kokkos reduction atomic comm threaded -v x 128 -v y 160 -v z 160 -v t 100 -in in.collide.gpu.steps -log log.sparta.date=23Dec17.model=collide.machine=ride80.pkg=kokkos_cuda.kind=node.size=32M.node=1.mpi=2.gpu=2 |
65536000 | None |
131072000 | None |
32000 | mpirun -np 1 --npernode 1 --bind-to core spa_ride100_kokkos_cuda -sf kk -k on g 1 -pk kokkos reduction atomic comm threaded -v x 16 -v y 10 -v z 20 -v t 100 -in in.collide.gpu.steps -log log.sparta.date=23Dec17.model=collide.machine=ride100.pkg=kokkos_cuda.kind=node.size=32K.node=1.mpi=1.gpu=1 |
64000 | mpirun -np 1 --npernode 1 --bind-to core spa_ride100_kokkos_cuda -sf kk -k on g 1 -pk kokkos reduction atomic comm threaded -v x 16 -v y 20 -v z 20 -v t 100 -in in.collide.gpu.steps -log log.sparta.date=23Dec17.model=collide.machine=ride100.pkg=kokkos_cuda.kind=node.size=64K.node=1.mpi=1.gpu=1 |
128000 | mpirun -np 1 --npernode 1 --bind-to core spa_ride100_kokkos_cuda -sf kk -k on g 1 -pk kokkos reduction atomic comm threaded -v x 32 -v y 20 -v z 20 -v t 100 -in in.collide.gpu.steps -log log.sparta.date=23Dec17.model=collide.machine=ride100.pkg=kokkos_cuda.kind=node.size=128K.node=1.mpi=1.gpu=1 |
256000 | mpirun -np 1 --npernode 1 --bind-to core spa_ride100_kokkos_cuda -sf kk -k on g 1 -pk kokkos reduction atomic comm threaded -v x 32 -v y 20 -v z 40 -v t 100 -in in.collide.gpu.steps -log log.sparta.date=23Dec17.model=collide.machine=ride100.pkg=kokkos_cuda.kind=node.size=256K.node=1.mpi=1.gpu=1 |
512000 | mpirun -np 1 --npernode 1 --bind-to core spa_ride100_kokkos_cuda -sf kk -k on g 1 -pk kokkos reduction atomic comm threaded -v x 32 -v y 40 -v z 40 -v t 100 -in in.collide.gpu.steps -log log.sparta.date=23Dec17.model=collide.machine=ride100.pkg=kokkos_cuda.kind=node.size=512K.node=1.mpi=1.gpu=1 |
1024000 | mpirun -np 1 --npernode 1 --bind-to core spa_ride100_kokkos_cuda -sf kk -k on g 1 -pk kokkos reduction atomic comm threaded -v x 64 -v y 40 -v z 40 -v t 100 -in in.collide.gpu.steps -log log.sparta.date=23Dec17.model=collide.machine=ride100.pkg=kokkos_cuda.kind=node.size=1M.node=1.mpi=1.gpu=1 |
2048000 | mpirun -np 1 --npernode 1 --bind-to core spa_ride100_kokkos_cuda -sf kk -k on g 1 -pk kokkos reduction atomic comm threaded -v x 64 -v y 40 -v z 80 -v t 100 -in in.collide.gpu.steps -log log.sparta.date=23Dec17.model=collide.machine=ride100.pkg=kokkos_cuda.kind=node.size=2M.node=1.mpi=1.gpu=1 |
4096000 | mpirun -np 1 --npernode 1 --bind-to core spa_ride100_kokkos_cuda -sf kk -k on g 1 -pk kokkos reduction atomic comm threaded -v x 64 -v y 80 -v z 80 -v t 100 -in in.collide.gpu.steps -log log.sparta.date=23Dec17.model=collide.machine=ride100.pkg=kokkos_cuda.kind=node.size=4M.node=1.mpi=1.gpu=1 |
8192000 | mpirun -np 1 --npernode 1 --bind-to core spa_ride100_kokkos_cuda -sf kk -k on g 1 -pk kokkos reduction atomic comm threaded -v x 128 -v y 80 -v z 80 -v t 100 -in in.collide.gpu.steps -log log.sparta.date=23Dec17.model=collide.machine=ride100.pkg=kokkos_cuda.kind=node.size=8M.node=1.mpi=1.gpu=1 |
16384000 | mpirun -np 1 --npernode 1 --bind-to core spa_ride100_kokkos_cuda -sf kk -k on g 1 -pk kokkos reduction atomic comm threaded -v x 128 -v y 80 -v z 160 -v t 100 -in in.collide.gpu.steps -log log.sparta.date=23Dec17.model=collide.machine=ride100.pkg=kokkos_cuda.kind=node.size=16M.node=1.mpi=1.gpu=1 |
32768000 | mpirun -np 1 --npernode 1 --bind-to core spa_ride100_kokkos_cuda -sf kk -k on g 1 -pk kokkos reduction atomic comm threaded -v x 128 -v y 160 -v z 160 -v t 100 -in in.collide.gpu.steps -log log.sparta.date=23Dec17.model=collide.machine=ride100.pkg=kokkos_cuda.kind=node.size=32M.node=1.mpi=1.gpu=1 |
65536000 | None |
131072000 | None |