Weak scaling performance, Collide benchmark, SandyBridge, 16M particles/node
Performance in millions of particle-timesteps / second / node
Nodes | CPU (mpi) | Kokkos/OMP (mpi,thread) | Kokkos/serial (mpi) | |
1 | 68.35 (16) | 57.37 (8,2) | 68.95 (16) | |
2 | 67.29 (16) | 61.55 (8,2) | 68.32 (16) | |
4 | 66.39 (16) | 55.59 (8,2) | 70.3 (16) | |
8 | 69.07 (16) | 57.36 (8,2) | 67.19 (16) | |
16 | 68.26 (16) | 56.3 (8,2) | 64.78 (16) | |
32 | 67.6 (16) | 53.79 (8,2) | 63.7 (16) | |
64 | 60.3 (16) | 50.91 (8,2) | 61.26 (16) | |
1 | mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 128 -v y 80 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=weak.size=16M.node=1.mpi=16 |
2 | mpirun -n 32 -N 16 --bind-to core spa_chama_cpu -v x 128 -v y 160 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=weak.size=16M.node=2.mpi=16 |
4 | mpirun -n 64 -N 16 --bind-to core spa_chama_cpu -v x 256 -v y 160 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=weak.size=16M.node=4.mpi=16 |
8 | mpirun -n 128 -N 16 --bind-to core spa_chama_cpu -v x 256 -v y 160 -v z 320 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=weak.size=16M.node=8.mpi=16 |
16 | mpirun -n 256 -N 16 --bind-to core spa_chama_cpu -v x 256 -v y 320 -v z 320 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=weak.size=16M.node=16.mpi=16 |
32 | mpirun -n 512 -N 16 --bind-to core spa_chama_cpu -v x 512 -v y 320 -v z 320 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=weak.size=16M.node=32.mpi=16 |
64 | mpirun -n 1024 -N 16 --bind-to core spa_chama_cpu -v x 512 -v y 320 -v z 640 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=weak.size=16M.node=64.mpi=16 |
1 | mpirun -n 8 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_omp.kind=weak.size=16M.node=1.mpi=8.thread=2 |
2 | mpirun -n 16 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 160 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_omp.kind=weak.size=16M.node=2.mpi=8.thread=2 |
4 | mpirun -n 32 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 160 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_omp.kind=weak.size=16M.node=4.mpi=8.thread=2 |
8 | mpirun -n 64 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 160 -v z 320 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_omp.kind=weak.size=16M.node=8.mpi=8.thread=2 |
16 | mpirun -n 128 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 320 -v z 320 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_omp.kind=weak.size=16M.node=16.mpi=8.thread=2 |
32 | mpirun -n 256 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 512 -v y 320 -v z 320 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_omp.kind=weak.size=16M.node=32.mpi=8.thread=2 |
64 | mpirun -n 512 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 512 -v y 320 -v z 640 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_omp.kind=weak.size=16M.node=64.mpi=8.thread=2 |
1 | mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_serial.kind=weak.size=16M.node=1.mpi=16 |
2 | mpirun -n 32 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 160 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_serial.kind=weak.size=16M.node=2.mpi=16 |
4 | mpirun -n 64 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 160 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_serial.kind=weak.size=16M.node=4.mpi=16 |
8 | mpirun -n 128 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 160 -v z 320 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_serial.kind=weak.size=16M.node=8.mpi=16 |
16 | mpirun -n 256 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 320 -v z 320 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_serial.kind=weak.size=16M.node=16.mpi=16 |
32 | mpirun -n 512 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 512 -v y 320 -v z 320 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_serial.kind=weak.size=16M.node=32.mpi=16 |
64 | mpirun -n 1024 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 512 -v y 320 -v z 640 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_serial.kind=weak.size=16M.node=64.mpi=16 |