Single node performance, Collide benchmark, SandyBridge
Performance in millions of particle-timesteps / second
Nparticles | CPU (mpi) | Kokkos/OMP (mpi,thread) | Kokkos/serial (mpi) |
32000 | 243.3 (16) | 127.5 (8,2) | 212.6 (16) |
64000 | 276.6 (16) | 131.7 (8,2) | 250.2 (16) |
128000 | 291.8 (16) | 137.6 (8,2) | 280.6 (16) |
256000 | 268.2 (16) | 127.2 (8,2) | 247.2 (16) |
512000 | 159.8 (16) | 84.97 (8,2) | 150.3 (16) |
1024000 | 133.8 (16) | 76.66 (8,2) | 141.4 (16) |
2048000 | 118 (16) | 70.6 (8,2) | 117.4 (16) |
4096000 | 97.1 (16) | 71.07 (8,2) | 95.4 (16) |
8192000 | 78.51 (16) | 56.88 (8,2) | 81.03 (16) |
16384000 | 66.36 (16) | 57.95 (8,2) | 69.04 (16) |
32768000 | 63.06 (16) | 35.65 (8,2) | 66.75 (16) |
65536000 | 58.02 (16) | 54.32 (8,2) | 59.63 (16) |
131072000 | 58.42 (16) | 54.55 (8,2) | 56.04 (16) |
32000 | mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 16 -v y 10 -v z 20 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=node.size=32K.node=1.mpi=16 |
64000 | mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 16 -v y 20 -v z 20 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=node.size=64K.node=1.mpi=16 |
128000 | mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 32 -v y 20 -v z 20 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=node.size=128K.node=1.mpi=16 |
256000 | mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 32 -v y 20 -v z 40 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=node.size=256K.node=1.mpi=16 |
512000 | mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 32 -v y 40 -v z 40 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=node.size=512K.node=1.mpi=16 |
1024000 | mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 64 -v y 40 -v z 40 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=node.size=1M.node=1.mpi=16 |
2048000 | mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 64 -v y 40 -v z 80 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=node.size=2M.node=1.mpi=16 |
4096000 | mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 64 -v y 80 -v z 80 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=node.size=4M.node=1.mpi=16 |
8192000 | mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 128 -v y 80 -v z 80 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=node.size=8M.node=1.mpi=16 |
16384000 | mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 128 -v y 80 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=node.size=16M.node=1.mpi=16 |
32768000 | mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 128 -v y 160 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=node.size=32M.node=1.mpi=16 |
65536000 | mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 256 -v y 160 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=node.size=64M.node=1.mpi=16 |
131072000 | mpirun -n 16 -N 16 --bind-to core spa_chama_cpu -v x 256 -v y 160 -v z 320 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=cpu.kind=node.size=128M.node=1.mpi=16 |
32000 | mpirun -n 8 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 16 -v y 10 -v z 20 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_omp.kind=node.size=32K.node=1.mpi=8.thread=2 |
64000 | mpirun -n 8 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 16 -v y 20 -v z 20 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_omp.kind=node.size=64K.node=1.mpi=8.thread=2 |
128000 | mpirun -n 8 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 20 -v z 20 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_omp.kind=node.size=128K.node=1.mpi=8.thread=2 |
256000 | mpirun -n 8 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 20 -v z 40 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_omp.kind=node.size=256K.node=1.mpi=8.thread=2 |
512000 | mpirun -n 8 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 40 -v z 40 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_omp.kind=node.size=512K.node=1.mpi=8.thread=2 |
1024000 | mpirun -n 8 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 40 -v z 40 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_omp.kind=node.size=1M.node=1.mpi=8.thread=2 |
2048000 | mpirun -n 8 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 40 -v z 80 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_omp.kind=node.size=2M.node=1.mpi=8.thread=2 |
4096000 | mpirun -n 8 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 80 -v z 80 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_omp.kind=node.size=4M.node=1.mpi=8.thread=2 |
8192000 | mpirun -n 8 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_omp.kind=node.size=8M.node=1.mpi=8.thread=2 |
16384000 | mpirun -n 8 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_omp.kind=node.size=16M.node=1.mpi=8.thread=2 |
32768000 | mpirun -n 8 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 160 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_omp.kind=node.size=32M.node=1.mpi=8.thread=2 |
65536000 | mpirun -n 8 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 160 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_omp.kind=node.size=64M.node=1.mpi=8.thread=2 |
131072000 | mpirun -n 8 -N 8 --bind-to socket spa_chama_kokkos_omp -sf kk -k on t 2 -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 160 -v z 320 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_omp.kind=node.size=128M.node=1.mpi=8.thread=2 |
32000 | mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 16 -v y 10 -v z 20 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_serial.kind=node.size=32K.node=1.mpi=16 |
64000 | mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 16 -v y 20 -v z 20 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_serial.kind=node.size=64K.node=1.mpi=16 |
128000 | mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 20 -v z 20 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_serial.kind=node.size=128K.node=1.mpi=16 |
256000 | mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 20 -v z 40 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_serial.kind=node.size=256K.node=1.mpi=16 |
512000 | mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 32 -v y 40 -v z 40 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_serial.kind=node.size=512K.node=1.mpi=16 |
1024000 | mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 40 -v z 40 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_serial.kind=node.size=1M.node=1.mpi=16 |
2048000 | mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 40 -v z 80 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_serial.kind=node.size=2M.node=1.mpi=16 |
4096000 | mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 64 -v y 80 -v z 80 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_serial.kind=node.size=4M.node=1.mpi=16 |
8192000 | mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 80 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_serial.kind=node.size=8M.node=1.mpi=16 |
16384000 | mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 80 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_serial.kind=node.size=16M.node=1.mpi=16 |
32768000 | mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 128 -v y 160 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_serial.kind=node.size=32M.node=1.mpi=16 |
65536000 | mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 160 -v z 160 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_serial.kind=node.size=64M.node=1.mpi=16 |
131072000 | mpirun -n 16 -N 16 --bind-to core spa_chama_kokkos_serial -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 256 -v y 160 -v z 320 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=chama.pkg=kokkos_serial.kind=node.size=128M.node=1.mpi=16 |