Single core performance, Collide benchmark, KNL
Performance in millions of particle-timesteps / second
Nparticles | CPU/KNL (mpi) | Kokkos/KNL (mpi) | Kokkos/serial/KNL (mpi) | |
1000 | 6.501 (1) | 4.727 (1) | 5.269 (1) | |
2000 | 6.61 (1) | 5.388 (1) | 5.836 (1) | |
4000 | 6.564 (1) | 5.773 (1) | 6.152 (1) | |
8000 | 6.022 (1) | 4.972 (1) | 5.285 (1) | |
16000 | 5.08 (1) | 4.812 (1) | 4.888 (1) |
1000 | srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_knl -v x 4 -v y 5 -v z 5 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=cpu_knl.kind=core.size=1K.node=1.mpi=1.hyper=1 |
2000 | srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_knl -v x 8 -v y 5 -v z 5 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=cpu_knl.kind=core.size=2K.node=1.mpi=1.hyper=1 |
4000 | srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_knl -v x 8 -v y 5 -v z 10 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=cpu_knl.kind=core.size=4K.node=1.mpi=1.hyper=1 |
8000 | srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_knl -v x 8 -v y 10 -v z 10 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=cpu_knl.kind=core.size=8K.node=1.mpi=1.hyper=1 |
16000 | srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_knl -v x 16 -v y 10 -v z 10 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=cpu_knl.kind=core.size=16K.node=1.mpi=1.hyper=1 |
1000 | setenv OMP_NUM_THREADS 1; srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_knl -sf kk -k on t 1 -pk kokkos reduction parallel/reduce comm classic -v x 4 -v y 5 -v z 5 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_knl.kind=core.size=1K.node=1.mpi=1.thread=1.hyper=1 |
2000 | setenv OMP_NUM_THREADS 1; srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_knl -sf kk -k on t 1 -pk kokkos reduction parallel/reduce comm classic -v x 8 -v y 5 -v z 5 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_knl.kind=core.size=2K.node=1.mpi=1.thread=1.hyper=1 |
4000 | setenv OMP_NUM_THREADS 1; srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_knl -sf kk -k on t 1 -pk kokkos reduction parallel/reduce comm classic -v x 8 -v y 5 -v z 10 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_knl.kind=core.size=4K.node=1.mpi=1.thread=1.hyper=1 |
8000 | setenv OMP_NUM_THREADS 1; srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_knl -sf kk -k on t 1 -pk kokkos reduction parallel/reduce comm classic -v x 8 -v y 10 -v z 10 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_knl.kind=core.size=8K.node=1.mpi=1.thread=1.hyper=1 |
16000 | setenv OMP_NUM_THREADS 1; srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_knl -sf kk -k on t 1 -pk kokkos reduction parallel/reduce comm classic -v x 16 -v y 10 -v z 10 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_knl.kind=core.size=16K.node=1.mpi=1.thread=1.hyper=1 |
1000 | srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 4 -v y 5 -v z 5 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_serial_knl.kind=core.size=1K.node=1.mpi=1.hyper=1 |
2000 | srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 8 -v y 5 -v z 5 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_serial_knl.kind=core.size=2K.node=1.mpi=1.hyper=1 |
4000 | srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 8 -v y 5 -v z 10 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_serial_knl.kind=core.size=4K.node=1.mpi=1.hyper=1 |
8000 | srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 8 -v y 10 -v z 10 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_serial_knl.kind=core.size=8K.node=1.mpi=1.hyper=1 |
16000 | srun -n 1 -C knl --ntasks-per-node 1 --cpu_bind=cores -c 4 ./spa_mutrino_kokkos_serial_knl -sf kk -k on -pk kokkos reduction parallel/reduce comm classic -v x 16 -v y 10 -v z 10 -v t 100 -in in.collide.steps -log log.sparta.date=23Dec17.model=collide.machine=mutrino.pkg=kokkos_serial_knl.kind=core.size=16K.node=1.mpi=1.hyper=1 |