====vasp==== For use by authorized users only. Local copy 5.4.4 (for Intel) is modified from distribution as follows: installs$ diff -r VASP-REFERENCE/vasp.5.4.4/src VASP/5.4.4/vasp.5.4.4/build/std | grep -v "\.o$" | grep -v "\.f90$" | grep -v "\.mod$" Only in VASP-REFERENCE/vasp.5.4.4/src: CUDA Only in VASP-REFERENCE/vasp.5.4.4/src: fftlib Only in VASP/5.4.4/vasp.5.4.4/build/std/lib: libdmy.a Only in VASP/5.4.4/vasp.5.4.4/build/std/lib: makefile.include diff -r VASP-REFERENCE/vasp.5.4.4/src/makefile VASP/5.4.4/vasp.5.4.4/build/std/makefile 18c18 < LLIB=-Llib -ldmy --- > LLIB=lib/getshmem.o lib/linpack_double.o -Llib -ldmy 131c131 < #OBJCTS_f90=$(filter-out getshmem.o, $(OBJCTS)) --- > OBJCTS_f90=$(filter-out getshmem.o, $(OBJCTS)) Only in VASP/5.4.4/vasp.5.4.4/build/std: makefile.include Only in VASP/5.4.4/vasp.5.4.4/build/std/parser: libparser.a Only in VASP/5.4.4/vasp.5.4.4/build/std/parser: makefile.include Only in VASP-REFERENCE/vasp.5.4.4/src: README Only in VASP-REFERENCE/vasp.5.4.4/src: vasp.cfg Only in VASP/5.4.4/vasp.5.4.4/build/std: vasp.O2.smkl Only in VASP/5.4.4/vasp.5.4.4/build/std: vasp.O3.pmkl Only in VASP/5.4.4/vasp.5.4.4/build/std: vasp.O3.smkl installs$ ''getshmem.o'' and ''linpack_double.o'' are made in build/lib, and modules and ''makefile.include'' are: installs$ module load intel/19.0.5 mkl/19.0.5 impi/19.0.5 installs$ cat VASP/5.4.4/vasp.5.4.4/build/std/makefile.include # Precompiler options CPP_OPTIONS= -DHOST=\"LinuxIFC\"\ -DMPI -DMPI_BLOCK=8000 \ -Duse_collective \ -DscaLAPACK \ -DCACHE_SIZE=4000 \ -Davoidalloc \ -Duse_bse_te \ -Dtbdyn \ -Duse_shmem CPP = fpp -f_com=no -free -w0 $*$(FUFFIX) $*$(SUFFIX) $(CPP_OPTIONS) FC = mpiifort FCL = mpiifort -mkl=sequential -lstdc++ FREE = -free -names lowercase FFLAGS = -assume byterecl -w OFLAG = -O2 -xHOST -qopenmp OFLAG = -O3 -xsse3 -axsse4.2,AVX,COREAVX512 -qopenmp OFLAG = -O2 -xsse3 -axsse4.2,AVX,COREAVX512 -qopenmp OFLAG_IN = $(OFLAG) DEBUG = -O0 MKL_PATH = $(MKLROOT)/lib/intel64 BLAS = LAPACK 
= BLACS = -lmkl_blacs_intelmpi_lp64 SCALAPACK = $(MKL_PATH)/libmkl_scalapack_lp64.a $(BLACS) OBJECTS = fftmpiw.o fftmpi_map.o fft3dlib.o fftw3d.o INCS =-I$(MKLROOT)/include/fftw LLIBS = $(SCALAPACK) $(LAPACK) $(BLAS) OBJECTS_O1 += fftw3d.o fftmpi.o fftmpiw.o OBJECTS_O2 += fft3dlib.o # For what used to be vasp.5.lib CPP_LIB = $(CPP) FC_LIB = $(FC) CC_LIB = icc CFLAGS_LIB = -O FFLAGS_LIB = -O1 FREE_LIB = $(FREE) OBJECTS_LIB= linpack_double.o getshmem.o # For the parser library CXX_PARS = icpc LIBS += parser LLIBS += -Lparser -lparser -lstdc++ # Normally no need to change this SRCDIR = ../../src BINDIR = ../../bin #================================================ # GPU Stuff #CPP_GPU = -DCUDA_GPU -DRPROMU_CPROJ_OVERLAP -DUSE_PINNED_MEMORY -DCUFFT_MIN=28 -UscaLAPACK #OBJECTS_GPU = fftmpiw.o fftmpi_map.o fft3dlib.o fftw3d_gpu.o fftmpiw_gpu.o #CC = icc #CXX = icpc #CFLAGS = -fPIC -DADD_ -Wall -openmp -DMAGMA_WITH_MKL -DMAGMA_SETAFFINITY -DGPUSHMEM=300 -DHAVE_CUBLAS #CUDA_ROOT ?= /usr/local/cuda/ #NVCC := $(CUDA_ROOT)/bin/nvcc -ccbin=icc #CUDA_LIB := -L$(CUDA_ROOT)/lib64 -lnvToolsExt -lcudart -lcuda -lcufft -lcublas #GENCODE_ARCH := -gencode=arch=compute_30,code=\"sm_30,compute_30\" \ -gencode=arch=compute_35,code=\"sm_35,compute_35\" \ -gencode=arch=compute_60,code=\"sm_60,compute_60\" MPI_INC = $(I_MPI_ROOT)/intel64/include/ installs$ Some run examples with an OpenMP, MKL sequential build: module load intel/19.0.5 mkl/19.0.5 impi/19.0.5 mpirun -np 16 -genv OMP_NUM_THREADS=2 /scrfs/apps/vasp/vasp.5.4.4/build/std/vasp.O2.smkl took 75 seconds on Trestles, 35 seconds on Razor 16-core (with OMP_NUM_THREADS=1; see note below), 27.3 seconds on Pinnacle. vasp.o2.smkl took 26.1 seconds on Pinnacle. Note: For best performance, the number of MPI processes multiplied by the number of OpenMP threads per process should not exceed the number of physical cores; for example, with -np 16 on a 16-core node, use OMP_NUM_THREADS=1.