Commit 9ee817fc authored by Claus Kleinwort's avatar Claus Kleinwort
Browse files

Optional LAPACK support added

git-svn-id: http://svnsrv.desy.de/public/MillepedeII/trunk@206 3547b9b0-65b8-46d3-b95d-921b3f43af62
parent 85c88543
......@@ -44,12 +44,21 @@ SUPPORT_ZLIB = yes
# requires z library and header to be installed at places defined here:
ZLIB_INCLUDES_DIR = =
ZLIB_LIBS_DIR = =
# LAPACK (64 bit) with Intel MKL
SUPPORT_LAPACK64 =
#yes
LAPACK64 = OPENBLAS
LAPACK64_LIBS_DIR = /usr/lib64
LAPACK64_LIB = openblasp64
#LAPACK64 = MKL
#LAPACK64_LIBS_DIR = <path to mkl_rt>
#LAPACK64_LIB = mkl_rt
#
# If yes use multithreading with OpenMP (TM)
SUPPORT_OPENMP = yes
# ompP profiler (http://www.ompp-tool.com, needs Opari for source-to-source instrumentation)
OMPP =
# kinst-ompp
OMPP =
#kinst-ompp
#
# make install copies the binary to $(PREFIX)/bin
PREFIX = .
......@@ -79,6 +88,33 @@ ifeq ($(SUPPORT_OPENMP),yes)
F_FLAGS += -fopenmp
endif
#
ifeq ($(SUPPORT_LAPACK64),yes)
# Using LAPACK64, tested: Intel MKL, OPENBLAS
C_LIBS += -L$(LAPACK64_LIBS_DIR) -l$(LAPACK64_LIB)
F_FLAGS += -DLAPACK64=\"$(LAPACK64)\"
ifeq ($(LAPACK64),MKL)
$(info Using MKL for LAPACK64)
$(info - Provide $(LAPACK64_LIB) in runtime environment)
ifeq ($(SUPPORT_OPENMP),yes)
$(info - Set number of MKL threads in OMP_NUM_THREADS)
else
$(info - Set number of MKL threads in MKL_NUM_THREADS)
endif
$(info - Set MKL_THREADING_LAYER=GNU)
$(info )
endif
ifeq ($(LAPACK64),OPENBLAS)
$(info Using OPENBLAS for LAPACK64)
$(info - Provide $(LAPACK64_LIB) in runtime environment)
ifeq ($(SUPPORT_OPENMP),yes)
$(info - Set number of OPENBLAS threads in OMP_NUM_THREADS)
else
$(info - Set number of OPENBLAS threads in OPENBLAS_NUM_THREADS)
endif
$(info )
endif
endif
#
LOADER = $(OMPP) $(GCC)
L_FLAGS = -Wall -O3
#
......@@ -103,8 +139,8 @@ ifeq ($(SUPPORT_READ_C),yes)
endif
endif
endif
#
#
# -L$(MKL_DIR) -lmkl_rt
# -lopenblasp64
# Make the executables
EXECUTABLES = pede
#
......
......@@ -4,7 +4,7 @@
!! \author Claus Kleinwort, DESY, 2012 (Claus.Kleinwort@desy.de)
!!
!! \copyright
!! Copyright (c) 2012 - 2020 Deutsches Elektronen-Synchroton,
!! Copyright (c) 2012 - 2021 Deutsches Elektronen-Synchroton,
!! Member of the Helmholtz Association, (DESY), HAMBURG, GERMANY \n\n
!! This library is free software; you can redistribute it and/or modify
!! it under the terms of the GNU Library General Public License as
......@@ -31,8 +31,8 @@ MODULE mpmod
SAVE
! steering parameters
INTEGER(mpi) :: ictest=0 !< test mode '-t'
INTEGER(mpi) :: metsol=0 !< solution method (1: inversion, 2: diagonalization, 3: decomposition, 4: MINRES, 5: \ref minresqlpmodule::minresqlp "MINRES-QLP")
INTEGER(mpi) :: matsto=2 !< (global) matrix storage mode (1: full, 2: sparse, 3: block diagonal)
INTEGER(mpi) :: metsol=0 !< solution method (1: inversion, 2: diagonalization, 3: decomposition, 4: MINRES, 5: \ref minresqlpmodule::minresqlp "MINRES-QLP", 7: LAPACK)
INTEGER(mpi) :: matsto=2 !< (global) matrix storage mode (0: unpacked, 1: full = packed, 2: sparse)
INTEGER(mpi) :: mprint=1 !< print flag (0: minimal, 1: normal, >1: more)
INTEGER(mpi) :: mdebug=0 !< debug flag (number of records to print)
INTEGER(mpi) :: mdebg2=10 !< number of measurements for record debug printout
......@@ -112,6 +112,9 @@ MODULE mpmod
INTEGER(mpi) :: mcount=0 !< flag for grouping and counting global parameters on equlation (0) or record (1) level
INTEGER(mpi) :: monpg1=0 !< progress monitoring, repetition rate start value
INTEGER(mpi) :: monpg2=0 !< progress monitoring, repetition rate max increase
#ifdef LAPACK64
INTEGER(mpi) :: ilperr=0 !< flag to calculate parameter errors with LAPACK
#endif
! variables
INTEGER(mpi) :: lunmon !< unit for monitoring output file
......@@ -126,7 +129,7 @@ MODULE mpmod
INTEGER(mpi) :: ntpgrp !< number of parameter groups
INTEGER(mpi) :: nvpgrp !< number of variable parameter groups
INTEGER(mpi) :: napgrp !< number of all parameter groups (variable + Lagrange mult.)
INTEGER(mpi) :: npblck !< number of (disjoint) parameter blocks
INTEGER(mpi) :: npblck !< number of (disjoint) parameter blocks (>1: block diagonal storage)
INTEGER(mpi) :: ncblck !< number of (disjoint) constraint blocks
INTEGER(mpi) :: mszcon !< (integrated block) matrix size for constraint matrix
INTEGER(mpi) :: mszprd !< (integrated block) matrix size for (constraint) product matrix
......@@ -192,6 +195,7 @@ MODULE mpmod
REAL(mpd), DIMENSION(:), ALLOCATABLE :: globalMatD !< global matrix 'A' (double, full or sparse)
REAL(mps), DIMENSION(:), ALLOCATABLE :: globalMatF !< global matrix 'A' (float part for compressed sparse)
REAL(mpd), DIMENSION(:), ALLOCATABLE :: globalVector !< global vector 'x' (in A*x=b)
INTEGER(mpl), DIMENSION(:), ALLOCATABLE :: globalRowOffsets !< row offsets for full or unpacked matrix
INTEGER(mpi), DIMENSION(:), ALLOCATABLE :: globalCounter !< global counter (entries in 'x')
! AVPROD (A*x=b) by MINRES
REAL(mpd), DIMENSION(:), ALLOCATABLE :: vecXav !< vector x for AVPROD (A*x=b)
......@@ -208,6 +212,14 @@ MODULE mpmod
REAL(mpd), DIMENSION(:), ALLOCATABLE :: workspaceEigenValues !< workspace eigen values
REAL(mpd), DIMENSION(:), ALLOCATABLE :: workspaceEigenVectors !< workspace eigen vectors
INTEGER(mpi), DIMENSION(:), ALLOCATABLE :: workspaceI !< (general) workspace (I)
#ifdef LAPACK64
! LAPACK
INTEGER(mpl):: lplwrk=1 !< length of LAPACK WORK array
REAL(mpd), DIMENSION(:), ALLOCATABLE :: lapackQL !< LAPACK QL (QL decomp.)
REAL(mpd), DIMENSION(:), ALLOCATABLE :: lapackTAU !< LAPACK TAU (QL decomp.)
REAL(mpd), DIMENSION(:), ALLOCATABLE :: lapackWORK !< LAPACK work array
INTEGER(mpl), DIMENSION(:), ALLOCATABLE :: lapackIPIV !< LAPACK IPIV (pivot)
#endif
! constraint matrix, residuals
REAL(mpd), DIMENSION(:), ALLOCATABLE :: matConsProduct !< product matrix of constraints
REAL(mpd), DIMENSION(:), ALLOCATABLE :: vecConsResiduals !< residuals of constraints
......@@ -256,7 +268,7 @@ MODULE mpmod
! global parameter usage from all records
INTEGER(mpi), DIMENSION(:), ALLOCATABLE :: globalIndexRanges !< global par ranges
INTEGER(mpi), DIMENSION(:,:), ALLOCATABLE :: matParBlockOffsets !< global par block offsets (parameter, constraint blocks)
INTEGER(mpl), DIMENSION(:), ALLOCATABLE :: vecParBlockOffsets !< global par block offsets (global matrix)
INTEGER(mpi), DIMENSION(:), ALLOCATABLE :: vecParBlockConOffsets !< global par block (constraint) offsets
! local fit
REAL(mpd), DIMENSION(:), ALLOCATABLE::blvec !< local fit vector 'b' (in A*x=b), replaced by 'x'
REAL(mpd), DIMENSION(:), ALLOCATABLE::clmat !< local fit matrix 'A' (in A*x=b)
......
......@@ -9,7 +9,7 @@
!! \author Claus Kleinwort, DESY (maintenance and developement)
!!
!! \copyright
!! Copyright (c) 2009 - 2020 Deutsches Elektronen-Synchroton,
!! Copyright (c) 2009 - 2021 Deutsches Elektronen-Synchroton,
!! Member of the Helmholtz Association, (DESY), HAMBURG, GERMANY \n\n
!! This library is free software; you can redistribute it and/or modify
!! it under the terms of the GNU Library General Public License as
......
......@@ -4,7 +4,7 @@
!! \author Claus Kleinwort, DESY, 2015 (Claus.Kleinwort@desy.de)
!!
!! \copyright
!! Copyright (c) 2015-2020 Deutsches Elektronen-Synchroton,
!! Copyright (c) 2015-2021 Deutsches Elektronen-Synchroton,
!! Member of the Helmholtz Association, (DESY), HAMBURG, GERMANY \n\n
!! This library is free software; you can redistribute it and/or modify
!! it under the terms of the GNU Library General Public License as
......@@ -71,8 +71,10 @@ SUBROUTINE qlini(n,m,l,k)
CALL mpalloc(sparseV,length,'QLDEC: sparsity structure of V')
length=npar*ncon
CALL mpalloc(matV,length,'QLDEC: V')
matV=0.
length=ncon*ncon
CALL mpalloc(matL,length,'QLDEC: L')
matL=0.
length=npar
CALL mpalloc(vecN,length,'QLDEC: v')
length=nblock
......@@ -223,6 +225,8 @@ SUBROUTINE qldecb(a,bpar,bcon)
INTEGER(mpi), INTENT(IN) :: bpar(2,*)
INTEGER(mpi), INTENT(IN) :: bcon(3,*)
!$POMP INST BEGIN(qldecb)
! prepare
length=npar*ncon
matV=0.0_mpd
......@@ -325,6 +329,8 @@ SUBROUTINE qldecb(a,bpar,bcon)
sparseV(ioff3+5)=kn-ipoff ! end 2
END DO
END DO
!$POMP INST END(qldecb)
END SUBROUTINE qldecb
......@@ -479,12 +485,12 @@ END SUBROUTINE qlsmq
!! Similarity transformation for symmetric matrix by Q from QL decomposition.
!!
!! \param [in] aprod external procedure to calculate A*v
!! \param [in,out] A symmetric Npar-by-Npar matrix A in symmetric storage mode
!! (V(1) = V11, V(2) = V12, V(3) = V22, V(4) = V13, ...),
!! \param [in,out] A symmetric Npar-by-Npar matrix A in symmetric or unpacked storage mode
!! overwritten with Q*A*Q^t (t=false) or Q^t*A*Q (t=true)
!! \param [in] roff row offsets for A
!! \param [in] t use transposed of Q
!!
SUBROUTINE qlssq(aprod,A,t)
SUBROUTINE qlssq(aprod,A,roff,t)
USE mpqldec
USE mpdalc
......@@ -497,11 +503,10 @@ SUBROUTINE qlssq(aprod,A,t)
INTEGER(mpi) :: iclast
INTEGER(mpl) :: ioff1
INTEGER(mpl) :: ioff2
INTEGER(mpl) :: ioffb
INTEGER(mpl) :: ioffp
INTEGER(mpi) :: j
INTEGER(mpi) :: k
INTEGER(mpi) :: kn
INTEGER(mpi) :: l
INTEGER(mpl) :: length
INTEGER(mpi) :: nconb
INTEGER(mpi) :: nparb
......@@ -509,6 +514,7 @@ SUBROUTINE qlssq(aprod,A,t)
REAL(mpd), DIMENSION(:), ALLOCATABLE :: Av
REAL(mpd), INTENT(IN OUT) :: A(*)
INTEGER(mpl), INTENT(IN) :: roff(*)
LOGICAL, INTENT(IN) :: t
INTERFACE
......@@ -525,7 +531,7 @@ SUBROUTINE qlssq(aprod,A,t)
length=npar
CALL mpalloc(Av,length,'qlssq: A*v')
ioffb=0 ! block offset
ioffp=0 ! parameter offset for block
DO ibpar=1,nblock ! parameter block
icoff=ioffBlock(ibpar) ! constraint offset in parameter block
iclast=ioffBlock(ibpar+1) ! last constraint in parameter block
......@@ -540,29 +546,33 @@ SUBROUTINE qlssq(aprod,A,t)
! column offset
ioff1=(k-1+icoff)*npar
! A*v
CALL aprod(nparb,ioffb,matV(ioff1+1:ioff1+nparb),Av(1:nparb))
CALL aprod(nparb,ioffp,matV(ioff1+1:ioff1+nparb),Av(1:nparb))
! transformation
! diagonal block
! v^t*A*v
vtAv=dot_product(matV(ioff1+1:ioff1+kn),Av(1:kn))
! update
ioff2=ioffb
! parallelize row loop
! slot of 8 'I' for next idle thread
!$OMP PARALLEL DO &
!$OMP PRIVATE(IOFF2) &
!$OMP SCHEDULE(DYNAMIC,8)
DO i=1,kn
ioff2=roff(i+ioffp)+ioffp
! correct with 2*(2v*vtAv*v^t - Av*v^t - (Av*v^t)^t)
A(ioff2+1:ioff2+i)=A(ioff2+1:ioff2+i)+2.0_mpd* &
((2.0_mpd*matV(ioff1+i)*vtAv-Av(i))*matV(ioff1+1:ioff1+i)-Av(1:i)*matV(ioff1+i))
ioff2=ioff2+i
END DO
!$OMP END PARALLEL DO
! off diagonal block
DO i=kn+1,nparb
ioff2=roff(i+ioffp)+ioffp
! correct with -2Av*v^t
A(ioff2+1:ioff2+kn)=A(ioff2+1:ioff2+kn)-2.0_mpd*matV(ioff1+1:ioff1+kn)*Av(i)
ioff2=ioff2+i
END DO
END DO
! update block offset
l=nparb
ioffb=ioffb+(l*l+l)/2
! update parameter offset
ioffp=ioffp+nparb
END DO
CALL mpdealloc(Av)
......@@ -570,7 +580,6 @@ SUBROUTINE qlssq(aprod,A,t)
END SUBROUTINE qlssq
!> Partial similarity transformation by Q(t).
!!
!! Partial similarity transformation for symmetric matrix by Q from QL decomposition.
......@@ -614,10 +623,9 @@ SUBROUTINE qlpssq(aprod,B,m,t)
LOGICAL, INTENT(IN) :: t
INTERFACE
SUBROUTINE aprod(n,l,x,y,is) ! y=A*x
SUBROUTINE aprod(n,x,y,is) ! y=A*x
USE mpdef
INTEGER(mpi), INTENT(in) :: n
INTEGER(mpl), INTENT(in) :: l
REAL(mpd), INTENT(IN) :: x(n)
REAL(mpd), INTENT(OUT) :: y(n)
INTEGER(mpi), INTENT(in) :: is(*)
......@@ -633,7 +641,7 @@ SUBROUTINE qlpssq(aprod,B,m,t)
! A*V
ioff1=0
DO i=1,ncon
CALL aprod(npar,0_mpl,matV(ioff1+1:ioff1+npar),Av(ioff1+1:ioff1+npar),sparseV(i*5-4))
CALL aprod(npar,matV(ioff1+1:ioff1+npar),Av(ioff1+1:ioff1+npar),sparseV(i*5-4))
ioff1=ioff1+npar
END DO
......@@ -823,3 +831,5 @@ SUBROUTINE qldump()
100 FORMAT(" qldump",7I8)
END SUBROUTINE qldump
This diff is collapsed.
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment