rpl/lapack/lapack/zhegv_2stage.f - view

Return to zhegv_2stage.f CVS log

Up to [local] / rpl / lapack / lapack

File: [local] / rpl / lapack / lapack / zhegv_2stage.f
Revision 1.5: download - view: text, annotated - select for diffs - revision graph
Mon Aug 7 08:39:23 2023 UTC (9 months, 3 weeks ago) by bertrand
Branches: MAIN
CVS tags: rpl-4_1_35, rpl-4_1_34, HEAD

Première mise à jour de lapack et blas.

    1: *> \brief \b ZHEGV_2STAGE
    2: *
    3: *  @precisions fortran z -> c
    4: *
    5: *  =========== DOCUMENTATION ===========
    6: *
    7: * Online html documentation available at
    8: *            http://www.netlib.org/lapack/explore-html/
    9: *
   10: *> \htmlonly
   11: *> Download ZHEGV_2STAGE + dependencies
   12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhegv_2stage.f">
   13: *> [TGZ]</a>
   14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhegv_2stage.f">
   15: *> [ZIP]</a>
   16: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhegv_2stage.f">
   17: *> [TXT]</a>
   18: *> \endhtmlonly
   19: *
   20: *  Definition:
   21: *  ===========
   22: *
   23: *       SUBROUTINE ZHEGV_2STAGE( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W,
   24: *                                WORK, LWORK, RWORK, INFO )
   25: *
   26: *       IMPLICIT NONE
   27: *
   28: *       .. Scalar Arguments ..
   29: *       CHARACTER          JOBZ, UPLO
   30: *       INTEGER            INFO, ITYPE, LDA, LDB, LWORK, N
   31: *       ..
   32: *       .. Array Arguments ..
   33: *       DOUBLE PRECISION   RWORK( * ), W( * )
   34: *       COMPLEX*16         A( LDA, * ), B( LDB, * ), WORK( * )
   35: *       ..
   36: *
   37: *
   38: *> \par Purpose:
   39: *  =============
   40: *>
   41: *> \verbatim
   42: *>
   43: *> ZHEGV_2STAGE computes all the eigenvalues, and optionally, the eigenvectors
   44: *> of a complex generalized Hermitian-definite eigenproblem, of the form
   45: *> A*x=(lambda)*B*x,  A*Bx=(lambda)*x,  or B*A*x=(lambda)*x.
   46: *> Here A and B are assumed to be Hermitian and B is also
   47: *> positive definite.
   48: *> This routine use the 2stage technique for the reduction to tridiagonal
   49: *> which showed higher performance on recent architecture and for large
   50: *> sizes N>2000.
   51: *> \endverbatim
   52: *
   53: *  Arguments:
   54: *  ==========
   55: *
   56: *> \param[in] ITYPE
   57: *> \verbatim
   58: *>          ITYPE is INTEGER
   59: *>          Specifies the problem type to be solved:
   60: *>          = 1:  A*x = (lambda)*B*x
   61: *>          = 2:  A*B*x = (lambda)*x
   62: *>          = 3:  B*A*x = (lambda)*x
   63: *> \endverbatim
   64: *>
   65: *> \param[in] JOBZ
   66: *> \verbatim
   67: *>          JOBZ is CHARACTER*1
   68: *>          = 'N':  Compute eigenvalues only;
   69: *>          = 'V':  Compute eigenvalues and eigenvectors.
   70: *>                  Not available in this release.
   71: *> \endverbatim
   72: *>
   73: *> \param[in] UPLO
   74: *> \verbatim
   75: *>          UPLO is CHARACTER*1
   76: *>          = 'U':  Upper triangles of A and B are stored;
   77: *>          = 'L':  Lower triangles of A and B are stored.
   78: *> \endverbatim
   79: *>
   80: *> \param[in] N
   81: *> \verbatim
   82: *>          N is INTEGER
   83: *>          The order of the matrices A and B.  N >= 0.
   84: *> \endverbatim
   85: *>
   86: *> \param[in,out] A
   87: *> \verbatim
   88: *>          A is COMPLEX*16 array, dimension (LDA, N)
   89: *>          On entry, the Hermitian matrix A.  If UPLO = 'U', the
   90: *>          leading N-by-N upper triangular part of A contains the
   91: *>          upper triangular part of the matrix A.  If UPLO = 'L',
   92: *>          the leading N-by-N lower triangular part of A contains
   93: *>          the lower triangular part of the matrix A.
   94: *>
   95: *>          On exit, if JOBZ = 'V', then if INFO = 0, A contains the
   96: *>          matrix Z of eigenvectors.  The eigenvectors are normalized
   97: *>          as follows:
   98: *>          if ITYPE = 1 or 2, Z**H*B*Z = I;
   99: *>          if ITYPE = 3, Z**H*inv(B)*Z = I.
  100: *>          If JOBZ = 'N', then on exit the upper triangle (if UPLO='U')
  101: *>          or the lower triangle (if UPLO='L') of A, including the
  102: *>          diagonal, is destroyed.
  103: *> \endverbatim
  104: *>
  105: *> \param[in] LDA
  106: *> \verbatim
  107: *>          LDA is INTEGER
  108: *>          The leading dimension of the array A.  LDA >= max(1,N).
  109: *> \endverbatim
  110: *>
  111: *> \param[in,out] B
  112: *> \verbatim
  113: *>          B is COMPLEX*16 array, dimension (LDB, N)
  114: *>          On entry, the Hermitian positive definite matrix B.
  115: *>          If UPLO = 'U', the leading N-by-N upper triangular part of B
  116: *>          contains the upper triangular part of the matrix B.
  117: *>          If UPLO = 'L', the leading N-by-N lower triangular part of B
  118: *>          contains the lower triangular part of the matrix B.
  119: *>
  120: *>          On exit, if INFO <= N, the part of B containing the matrix is
  121: *>          overwritten by the triangular factor U or L from the Cholesky
  122: *>          factorization B = U**H*U or B = L*L**H.
  123: *> \endverbatim
  124: *>
  125: *> \param[in] LDB
  126: *> \verbatim
  127: *>          LDB is INTEGER
  128: *>          The leading dimension of the array B.  LDB >= max(1,N).
  129: *> \endverbatim
  130: *>
  131: *> \param[out] W
  132: *> \verbatim
  133: *>          W is DOUBLE PRECISION array, dimension (N)
  134: *>          If INFO = 0, the eigenvalues in ascending order.
  135: *> \endverbatim
  136: *>
  137: *> \param[out] WORK
  138: *> \verbatim
  139: *>          WORK is COMPLEX*16 array, dimension (MAX(1,LWORK))
  140: *>          On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
  141: *> \endverbatim
  142: *>
  143: *> \param[in] LWORK
  144: *> \verbatim
  145: *>          LWORK is INTEGER
  146: *>          The length of the array WORK. LWORK >= 1, when N <= 1;
  147: *>          otherwise  
  148: *>          If JOBZ = 'N' and N > 1, LWORK must be queried.
  149: *>                                   LWORK = MAX(1, dimension) where
  150: *>                                   dimension = max(stage1,stage2) + (KD+1)*N + N
  151: *>                                             = N*KD + N*max(KD+1,FACTOPTNB) 
  152: *>                                               + max(2*KD*KD, KD*NTHREADS) 
  153: *>                                               + (KD+1)*N + N
  154: *>                                   where KD is the blocking size of the reduction,
  155: *>                                   FACTOPTNB is the blocking used by the QR or LQ
  156: *>                                   algorithm, usually FACTOPTNB=128 is a good choice
  157: *>                                   NTHREADS is the number of threads used when
  158: *>                                   openMP compilation is enabled, otherwise =1.
  159: *>          If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available
  160: *>
  161: *>          If LWORK = -1, then a workspace query is assumed; the routine
  162: *>          only calculates the optimal size of the WORK array, returns
  163: *>          this value as the first entry of the WORK array, and no error
  164: *>          message related to LWORK is issued by XERBLA.
  165: *> \endverbatim
  166: *>
  167: *> \param[out] RWORK
  168: *> \verbatim
  169: *>          RWORK is DOUBLE PRECISION array, dimension (max(1, 3*N-2))
  170: *> \endverbatim
  171: *>
  172: *> \param[out] INFO
  173: *> \verbatim
  174: *>          INFO is INTEGER
  175: *>          = 0:  successful exit
  176: *>          < 0:  if INFO = -i, the i-th argument had an illegal value
  177: *>          > 0:  ZPOTRF or ZHEEV returned an error code:
  178: *>             <= N:  if INFO = i, ZHEEV failed to converge;
  179: *>                    i off-diagonal elements of an intermediate
  180: *>                    tridiagonal form did not converge to zero;
  181: *>             > N:   if INFO = N + i, for 1 <= i <= N, then the leading
  182: *>                    minor of order i of B is not positive definite.
  183: *>                    The factorization of B could not be completed and
  184: *>                    no eigenvalues or eigenvectors were computed.
  185: *> \endverbatim
  186: *
  187: *  Authors:
  188: *  ========
  189: *
  190: *> \author Univ. of Tennessee
  191: *> \author Univ. of California Berkeley
  192: *> \author Univ. of Colorado Denver
  193: *> \author NAG Ltd.
  194: *
  195: *> \ingroup complex16HEeigen
  196: *
  197: *> \par Further Details:
  198: *  =====================
  199: *>
  200: *> \verbatim
  201: *>
  202: *>  All details about the 2stage techniques are available in:
  203: *>
  204: *>  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
  205: *>  Parallel reduction to condensed forms for symmetric eigenvalue problems
  206: *>  using aggregated fine-grained and memory-aware kernels. In Proceedings
  207: *>  of 2011 International Conference for High Performance Computing,
  208: *>  Networking, Storage and Analysis (SC '11), New York, NY, USA,
  209: *>  Article 8 , 11 pages.
  210: *>  http://doi.acm.org/10.1145/2063384.2063394
  211: *>
  212: *>  A. Haidar, J. Kurzak, P. Luszczek, 2013.
  213: *>  An improved parallel singular value algorithm and its implementation 
  214: *>  for multicore hardware, In Proceedings of 2013 International Conference
  215: *>  for High Performance Computing, Networking, Storage and Analysis (SC '13).
  216: *>  Denver, Colorado, USA, 2013.
  217: *>  Article 90, 12 pages.
  218: *>  http://doi.acm.org/10.1145/2503210.2503292
  219: *>
  220: *>  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
  221: *>  A novel hybrid CPU-GPU generalized eigensolver for electronic structure 
  222: *>  calculations based on fine-grained memory aware tasks.
  223: *>  International Journal of High Performance Computing Applications.
  224: *>  Volume 28 Issue 2, Pages 196-209, May 2014.
  225: *>  http://hpc.sagepub.com/content/28/2/196 
  226: *>
  227: *> \endverbatim
  228: *
  229: *  =====================================================================
  230:       SUBROUTINE ZHEGV_2STAGE( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W,
  231:      $                         WORK, LWORK, RWORK, INFO )
  232: *
  233:       IMPLICIT NONE
  234: *
  235: *  -- LAPACK driver routine --
  236: *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
  237: *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  238: *
  239: *     .. Scalar Arguments ..
  240:       CHARACTER          JOBZ, UPLO
  241:       INTEGER            INFO, ITYPE, LDA, LDB, LWORK, N
  242: *     ..
  243: *     .. Array Arguments ..
  244:       DOUBLE PRECISION   RWORK( * ), W( * )
  245:       COMPLEX*16         A( LDA, * ), B( LDB, * ), WORK( * )
  246: *     ..
  247: *
  248: *  =====================================================================
  249: *
  250: *     .. Parameters ..
  251:       COMPLEX*16         ONE
  252:       PARAMETER          ( ONE = ( 1.0D+0, 0.0D+0 ) )
  253: *     ..
  254: *     .. Local Scalars ..
  255:       LOGICAL            LQUERY, UPPER, WANTZ
  256:       CHARACTER          TRANS
  257:       INTEGER            NEIG, LWMIN, LHTRD, LWTRD, KD, IB
  258: *     ..
  259: *     .. External Functions ..
  260:       LOGICAL            LSAME
  261:       INTEGER            ILAENV2STAGE
  262:       EXTERNAL           LSAME, ILAENV2STAGE
  263: *     ..
  264: *     .. External Subroutines ..
  265:       EXTERNAL           XERBLA, ZHEGST, ZPOTRF, ZTRMM, ZTRSM,
  266:      $                   ZHEEV_2STAGE
  267: *     ..
  268: *     .. Intrinsic Functions ..
  269:       INTRINSIC          MAX
  270: *     ..
  271: *     .. Executable Statements ..
  272: *
  273: *     Test the input parameters.
  274: *
  275:       WANTZ = LSAME( JOBZ, 'V' )
  276:       UPPER = LSAME( UPLO, 'U' )
  277:       LQUERY = ( LWORK.EQ.-1 )
  278: *
  279:       INFO = 0
  280:       IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN
  281:          INFO = -1
  282:       ELSE IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN
  283:          INFO = -2
  284:       ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN
  285:          INFO = -3
  286:       ELSE IF( N.LT.0 ) THEN
  287:          INFO = -4
  288:       ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
  289:          INFO = -6
  290:       ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
  291:          INFO = -8
  292:       END IF
  293: *
  294:       IF( INFO.EQ.0 ) THEN
  295:          KD    = ILAENV2STAGE( 1, 'ZHETRD_2STAGE', JOBZ, N, -1, -1, -1 )
  296:          IB    = ILAENV2STAGE( 2, 'ZHETRD_2STAGE', JOBZ, N, KD, -1, -1 )
  297:          LHTRD = ILAENV2STAGE( 3, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, -1 )
  298:          LWTRD = ILAENV2STAGE( 4, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, -1 )
  299:          LWMIN = N + LHTRD + LWTRD
  300:          WORK( 1 )  = LWMIN
  301: *
  302:          IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
  303:             INFO = -11
  304:          END IF
  305:       END IF
  306: *
  307:       IF( INFO.NE.0 ) THEN
  308:          CALL XERBLA( 'ZHEGV_2STAGE ', -INFO )
  309:          RETURN
  310:       ELSE IF( LQUERY ) THEN
  311:          RETURN
  312:       END IF
  313: *
  314: *     Quick return if possible
  315: *
  316:       IF( N.EQ.0 )
  317:      $   RETURN
  318: *
  319: *     Form a Cholesky factorization of B.
  320: *
  321:       CALL ZPOTRF( UPLO, N, B, LDB, INFO )
  322:       IF( INFO.NE.0 ) THEN
  323:          INFO = N + INFO
  324:          RETURN
  325:       END IF
  326: *
  327: *     Transform problem to standard eigenvalue problem and solve.
  328: *
  329:       CALL ZHEGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO )
  330:       CALL ZHEEV_2STAGE( JOBZ, UPLO, N, A, LDA, W, 
  331:      $                   WORK, LWORK, RWORK, INFO )
  332: *
  333:       IF( WANTZ ) THEN
  334: *
  335: *        Backtransform eigenvectors to the original problem.
  336: *
  337:          NEIG = N
  338:          IF( INFO.GT.0 )
  339:      $      NEIG = INFO - 1
  340:          IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN
  341: *
  342: *           For A*x=(lambda)*B*x and A*B*x=(lambda)*x;
  343: *           backtransform eigenvectors: x = inv(L)**H *y or inv(U)*y
  344: *
  345:             IF( UPPER ) THEN
  346:                TRANS = 'N'
  347:             ELSE
  348:                TRANS = 'C'
  349:             END IF
  350: *
  351:             CALL ZTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE,
  352:      $                  B, LDB, A, LDA )
  353: *
  354:          ELSE IF( ITYPE.EQ.3 ) THEN
  355: *
  356: *           For B*A*x=(lambda)*x;
  357: *           backtransform eigenvectors: x = L*y or U**H *y
  358: *
  359:             IF( UPPER ) THEN
  360:                TRANS = 'C'
  361:             ELSE
  362:                TRANS = 'N'
  363:             END IF
  364: *
  365:             CALL ZTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE,
  366:      $                  B, LDB, A, LDA )
  367:          END IF
  368:       END IF
  369: *
  370:       WORK( 1 ) = LWMIN
  371: *
  372:       RETURN
  373: *
  374: *     End of ZHEGV_2STAGE
  375: *
  376:       END

CVSweb interface <joel.bertrand@systella.fr>