rpl/lapack/lapack/zhetrd_he2hb.f - view

Return to zhetrd_he2hb.f CVS log

Up to [local] / rpl / lapack / lapack

File: [local] / rpl / lapack / lapack / zhetrd_he2hb.f
Revision 1.6: download - view: text, annotated - select for diffs - revision graph
Mon Aug 7 08:39:24 2023 UTC (9 months, 3 weeks ago) by bertrand
Branches: MAIN
CVS tags: rpl-4_1_35, rpl-4_1_34, HEAD

Première mise à jour de lapack et blas.

    1: *> \brief \b ZHETRD_HE2HB
    2: *
    3: *  @precisions fortran z -> s d c
    4: *      
    5: *  =========== DOCUMENTATION ===========
    6: *
    7: * Online html documentation available at 
    8: *            http://www.netlib.org/lapack/explore-html/ 
    9: *
   10: *> \htmlonly
   11: *> Download ZHETRD_HE2HB + dependencies 
   12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhetrd_he2hb.f"> 
   13: *> [TGZ]</a> 
   14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhetrd_he2hb.f"> 
   15: *> [ZIP]</a> 
   16: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhetrd_he2hb.f"> 
   17: *> [TXT]</a>
   18: *> \endhtmlonly 
   19: *
   20: *  Definition:
   21: *  ===========
   22: *
   23: *       SUBROUTINE ZHETRD_HE2HB( UPLO, N, KD, A, LDA, AB, LDAB, TAU, 
   24: *                              WORK, LWORK, INFO )
   25: *
   26: *       IMPLICIT NONE
   27: *
   28: *       .. Scalar Arguments ..
   29: *       CHARACTER          UPLO
   30: *       INTEGER            INFO, LDA, LDAB, LWORK, N, KD
   31: *       ..
   32: *       .. Array Arguments ..
   33: *       COMPLEX*16         A( LDA, * ), AB( LDAB, * ), 
   34: *                          TAU( * ), WORK( * )
   35: *       ..
   36: *  
   37: *
   38: *> \par Purpose:
   39: *  =============
   40: *>
   41: *> \verbatim
   42: *>
   43: *> ZHETRD_HE2HB reduces a complex Hermitian matrix A to complex Hermitian
   44: *> band-diagonal form AB by a unitary similarity transformation:
   45: *> Q**H * A * Q = AB.
   46: *> \endverbatim
   47: *
   48: *  Arguments:
   49: *  ==========
   50: *
   51: *> \param[in] UPLO
   52: *> \verbatim
   53: *>          UPLO is CHARACTER*1
   54: *>          = 'U':  Upper triangle of A is stored;
   55: *>          = 'L':  Lower triangle of A is stored.
   56: *> \endverbatim
   57: *>
   58: *> \param[in] N
   59: *> \verbatim
   60: *>          N is INTEGER
   61: *>          The order of the matrix A.  N >= 0.
   62: *> \endverbatim
   63: *>
   64: *> \param[in] KD
   65: *> \verbatim
   66: *>          KD is INTEGER
   67: *>          The number of superdiagonals of the reduced matrix if UPLO = 'U',
   68: *>          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
   69: *>          The reduced matrix is stored in the array AB.
   70: *> \endverbatim
   71: *>
   72: *> \param[in,out] A
   73: *> \verbatim
   74: *>          A is COMPLEX*16 array, dimension (LDA,N)
   75: *>          On entry, the Hermitian matrix A.  If UPLO = 'U', the leading
   76: *>          N-by-N upper triangular part of A contains the upper
   77: *>          triangular part of the matrix A, and the strictly lower
   78: *>          triangular part of A is not referenced.  If UPLO = 'L', the
   79: *>          leading N-by-N lower triangular part of A contains the lower
   80: *>          triangular part of the matrix A, and the strictly upper
   81: *>          triangular part of A is not referenced.
   82: *>          On exit, if UPLO = 'U', the diagonal and first superdiagonal
   83: *>          of A are overwritten by the corresponding elements of the
   84: *>          tridiagonal matrix T, and the elements above the first
   85: *>          superdiagonal, with the array TAU, represent the unitary
   86: *>          matrix Q as a product of elementary reflectors; if UPLO
   87: *>          = 'L', the diagonal and first subdiagonal of A are over-
   88: *>          written by the corresponding elements of the tridiagonal
   89: *>          matrix T, and the elements below the first subdiagonal, with
   90: *>          the array TAU, represent the unitary matrix Q as a product
   91: *>          of elementary reflectors. See Further Details.
   92: *> \endverbatim
   93: *>
   94: *> \param[in] LDA
   95: *> \verbatim
   96: *>          LDA is INTEGER
   97: *>          The leading dimension of the array A.  LDA >= max(1,N).
   98: *> \endverbatim
   99: *>
  100: *> \param[out] AB
  101: *> \verbatim
  102: *>          AB is COMPLEX*16 array, dimension (LDAB,N)
  103: *>          On exit, the upper or lower triangle of the Hermitian band
  104: *>          matrix A, stored in the first KD+1 rows of the array.  The
  105: *>          j-th column of A is stored in the j-th column of the array AB
  106: *>          as follows:
  107: *>          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
  108: *>          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
  109: *> \endverbatim
  110: *>
  111: *> \param[in] LDAB
  112: *> \verbatim
  113: *>          LDAB is INTEGER
  114: *>          The leading dimension of the array AB.  LDAB >= KD+1.
  115: *> \endverbatim
  116: *>
  117: *> \param[out] TAU
  118: *> \verbatim
  119: *>          TAU is COMPLEX*16 array, dimension (N-KD)
  120: *>          The scalar factors of the elementary reflectors (see Further
  121: *>          Details).
  122: *> \endverbatim
  123: *>
  124: *> \param[out] WORK
  125: *> \verbatim
  126: *>          WORK is COMPLEX*16 array, dimension (LWORK)
  127: *>          On exit, if INFO = 0, or if LWORK=-1, 
  128: *>          WORK(1) returns the size of LWORK.
  129: *> \endverbatim
  130: *>
  131: *> \param[in] LWORK
  132: *> \verbatim
  133: *>          LWORK is INTEGER
  134: *>          The dimension of the array WORK which should be calculated
  135: *>          by a workspace query. LWORK = MAX(1, LWORK_QUERY)
  136: *>          If LWORK = -1, then a workspace query is assumed; the routine
  137: *>          only calculates the optimal size of the WORK array, returns
  138: *>          this value as the first entry of the WORK array, and no error
  139: *>          message related to LWORK is issued by XERBLA.
  140: *>          LWORK_QUERY = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD
  141: *>          where FACTOPTNB is the blocking used by the QR or LQ
  142: *>          algorithm, usually FACTOPTNB=128 is a good choice otherwise
  143: *>          putting LWORK=-1 will provide the size of WORK.
  144: *> \endverbatim
  145: *>
  146: *> \param[out] INFO
  147: *> \verbatim
  148: *>          INFO is INTEGER
  149: *>          = 0:  successful exit
  150: *>          < 0:  if INFO = -i, the i-th argument had an illegal value
  151: *> \endverbatim
  152: *
  153: *  Authors:
  154: *  ========
  155: *
  156: *> \author Univ. of Tennessee 
  157: *> \author Univ. of California Berkeley 
  158: *> \author Univ. of Colorado Denver 
  159: *> \author NAG Ltd. 
  160: *
  161: *> \ingroup complex16HEcomputational
  162: *
  163: *> \par Further Details:
  164: *  =====================
  165: *>
  166: *> \verbatim
  167: *>
  168: *>  Implemented by Azzam Haidar.
  169: *>
  170: *>  All details are available on technical report, SC11, SC13 papers.
  171: *>
  172: *>  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
  173: *>  Parallel reduction to condensed forms for symmetric eigenvalue problems
  174: *>  using aggregated fine-grained and memory-aware kernels. In Proceedings
  175: *>  of 2011 International Conference for High Performance Computing,
  176: *>  Networking, Storage and Analysis (SC '11), New York, NY, USA,
  177: *>  Article 8 , 11 pages.
  178: *>  http://doi.acm.org/10.1145/2063384.2063394
  179: *>
  180: *>  A. Haidar, J. Kurzak, P. Luszczek, 2013.
  181: *>  An improved parallel singular value algorithm and its implementation 
  182: *>  for multicore hardware, In Proceedings of 2013 International Conference
  183: *>  for High Performance Computing, Networking, Storage and Analysis (SC '13).
  184: *>  Denver, Colorado, USA, 2013.
  185: *>  Article 90, 12 pages.
  186: *>  http://doi.acm.org/10.1145/2503210.2503292
  187: *>
  188: *>  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
  189: *>  A novel hybrid CPU-GPU generalized eigensolver for electronic structure 
  190: *>  calculations based on fine-grained memory aware tasks.
  191: *>  International Journal of High Performance Computing Applications.
  192: *>  Volume 28 Issue 2, Pages 196-209, May 2014.
  193: *>  http://hpc.sagepub.com/content/28/2/196 
  194: *>
  195: *> \endverbatim
  196: *>
  197: *> \verbatim
  198: *>
  199: *>  If UPLO = 'U', the matrix Q is represented as a product of elementary
  200: *>  reflectors
  201: *>
  202: *>     Q = H(k)**H . . . H(2)**H H(1)**H, where k = n-kd.
  203: *>
  204: *>  Each H(i) has the form
  205: *>
  206: *>     H(i) = I - tau * v * v**H
  207: *>
  208: *>  where tau is a complex scalar, and v is a complex vector with
  209: *>  v(1:i+kd-1) = 0 and v(i+kd) = 1; conjg(v(i+kd+1:n)) is stored on exit in
  210: *>  A(i,i+kd+1:n), and tau in TAU(i).
  211: *>
  212: *>  If UPLO = 'L', the matrix Q is represented as a product of elementary
  213: *>  reflectors
  214: *>
  215: *>     Q = H(1) H(2) . . . H(k), where k = n-kd.
  216: *>
  217: *>  Each H(i) has the form
  218: *>
  219: *>     H(i) = I - tau * v * v**H
  220: *>
  221: *>  where tau is a complex scalar, and v is a complex vector with
  222: *>  v(kd+1:i) = 0 and v(i+kd+1) = 1; v(i+kd+2:n) is stored on exit in
  223: *>  A(i+kd+2:n,i), and tau in TAU(i).
  224: *>
  225: *>  The contents of A on exit are illustrated by the following examples
  226: *>  with n = 5:
  227: *>
  228: *>  if UPLO = 'U':                       if UPLO = 'L':
  229: *>
  230: *>    (  ab  ab/v1  v1      v1     v1    )              (  ab                            )
  231: *>    (      ab     ab/v2   v2     v2    )              (  ab/v1  ab                     )
  232: *>    (             ab      ab/v3  v3    )              (  v1     ab/v2  ab              )
  233: *>    (                     ab     ab/v4 )              (  v1     v2     ab/v3  ab       )
  234: *>    (                            ab    )              (  v1     v2     v3     ab/v4 ab )
  235: *>
  236: *>  where d and e denote diagonal and off-diagonal elements of T, and vi
  237: *>  denotes an element of the vector defining H(i).
  238: *> \endverbatim
  239: *>
  240: *  =====================================================================
  241:       SUBROUTINE ZHETRD_HE2HB( UPLO, N, KD, A, LDA, AB, LDAB, TAU, 
  242:      $                         WORK, LWORK, INFO )
  243: *
  244:       IMPLICIT NONE
  245: *
  246: *  -- LAPACK computational routine --
  247: *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
  248: *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  249: *
  250: *     .. Scalar Arguments ..
  251:       CHARACTER          UPLO
  252:       INTEGER            INFO, LDA, LDAB, LWORK, N, KD
  253: *     ..
  254: *     .. Array Arguments ..
  255:       COMPLEX*16         A( LDA, * ), AB( LDAB, * ), 
  256:      $                   TAU( * ), WORK( * )
  257: *     ..
  258: *
  259: *  =====================================================================
  260: *
  261: *     .. Parameters ..
  262:       DOUBLE PRECISION   RONE
  263:       COMPLEX*16         ZERO, ONE, HALF
  264:       PARAMETER          ( RONE = 1.0D+0,
  265:      $                   ZERO = ( 0.0D+0, 0.0D+0 ),
  266:      $                   ONE = ( 1.0D+0, 0.0D+0 ),
  267:      $                   HALF = ( 0.5D+0, 0.0D+0 ) )
  268: *     ..
  269: *     .. Local Scalars ..
  270:       LOGICAL            LQUERY, UPPER
  271:       INTEGER            I, J, IINFO, LWMIN, PN, PK, LK,
  272:      $                   LDT, LDW, LDS2, LDS1, 
  273:      $                   LS2, LS1, LW, LT,
  274:      $                   TPOS, WPOS, S2POS, S1POS
  275: *     ..
  276: *     .. External Subroutines ..
  277:       EXTERNAL           XERBLA, ZHER2K, ZHEMM, ZGEMM, ZCOPY,
  278:      $                   ZLARFT, ZGELQF, ZGEQRF, ZLASET
  279: *     ..
  280: *     .. Intrinsic Functions ..
  281:       INTRINSIC          MIN, MAX
  282: *     ..
  283: *     .. External Functions ..
  284:       LOGICAL            LSAME
  285:       INTEGER            ILAENV2STAGE 
  286:       EXTERNAL           LSAME, ILAENV2STAGE
  287: *     ..
  288: *     .. Executable Statements ..
  289: *
  290: *     Determine the minimal workspace size required 
  291: *     and test the input parameters
  292: *
  293:       INFO   = 0
  294:       UPPER  = LSAME( UPLO, 'U' )
  295:       LQUERY = ( LWORK.EQ.-1 )
  296:       LWMIN  = ILAENV2STAGE( 4, 'ZHETRD_HE2HB', ' ', N, KD, -1, -1 )
  297:       
  298:       IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
  299:          INFO = -1
  300:       ELSE IF( N.LT.0 ) THEN
  301:          INFO = -2
  302:       ELSE IF( KD.LT.0 ) THEN
  303:          INFO = -3
  304:       ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
  305:          INFO = -5
  306:       ELSE IF( LDAB.LT.MAX( 1, KD+1 ) ) THEN
  307:          INFO = -7
  308:       ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
  309:          INFO = -10
  310:       END IF
  311: *
  312:       IF( INFO.NE.0 ) THEN
  313:          CALL XERBLA( 'ZHETRD_HE2HB', -INFO )
  314:          RETURN
  315:       ELSE IF( LQUERY ) THEN
  316:          WORK( 1 ) = LWMIN
  317:          RETURN
  318:       END IF
  319: *
  320: *     Quick return if possible        
  321: *     Copy the upper/lower portion of A into AB 
  322: *
  323:       IF( N.LE.KD+1 ) THEN
  324:           IF( UPPER ) THEN
  325:               DO 100 I = 1, N
  326:                   LK = MIN( KD+1, I )
  327:                   CALL ZCOPY( LK, A( I-LK+1, I ), 1, 
  328:      $                            AB( KD+1-LK+1, I ), 1 )
  329:   100         CONTINUE
  330:           ELSE
  331:               DO 110 I = 1, N
  332:                   LK = MIN( KD+1, N-I+1 )
  333:                   CALL ZCOPY( LK, A( I, I ), 1, AB( 1, I ), 1 )
  334:   110         CONTINUE
  335:           ENDIF
  336:           WORK( 1 ) = 1
  337:           RETURN
  338:       END IF
  339: *
  340: *     Determine the pointer position for the workspace
  341: *      
  342:       LDT    = KD
  343:       LDS1   = KD
  344:       LT     = LDT*KD
  345:       LW     = N*KD
  346:       LS1    = LDS1*KD
  347:       LS2    = LWMIN - LT - LW - LS1
  348: *      LS2 = N*MAX(KD,FACTOPTNB) 
  349:       TPOS   = 1
  350:       WPOS   = TPOS  + LT
  351:       S1POS  = WPOS  + LW
  352:       S2POS  = S1POS + LS1 
  353:       IF( UPPER ) THEN
  354:           LDW    = KD
  355:           LDS2   = KD
  356:       ELSE
  357:           LDW    = N
  358:           LDS2   = N
  359:       ENDIF
  360: *
  361: *
  362: *     Set the workspace of the triangular matrix T to zero once such a
  363: *     way every time T is generated the upper/lower portion will be always zero
  364: *   
  365:       CALL ZLASET( "A", LDT, KD, ZERO, ZERO, WORK( TPOS ), LDT )
  366: *
  367:       IF( UPPER ) THEN
  368:           DO 10 I = 1, N - KD, KD
  369:              PN = N-I-KD+1
  370:              PK = MIN( N-I-KD+1, KD )
  371: *        
  372: *            Compute the LQ factorization of the current block
  373: *        
  374:              CALL ZGELQF( KD, PN, A( I, I+KD ), LDA,
  375:      $                    TAU( I ), WORK( S2POS ), LS2, IINFO )
  376: *        
  377: *            Copy the upper portion of A into AB
  378: *        
  379:              DO 20 J = I, I+PK-1
  380:                 LK = MIN( KD, N-J ) + 1
  381:                 CALL ZCOPY( LK, A( J, J ), LDA, AB( KD+1, J ), LDAB-1 )
  382:    20        CONTINUE
  383: *                
  384:              CALL ZLASET( 'Lower', PK, PK, ZERO, ONE, 
  385:      $                    A( I, I+KD ), LDA )
  386: *        
  387: *            Form the matrix T
  388: *        
  389:              CALL ZLARFT( 'Forward', 'Rowwise', PN, PK,
  390:      $                    A( I, I+KD ), LDA, TAU( I ), 
  391:      $                    WORK( TPOS ), LDT )
  392: *        
  393: *            Compute W:
  394: *             
  395:              CALL ZGEMM( 'Conjugate', 'No transpose', PK, PN, PK,
  396:      $                   ONE,  WORK( TPOS ), LDT,
  397:      $                         A( I, I+KD ), LDA,
  398:      $                   ZERO, WORK( S2POS ), LDS2 )
  399: *        
  400:              CALL ZHEMM( 'Right', UPLO, PK, PN,
  401:      $                   ONE,  A( I+KD, I+KD ), LDA,
  402:      $                         WORK( S2POS ), LDS2,
  403:      $                   ZERO, WORK( WPOS ), LDW )
  404: *        
  405:              CALL ZGEMM( 'No transpose', 'Conjugate', PK, PK, PN,
  406:      $                   ONE,  WORK( WPOS ), LDW,
  407:      $                         WORK( S2POS ), LDS2,
  408:      $                   ZERO, WORK( S1POS ), LDS1 )
  409: *        
  410:              CALL ZGEMM( 'No transpose', 'No transpose', PK, PN, PK,
  411:      $                   -HALF, WORK( S1POS ), LDS1, 
  412:      $                          A( I, I+KD ), LDA,
  413:      $                   ONE,   WORK( WPOS ), LDW )
  414: *             
  415: *        
  416: *            Update the unreduced submatrix A(i+kd:n,i+kd:n), using
  417: *            an update of the form:  A := A - V'*W - W'*V
  418: *        
  419:              CALL ZHER2K( UPLO, 'Conjugate', PN, PK,
  420:      $                    -ONE, A( I, I+KD ), LDA,
  421:      $                          WORK( WPOS ), LDW,
  422:      $                    RONE, A( I+KD, I+KD ), LDA )
  423:    10     CONTINUE
  424: *
  425: *        Copy the upper band to AB which is the band storage matrix
  426: *
  427:          DO 30 J = N-KD+1, N
  428:             LK = MIN(KD, N-J) + 1
  429:             CALL ZCOPY( LK, A( J, J ), LDA, AB( KD+1, J ), LDAB-1 )
  430:    30    CONTINUE
  431: *
  432:       ELSE
  433: *
  434: *         Reduce the lower triangle of A to lower band matrix
  435: *        
  436:           DO 40 I = 1, N - KD, KD
  437:              PN = N-I-KD+1
  438:              PK = MIN( N-I-KD+1, KD )
  439: *        
  440: *            Compute the QR factorization of the current block
  441: *        
  442:              CALL ZGEQRF( PN, KD, A( I+KD, I ), LDA,
  443:      $                    TAU( I ), WORK( S2POS ), LS2, IINFO )
  444: *        
  445: *            Copy the upper portion of A into AB 
  446: *        
  447:              DO 50 J = I, I+PK-1
  448:                 LK = MIN( KD, N-J ) + 1
  449:                 CALL ZCOPY( LK, A( J, J ), 1, AB( 1, J ), 1 )
  450:    50        CONTINUE
  451: *                
  452:              CALL ZLASET( 'Upper', PK, PK, ZERO, ONE, 
  453:      $                    A( I+KD, I ), LDA )
  454: *        
  455: *            Form the matrix T
  456: *        
  457:              CALL ZLARFT( 'Forward', 'Columnwise', PN, PK,
  458:      $                    A( I+KD, I ), LDA, TAU( I ), 
  459:      $                    WORK( TPOS ), LDT )
  460: *        
  461: *            Compute W:
  462: *             
  463:              CALL ZGEMM( 'No transpose', 'No transpose', PN, PK, PK,
  464:      $                   ONE, A( I+KD, I ), LDA,
  465:      $                         WORK( TPOS ), LDT,
  466:      $                   ZERO, WORK( S2POS ), LDS2 )
  467: *        
  468:              CALL ZHEMM( 'Left', UPLO, PN, PK,
  469:      $                   ONE, A( I+KD, I+KD ), LDA,
  470:      $                         WORK( S2POS ), LDS2,
  471:      $                   ZERO, WORK( WPOS ), LDW )
  472: *        
  473:              CALL ZGEMM( 'Conjugate', 'No transpose', PK, PK, PN,
  474:      $                   ONE, WORK( S2POS ), LDS2,
  475:      $                         WORK( WPOS ), LDW,
  476:      $                   ZERO, WORK( S1POS ), LDS1 )
  477: *        
  478:              CALL ZGEMM( 'No transpose', 'No transpose', PN, PK, PK,
  479:      $                   -HALF, A( I+KD, I ), LDA,
  480:      $                         WORK( S1POS ), LDS1,
  481:      $                   ONE, WORK( WPOS ), LDW )
  482: *             
  483: *        
  484: *            Update the unreduced submatrix A(i+kd:n,i+kd:n), using
  485: *            an update of the form:  A := A - V*W' - W*V'
  486: *        
  487:              CALL ZHER2K( UPLO, 'No transpose', PN, PK,
  488:      $                    -ONE, A( I+KD, I ), LDA,
  489:      $                           WORK( WPOS ), LDW,
  490:      $                    RONE, A( I+KD, I+KD ), LDA )
  491: *            ==================================================================
  492: *            RESTORE A FOR COMPARISON AND CHECKING TO BE REMOVED
  493: *             DO 45 J = I, I+PK-1
  494: *                LK = MIN( KD, N-J ) + 1
  495: *                CALL ZCOPY( LK, AB( 1, J ), 1, A( J, J ), 1 )
  496: *   45        CONTINUE
  497: *            ==================================================================
  498:    40     CONTINUE
  499: *
  500: *        Copy the lower band to AB which is the band storage matrix
  501: *
  502:          DO 60 J = N-KD+1, N
  503:             LK = MIN(KD, N-J) + 1
  504:             CALL ZCOPY( LK, A( J, J ), 1, AB( 1, J ), 1 )
  505:    60    CONTINUE
  506: 
  507:       END IF
  508: *
  509:       WORK( 1 ) = LWMIN
  510:       RETURN
  511: *
  512: *     End of ZHETRD_HE2HB
  513: *
  514:       END

CVSweb interface <joel.bertrand@systella.fr>