rpl/lapack/lapack/dsytrd_sy2sb.f - view

Return to dsytrd_sy2sb.f CVS log

Up to [local] / rpl / lapack / lapack

File: [local] / rpl / lapack / lapack / dsytrd_sy2sb.f
Revision 1.5: download - view: text, annotated - select for diffs - revision graph
Thu May 21 21:46:02 2020 UTC (4 years, 1 month ago) by bertrand
Branches: MAIN
CVS tags: rpl-4_1_33, rpl-4_1_32, HEAD

Mise à jour de Lapack.

    1: *> \brief \b DSYTRD_SY2SB
    2: *
    3: *  @generated from zhetrd_he2hb.f, fortran z -> d, Wed Dec  7 08:22:39 2016
    4: *      
    5: *  =========== DOCUMENTATION ===========
    6: *
    7: * Online html documentation available at 
    8: *            http://www.netlib.org/lapack/explore-html/ 
    9: *
   10: *> \htmlonly
   11: *> Download DSYTRD_SY2SB + dependencies 
   12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsytrd.f"> 
   13: *> [TGZ]</a> 
   14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsytrd.f"> 
   15: *> [ZIP]</a> 
   16: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsytrd.f"> 
   17: *> [TXT]</a>
   18: *> \endhtmlonly 
   19: *
   20: *  Definition:
   21: *  ===========
   22: *
   23: *       SUBROUTINE DSYTRD_SY2SB( UPLO, N, KD, A, LDA, AB, LDAB, TAU, 
   24: *                              WORK, LWORK, INFO )
   25: *
   26: *       IMPLICIT NONE
   27: *
   28: *       .. Scalar Arguments ..
   29: *       CHARACTER          UPLO
   30: *       INTEGER            INFO, LDA, LDAB, LWORK, N, KD
   31: *       ..
   32: *       .. Array Arguments ..
   33: *       DOUBLE PRECISION   A( LDA, * ), AB( LDAB, * ), 
   34: *                          TAU( * ), WORK( * )
   35: *       ..
   36: *  
   37: *
   38: *> \par Purpose:
   39: *  =============
   40: *>
   41: *> \verbatim
   42: *>
   43: *> DSYTRD_SY2SB reduces a real symmetric matrix A to real symmetric
   44: *> band-diagonal form AB by a orthogonal similarity transformation:
   45: *> Q**T * A * Q = AB.
   46: *> \endverbatim
   47: *
   48: *  Arguments:
   49: *  ==========
   50: *
   51: *> \param[in] UPLO
   52: *> \verbatim
   53: *>          UPLO is CHARACTER*1
   54: *>          = 'U':  Upper triangle of A is stored;
   55: *>          = 'L':  Lower triangle of A is stored.
   56: *> \endverbatim
   57: *>
   58: *> \param[in] N
   59: *> \verbatim
   60: *>          N is INTEGER
   61: *>          The order of the matrix A.  N >= 0.
   62: *> \endverbatim
   63: *>
   64: *> \param[in] KD
   65: *> \verbatim
   66: *>          KD is INTEGER
   67: *>          The number of superdiagonals of the reduced matrix if UPLO = 'U',
   68: *>          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
   69: *>          The reduced matrix is stored in the array AB.
   70: *> \endverbatim
   71: *>
   72: *> \param[in,out] A
   73: *> \verbatim
   74: *>          A is DOUBLE PRECISION array, dimension (LDA,N)
   75: *>          On entry, the symmetric matrix A.  If UPLO = 'U', the leading
   76: *>          N-by-N upper triangular part of A contains the upper
   77: *>          triangular part of the matrix A, and the strictly lower
   78: *>          triangular part of A is not referenced.  If UPLO = 'L', the
   79: *>          leading N-by-N lower triangular part of A contains the lower
   80: *>          triangular part of the matrix A, and the strictly upper
   81: *>          triangular part of A is not referenced.
   82: *>          On exit, if UPLO = 'U', the diagonal and first superdiagonal
   83: *>          of A are overwritten by the corresponding elements of the
   84: *>          tridiagonal matrix T, and the elements above the first
   85: *>          superdiagonal, with the array TAU, represent the orthogonal
   86: *>          matrix Q as a product of elementary reflectors; if UPLO
   87: *>          = 'L', the diagonal and first subdiagonal of A are over-
   88: *>          written by the corresponding elements of the tridiagonal
   89: *>          matrix T, and the elements below the first subdiagonal, with
   90: *>          the array TAU, represent the orthogonal matrix Q as a product
   91: *>          of elementary reflectors. See Further Details.
   92: *> \endverbatim
   93: *>
   94: *> \param[in] LDA
   95: *> \verbatim
   96: *>          LDA is INTEGER
   97: *>          The leading dimension of the array A.  LDA >= max(1,N).
   98: *> \endverbatim
   99: *>
  100: *> \param[out] AB
  101: *> \verbatim
  102: *>          AB is DOUBLE PRECISION array, dimension (LDAB,N)
  103: *>          On exit, the upper or lower triangle of the symmetric band
  104: *>          matrix A, stored in the first KD+1 rows of the array.  The
  105: *>          j-th column of A is stored in the j-th column of the array AB
  106: *>          as follows:
  107: *>          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
  108: *>          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
  109: *> \endverbatim
  110: *>
  111: *> \param[in] LDAB
  112: *> \verbatim
  113: *>          LDAB is INTEGER
  114: *>          The leading dimension of the array AB.  LDAB >= KD+1.
  115: *> \endverbatim
  116: *>
  117: *> \param[out] TAU
  118: *> \verbatim
  119: *>          TAU is DOUBLE PRECISION array, dimension (N-KD)
  120: *>          The scalar factors of the elementary reflectors (see Further
  121: *>          Details).
  122: *> \endverbatim
  123: *>
  124: *> \param[out] WORK
  125: *> \verbatim
  126: *>          WORK is DOUBLE PRECISION array, dimension (LWORK)
  127: *>          On exit, if INFO = 0, or if LWORK=-1, 
  128: *>          WORK(1) returns the size of LWORK.
  129: *> \endverbatim
  130: *>
  131: *> \param[in] LWORK
  132: *> \verbatim
  133: *>          LWORK is INTEGER
  134: *>          The dimension of the array WORK which should be calculated
  135: *>          by a workspace query. LWORK = MAX(1, LWORK_QUERY)
  136: *>          If LWORK = -1, then a workspace query is assumed; the routine
  137: *>          only calculates the optimal size of the WORK array, returns
  138: *>          this value as the first entry of the WORK array, and no error
  139: *>          message related to LWORK is issued by XERBLA.
  140: *>          LWORK_QUERY = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD
  141: *>          where FACTOPTNB is the blocking used by the QR or LQ
  142: *>          algorithm, usually FACTOPTNB=128 is a good choice otherwise
  143: *>          putting LWORK=-1 will provide the size of WORK.
  144: *> \endverbatim
  145: *>
  146: *> \param[out] INFO
  147: *> \verbatim
  148: *>          INFO is INTEGER
  149: *>          = 0:  successful exit
  150: *>          < 0:  if INFO = -i, the i-th argument had an illegal value
  151: *> \endverbatim
  152: *
  153: *  Authors:
  154: *  ========
  155: *
  156: *> \author Univ. of Tennessee 
  157: *> \author Univ. of California Berkeley 
  158: *> \author Univ. of Colorado Denver 
  159: *> \author NAG Ltd. 
  160: *
  161: *> \date November 2017
  162: *
  163: *> \ingroup doubleSYcomputational
  164: *
  165: *> \par Further Details:
  166: *  =====================
  167: *>
  168: *> \verbatim
  169: *>
  170: *>  Implemented by Azzam Haidar.
  171: *>
  172: *>  All details are available on technical report, SC11, SC13 papers.
  173: *>
  174: *>  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
  175: *>  Parallel reduction to condensed forms for symmetric eigenvalue problems
  176: *>  using aggregated fine-grained and memory-aware kernels. In Proceedings
  177: *>  of 2011 International Conference for High Performance Computing,
  178: *>  Networking, Storage and Analysis (SC '11), New York, NY, USA,
  179: *>  Article 8 , 11 pages.
  180: *>  http://doi.acm.org/10.1145/2063384.2063394
  181: *>
  182: *>  A. Haidar, J. Kurzak, P. Luszczek, 2013.
  183: *>  An improved parallel singular value algorithm and its implementation 
  184: *>  for multicore hardware, In Proceedings of 2013 International Conference
  185: *>  for High Performance Computing, Networking, Storage and Analysis (SC '13).
  186: *>  Denver, Colorado, USA, 2013.
  187: *>  Article 90, 12 pages.
  188: *>  http://doi.acm.org/10.1145/2503210.2503292
  189: *>
  190: *>  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
  191: *>  A novel hybrid CPU-GPU generalized eigensolver for electronic structure 
  192: *>  calculations based on fine-grained memory aware tasks.
  193: *>  International Journal of High Performance Computing Applications.
  194: *>  Volume 28 Issue 2, Pages 196-209, May 2014.
  195: *>  http://hpc.sagepub.com/content/28/2/196 
  196: *>
  197: *> \endverbatim
  198: *>
  199: *> \verbatim
  200: *>
  201: *>  If UPLO = 'U', the matrix Q is represented as a product of elementary
  202: *>  reflectors
  203: *>
  204: *>     Q = H(k)**T . . . H(2)**T H(1)**T, where k = n-kd.
  205: *>
  206: *>  Each H(i) has the form
  207: *>
  208: *>     H(i) = I - tau * v * v**T
  209: *>
  210: *>  where tau is a real scalar, and v is a real vector with
  211: *>  v(1:i+kd-1) = 0 and v(i+kd) = 1; conjg(v(i+kd+1:n)) is stored on exit in
  212: *>  A(i,i+kd+1:n), and tau in TAU(i).
  213: *>
  214: *>  If UPLO = 'L', the matrix Q is represented as a product of elementary
  215: *>  reflectors
  216: *>
  217: *>     Q = H(1) H(2) . . . H(k), where k = n-kd.
  218: *>
  219: *>  Each H(i) has the form
  220: *>
  221: *>     H(i) = I - tau * v * v**T
  222: *>
  223: *>  where tau is a real scalar, and v is a real vector with
  224: *>  v(kd+1:i) = 0 and v(i+kd+1) = 1; v(i+kd+2:n) is stored on exit in
  225: *>  A(i+kd+2:n,i), and tau in TAU(i).
  226: *>
  227: *>  The contents of A on exit are illustrated by the following examples
  228: *>  with n = 5:
  229: *>
  230: *>  if UPLO = 'U':                       if UPLO = 'L':
  231: *>
  232: *>    (  ab  ab/v1  v1      v1     v1    )              (  ab                            )
  233: *>    (      ab     ab/v2   v2     v2    )              (  ab/v1  ab                     )
  234: *>    (             ab      ab/v3  v3    )              (  v1     ab/v2  ab              )
  235: *>    (                     ab     ab/v4 )              (  v1     v2     ab/v3  ab       )
  236: *>    (                            ab    )              (  v1     v2     v3     ab/v4 ab )
  237: *>
  238: *>  where d and e denote diagonal and off-diagonal elements of T, and vi
  239: *>  denotes an element of the vector defining H(i).
  240: *> \endverbatim
  241: *>
  242: *  =====================================================================
  243:       SUBROUTINE DSYTRD_SY2SB( UPLO, N, KD, A, LDA, AB, LDAB, TAU, 
  244:      $                         WORK, LWORK, INFO )
  245: *
  246:       IMPLICIT NONE
  247: *
  248: *  -- LAPACK computational routine (version 3.8.0) --
  249: *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
  250: *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  251: *     November 2017
  252: *
  253: *     .. Scalar Arguments ..
  254:       CHARACTER          UPLO
  255:       INTEGER            INFO, LDA, LDAB, LWORK, N, KD
  256: *     ..
  257: *     .. Array Arguments ..
  258:       DOUBLE PRECISION   A( LDA, * ), AB( LDAB, * ), 
  259:      $                   TAU( * ), WORK( * )
  260: *     ..
  261: *
  262: *  =====================================================================
  263: *
  264: *     .. Parameters ..
  265:       DOUBLE PRECISION   RONE
  266:       DOUBLE PRECISION   ZERO, ONE, HALF
  267:       PARAMETER          ( RONE = 1.0D+0,
  268:      $                   ZERO = 0.0D+0,
  269:      $                   ONE = 1.0D+0,
  270:      $                   HALF = 0.5D+0 )
  271: *     ..
  272: *     .. Local Scalars ..
  273:       LOGICAL            LQUERY, UPPER
  274:       INTEGER            I, J, IINFO, LWMIN, PN, PK, LK,
  275:      $                   LDT, LDW, LDS2, LDS1, 
  276:      $                   LS2, LS1, LW, LT,
  277:      $                   TPOS, WPOS, S2POS, S1POS
  278: *     ..
  279: *     .. External Subroutines ..
  280:       EXTERNAL           XERBLA, DSYR2K, DSYMM, DGEMM, DCOPY,
  281:      $                   DLARFT, DGELQF, DGEQRF, DLASET
  282: *     ..
  283: *     .. Intrinsic Functions ..
  284:       INTRINSIC          MIN, MAX
  285: *     ..
  286: *     .. External Functions ..
  287:       LOGICAL            LSAME
  288:       INTEGER            ILAENV2STAGE 
  289:       EXTERNAL           LSAME, ILAENV2STAGE
  290: *     ..
  291: *     .. Executable Statements ..
  292: *
  293: *     Determine the minimal workspace size required 
  294: *     and test the input parameters
  295: *
  296:       INFO   = 0
  297:       UPPER  = LSAME( UPLO, 'U' )
  298:       LQUERY = ( LWORK.EQ.-1 )
  299:       LWMIN  = ILAENV2STAGE( 4, 'DSYTRD_SY2SB', '', N, KD, -1, -1 )
  300:       
  301:       IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
  302:          INFO = -1
  303:       ELSE IF( N.LT.0 ) THEN
  304:          INFO = -2
  305:       ELSE IF( KD.LT.0 ) THEN
  306:          INFO = -3
  307:       ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
  308:          INFO = -5
  309:       ELSE IF( LDAB.LT.MAX( 1, KD+1 ) ) THEN
  310:          INFO = -7
  311:       ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
  312:          INFO = -10
  313:       END IF
  314: *
  315:       IF( INFO.NE.0 ) THEN
  316:          CALL XERBLA( 'DSYTRD_SY2SB', -INFO )
  317:          RETURN
  318:       ELSE IF( LQUERY ) THEN
  319:          WORK( 1 ) = LWMIN
  320:          RETURN
  321:       END IF
  322: *
  323: *     Quick return if possible        
  324: *     Copy the upper/lower portion of A into AB 
  325: *
  326:       IF( N.LE.KD+1 ) THEN
  327:           IF( UPPER ) THEN
  328:               DO 100 I = 1, N
  329:                   LK = MIN( KD+1, I )
  330:                   CALL DCOPY( LK, A( I-LK+1, I ), 1, 
  331:      $                            AB( KD+1-LK+1, I ), 1 )
  332:   100         CONTINUE
  333:           ELSE
  334:               DO 110 I = 1, N
  335:                   LK = MIN( KD+1, N-I+1 )
  336:                   CALL DCOPY( LK, A( I, I ), 1, AB( 1, I ), 1 )
  337:   110         CONTINUE
  338:           ENDIF
  339:           WORK( 1 ) = 1
  340:           RETURN
  341:       END IF
  342: *
  343: *     Determine the pointer position for the workspace
  344: *      
  345:       LDT    = KD
  346:       LDS1   = KD
  347:       LT     = LDT*KD
  348:       LW     = N*KD
  349:       LS1    = LDS1*KD
  350:       LS2    = LWMIN - LT - LW - LS1
  351: *      LS2 = N*MAX(KD,FACTOPTNB) 
  352:       TPOS   = 1
  353:       WPOS   = TPOS  + LT
  354:       S1POS  = WPOS  + LW
  355:       S2POS  = S1POS + LS1 
  356:       IF( UPPER ) THEN
  357:           LDW    = KD
  358:           LDS2   = KD
  359:       ELSE
  360:           LDW    = N
  361:           LDS2   = N
  362:       ENDIF
  363: *
  364: *
  365: *     Set the workspace of the triangular matrix T to zero once such a
  366: *     way every time T is generated the upper/lower portion will be always zero
  367: *   
  368:       CALL DLASET( "A", LDT, KD, ZERO, ZERO, WORK( TPOS ), LDT )
  369: *
  370:       IF( UPPER ) THEN
  371:           DO 10 I = 1, N - KD, KD
  372:              PN = N-I-KD+1
  373:              PK = MIN( N-I-KD+1, KD )
  374: *        
  375: *            Compute the LQ factorization of the current block
  376: *        
  377:              CALL DGELQF( KD, PN, A( I, I+KD ), LDA,
  378:      $                    TAU( I ), WORK( S2POS ), LS2, IINFO )
  379: *        
  380: *            Copy the upper portion of A into AB
  381: *        
  382:              DO 20 J = I, I+PK-1
  383:                 LK = MIN( KD, N-J ) + 1
  384:                 CALL DCOPY( LK, A( J, J ), LDA, AB( KD+1, J ), LDAB-1 )
  385:    20        CONTINUE
  386: *                
  387:              CALL DLASET( 'Lower', PK, PK, ZERO, ONE, 
  388:      $                    A( I, I+KD ), LDA )
  389: *        
  390: *            Form the matrix T
  391: *        
  392:              CALL DLARFT( 'Forward', 'Rowwise', PN, PK,
  393:      $                    A( I, I+KD ), LDA, TAU( I ), 
  394:      $                    WORK( TPOS ), LDT )
  395: *        
  396: *            Compute W:
  397: *             
  398:              CALL DGEMM( 'Conjugate', 'No transpose', PK, PN, PK,
  399:      $                   ONE,  WORK( TPOS ), LDT,
  400:      $                         A( I, I+KD ), LDA,
  401:      $                   ZERO, WORK( S2POS ), LDS2 )
  402: *        
  403:              CALL DSYMM( 'Right', UPLO, PK, PN,
  404:      $                   ONE,  A( I+KD, I+KD ), LDA,
  405:      $                         WORK( S2POS ), LDS2,
  406:      $                   ZERO, WORK( WPOS ), LDW )
  407: *        
  408:              CALL DGEMM( 'No transpose', 'Conjugate', PK, PK, PN,
  409:      $                   ONE,  WORK( WPOS ), LDW,
  410:      $                         WORK( S2POS ), LDS2,
  411:      $                   ZERO, WORK( S1POS ), LDS1 )
  412: *        
  413:              CALL DGEMM( 'No transpose', 'No transpose', PK, PN, PK,
  414:      $                   -HALF, WORK( S1POS ), LDS1, 
  415:      $                          A( I, I+KD ), LDA,
  416:      $                   ONE,   WORK( WPOS ), LDW )
  417: *             
  418: *        
  419: *            Update the unreduced submatrix A(i+kd:n,i+kd:n), using
  420: *            an update of the form:  A := A - V'*W - W'*V
  421: *        
  422:              CALL DSYR2K( UPLO, 'Conjugate', PN, PK,
  423:      $                    -ONE, A( I, I+KD ), LDA,
  424:      $                          WORK( WPOS ), LDW,
  425:      $                    RONE, A( I+KD, I+KD ), LDA )
  426:    10     CONTINUE
  427: *
  428: *        Copy the upper band to AB which is the band storage matrix
  429: *
  430:          DO 30 J = N-KD+1, N
  431:             LK = MIN(KD, N-J) + 1
  432:             CALL DCOPY( LK, A( J, J ), LDA, AB( KD+1, J ), LDAB-1 )
  433:    30    CONTINUE
  434: *
  435:       ELSE
  436: *
  437: *         Reduce the lower triangle of A to lower band matrix
  438: *        
  439:           DO 40 I = 1, N - KD, KD
  440:              PN = N-I-KD+1
  441:              PK = MIN( N-I-KD+1, KD )
  442: *        
  443: *            Compute the QR factorization of the current block
  444: *        
  445:              CALL DGEQRF( PN, KD, A( I+KD, I ), LDA,
  446:      $                    TAU( I ), WORK( S2POS ), LS2, IINFO )
  447: *        
  448: *            Copy the upper portion of A into AB 
  449: *        
  450:              DO 50 J = I, I+PK-1
  451:                 LK = MIN( KD, N-J ) + 1
  452:                 CALL DCOPY( LK, A( J, J ), 1, AB( 1, J ), 1 )
  453:    50        CONTINUE
  454: *                
  455:              CALL DLASET( 'Upper', PK, PK, ZERO, ONE, 
  456:      $                    A( I+KD, I ), LDA )
  457: *        
  458: *            Form the matrix T
  459: *        
  460:              CALL DLARFT( 'Forward', 'Columnwise', PN, PK,
  461:      $                    A( I+KD, I ), LDA, TAU( I ), 
  462:      $                    WORK( TPOS ), LDT )
  463: *        
  464: *            Compute W:
  465: *             
  466:              CALL DGEMM( 'No transpose', 'No transpose', PN, PK, PK,
  467:      $                   ONE, A( I+KD, I ), LDA,
  468:      $                         WORK( TPOS ), LDT,
  469:      $                   ZERO, WORK( S2POS ), LDS2 )
  470: *        
  471:              CALL DSYMM( 'Left', UPLO, PN, PK,
  472:      $                   ONE, A( I+KD, I+KD ), LDA,
  473:      $                         WORK( S2POS ), LDS2,
  474:      $                   ZERO, WORK( WPOS ), LDW )
  475: *        
  476:              CALL DGEMM( 'Conjugate', 'No transpose', PK, PK, PN,
  477:      $                   ONE, WORK( S2POS ), LDS2,
  478:      $                         WORK( WPOS ), LDW,
  479:      $                   ZERO, WORK( S1POS ), LDS1 )
  480: *        
  481:              CALL DGEMM( 'No transpose', 'No transpose', PN, PK, PK,
  482:      $                   -HALF, A( I+KD, I ), LDA,
  483:      $                         WORK( S1POS ), LDS1,
  484:      $                   ONE, WORK( WPOS ), LDW )
  485: *             
  486: *        
  487: *            Update the unreduced submatrix A(i+kd:n,i+kd:n), using
  488: *            an update of the form:  A := A - V*W' - W*V'
  489: *        
  490:              CALL DSYR2K( UPLO, 'No transpose', PN, PK,
  491:      $                    -ONE, A( I+KD, I ), LDA,
  492:      $                           WORK( WPOS ), LDW,
  493:      $                    RONE, A( I+KD, I+KD ), LDA )
  494: *            ==================================================================
  495: *            RESTORE A FOR COMPARISON AND CHECKING TO BE REMOVED
  496: *             DO 45 J = I, I+PK-1
  497: *                LK = MIN( KD, N-J ) + 1
  498: *                CALL DCOPY( LK, AB( 1, J ), 1, A( J, J ), 1 )
  499: *   45        CONTINUE
  500: *            ==================================================================
  501:    40     CONTINUE
  502: *
  503: *        Copy the lower band to AB which is the band storage matrix
  504: *
  505:          DO 60 J = N-KD+1, N
  506:             LK = MIN(KD, N-J) + 1
  507:             CALL DCOPY( LK, A( J, J ), 1, AB( 1, J ), 1 )
  508:    60    CONTINUE
  509: 
  510:       END IF
  511: *
  512:       WORK( 1 ) = LWMIN
  513:       RETURN
  514: *
  515: *     End of DSYTRD_SY2SB
  516: *
  517:       END

CVSweb interface <joel.bertrand@systella.fr>