File:  [local] / rpl / lapack / lapack / zhetrd_hb2st.F
Revision 1.3: download - view: text, annotated - select for diffs - revision graph
Tue May 29 07:18:20 2018 UTC (6 years ago) by bertrand
Branches: MAIN
CVS tags: rpl-4_1_31, rpl-4_1_30, rpl-4_1_29, rpl-4_1_28, HEAD
Mise à jour de Lapack.

    1: *> \brief \b ZHETRD_HB2ST reduces a complex Hermitian band matrix A to real symmetric tridiagonal form T
    2: *
    3: *  =========== DOCUMENTATION ===========
    4: *
    5: * Online html documentation available at
    6: *            http://www.netlib.org/lapack/explore-html/
    7: *
    8: *> \htmlonly
    9: *> Download ZHETRD_HB2ST + dependencies
   10: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhbtrd_hb2st.f">
   11: *> [TGZ]</a>
   12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhbtrd_hb2st.f">
   13: *> [ZIP]</a>
   14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhbtrd_hb2st.f">
   15: *> [TXT]</a>
   16: *> \endhtmlonly
   17: *
   18: *  Definition:
   19: *  ===========
   20: *
   21: *       SUBROUTINE ZHETRD_HB2ST( STAGE1, VECT, UPLO, N, KD, AB, LDAB, 
   22: *                               D, E, HOUS, LHOUS, WORK, LWORK, INFO )
   23: *
   24: *       #if defined(_OPENMP)
   25: *       use omp_lib
   26: *       #endif
   27: *
   28: *       IMPLICIT NONE
   29: *
   30: *       .. Scalar Arguments ..
   31: *       CHARACTER          STAGE1, UPLO, VECT
   32: *       INTEGER            N, KD, IB, LDAB, LHOUS, LWORK, INFO
   33: *       ..
   34: *       .. Array Arguments ..
   35: *       DOUBLE PRECISION   D( * ), E( * )
   36: *       COMPLEX*16         AB( LDAB, * ), HOUS( * ), WORK( * )
   37: *       ..
   38: *
   39: *
   40: *> \par Purpose:
   41: *  =============
   42: *>
   43: *> \verbatim
   44: *>
   45: *> ZHETRD_HB2ST reduces a complex Hermitian band matrix A to real symmetric
   46: *> tridiagonal form T by a unitary similarity transformation:
   47: *> Q**H * A * Q = T.
   48: *> \endverbatim
   49: *
   50: *  Arguments:
   51: *  ==========
   52: *
   53: *> \param[in] STAGE
   54: *> \verbatim
   55: *>          STAGE is CHARACTER*1
   56: *>          = 'N':  "No": to mention that the stage 1 of the reduction  
   57: *>                  from dense to band using the zhetrd_he2hb routine
   58: *>                  was not called before this routine to reproduce AB. 
   59: *>                  In other term this routine is called as standalone. 
   60: *>          = 'Y':  "Yes": to mention that the stage 1 of the 
   61: *>                  reduction from dense to band using the zhetrd_he2hb 
   62: *>                  routine has been called to produce AB (e.g., AB is
   63: *>                  the output of zhetrd_he2hb.
   64: *> \endverbatim
   65: *>
   66: *> \param[in] VECT
   67: *> \verbatim
   68: *>          VECT is CHARACTER*1
   69: *>          = 'N':  No need for the Housholder representation, 
   70: *>                  and thus LHOUS is of size max(1, 4*N);
   71: *>          = 'V':  the Householder representation is needed to 
   72: *>                  either generate or to apply Q later on, 
   73: *>                  then LHOUS is to be queried and computed.
   74: *>                  (NOT AVAILABLE IN THIS RELEASE).
   75: *> \endverbatim
   76: *>
   77: *> \param[in] UPLO
   78: *> \verbatim
   79: *>          UPLO is CHARACTER*1
   80: *>          = 'U':  Upper triangle of A is stored;
   81: *>          = 'L':  Lower triangle of A is stored.
   82: *> \endverbatim
   83: *>
   84: *> \param[in] N
   85: *> \verbatim
   86: *>          N is INTEGER
   87: *>          The order of the matrix A.  N >= 0.
   88: *> \endverbatim
   89: *>
   90: *> \param[in] KD
   91: *> \verbatim
   92: *>          KD is INTEGER
   93: *>          The number of superdiagonals of the matrix A if UPLO = 'U',
   94: *>          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
   95: *> \endverbatim
   96: *>
   97: *> \param[in,out] AB
   98: *> \verbatim
   99: *>          AB is COMPLEX*16 array, dimension (LDAB,N)
  100: *>          On entry, the upper or lower triangle of the Hermitian band
  101: *>          matrix A, stored in the first KD+1 rows of the array.  The
  102: *>          j-th column of A is stored in the j-th column of the array AB
  103: *>          as follows:
  104: *>          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
  105: *>          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
  106: *>          On exit, the diagonal elements of AB are overwritten by the
  107: *>          diagonal elements of the tridiagonal matrix T; if KD > 0, the
  108: *>          elements on the first superdiagonal (if UPLO = 'U') or the
  109: *>          first subdiagonal (if UPLO = 'L') are overwritten by the
  110: *>          off-diagonal elements of T; the rest of AB is overwritten by
  111: *>          values generated during the reduction.
  112: *> \endverbatim
  113: *>
  114: *> \param[in] LDAB
  115: *> \verbatim
  116: *>          LDAB is INTEGER
  117: *>          The leading dimension of the array AB.  LDAB >= KD+1.
  118: *> \endverbatim
  119: *>
  120: *> \param[out] D
  121: *> \verbatim
  122: *>          D is DOUBLE PRECISION array, dimension (N)
  123: *>          The diagonal elements of the tridiagonal matrix T.
  124: *> \endverbatim
  125: *>
  126: *> \param[out] E
  127: *> \verbatim
  128: *>          E is DOUBLE PRECISION array, dimension (N-1)
  129: *>          The off-diagonal elements of the tridiagonal matrix T:
  130: *>          E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'.
  131: *> \endverbatim
  132: *>
  133: *> \param[out] HOUS
  134: *> \verbatim
  135: *>          HOUS is COMPLEX*16 array, dimension LHOUS, that
  136: *>          store the Householder representation.
  137: *> \endverbatim
  138: *>
  139: *> \param[in] LHOUS
  140: *> \verbatim
  141: *>          LHOUS is INTEGER
  142: *>          The dimension of the array HOUS. LHOUS = MAX(1, dimension)
  143: *>          If LWORK = -1, or LHOUS=-1,
  144: *>          then a query is assumed; the routine
  145: *>          only calculates the optimal size of the HOUS array, returns
  146: *>          this value as the first entry of the HOUS array, and no error
  147: *>          message related to LHOUS is issued by XERBLA.
  148: *>          LHOUS = MAX(1, dimension) where
  149: *>          dimension = 4*N if VECT='N'
  150: *>          not available now if VECT='H'     
  151: *> \endverbatim
  152: *>
  153: *> \param[out] WORK
  154: *> \verbatim
  155: *>          WORK is COMPLEX*16 array, dimension LWORK.
  156: *> \endverbatim
  157: *>
  158: *> \param[in] LWORK
  159: *> \verbatim
  160: *>          LWORK is INTEGER
  161: *>          The dimension of the array WORK. LWORK = MAX(1, dimension)
  162: *>          If LWORK = -1, or LHOUS=-1,
  163: *>          then a workspace query is assumed; the routine
  164: *>          only calculates the optimal size of the WORK array, returns
  165: *>          this value as the first entry of the WORK array, and no error
  166: *>          message related to LWORK is issued by XERBLA.
  167: *>          LWORK = MAX(1, dimension) where
  168: *>          dimension   = (2KD+1)*N + KD*NTHREADS
  169: *>          where KD is the blocking size of the reduction,
  170: *>          FACTOPTNB is the blocking used by the QR or LQ
  171: *>          algorithm, usually FACTOPTNB=128 is a good choice
  172: *>          NTHREADS is the number of threads used when
  173: *>          openMP compilation is enabled, otherwise =1.
  174: *> \endverbatim
  175: *>
  176: *> \param[out] INFO
  177: *> \verbatim
  178: *>          INFO is INTEGER
  179: *>          = 0:  successful exit
  180: *>          < 0:  if INFO = -i, the i-th argument had an illegal value
  181: *> \endverbatim
  182: *
  183: *  Authors:
  184: *  ========
  185: *
  186: *> \author Univ. of Tennessee
  187: *> \author Univ. of California Berkeley
  188: *> \author Univ. of Colorado Denver
  189: *> \author NAG Ltd.
  190: *
  191: *> \date December 2016
  192: *
  193: *> \ingroup complex16OTHERcomputational
  194: *
  195: *> \par Further Details:
  196: *  =====================
  197: *>
  198: *> \verbatim
  199: *>
  200: *>  Implemented by Azzam Haidar.
  201: *>
  202: *>  All details are available on technical report, SC11, SC13 papers.
  203: *>
  204: *>  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
  205: *>  Parallel reduction to condensed forms for symmetric eigenvalue problems
  206: *>  using aggregated fine-grained and memory-aware kernels. In Proceedings
  207: *>  of 2011 International Conference for High Performance Computing,
  208: *>  Networking, Storage and Analysis (SC '11), New York, NY, USA,
  209: *>  Article 8 , 11 pages.
  210: *>  http://doi.acm.org/10.1145/2063384.2063394
  211: *>
  212: *>  A. Haidar, J. Kurzak, P. Luszczek, 2013.
  213: *>  An improved parallel singular value algorithm and its implementation 
  214: *>  for multicore hardware, In Proceedings of 2013 International Conference
  215: *>  for High Performance Computing, Networking, Storage and Analysis (SC '13).
  216: *>  Denver, Colorado, USA, 2013.
  217: *>  Article 90, 12 pages.
  218: *>  http://doi.acm.org/10.1145/2503210.2503292
  219: *>
  220: *>  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
  221: *>  A novel hybrid CPU-GPU generalized eigensolver for electronic structure 
  222: *>  calculations based on fine-grained memory aware tasks.
  223: *>  International Journal of High Performance Computing Applications.
  224: *>  Volume 28 Issue 2, Pages 196-209, May 2014.
  225: *>  http://hpc.sagepub.com/content/28/2/196 
  226: *>
  227: *> \endverbatim
  228: *>
  229: *  =====================================================================
  230:       SUBROUTINE ZHETRD_HB2ST( STAGE1, VECT, UPLO, N, KD, AB, LDAB, 
  231:      $                         D, E, HOUS, LHOUS, WORK, LWORK, INFO )
  232: *
  233: *
  234: #if defined(_OPENMP)
  235:       use omp_lib
  236: #endif
  237: *
  238:       IMPLICIT NONE
  239: *
  240: *  -- LAPACK computational routine (version 3.7.0) --
  241: *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
  242: *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  243: *     December 2016
  244: *
  245: *     .. Scalar Arguments ..
  246:       CHARACTER          STAGE1, UPLO, VECT
  247:       INTEGER            N, KD, LDAB, LHOUS, LWORK, INFO
  248: *     ..
  249: *     .. Array Arguments ..
  250:       DOUBLE PRECISION   D( * ), E( * )
  251:       COMPLEX*16         AB( LDAB, * ), HOUS( * ), WORK( * )
  252: *     ..
  253: *
  254: *  =====================================================================
  255: *
  256: *     .. Parameters ..
  257:       DOUBLE PRECISION   RZERO
  258:       COMPLEX*16         ZERO, ONE
  259:       PARAMETER          ( RZERO = 0.0D+0,
  260:      $                   ZERO = ( 0.0D+0, 0.0D+0 ),
  261:      $                   ONE  = ( 1.0D+0, 0.0D+0 ) )
  262: *     ..
  263: *     .. Local Scalars ..
  264:       LOGICAL            LQUERY, WANTQ, UPPER, AFTERS1
  265:       INTEGER            I, M, K, IB, SWEEPID, MYID, SHIFT, STT, ST, 
  266:      $                   ED, STIND, EDIND, BLKLASTIND, COLPT, THED,
  267:      $                   STEPERCOL, GRSIZ, THGRSIZ, THGRNB, THGRID,
  268:      $                   NBTILES, TTYPE, TID, NTHREADS, DEBUG,
  269:      $                   ABDPOS, ABOFDPOS, DPOS, OFDPOS, AWPOS, 
  270:      $                   INDA, INDW, APOS, SIZEA, LDA, INDV, INDTAU,
  271:      $                   SIZEV, SIZETAU, LDV, LHMIN, LWMIN
  272:       DOUBLE PRECISION   ABSTMP
  273:       COMPLEX*16         TMP
  274: *     ..
  275: *     .. External Subroutines ..
  276:       EXTERNAL           ZHB2ST_KERNELS, ZLACPY, ZLASET
  277: *     ..
  278: *     .. Intrinsic Functions ..
  279:       INTRINSIC          MIN, MAX, CEILING, DBLE, REAL
  280: *     ..
  281: *     .. External Functions ..
  282:       LOGICAL            LSAME
  283:       INTEGER            ILAENV 
  284:       EXTERNAL           LSAME, ILAENV
  285: *     ..
  286: *     .. Executable Statements ..
  287: *
  288: *     Determine the minimal workspace size required.
  289: *     Test the input parameters
  290: *
  291:       DEBUG   = 0
  292:       INFO    = 0
  293:       AFTERS1 = LSAME( STAGE1, 'Y' )
  294:       WANTQ   = LSAME( VECT, 'V' )
  295:       UPPER   = LSAME( UPLO, 'U' )
  296:       LQUERY  = ( LWORK.EQ.-1 ) .OR. ( LHOUS.EQ.-1 )
  297: *
  298: *     Determine the block size, the workspace size and the hous size.
  299: *
  300:       IB     = ILAENV( 18, 'ZHETRD_HB2ST', VECT, N, KD, -1, -1 )
  301:       LHMIN  = ILAENV( 19, 'ZHETRD_HB2ST', VECT, N, KD, IB, -1 )
  302:       LWMIN  = ILAENV( 20, 'ZHETRD_HB2ST', VECT, N, KD, IB, -1 )
  303: *
  304:       IF( .NOT.AFTERS1 .AND. .NOT.LSAME( STAGE1, 'N' ) ) THEN
  305:          INFO = -1
  306:       ELSE IF( .NOT.LSAME( VECT, 'N' ) ) THEN
  307:          INFO = -2
  308:       ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
  309:          INFO = -3
  310:       ELSE IF( N.LT.0 ) THEN
  311:          INFO = -4
  312:       ELSE IF( KD.LT.0 ) THEN
  313:          INFO = -5
  314:       ELSE IF( LDAB.LT.(KD+1) ) THEN
  315:          INFO = -7
  316:       ELSE IF( LHOUS.LT.LHMIN .AND. .NOT.LQUERY ) THEN
  317:          INFO = -11
  318:       ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
  319:          INFO = -13
  320:       END IF
  321: *
  322:       IF( INFO.EQ.0 ) THEN
  323:          HOUS( 1 ) = LHMIN
  324:          WORK( 1 ) = LWMIN
  325:       END IF
  326: *
  327:       IF( INFO.NE.0 ) THEN
  328:          CALL XERBLA( 'ZHETRD_HB2ST', -INFO )
  329:          RETURN
  330:       ELSE IF( LQUERY ) THEN
  331:          RETURN
  332:       END IF
  333: *
  334: *     Quick return if possible
  335: *
  336:       IF( N.EQ.0 ) THEN
  337:           HOUS( 1 ) = 1
  338:           WORK( 1 ) = 1
  339:           RETURN
  340:       END IF
  341: *
  342: *     Determine pointer position
  343: *
  344:       LDV      = KD + IB
  345:       SIZETAU  = 2 * N
  346:       SIZEV    = 2 * N
  347:       INDTAU   = 1
  348:       INDV     = INDTAU + SIZETAU
  349:       LDA      = 2 * KD + 1
  350:       SIZEA    = LDA * N
  351:       INDA     = 1
  352:       INDW     = INDA + SIZEA
  353:       NTHREADS = 1
  354:       TID      = 0
  355: *
  356:       IF( UPPER ) THEN
  357:           APOS     = INDA + KD
  358:           AWPOS    = INDA
  359:           DPOS     = APOS + KD
  360:           OFDPOS   = DPOS - 1
  361:           ABDPOS   = KD + 1
  362:           ABOFDPOS = KD
  363:       ELSE
  364:           APOS     = INDA 
  365:           AWPOS    = INDA + KD + 1
  366:           DPOS     = APOS
  367:           OFDPOS   = DPOS + 1
  368:           ABDPOS   = 1
  369:           ABOFDPOS = 2
  370: 
  371:       ENDIF
  372: *      
  373: *     Case KD=0: 
  374: *     The matrix is diagonal. We just copy it (convert to "real" for 
  375: *     complex because D is double and the imaginary part should be 0) 
  376: *     and store it in D. A sequential code here is better or 
  377: *     in a parallel environment it might need two cores for D and E
  378: *
  379:       IF( KD.EQ.0 ) THEN
  380:           DO 30 I = 1, N
  381:               D( I ) = DBLE( AB( ABDPOS, I ) )
  382:    30     CONTINUE
  383:           DO 40 I = 1, N-1
  384:               E( I ) = RZERO
  385:    40     CONTINUE
  386: *
  387:           HOUS( 1 ) = 1
  388:           WORK( 1 ) = 1
  389:           RETURN
  390:       END IF
  391: *      
  392: *     Case KD=1: 
  393: *     The matrix is already Tridiagonal. We have to make diagonal 
  394: *     and offdiagonal elements real, and store them in D and E.
  395: *     For that, for real precision just copy the diag and offdiag 
  396: *     to D and E while for the COMPLEX case the bulge chasing is  
  397: *     performed to convert the hermetian tridiagonal to symmetric 
  398: *     tridiagonal. A simpler coversion formula might be used, but then 
  399: *     updating the Q matrix will be required and based if Q is generated
  400: *     or not this might complicate the story. 
  401: *      
  402:       IF( KD.EQ.1 ) THEN
  403:           DO 50 I = 1, N
  404:               D( I ) = DBLE( AB( ABDPOS, I ) )
  405:    50     CONTINUE
  406: *
  407: *         make off-diagonal elements real and copy them to E
  408: *
  409:           IF( UPPER ) THEN
  410:               DO 60 I = 1, N - 1
  411:                   TMP = AB( ABOFDPOS, I+1 )
  412:                   ABSTMP = ABS( TMP )
  413:                   AB( ABOFDPOS, I+1 ) = ABSTMP
  414:                   E( I ) = ABSTMP
  415:                   IF( ABSTMP.NE.RZERO ) THEN
  416:                      TMP = TMP / ABSTMP
  417:                   ELSE
  418:                      TMP = ONE
  419:                   END IF
  420:                   IF( I.LT.N-1 )
  421:      $               AB( ABOFDPOS, I+2 ) = AB( ABOFDPOS, I+2 )*TMP
  422: C                  IF( WANTZ ) THEN
  423: C                     CALL ZSCAL( N, DCONJG( TMP ), Q( 1, I+1 ), 1 )
  424: C                  END IF
  425:    60         CONTINUE
  426:           ELSE
  427:               DO 70 I = 1, N - 1
  428:                  TMP = AB( ABOFDPOS, I )
  429:                  ABSTMP = ABS( TMP )
  430:                  AB( ABOFDPOS, I ) = ABSTMP
  431:                  E( I ) = ABSTMP
  432:                  IF( ABSTMP.NE.RZERO ) THEN
  433:                     TMP = TMP / ABSTMP
  434:                  ELSE
  435:                     TMP = ONE
  436:                  END IF
  437:                  IF( I.LT.N-1 )
  438:      $              AB( ABOFDPOS, I+1 ) = AB( ABOFDPOS, I+1 )*TMP
  439: C                 IF( WANTQ ) THEN
  440: C                    CALL ZSCAL( N, TMP, Q( 1, I+1 ), 1 )
  441: C                 END IF
  442:    70         CONTINUE
  443:           ENDIF
  444: *
  445:           HOUS( 1 ) = 1
  446:           WORK( 1 ) = 1
  447:           RETURN
  448:       END IF
  449: *
  450: *     Main code start here. 
  451: *     Reduce the hermitian band of A to a tridiagonal matrix.
  452: *
  453:       THGRSIZ   = N
  454:       GRSIZ     = 1
  455:       SHIFT     = 3
  456:       NBTILES   = CEILING( REAL(N)/REAL(KD) )
  457:       STEPERCOL = CEILING( REAL(SHIFT)/REAL(GRSIZ) )
  458:       THGRNB    = CEILING( REAL(N-1)/REAL(THGRSIZ) )
  459: *      
  460:       CALL ZLACPY( "A", KD+1, N, AB, LDAB, WORK( APOS ), LDA )
  461:       CALL ZLASET( "A", KD,   N, ZERO, ZERO, WORK( AWPOS ), LDA )
  462: *
  463: *
  464: *     openMP parallelisation start here
  465: *
  466: #if defined(_OPENMP)
  467: !$OMP PARALLEL PRIVATE( TID, THGRID, BLKLASTIND )
  468: !$OMP$         PRIVATE( THED, I, M, K, ST, ED, STT, SWEEPID ) 
  469: !$OMP$         PRIVATE( MYID, TTYPE, COLPT, STIND, EDIND )
  470: !$OMP$         SHARED ( UPLO, WANTQ, INDV, INDTAU, HOUS, WORK)
  471: !$OMP$         SHARED ( N, KD, IB, NBTILES, LDA, LDV, INDA )
  472: !$OMP$         SHARED ( STEPERCOL, THGRNB, THGRSIZ, GRSIZ, SHIFT )
  473: !$OMP MASTER
  474: #endif
  475: *
  476: *     main bulge chasing loop
  477: *      
  478:       DO 100 THGRID = 1, THGRNB
  479:           STT  = (THGRID-1)*THGRSIZ+1
  480:           THED = MIN( (STT + THGRSIZ -1), (N-1))
  481:           DO 110 I = STT, N-1
  482:               ED = MIN( I, THED )
  483:               IF( STT.GT.ED ) EXIT
  484:               DO 120 M = 1, STEPERCOL
  485:                   ST = STT
  486:                   DO 130 SWEEPID = ST, ED
  487:                       DO 140 K = 1, GRSIZ
  488:                           MYID  = (I-SWEEPID)*(STEPERCOL*GRSIZ) 
  489:      $                           + (M-1)*GRSIZ + K
  490:                           IF ( MYID.EQ.1 ) THEN
  491:                               TTYPE = 1
  492:                           ELSE
  493:                               TTYPE = MOD( MYID, 2 ) + 2
  494:                           ENDIF
  495: 
  496:                           IF( TTYPE.EQ.2 ) THEN
  497:                               COLPT      = (MYID/2)*KD + SWEEPID
  498:                               STIND      = COLPT-KD+1
  499:                               EDIND      = MIN(COLPT,N)
  500:                               BLKLASTIND = COLPT
  501:                           ELSE
  502:                               COLPT      = ((MYID+1)/2)*KD + SWEEPID
  503:                               STIND      = COLPT-KD+1
  504:                               EDIND      = MIN(COLPT,N)
  505:                               IF( ( STIND.GE.EDIND-1 ).AND.
  506:      $                            ( EDIND.EQ.N ) ) THEN
  507:                                   BLKLASTIND = N
  508:                               ELSE
  509:                                   BLKLASTIND = 0
  510:                               ENDIF
  511:                           ENDIF
  512: *
  513: *                         Call the kernel
  514: *                             
  515: #if defined(_OPENMP)
  516:                           IF( TTYPE.NE.1 ) THEN      
  517: !$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1))
  518: !$OMP$     DEPEND(in:WORK(MYID-1))
  519: !$OMP$     DEPEND(out:WORK(MYID))
  520:                               TID      = OMP_GET_THREAD_NUM()
  521:                               CALL ZHB2ST_KERNELS( UPLO, WANTQ, TTYPE, 
  522:      $                             STIND, EDIND, SWEEPID, N, KD, IB,
  523:      $                             WORK ( INDA ), LDA, 
  524:      $                             HOUS( INDV ), HOUS( INDTAU ), LDV,
  525:      $                             WORK( INDW + TID*KD ) )
  526: !$OMP END TASK
  527:                           ELSE
  528: !$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1))
  529: !$OMP$     DEPEND(out:WORK(MYID))
  530:                               TID      = OMP_GET_THREAD_NUM()
  531:                               CALL ZHB2ST_KERNELS( UPLO, WANTQ, TTYPE, 
  532:      $                             STIND, EDIND, SWEEPID, N, KD, IB,
  533:      $                             WORK ( INDA ), LDA, 
  534:      $                             HOUS( INDV ), HOUS( INDTAU ), LDV,
  535:      $                             WORK( INDW + TID*KD ) )
  536: !$OMP END TASK
  537:                           ENDIF
  538: #else
  539:                           CALL ZHB2ST_KERNELS( UPLO, WANTQ, TTYPE, 
  540:      $                         STIND, EDIND, SWEEPID, N, KD, IB,
  541:      $                         WORK ( INDA ), LDA, 
  542:      $                         HOUS( INDV ), HOUS( INDTAU ), LDV,
  543:      $                         WORK( INDW + TID*KD ) )
  544: #endif 
  545:                           IF ( BLKLASTIND.GE.(N-1) ) THEN
  546:                               STT = STT + 1
  547:                               EXIT
  548:                           ENDIF
  549:   140                 CONTINUE
  550:   130             CONTINUE
  551:   120         CONTINUE
  552:   110     CONTINUE
  553:   100 CONTINUE
  554: *
  555: #if defined(_OPENMP)
  556: !$OMP END MASTER
  557: !$OMP END PARALLEL
  558: #endif
  559: *      
  560: *     Copy the diagonal from A to D. Note that D is REAL thus only
  561: *     the Real part is needed, the imaginary part should be zero.
  562: *
  563:       DO 150 I = 1, N
  564:           D( I ) = DBLE( WORK( DPOS+(I-1)*LDA ) )
  565:   150 CONTINUE
  566: *      
  567: *     Copy the off diagonal from A to E. Note that E is REAL thus only
  568: *     the Real part is needed, the imaginary part should be zero.
  569: *
  570:       IF( UPPER ) THEN
  571:           DO 160 I = 1, N-1
  572:              E( I ) = DBLE( WORK( OFDPOS+I*LDA ) )
  573:   160     CONTINUE
  574:       ELSE
  575:           DO 170 I = 1, N-1
  576:              E( I ) = DBLE( WORK( OFDPOS+(I-1)*LDA ) )
  577:   170     CONTINUE
  578:       ENDIF
  579: *
  580:       HOUS( 1 ) = LHMIN
  581:       WORK( 1 ) = LWMIN
  582:       RETURN
  583: *
  584: *     End of ZHETRD_HB2ST
  585: *
  586:       END
  587:       

CVSweb interface <joel.bertrand@systella.fr>