Annotation of rpl/lapack/lapack/zhetrd_hb2st.F, revision 1.5

1.1       bertrand    1: *> \brief \b ZHETRD_HB2ST reduces a complex Hermitian band matrix A to real symmetric tridiagonal form T
                      2: *
                      3: *  =========== DOCUMENTATION ===========
                      4: *
                      5: * Online html documentation available at
                      6: *            http://www.netlib.org/lapack/explore-html/
                      7: *
                      8: *> \htmlonly
                      9: *> Download ZHETRD_HB2ST + dependencies
1.5     ! bertrand   10: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhetrd_hb2st.f">
1.1       bertrand   11: *> [TGZ]</a>
1.5     ! bertrand   12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhetrd_hb2st.f">
1.1       bertrand   13: *> [ZIP]</a>
1.5     ! bertrand   14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhetrd_hb2st.f">
1.1       bertrand   15: *> [TXT]</a>
                     16: *> \endhtmlonly
                     17: *
                     18: *  Definition:
                     19: *  ===========
                     20: *
                     21: *       SUBROUTINE ZHETRD_HB2ST( STAGE1, VECT, UPLO, N, KD, AB, LDAB, 
                     22: *                               D, E, HOUS, LHOUS, WORK, LWORK, INFO )
                     23: *
                     24: *       #if defined(_OPENMP)
                     25: *       use omp_lib
                     26: *       #endif
                     27: *
                     28: *       IMPLICIT NONE
                     29: *
                     30: *       .. Scalar Arguments ..
                     31: *       CHARACTER          STAGE1, UPLO, VECT
                     32: *       INTEGER            N, KD, IB, LDAB, LHOUS, LWORK, INFO
                     33: *       ..
                     34: *       .. Array Arguments ..
                     35: *       DOUBLE PRECISION   D( * ), E( * )
                     36: *       COMPLEX*16         AB( LDAB, * ), HOUS( * ), WORK( * )
                     37: *       ..
                     38: *
                     39: *
                     40: *> \par Purpose:
                     41: *  =============
                     42: *>
                     43: *> \verbatim
                     44: *>
                     45: *> ZHETRD_HB2ST reduces a complex Hermitian band matrix A to real symmetric
                     46: *> tridiagonal form T by a unitary similarity transformation:
                     47: *> Q**H * A * Q = T.
                     48: *> \endverbatim
                     49: *
                     50: *  Arguments:
                     51: *  ==========
                     52: *
1.4       bertrand   53: *> \param[in] STAGE1
1.1       bertrand   54: *> \verbatim
1.4       bertrand   55: *>          STAGE1 is CHARACTER*1
1.1       bertrand   56: *>          = 'N':  "No": to mention that the stage 1 of the reduction  
                     57: *>                  from dense to band using the zhetrd_he2hb routine
                     58: *>                  was not called before this routine to reproduce AB. 
                     59: *>                  In other term this routine is called as standalone. 
                     60: *>          = 'Y':  "Yes": to mention that the stage 1 of the 
                     61: *>                  reduction from dense to band using the zhetrd_he2hb 
                     62: *>                  routine has been called to produce AB (e.g., AB is
                     63: *>                  the output of zhetrd_he2hb.
                     64: *> \endverbatim
                     65: *>
                     66: *> \param[in] VECT
                     67: *> \verbatim
                     68: *>          VECT is CHARACTER*1
                     69: *>          = 'N':  No need for the Housholder representation, 
                     70: *>                  and thus LHOUS is of size max(1, 4*N);
                     71: *>          = 'V':  the Householder representation is needed to 
                     72: *>                  either generate or to apply Q later on, 
                     73: *>                  then LHOUS is to be queried and computed.
                     74: *>                  (NOT AVAILABLE IN THIS RELEASE).
                     75: *> \endverbatim
                     76: *>
                     77: *> \param[in] UPLO
                     78: *> \verbatim
                     79: *>          UPLO is CHARACTER*1
                     80: *>          = 'U':  Upper triangle of A is stored;
                     81: *>          = 'L':  Lower triangle of A is stored.
                     82: *> \endverbatim
                     83: *>
                     84: *> \param[in] N
                     85: *> \verbatim
                     86: *>          N is INTEGER
                     87: *>          The order of the matrix A.  N >= 0.
                     88: *> \endverbatim
                     89: *>
                     90: *> \param[in] KD
                     91: *> \verbatim
                     92: *>          KD is INTEGER
                     93: *>          The number of superdiagonals of the matrix A if UPLO = 'U',
                     94: *>          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
                     95: *> \endverbatim
                     96: *>
                     97: *> \param[in,out] AB
                     98: *> \verbatim
                     99: *>          AB is COMPLEX*16 array, dimension (LDAB,N)
                    100: *>          On entry, the upper or lower triangle of the Hermitian band
                    101: *>          matrix A, stored in the first KD+1 rows of the array.  The
                    102: *>          j-th column of A is stored in the j-th column of the array AB
                    103: *>          as follows:
                    104: *>          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
                    105: *>          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
                    106: *>          On exit, the diagonal elements of AB are overwritten by the
                    107: *>          diagonal elements of the tridiagonal matrix T; if KD > 0, the
                    108: *>          elements on the first superdiagonal (if UPLO = 'U') or the
                    109: *>          first subdiagonal (if UPLO = 'L') are overwritten by the
                    110: *>          off-diagonal elements of T; the rest of AB is overwritten by
                    111: *>          values generated during the reduction.
                    112: *> \endverbatim
                    113: *>
                    114: *> \param[in] LDAB
                    115: *> \verbatim
                    116: *>          LDAB is INTEGER
                    117: *>          The leading dimension of the array AB.  LDAB >= KD+1.
                    118: *> \endverbatim
                    119: *>
                    120: *> \param[out] D
                    121: *> \verbatim
                    122: *>          D is DOUBLE PRECISION array, dimension (N)
                    123: *>          The diagonal elements of the tridiagonal matrix T.
                    124: *> \endverbatim
                    125: *>
                    126: *> \param[out] E
                    127: *> \verbatim
                    128: *>          E is DOUBLE PRECISION array, dimension (N-1)
                    129: *>          The off-diagonal elements of the tridiagonal matrix T:
                    130: *>          E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'.
                    131: *> \endverbatim
                    132: *>
                    133: *> \param[out] HOUS
                    134: *> \verbatim
                    135: *>          HOUS is COMPLEX*16 array, dimension LHOUS, that
                    136: *>          store the Householder representation.
                    137: *> \endverbatim
                    138: *>
                    139: *> \param[in] LHOUS
                    140: *> \verbatim
                    141: *>          LHOUS is INTEGER
                    142: *>          The dimension of the array HOUS. LHOUS = MAX(1, dimension)
                    143: *>          If LWORK = -1, or LHOUS=-1,
                    144: *>          then a query is assumed; the routine
                    145: *>          only calculates the optimal size of the HOUS array, returns
                    146: *>          this value as the first entry of the HOUS array, and no error
                    147: *>          message related to LHOUS is issued by XERBLA.
                    148: *>          LHOUS = MAX(1, dimension) where
                    149: *>          dimension = 4*N if VECT='N'
                    150: *>          not available now if VECT='H'     
                    151: *> \endverbatim
                    152: *>
                    153: *> \param[out] WORK
                    154: *> \verbatim
                    155: *>          WORK is COMPLEX*16 array, dimension LWORK.
                    156: *> \endverbatim
                    157: *>
                    158: *> \param[in] LWORK
                    159: *> \verbatim
                    160: *>          LWORK is INTEGER
                    161: *>          The dimension of the array WORK. LWORK = MAX(1, dimension)
                    162: *>          If LWORK = -1, or LHOUS=-1,
                    163: *>          then a workspace query is assumed; the routine
                    164: *>          only calculates the optimal size of the WORK array, returns
                    165: *>          this value as the first entry of the WORK array, and no error
                    166: *>          message related to LWORK is issued by XERBLA.
                    167: *>          LWORK = MAX(1, dimension) where
                    168: *>          dimension   = (2KD+1)*N + KD*NTHREADS
                    169: *>          where KD is the blocking size of the reduction,
                    170: *>          FACTOPTNB is the blocking used by the QR or LQ
                    171: *>          algorithm, usually FACTOPTNB=128 is a good choice
                    172: *>          NTHREADS is the number of threads used when
                    173: *>          openMP compilation is enabled, otherwise =1.
                    174: *> \endverbatim
                    175: *>
                    176: *> \param[out] INFO
                    177: *> \verbatim
                    178: *>          INFO is INTEGER
                    179: *>          = 0:  successful exit
                    180: *>          < 0:  if INFO = -i, the i-th argument had an illegal value
                    181: *> \endverbatim
                    182: *
                    183: *  Authors:
                    184: *  ========
                    185: *
                    186: *> \author Univ. of Tennessee
                    187: *> \author Univ. of California Berkeley
                    188: *> \author Univ. of Colorado Denver
                    189: *> \author NAG Ltd.
                    190: *
                    191: *> \ingroup complex16OTHERcomputational
                    192: *
                    193: *> \par Further Details:
                    194: *  =====================
                    195: *>
                    196: *> \verbatim
                    197: *>
                    198: *>  Implemented by Azzam Haidar.
                    199: *>
                    200: *>  All details are available on technical report, SC11, SC13 papers.
                    201: *>
                    202: *>  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
                    203: *>  Parallel reduction to condensed forms for symmetric eigenvalue problems
                    204: *>  using aggregated fine-grained and memory-aware kernels. In Proceedings
                    205: *>  of 2011 International Conference for High Performance Computing,
                    206: *>  Networking, Storage and Analysis (SC '11), New York, NY, USA,
                    207: *>  Article 8 , 11 pages.
                    208: *>  http://doi.acm.org/10.1145/2063384.2063394
                    209: *>
                    210: *>  A. Haidar, J. Kurzak, P. Luszczek, 2013.
                    211: *>  An improved parallel singular value algorithm and its implementation 
                    212: *>  for multicore hardware, In Proceedings of 2013 International Conference
                    213: *>  for High Performance Computing, Networking, Storage and Analysis (SC '13).
                    214: *>  Denver, Colorado, USA, 2013.
                    215: *>  Article 90, 12 pages.
                    216: *>  http://doi.acm.org/10.1145/2503210.2503292
                    217: *>
                    218: *>  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
                    219: *>  A novel hybrid CPU-GPU generalized eigensolver for electronic structure 
                    220: *>  calculations based on fine-grained memory aware tasks.
                    221: *>  International Journal of High Performance Computing Applications.
                    222: *>  Volume 28 Issue 2, Pages 196-209, May 2014.
                    223: *>  http://hpc.sagepub.com/content/28/2/196 
                    224: *>
                    225: *> \endverbatim
                    226: *>
                    227: *  =====================================================================
                    228:       SUBROUTINE ZHETRD_HB2ST( STAGE1, VECT, UPLO, N, KD, AB, LDAB, 
                    229:      $                         D, E, HOUS, LHOUS, WORK, LWORK, INFO )
                    230: *
                    231: *
                    232: #if defined(_OPENMP)
                    233:       use omp_lib
                    234: #endif
                    235: *
                    236:       IMPLICIT NONE
                    237: *
1.5     ! bertrand  238: *  -- LAPACK computational routine --
1.1       bertrand  239: *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
                    240: *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
                    241: *
                    242: *     .. Scalar Arguments ..
                    243:       CHARACTER          STAGE1, UPLO, VECT
                    244:       INTEGER            N, KD, LDAB, LHOUS, LWORK, INFO
                    245: *     ..
                    246: *     .. Array Arguments ..
                    247:       DOUBLE PRECISION   D( * ), E( * )
                    248:       COMPLEX*16         AB( LDAB, * ), HOUS( * ), WORK( * )
                    249: *     ..
                    250: *
                    251: *  =====================================================================
                    252: *
                    253: *     .. Parameters ..
                    254:       DOUBLE PRECISION   RZERO
                    255:       COMPLEX*16         ZERO, ONE
                    256:       PARAMETER          ( RZERO = 0.0D+0,
                    257:      $                   ZERO = ( 0.0D+0, 0.0D+0 ),
                    258:      $                   ONE  = ( 1.0D+0, 0.0D+0 ) )
                    259: *     ..
                    260: *     .. Local Scalars ..
                    261:       LOGICAL            LQUERY, WANTQ, UPPER, AFTERS1
                    262:       INTEGER            I, M, K, IB, SWEEPID, MYID, SHIFT, STT, ST, 
                    263:      $                   ED, STIND, EDIND, BLKLASTIND, COLPT, THED,
                    264:      $                   STEPERCOL, GRSIZ, THGRSIZ, THGRNB, THGRID,
                    265:      $                   NBTILES, TTYPE, TID, NTHREADS, DEBUG,
                    266:      $                   ABDPOS, ABOFDPOS, DPOS, OFDPOS, AWPOS, 
                    267:      $                   INDA, INDW, APOS, SIZEA, LDA, INDV, INDTAU,
                    268:      $                   SIZEV, SIZETAU, LDV, LHMIN, LWMIN
                    269:       DOUBLE PRECISION   ABSTMP
                    270:       COMPLEX*16         TMP
                    271: *     ..
                    272: *     .. External Subroutines ..
1.4       bertrand  273:       EXTERNAL           ZHB2ST_KERNELS, ZLACPY, ZLASET, XERBLA
1.1       bertrand  274: *     ..
                    275: *     .. Intrinsic Functions ..
                    276:       INTRINSIC          MIN, MAX, CEILING, DBLE, REAL
                    277: *     ..
                    278: *     .. External Functions ..
                    279:       LOGICAL            LSAME
1.4       bertrand  280:       INTEGER            ILAENV2STAGE 
                    281:       EXTERNAL           LSAME, ILAENV2STAGE
1.1       bertrand  282: *     ..
                    283: *     .. Executable Statements ..
                    284: *
                    285: *     Determine the minimal workspace size required.
                    286: *     Test the input parameters
                    287: *
                    288:       DEBUG   = 0
                    289:       INFO    = 0
                    290:       AFTERS1 = LSAME( STAGE1, 'Y' )
                    291:       WANTQ   = LSAME( VECT, 'V' )
                    292:       UPPER   = LSAME( UPLO, 'U' )
                    293:       LQUERY  = ( LWORK.EQ.-1 ) .OR. ( LHOUS.EQ.-1 )
                    294: *
                    295: *     Determine the block size, the workspace size and the hous size.
                    296: *
1.4       bertrand  297:       IB     = ILAENV2STAGE( 2, 'ZHETRD_HB2ST', VECT, N, KD, -1, -1 )
                    298:       LHMIN  = ILAENV2STAGE( 3, 'ZHETRD_HB2ST', VECT, N, KD, IB, -1 )
                    299:       LWMIN  = ILAENV2STAGE( 4, 'ZHETRD_HB2ST', VECT, N, KD, IB, -1 )
1.1       bertrand  300: *
                    301:       IF( .NOT.AFTERS1 .AND. .NOT.LSAME( STAGE1, 'N' ) ) THEN
                    302:          INFO = -1
                    303:       ELSE IF( .NOT.LSAME( VECT, 'N' ) ) THEN
                    304:          INFO = -2
                    305:       ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
                    306:          INFO = -3
                    307:       ELSE IF( N.LT.0 ) THEN
                    308:          INFO = -4
                    309:       ELSE IF( KD.LT.0 ) THEN
                    310:          INFO = -5
                    311:       ELSE IF( LDAB.LT.(KD+1) ) THEN
                    312:          INFO = -7
                    313:       ELSE IF( LHOUS.LT.LHMIN .AND. .NOT.LQUERY ) THEN
                    314:          INFO = -11
                    315:       ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
                    316:          INFO = -13
                    317:       END IF
                    318: *
                    319:       IF( INFO.EQ.0 ) THEN
                    320:          HOUS( 1 ) = LHMIN
                    321:          WORK( 1 ) = LWMIN
                    322:       END IF
                    323: *
                    324:       IF( INFO.NE.0 ) THEN
                    325:          CALL XERBLA( 'ZHETRD_HB2ST', -INFO )
                    326:          RETURN
                    327:       ELSE IF( LQUERY ) THEN
                    328:          RETURN
                    329:       END IF
                    330: *
                    331: *     Quick return if possible
                    332: *
                    333:       IF( N.EQ.0 ) THEN
                    334:           HOUS( 1 ) = 1
                    335:           WORK( 1 ) = 1
                    336:           RETURN
                    337:       END IF
                    338: *
                    339: *     Determine pointer position
                    340: *
                    341:       LDV      = KD + IB
                    342:       SIZETAU  = 2 * N
                    343:       SIZEV    = 2 * N
                    344:       INDTAU   = 1
                    345:       INDV     = INDTAU + SIZETAU
                    346:       LDA      = 2 * KD + 1
                    347:       SIZEA    = LDA * N
                    348:       INDA     = 1
                    349:       INDW     = INDA + SIZEA
                    350:       NTHREADS = 1
                    351:       TID      = 0
                    352: *
                    353:       IF( UPPER ) THEN
                    354:           APOS     = INDA + KD
                    355:           AWPOS    = INDA
                    356:           DPOS     = APOS + KD
                    357:           OFDPOS   = DPOS - 1
                    358:           ABDPOS   = KD + 1
                    359:           ABOFDPOS = KD
                    360:       ELSE
                    361:           APOS     = INDA 
                    362:           AWPOS    = INDA + KD + 1
                    363:           DPOS     = APOS
                    364:           OFDPOS   = DPOS + 1
                    365:           ABDPOS   = 1
                    366:           ABOFDPOS = 2
                    367: 
                    368:       ENDIF
                    369: *      
                    370: *     Case KD=0: 
                    371: *     The matrix is diagonal. We just copy it (convert to "real" for 
                    372: *     complex because D is double and the imaginary part should be 0) 
                    373: *     and store it in D. A sequential code here is better or 
                    374: *     in a parallel environment it might need two cores for D and E
                    375: *
                    376:       IF( KD.EQ.0 ) THEN
                    377:           DO 30 I = 1, N
                    378:               D( I ) = DBLE( AB( ABDPOS, I ) )
                    379:    30     CONTINUE
                    380:           DO 40 I = 1, N-1
                    381:               E( I ) = RZERO
                    382:    40     CONTINUE
                    383: *
                    384:           HOUS( 1 ) = 1
                    385:           WORK( 1 ) = 1
                    386:           RETURN
                    387:       END IF
                    388: *      
                    389: *     Case KD=1: 
                    390: *     The matrix is already Tridiagonal. We have to make diagonal 
                    391: *     and offdiagonal elements real, and store them in D and E.
                    392: *     For that, for real precision just copy the diag and offdiag 
                    393: *     to D and E while for the COMPLEX case the bulge chasing is  
                    394: *     performed to convert the hermetian tridiagonal to symmetric 
1.5     ! bertrand  395: *     tridiagonal. A simpler conversion formula might be used, but then 
1.1       bertrand  396: *     updating the Q matrix will be required and based if Q is generated
                    397: *     or not this might complicate the story. 
                    398: *      
                    399:       IF( KD.EQ.1 ) THEN
                    400:           DO 50 I = 1, N
                    401:               D( I ) = DBLE( AB( ABDPOS, I ) )
                    402:    50     CONTINUE
                    403: *
                    404: *         make off-diagonal elements real and copy them to E
                    405: *
                    406:           IF( UPPER ) THEN
                    407:               DO 60 I = 1, N - 1
                    408:                   TMP = AB( ABOFDPOS, I+1 )
                    409:                   ABSTMP = ABS( TMP )
                    410:                   AB( ABOFDPOS, I+1 ) = ABSTMP
                    411:                   E( I ) = ABSTMP
                    412:                   IF( ABSTMP.NE.RZERO ) THEN
                    413:                      TMP = TMP / ABSTMP
                    414:                   ELSE
                    415:                      TMP = ONE
                    416:                   END IF
                    417:                   IF( I.LT.N-1 )
                    418:      $               AB( ABOFDPOS, I+2 ) = AB( ABOFDPOS, I+2 )*TMP
                    419: C                  IF( WANTZ ) THEN
                    420: C                     CALL ZSCAL( N, DCONJG( TMP ), Q( 1, I+1 ), 1 )
                    421: C                  END IF
                    422:    60         CONTINUE
                    423:           ELSE
                    424:               DO 70 I = 1, N - 1
                    425:                  TMP = AB( ABOFDPOS, I )
                    426:                  ABSTMP = ABS( TMP )
                    427:                  AB( ABOFDPOS, I ) = ABSTMP
                    428:                  E( I ) = ABSTMP
                    429:                  IF( ABSTMP.NE.RZERO ) THEN
                    430:                     TMP = TMP / ABSTMP
                    431:                  ELSE
                    432:                     TMP = ONE
                    433:                  END IF
                    434:                  IF( I.LT.N-1 )
                    435:      $              AB( ABOFDPOS, I+1 ) = AB( ABOFDPOS, I+1 )*TMP
                    436: C                 IF( WANTQ ) THEN
                    437: C                    CALL ZSCAL( N, TMP, Q( 1, I+1 ), 1 )
                    438: C                 END IF
                    439:    70         CONTINUE
                    440:           ENDIF
                    441: *
                    442:           HOUS( 1 ) = 1
                    443:           WORK( 1 ) = 1
                    444:           RETURN
                    445:       END IF
                    446: *
                    447: *     Main code start here. 
                    448: *     Reduce the hermitian band of A to a tridiagonal matrix.
                    449: *
                    450:       THGRSIZ   = N
                    451:       GRSIZ     = 1
                    452:       SHIFT     = 3
                    453:       NBTILES   = CEILING( REAL(N)/REAL(KD) )
                    454:       STEPERCOL = CEILING( REAL(SHIFT)/REAL(GRSIZ) )
                    455:       THGRNB    = CEILING( REAL(N-1)/REAL(THGRSIZ) )
                    456: *      
                    457:       CALL ZLACPY( "A", KD+1, N, AB, LDAB, WORK( APOS ), LDA )
                    458:       CALL ZLASET( "A", KD,   N, ZERO, ZERO, WORK( AWPOS ), LDA )
                    459: *
                    460: *
                    461: *     openMP parallelisation start here
                    462: *
                    463: #if defined(_OPENMP)
                    464: !$OMP PARALLEL PRIVATE( TID, THGRID, BLKLASTIND )
                    465: !$OMP$         PRIVATE( THED, I, M, K, ST, ED, STT, SWEEPID ) 
                    466: !$OMP$         PRIVATE( MYID, TTYPE, COLPT, STIND, EDIND )
                    467: !$OMP$         SHARED ( UPLO, WANTQ, INDV, INDTAU, HOUS, WORK)
                    468: !$OMP$         SHARED ( N, KD, IB, NBTILES, LDA, LDV, INDA )
                    469: !$OMP$         SHARED ( STEPERCOL, THGRNB, THGRSIZ, GRSIZ, SHIFT )
                    470: !$OMP MASTER
                    471: #endif
                    472: *
                    473: *     main bulge chasing loop
                    474: *      
                    475:       DO 100 THGRID = 1, THGRNB
                    476:           STT  = (THGRID-1)*THGRSIZ+1
                    477:           THED = MIN( (STT + THGRSIZ -1), (N-1))
                    478:           DO 110 I = STT, N-1
                    479:               ED = MIN( I, THED )
                    480:               IF( STT.GT.ED ) EXIT
                    481:               DO 120 M = 1, STEPERCOL
                    482:                   ST = STT
                    483:                   DO 130 SWEEPID = ST, ED
                    484:                       DO 140 K = 1, GRSIZ
                    485:                           MYID  = (I-SWEEPID)*(STEPERCOL*GRSIZ) 
                    486:      $                           + (M-1)*GRSIZ + K
                    487:                           IF ( MYID.EQ.1 ) THEN
                    488:                               TTYPE = 1
                    489:                           ELSE
                    490:                               TTYPE = MOD( MYID, 2 ) + 2
                    491:                           ENDIF
                    492: 
                    493:                           IF( TTYPE.EQ.2 ) THEN
                    494:                               COLPT      = (MYID/2)*KD + SWEEPID
                    495:                               STIND      = COLPT-KD+1
                    496:                               EDIND      = MIN(COLPT,N)
                    497:                               BLKLASTIND = COLPT
                    498:                           ELSE
                    499:                               COLPT      = ((MYID+1)/2)*KD + SWEEPID
                    500:                               STIND      = COLPT-KD+1
                    501:                               EDIND      = MIN(COLPT,N)
                    502:                               IF( ( STIND.GE.EDIND-1 ).AND.
                    503:      $                            ( EDIND.EQ.N ) ) THEN
                    504:                                   BLKLASTIND = N
                    505:                               ELSE
                    506:                                   BLKLASTIND = 0
                    507:                               ENDIF
                    508:                           ENDIF
                    509: *
                    510: *                         Call the kernel
                    511: *                             
1.5     ! bertrand  512: #if defined(_OPENMP) &&  _OPENMP >= 201307
        !           513: 
1.1       bertrand  514:                           IF( TTYPE.NE.1 ) THEN      
                    515: !$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1))
                    516: !$OMP$     DEPEND(in:WORK(MYID-1))
                    517: !$OMP$     DEPEND(out:WORK(MYID))
                    518:                               TID      = OMP_GET_THREAD_NUM()
                    519:                               CALL ZHB2ST_KERNELS( UPLO, WANTQ, TTYPE, 
                    520:      $                             STIND, EDIND, SWEEPID, N, KD, IB,
                    521:      $                             WORK ( INDA ), LDA, 
                    522:      $                             HOUS( INDV ), HOUS( INDTAU ), LDV,
                    523:      $                             WORK( INDW + TID*KD ) )
                    524: !$OMP END TASK
                    525:                           ELSE
                    526: !$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1))
                    527: !$OMP$     DEPEND(out:WORK(MYID))
                    528:                               TID      = OMP_GET_THREAD_NUM()
                    529:                               CALL ZHB2ST_KERNELS( UPLO, WANTQ, TTYPE, 
                    530:      $                             STIND, EDIND, SWEEPID, N, KD, IB,
                    531:      $                             WORK ( INDA ), LDA, 
                    532:      $                             HOUS( INDV ), HOUS( INDTAU ), LDV,
                    533:      $                             WORK( INDW + TID*KD ) )
                    534: !$OMP END TASK
                    535:                           ENDIF
                    536: #else
                    537:                           CALL ZHB2ST_KERNELS( UPLO, WANTQ, TTYPE, 
                    538:      $                         STIND, EDIND, SWEEPID, N, KD, IB,
                    539:      $                         WORK ( INDA ), LDA, 
                    540:      $                         HOUS( INDV ), HOUS( INDTAU ), LDV,
                    541:      $                         WORK( INDW + TID*KD ) )
                    542: #endif 
                    543:                           IF ( BLKLASTIND.GE.(N-1) ) THEN
                    544:                               STT = STT + 1
                    545:                               EXIT
                    546:                           ENDIF
                    547:   140                 CONTINUE
                    548:   130             CONTINUE
                    549:   120         CONTINUE
                    550:   110     CONTINUE
                    551:   100 CONTINUE
                    552: *
                    553: #if defined(_OPENMP)
                    554: !$OMP END MASTER
                    555: !$OMP END PARALLEL
                    556: #endif
                    557: *      
                    558: *     Copy the diagonal from A to D. Note that D is REAL thus only
                    559: *     the Real part is needed, the imaginary part should be zero.
                    560: *
                    561:       DO 150 I = 1, N
                    562:           D( I ) = DBLE( WORK( DPOS+(I-1)*LDA ) )
                    563:   150 CONTINUE
                    564: *      
                    565: *     Copy the off diagonal from A to E. Note that E is REAL thus only
                    566: *     the Real part is needed, the imaginary part should be zero.
                    567: *
                    568:       IF( UPPER ) THEN
                    569:           DO 160 I = 1, N-1
                    570:              E( I ) = DBLE( WORK( OFDPOS+I*LDA ) )
                    571:   160     CONTINUE
                    572:       ELSE
                    573:           DO 170 I = 1, N-1
                    574:              E( I ) = DBLE( WORK( OFDPOS+(I-1)*LDA ) )
                    575:   170     CONTINUE
                    576:       ENDIF
                    577: *
                    578:       HOUS( 1 ) = LHMIN
                    579:       WORK( 1 ) = LWMIN
                    580:       RETURN
                    581: *
                    582: *     End of ZHETRD_HB2ST
                    583: *
                    584:       END
                    585:       

CVSweb interface <joel.bertrand@systella.fr>