Annotation of rpl/lapack/lapack/zhetrd_hb2st.F, revision 1.1

1.1     ! bertrand    1: *> \brief \b ZHETRD_HB2ST reduces a complex Hermitian band matrix A to real symmetric tridiagonal form T
        !             2: *
        !             3: *  =========== DOCUMENTATION ===========
        !             4: *
        !             5: * Online html documentation available at
        !             6: *            http://www.netlib.org/lapack/explore-html/
        !             7: *
        !             8: *> \htmlonly
        !             9: *> Download ZHETRD_HB2ST + dependencies
        !            10: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhbtrd_hb2st.f">
        !            11: *> [TGZ]</a>
        !            12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhbtrd_hb2st.f">
        !            13: *> [ZIP]</a>
        !            14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhbtrd_hb2st.f">
        !            15: *> [TXT]</a>
        !            16: *> \endhtmlonly
        !            17: *
        !            18: *  Definition:
        !            19: *  ===========
        !            20: *
        !            21: *       SUBROUTINE ZHETRD_HB2ST( STAGE1, VECT, UPLO, N, KD, AB, LDAB, 
        !            22: *                               D, E, HOUS, LHOUS, WORK, LWORK, INFO )
        !            23: *
        !            24: *       #if defined(_OPENMP)
        !            25: *       use omp_lib
        !            26: *       #endif
        !            27: *
        !            28: *       IMPLICIT NONE
        !            29: *
        !            30: *       .. Scalar Arguments ..
        !            31: *       CHARACTER          STAGE1, UPLO, VECT
        !            32: *       INTEGER            N, KD, IB, LDAB, LHOUS, LWORK, INFO
        !            33: *       ..
        !            34: *       .. Array Arguments ..
        !            35: *       DOUBLE PRECISION   D( * ), E( * )
        !            36: *       COMPLEX*16         AB( LDAB, * ), HOUS( * ), WORK( * )
        !            37: *       ..
        !            38: *
        !            39: *
        !            40: *> \par Purpose:
        !            41: *  =============
        !            42: *>
        !            43: *> \verbatim
        !            44: *>
        !            45: *> ZHETRD_HB2ST reduces a complex Hermitian band matrix A to real symmetric
        !            46: *> tridiagonal form T by a unitary similarity transformation:
        !            47: *> Q**H * A * Q = T.
        !            48: *> \endverbatim
        !            49: *
        !            50: *  Arguments:
        !            51: *  ==========
        !            52: *
        !            53: *> \param[in] STAGE
        !            54: *> \verbatim
        !            55: *>          STAGE is CHARACTER*1
        !            56: *>          = 'N':  "No": to mention that the stage 1 of the reduction  
        !            57: *>                  from dense to band using the zhetrd_he2hb routine
        !            58: *>                  was not called before this routine to reproduce AB. 
        !            59: *>                  In other term this routine is called as standalone. 
        !            60: *>          = 'Y':  "Yes": to mention that the stage 1 of the 
        !            61: *>                  reduction from dense to band using the zhetrd_he2hb 
        !            62: *>                  routine has been called to produce AB (e.g., AB is
        !            63: *>                  the output of zhetrd_he2hb.
        !            64: *> \endverbatim
        !            65: *>
        !            66: *> \param[in] VECT
        !            67: *> \verbatim
        !            68: *>          VECT is CHARACTER*1
        !            69: *>          = 'N':  No need for the Housholder representation, 
        !            70: *>                  and thus LHOUS is of size max(1, 4*N);
        !            71: *>          = 'V':  the Householder representation is needed to 
        !            72: *>                  either generate or to apply Q later on, 
        !            73: *>                  then LHOUS is to be queried and computed.
        !            74: *>                  (NOT AVAILABLE IN THIS RELEASE).
        !            75: *> \endverbatim
        !            76: *>
        !            77: *> \param[in] UPLO
        !            78: *> \verbatim
        !            79: *>          UPLO is CHARACTER*1
        !            80: *>          = 'U':  Upper triangle of A is stored;
        !            81: *>          = 'L':  Lower triangle of A is stored.
        !            82: *> \endverbatim
        !            83: *>
        !            84: *> \param[in] N
        !            85: *> \verbatim
        !            86: *>          N is INTEGER
        !            87: *>          The order of the matrix A.  N >= 0.
        !            88: *> \endverbatim
        !            89: *>
        !            90: *> \param[in] KD
        !            91: *> \verbatim
        !            92: *>          KD is INTEGER
        !            93: *>          The number of superdiagonals of the matrix A if UPLO = 'U',
        !            94: *>          or the number of subdiagonals if UPLO = 'L'.  KD >= 0.
        !            95: *> \endverbatim
        !            96: *>
        !            97: *> \param[in,out] AB
        !            98: *> \verbatim
        !            99: *>          AB is COMPLEX*16 array, dimension (LDAB,N)
        !           100: *>          On entry, the upper or lower triangle of the Hermitian band
        !           101: *>          matrix A, stored in the first KD+1 rows of the array.  The
        !           102: *>          j-th column of A is stored in the j-th column of the array AB
        !           103: *>          as follows:
        !           104: *>          if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
        !           105: *>          if UPLO = 'L', AB(1+i-j,j)    = A(i,j) for j<=i<=min(n,j+kd).
        !           106: *>          On exit, the diagonal elements of AB are overwritten by the
        !           107: *>          diagonal elements of the tridiagonal matrix T; if KD > 0, the
        !           108: *>          elements on the first superdiagonal (if UPLO = 'U') or the
        !           109: *>          first subdiagonal (if UPLO = 'L') are overwritten by the
        !           110: *>          off-diagonal elements of T; the rest of AB is overwritten by
        !           111: *>          values generated during the reduction.
        !           112: *> \endverbatim
        !           113: *>
        !           114: *> \param[in] LDAB
        !           115: *> \verbatim
        !           116: *>          LDAB is INTEGER
        !           117: *>          The leading dimension of the array AB.  LDAB >= KD+1.
        !           118: *> \endverbatim
        !           119: *>
        !           120: *> \param[out] D
        !           121: *> \verbatim
        !           122: *>          D is DOUBLE PRECISION array, dimension (N)
        !           123: *>          The diagonal elements of the tridiagonal matrix T.
        !           124: *> \endverbatim
        !           125: *>
        !           126: *> \param[out] E
        !           127: *> \verbatim
        !           128: *>          E is DOUBLE PRECISION array, dimension (N-1)
        !           129: *>          The off-diagonal elements of the tridiagonal matrix T:
        !           130: *>          E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'.
        !           131: *> \endverbatim
        !           132: *>
        !           133: *> \param[out] HOUS
        !           134: *> \verbatim
        !           135: *>          HOUS is COMPLEX*16 array, dimension LHOUS, that
        !           136: *>          store the Householder representation.
        !           137: *> \endverbatim
        !           138: *>
        !           139: *> \param[in] LHOUS
        !           140: *> \verbatim
        !           141: *>          LHOUS is INTEGER
        !           142: *>          The dimension of the array HOUS. LHOUS = MAX(1, dimension)
        !           143: *>          If LWORK = -1, or LHOUS=-1,
        !           144: *>          then a query is assumed; the routine
        !           145: *>          only calculates the optimal size of the HOUS array, returns
        !           146: *>          this value as the first entry of the HOUS array, and no error
        !           147: *>          message related to LHOUS is issued by XERBLA.
        !           148: *>          LHOUS = MAX(1, dimension) where
        !           149: *>          dimension = 4*N if VECT='N'
        !           150: *>          not available now if VECT='H'     
        !           151: *> \endverbatim
        !           152: *>
        !           153: *> \param[out] WORK
        !           154: *> \verbatim
        !           155: *>          WORK is COMPLEX*16 array, dimension LWORK.
        !           156: *> \endverbatim
        !           157: *>
        !           158: *> \param[in] LWORK
        !           159: *> \verbatim
        !           160: *>          LWORK is INTEGER
        !           161: *>          The dimension of the array WORK. LWORK = MAX(1, dimension)
        !           162: *>          If LWORK = -1, or LHOUS=-1,
        !           163: *>          then a workspace query is assumed; the routine
        !           164: *>          only calculates the optimal size of the WORK array, returns
        !           165: *>          this value as the first entry of the WORK array, and no error
        !           166: *>          message related to LWORK is issued by XERBLA.
        !           167: *>          LWORK = MAX(1, dimension) where
        !           168: *>          dimension   = (2KD+1)*N + KD*NTHREADS
        !           169: *>          where KD is the blocking size of the reduction,
        !           170: *>          FACTOPTNB is the blocking used by the QR or LQ
        !           171: *>          algorithm, usually FACTOPTNB=128 is a good choice
        !           172: *>          NTHREADS is the number of threads used when
        !           173: *>          openMP compilation is enabled, otherwise =1.
        !           174: *> \endverbatim
        !           175: *>
        !           176: *> \param[out] INFO
        !           177: *> \verbatim
        !           178: *>          INFO is INTEGER
        !           179: *>          = 0:  successful exit
        !           180: *>          < 0:  if INFO = -i, the i-th argument had an illegal value
        !           181: *> \endverbatim
        !           182: *
        !           183: *  Authors:
        !           184: *  ========
        !           185: *
        !           186: *> \author Univ. of Tennessee
        !           187: *> \author Univ. of California Berkeley
        !           188: *> \author Univ. of Colorado Denver
        !           189: *> \author NAG Ltd.
        !           190: *
        !           191: *> \date December 2016
        !           192: *
        !           193: *> \ingroup complex16OTHERcomputational
        !           194: *
        !           195: *> \par Further Details:
        !           196: *  =====================
        !           197: *>
        !           198: *> \verbatim
        !           199: *>
        !           200: *>  Implemented by Azzam Haidar.
        !           201: *>
        !           202: *>  All details are available on technical report, SC11, SC13 papers.
        !           203: *>
        !           204: *>  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
        !           205: *>  Parallel reduction to condensed forms for symmetric eigenvalue problems
        !           206: *>  using aggregated fine-grained and memory-aware kernels. In Proceedings
        !           207: *>  of 2011 International Conference for High Performance Computing,
        !           208: *>  Networking, Storage and Analysis (SC '11), New York, NY, USA,
        !           209: *>  Article 8 , 11 pages.
        !           210: *>  http://doi.acm.org/10.1145/2063384.2063394
        !           211: *>
        !           212: *>  A. Haidar, J. Kurzak, P. Luszczek, 2013.
        !           213: *>  An improved parallel singular value algorithm and its implementation 
        !           214: *>  for multicore hardware, In Proceedings of 2013 International Conference
        !           215: *>  for High Performance Computing, Networking, Storage and Analysis (SC '13).
        !           216: *>  Denver, Colorado, USA, 2013.
        !           217: *>  Article 90, 12 pages.
        !           218: *>  http://doi.acm.org/10.1145/2503210.2503292
        !           219: *>
        !           220: *>  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
        !           221: *>  A novel hybrid CPU-GPU generalized eigensolver for electronic structure 
        !           222: *>  calculations based on fine-grained memory aware tasks.
        !           223: *>  International Journal of High Performance Computing Applications.
        !           224: *>  Volume 28 Issue 2, Pages 196-209, May 2014.
        !           225: *>  http://hpc.sagepub.com/content/28/2/196 
        !           226: *>
        !           227: *> \endverbatim
        !           228: *>
        !           229: *  =====================================================================
        !           230:       SUBROUTINE ZHETRD_HB2ST( STAGE1, VECT, UPLO, N, KD, AB, LDAB, 
        !           231:      $                         D, E, HOUS, LHOUS, WORK, LWORK, INFO )
        !           232: *
        !           233: *
        !           234: #if defined(_OPENMP)
        !           235:       use omp_lib
        !           236: #endif
        !           237: *
        !           238:       IMPLICIT NONE
        !           239: *
        !           240: *  -- LAPACK computational routine (version 3.7.0) --
        !           241: *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
        !           242: *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
        !           243: *     December 2016
        !           244: *
        !           245: *     .. Scalar Arguments ..
        !           246:       CHARACTER          STAGE1, UPLO, VECT
        !           247:       INTEGER            N, KD, LDAB, LHOUS, LWORK, INFO
        !           248: *     ..
        !           249: *     .. Array Arguments ..
        !           250:       DOUBLE PRECISION   D( * ), E( * )
        !           251:       COMPLEX*16         AB( LDAB, * ), HOUS( * ), WORK( * )
        !           252: *     ..
        !           253: *
        !           254: *  =====================================================================
        !           255: *
        !           256: *     .. Parameters ..
        !           257:       DOUBLE PRECISION   RZERO
        !           258:       COMPLEX*16         ZERO, ONE
        !           259:       PARAMETER          ( RZERO = 0.0D+0,
        !           260:      $                   ZERO = ( 0.0D+0, 0.0D+0 ),
        !           261:      $                   ONE  = ( 1.0D+0, 0.0D+0 ) )
        !           262: *     ..
        !           263: *     .. Local Scalars ..
        !           264:       LOGICAL            LQUERY, WANTQ, UPPER, AFTERS1
        !           265:       INTEGER            I, M, K, IB, SWEEPID, MYID, SHIFT, STT, ST, 
        !           266:      $                   ED, STIND, EDIND, BLKLASTIND, COLPT, THED,
        !           267:      $                   STEPERCOL, GRSIZ, THGRSIZ, THGRNB, THGRID,
        !           268:      $                   NBTILES, TTYPE, TID, NTHREADS, DEBUG,
        !           269:      $                   ABDPOS, ABOFDPOS, DPOS, OFDPOS, AWPOS, 
        !           270:      $                   INDA, INDW, APOS, SIZEA, LDA, INDV, INDTAU,
        !           271:      $                   SIZEV, SIZETAU, LDV, LHMIN, LWMIN
        !           272:       DOUBLE PRECISION   ABSTMP
        !           273:       COMPLEX*16         TMP
        !           274: *     ..
        !           275: *     .. External Subroutines ..
        !           276:       EXTERNAL           ZHB2ST_KERNELS, ZLACPY, ZLASET
        !           277: *     ..
        !           278: *     .. Intrinsic Functions ..
        !           279:       INTRINSIC          MIN, MAX, CEILING, DBLE, REAL
        !           280: *     ..
        !           281: *     .. External Functions ..
        !           282:       LOGICAL            LSAME
        !           283:       INTEGER            ILAENV 
        !           284:       EXTERNAL           LSAME, ILAENV
        !           285: *     ..
        !           286: *     .. Executable Statements ..
        !           287: *
        !           288: *     Determine the minimal workspace size required.
        !           289: *     Test the input parameters
        !           290: *
        !           291:       DEBUG   = 0
        !           292:       INFO    = 0
        !           293:       AFTERS1 = LSAME( STAGE1, 'Y' )
        !           294:       WANTQ   = LSAME( VECT, 'V' )
        !           295:       UPPER   = LSAME( UPLO, 'U' )
        !           296:       LQUERY  = ( LWORK.EQ.-1 ) .OR. ( LHOUS.EQ.-1 )
        !           297: *
        !           298: *     Determine the block size, the workspace size and the hous size.
        !           299: *
        !           300:       IB     = ILAENV( 18, 'ZHETRD_HB2ST', VECT, N, KD, -1, -1 )
        !           301:       LHMIN  = ILAENV( 19, 'ZHETRD_HB2ST', VECT, N, KD, IB, -1 )
        !           302:       LWMIN  = ILAENV( 20, 'ZHETRD_HB2ST', VECT, N, KD, IB, -1 )
        !           303: *
        !           304:       IF( .NOT.AFTERS1 .AND. .NOT.LSAME( STAGE1, 'N' ) ) THEN
        !           305:          INFO = -1
        !           306:       ELSE IF( .NOT.LSAME( VECT, 'N' ) ) THEN
        !           307:          INFO = -2
        !           308:       ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
        !           309:          INFO = -3
        !           310:       ELSE IF( N.LT.0 ) THEN
        !           311:          INFO = -4
        !           312:       ELSE IF( KD.LT.0 ) THEN
        !           313:          INFO = -5
        !           314:       ELSE IF( LDAB.LT.(KD+1) ) THEN
        !           315:          INFO = -7
        !           316:       ELSE IF( LHOUS.LT.LHMIN .AND. .NOT.LQUERY ) THEN
        !           317:          INFO = -11
        !           318:       ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
        !           319:          INFO = -13
        !           320:       END IF
        !           321: *
        !           322:       IF( INFO.EQ.0 ) THEN
        !           323:          HOUS( 1 ) = LHMIN
        !           324:          WORK( 1 ) = LWMIN
        !           325:       END IF
        !           326: *
        !           327:       IF( INFO.NE.0 ) THEN
        !           328:          CALL XERBLA( 'ZHETRD_HB2ST', -INFO )
        !           329:          RETURN
        !           330:       ELSE IF( LQUERY ) THEN
        !           331:          RETURN
        !           332:       END IF
        !           333: *
        !           334: *     Quick return if possible
        !           335: *
        !           336:       IF( N.EQ.0 ) THEN
        !           337:           HOUS( 1 ) = 1
        !           338:           WORK( 1 ) = 1
        !           339:           RETURN
        !           340:       END IF
        !           341: *
        !           342: *     Determine pointer position
        !           343: *
        !           344:       LDV      = KD + IB
        !           345:       SIZETAU  = 2 * N
        !           346:       SIZEV    = 2 * N
        !           347:       INDTAU   = 1
        !           348:       INDV     = INDTAU + SIZETAU
        !           349:       LDA      = 2 * KD + 1
        !           350:       SIZEA    = LDA * N
        !           351:       INDA     = 1
        !           352:       INDW     = INDA + SIZEA
        !           353:       NTHREADS = 1
        !           354:       TID      = 0
        !           355: *
        !           356:       IF( UPPER ) THEN
        !           357:           APOS     = INDA + KD
        !           358:           AWPOS    = INDA
        !           359:           DPOS     = APOS + KD
        !           360:           OFDPOS   = DPOS - 1
        !           361:           ABDPOS   = KD + 1
        !           362:           ABOFDPOS = KD
        !           363:       ELSE
        !           364:           APOS     = INDA 
        !           365:           AWPOS    = INDA + KD + 1
        !           366:           DPOS     = APOS
        !           367:           OFDPOS   = DPOS + 1
        !           368:           ABDPOS   = 1
        !           369:           ABOFDPOS = 2
        !           370: 
        !           371:       ENDIF
        !           372: *      
        !           373: *     Case KD=0: 
        !           374: *     The matrix is diagonal. We just copy it (convert to "real" for 
        !           375: *     complex because D is double and the imaginary part should be 0) 
        !           376: *     and store it in D. A sequential code here is better or 
        !           377: *     in a parallel environment it might need two cores for D and E
        !           378: *
        !           379:       IF( KD.EQ.0 ) THEN
        !           380:           DO 30 I = 1, N
        !           381:               D( I ) = DBLE( AB( ABDPOS, I ) )
        !           382:    30     CONTINUE
        !           383:           DO 40 I = 1, N-1
        !           384:               E( I ) = RZERO
        !           385:    40     CONTINUE
        !           386: *
        !           387:           HOUS( 1 ) = 1
        !           388:           WORK( 1 ) = 1
        !           389:           RETURN
        !           390:       END IF
        !           391: *      
        !           392: *     Case KD=1: 
        !           393: *     The matrix is already Tridiagonal. We have to make diagonal 
        !           394: *     and offdiagonal elements real, and store them in D and E.
        !           395: *     For that, for real precision just copy the diag and offdiag 
        !           396: *     to D and E while for the COMPLEX case the bulge chasing is  
        !           397: *     performed to convert the hermetian tridiagonal to symmetric 
        !           398: *     tridiagonal. A simpler coversion formula might be used, but then 
        !           399: *     updating the Q matrix will be required and based if Q is generated
        !           400: *     or not this might complicate the story. 
        !           401: *      
        !           402:       IF( KD.EQ.1 ) THEN
        !           403:           DO 50 I = 1, N
        !           404:               D( I ) = DBLE( AB( ABDPOS, I ) )
        !           405:    50     CONTINUE
        !           406: *
        !           407: *         make off-diagonal elements real and copy them to E
        !           408: *
        !           409:           IF( UPPER ) THEN
        !           410:               DO 60 I = 1, N - 1
        !           411:                   TMP = AB( ABOFDPOS, I+1 )
        !           412:                   ABSTMP = ABS( TMP )
        !           413:                   AB( ABOFDPOS, I+1 ) = ABSTMP
        !           414:                   E( I ) = ABSTMP
        !           415:                   IF( ABSTMP.NE.RZERO ) THEN
        !           416:                      TMP = TMP / ABSTMP
        !           417:                   ELSE
        !           418:                      TMP = ONE
        !           419:                   END IF
        !           420:                   IF( I.LT.N-1 )
        !           421:      $               AB( ABOFDPOS, I+2 ) = AB( ABOFDPOS, I+2 )*TMP
        !           422: C                  IF( WANTZ ) THEN
        !           423: C                     CALL ZSCAL( N, DCONJG( TMP ), Q( 1, I+1 ), 1 )
        !           424: C                  END IF
        !           425:    60         CONTINUE
        !           426:           ELSE
        !           427:               DO 70 I = 1, N - 1
        !           428:                  TMP = AB( ABOFDPOS, I )
        !           429:                  ABSTMP = ABS( TMP )
        !           430:                  AB( ABOFDPOS, I ) = ABSTMP
        !           431:                  E( I ) = ABSTMP
        !           432:                  IF( ABSTMP.NE.RZERO ) THEN
        !           433:                     TMP = TMP / ABSTMP
        !           434:                  ELSE
        !           435:                     TMP = ONE
        !           436:                  END IF
        !           437:                  IF( I.LT.N-1 )
        !           438:      $              AB( ABOFDPOS, I+1 ) = AB( ABOFDPOS, I+1 )*TMP
        !           439: C                 IF( WANTQ ) THEN
        !           440: C                    CALL ZSCAL( N, TMP, Q( 1, I+1 ), 1 )
        !           441: C                 END IF
        !           442:    70         CONTINUE
        !           443:           ENDIF
        !           444: *
        !           445:           HOUS( 1 ) = 1
        !           446:           WORK( 1 ) = 1
        !           447:           RETURN
        !           448:       END IF
        !           449: *
        !           450: *     Main code start here. 
        !           451: *     Reduce the hermitian band of A to a tridiagonal matrix.
        !           452: *
        !           453:       THGRSIZ   = N
        !           454:       GRSIZ     = 1
        !           455:       SHIFT     = 3
        !           456:       NBTILES   = CEILING( REAL(N)/REAL(KD) )
        !           457:       STEPERCOL = CEILING( REAL(SHIFT)/REAL(GRSIZ) )
        !           458:       THGRNB    = CEILING( REAL(N-1)/REAL(THGRSIZ) )
        !           459: *      
        !           460:       CALL ZLACPY( "A", KD+1, N, AB, LDAB, WORK( APOS ), LDA )
        !           461:       CALL ZLASET( "A", KD,   N, ZERO, ZERO, WORK( AWPOS ), LDA )
        !           462: *
        !           463: *
        !           464: *     openMP parallelisation start here
        !           465: *
        !           466: #if defined(_OPENMP)
        !           467: !$OMP PARALLEL PRIVATE( TID, THGRID, BLKLASTIND )
        !           468: !$OMP$         PRIVATE( THED, I, M, K, ST, ED, STT, SWEEPID ) 
        !           469: !$OMP$         PRIVATE( MYID, TTYPE, COLPT, STIND, EDIND )
        !           470: !$OMP$         SHARED ( UPLO, WANTQ, INDV, INDTAU, HOUS, WORK)
        !           471: !$OMP$         SHARED ( N, KD, IB, NBTILES, LDA, LDV, INDA )
        !           472: !$OMP$         SHARED ( STEPERCOL, THGRNB, THGRSIZ, GRSIZ, SHIFT )
        !           473: !$OMP MASTER
        !           474: #endif
        !           475: *
        !           476: *     main bulge chasing loop
        !           477: *      
        !           478:       DO 100 THGRID = 1, THGRNB
        !           479:           STT  = (THGRID-1)*THGRSIZ+1
        !           480:           THED = MIN( (STT + THGRSIZ -1), (N-1))
        !           481:           DO 110 I = STT, N-1
        !           482:               ED = MIN( I, THED )
        !           483:               IF( STT.GT.ED ) EXIT
        !           484:               DO 120 M = 1, STEPERCOL
        !           485:                   ST = STT
        !           486:                   DO 130 SWEEPID = ST, ED
        !           487:                       DO 140 K = 1, GRSIZ
        !           488:                           MYID  = (I-SWEEPID)*(STEPERCOL*GRSIZ) 
        !           489:      $                           + (M-1)*GRSIZ + K
        !           490:                           IF ( MYID.EQ.1 ) THEN
        !           491:                               TTYPE = 1
        !           492:                           ELSE
        !           493:                               TTYPE = MOD( MYID, 2 ) + 2
        !           494:                           ENDIF
        !           495: 
        !           496:                           IF( TTYPE.EQ.2 ) THEN
        !           497:                               COLPT      = (MYID/2)*KD + SWEEPID
        !           498:                               STIND      = COLPT-KD+1
        !           499:                               EDIND      = MIN(COLPT,N)
        !           500:                               BLKLASTIND = COLPT
        !           501:                           ELSE
        !           502:                               COLPT      = ((MYID+1)/2)*KD + SWEEPID
        !           503:                               STIND      = COLPT-KD+1
        !           504:                               EDIND      = MIN(COLPT,N)
        !           505:                               IF( ( STIND.GE.EDIND-1 ).AND.
        !           506:      $                            ( EDIND.EQ.N ) ) THEN
        !           507:                                   BLKLASTIND = N
        !           508:                               ELSE
        !           509:                                   BLKLASTIND = 0
        !           510:                               ENDIF
        !           511:                           ENDIF
        !           512: *
        !           513: *                         Call the kernel
        !           514: *                             
        !           515: #if defined(_OPENMP)
        !           516:                           IF( TTYPE.NE.1 ) THEN      
        !           517: !$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1))
        !           518: !$OMP$     DEPEND(in:WORK(MYID-1))
        !           519: !$OMP$     DEPEND(out:WORK(MYID))
        !           520:                               TID      = OMP_GET_THREAD_NUM()
        !           521:                               CALL ZHB2ST_KERNELS( UPLO, WANTQ, TTYPE, 
        !           522:      $                             STIND, EDIND, SWEEPID, N, KD, IB,
        !           523:      $                             WORK ( INDA ), LDA, 
        !           524:      $                             HOUS( INDV ), HOUS( INDTAU ), LDV,
        !           525:      $                             WORK( INDW + TID*KD ) )
        !           526: !$OMP END TASK
        !           527:                           ELSE
        !           528: !$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1))
        !           529: !$OMP$     DEPEND(out:WORK(MYID))
        !           530:                               TID      = OMP_GET_THREAD_NUM()
        !           531:                               CALL ZHB2ST_KERNELS( UPLO, WANTQ, TTYPE, 
        !           532:      $                             STIND, EDIND, SWEEPID, N, KD, IB,
        !           533:      $                             WORK ( INDA ), LDA, 
        !           534:      $                             HOUS( INDV ), HOUS( INDTAU ), LDV,
        !           535:      $                             WORK( INDW + TID*KD ) )
        !           536: !$OMP END TASK
        !           537:                           ENDIF
        !           538: #else
        !           539:                           CALL ZHB2ST_KERNELS( UPLO, WANTQ, TTYPE, 
        !           540:      $                         STIND, EDIND, SWEEPID, N, KD, IB,
        !           541:      $                         WORK ( INDA ), LDA, 
        !           542:      $                         HOUS( INDV ), HOUS( INDTAU ), LDV,
        !           543:      $                         WORK( INDW + TID*KD ) )
        !           544: #endif 
        !           545:                           IF ( BLKLASTIND.GE.(N-1) ) THEN
        !           546:                               STT = STT + 1
        !           547:                               EXIT
        !           548:                           ENDIF
        !           549:   140                 CONTINUE
        !           550:   130             CONTINUE
        !           551:   120         CONTINUE
        !           552:   110     CONTINUE
        !           553:   100 CONTINUE
        !           554: *
        !           555: #if defined(_OPENMP)
        !           556: !$OMP END MASTER
        !           557: !$OMP END PARALLEL
        !           558: #endif
        !           559: *      
        !           560: *     Copy the diagonal from A to D. Note that D is REAL thus only
        !           561: *     the Real part is needed, the imaginary part should be zero.
        !           562: *
        !           563:       DO 150 I = 1, N
        !           564:           D( I ) = DBLE( WORK( DPOS+(I-1)*LDA ) )
        !           565:   150 CONTINUE
        !           566: *      
        !           567: *     Copy the off diagonal from A to E. Note that E is REAL thus only
        !           568: *     the Real part is needed, the imaginary part should be zero.
        !           569: *
        !           570:       IF( UPPER ) THEN
        !           571:           DO 160 I = 1, N-1
        !           572:              E( I ) = DBLE( WORK( OFDPOS+I*LDA ) )
        !           573:   160     CONTINUE
        !           574:       ELSE
        !           575:           DO 170 I = 1, N-1
        !           576:              E( I ) = DBLE( WORK( OFDPOS+(I-1)*LDA ) )
        !           577:   170     CONTINUE
        !           578:       ENDIF
        !           579: *
        !           580:       HOUS( 1 ) = LHMIN
        !           581:       WORK( 1 ) = LWMIN
        !           582:       RETURN
        !           583: *
        !           584: *     End of ZHETRD_HB2ST
        !           585: *
        !           586:       END
        !           587:       

CVSweb interface <joel.bertrand@systella.fr>