File:  [local] / rpl / lapack / lapack / zhb2st_kernels.f
Revision 1.6: download - view: text, annotated - select for diffs - revision graph
Mon Aug 7 08:39:22 2023 UTC (8 months, 3 weeks ago) by bertrand
Branches: MAIN
CVS tags: rpl-4_1_35, rpl-4_1_34, HEAD
Première mise à jour de lapack et blas.

    1: *> \brief \b ZHB2ST_KERNELS
    2: *
    3: *  @precisions fortran z -> s d c
    4: *
    5: *  =========== DOCUMENTATION ===========
    6: *
    7: * Online html documentation available at
    8: *            http://www.netlib.org/lapack/explore-html/
    9: *
   10: *> \htmlonly
   11: *> Download ZHB2ST_KERNELS + dependencies
   12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhb2st_kernels.f">
   13: *> [TGZ]</a>
   14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhb2st_kernels.f">
   15: *> [ZIP]</a>
   16: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhb2st_kernels.f">
   17: *> [TXT]</a>
   18: *> \endhtmlonly
   19: *
   20: *  Definition:
   21: *  ===========
   22: *
   23: *       SUBROUTINE  ZHB2ST_KERNELS( UPLO, WANTZ, TTYPE,
   24: *                                   ST, ED, SWEEP, N, NB, IB,
   25: *                                   A, LDA, V, TAU, LDVT, WORK)
   26: *
   27: *       IMPLICIT NONE
   28: *
   29: *       .. Scalar Arguments ..
   30: *       CHARACTER          UPLO
   31: *       LOGICAL            WANTZ
   32: *       INTEGER            TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT
   33: *       ..
   34: *       .. Array Arguments ..
   35: *       COMPLEX*16         A( LDA, * ), V( * ),
   36: *                          TAU( * ), WORK( * )
   37: *
   38: *> \par Purpose:
   39: *  =============
   40: *>
   41: *> \verbatim
   42: *>
   43: *> ZHB2ST_KERNELS is an internal routine used by the ZHETRD_HB2ST
   44: *> subroutine.
   45: *> \endverbatim
   46: *
   47: *  Arguments:
   48: *  ==========
   49: *
   50: *> \param[in] UPLO
   51: *> \verbatim
   52: *>          UPLO is CHARACTER*1
   53: *> \endverbatim
   54: *>
   55: *> \param[in] WANTZ
   56: *> \verbatim
   57: *>          WANTZ is LOGICAL which indicate if Eigenvalue are requested or both
   58: *>          Eigenvalue/Eigenvectors.
   59: *> \endverbatim
   60: *>
   61: *> \param[in] TTYPE
   62: *> \verbatim
   63: *>          TTYPE is INTEGER
   64: *> \endverbatim
   65: *>
   66: *> \param[in] ST
   67: *> \verbatim
   68: *>          ST is INTEGER
   69: *>          internal parameter for indices.
   70: *> \endverbatim
   71: *>
   72: *> \param[in] ED
   73: *> \verbatim
   74: *>          ED is INTEGER
   75: *>          internal parameter for indices.
   76: *> \endverbatim
   77: *>
   78: *> \param[in] SWEEP
   79: *> \verbatim
   80: *>          SWEEP is INTEGER
   81: *>          internal parameter for indices.
   82: *> \endverbatim
   83: *>
   84: *> \param[in] N
   85: *> \verbatim
   86: *>          N is INTEGER. The order of the matrix A.
   87: *> \endverbatim
   88: *>
   89: *> \param[in] NB
   90: *> \verbatim
   91: *>          NB is INTEGER. The size of the band.
   92: *> \endverbatim
   93: *>
   94: *> \param[in] IB
   95: *> \verbatim
   96: *>          IB is INTEGER.
   97: *> \endverbatim
   98: *>
   99: *> \param[in, out] A
  100: *> \verbatim
  101: *>          A is COMPLEX*16 array. A pointer to the matrix A.
  102: *> \endverbatim
  103: *>
  104: *> \param[in] LDA
  105: *> \verbatim
  106: *>          LDA is INTEGER. The leading dimension of the matrix A.
  107: *> \endverbatim
  108: *>
  109: *> \param[out] V
  110: *> \verbatim
  111: *>          V is COMPLEX*16 array, dimension 2*n if eigenvalues only are
  112: *>          requested or to be queried for vectors.
  113: *> \endverbatim
  114: *>
  115: *> \param[out] TAU
  116: *> \verbatim
  117: *>          TAU is COMPLEX*16 array, dimension (2*n).
  118: *>          The scalar factors of the Householder reflectors are stored
  119: *>          in this array.
  120: *> \endverbatim
  121: *>
  122: *> \param[in] LDVT
  123: *> \verbatim
  124: *>          LDVT is INTEGER.
  125: *> \endverbatim
  126: *>
  127: *> \param[out] WORK
  128: *> \verbatim
  129: *>          WORK is COMPLEX*16 array. Workspace of size nb.
  130: *> \endverbatim
  131: *>
  132: *> \par Further Details:
  133: *  =====================
  134: *>
  135: *> \verbatim
  136: *>
  137: *>  Implemented by Azzam Haidar.
  138: *>
  139: *>  All details are available on technical report, SC11, SC13 papers.
  140: *>
  141: *>  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
  142: *>  Parallel reduction to condensed forms for symmetric eigenvalue problems
  143: *>  using aggregated fine-grained and memory-aware kernels. In Proceedings
  144: *>  of 2011 International Conference for High Performance Computing,
  145: *>  Networking, Storage and Analysis (SC '11), New York, NY, USA,
  146: *>  Article 8 , 11 pages.
  147: *>  http://doi.acm.org/10.1145/2063384.2063394
  148: *>
  149: *>  A. Haidar, J. Kurzak, P. Luszczek, 2013.
  150: *>  An improved parallel singular value algorithm and its implementation
  151: *>  for multicore hardware, In Proceedings of 2013 International Conference
  152: *>  for High Performance Computing, Networking, Storage and Analysis (SC '13).
  153: *>  Denver, Colorado, USA, 2013.
  154: *>  Article 90, 12 pages.
  155: *>  http://doi.acm.org/10.1145/2503210.2503292
  156: *>
  157: *>  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
  158: *>  A novel hybrid CPU-GPU generalized eigensolver for electronic structure
  159: *>  calculations based on fine-grained memory aware tasks.
  160: *>  International Journal of High Performance Computing Applications.
  161: *>  Volume 28 Issue 2, Pages 196-209, May 2014.
  162: *>  http://hpc.sagepub.com/content/28/2/196
  163: *>
  164: *> \endverbatim
  165: *>
  166: *  =====================================================================
  167:       SUBROUTINE  ZHB2ST_KERNELS( UPLO, WANTZ, TTYPE,
  168:      $                            ST, ED, SWEEP, N, NB, IB,
  169:      $                            A, LDA, V, TAU, LDVT, WORK)
  170: *
  171:       IMPLICIT NONE
  172: *
  173: *  -- LAPACK computational routine --
  174: *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
  175: *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  176: *
  177: *     .. Scalar Arguments ..
  178:       CHARACTER          UPLO
  179:       LOGICAL            WANTZ
  180:       INTEGER            TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT
  181: *     ..
  182: *     .. Array Arguments ..
  183:       COMPLEX*16         A( LDA, * ), V( * ),
  184:      $                   TAU( * ), WORK( * )
  185: *     ..
  186: *
  187: *  =====================================================================
  188: *
  189: *     .. Parameters ..
  190:       COMPLEX*16         ZERO, ONE
  191:       PARAMETER          ( ZERO = ( 0.0D+0, 0.0D+0 ),
  192:      $                   ONE = ( 1.0D+0, 0.0D+0 ) )
  193: *     ..
  194: *     .. Local Scalars ..
  195:       LOGICAL            UPPER
  196:       INTEGER            I, J1, J2, LM, LN, VPOS, TAUPOS,
  197:      $                   DPOS, OFDPOS, AJETER
  198:       COMPLEX*16         CTMP
  199: *     ..
  200: *     .. External Subroutines ..
  201:       EXTERNAL           ZLARFG, ZLARFX, ZLARFY
  202: *     ..
  203: *     .. Intrinsic Functions ..
  204:       INTRINSIC          DCONJG, MOD
  205: *     .. External Functions ..
  206:       LOGICAL            LSAME
  207:       EXTERNAL           LSAME
  208: *     ..
  209: *     ..
  210: *     .. Executable Statements ..
  211: *
  212:       AJETER = IB + LDVT
  213:       UPPER = LSAME( UPLO, 'U' )
  214: 
  215:       IF( UPPER ) THEN
  216:           DPOS    = 2 * NB + 1
  217:           OFDPOS  = 2 * NB
  218:       ELSE
  219:           DPOS    = 1
  220:           OFDPOS  = 2
  221:       ENDIF
  222: 
  223: *
  224: *     Upper case
  225: *
  226:       IF( UPPER ) THEN
  227: *
  228:           IF( WANTZ ) THEN
  229:               VPOS   = MOD( SWEEP-1, 2 ) * N + ST
  230:               TAUPOS = MOD( SWEEP-1, 2 ) * N + ST
  231:           ELSE
  232:               VPOS   = MOD( SWEEP-1, 2 ) * N + ST
  233:               TAUPOS = MOD( SWEEP-1, 2 ) * N + ST
  234:           ENDIF
  235: *
  236:           IF( TTYPE.EQ.1 ) THEN
  237:               LM = ED - ST + 1
  238: *
  239:               V( VPOS ) = ONE
  240:               DO 10 I = 1, LM-1
  241:                   V( VPOS+I )         = DCONJG( A( OFDPOS-I, ST+I ) )
  242:                   A( OFDPOS-I, ST+I ) = ZERO
  243:    10         CONTINUE
  244:               CTMP = DCONJG( A( OFDPOS, ST ) )
  245:               CALL ZLARFG( LM, CTMP, V( VPOS+1 ), 1,
  246:      $                                       TAU( TAUPOS ) )
  247:               A( OFDPOS, ST ) = CTMP
  248: *
  249:               LM = ED - ST + 1
  250:               CALL ZLARFY( UPLO, LM, V( VPOS ), 1,
  251:      $                     DCONJG( TAU( TAUPOS ) ),
  252:      $                     A( DPOS, ST ), LDA-1, WORK)
  253:           ENDIF
  254: *
  255:           IF( TTYPE.EQ.3 ) THEN
  256: *
  257:               LM = ED - ST + 1
  258:               CALL ZLARFY( UPLO, LM, V( VPOS ), 1,
  259:      $                     DCONJG( TAU( TAUPOS ) ),
  260:      $                     A( DPOS, ST ), LDA-1, WORK)
  261:           ENDIF
  262: *
  263:           IF( TTYPE.EQ.2 ) THEN
  264:               J1 = ED+1
  265:               J2 = MIN( ED+NB, N )
  266:               LN = ED-ST+1
  267:               LM = J2-J1+1
  268:               IF( LM.GT.0) THEN
  269:                   CALL ZLARFX( 'Left', LN, LM, V( VPOS ),
  270:      $                         DCONJG( TAU( TAUPOS ) ),
  271:      $                         A( DPOS-NB, J1 ), LDA-1, WORK)
  272: *
  273:                   IF( WANTZ ) THEN
  274:                       VPOS   = MOD( SWEEP-1, 2 ) * N + J1
  275:                       TAUPOS = MOD( SWEEP-1, 2 ) * N + J1
  276:                   ELSE
  277:                       VPOS   = MOD( SWEEP-1, 2 ) * N + J1
  278:                       TAUPOS = MOD( SWEEP-1, 2 ) * N + J1
  279:                   ENDIF
  280: *
  281:                   V( VPOS ) = ONE
  282:                   DO 30 I = 1, LM-1
  283:                       V( VPOS+I )          =
  284:      $                                    DCONJG( A( DPOS-NB-I, J1+I ) )
  285:                       A( DPOS-NB-I, J1+I ) = ZERO
  286:    30             CONTINUE
  287:                   CTMP = DCONJG( A( DPOS-NB, J1 ) )
  288:                   CALL ZLARFG( LM, CTMP, V( VPOS+1 ), 1, TAU( TAUPOS ) )
  289:                   A( DPOS-NB, J1 ) = CTMP
  290: *
  291:                   CALL ZLARFX( 'Right', LN-1, LM, V( VPOS ),
  292:      $                         TAU( TAUPOS ),
  293:      $                         A( DPOS-NB+1, J1 ), LDA-1, WORK)
  294:               ENDIF
  295:           ENDIF
  296: *
  297: *     Lower case
  298: *
  299:       ELSE
  300: *
  301:           IF( WANTZ ) THEN
  302:               VPOS   = MOD( SWEEP-1, 2 ) * N + ST
  303:               TAUPOS = MOD( SWEEP-1, 2 ) * N + ST
  304:           ELSE
  305:               VPOS   = MOD( SWEEP-1, 2 ) * N + ST
  306:               TAUPOS = MOD( SWEEP-1, 2 ) * N + ST
  307:           ENDIF
  308: *
  309:           IF( TTYPE.EQ.1 ) THEN
  310:               LM = ED - ST + 1
  311: *
  312:               V( VPOS ) = ONE
  313:               DO 20 I = 1, LM-1
  314:                   V( VPOS+I )         = A( OFDPOS+I, ST-1 )
  315:                   A( OFDPOS+I, ST-1 ) = ZERO
  316:    20         CONTINUE
  317:               CALL ZLARFG( LM, A( OFDPOS, ST-1 ), V( VPOS+1 ), 1,
  318:      $                                       TAU( TAUPOS ) )
  319: *
  320:               LM = ED - ST + 1
  321: *
  322:               CALL ZLARFY( UPLO, LM, V( VPOS ), 1,
  323:      $                     DCONJG( TAU( TAUPOS ) ),
  324:      $                     A( DPOS, ST ), LDA-1, WORK)
  325: 
  326:           ENDIF
  327: *
  328:           IF( TTYPE.EQ.3 ) THEN
  329:               LM = ED - ST + 1
  330: *
  331:               CALL ZLARFY( UPLO, LM, V( VPOS ), 1,
  332:      $                     DCONJG( TAU( TAUPOS ) ),
  333:      $                     A( DPOS, ST ), LDA-1, WORK)
  334: 
  335:           ENDIF
  336: *
  337:           IF( TTYPE.EQ.2 ) THEN
  338:               J1 = ED+1
  339:               J2 = MIN( ED+NB, N )
  340:               LN = ED-ST+1
  341:               LM = J2-J1+1
  342: *
  343:               IF( LM.GT.0) THEN
  344:                   CALL ZLARFX( 'Right', LM, LN, V( VPOS ),
  345:      $                         TAU( TAUPOS ), A( DPOS+NB, ST ),
  346:      $                         LDA-1, WORK)
  347: *
  348:                   IF( WANTZ ) THEN
  349:                       VPOS   = MOD( SWEEP-1, 2 ) * N + J1
  350:                       TAUPOS = MOD( SWEEP-1, 2 ) * N + J1
  351:                   ELSE
  352:                       VPOS   = MOD( SWEEP-1, 2 ) * N + J1
  353:                       TAUPOS = MOD( SWEEP-1, 2 ) * N + J1
  354:                   ENDIF
  355: *
  356:                   V( VPOS ) = ONE
  357:                   DO 40 I = 1, LM-1
  358:                       V( VPOS+I )        = A( DPOS+NB+I, ST )
  359:                       A( DPOS+NB+I, ST ) = ZERO
  360:    40             CONTINUE
  361:                   CALL ZLARFG( LM, A( DPOS+NB, ST ), V( VPOS+1 ), 1,
  362:      $                                        TAU( TAUPOS ) )
  363: *
  364:                   CALL ZLARFX( 'Left', LM, LN-1, V( VPOS ),
  365:      $                         DCONJG( TAU( TAUPOS ) ),
  366:      $                         A( DPOS+NB-1, ST+1 ), LDA-1, WORK)
  367: 
  368:               ENDIF
  369:           ENDIF
  370:       ENDIF
  371: *
  372:       RETURN
  373: *
  374: *     End of ZHB2ST_KERNELS
  375: *
  376:       END

CVSweb interface <joel.bertrand@systella.fr>