version 1.4, 2018/05/29 07:18:18
|
version 1.5, 2020/05/21 21:46:05
|
Line 1
|
Line 1
|
*> \brief \b ZHB2ST_KERNELS |
*> \brief \b ZHB2ST_KERNELS |
* |
* |
* @precisions fortran z -> s d c |
* @precisions fortran z -> s d c |
* |
* |
* =========== DOCUMENTATION =========== |
* =========== DOCUMENTATION =========== |
* |
* |
* Online html documentation available at |
* Online html documentation available at |
* http://www.netlib.org/lapack/explore-html/ |
* http://www.netlib.org/lapack/explore-html/ |
* |
* |
*> \htmlonly |
*> \htmlonly |
*> Download ZHB2ST_KERNELS + dependencies |
*> Download ZHB2ST_KERNELS + dependencies |
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhb2st_kernels.f"> |
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhb2st_kernels.f"> |
*> [TGZ]</a> |
*> [TGZ]</a> |
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhb2st_kernels.f"> |
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhb2st_kernels.f"> |
*> [ZIP]</a> |
*> [ZIP]</a> |
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhb2st_kernels.f"> |
*> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhb2st_kernels.f"> |
*> [TXT]</a> |
*> [TXT]</a> |
*> \endhtmlonly |
*> \endhtmlonly |
* |
* |
* Definition: |
* Definition: |
* =========== |
* =========== |
* |
* |
* SUBROUTINE ZHB2ST_KERNELS( UPLO, WANTZ, TTYPE, |
* SUBROUTINE ZHB2ST_KERNELS( UPLO, WANTZ, TTYPE, |
* ST, ED, SWEEP, N, NB, IB, |
* ST, ED, SWEEP, N, NB, IB, |
* A, LDA, V, TAU, LDVT, WORK) |
* A, LDA, V, TAU, LDVT, WORK) |
* |
* |
Line 32
|
Line 32
|
* INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT |
* INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT |
* .. |
* .. |
* .. Array Arguments .. |
* .. Array Arguments .. |
* COMPLEX*16 A( LDA, * ), V( * ), |
* COMPLEX*16 A( LDA, * ), V( * ), |
* TAU( * ), WORK( * ) |
* TAU( * ), WORK( * ) |
* |
* |
*> \par Purpose: |
*> \par Purpose: |
* ============= |
* ============= |
*> |
*> |
Line 124
|
Line 124
|
*> LDVT is INTEGER. |
*> LDVT is INTEGER. |
*> \endverbatim |
*> \endverbatim |
*> |
*> |
*> \param[in] WORK |
*> \param[out] WORK |
*> \verbatim |
*> \verbatim |
*> WORK is COMPLEX*16 array. Workspace of size nb. |
*> WORK is COMPLEX*16 array. Workspace of size nb. |
*> \endverbatim |
*> \endverbatim |
Line 147
|
Line 147
|
*> http://doi.acm.org/10.1145/2063384.2063394 |
*> http://doi.acm.org/10.1145/2063384.2063394 |
*> |
*> |
*> A. Haidar, J. Kurzak, P. Luszczek, 2013. |
*> A. Haidar, J. Kurzak, P. Luszczek, 2013. |
*> An improved parallel singular value algorithm and its implementation |
*> An improved parallel singular value algorithm and its implementation |
*> for multicore hardware, In Proceedings of 2013 International Conference |
*> for multicore hardware, In Proceedings of 2013 International Conference |
*> for High Performance Computing, Networking, Storage and Analysis (SC '13). |
*> for High Performance Computing, Networking, Storage and Analysis (SC '13). |
*> Denver, Colorado, USA, 2013. |
*> Denver, Colorado, USA, 2013. |
Line 155
|
Line 155
|
*> http://doi.acm.org/10.1145/2503210.2503292 |
*> http://doi.acm.org/10.1145/2503210.2503292 |
*> |
*> |
*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. |
*> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra. |
*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure |
*> A novel hybrid CPU-GPU generalized eigensolver for electronic structure |
*> calculations based on fine-grained memory aware tasks. |
*> calculations based on fine-grained memory aware tasks. |
*> International Journal of High Performance Computing Applications. |
*> International Journal of High Performance Computing Applications. |
*> Volume 28 Issue 2, Pages 196-209, May 2014. |
*> Volume 28 Issue 2, Pages 196-209, May 2014. |
*> http://hpc.sagepub.com/content/28/2/196 |
*> http://hpc.sagepub.com/content/28/2/196 |
*> |
*> |
*> \endverbatim |
*> \endverbatim |
*> |
*> |
* ===================================================================== |
* ===================================================================== |
SUBROUTINE ZHB2ST_KERNELS( UPLO, WANTZ, TTYPE, |
SUBROUTINE ZHB2ST_KERNELS( UPLO, WANTZ, TTYPE, |
$ ST, ED, SWEEP, N, NB, IB, |
$ ST, ED, SWEEP, N, NB, IB, |
$ A, LDA, V, TAU, LDVT, WORK) |
$ A, LDA, V, TAU, LDVT, WORK) |
* |
* |
Line 181
|
Line 181
|
INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT |
INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT |
* .. |
* .. |
* .. Array Arguments .. |
* .. Array Arguments .. |
COMPLEX*16 A( LDA, * ), V( * ), |
COMPLEX*16 A( LDA, * ), V( * ), |
$ TAU( * ), WORK( * ) |
$ TAU( * ), WORK( * ) |
* .. |
* .. |
* |
* |
Line 195
|
Line 195
|
* .. Local Scalars .. |
* .. Local Scalars .. |
LOGICAL UPPER |
LOGICAL UPPER |
INTEGER I, J1, J2, LM, LN, VPOS, TAUPOS, |
INTEGER I, J1, J2, LM, LN, VPOS, TAUPOS, |
$ DPOS, OFDPOS, AJETER |
$ DPOS, OFDPOS, AJETER |
COMPLEX*16 CTMP |
COMPLEX*16 CTMP |
* .. |
* .. |
* .. External Subroutines .. |
* .. External Subroutines .. |
EXTERNAL ZLARFG, ZLARFX, ZLARFY |
EXTERNAL ZLARFG, ZLARFX, ZLARFY |
Line 209
|
Line 209
|
* .. |
* .. |
* .. |
* .. |
* .. Executable Statements .. |
* .. Executable Statements .. |
* |
* |
AJETER = IB + LDVT |
AJETER = IB + LDVT |
UPPER = LSAME( UPLO, 'U' ) |
UPPER = LSAME( UPLO, 'U' ) |
|
|
Line 240
|
Line 240
|
V( VPOS ) = ONE |
V( VPOS ) = ONE |
DO 10 I = 1, LM-1 |
DO 10 I = 1, LM-1 |
V( VPOS+I ) = DCONJG( A( OFDPOS-I, ST+I ) ) |
V( VPOS+I ) = DCONJG( A( OFDPOS-I, ST+I ) ) |
A( OFDPOS-I, ST+I ) = ZERO |
A( OFDPOS-I, ST+I ) = ZERO |
10 CONTINUE |
10 CONTINUE |
CTMP = DCONJG( A( OFDPOS, ST ) ) |
CTMP = DCONJG( A( OFDPOS, ST ) ) |
CALL ZLARFG( LM, CTMP, V( VPOS+1 ), 1, |
CALL ZLARFG( LM, CTMP, V( VPOS+1 ), 1, |
$ TAU( TAUPOS ) ) |
$ TAU( TAUPOS ) ) |
A( OFDPOS, ST ) = CTMP |
A( OFDPOS, ST ) = CTMP |
* |
* |
Line 281
|
Line 281
|
* |
* |
V( VPOS ) = ONE |
V( VPOS ) = ONE |
DO 30 I = 1, LM-1 |
DO 30 I = 1, LM-1 |
V( VPOS+I ) = |
V( VPOS+I ) = |
$ DCONJG( A( DPOS-NB-I, J1+I ) ) |
$ DCONJG( A( DPOS-NB-I, J1+I ) ) |
A( DPOS-NB-I, J1+I ) = ZERO |
A( DPOS-NB-I, J1+I ) = ZERO |
30 CONTINUE |
30 CONTINUE |
CTMP = DCONJG( A( DPOS-NB, J1 ) ) |
CTMP = DCONJG( A( DPOS-NB, J1 ) ) |
CALL ZLARFG( LM, CTMP, V( VPOS+1 ), 1, TAU( TAUPOS ) ) |
CALL ZLARFG( LM, CTMP, V( VPOS+1 ), 1, TAU( TAUPOS ) ) |
A( DPOS-NB, J1 ) = CTMP |
A( DPOS-NB, J1 ) = CTMP |
* |
* |
CALL ZLARFX( 'Right', LN-1, LM, V( VPOS ), |
CALL ZLARFX( 'Right', LN-1, LM, V( VPOS ), |
$ TAU( TAUPOS ), |
$ TAU( TAUPOS ), |
$ A( DPOS-NB+1, J1 ), LDA-1, WORK) |
$ A( DPOS-NB+1, J1 ), LDA-1, WORK) |
Line 296
|
Line 296
|
ENDIF |
ENDIF |
* |
* |
* Lower case |
* Lower case |
* |
* |
ELSE |
ELSE |
* |
* |
IF( WANTZ ) THEN |
IF( WANTZ ) THEN |
VPOS = MOD( SWEEP-1, 2 ) * N + ST |
VPOS = MOD( SWEEP-1, 2 ) * N + ST |
TAUPOS = MOD( SWEEP-1, 2 ) * N + ST |
TAUPOS = MOD( SWEEP-1, 2 ) * N + ST |
Line 313
|
Line 313
|
V( VPOS ) = ONE |
V( VPOS ) = ONE |
DO 20 I = 1, LM-1 |
DO 20 I = 1, LM-1 |
V( VPOS+I ) = A( OFDPOS+I, ST-1 ) |
V( VPOS+I ) = A( OFDPOS+I, ST-1 ) |
A( OFDPOS+I, ST-1 ) = ZERO |
A( OFDPOS+I, ST-1 ) = ZERO |
20 CONTINUE |
20 CONTINUE |
CALL ZLARFG( LM, A( OFDPOS, ST-1 ), V( VPOS+1 ), 1, |
CALL ZLARFG( LM, A( OFDPOS, ST-1 ), V( VPOS+1 ), 1, |
$ TAU( TAUPOS ) ) |
$ TAU( TAUPOS ) ) |
* |
* |
LM = ED - ST + 1 |
LM = ED - ST + 1 |
Line 342
|
Line 342
|
LM = J2-J1+1 |
LM = J2-J1+1 |
* |
* |
IF( LM.GT.0) THEN |
IF( LM.GT.0) THEN |
CALL ZLARFX( 'Right', LM, LN, V( VPOS ), |
CALL ZLARFX( 'Right', LM, LN, V( VPOS ), |
$ TAU( TAUPOS ), A( DPOS+NB, ST ), |
$ TAU( TAUPOS ), A( DPOS+NB, ST ), |
$ LDA-1, WORK) |
$ LDA-1, WORK) |
* |
* |
Line 359
|
Line 359
|
V( VPOS+I ) = A( DPOS+NB+I, ST ) |
V( VPOS+I ) = A( DPOS+NB+I, ST ) |
A( DPOS+NB+I, ST ) = ZERO |
A( DPOS+NB+I, ST ) = ZERO |
40 CONTINUE |
40 CONTINUE |
CALL ZLARFG( LM, A( DPOS+NB, ST ), V( VPOS+1 ), 1, |
CALL ZLARFG( LM, A( DPOS+NB, ST ), V( VPOS+1 ), 1, |
$ TAU( TAUPOS ) ) |
$ TAU( TAUPOS ) ) |
* |
* |
CALL ZLARFX( 'Left', LM, LN-1, V( VPOS ), |
CALL ZLARFX( 'Left', LM, LN-1, V( VPOS ), |
$ DCONJG( TAU( TAUPOS ) ), |
$ DCONJG( TAU( TAUPOS ) ), |
$ A( DPOS+NB-1, ST+1 ), LDA-1, WORK) |
$ A( DPOS+NB-1, ST+1 ), LDA-1, WORK) |
|
|
ENDIF |
ENDIF |
ENDIF |
ENDIF |
ENDIF |
ENDIF |
Line 374
|
Line 374
|
* |
* |
* END OF ZHB2ST_KERNELS |
* END OF ZHB2ST_KERNELS |
* |
* |
END |
END |