Annotation of rpl/lapack/lapack/zhegv_2stage.f, revision 1.3
1.1 bertrand 1: *> \brief \b ZHEGV_2STAGE
2: *
3: * @precisions fortran z -> c
4: *
5: * =========== DOCUMENTATION ===========
6: *
7: * Online html documentation available at
8: * http://www.netlib.org/lapack/explore-html/
9: *
10: *> \htmlonly
11: *> Download ZHEGV_2STAGE + dependencies
12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhegv_2stage.f">
13: *> [TGZ]</a>
14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhegv_2stage.f">
15: *> [ZIP]</a>
16: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhegv_2stage.f">
17: *> [TXT]</a>
18: *> \endhtmlonly
19: *
20: * Definition:
21: * ===========
22: *
23: * SUBROUTINE ZHEGV_2STAGE( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W,
24: * WORK, LWORK, RWORK, INFO )
25: *
26: * IMPLICIT NONE
27: *
28: * .. Scalar Arguments ..
29: * CHARACTER JOBZ, UPLO
30: * INTEGER INFO, ITYPE, LDA, LDB, LWORK, N
31: * ..
32: * .. Array Arguments ..
33: * DOUBLE PRECISION RWORK( * ), W( * )
34: * COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * )
35: * ..
36: *
37: *
38: *> \par Purpose:
39: * =============
40: *>
41: *> \verbatim
42: *>
43: *> ZHEGV_2STAGE computes all the eigenvalues, and optionally, the eigenvectors
44: *> of a complex generalized Hermitian-definite eigenproblem, of the form
45: *> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x.
46: *> Here A and B are assumed to be Hermitian and B is also
47: *> positive definite.
48: *> This routine use the 2stage technique for the reduction to tridiagonal
49: *> which showed higher performance on recent architecture and for large
1.3 ! bertrand 50: *> sizes N>2000.
1.1 bertrand 51: *> \endverbatim
52: *
53: * Arguments:
54: * ==========
55: *
56: *> \param[in] ITYPE
57: *> \verbatim
58: *> ITYPE is INTEGER
59: *> Specifies the problem type to be solved:
60: *> = 1: A*x = (lambda)*B*x
61: *> = 2: A*B*x = (lambda)*x
62: *> = 3: B*A*x = (lambda)*x
63: *> \endverbatim
64: *>
65: *> \param[in] JOBZ
66: *> \verbatim
67: *> JOBZ is CHARACTER*1
68: *> = 'N': Compute eigenvalues only;
69: *> = 'V': Compute eigenvalues and eigenvectors.
70: *> Not available in this release.
71: *> \endverbatim
72: *>
73: *> \param[in] UPLO
74: *> \verbatim
75: *> UPLO is CHARACTER*1
76: *> = 'U': Upper triangles of A and B are stored;
77: *> = 'L': Lower triangles of A and B are stored.
78: *> \endverbatim
79: *>
80: *> \param[in] N
81: *> \verbatim
82: *> N is INTEGER
83: *> The order of the matrices A and B. N >= 0.
84: *> \endverbatim
85: *>
86: *> \param[in,out] A
87: *> \verbatim
88: *> A is COMPLEX*16 array, dimension (LDA, N)
89: *> On entry, the Hermitian matrix A. If UPLO = 'U', the
90: *> leading N-by-N upper triangular part of A contains the
91: *> upper triangular part of the matrix A. If UPLO = 'L',
92: *> the leading N-by-N lower triangular part of A contains
93: *> the lower triangular part of the matrix A.
94: *>
95: *> On exit, if JOBZ = 'V', then if INFO = 0, A contains the
96: *> matrix Z of eigenvectors. The eigenvectors are normalized
97: *> as follows:
98: *> if ITYPE = 1 or 2, Z**H*B*Z = I;
99: *> if ITYPE = 3, Z**H*inv(B)*Z = I.
100: *> If JOBZ = 'N', then on exit the upper triangle (if UPLO='U')
101: *> or the lower triangle (if UPLO='L') of A, including the
102: *> diagonal, is destroyed.
103: *> \endverbatim
104: *>
105: *> \param[in] LDA
106: *> \verbatim
107: *> LDA is INTEGER
108: *> The leading dimension of the array A. LDA >= max(1,N).
109: *> \endverbatim
110: *>
111: *> \param[in,out] B
112: *> \verbatim
113: *> B is COMPLEX*16 array, dimension (LDB, N)
114: *> On entry, the Hermitian positive definite matrix B.
115: *> If UPLO = 'U', the leading N-by-N upper triangular part of B
116: *> contains the upper triangular part of the matrix B.
117: *> If UPLO = 'L', the leading N-by-N lower triangular part of B
118: *> contains the lower triangular part of the matrix B.
119: *>
120: *> On exit, if INFO <= N, the part of B containing the matrix is
121: *> overwritten by the triangular factor U or L from the Cholesky
122: *> factorization B = U**H*U or B = L*L**H.
123: *> \endverbatim
124: *>
125: *> \param[in] LDB
126: *> \verbatim
127: *> LDB is INTEGER
128: *> The leading dimension of the array B. LDB >= max(1,N).
129: *> \endverbatim
130: *>
131: *> \param[out] W
132: *> \verbatim
133: *> W is DOUBLE PRECISION array, dimension (N)
134: *> If INFO = 0, the eigenvalues in ascending order.
135: *> \endverbatim
136: *>
137: *> \param[out] WORK
138: *> \verbatim
139: *> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK))
140: *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
141: *> \endverbatim
142: *>
143: *> \param[in] LWORK
144: *> \verbatim
145: *> LWORK is INTEGER
146: *> The length of the array WORK. LWORK >= 1, when N <= 1;
147: *> otherwise
148: *> If JOBZ = 'N' and N > 1, LWORK must be queried.
149: *> LWORK = MAX(1, dimension) where
150: *> dimension = max(stage1,stage2) + (KD+1)*N + N
151: *> = N*KD + N*max(KD+1,FACTOPTNB)
152: *> + max(2*KD*KD, KD*NTHREADS)
153: *> + (KD+1)*N + N
154: *> where KD is the blocking size of the reduction,
155: *> FACTOPTNB is the blocking used by the QR or LQ
156: *> algorithm, usually FACTOPTNB=128 is a good choice
157: *> NTHREADS is the number of threads used when
158: *> openMP compilation is enabled, otherwise =1.
159: *> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available
160: *>
161: *> If LWORK = -1, then a workspace query is assumed; the routine
162: *> only calculates the optimal size of the WORK array, returns
163: *> this value as the first entry of the WORK array, and no error
164: *> message related to LWORK is issued by XERBLA.
165: *> \endverbatim
166: *>
167: *> \param[out] RWORK
168: *> \verbatim
169: *> RWORK is DOUBLE PRECISION array, dimension (max(1, 3*N-2))
170: *> \endverbatim
171: *>
172: *> \param[out] INFO
173: *> \verbatim
174: *> INFO is INTEGER
175: *> = 0: successful exit
176: *> < 0: if INFO = -i, the i-th argument had an illegal value
177: *> > 0: ZPOTRF or ZHEEV returned an error code:
178: *> <= N: if INFO = i, ZHEEV failed to converge;
179: *> i off-diagonal elements of an intermediate
180: *> tridiagonal form did not converge to zero;
181: *> > N: if INFO = N + i, for 1 <= i <= N, then the leading
182: *> minor of order i of B is not positive definite.
183: *> The factorization of B could not be completed and
184: *> no eigenvalues or eigenvectors were computed.
185: *> \endverbatim
186: *
187: * Authors:
188: * ========
189: *
190: *> \author Univ. of Tennessee
191: *> \author Univ. of California Berkeley
192: *> \author Univ. of Colorado Denver
193: *> \author NAG Ltd.
194: *
1.3 ! bertrand 195: *> \date November 2017
1.1 bertrand 196: *
197: *> \ingroup complex16HEeigen
198: *
199: *> \par Further Details:
200: * =====================
201: *>
202: *> \verbatim
203: *>
204: *> All details about the 2stage techniques are available in:
205: *>
206: *> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
207: *> Parallel reduction to condensed forms for symmetric eigenvalue problems
208: *> using aggregated fine-grained and memory-aware kernels. In Proceedings
209: *> of 2011 International Conference for High Performance Computing,
210: *> Networking, Storage and Analysis (SC '11), New York, NY, USA,
211: *> Article 8 , 11 pages.
212: *> http://doi.acm.org/10.1145/2063384.2063394
213: *>
214: *> A. Haidar, J. Kurzak, P. Luszczek, 2013.
215: *> An improved parallel singular value algorithm and its implementation
216: *> for multicore hardware, In Proceedings of 2013 International Conference
217: *> for High Performance Computing, Networking, Storage and Analysis (SC '13).
218: *> Denver, Colorado, USA, 2013.
219: *> Article 90, 12 pages.
220: *> http://doi.acm.org/10.1145/2503210.2503292
221: *>
222: *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
223: *> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
224: *> calculations based on fine-grained memory aware tasks.
225: *> International Journal of High Performance Computing Applications.
226: *> Volume 28 Issue 2, Pages 196-209, May 2014.
227: *> http://hpc.sagepub.com/content/28/2/196
228: *>
229: *> \endverbatim
230: *
231: * =====================================================================
232: SUBROUTINE ZHEGV_2STAGE( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W,
233: $ WORK, LWORK, RWORK, INFO )
234: *
235: IMPLICIT NONE
236: *
1.3 ! bertrand 237: * -- LAPACK driver routine (version 3.8.0) --
1.1 bertrand 238: * -- LAPACK is a software package provided by Univ. of Tennessee, --
239: * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
1.3 ! bertrand 240: * November 2017
1.1 bertrand 241: *
242: * .. Scalar Arguments ..
243: CHARACTER JOBZ, UPLO
244: INTEGER INFO, ITYPE, LDA, LDB, LWORK, N
245: * ..
246: * .. Array Arguments ..
247: DOUBLE PRECISION RWORK( * ), W( * )
248: COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * )
249: * ..
250: *
251: * =====================================================================
252: *
253: * .. Parameters ..
254: COMPLEX*16 ONE
255: PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) )
256: * ..
257: * .. Local Scalars ..
258: LOGICAL LQUERY, UPPER, WANTZ
259: CHARACTER TRANS
260: INTEGER NEIG, LWMIN, LHTRD, LWTRD, KD, IB
261: * ..
262: * .. External Functions ..
263: LOGICAL LSAME
1.3 ! bertrand 264: INTEGER ILAENV2STAGE
! 265: EXTERNAL LSAME, ILAENV2STAGE
1.1 bertrand 266: * ..
267: * .. External Subroutines ..
268: EXTERNAL XERBLA, ZHEGST, ZPOTRF, ZTRMM, ZTRSM,
269: $ ZHEEV_2STAGE
270: * ..
271: * .. Intrinsic Functions ..
272: INTRINSIC MAX
273: * ..
274: * .. Executable Statements ..
275: *
276: * Test the input parameters.
277: *
278: WANTZ = LSAME( JOBZ, 'V' )
279: UPPER = LSAME( UPLO, 'U' )
280: LQUERY = ( LWORK.EQ.-1 )
281: *
282: INFO = 0
283: IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN
284: INFO = -1
285: ELSE IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN
286: INFO = -2
287: ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN
288: INFO = -3
289: ELSE IF( N.LT.0 ) THEN
290: INFO = -4
291: ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
292: INFO = -6
293: ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
294: INFO = -8
295: END IF
296: *
297: IF( INFO.EQ.0 ) THEN
1.3 ! bertrand 298: KD = ILAENV2STAGE( 1, 'ZHETRD_2STAGE', JOBZ, N, -1, -1, -1 )
! 299: IB = ILAENV2STAGE( 2, 'ZHETRD_2STAGE', JOBZ, N, KD, -1, -1 )
! 300: LHTRD = ILAENV2STAGE( 3, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, -1 )
! 301: LWTRD = ILAENV2STAGE( 4, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, -1 )
1.1 bertrand 302: LWMIN = N + LHTRD + LWTRD
303: WORK( 1 ) = LWMIN
304: *
305: IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
306: INFO = -11
307: END IF
308: END IF
309: *
310: IF( INFO.NE.0 ) THEN
311: CALL XERBLA( 'ZHEGV_2STAGE ', -INFO )
312: RETURN
313: ELSE IF( LQUERY ) THEN
314: RETURN
315: END IF
316: *
317: * Quick return if possible
318: *
319: IF( N.EQ.0 )
320: $ RETURN
321: *
322: * Form a Cholesky factorization of B.
323: *
324: CALL ZPOTRF( UPLO, N, B, LDB, INFO )
325: IF( INFO.NE.0 ) THEN
326: INFO = N + INFO
327: RETURN
328: END IF
329: *
330: * Transform problem to standard eigenvalue problem and solve.
331: *
332: CALL ZHEGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO )
333: CALL ZHEEV_2STAGE( JOBZ, UPLO, N, A, LDA, W,
334: $ WORK, LWORK, RWORK, INFO )
335: *
336: IF( WANTZ ) THEN
337: *
338: * Backtransform eigenvectors to the original problem.
339: *
340: NEIG = N
341: IF( INFO.GT.0 )
342: $ NEIG = INFO - 1
343: IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN
344: *
345: * For A*x=(lambda)*B*x and A*B*x=(lambda)*x;
346: * backtransform eigenvectors: x = inv(L)**H *y or inv(U)*y
347: *
348: IF( UPPER ) THEN
349: TRANS = 'N'
350: ELSE
351: TRANS = 'C'
352: END IF
353: *
354: CALL ZTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE,
355: $ B, LDB, A, LDA )
356: *
357: ELSE IF( ITYPE.EQ.3 ) THEN
358: *
359: * For B*A*x=(lambda)*x;
360: * backtransform eigenvectors: x = L*y or U**H *y
361: *
362: IF( UPPER ) THEN
363: TRANS = 'C'
364: ELSE
365: TRANS = 'N'
366: END IF
367: *
368: CALL ZTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE,
369: $ B, LDB, A, LDA )
370: END IF
371: END IF
372: *
373: WORK( 1 ) = LWMIN
374: *
375: RETURN
376: *
377: * End of ZHEGV_2STAGE
378: *
379: END
CVSweb interface <joel.bertrand@systella.fr>