1: *> \brief \b ZHEGV_2STAGE
2: *
3: * @precisions fortran z -> c
4: *
5: * =========== DOCUMENTATION ===========
6: *
7: * Online html documentation available at
8: * http://www.netlib.org/lapack/explore-html/
9: *
10: *> \htmlonly
11: *> Download ZHEGV_2STAGE + dependencies
12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhegv_2stage.f">
13: *> [TGZ]</a>
14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhegv_2stage.f">
15: *> [ZIP]</a>
16: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhegv_2stage.f">
17: *> [TXT]</a>
18: *> \endhtmlonly
19: *
20: * Definition:
21: * ===========
22: *
23: * SUBROUTINE ZHEGV_2STAGE( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W,
24: * WORK, LWORK, RWORK, INFO )
25: *
26: * IMPLICIT NONE
27: *
28: * .. Scalar Arguments ..
29: * CHARACTER JOBZ, UPLO
30: * INTEGER INFO, ITYPE, LDA, LDB, LWORK, N
31: * ..
32: * .. Array Arguments ..
33: * DOUBLE PRECISION RWORK( * ), W( * )
34: * COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * )
35: * ..
36: *
37: *
38: *> \par Purpose:
39: * =============
40: *>
41: *> \verbatim
42: *>
43: *> ZHEGV_2STAGE computes all the eigenvalues, and optionally, the eigenvectors
44: *> of a complex generalized Hermitian-definite eigenproblem, of the form
45: *> A*x=(lambda)*B*x, A*Bx=(lambda)*x, or B*A*x=(lambda)*x.
46: *> Here A and B are assumed to be Hermitian and B is also
47: *> positive definite.
48: *> This routine use the 2stage technique for the reduction to tridiagonal
49: *> which showed higher performance on recent architecture and for large
50: *> sizes N>2000.
51: *> \endverbatim
52: *
53: * Arguments:
54: * ==========
55: *
56: *> \param[in] ITYPE
57: *> \verbatim
58: *> ITYPE is INTEGER
59: *> Specifies the problem type to be solved:
60: *> = 1: A*x = (lambda)*B*x
61: *> = 2: A*B*x = (lambda)*x
62: *> = 3: B*A*x = (lambda)*x
63: *> \endverbatim
64: *>
65: *> \param[in] JOBZ
66: *> \verbatim
67: *> JOBZ is CHARACTER*1
68: *> = 'N': Compute eigenvalues only;
69: *> = 'V': Compute eigenvalues and eigenvectors.
70: *> Not available in this release.
71: *> \endverbatim
72: *>
73: *> \param[in] UPLO
74: *> \verbatim
75: *> UPLO is CHARACTER*1
76: *> = 'U': Upper triangles of A and B are stored;
77: *> = 'L': Lower triangles of A and B are stored.
78: *> \endverbatim
79: *>
80: *> \param[in] N
81: *> \verbatim
82: *> N is INTEGER
83: *> The order of the matrices A and B. N >= 0.
84: *> \endverbatim
85: *>
86: *> \param[in,out] A
87: *> \verbatim
88: *> A is COMPLEX*16 array, dimension (LDA, N)
89: *> On entry, the Hermitian matrix A. If UPLO = 'U', the
90: *> leading N-by-N upper triangular part of A contains the
91: *> upper triangular part of the matrix A. If UPLO = 'L',
92: *> the leading N-by-N lower triangular part of A contains
93: *> the lower triangular part of the matrix A.
94: *>
95: *> On exit, if JOBZ = 'V', then if INFO = 0, A contains the
96: *> matrix Z of eigenvectors. The eigenvectors are normalized
97: *> as follows:
98: *> if ITYPE = 1 or 2, Z**H*B*Z = I;
99: *> if ITYPE = 3, Z**H*inv(B)*Z = I.
100: *> If JOBZ = 'N', then on exit the upper triangle (if UPLO='U')
101: *> or the lower triangle (if UPLO='L') of A, including the
102: *> diagonal, is destroyed.
103: *> \endverbatim
104: *>
105: *> \param[in] LDA
106: *> \verbatim
107: *> LDA is INTEGER
108: *> The leading dimension of the array A. LDA >= max(1,N).
109: *> \endverbatim
110: *>
111: *> \param[in,out] B
112: *> \verbatim
113: *> B is COMPLEX*16 array, dimension (LDB, N)
114: *> On entry, the Hermitian positive definite matrix B.
115: *> If UPLO = 'U', the leading N-by-N upper triangular part of B
116: *> contains the upper triangular part of the matrix B.
117: *> If UPLO = 'L', the leading N-by-N lower triangular part of B
118: *> contains the lower triangular part of the matrix B.
119: *>
120: *> On exit, if INFO <= N, the part of B containing the matrix is
121: *> overwritten by the triangular factor U or L from the Cholesky
122: *> factorization B = U**H*U or B = L*L**H.
123: *> \endverbatim
124: *>
125: *> \param[in] LDB
126: *> \verbatim
127: *> LDB is INTEGER
128: *> The leading dimension of the array B. LDB >= max(1,N).
129: *> \endverbatim
130: *>
131: *> \param[out] W
132: *> \verbatim
133: *> W is DOUBLE PRECISION array, dimension (N)
134: *> If INFO = 0, the eigenvalues in ascending order.
135: *> \endverbatim
136: *>
137: *> \param[out] WORK
138: *> \verbatim
139: *> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK))
140: *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
141: *> \endverbatim
142: *>
143: *> \param[in] LWORK
144: *> \verbatim
145: *> LWORK is INTEGER
146: *> The length of the array WORK. LWORK >= 1, when N <= 1;
147: *> otherwise
148: *> If JOBZ = 'N' and N > 1, LWORK must be queried.
149: *> LWORK = MAX(1, dimension) where
150: *> dimension = max(stage1,stage2) + (KD+1)*N + N
151: *> = N*KD + N*max(KD+1,FACTOPTNB)
152: *> + max(2*KD*KD, KD*NTHREADS)
153: *> + (KD+1)*N + N
154: *> where KD is the blocking size of the reduction,
155: *> FACTOPTNB is the blocking used by the QR or LQ
156: *> algorithm, usually FACTOPTNB=128 is a good choice
157: *> NTHREADS is the number of threads used when
158: *> openMP compilation is enabled, otherwise =1.
159: *> If JOBZ = 'V' and N > 1, LWORK must be queried. Not yet available
160: *>
161: *> If LWORK = -1, then a workspace query is assumed; the routine
162: *> only calculates the optimal size of the WORK array, returns
163: *> this value as the first entry of the WORK array, and no error
164: *> message related to LWORK is issued by XERBLA.
165: *> \endverbatim
166: *>
167: *> \param[out] RWORK
168: *> \verbatim
169: *> RWORK is DOUBLE PRECISION array, dimension (max(1, 3*N-2))
170: *> \endverbatim
171: *>
172: *> \param[out] INFO
173: *> \verbatim
174: *> INFO is INTEGER
175: *> = 0: successful exit
176: *> < 0: if INFO = -i, the i-th argument had an illegal value
177: *> > 0: ZPOTRF or ZHEEV returned an error code:
178: *> <= N: if INFO = i, ZHEEV failed to converge;
179: *> i off-diagonal elements of an intermediate
180: *> tridiagonal form did not converge to zero;
181: *> > N: if INFO = N + i, for 1 <= i <= N, then the leading
182: *> minor of order i of B is not positive definite.
183: *> The factorization of B could not be completed and
184: *> no eigenvalues or eigenvectors were computed.
185: *> \endverbatim
186: *
187: * Authors:
188: * ========
189: *
190: *> \author Univ. of Tennessee
191: *> \author Univ. of California Berkeley
192: *> \author Univ. of Colorado Denver
193: *> \author NAG Ltd.
194: *
195: *> \ingroup complex16HEeigen
196: *
197: *> \par Further Details:
198: * =====================
199: *>
200: *> \verbatim
201: *>
202: *> All details about the 2stage techniques are available in:
203: *>
204: *> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
205: *> Parallel reduction to condensed forms for symmetric eigenvalue problems
206: *> using aggregated fine-grained and memory-aware kernels. In Proceedings
207: *> of 2011 International Conference for High Performance Computing,
208: *> Networking, Storage and Analysis (SC '11), New York, NY, USA,
209: *> Article 8 , 11 pages.
210: *> http://doi.acm.org/10.1145/2063384.2063394
211: *>
212: *> A. Haidar, J. Kurzak, P. Luszczek, 2013.
213: *> An improved parallel singular value algorithm and its implementation
214: *> for multicore hardware, In Proceedings of 2013 International Conference
215: *> for High Performance Computing, Networking, Storage and Analysis (SC '13).
216: *> Denver, Colorado, USA, 2013.
217: *> Article 90, 12 pages.
218: *> http://doi.acm.org/10.1145/2503210.2503292
219: *>
220: *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
221: *> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
222: *> calculations based on fine-grained memory aware tasks.
223: *> International Journal of High Performance Computing Applications.
224: *> Volume 28 Issue 2, Pages 196-209, May 2014.
225: *> http://hpc.sagepub.com/content/28/2/196
226: *>
227: *> \endverbatim
228: *
229: * =====================================================================
230: SUBROUTINE ZHEGV_2STAGE( ITYPE, JOBZ, UPLO, N, A, LDA, B, LDB, W,
231: $ WORK, LWORK, RWORK, INFO )
232: *
233: IMPLICIT NONE
234: *
235: * -- LAPACK driver routine --
236: * -- LAPACK is a software package provided by Univ. of Tennessee, --
237: * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
238: *
239: * .. Scalar Arguments ..
240: CHARACTER JOBZ, UPLO
241: INTEGER INFO, ITYPE, LDA, LDB, LWORK, N
242: * ..
243: * .. Array Arguments ..
244: DOUBLE PRECISION RWORK( * ), W( * )
245: COMPLEX*16 A( LDA, * ), B( LDB, * ), WORK( * )
246: * ..
247: *
248: * =====================================================================
249: *
250: * .. Parameters ..
251: COMPLEX*16 ONE
252: PARAMETER ( ONE = ( 1.0D+0, 0.0D+0 ) )
253: * ..
254: * .. Local Scalars ..
255: LOGICAL LQUERY, UPPER, WANTZ
256: CHARACTER TRANS
257: INTEGER NEIG, LWMIN, LHTRD, LWTRD, KD, IB
258: * ..
259: * .. External Functions ..
260: LOGICAL LSAME
261: INTEGER ILAENV2STAGE
262: EXTERNAL LSAME, ILAENV2STAGE
263: * ..
264: * .. External Subroutines ..
265: EXTERNAL XERBLA, ZHEGST, ZPOTRF, ZTRMM, ZTRSM,
266: $ ZHEEV_2STAGE
267: * ..
268: * .. Intrinsic Functions ..
269: INTRINSIC MAX
270: * ..
271: * .. Executable Statements ..
272: *
273: * Test the input parameters.
274: *
275: WANTZ = LSAME( JOBZ, 'V' )
276: UPPER = LSAME( UPLO, 'U' )
277: LQUERY = ( LWORK.EQ.-1 )
278: *
279: INFO = 0
280: IF( ITYPE.LT.1 .OR. ITYPE.GT.3 ) THEN
281: INFO = -1
282: ELSE IF( .NOT.( LSAME( JOBZ, 'N' ) ) ) THEN
283: INFO = -2
284: ELSE IF( .NOT.( UPPER .OR. LSAME( UPLO, 'L' ) ) ) THEN
285: INFO = -3
286: ELSE IF( N.LT.0 ) THEN
287: INFO = -4
288: ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
289: INFO = -6
290: ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
291: INFO = -8
292: END IF
293: *
294: IF( INFO.EQ.0 ) THEN
295: KD = ILAENV2STAGE( 1, 'ZHETRD_2STAGE', JOBZ, N, -1, -1, -1 )
296: IB = ILAENV2STAGE( 2, 'ZHETRD_2STAGE', JOBZ, N, KD, -1, -1 )
297: LHTRD = ILAENV2STAGE( 3, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, -1 )
298: LWTRD = ILAENV2STAGE( 4, 'ZHETRD_2STAGE', JOBZ, N, KD, IB, -1 )
299: LWMIN = N + LHTRD + LWTRD
300: WORK( 1 ) = LWMIN
301: *
302: IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
303: INFO = -11
304: END IF
305: END IF
306: *
307: IF( INFO.NE.0 ) THEN
308: CALL XERBLA( 'ZHEGV_2STAGE ', -INFO )
309: RETURN
310: ELSE IF( LQUERY ) THEN
311: RETURN
312: END IF
313: *
314: * Quick return if possible
315: *
316: IF( N.EQ.0 )
317: $ RETURN
318: *
319: * Form a Cholesky factorization of B.
320: *
321: CALL ZPOTRF( UPLO, N, B, LDB, INFO )
322: IF( INFO.NE.0 ) THEN
323: INFO = N + INFO
324: RETURN
325: END IF
326: *
327: * Transform problem to standard eigenvalue problem and solve.
328: *
329: CALL ZHEGST( ITYPE, UPLO, N, A, LDA, B, LDB, INFO )
330: CALL ZHEEV_2STAGE( JOBZ, UPLO, N, A, LDA, W,
331: $ WORK, LWORK, RWORK, INFO )
332: *
333: IF( WANTZ ) THEN
334: *
335: * Backtransform eigenvectors to the original problem.
336: *
337: NEIG = N
338: IF( INFO.GT.0 )
339: $ NEIG = INFO - 1
340: IF( ITYPE.EQ.1 .OR. ITYPE.EQ.2 ) THEN
341: *
342: * For A*x=(lambda)*B*x and A*B*x=(lambda)*x;
343: * backtransform eigenvectors: x = inv(L)**H *y or inv(U)*y
344: *
345: IF( UPPER ) THEN
346: TRANS = 'N'
347: ELSE
348: TRANS = 'C'
349: END IF
350: *
351: CALL ZTRSM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE,
352: $ B, LDB, A, LDA )
353: *
354: ELSE IF( ITYPE.EQ.3 ) THEN
355: *
356: * For B*A*x=(lambda)*x;
357: * backtransform eigenvectors: x = L*y or U**H *y
358: *
359: IF( UPPER ) THEN
360: TRANS = 'C'
361: ELSE
362: TRANS = 'N'
363: END IF
364: *
365: CALL ZTRMM( 'Left', UPLO, TRANS, 'Non-unit', N, NEIG, ONE,
366: $ B, LDB, A, LDA )
367: END IF
368: END IF
369: *
370: WORK( 1 ) = LWMIN
371: *
372: RETURN
373: *
374: * End of ZHEGV_2STAGE
375: *
376: END
CVSweb interface <joel.bertrand@systella.fr>