1: *> \brief \b ZHETRD_HE2HB
2: *
3: * @precisions fortran z -> s d c
4: *
5: * =========== DOCUMENTATION ===========
6: *
7: * Online html documentation available at
8: * http://www.netlib.org/lapack/explore-html/
9: *
10: *> \htmlonly
11: *> Download ZHETRD_HE2HB + dependencies
12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhetrd_he2hb.f">
13: *> [TGZ]</a>
14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhetrd_he2hb.f">
15: *> [ZIP]</a>
16: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhetrd_he2hb.f">
17: *> [TXT]</a>
18: *> \endhtmlonly
19: *
20: * Definition:
21: * ===========
22: *
23: * SUBROUTINE ZHETRD_HE2HB( UPLO, N, KD, A, LDA, AB, LDAB, TAU,
24: * WORK, LWORK, INFO )
25: *
26: * IMPLICIT NONE
27: *
28: * .. Scalar Arguments ..
29: * CHARACTER UPLO
30: * INTEGER INFO, LDA, LDAB, LWORK, N, KD
31: * ..
32: * .. Array Arguments ..
33: * COMPLEX*16 A( LDA, * ), AB( LDAB, * ),
34: * TAU( * ), WORK( * )
35: * ..
36: *
37: *
38: *> \par Purpose:
39: * =============
40: *>
41: *> \verbatim
42: *>
43: *> ZHETRD_HE2HB reduces a complex Hermitian matrix A to complex Hermitian
44: *> band-diagonal form AB by a unitary similarity transformation:
45: *> Q**H * A * Q = AB.
46: *> \endverbatim
47: *
48: * Arguments:
49: * ==========
50: *
51: *> \param[in] UPLO
52: *> \verbatim
53: *> UPLO is CHARACTER*1
54: *> = 'U': Upper triangle of A is stored;
55: *> = 'L': Lower triangle of A is stored.
56: *> \endverbatim
57: *>
58: *> \param[in] N
59: *> \verbatim
60: *> N is INTEGER
61: *> The order of the matrix A. N >= 0.
62: *> \endverbatim
63: *>
64: *> \param[in] KD
65: *> \verbatim
66: *> KD is INTEGER
67: *> The number of superdiagonals of the reduced matrix if UPLO = 'U',
68: *> or the number of subdiagonals if UPLO = 'L'. KD >= 0.
69: *> The reduced matrix is stored in the array AB.
70: *> \endverbatim
71: *>
72: *> \param[in,out] A
73: *> \verbatim
74: *> A is COMPLEX*16 array, dimension (LDA,N)
75: *> On entry, the Hermitian matrix A. If UPLO = 'U', the leading
76: *> N-by-N upper triangular part of A contains the upper
77: *> triangular part of the matrix A, and the strictly lower
78: *> triangular part of A is not referenced. If UPLO = 'L', the
79: *> leading N-by-N lower triangular part of A contains the lower
80: *> triangular part of the matrix A, and the strictly upper
81: *> triangular part of A is not referenced.
82: *> On exit, if UPLO = 'U', the diagonal and first superdiagonal
83: *> of A are overwritten by the corresponding elements of the
84: *> tridiagonal matrix T, and the elements above the first
85: *> superdiagonal, with the array TAU, represent the unitary
86: *> matrix Q as a product of elementary reflectors; if UPLO
87: *> = 'L', the diagonal and first subdiagonal of A are over-
88: *> written by the corresponding elements of the tridiagonal
89: *> matrix T, and the elements below the first subdiagonal, with
90: *> the array TAU, represent the unitary matrix Q as a product
91: *> of elementary reflectors. See Further Details.
92: *> \endverbatim
93: *>
94: *> \param[in] LDA
95: *> \verbatim
96: *> LDA is INTEGER
97: *> The leading dimension of the array A. LDA >= max(1,N).
98: *> \endverbatim
99: *>
100: *> \param[out] AB
101: *> \verbatim
102: *> AB is COMPLEX*16 array, dimension (LDAB,N)
103: *> On exit, the upper or lower triangle of the Hermitian band
104: *> matrix A, stored in the first KD+1 rows of the array. The
105: *> j-th column of A is stored in the j-th column of the array AB
106: *> as follows:
107: *> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
108: *> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd).
109: *> \endverbatim
110: *>
111: *> \param[in] LDAB
112: *> \verbatim
113: *> LDAB is INTEGER
114: *> The leading dimension of the array AB. LDAB >= KD+1.
115: *> \endverbatim
116: *>
117: *> \param[out] TAU
118: *> \verbatim
119: *> TAU is COMPLEX*16 array, dimension (N-KD)
120: *> The scalar factors of the elementary reflectors (see Further
121: *> Details).
122: *> \endverbatim
123: *>
124: *> \param[out] WORK
125: *> \verbatim
126: *> WORK is COMPLEX*16 array, dimension (LWORK)
127: *> On exit, if INFO = 0, or if LWORK=-1,
128: *> WORK(1) returns the size of LWORK.
129: *> \endverbatim
130: *>
131: *> \param[in] LWORK
132: *> \verbatim
133: *> LWORK is INTEGER
134: *> The dimension of the array WORK which should be calculated
135: *> by a workspace query. LWORK = MAX(1, LWORK_QUERY)
136: *> If LWORK = -1, then a workspace query is assumed; the routine
137: *> only calculates the optimal size of the WORK array, returns
138: *> this value as the first entry of the WORK array, and no error
139: *> message related to LWORK is issued by XERBLA.
140: *> LWORK_QUERY = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD
141: *> where FACTOPTNB is the blocking used by the QR or LQ
142: *> algorithm, usually FACTOPTNB=128 is a good choice otherwise
143: *> putting LWORK=-1 will provide the size of WORK.
144: *> \endverbatim
145: *>
146: *> \param[out] INFO
147: *> \verbatim
148: *> INFO is INTEGER
149: *> = 0: successful exit
150: *> < 0: if INFO = -i, the i-th argument had an illegal value
151: *> \endverbatim
152: *
153: * Authors:
154: * ========
155: *
156: *> \author Univ. of Tennessee
157: *> \author Univ. of California Berkeley
158: *> \author Univ. of Colorado Denver
159: *> \author NAG Ltd.
160: *
161: *> \ingroup complex16HEcomputational
162: *
163: *> \par Further Details:
164: * =====================
165: *>
166: *> \verbatim
167: *>
168: *> Implemented by Azzam Haidar.
169: *>
170: *> All details are available on technical report, SC11, SC13 papers.
171: *>
172: *> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
173: *> Parallel reduction to condensed forms for symmetric eigenvalue problems
174: *> using aggregated fine-grained and memory-aware kernels. In Proceedings
175: *> of 2011 International Conference for High Performance Computing,
176: *> Networking, Storage and Analysis (SC '11), New York, NY, USA,
177: *> Article 8 , 11 pages.
178: *> http://doi.acm.org/10.1145/2063384.2063394
179: *>
180: *> A. Haidar, J. Kurzak, P. Luszczek, 2013.
181: *> An improved parallel singular value algorithm and its implementation
182: *> for multicore hardware, In Proceedings of 2013 International Conference
183: *> for High Performance Computing, Networking, Storage and Analysis (SC '13).
184: *> Denver, Colorado, USA, 2013.
185: *> Article 90, 12 pages.
186: *> http://doi.acm.org/10.1145/2503210.2503292
187: *>
188: *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
189: *> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
190: *> calculations based on fine-grained memory aware tasks.
191: *> International Journal of High Performance Computing Applications.
192: *> Volume 28 Issue 2, Pages 196-209, May 2014.
193: *> http://hpc.sagepub.com/content/28/2/196
194: *>
195: *> \endverbatim
196: *>
197: *> \verbatim
198: *>
199: *> If UPLO = 'U', the matrix Q is represented as a product of elementary
200: *> reflectors
201: *>
202: *> Q = H(k)**H . . . H(2)**H H(1)**H, where k = n-kd.
203: *>
204: *> Each H(i) has the form
205: *>
206: *> H(i) = I - tau * v * v**H
207: *>
208: *> where tau is a complex scalar, and v is a complex vector with
209: *> v(1:i+kd-1) = 0 and v(i+kd) = 1; conjg(v(i+kd+1:n)) is stored on exit in
210: *> A(i,i+kd+1:n), and tau in TAU(i).
211: *>
212: *> If UPLO = 'L', the matrix Q is represented as a product of elementary
213: *> reflectors
214: *>
215: *> Q = H(1) H(2) . . . H(k), where k = n-kd.
216: *>
217: *> Each H(i) has the form
218: *>
219: *> H(i) = I - tau * v * v**H
220: *>
221: *> where tau is a complex scalar, and v is a complex vector with
222: *> v(kd+1:i) = 0 and v(i+kd+1) = 1; v(i+kd+2:n) is stored on exit in
223: *> A(i+kd+2:n,i), and tau in TAU(i).
224: *>
225: *> The contents of A on exit are illustrated by the following examples
226: *> with n = 5:
227: *>
228: *> if UPLO = 'U': if UPLO = 'L':
229: *>
230: *> ( ab ab/v1 v1 v1 v1 ) ( ab )
231: *> ( ab ab/v2 v2 v2 ) ( ab/v1 ab )
232: *> ( ab ab/v3 v3 ) ( v1 ab/v2 ab )
233: *> ( ab ab/v4 ) ( v1 v2 ab/v3 ab )
234: *> ( ab ) ( v1 v2 v3 ab/v4 ab )
235: *>
236: *> where d and e denote diagonal and off-diagonal elements of T, and vi
237: *> denotes an element of the vector defining H(i).
238: *> \endverbatim
239: *>
240: * =====================================================================
241: SUBROUTINE ZHETRD_HE2HB( UPLO, N, KD, A, LDA, AB, LDAB, TAU,
242: $ WORK, LWORK, INFO )
243: *
244: IMPLICIT NONE
245: *
246: * -- LAPACK computational routine --
247: * -- LAPACK is a software package provided by Univ. of Tennessee, --
248: * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
249: *
250: * .. Scalar Arguments ..
251: CHARACTER UPLO
252: INTEGER INFO, LDA, LDAB, LWORK, N, KD
253: * ..
254: * .. Array Arguments ..
255: COMPLEX*16 A( LDA, * ), AB( LDAB, * ),
256: $ TAU( * ), WORK( * )
257: * ..
258: *
259: * =====================================================================
260: *
261: * .. Parameters ..
262: DOUBLE PRECISION RONE
263: COMPLEX*16 ZERO, ONE, HALF
264: PARAMETER ( RONE = 1.0D+0,
265: $ ZERO = ( 0.0D+0, 0.0D+0 ),
266: $ ONE = ( 1.0D+0, 0.0D+0 ),
267: $ HALF = ( 0.5D+0, 0.0D+0 ) )
268: * ..
269: * .. Local Scalars ..
270: LOGICAL LQUERY, UPPER
271: INTEGER I, J, IINFO, LWMIN, PN, PK, LK,
272: $ LDT, LDW, LDS2, LDS1,
273: $ LS2, LS1, LW, LT,
274: $ TPOS, WPOS, S2POS, S1POS
275: * ..
276: * .. External Subroutines ..
277: EXTERNAL XERBLA, ZHER2K, ZHEMM, ZGEMM, ZCOPY,
278: $ ZLARFT, ZGELQF, ZGEQRF, ZLASET
279: * ..
280: * .. Intrinsic Functions ..
281: INTRINSIC MIN, MAX
282: * ..
283: * .. External Functions ..
284: LOGICAL LSAME
285: INTEGER ILAENV2STAGE
286: EXTERNAL LSAME, ILAENV2STAGE
287: * ..
288: * .. Executable Statements ..
289: *
290: * Determine the minimal workspace size required
291: * and test the input parameters
292: *
293: INFO = 0
294: UPPER = LSAME( UPLO, 'U' )
295: LQUERY = ( LWORK.EQ.-1 )
296: LWMIN = ILAENV2STAGE( 4, 'ZHETRD_HE2HB', ' ', N, KD, -1, -1 )
297:
298: IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
299: INFO = -1
300: ELSE IF( N.LT.0 ) THEN
301: INFO = -2
302: ELSE IF( KD.LT.0 ) THEN
303: INFO = -3
304: ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
305: INFO = -5
306: ELSE IF( LDAB.LT.MAX( 1, KD+1 ) ) THEN
307: INFO = -7
308: ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
309: INFO = -10
310: END IF
311: *
312: IF( INFO.NE.0 ) THEN
313: CALL XERBLA( 'ZHETRD_HE2HB', -INFO )
314: RETURN
315: ELSE IF( LQUERY ) THEN
316: WORK( 1 ) = LWMIN
317: RETURN
318: END IF
319: *
320: * Quick return if possible
321: * Copy the upper/lower portion of A into AB
322: *
323: IF( N.LE.KD+1 ) THEN
324: IF( UPPER ) THEN
325: DO 100 I = 1, N
326: LK = MIN( KD+1, I )
327: CALL ZCOPY( LK, A( I-LK+1, I ), 1,
328: $ AB( KD+1-LK+1, I ), 1 )
329: 100 CONTINUE
330: ELSE
331: DO 110 I = 1, N
332: LK = MIN( KD+1, N-I+1 )
333: CALL ZCOPY( LK, A( I, I ), 1, AB( 1, I ), 1 )
334: 110 CONTINUE
335: ENDIF
336: WORK( 1 ) = 1
337: RETURN
338: END IF
339: *
340: * Determine the pointer position for the workspace
341: *
342: LDT = KD
343: LDS1 = KD
344: LT = LDT*KD
345: LW = N*KD
346: LS1 = LDS1*KD
347: LS2 = LWMIN - LT - LW - LS1
348: * LS2 = N*MAX(KD,FACTOPTNB)
349: TPOS = 1
350: WPOS = TPOS + LT
351: S1POS = WPOS + LW
352: S2POS = S1POS + LS1
353: IF( UPPER ) THEN
354: LDW = KD
355: LDS2 = KD
356: ELSE
357: LDW = N
358: LDS2 = N
359: ENDIF
360: *
361: *
362: * Set the workspace of the triangular matrix T to zero once such a
363: * way every time T is generated the upper/lower portion will be always zero
364: *
365: CALL ZLASET( "A", LDT, KD, ZERO, ZERO, WORK( TPOS ), LDT )
366: *
367: IF( UPPER ) THEN
368: DO 10 I = 1, N - KD, KD
369: PN = N-I-KD+1
370: PK = MIN( N-I-KD+1, KD )
371: *
372: * Compute the LQ factorization of the current block
373: *
374: CALL ZGELQF( KD, PN, A( I, I+KD ), LDA,
375: $ TAU( I ), WORK( S2POS ), LS2, IINFO )
376: *
377: * Copy the upper portion of A into AB
378: *
379: DO 20 J = I, I+PK-1
380: LK = MIN( KD, N-J ) + 1
381: CALL ZCOPY( LK, A( J, J ), LDA, AB( KD+1, J ), LDAB-1 )
382: 20 CONTINUE
383: *
384: CALL ZLASET( 'Lower', PK, PK, ZERO, ONE,
385: $ A( I, I+KD ), LDA )
386: *
387: * Form the matrix T
388: *
389: CALL ZLARFT( 'Forward', 'Rowwise', PN, PK,
390: $ A( I, I+KD ), LDA, TAU( I ),
391: $ WORK( TPOS ), LDT )
392: *
393: * Compute W:
394: *
395: CALL ZGEMM( 'Conjugate', 'No transpose', PK, PN, PK,
396: $ ONE, WORK( TPOS ), LDT,
397: $ A( I, I+KD ), LDA,
398: $ ZERO, WORK( S2POS ), LDS2 )
399: *
400: CALL ZHEMM( 'Right', UPLO, PK, PN,
401: $ ONE, A( I+KD, I+KD ), LDA,
402: $ WORK( S2POS ), LDS2,
403: $ ZERO, WORK( WPOS ), LDW )
404: *
405: CALL ZGEMM( 'No transpose', 'Conjugate', PK, PK, PN,
406: $ ONE, WORK( WPOS ), LDW,
407: $ WORK( S2POS ), LDS2,
408: $ ZERO, WORK( S1POS ), LDS1 )
409: *
410: CALL ZGEMM( 'No transpose', 'No transpose', PK, PN, PK,
411: $ -HALF, WORK( S1POS ), LDS1,
412: $ A( I, I+KD ), LDA,
413: $ ONE, WORK( WPOS ), LDW )
414: *
415: *
416: * Update the unreduced submatrix A(i+kd:n,i+kd:n), using
417: * an update of the form: A := A - V'*W - W'*V
418: *
419: CALL ZHER2K( UPLO, 'Conjugate', PN, PK,
420: $ -ONE, A( I, I+KD ), LDA,
421: $ WORK( WPOS ), LDW,
422: $ RONE, A( I+KD, I+KD ), LDA )
423: 10 CONTINUE
424: *
425: * Copy the upper band to AB which is the band storage matrix
426: *
427: DO 30 J = N-KD+1, N
428: LK = MIN(KD, N-J) + 1
429: CALL ZCOPY( LK, A( J, J ), LDA, AB( KD+1, J ), LDAB-1 )
430: 30 CONTINUE
431: *
432: ELSE
433: *
434: * Reduce the lower triangle of A to lower band matrix
435: *
436: DO 40 I = 1, N - KD, KD
437: PN = N-I-KD+1
438: PK = MIN( N-I-KD+1, KD )
439: *
440: * Compute the QR factorization of the current block
441: *
442: CALL ZGEQRF( PN, KD, A( I+KD, I ), LDA,
443: $ TAU( I ), WORK( S2POS ), LS2, IINFO )
444: *
445: * Copy the upper portion of A into AB
446: *
447: DO 50 J = I, I+PK-1
448: LK = MIN( KD, N-J ) + 1
449: CALL ZCOPY( LK, A( J, J ), 1, AB( 1, J ), 1 )
450: 50 CONTINUE
451: *
452: CALL ZLASET( 'Upper', PK, PK, ZERO, ONE,
453: $ A( I+KD, I ), LDA )
454: *
455: * Form the matrix T
456: *
457: CALL ZLARFT( 'Forward', 'Columnwise', PN, PK,
458: $ A( I+KD, I ), LDA, TAU( I ),
459: $ WORK( TPOS ), LDT )
460: *
461: * Compute W:
462: *
463: CALL ZGEMM( 'No transpose', 'No transpose', PN, PK, PK,
464: $ ONE, A( I+KD, I ), LDA,
465: $ WORK( TPOS ), LDT,
466: $ ZERO, WORK( S2POS ), LDS2 )
467: *
468: CALL ZHEMM( 'Left', UPLO, PN, PK,
469: $ ONE, A( I+KD, I+KD ), LDA,
470: $ WORK( S2POS ), LDS2,
471: $ ZERO, WORK( WPOS ), LDW )
472: *
473: CALL ZGEMM( 'Conjugate', 'No transpose', PK, PK, PN,
474: $ ONE, WORK( S2POS ), LDS2,
475: $ WORK( WPOS ), LDW,
476: $ ZERO, WORK( S1POS ), LDS1 )
477: *
478: CALL ZGEMM( 'No transpose', 'No transpose', PN, PK, PK,
479: $ -HALF, A( I+KD, I ), LDA,
480: $ WORK( S1POS ), LDS1,
481: $ ONE, WORK( WPOS ), LDW )
482: *
483: *
484: * Update the unreduced submatrix A(i+kd:n,i+kd:n), using
485: * an update of the form: A := A - V*W' - W*V'
486: *
487: CALL ZHER2K( UPLO, 'No transpose', PN, PK,
488: $ -ONE, A( I+KD, I ), LDA,
489: $ WORK( WPOS ), LDW,
490: $ RONE, A( I+KD, I+KD ), LDA )
491: * ==================================================================
492: * RESTORE A FOR COMPARISON AND CHECKING TO BE REMOVED
493: * DO 45 J = I, I+PK-1
494: * LK = MIN( KD, N-J ) + 1
495: * CALL ZCOPY( LK, AB( 1, J ), 1, A( J, J ), 1 )
496: * 45 CONTINUE
497: * ==================================================================
498: 40 CONTINUE
499: *
500: * Copy the lower band to AB which is the band storage matrix
501: *
502: DO 60 J = N-KD+1, N
503: LK = MIN(KD, N-J) + 1
504: CALL ZCOPY( LK, A( J, J ), 1, AB( 1, J ), 1 )
505: 60 CONTINUE
506:
507: END IF
508: *
509: WORK( 1 ) = LWMIN
510: RETURN
511: *
512: * End of ZHETRD_HE2HB
513: *
514: END
CVSweb interface <joel.bertrand@systella.fr>