Annotation of rpl/lapack/lapack/zhetrf_aa.f, revision 1.5
1.1 bertrand 1: *> \brief \b ZHETRF_AA
2: *
3: * =========== DOCUMENTATION ===========
4: *
5: * Online html documentation available at
6: * http://www.netlib.org/lapack/explore-html/
7: *
8: *> \htmlonly
9: *> Download ZHETRF_AA + dependencies
10: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhetrf_aa.f">
11: *> [TGZ]</a>
12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhetrf_aa.f">
13: *> [ZIP]</a>
14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhetrf_aa.f">
15: *> [TXT]</a>
16: *> \endhtmlonly
17: *
18: * Definition:
19: * ===========
20: *
21: * SUBROUTINE ZHETRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO )
22: *
23: * .. Scalar Arguments ..
24: * CHARACTER UPLO
25: * INTEGER N, LDA, LWORK, INFO
26: * ..
27: * .. Array Arguments ..
28: * INTEGER IPIV( * )
29: * COMPLEX*16 A( LDA, * ), WORK( * )
30: * ..
31: *
32: *> \par Purpose:
33: * =============
34: *>
35: *> \verbatim
36: *>
37: *> ZHETRF_AA computes the factorization of a complex hermitian matrix A
38: *> using the Aasen's algorithm. The form of the factorization is
39: *>
1.5 ! bertrand 40: *> A = U**H*T*U or A = L*T*L**H
1.1 bertrand 41: *>
42: *> where U (or L) is a product of permutation and unit upper (lower)
43: *> triangular matrices, and T is a hermitian tridiagonal matrix.
44: *>
45: *> This is the blocked version of the algorithm, calling Level 3 BLAS.
46: *> \endverbatim
47: *
48: * Arguments:
49: * ==========
50: *
51: *> \param[in] UPLO
52: *> \verbatim
53: *> UPLO is CHARACTER*1
54: *> = 'U': Upper triangle of A is stored;
55: *> = 'L': Lower triangle of A is stored.
56: *> \endverbatim
57: *>
58: *> \param[in] N
59: *> \verbatim
60: *> N is INTEGER
61: *> The order of the matrix A. N >= 0.
62: *> \endverbatim
63: *>
64: *> \param[in,out] A
65: *> \verbatim
66: *> A is COMPLEX*16 array, dimension (LDA,N)
67: *> On entry, the hermitian matrix A. If UPLO = 'U', the leading
68: *> N-by-N upper triangular part of A contains the upper
69: *> triangular part of the matrix A, and the strictly lower
70: *> triangular part of A is not referenced. If UPLO = 'L', the
71: *> leading N-by-N lower triangular part of A contains the lower
72: *> triangular part of the matrix A, and the strictly upper
73: *> triangular part of A is not referenced.
74: *>
75: *> On exit, the tridiagonal matrix is stored in the diagonals
76: *> and the subdiagonals of A just below (or above) the diagonals,
77: *> and L is stored below (or above) the subdiaonals, when UPLO
78: *> is 'L' (or 'U').
79: *> \endverbatim
80: *>
81: *> \param[in] LDA
82: *> \verbatim
83: *> LDA is INTEGER
84: *> The leading dimension of the array A. LDA >= max(1,N).
85: *> \endverbatim
86: *>
87: *> \param[out] IPIV
88: *> \verbatim
89: *> IPIV is INTEGER array, dimension (N)
90: *> On exit, it contains the details of the interchanges, i.e.,
91: *> the row and column k of A were interchanged with the
92: *> row and column IPIV(k).
93: *> \endverbatim
94: *>
95: *> \param[out] WORK
96: *> \verbatim
97: *> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK))
98: *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
99: *> \endverbatim
100: *>
101: *> \param[in] LWORK
102: *> \verbatim
103: *> LWORK is INTEGER
104: *> The length of WORK. LWORK >= MAX(1,2*N). For optimum performance
105: *> LWORK >= N*(1+NB), where NB is the optimal blocksize.
106: *>
107: *> If LWORK = -1, then a workspace query is assumed; the routine
108: *> only calculates the optimal size of the WORK array, returns
109: *> this value as the first entry of the WORK array, and no error
110: *> message related to LWORK is issued by XERBLA.
111: *> \endverbatim
112: *>
113: *> \param[out] INFO
114: *> \verbatim
115: *> INFO is INTEGER
116: *> = 0: successful exit
1.3 bertrand 117: *> < 0: if INFO = -i, the i-th argument had an illegal value.
1.1 bertrand 118: *> \endverbatim
119: *
120: * Authors:
121: * ========
122: *
123: *> \author Univ. of Tennessee
124: *> \author Univ. of California Berkeley
125: *> \author Univ. of Colorado Denver
126: *> \author NAG Ltd.
127: *
1.3 bertrand 128: *> \date November 2017
1.1 bertrand 129: *
130: *> \ingroup complex16HEcomputational
131: *
132: * =====================================================================
133: SUBROUTINE ZHETRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO)
134: *
1.3 bertrand 135: * -- LAPACK computational routine (version 3.8.0) --
1.1 bertrand 136: * -- LAPACK is a software package provided by Univ. of Tennessee, --
137: * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
1.3 bertrand 138: * November 2017
1.1 bertrand 139: *
140: IMPLICIT NONE
141: *
142: * .. Scalar Arguments ..
143: CHARACTER UPLO
144: INTEGER N, LDA, LWORK, INFO
145: * ..
146: * .. Array Arguments ..
147: INTEGER IPIV( * )
148: COMPLEX*16 A( LDA, * ), WORK( * )
149: * ..
150: *
151: * =====================================================================
152: * .. Parameters ..
153: COMPLEX*16 ZERO, ONE
154: PARAMETER ( ZERO = (0.0D+0, 0.0D+0), ONE = (1.0D+0, 0.0D+0) )
155: *
156: * .. Local Scalars ..
157: LOGICAL LQUERY, UPPER
1.3 bertrand 158: INTEGER J, LWKOPT
1.1 bertrand 159: INTEGER NB, MJ, NJ, K1, K2, J1, J2, J3, JB
160: COMPLEX*16 ALPHA
161: * ..
162: * .. External Functions ..
163: LOGICAL LSAME
164: INTEGER ILAENV
165: EXTERNAL LSAME, ILAENV
166: * ..
167: * .. External Subroutines ..
1.3 bertrand 168: EXTERNAL ZLAHEF_AA, ZGEMM, ZGEMV, ZCOPY, ZSCAL, ZSWAP, XERBLA
1.1 bertrand 169: * ..
170: * .. Intrinsic Functions ..
171: INTRINSIC DBLE, DCONJG, MAX
172: * ..
173: * .. Executable Statements ..
174: *
175: * Determine the block size
176: *
1.3 bertrand 177: NB = ILAENV( 1, 'ZHETRF_AA', UPLO, N, -1, -1, -1 )
1.1 bertrand 178: *
179: * Test the input parameters.
180: *
181: INFO = 0
182: UPPER = LSAME( UPLO, 'U' )
183: LQUERY = ( LWORK.EQ.-1 )
184: IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
185: INFO = -1
186: ELSE IF( N.LT.0 ) THEN
187: INFO = -2
188: ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
189: INFO = -4
190: ELSE IF( LWORK.LT.MAX( 1, 2*N ) .AND. .NOT.LQUERY ) THEN
191: INFO = -7
192: END IF
193: *
194: IF( INFO.EQ.0 ) THEN
195: LWKOPT = (NB+1)*N
196: WORK( 1 ) = LWKOPT
197: END IF
198: *
199: IF( INFO.NE.0 ) THEN
200: CALL XERBLA( 'ZHETRF_AA', -INFO )
201: RETURN
202: ELSE IF( LQUERY ) THEN
203: RETURN
204: END IF
205: *
206: * Quick return
207: *
208: IF ( N.EQ.0 ) THEN
209: RETURN
210: ENDIF
211: IPIV( 1 ) = 1
212: IF ( N.EQ.1 ) THEN
213: A( 1, 1 ) = DBLE( A( 1, 1 ) )
214: RETURN
215: END IF
216: *
1.3 bertrand 217: * Adjust block size based on the workspace size
1.1 bertrand 218: *
219: IF( LWORK.LT.((1+NB)*N) ) THEN
220: NB = ( LWORK-N ) / N
221: END IF
222: *
223: IF( UPPER ) THEN
224: *
225: * .....................................................
1.5 ! bertrand 226: * Factorize A as U**H*D*U using the upper triangle of A
1.1 bertrand 227: * .....................................................
228: *
229: * copy first row A(1, 1:N) into H(1:n) (stored in WORK(1:N))
230: *
231: CALL ZCOPY( N, A( 1, 1 ), LDA, WORK( 1 ), 1 )
232: *
233: * J is the main loop index, increasing from 1 to N in steps of
234: * JB, where JB is the number of columns factorized by ZLAHEF;
235: * JB is either NB, or N-J+1 for the last block
236: *
237: J = 0
238: 10 CONTINUE
239: IF( J.GE.N )
240: $ GO TO 20
241: *
242: * each step of the main loop
243: * J is the last column of the previous panel
244: * J1 is the first column of the current panel
245: * K1 identifies if the previous column of the panel has been
246: * explicitly stored, e.g., K1=1 for the first panel, and
247: * K1=0 for the rest
248: *
249: J1 = J + 1
250: JB = MIN( N-J1+1, NB )
251: K1 = MAX(1, J)-J
252: *
253: * Panel factorization
254: *
255: CALL ZLAHEF_AA( UPLO, 2-K1, N-J, JB,
256: $ A( MAX(1, J), J+1 ), LDA,
1.3 bertrand 257: $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ) )
1.1 bertrand 258: *
1.5 ! bertrand 259: * Adjust IPIV and apply it back (J-th step picks (J+1)-th pivot)
1.1 bertrand 260: *
261: DO J2 = J+2, MIN(N, J+JB+1)
262: IPIV( J2 ) = IPIV( J2 ) + J
263: IF( (J2.NE.IPIV(J2)) .AND. ((J1-K1).GT.2) ) THEN
264: CALL ZSWAP( J1-K1-2, A( 1, J2 ), 1,
265: $ A( 1, IPIV(J2) ), 1 )
266: END IF
267: END DO
268: J = J + JB
269: *
270: * Trailing submatrix update, where
271: * the row A(J1-1, J2-1:N) stores U(J1, J2+1:N) and
272: * WORK stores the current block of the auxiriarly matrix H
273: *
274: IF( J.LT.N ) THEN
275: *
276: * if the first panel and JB=1 (NB=1), then nothing to do
277: *
278: IF( J1.GT.1 .OR. JB.GT.1 ) THEN
279: *
280: * Merge rank-1 update with BLAS-3 update
281: *
282: ALPHA = DCONJG( A( J, J+1 ) )
283: A( J, J+1 ) = ONE
284: CALL ZCOPY( N-J, A( J-1, J+1 ), LDA,
285: $ WORK( (J+1-J1+1)+JB*N ), 1 )
286: CALL ZSCAL( N-J, ALPHA, WORK( (J+1-J1+1)+JB*N ), 1 )
287: *
288: * K1 identifies if the previous column of the panel has been
289: * explicitly stored, e.g., K1=0 and K2=1 for the first panel,
290: * and K1=1 and K2=0 for the rest
291: *
292: IF( J1.GT.1 ) THEN
293: *
294: * Not first panel
295: *
296: K2 = 1
297: ELSE
298: *
299: * First panel
300: *
301: K2 = 0
302: *
303: * First update skips the first column
304: *
305: JB = JB - 1
306: END IF
307: *
308: DO J2 = J+1, N, NB
309: NJ = MIN( NB, N-J2+1 )
310: *
311: * Update (J2, J2) diagonal block with ZGEMV
312: *
313: J3 = J2
314: DO MJ = NJ-1, 1, -1
315: CALL ZGEMM( 'Conjugate transpose', 'Transpose',
316: $ 1, MJ, JB+1,
317: $ -ONE, A( J1-K2, J3 ), LDA,
318: $ WORK( (J3-J1+1)+K1*N ), N,
319: $ ONE, A( J3, J3 ), LDA )
320: J3 = J3 + 1
321: END DO
322: *
323: * Update off-diagonal block of J2-th block row with ZGEMM
324: *
325: CALL ZGEMM( 'Conjugate transpose', 'Transpose',
326: $ NJ, N-J3+1, JB+1,
327: $ -ONE, A( J1-K2, J2 ), LDA,
328: $ WORK( (J3-J1+1)+K1*N ), N,
329: $ ONE, A( J2, J3 ), LDA )
330: END DO
331: *
332: * Recover T( J, J+1 )
333: *
334: A( J, J+1 ) = DCONJG( ALPHA )
335: END IF
336: *
337: * WORK(J+1, 1) stores H(J+1, 1)
338: *
339: CALL ZCOPY( N-J, A( J+1, J+1 ), LDA, WORK( 1 ), 1 )
340: END IF
341: GO TO 10
342: ELSE
343: *
344: * .....................................................
345: * Factorize A as L*D*L**H using the lower triangle of A
346: * .....................................................
347: *
348: * copy first column A(1:N, 1) into H(1:N, 1)
349: * (stored in WORK(1:N))
350: *
351: CALL ZCOPY( N, A( 1, 1 ), 1, WORK( 1 ), 1 )
352: *
353: * J is the main loop index, increasing from 1 to N in steps of
354: * JB, where JB is the number of columns factorized by ZLAHEF;
355: * JB is either NB, or N-J+1 for the last block
356: *
357: J = 0
358: 11 CONTINUE
359: IF( J.GE.N )
360: $ GO TO 20
361: *
362: * each step of the main loop
363: * J is the last column of the previous panel
364: * J1 is the first column of the current panel
365: * K1 identifies if the previous column of the panel has been
366: * explicitly stored, e.g., K1=1 for the first panel, and
367: * K1=0 for the rest
368: *
369: J1 = J+1
370: JB = MIN( N-J1+1, NB )
371: K1 = MAX(1, J)-J
372: *
373: * Panel factorization
374: *
375: CALL ZLAHEF_AA( UPLO, 2-K1, N-J, JB,
376: $ A( J+1, MAX(1, J) ), LDA,
1.3 bertrand 377: $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ) )
1.1 bertrand 378: *
1.5 ! bertrand 379: * Adjust IPIV and apply it back (J-th step picks (J+1)-th pivot)
1.1 bertrand 380: *
381: DO J2 = J+2, MIN(N, J+JB+1)
382: IPIV( J2 ) = IPIV( J2 ) + J
383: IF( (J2.NE.IPIV(J2)) .AND. ((J1-K1).GT.2) ) THEN
384: CALL ZSWAP( J1-K1-2, A( J2, 1 ), LDA,
385: $ A( IPIV(J2), 1 ), LDA )
386: END IF
387: END DO
388: J = J + JB
389: *
390: * Trailing submatrix update, where
391: * A(J2+1, J1-1) stores L(J2+1, J1) and
392: * WORK(J2+1, 1) stores H(J2+1, 1)
393: *
394: IF( J.LT.N ) THEN
395: *
396: * if the first panel and JB=1 (NB=1), then nothing to do
397: *
398: IF( J1.GT.1 .OR. JB.GT.1 ) THEN
399: *
400: * Merge rank-1 update with BLAS-3 update
401: *
402: ALPHA = DCONJG( A( J+1, J ) )
403: A( J+1, J ) = ONE
404: CALL ZCOPY( N-J, A( J+1, J-1 ), 1,
405: $ WORK( (J+1-J1+1)+JB*N ), 1 )
406: CALL ZSCAL( N-J, ALPHA, WORK( (J+1-J1+1)+JB*N ), 1 )
407: *
408: * K1 identifies if the previous column of the panel has been
409: * explicitly stored, e.g., K1=0 and K2=1 for the first panel,
410: * and K1=1 and K2=0 for the rest
411: *
412: IF( J1.GT.1 ) THEN
413: *
414: * Not first panel
415: *
416: K2 = 1
417: ELSE
418: *
419: * First panel
420: *
421: K2 = 0
422: *
423: * First update skips the first column
424: *
425: JB = JB - 1
426: END IF
427: *
428: DO J2 = J+1, N, NB
429: NJ = MIN( NB, N-J2+1 )
430: *
431: * Update (J2, J2) diagonal block with ZGEMV
432: *
433: J3 = J2
434: DO MJ = NJ-1, 1, -1
435: CALL ZGEMM( 'No transpose', 'Conjugate transpose',
436: $ MJ, 1, JB+1,
437: $ -ONE, WORK( (J3-J1+1)+K1*N ), N,
438: $ A( J3, J1-K2 ), LDA,
439: $ ONE, A( J3, J3 ), LDA )
440: J3 = J3 + 1
441: END DO
442: *
443: * Update off-diagonal block of J2-th block column with ZGEMM
444: *
445: CALL ZGEMM( 'No transpose', 'Conjugate transpose',
446: $ N-J3+1, NJ, JB+1,
447: $ -ONE, WORK( (J3-J1+1)+K1*N ), N,
448: $ A( J2, J1-K2 ), LDA,
449: $ ONE, A( J3, J2 ), LDA )
450: END DO
451: *
452: * Recover T( J+1, J )
453: *
454: A( J+1, J ) = DCONJG( ALPHA )
455: END IF
456: *
457: * WORK(J+1, 1) stores H(J+1, 1)
458: *
459: CALL ZCOPY( N-J, A( J+1, J+1 ), 1, WORK( 1 ), 1 )
460: END IF
461: GO TO 11
462: END IF
463: *
464: 20 CONTINUE
465: RETURN
466: *
467: * End of ZHETRF_AA
468: *
469: END
CVSweb interface <joel.bertrand@systella.fr>