Annotation of rpl/lapack/lapack/zsytrf_aa.f, revision 1.2
1.1 bertrand 1: *> \brief \b ZSYTRF_AA
2: *
3: * =========== DOCUMENTATION ===========
4: *
5: * Online html documentation available at
6: * http://www.netlib.org/lapack/explore-html/
7: *
8: *> \htmlonly
9: *> Download ZSYTRF_AA + dependencies
10: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zsytrf_aa.f">
11: *> [TGZ]</a>
12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zsytrf_aa.f">
13: *> [ZIP]</a>
14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zsytrf_aa.f">
15: *> [TXT]</a>
16: *> \endhtmlonly
17: *
18: * Definition:
19: * ===========
20: *
21: * SUBROUTINE ZSYTRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO )
22: *
23: * .. Scalar Arguments ..
24: * CHARACTER UPLO
25: * INTEGER N, LDA, LWORK, INFO
26: * ..
27: * .. Array Arguments ..
28: * INTEGER IPIV( * )
29: * COMPLEX*16 A( LDA, * ), WORK( * )
30: * ..
31: *
32: *> \par Purpose:
33: * =============
34: *>
35: *> \verbatim
36: *>
37: *> ZSYTRF_AA computes the factorization of a complex symmetric matrix A
38: *> using the Aasen's algorithm. The form of the factorization is
39: *>
40: *> A = U*T*U**T or A = L*T*L**T
41: *>
42: *> where U (or L) is a product of permutation and unit upper (lower)
43: *> triangular matrices, and T is a complex symmetric tridiagonal matrix.
44: *>
45: *> This is the blocked version of the algorithm, calling Level 3 BLAS.
46: *> \endverbatim
47: *
48: * Arguments:
49: * ==========
50: *
51: *> \param[in] UPLO
52: *> \verbatim
53: *> UPLO is CHARACTER*1
54: *> = 'U': Upper triangle of A is stored;
55: *> = 'L': Lower triangle of A is stored.
56: *> \endverbatim
57: *>
58: *> \param[in] N
59: *> \verbatim
60: *> N is INTEGER
61: *> The order of the matrix A. N >= 0.
62: *> \endverbatim
63: *>
64: *> \param[in,out] A
65: *> \verbatim
66: *> A is COMPLEX*16 array, dimension (LDA,N)
67: *> On entry, the symmetric matrix A. If UPLO = 'U', the leading
68: *> N-by-N upper triangular part of A contains the upper
69: *> triangular part of the matrix A, and the strictly lower
70: *> triangular part of A is not referenced. If UPLO = 'L', the
71: *> leading N-by-N lower triangular part of A contains the lower
72: *> triangular part of the matrix A, and the strictly upper
73: *> triangular part of A is not referenced.
74: *>
75: *> On exit, the tridiagonal matrix is stored in the diagonals
76: *> and the subdiagonals of A just below (or above) the diagonals,
77: *> and L is stored below (or above) the subdiaonals, when UPLO
78: *> is 'L' (or 'U').
79: *> \endverbatim
80: *>
81: *> \param[in] LDA
82: *> \verbatim
83: *> LDA is INTEGER
84: *> The leading dimension of the array A. LDA >= max(1,N).
85: *> \endverbatim
86: *>
87: *> \param[out] IPIV
88: *> \verbatim
89: *> IPIV is INTEGER array, dimension (N)
90: *> On exit, it contains the details of the interchanges, i.e.,
91: *> the row and column k of A were interchanged with the
92: *> row and column IPIV(k).
93: *> \endverbatim
94: *>
95: *> \param[out] WORK
96: *> \verbatim
97: *> WORK is COMPLEX*16 array, dimension (MAX(1,LWORK))
98: *> On exit, if INFO = 0, WORK(1) returns the optimal LWORK.
99: *> \endverbatim
100: *>
101: *> \param[in] LWORK
102: *> \verbatim
103: *> LWORK is INTEGER
104: *> The length of WORK. LWORK >=MAX(1,2*N). For optimum performance
105: *> LWORK >= N*(1+NB), where NB is the optimal blocksize.
106: *>
107: *> If LWORK = -1, then a workspace query is assumed; the routine
108: *> only calculates the optimal size of the WORK array, returns
109: *> this value as the first entry of the WORK array, and no error
110: *> message related to LWORK is issued by XERBLA.
111: *> \endverbatim
112: *>
113: *> \param[out] INFO
114: *> \verbatim
115: *> INFO is INTEGER
116: *> = 0: successful exit
117: *> < 0: if INFO = -i, the i-th argument had an illegal value
118: *> > 0: if INFO = i, D(i,i) is exactly zero. The factorization
119: *> has been completed, but the block diagonal matrix D is
120: *> exactly singular, and division by zero will occur if it
121: *> is used to solve a system of equations.
122: *> \endverbatim
123: *
124: * Authors:
125: * ========
126: *
127: *> \author Univ. of Tennessee
128: *> \author Univ. of California Berkeley
129: *> \author Univ. of Colorado Denver
130: *> \author NAG Ltd.
131: *
132: *> \date December 2016
133: *
134: *> \ingroup complex16SYcomputational
135: *
136: * =====================================================================
137: SUBROUTINE ZSYTRF_AA( UPLO, N, A, LDA, IPIV, WORK, LWORK, INFO)
138: *
139: * -- LAPACK computational routine (version 3.7.0) --
140: * -- LAPACK is a software package provided by Univ. of Tennessee, --
141: * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
142: * December 2016
143: *
144: IMPLICIT NONE
145: *
146: * .. Scalar Arguments ..
147: CHARACTER UPLO
148: INTEGER N, LDA, LWORK, INFO
149: * ..
150: * .. Array Arguments ..
151: INTEGER IPIV( * )
152: COMPLEX*16 A( LDA, * ), WORK( * )
153: * ..
154: *
155: * =====================================================================
156: * .. Parameters ..
157: COMPLEX*16 ZERO, ONE
158: PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 )
159: *
160: * .. Local Scalars ..
161: LOGICAL LQUERY, UPPER
162: INTEGER J, LWKOPT, IINFO
163: INTEGER NB, MJ, NJ, K1, K2, J1, J2, J3, JB
164: COMPLEX*16 ALPHA
165: * ..
166: * .. External Functions ..
167: LOGICAL LSAME
168: INTEGER ILAENV
169: EXTERNAL LSAME, ILAENV
170: * ..
171: * .. External Subroutines ..
172: EXTERNAL XERBLA
173: * ..
174: * .. Intrinsic Functions ..
175: INTRINSIC MAX
176: * ..
177: * .. Executable Statements ..
178: *
179: * Determine the block size
180: *
181: NB = ILAENV( 1, 'ZSYTRF', UPLO, N, -1, -1, -1 )
182: *
183: * Test the input parameters.
184: *
185: INFO = 0
186: UPPER = LSAME( UPLO, 'U' )
187: LQUERY = ( LWORK.EQ.-1 )
188: IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
189: INFO = -1
190: ELSE IF( N.LT.0 ) THEN
191: INFO = -2
192: ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
193: INFO = -4
194: ELSE IF( LWORK.LT.MAX( 1, 2*N ) .AND. .NOT.LQUERY ) THEN
195: INFO = -7
196: END IF
197: *
198: IF( INFO.EQ.0 ) THEN
199: LWKOPT = (NB+1)*N
200: WORK( 1 ) = LWKOPT
201: END IF
202: *
203: IF( INFO.NE.0 ) THEN
204: CALL XERBLA( 'ZSYTRF_AA', -INFO )
205: RETURN
206: ELSE IF( LQUERY ) THEN
207: RETURN
208: END IF
209: *
210: * Quick return
211: *
212: IF ( N.EQ.0 ) THEN
213: RETURN
214: ENDIF
215: IPIV( 1 ) = 1
216: IF ( N.EQ.1 ) THEN
217: IF ( A( 1, 1 ).EQ.ZERO ) THEN
218: INFO = 1
219: END IF
220: RETURN
221: END IF
222: *
223: * Adjubst block size based on the workspace size
224: *
225: IF( LWORK.LT.((1+NB)*N) ) THEN
226: NB = ( LWORK-N ) / N
227: END IF
228: *
229: IF( UPPER ) THEN
230: *
231: * .....................................................
232: * Factorize A as L*D*L**T using the upper triangle of A
233: * .....................................................
234: *
235: * Copy first row A(1, 1:N) into H(1:n) (stored in WORK(1:N))
236: *
237: CALL ZCOPY( N, A( 1, 1 ), LDA, WORK( 1 ), 1 )
238: *
239: * J is the main loop index, increasing from 1 to N in steps of
240: * JB, where JB is the number of columns factorized by ZLASYF;
241: * JB is either NB, or N-J+1 for the last block
242: *
243: J = 0
244: 10 CONTINUE
245: IF( J.GE.N )
246: $ GO TO 20
247: *
248: * each step of the main loop
249: * J is the last column of the previous panel
250: * J1 is the first column of the current panel
251: * K1 identifies if the previous column of the panel has been
252: * explicitly stored, e.g., K1=1 for the first panel, and
253: * K1=0 for the rest
254: *
255: J1 = J + 1
256: JB = MIN( N-J1+1, NB )
257: K1 = MAX(1, J)-J
258: *
259: * Panel factorization
260: *
261: CALL ZLASYF_AA( UPLO, 2-K1, N-J, JB,
262: $ A( MAX(1, J), J+1 ), LDA,
263: $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ),
264: $ IINFO )
265: IF( (IINFO.GT.0) .AND. (INFO.EQ.0) ) THEN
266: INFO = IINFO+J
267: ENDIF
268: *
269: * Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot)
270: *
271: DO J2 = J+2, MIN(N, J+JB+1)
272: IPIV( J2 ) = IPIV( J2 ) + J
273: IF( (J2.NE.IPIV(J2)) .AND. ((J1-K1).GT.2) ) THEN
274: CALL ZSWAP( J1-K1-2, A( 1, J2 ), 1,
275: $ A( 1, IPIV(J2) ), 1 )
276: END IF
277: END DO
278: J = J + JB
279: *
280: * Trailing submatrix update, where
281: * the row A(J1-1, J2-1:N) stores U(J1, J2+1:N) and
282: * WORK stores the current block of the auxiriarly matrix H
283: *
284: IF( J.LT.N ) THEN
285: *
286: * If first panel and JB=1 (NB=1), then nothing to do
287: *
288: IF( J1.GT.1 .OR. JB.GT.1 ) THEN
289: *
290: * Merge rank-1 update with BLAS-3 update
291: *
292: ALPHA = A( J, J+1 )
293: A( J, J+1 ) = ONE
294: CALL ZCOPY( N-J, A( J-1, J+1 ), LDA,
295: $ WORK( (J+1-J1+1)+JB*N ), 1 )
296: CALL ZSCAL( N-J, ALPHA, WORK( (J+1-J1+1)+JB*N ), 1 )
297: *
298: * K1 identifies if the previous column of the panel has been
299: * explicitly stored, e.g., K1=1 and K2= 0 for the first panel,
300: * while K1=0 and K2=1 for the rest
301: *
302: IF( J1.GT.1 ) THEN
303: *
304: * Not first panel
305: *
306: K2 = 1
307: ELSE
308: *
309: * First panel
310: *
311: K2 = 0
312: *
313: * First update skips the first column
314: *
315: JB = JB - 1
316: END IF
317: *
318: DO J2 = J+1, N, NB
319: NJ = MIN( NB, N-J2+1 )
320: *
321: * Update (J2, J2) diagonal block with ZGEMV
322: *
323: J3 = J2
324: DO MJ = NJ-1, 1, -1
325: CALL ZGEMV( 'No transpose', MJ, JB+1,
326: $ -ONE, WORK( J3-J1+1+K1*N ), N,
327: $ A( J1-K2, J3 ), 1,
328: $ ONE, A( J3, J3 ), LDA )
329: J3 = J3 + 1
330: END DO
331: *
332: * Update off-diagonal block of J2-th block row with ZGEMM
333: *
334: CALL ZGEMM( 'Transpose', 'Transpose',
335: $ NJ, N-J3+1, JB+1,
336: $ -ONE, A( J1-K2, J2 ), LDA,
337: $ WORK( J3-J1+1+K1*N ), N,
338: $ ONE, A( J2, J3 ), LDA )
339: END DO
340: *
341: * Recover T( J, J+1 )
342: *
343: A( J, J+1 ) = ALPHA
344: END IF
345: *
346: * WORK(J+1, 1) stores H(J+1, 1)
347: *
348: CALL ZCOPY( N-J, A( J+1, J+1 ), LDA, WORK( 1 ), 1 )
349: END IF
350: GO TO 10
351: ELSE
352: *
353: * .....................................................
354: * Factorize A as L*D*L**T using the lower triangle of A
355: * .....................................................
356: *
357: * copy first column A(1:N, 1) into H(1:N, 1)
358: * (stored in WORK(1:N))
359: *
360: CALL ZCOPY( N, A( 1, 1 ), 1, WORK( 1 ), 1 )
361: *
362: * J is the main loop index, increasing from 1 to N in steps of
363: * JB, where JB is the number of columns factorized by ZLASYF;
364: * JB is either NB, or N-J+1 for the last block
365: *
366: J = 0
367: 11 CONTINUE
368: IF( J.GE.N )
369: $ GO TO 20
370: *
371: * each step of the main loop
372: * J is the last column of the previous panel
373: * J1 is the first column of the current panel
374: * K1 identifies if the previous column of the panel has been
375: * explicitly stored, e.g., K1=1 for the first panel, and
376: * K1=0 for the rest
377: *
378: J1 = J+1
379: JB = MIN( N-J1+1, NB )
380: K1 = MAX(1, J)-J
381: *
382: * Panel factorization
383: *
384: CALL ZLASYF_AA( UPLO, 2-K1, N-J, JB,
385: $ A( J+1, MAX(1, J) ), LDA,
386: $ IPIV( J+1 ), WORK, N, WORK( N*NB+1 ), IINFO)
387: IF( (IINFO.GT.0) .AND. (INFO.EQ.0) ) THEN
388: INFO = IINFO+J
389: ENDIF
390: *
391: * Ajust IPIV and apply it back (J-th step picks (J+1)-th pivot)
392: *
393: DO J2 = J+2, MIN(N, J+JB+1)
394: IPIV( J2 ) = IPIV( J2 ) + J
395: IF( (J2.NE.IPIV(J2)) .AND. ((J1-K1).GT.2) ) THEN
396: CALL ZSWAP( J1-K1-2, A( J2, 1 ), LDA,
397: $ A( IPIV(J2), 1 ), LDA )
398: END IF
399: END DO
400: J = J + JB
401: *
402: * Trailing submatrix update, where
403: * A(J2+1, J1-1) stores L(J2+1, J1) and
404: * WORK(J2+1, 1) stores H(J2+1, 1)
405: *
406: IF( J.LT.N ) THEN
407: *
408: * if first panel and JB=1 (NB=1), then nothing to do
409: *
410: IF( J1.GT.1 .OR. JB.GT.1 ) THEN
411: *
412: * Merge rank-1 update with BLAS-3 update
413: *
414: ALPHA = A( J+1, J )
415: A( J+1, J ) = ONE
416: CALL ZCOPY( N-J, A( J+1, J-1 ), 1,
417: $ WORK( (J+1-J1+1)+JB*N ), 1 )
418: CALL ZSCAL( N-J, ALPHA, WORK( (J+1-J1+1)+JB*N ), 1 )
419: *
420: * K1 identifies if the previous column of the panel has been
421: * explicitly stored, e.g., K1=1 and K2= 0 for the first panel,
422: * while K1=0 and K2=1 for the rest
423: *
424: IF( J1.GT.1 ) THEN
425: *
426: * Not first panel
427: *
428: K2 = 1
429: ELSE
430: *
431: * First panel
432: *
433: K2 = 0
434: *
435: * First update skips the first column
436: *
437: JB = JB - 1
438: END IF
439: *
440: DO J2 = J+1, N, NB
441: NJ = MIN( NB, N-J2+1 )
442: *
443: * Update (J2, J2) diagonal block with ZGEMV
444: *
445: J3 = J2
446: DO MJ = NJ-1, 1, -1
447: CALL ZGEMV( 'No transpose', MJ, JB+1,
448: $ -ONE, WORK( J3-J1+1+K1*N ), N,
449: $ A( J3, J1-K2 ), LDA,
450: $ ONE, A( J3, J3 ), 1 )
451: J3 = J3 + 1
452: END DO
453: *
454: * Update off-diagonal block in J2-th block column with ZGEMM
455: *
456: CALL ZGEMM( 'No transpose', 'Transpose',
457: $ N-J3+1, NJ, JB+1,
458: $ -ONE, WORK( J3-J1+1+K1*N ), N,
459: $ A( J2, J1-K2 ), LDA,
460: $ ONE, A( J3, J2 ), LDA )
461: END DO
462: *
463: * Recover T( J+1, J )
464: *
465: A( J+1, J ) = ALPHA
466: END IF
467: *
468: * WORK(J+1, 1) stores H(J+1, 1)
469: *
470: CALL ZCOPY( N-J, A( J+1, J+1 ), 1, WORK( 1 ), 1 )
471: END IF
472: GO TO 11
473: END IF
474: *
475: 20 CONTINUE
476: RETURN
477: *
478: * End of ZSYTRF_AA
479: *
480: END
CVSweb interface <joel.bertrand@systella.fr>