Annotation of rpl/lapack/lapack/zsytrf_aa_2stage.f, revision 1.2
1.1 bertrand 1: *> \brief \b ZSYTRF_AA_2STAGE
2: *
3: * =========== DOCUMENTATION ===========
4: *
5: * Online html documentation available at
6: * http://www.netlib.org/lapack/explore-html/
7: *
8: *> \htmlonly
9: *> Download ZSYTRF_AA_2STAGE + dependencies
10: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/csytrf_aa_2stage.f">
11: *> [TGZ]</a>
12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/csytrf_aa_2stage.f">
13: *> [ZIP]</a>
14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/csytrf_aa_2stage.f">
15: *> [TXT]</a>
16: *> \endhtmlonly
17: *
18: * Definition:
19: * ===========
20: *
21: * SUBROUTINE ZSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV,
22: * IPIV2, WORK, LWORK, INFO )
23: *
24: * .. Scalar Arguments ..
25: * CHARACTER UPLO
26: * INTEGER N, LDA, LTB, LWORK, INFO
27: * ..
28: * .. Array Arguments ..
29: * INTEGER IPIV( * ), IPIV2( * )
30: * COMPLEX*16 A( LDA, * ), TB( * ), WORK( * )
31: * ..
32: *
33: *> \par Purpose:
34: * =============
35: *>
36: *> \verbatim
37: *>
38: *> ZSYTRF_AA_2STAGE computes the factorization of a complex symmetric matrix A
39: *> using the Aasen's algorithm. The form of the factorization is
40: *>
1.2 ! bertrand 41: *> A = U**T*T*U or A = L*T*L**T
1.1 bertrand 42: *>
43: *> where U (or L) is a product of permutation and unit upper (lower)
44: *> triangular matrices, and T is a complex symmetric band matrix with the
45: *> bandwidth of NB (NB is internally selected and stored in TB( 1 ), and T is
46: *> LU factorized with partial pivoting).
47: *>
48: *> This is the blocked version of the algorithm, calling Level 3 BLAS.
49: *> \endverbatim
50: *
51: * Arguments:
52: * ==========
53: *
54: *> \param[in] UPLO
55: *> \verbatim
56: *> UPLO is CHARACTER*1
57: *> = 'U': Upper triangle of A is stored;
58: *> = 'L': Lower triangle of A is stored.
59: *> \endverbatim
60: *>
61: *> \param[in] N
62: *> \verbatim
63: *> N is INTEGER
64: *> The order of the matrix A. N >= 0.
65: *> \endverbatim
66: *>
67: *> \param[in,out] A
68: *> \verbatim
69: *> A is COMPLEX*16 array, dimension (LDA,N)
70: *> On entry, the hermitian matrix A. If UPLO = 'U', the leading
71: *> N-by-N upper triangular part of A contains the upper
72: *> triangular part of the matrix A, and the strictly lower
73: *> triangular part of A is not referenced. If UPLO = 'L', the
74: *> leading N-by-N lower triangular part of A contains the lower
75: *> triangular part of the matrix A, and the strictly upper
76: *> triangular part of A is not referenced.
77: *>
78: *> On exit, L is stored below (or above) the subdiaonal blocks,
79: *> when UPLO is 'L' (or 'U').
80: *> \endverbatim
81: *>
82: *> \param[in] LDA
83: *> \verbatim
84: *> LDA is INTEGER
85: *> The leading dimension of the array A. LDA >= max(1,N).
86: *> \endverbatim
87: *>
88: *> \param[out] TB
89: *> \verbatim
90: *> TB is COMPLEX*16 array, dimension (LTB)
91: *> On exit, details of the LU factorization of the band matrix.
92: *> \endverbatim
93: *>
94: *> \param[in] LTB
95: *> \verbatim
1.2 ! bertrand 96: *> LTB is INTEGER
1.1 bertrand 97: *> The size of the array TB. LTB >= 4*N, internally
98: *> used to select NB such that LTB >= (3*NB+1)*N.
99: *>
100: *> If LTB = -1, then a workspace query is assumed; the
101: *> routine only calculates the optimal size of LTB,
102: *> returns this value as the first entry of TB, and
103: *> no error message related to LTB is issued by XERBLA.
104: *> \endverbatim
105: *>
106: *> \param[out] IPIV
107: *> \verbatim
108: *> IPIV is INTEGER array, dimension (N)
109: *> On exit, it contains the details of the interchanges, i.e.,
110: *> the row and column k of A were interchanged with the
111: *> row and column IPIV(k).
112: *> \endverbatim
113: *>
114: *> \param[out] IPIV2
115: *> \verbatim
1.2 ! bertrand 116: *> IPIV2 is INTEGER array, dimension (N)
1.1 bertrand 117: *> On exit, it contains the details of the interchanges, i.e.,
118: *> the row and column k of T were interchanged with the
119: *> row and column IPIV(k).
120: *> \endverbatim
121: *>
122: *> \param[out] WORK
123: *> \verbatim
124: *> WORK is COMPLEX*16 workspace of size LWORK
125: *> \endverbatim
126: *>
127: *> \param[in] LWORK
128: *> \verbatim
1.2 ! bertrand 129: *> LWORK is INTEGER
1.1 bertrand 130: *> The size of WORK. LWORK >= N, internally used to select NB
131: *> such that LWORK >= N*NB.
132: *>
133: *> If LWORK = -1, then a workspace query is assumed; the
134: *> routine only calculates the optimal size of the WORK array,
135: *> returns this value as the first entry of the WORK array, and
136: *> no error message related to LWORK is issued by XERBLA.
137: *> \endverbatim
138: *>
139: *> \param[out] INFO
140: *> \verbatim
141: *> INFO is INTEGER
142: *> = 0: successful exit
143: *> < 0: if INFO = -i, the i-th argument had an illegal value.
144: *> > 0: if INFO = i, band LU factorization failed on i-th column
145: *> \endverbatim
146: *
147: * Authors:
148: * ========
149: *
150: *> \author Univ. of Tennessee
151: *> \author Univ. of California Berkeley
152: *> \author Univ. of Colorado Denver
153: *> \author NAG Ltd.
154: *
155: *> \date November 2017
156: *
157: *> \ingroup complex16SYcomputational
158: *
159: * =====================================================================
160: SUBROUTINE ZSYTRF_AA_2STAGE( UPLO, N, A, LDA, TB, LTB, IPIV,
161: $ IPIV2, WORK, LWORK, INFO )
162: *
163: * -- LAPACK computational routine (version 3.8.0) --
164: * -- LAPACK is a software package provided by Univ. of Tennessee, --
165: * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
166: * November 2017
167: *
168: IMPLICIT NONE
169: *
170: * .. Scalar Arguments ..
171: CHARACTER UPLO
172: INTEGER N, LDA, LTB, LWORK, INFO
173: * ..
174: * .. Array Arguments ..
175: INTEGER IPIV( * ), IPIV2( * )
176: COMPLEX*16 A( LDA, * ), TB( * ), WORK( * )
177: * ..
178: *
179: * =====================================================================
180: * .. Parameters ..
181: COMPLEX*16 CZERO, CONE
182: PARAMETER ( CZERO = ( 0.0D+0, 0.0D+0 ),
183: $ CONE = ( 1.0D+0, 0.0D+0 ) )
184: *
185: * .. Local Scalars ..
186: LOGICAL UPPER, TQUERY, WQUERY
187: INTEGER I, J, K, I1, I2, TD
188: INTEGER LDTB, NB, KB, JB, NT, IINFO
189: COMPLEX*16 PIV
190: * ..
191: * .. External Functions ..
192: LOGICAL LSAME
193: INTEGER ILAENV
194: EXTERNAL LSAME, ILAENV
195: * ..
196: * .. External Subroutines ..
197: EXTERNAL XERBLA, ZCOPY, ZGBTRF, ZGEMM, ZGETRF,
198: $ ZLACPY, ZLASET, ZLASWP, ZTRSM, ZSWAP
199: * ..
200: * .. Intrinsic Functions ..
201: INTRINSIC MIN, MAX
202: * ..
203: * .. Executable Statements ..
204: *
205: * Test the input parameters.
206: *
207: INFO = 0
208: UPPER = LSAME( UPLO, 'U' )
209: WQUERY = ( LWORK.EQ.-1 )
210: TQUERY = ( LTB.EQ.-1 )
211: IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
212: INFO = -1
213: ELSE IF( N.LT.0 ) THEN
214: INFO = -2
215: ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
216: INFO = -4
217: ELSE IF ( LTB .LT. 4*N .AND. .NOT.TQUERY ) THEN
218: INFO = -6
219: ELSE IF ( LWORK .LT. N .AND. .NOT.WQUERY ) THEN
220: INFO = -10
221: END IF
222: *
223: IF( INFO.NE.0 ) THEN
224: CALL XERBLA( 'ZSYTRF_AA_2STAGE', -INFO )
225: RETURN
226: END IF
227: *
228: * Answer the query
229: *
230: NB = ILAENV( 1, 'ZSYTRF_AA_2STAGE', UPLO, N, -1, -1, -1 )
231: IF( INFO.EQ.0 ) THEN
232: IF( TQUERY ) THEN
233: TB( 1 ) = (3*NB+1)*N
234: END IF
235: IF( WQUERY ) THEN
236: WORK( 1 ) = N*NB
237: END IF
238: END IF
239: IF( TQUERY .OR. WQUERY ) THEN
240: RETURN
241: END IF
242: *
243: * Quick return
244: *
245: IF ( N.EQ.0 ) THEN
246: RETURN
247: ENDIF
248: *
249: * Determine the number of the block size
250: *
251: LDTB = LTB/N
252: IF( LDTB .LT. 3*NB+1 ) THEN
253: NB = (LDTB-1)/3
254: END IF
255: IF( LWORK .LT. NB*N ) THEN
256: NB = LWORK/N
257: END IF
258: *
259: * Determine the number of the block columns
260: *
261: NT = (N+NB-1)/NB
262: TD = 2*NB
263: KB = MIN(NB, N)
264: *
265: * Initialize vectors/matrices
266: *
267: DO J = 1, KB
268: IPIV( J ) = J
269: END DO
270: *
271: * Save NB
272: *
273: TB( 1 ) = NB
274: *
275: IF( UPPER ) THEN
276: *
277: * .....................................................
1.2 ! bertrand 278: * Factorize A as U**T*D*U using the upper triangle of A
1.1 bertrand 279: * .....................................................
280: *
281: DO J = 0, NT-1
282: *
283: * Generate Jth column of W and H
284: *
285: KB = MIN(NB, N-J*NB)
286: DO I = 1, J-1
287: IF( I.EQ.1 ) THEN
288: * H(I,J) = T(I,I)*U(I,J) + T(I+1,I)*U(I+1,J)
289: IF( I .EQ. (J-1) ) THEN
290: JB = NB+KB
291: ELSE
292: JB = 2*NB
293: END IF
294: CALL ZGEMM( 'NoTranspose', 'NoTranspose',
295: $ NB, KB, JB,
296: $ CONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1,
297: $ A( (I-1)*NB+1, J*NB+1 ), LDA,
298: $ CZERO, WORK( I*NB+1 ), N )
299: ELSE
300: * H(I,J) = T(I,I-1)*U(I-1,J) + T(I,I)*U(I,J) + T(I,I+1)*U(I+1,J)
301: IF( I .EQ. (J-1) ) THEN
302: JB = 2*NB+KB
303: ELSE
304: JB = 3*NB
305: END IF
306: CALL ZGEMM( 'NoTranspose', 'NoTranspose',
307: $ NB, KB, JB,
308: $ CONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ),
309: $ LDTB-1,
310: $ A( (I-2)*NB+1, J*NB+1 ), LDA,
311: $ CZERO, WORK( I*NB+1 ), N )
312: END IF
313: END DO
314: *
315: * Compute T(J,J)
316: *
317: CALL ZLACPY( 'Upper', KB, KB, A( J*NB+1, J*NB+1 ), LDA,
318: $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 )
319: IF( J.GT.1 ) THEN
320: * T(J,J) = U(1:J,J)'*H(1:J)
321: CALL ZGEMM( 'Transpose', 'NoTranspose',
322: $ KB, KB, (J-1)*NB,
323: $ -CONE, A( 1, J*NB+1 ), LDA,
324: $ WORK( NB+1 ), N,
325: $ CONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 )
326: * T(J,J) += U(J,J)'*T(J,J-1)*U(J-1,J)
327: CALL ZGEMM( 'Transpose', 'NoTranspose',
328: $ KB, NB, KB,
329: $ CONE, A( (J-1)*NB+1, J*NB+1 ), LDA,
330: $ TB( TD+NB+1 + ((J-1)*NB)*LDTB ), LDTB-1,
331: $ CZERO, WORK( 1 ), N )
332: CALL ZGEMM( 'NoTranspose', 'NoTranspose',
333: $ KB, KB, NB,
334: $ -CONE, WORK( 1 ), N,
335: $ A( (J-2)*NB+1, J*NB+1 ), LDA,
336: $ CONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 )
337: END IF
338: *
339: * Expand T(J,J) into full format
340: *
341: DO I = 1, KB
342: DO K = I+1, KB
343: TB( TD+(K-I)+1 + (J*NB+I-1)*LDTB )
344: $ = TB( TD-(K-(I+1)) + (J*NB+K-1)*LDTB )
345: END DO
346: END DO
347: IF( J.GT.0 ) THEN
348: c CALL CHEGST( 1, 'Upper', KB,
349: c $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1,
350: c $ A( (J-1)*NB+1, J*NB+1 ), LDA, IINFO )
351: CALL ZTRSM( 'L', 'U', 'T', 'N', KB, KB, CONE,
352: $ A( (J-1)*NB+1, J*NB+1 ), LDA,
353: $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 )
354: CALL ZTRSM( 'R', 'U', 'N', 'N', KB, KB, CONE,
355: $ A( (J-1)*NB+1, J*NB+1 ), LDA,
356: $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 )
357: END IF
358: *
359: IF( J.LT.NT-1 ) THEN
360: IF( J.GT.0 ) THEN
361: *
362: * Compute H(J,J)
363: *
364: IF( J.EQ.1 ) THEN
365: CALL ZGEMM( 'NoTranspose', 'NoTranspose',
366: $ KB, KB, KB,
367: $ CONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1,
368: $ A( (J-1)*NB+1, J*NB+1 ), LDA,
369: $ CZERO, WORK( J*NB+1 ), N )
370: ELSE
371: CALL ZGEMM( 'NoTranspose', 'NoTranspose',
372: $ KB, KB, NB+KB,
373: $ CONE, TB( TD+NB+1 + ((J-1)*NB)*LDTB ),
374: $ LDTB-1,
375: $ A( (J-2)*NB+1, J*NB+1 ), LDA,
376: $ CZERO, WORK( J*NB+1 ), N )
377: END IF
378: *
379: * Update with the previous column
380: *
381: CALL ZGEMM( 'Transpose', 'NoTranspose',
382: $ NB, N-(J+1)*NB, J*NB,
383: $ -CONE, WORK( NB+1 ), N,
384: $ A( 1, (J+1)*NB+1 ), LDA,
385: $ CONE, A( J*NB+1, (J+1)*NB+1 ), LDA )
386: END IF
387: *
388: * Copy panel to workspace to call ZGETRF
389: *
390: DO K = 1, NB
391: CALL ZCOPY( N-(J+1)*NB,
392: $ A( J*NB+K, (J+1)*NB+1 ), LDA,
393: $ WORK( 1+(K-1)*N ), 1 )
394: END DO
395: *
396: * Factorize panel
397: *
398: CALL ZGETRF( N-(J+1)*NB, NB,
399: $ WORK, N,
400: $ IPIV( (J+1)*NB+1 ), IINFO )
401: c IF (IINFO.NE.0 .AND. INFO.EQ.0) THEN
402: c INFO = IINFO+(J+1)*NB
403: c END IF
404: *
405: * Copy panel back
406: *
407: DO K = 1, NB
408: CALL ZCOPY( N-(J+1)*NB,
409: $ WORK( 1+(K-1)*N ), 1,
410: $ A( J*NB+K, (J+1)*NB+1 ), LDA )
411: END DO
412: *
413: * Compute T(J+1, J), zero out for GEMM update
414: *
415: KB = MIN(NB, N-(J+1)*NB)
416: CALL ZLASET( 'Full', KB, NB, CZERO, CZERO,
417: $ TB( TD+NB+1 + (J*NB)*LDTB), LDTB-1 )
418: CALL ZLACPY( 'Upper', KB, NB,
419: $ WORK, N,
420: $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 )
421: IF( J.GT.0 ) THEN
422: CALL ZTRSM( 'R', 'U', 'N', 'U', KB, NB, CONE,
423: $ A( (J-1)*NB+1, J*NB+1 ), LDA,
424: $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 )
425: END IF
426: *
427: * Copy T(J,J+1) into T(J+1, J), both upper/lower for GEMM
428: * updates
429: *
430: DO K = 1, NB
431: DO I = 1, KB
432: TB( TD-NB+K-I+1 + (J*NB+NB+I-1)*LDTB )
433: $ = TB( TD+NB+I-K+1 + (J*NB+K-1)*LDTB )
434: END DO
435: END DO
436: CALL ZLASET( 'Lower', KB, NB, CZERO, CONE,
437: $ A( J*NB+1, (J+1)*NB+1), LDA )
438: *
439: * Apply pivots to trailing submatrix of A
440: *
441: DO K = 1, KB
442: * > Adjust ipiv
443: IPIV( (J+1)*NB+K ) = IPIV( (J+1)*NB+K ) + (J+1)*NB
444: *
445: I1 = (J+1)*NB+K
446: I2 = IPIV( (J+1)*NB+K )
447: IF( I1.NE.I2 ) THEN
448: * > Apply pivots to previous columns of L
449: CALL ZSWAP( K-1, A( (J+1)*NB+1, I1 ), 1,
450: $ A( (J+1)*NB+1, I2 ), 1 )
1.2 ! bertrand 451: * > Swap A(I1+1:M, I1) with A(I2, I1+1:M)
! 452: IF( I2.GT.(I1+1) )
! 453: $ CALL ZSWAP( I2-I1-1, A( I1, I1+1 ), LDA,
! 454: $ A( I1+1, I2 ), 1 )
1.1 bertrand 455: * > Swap A(I2+1:M, I1) with A(I2+1:M, I2)
1.2 ! bertrand 456: IF( I2.LT.N )
! 457: $ CALL ZSWAP( N-I2, A( I1, I2+1 ), LDA,
! 458: $ A( I2, I2+1 ), LDA )
1.1 bertrand 459: * > Swap A(I1, I1) with A(I2, I2)
460: PIV = A( I1, I1 )
461: A( I1, I1 ) = A( I2, I2 )
462: A( I2, I2 ) = PIV
463: * > Apply pivots to previous columns of L
464: IF( J.GT.0 ) THEN
465: CALL ZSWAP( J*NB, A( 1, I1 ), 1,
466: $ A( 1, I2 ), 1 )
467: END IF
468: ENDIF
469: END DO
470: END IF
471: END DO
472: ELSE
473: *
474: * .....................................................
475: * Factorize A as L*D*L**T using the lower triangle of A
476: * .....................................................
477: *
478: DO J = 0, NT-1
479: *
480: * Generate Jth column of W and H
481: *
482: KB = MIN(NB, N-J*NB)
483: DO I = 1, J-1
484: IF( I.EQ.1 ) THEN
485: * H(I,J) = T(I,I)*L(J,I)' + T(I+1,I)'*L(J,I+1)'
486: IF( I .EQ. (J-1) ) THEN
487: JB = NB+KB
488: ELSE
489: JB = 2*NB
490: END IF
491: CALL ZGEMM( 'NoTranspose', 'Transpose',
492: $ NB, KB, JB,
493: $ CONE, TB( TD+1 + (I*NB)*LDTB ), LDTB-1,
494: $ A( J*NB+1, (I-1)*NB+1 ), LDA,
495: $ CZERO, WORK( I*NB+1 ), N )
496: ELSE
497: * H(I,J) = T(I,I-1)*L(J,I-1)' + T(I,I)*L(J,I)' + T(I,I+1)*L(J,I+1)'
498: IF( I .EQ. (J-1) ) THEN
499: JB = 2*NB+KB
500: ELSE
501: JB = 3*NB
502: END IF
503: CALL ZGEMM( 'NoTranspose', 'Transpose',
504: $ NB, KB, JB,
505: $ CONE, TB( TD+NB+1 + ((I-1)*NB)*LDTB ),
506: $ LDTB-1,
507: $ A( J*NB+1, (I-2)*NB+1 ), LDA,
508: $ CZERO, WORK( I*NB+1 ), N )
509: END IF
510: END DO
511: *
512: * Compute T(J,J)
513: *
514: CALL ZLACPY( 'Lower', KB, KB, A( J*NB+1, J*NB+1 ), LDA,
515: $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 )
516: IF( J.GT.1 ) THEN
517: * T(J,J) = L(J,1:J)*H(1:J)
518: CALL ZGEMM( 'NoTranspose', 'NoTranspose',
519: $ KB, KB, (J-1)*NB,
520: $ -CONE, A( J*NB+1, 1 ), LDA,
521: $ WORK( NB+1 ), N,
522: $ CONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 )
523: * T(J,J) += L(J,J)*T(J,J-1)*L(J,J-1)'
524: CALL ZGEMM( 'NoTranspose', 'NoTranspose',
525: $ KB, NB, KB,
526: $ CONE, A( J*NB+1, (J-1)*NB+1 ), LDA,
527: $ TB( TD+NB+1 + ((J-1)*NB)*LDTB ), LDTB-1,
528: $ CZERO, WORK( 1 ), N )
529: CALL ZGEMM( 'NoTranspose', 'Transpose',
530: $ KB, KB, NB,
531: $ -CONE, WORK( 1 ), N,
532: $ A( J*NB+1, (J-2)*NB+1 ), LDA,
533: $ CONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1 )
534: END IF
535: *
536: * Expand T(J,J) into full format
537: *
538: DO I = 1, KB
539: DO K = I+1, KB
540: TB( TD-(K-(I+1)) + (J*NB+K-1)*LDTB )
541: $ = TB( TD+(K-I)+1 + (J*NB+I-1)*LDTB )
542: END DO
543: END DO
544: IF( J.GT.0 ) THEN
545: c CALL CHEGST( 1, 'Lower', KB,
546: c $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1,
547: c $ A( J*NB+1, (J-1)*NB+1 ), LDA, IINFO )
548: CALL ZTRSM( 'L', 'L', 'N', 'N', KB, KB, CONE,
549: $ A( J*NB+1, (J-1)*NB+1 ), LDA,
550: $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 )
551: CALL ZTRSM( 'R', 'L', 'T', 'N', KB, KB, CONE,
552: $ A( J*NB+1, (J-1)*NB+1 ), LDA,
553: $ TB( TD+1 + (J*NB)*LDTB ), LDTB-1 )
554: END IF
555: *
556: * Symmetrize T(J,J)
557: *
558: DO I = 1, KB
559: DO K = I+1, KB
560: TB( TD-(K-(I+1)) + (J*NB+K-1)*LDTB )
561: $ = TB( TD+(K-I)+1 + (J*NB+I-1)*LDTB )
562: END DO
563: END DO
564: *
565: IF( J.LT.NT-1 ) THEN
566: IF( J.GT.0 ) THEN
567: *
568: * Compute H(J,J)
569: *
570: IF( J.EQ.1 ) THEN
571: CALL ZGEMM( 'NoTranspose', 'Transpose',
572: $ KB, KB, KB,
573: $ CONE, TB( TD+1 + (J*NB)*LDTB ), LDTB-1,
574: $ A( J*NB+1, (J-1)*NB+1 ), LDA,
575: $ CZERO, WORK( J*NB+1 ), N )
576: ELSE
577: CALL ZGEMM( 'NoTranspose', 'Transpose',
578: $ KB, KB, NB+KB,
579: $ CONE, TB( TD+NB+1 + ((J-1)*NB)*LDTB ),
580: $ LDTB-1,
581: $ A( J*NB+1, (J-2)*NB+1 ), LDA,
582: $ CZERO, WORK( J*NB+1 ), N )
583: END IF
584: *
585: * Update with the previous column
586: *
587: CALL ZGEMM( 'NoTranspose', 'NoTranspose',
588: $ N-(J+1)*NB, NB, J*NB,
589: $ -CONE, A( (J+1)*NB+1, 1 ), LDA,
590: $ WORK( NB+1 ), N,
591: $ CONE, A( (J+1)*NB+1, J*NB+1 ), LDA )
592: END IF
593: *
594: * Factorize panel
595: *
596: CALL ZGETRF( N-(J+1)*NB, NB,
597: $ A( (J+1)*NB+1, J*NB+1 ), LDA,
598: $ IPIV( (J+1)*NB+1 ), IINFO )
599: c IF (IINFO.NE.0 .AND. INFO.EQ.0) THEN
600: c INFO = IINFO+(J+1)*NB
601: c END IF
602: *
603: * Compute T(J+1, J), zero out for GEMM update
604: *
605: KB = MIN(NB, N-(J+1)*NB)
606: CALL ZLASET( 'Full', KB, NB, CZERO, CZERO,
607: $ TB( TD+NB+1 + (J*NB)*LDTB), LDTB-1 )
608: CALL ZLACPY( 'Upper', KB, NB,
609: $ A( (J+1)*NB+1, J*NB+1 ), LDA,
610: $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 )
611: IF( J.GT.0 ) THEN
612: CALL ZTRSM( 'R', 'L', 'T', 'U', KB, NB, CONE,
613: $ A( J*NB+1, (J-1)*NB+1 ), LDA,
614: $ TB( TD+NB+1 + (J*NB)*LDTB ), LDTB-1 )
615: END IF
616: *
617: * Copy T(J+1,J) into T(J, J+1), both upper/lower for GEMM
618: * updates
619: *
620: DO K = 1, NB
621: DO I = 1, KB
622: TB( TD-NB+K-I+1 + (J*NB+NB+I-1)*LDTB ) =
623: $ TB( TD+NB+I-K+1 + (J*NB+K-1)*LDTB )
624: END DO
625: END DO
626: CALL ZLASET( 'Upper', KB, NB, CZERO, CONE,
627: $ A( (J+1)*NB+1, J*NB+1 ), LDA )
628: *
629: * Apply pivots to trailing submatrix of A
630: *
631: DO K = 1, KB
632: * > Adjust ipiv
633: IPIV( (J+1)*NB+K ) = IPIV( (J+1)*NB+K ) + (J+1)*NB
634: *
635: I1 = (J+1)*NB+K
636: I2 = IPIV( (J+1)*NB+K )
637: IF( I1.NE.I2 ) THEN
638: * > Apply pivots to previous columns of L
639: CALL ZSWAP( K-1, A( I1, (J+1)*NB+1 ), LDA,
640: $ A( I2, (J+1)*NB+1 ), LDA )
641: * > Swap A(I1+1:M, I1) with A(I2, I1+1:M)
1.2 ! bertrand 642: IF( I2.GT.(I1+1) )
! 643: $ CALL ZSWAP( I2-I1-1, A( I1+1, I1 ), 1,
! 644: $ A( I2, I1+1 ), LDA )
1.1 bertrand 645: * > Swap A(I2+1:M, I1) with A(I2+1:M, I2)
1.2 ! bertrand 646: IF( I2.LT.N )
! 647: $ CALL ZSWAP( N-I2, A( I2+1, I1 ), 1,
! 648: $ A( I2+1, I2 ), 1 )
1.1 bertrand 649: * > Swap A(I1, I1) with A(I2, I2)
650: PIV = A( I1, I1 )
651: A( I1, I1 ) = A( I2, I2 )
652: A( I2, I2 ) = PIV
653: * > Apply pivots to previous columns of L
654: IF( J.GT.0 ) THEN
655: CALL ZSWAP( J*NB, A( I1, 1 ), LDA,
656: $ A( I2, 1 ), LDA )
657: END IF
658: ENDIF
659: END DO
660: *
661: * Apply pivots to previous columns of L
662: *
663: c CALL ZLASWP( J*NB, A( 1, 1 ), LDA,
664: c $ (J+1)*NB+1, (J+1)*NB+KB, IPIV, 1 )
665: END IF
666: END DO
667: END IF
668: *
669: * Factor the band matrix
670: CALL ZGBTRF( N, N, NB, NB, TB, LDTB, IPIV2, INFO )
671: *
1.2 ! bertrand 672: RETURN
! 673: *
1.1 bertrand 674: * End of ZSYTRF_AA_2STAGE
675: *
676: END
CVSweb interface <joel.bertrand@systella.fr>