1: *> \brief \b ZSYTRI_3X
2: *
3: * =========== DOCUMENTATION ===========
4: *
5: * Online html documentation available at
6: * http://www.netlib.org/lapack/explore-html/
7: *
8: *> \htmlonly
9: *> Download ZSYTRI_3X + dependencies
10: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zsytri_3x.f">
11: *> [TGZ]</a>
12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zsytri_3x.f">
13: *> [ZIP]</a>
14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zsytri_3x.f">
15: *> [TXT]</a>
16: *> \endhtmlonly
17: *
18: * Definition:
19: * ===========
20: *
21: * SUBROUTINE ZSYTRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO )
22: *
23: * .. Scalar Arguments ..
24: * CHARACTER UPLO
25: * INTEGER INFO, LDA, N, NB
26: * ..
27: * .. Array Arguments ..
28: * INTEGER IPIV( * )
29: * COMPLEX*16 A( LDA, * ), E( * ), WORK( N+NB+1, * )
30: * ..
31: *
32: *
33: *> \par Purpose:
34: * =============
35: *>
36: *> \verbatim
37: *> ZSYTRI_3X computes the inverse of a complex symmetric indefinite
38: *> matrix A using the factorization computed by ZSYTRF_RK or ZSYTRF_BK:
39: *>
40: *> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T),
41: *>
42: *> where U (or L) is a unit upper (or lower) triangular matrix,
43: *> U**T (or L**T) is the transpose of U (or L), P is a permutation
44: *> matrix, P**T is the transpose of P, and D is symmetric and block
45: *> diagonal with 1-by-1 and 2-by-2 diagonal blocks.
46: *>
47: *> This is the blocked version of the algorithm, calling Level 3 BLAS.
48: *> \endverbatim
49: *
50: * Arguments:
51: * ==========
52: *
53: *> \param[in] UPLO
54: *> \verbatim
55: *> UPLO is CHARACTER*1
56: *> Specifies whether the details of the factorization are
57: *> stored as an upper or lower triangular matrix.
58: *> = 'U': Upper triangle of A is stored;
59: *> = 'L': Lower triangle of A is stored.
60: *> \endverbatim
61: *>
62: *> \param[in] N
63: *> \verbatim
64: *> N is INTEGER
65: *> The order of the matrix A. N >= 0.
66: *> \endverbatim
67: *>
68: *> \param[in,out] A
69: *> \verbatim
70: *> A is COMPLEX*16 array, dimension (LDA,N)
71: *> On entry, diagonal of the block diagonal matrix D and
72: *> factors U or L as computed by ZSYTRF_RK or ZSYTRF_BK:
73: *> a) ONLY diagonal elements of the symmetric block diagonal
74: *> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k);
75: *> (superdiagonal (or subdiagonal) elements of D
76: *> should be provided on entry in array E), and
77: *> b) If UPLO = 'U': factor U in the superdiagonal part of A.
78: *> If UPLO = 'L': factor L in the subdiagonal part of A.
79: *>
80: *> On exit, if INFO = 0, the symmetric inverse of the original
81: *> matrix.
82: *> If UPLO = 'U': the upper triangular part of the inverse
83: *> is formed and the part of A below the diagonal is not
84: *> referenced;
85: *> If UPLO = 'L': the lower triangular part of the inverse
86: *> is formed and the part of A above the diagonal is not
87: *> referenced.
88: *> \endverbatim
89: *>
90: *> \param[in] LDA
91: *> \verbatim
92: *> LDA is INTEGER
93: *> The leading dimension of the array A. LDA >= max(1,N).
94: *> \endverbatim
95: *>
96: *> \param[in] E
97: *> \verbatim
98: *> E is COMPLEX*16 array, dimension (N)
99: *> On entry, contains the superdiagonal (or subdiagonal)
100: *> elements of the symmetric block diagonal matrix D
101: *> with 1-by-1 or 2-by-2 diagonal blocks, where
102: *> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) not referenced;
103: *> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) not referenced.
104: *>
105: *> NOTE: For a 1-by-1 diagonal block D(k), where
106: *> 1 <= k <= N, the element E(k) is not referenced in
107: *> either the UPLO = 'U' or the UPLO = 'L' case.
108: *> \endverbatim
109: *>
110: *> \param[in] IPIV
111: *> \verbatim
112: *> IPIV is INTEGER array, dimension (N)
113: *> Details of the interchanges and the block structure of D
114: *> as determined by ZSYTRF_RK or ZSYTRF_BK.
115: *> \endverbatim
116: *>
117: *> \param[out] WORK
118: *> \verbatim
119: *> WORK is COMPLEX*16 array, dimension (N+NB+1,NB+3).
120: *> \endverbatim
121: *>
122: *> \param[in] NB
123: *> \verbatim
124: *> NB is INTEGER
125: *> Block size.
126: *> \endverbatim
127: *>
128: *> \param[out] INFO
129: *> \verbatim
130: *> INFO is INTEGER
131: *> = 0: successful exit
132: *> < 0: if INFO = -i, the i-th argument had an illegal value
133: *> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its
134: *> inverse could not be computed.
135: *> \endverbatim
136: *
137: * Authors:
138: * ========
139: *
140: *> \author Univ. of Tennessee
141: *> \author Univ. of California Berkeley
142: *> \author Univ. of Colorado Denver
143: *> \author NAG Ltd.
144: *
145: *> \ingroup complex16SYcomputational
146: *
147: *> \par Contributors:
148: * ==================
149: *> \verbatim
150: *>
151: *> June 2017, Igor Kozachenko,
152: *> Computer Science Division,
153: *> University of California, Berkeley
154: *>
155: *> \endverbatim
156: *
157: * =====================================================================
158: SUBROUTINE ZSYTRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO )
159: * Blocked inversion of a factored complex symmetric matrix, in place in A.
160: * -- LAPACK computational routine --
161: * -- LAPACK is a software package provided by Univ. of Tennessee, --
162: * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
163: *
164: * .. Scalar Arguments ..
165: CHARACTER UPLO
166: INTEGER INFO, LDA, N, NB
167: * ..
168: * .. Array Arguments ..
169: INTEGER IPIV( * )
170: COMPLEX*16 A( LDA, * ), E( * ), WORK( N+NB+1, * )
171: * ..
172: *
173: * =====================================================================
174: *
175: * .. Parameters ..
176: COMPLEX*16 CONE, CZERO
177: PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ),
178: $ CZERO = ( 0.0D+0, 0.0D+0 ) )
179: * ..
180: * .. Local Scalars ..
181: LOGICAL UPPER
182: INTEGER CUT, I, ICOUNT, INVD, IP, K, NNB, J, U11
183: COMPLEX*16 AK, AKKP1, AKP1, D, T, U01_I_J, U01_IP1_J,
184: $ U11_I_J, U11_IP1_J
185: * ..
186: * .. External Functions ..
187: LOGICAL LSAME
188: EXTERNAL LSAME
189: * ..
190: * .. External Subroutines ..
191: EXTERNAL ZGEMM, ZSYSWAPR, ZTRTRI, ZTRMM, XERBLA
192: * ..
193: * .. Intrinsic Functions ..
194: INTRINSIC ABS, MAX, MOD
195: * ..
196: * .. Executable Statements ..
197: *
198: * Test the input parameters.
199: * (Only UPLO, N and LDA are checked; NB and the size of WORK are not.)
200: INFO = 0
201: UPPER = LSAME( UPLO, 'U' )
202: IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
203: INFO = -1
204: ELSE IF( N.LT.0 ) THEN
205: INFO = -2
206: ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
207: INFO = -4
208: END IF
209: *
210: * Report an illegal argument through XERBLA; quick return if N = 0
211: *
212: IF( INFO.NE.0 ) THEN
213: CALL XERBLA( 'ZSYTRI_3X', -INFO )
214: RETURN
215: END IF
216: IF( N.EQ.0 )
217: $ RETURN
218: *
219: * Copy E (the off-diagonal elements of D) into column 1 of WORK
220: *
221: DO K = 1, N
222: WORK( K, 1 ) = E( K )
223: END DO
224: *
225: * Check the 1-by-1 diagonal blocks of D (IPIV > 0) for an exact zero;
226: * on failure, return with INFO = index of the zero diagonal element.
227: IF( UPPER ) THEN
228: *
229: * Upper triangular storage: examine D from bottom to top
230: *
231: DO INFO = N, 1, -1
232: IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.CZERO )
233: $ RETURN
234: END DO
235: ELSE
236: *
237: * Lower triangular storage: examine D from top to bottom.
238: *
239: DO INFO = 1, N
240: IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.CZERO )
241: $ RETURN
242: END DO
243: END IF
244: * No zero 1-by-1 block was found: reset INFO (it was the loop index).
245: INFO = 0
246: *
247: * Splitting Workspace
248: * U01 is a block ( N, NB+1 )
249: * The first element of U01 is in WORK( 1, 1 )
250: * U11 is a block ( NB+1, NB+1 )
251: * The first element of U11 is in WORK( N+1, 1 )
252: * (U11 holds the row offset of that block: WORK( U11+I, J ))
253: U11 = N
254: *
255: * INVD is a block ( N, 2 )
256: * The first element of INVD is in WORK( 1, INVD )
257: * (i.e. inv(D) occupies columns NB+2 and NB+3 of WORK)
258: INVD = NB + 2
259:
260: IF( UPPER ) THEN
261: *
262: * Begin Upper
263: *
264: * invA = P * inv(U**T) * inv(D) * inv(U) * P**T.
265: * ZTRTRI (unit diagonal) is used to replace U in A by inv(U).
266: CALL ZTRTRI( UPLO, 'U', N, A, LDA, INFO )
267: *
268: * Form inv(D) block by block in WORK( :, INVD:INVD+1 )
269: * (IPIV( K ) > 0: 1-by-1 block; otherwise 2-by-2 block in rows K, K+1)
270: K = 1
271: DO WHILE( K.LE.N )
272: IF( IPIV( K ).GT.0 ) THEN
273: * 1 x 1 diagonal block
274: WORK( K, INVD ) = CONE / A( K, K )
275: WORK( K, INVD+1 ) = CZERO
276: ELSE
277: * 2 x 2 diagonal block; T is its off-diagonal element E( K+1 )
278: T = WORK( K+1, 1 )
279: AK = A( K, K ) / T
280: AKP1 = A( K+1, K+1 ) / T
281: AKKP1 = WORK( K+1, 1 ) / T
282: D = T*( AK*AKP1-CONE )
283: WORK( K, INVD ) = AKP1 / D
284: WORK( K+1, INVD+1 ) = AK / D
285: WORK( K, INVD+1 ) = -AKKP1 / D
286: WORK( K+1, INVD ) = WORK( K, INVD+1 )
287: K = K + 1
288: END IF
289: K = K + 1
290: END DO
291: *
292: * inv(U**T) = (inv(U))**T
293: *
294: * inv(U**T) * inv(D) * inv(U)
295: * Done in column blocks of width NNB, from the last columns to the first
296: CUT = N
297: DO WHILE( CUT.GT.0 )
298: NNB = NB
299: IF( CUT.LE.NNB ) THEN
300: NNB = CUT
301: ELSE
302: ICOUNT = 0
303: * count negative IPIV entries in the candidate block,
304: DO I = CUT+1-NNB, CUT
305: IF( IPIV( I ).LT.0 ) ICOUNT = ICOUNT + 1
306: END DO
307: * need an even number for a clean cut; if odd, widen the block by one
308: IF( MOD( ICOUNT, 2 ).EQ.1 ) NNB = NNB + 1
309: END IF
310:
311: CUT = CUT - NNB
312: *
313: * U01 Block: copy A( 1:CUT, CUT+1:CUT+NNB ) into WORK( 1:CUT, 1:NNB )
314: *
315: DO I = 1, CUT
316: DO J = 1, NNB
317: WORK( I, J ) = A( I, CUT+J )
318: END DO
319: END DO
320: *
321: * U11 Block: unit upper triangular copy of the diagonal block of A
322: *
323: DO I = 1, NNB
324: WORK( U11+I, I ) = CONE
325: DO J = 1, I-1
326: WORK( U11+I, J ) = CZERO
327: END DO
328: DO J = I+1, NNB
329: WORK( U11+I, J ) = A( CUT+I, CUT+J )
330: END DO
331: END DO
332: *
333: * invD * U01
334: * (the two rows of a 2-by-2 block of D are updated together)
335: I = 1
336: DO WHILE( I.LE.CUT )
337: IF( IPIV( I ).GT.0 ) THEN
338: DO J = 1, NNB
339: WORK( I, J ) = WORK( I, INVD ) * WORK( I, J )
340: END DO
341: ELSE
342: DO J = 1, NNB
343: U01_I_J = WORK( I, J )
344: U01_IP1_J = WORK( I+1, J )
345: WORK( I, J ) = WORK( I, INVD ) * U01_I_J
346: $ + WORK( I, INVD+1 ) * U01_IP1_J
347: WORK( I+1, J ) = WORK( I+1, INVD ) * U01_I_J
348: $ + WORK( I+1, INVD+1 ) * U01_IP1_J
349: END DO
350: I = I + 1
351: END IF
352: I = I + 1
353: END DO
354: *
355: * invD1 * U11
356: * (row U11+I of WORK is scaled with row CUT+I of inv(D))
357: I = 1
358: DO WHILE ( I.LE.NNB )
359: IF( IPIV( CUT+I ).GT.0 ) THEN
360: DO J = I, NNB
361: WORK( U11+I, J ) = WORK(CUT+I,INVD) * WORK(U11+I,J)
362: END DO
363: ELSE
364: DO J = I, NNB
365: U11_I_J = WORK(U11+I,J)
366: U11_IP1_J = WORK(U11+I+1,J)
367: WORK( U11+I, J ) = WORK(CUT+I,INVD) * WORK(U11+I,J)
368: $ + WORK(CUT+I,INVD+1) * WORK(U11+I+1,J)
369: WORK( U11+I+1, J ) = WORK(CUT+I+1,INVD) * U11_I_J
370: $ + WORK(CUT+I+1,INVD+1) * U11_IP1_J
371: END DO
372: I = I + 1
373: END IF
374: I = I + 1
375: END DO
376: *
377: * U11**T * invD1 * U11 -> U11
378: *
379: CALL ZTRMM( 'L', 'U', 'T', 'U', NNB, NNB,
380: $ CONE, A( CUT+1, CUT+1 ), LDA, WORK( U11+1, 1 ),
381: $ N+NB+1 )
382: * Store the upper triangle of the product in the diagonal block of A
383: DO I = 1, NNB
384: DO J = I, NNB
385: A( CUT+I, CUT+J ) = WORK( U11+I, J )
386: END DO
387: END DO
388: *
389: * U01**T * invD * U01 -> WORK( U11+1:U11+NNB, 1:NNB )
390: *
391: CALL ZGEMM( 'T', 'N', NNB, NNB, CUT, CONE, A( 1, CUT+1 ),
392: $ LDA, WORK, N+NB+1, CZERO, WORK(U11+1,1),
393: $ N+NB+1 )
394:
395: *
396: * U11 = U11**T * invD1 * U11 + U01**T * invD * U01
397: *
398: DO I = 1, NNB
399: DO J = I, NNB
400: A( CUT+I, CUT+J ) = A( CUT+I, CUT+J ) + WORK(U11+I,J)
401: END DO
402: END DO
403: *
404: * U01 = U00**T * invD0 * U01
405: *
406: CALL ZTRMM( 'L', UPLO, 'T', 'U', CUT, NNB,
407: $ CONE, A, LDA, WORK, N+NB+1 )
408:
409: *
410: * Update U01: copy the result back into A( 1:CUT, CUT+1:CUT+NNB )
411: *
412: DO I = 1, CUT
413: DO J = 1, NNB
414: A( I, CUT+J ) = WORK( I, J )
415: END DO
416: END DO
417: *
418: * Next Block
419: *
420: END DO
421: *
422: * Apply PERMUTATIONS P and P**T:
423: * P * inv(U**T) * inv(D) * inv(U) * P**T.
424: * Interchange rows and columns I and IPIV(I) in reverse order
425: * from the formation order of IPIV vector for Upper case.
426: *
427: * ( We can use a loop over IPIV with increment 1,
428: * since the ABS value of IPIV(I) represents the row (column)
429: * index of the interchange with row (column) i in both 1x1
430: * and 2x2 pivot cases, i.e. we don't need separate code branches
431: * for 1x1 and 2x2 pivot cases )
432: * ZSYSWAPR is always called with the smaller index first.
433: DO I = 1, N
434: IP = ABS( IPIV( I ) )
435: IF( IP.NE.I ) THEN
436: IF (I .LT. IP) CALL ZSYSWAPR( UPLO, N, A, LDA, I ,IP )
437: IF (I .GT. IP) CALL ZSYSWAPR( UPLO, N, A, LDA, IP ,I )
438: END IF
439: END DO
440: *
441: ELSE
442: *
443: * Begin Lower
444: *
445: * inv A = P * inv(L**T) * inv(D) * inv(L) * P**T.
446: * ZTRTRI (unit diagonal) is used to replace L in A by inv(L).
447: CALL ZTRTRI( UPLO, 'U', N, A, LDA, INFO )
448: *
449: * Form inv(D) block by block in WORK( :, INVD:INVD+1 ), from K = N down
450: * (IPIV( K ) > 0: 1-by-1 block; otherwise 2-by-2 block in rows K-1, K)
451: K = N
452: DO WHILE ( K .GE. 1 )
453: IF( IPIV( K ).GT.0 ) THEN
454: * 1 x 1 diagonal block
455: WORK( K, INVD ) = CONE / A( K, K )
456: WORK( K, INVD+1 ) = CZERO
457: ELSE
458: * 2 x 2 diagonal block; T is its off-diagonal element E( K-1 )
459: T = WORK( K-1, 1 )
460: AK = A( K-1, K-1 ) / T
461: AKP1 = A( K, K ) / T
462: AKKP1 = WORK( K-1, 1 ) / T
463: D = T*( AK*AKP1-CONE )
464: WORK( K-1, INVD ) = AKP1 / D
465: WORK( K, INVD ) = AK / D
466: WORK( K, INVD+1 ) = -AKKP1 / D
467: WORK( K-1, INVD+1 ) = WORK( K, INVD+1 )
468: K = K - 1
469: END IF
470: K = K - 1
471: END DO
472: *
473: * inv(L**T) = (inv(L))**T
474: *
475: * inv(L**T) * inv(D) * inv(L)
476: * Done in column blocks of width NNB, from the first columns to the last
477: CUT = 0
478: DO WHILE( CUT.LT.N )
479: NNB = NB
480: IF( (CUT + NNB).GT.N ) THEN
481: NNB = N - CUT
482: ELSE
483: ICOUNT = 0
484: * count negative IPIV entries in the candidate block,
485: DO I = CUT + 1, CUT+NNB
486: IF ( IPIV( I ).LT.0 ) ICOUNT = ICOUNT + 1
487: END DO
488: * need an even number for a clean cut; if odd, widen the block by one
489: IF( MOD( ICOUNT, 2 ).EQ.1 ) NNB = NNB + 1
490: END IF
491: *
492: * L21 Block: copy A( CUT+NNB+1:N, CUT+1:CUT+NNB ) into WORK
493: *
494: DO I = 1, N-CUT-NNB
495: DO J = 1, NNB
496: WORK( I, J ) = A( CUT+NNB+I, CUT+J )
497: END DO
498: END DO
499: *
500: * L11 Block: unit lower triangular copy of the diagonal block of A
501: *
502: DO I = 1, NNB
503: WORK( U11+I, I) = CONE
504: DO J = I+1, NNB
505: WORK( U11+I, J ) = CZERO
506: END DO
507: DO J = 1, I-1
508: WORK( U11+I, J ) = A( CUT+I, CUT+J )
509: END DO
510: END DO
511: *
512: * invD*L21
513: * (row I of WORK corresponds to row CUT+NNB+I of A and of inv(D))
514: I = N-CUT-NNB
515: DO WHILE( I.GE.1 )
516: IF( IPIV( CUT+NNB+I ).GT.0 ) THEN
517: DO J = 1, NNB
518: WORK( I, J ) = WORK( CUT+NNB+I, INVD) * WORK( I, J)
519: END DO
520: ELSE
521: DO J = 1, NNB
522: U01_I_J = WORK(I,J)
523: U01_IP1_J = WORK(I-1,J)
524: WORK(I,J)=WORK(CUT+NNB+I,INVD)*U01_I_J+
525: $ WORK(CUT+NNB+I,INVD+1)*U01_IP1_J
526: WORK(I-1,J)=WORK(CUT+NNB+I-1,INVD+1)*U01_I_J+
527: $ WORK(CUT+NNB+I-1,INVD)*U01_IP1_J
528: END DO
529: I = I - 1
530: END IF
531: I = I - 1
532: END DO
533: *
534: * invD1*L11
535: * (row U11+I of WORK is scaled with row CUT+I of inv(D))
536: I = NNB
537: DO WHILE( I.GE.1 )
538: IF( IPIV( CUT+I ).GT.0 ) THEN
539: DO J = 1, NNB
540: WORK( U11+I, J ) = WORK( CUT+I, INVD)*WORK(U11+I,J)
541: END DO
542:
543: ELSE
544: DO J = 1, NNB
545: U11_I_J = WORK( U11+I, J )
546: U11_IP1_J = WORK( U11+I-1, J )
547: WORK( U11+I, J ) = WORK(CUT+I,INVD) * WORK(U11+I,J)
548: $ + WORK(CUT+I,INVD+1) * U11_IP1_J
549: WORK( U11+I-1, J ) = WORK(CUT+I-1,INVD+1) * U11_I_J
550: $ + WORK(CUT+I-1,INVD) * U11_IP1_J
551: END DO
552: I = I - 1
553: END IF
554: I = I - 1
555: END DO
556: *
557: * L11**T * invD1 * L11 -> L11
558: *
559: CALL ZTRMM( 'L', UPLO, 'T', 'U', NNB, NNB, CONE,
560: $ A( CUT+1, CUT+1 ), LDA, WORK( U11+1, 1 ),
561: $ N+NB+1 )
562:
563: * Store the lower triangle of the product in the diagonal block of A
564: DO I = 1, NNB
565: DO J = 1, I
566: A( CUT+I, CUT+J ) = WORK( U11+I, J )
567: END DO
568: END DO
569: * The L21 contribution exists only if rows remain below this block
570: IF( (CUT+NNB).LT.N ) THEN
571: *
572: * L21**T * invD2*L21 -> WORK( U11+1:U11+NNB, 1:NNB )
573: *
574: CALL ZGEMM( 'T', 'N', NNB, NNB, N-NNB-CUT, CONE,
575: $ A( CUT+NNB+1, CUT+1 ), LDA, WORK, N+NB+1,
576: $ CZERO, WORK( U11+1, 1 ), N+NB+1 )
577:
578: *
579: * L11 = L11**T * invD1 * L11 + L21**T * invD2 * L21
580: *
581: DO I = 1, NNB
582: DO J = 1, I
583: A( CUT+I, CUT+J ) = A( CUT+I, CUT+J )+WORK(U11+I,J)
584: END DO
585: END DO
586: *
587: * L21 = L22**T * invD2 * L21
588: *
589: CALL ZTRMM( 'L', UPLO, 'T', 'U', N-NNB-CUT, NNB, CONE,
590: $ A( CUT+NNB+1, CUT+NNB+1 ), LDA, WORK,
591: $ N+NB+1 )
592: *
593: * Update L21: copy the result back into A( CUT+NNB+1:N, CUT+1:CUT+NNB )
594: *
595: DO I = 1, N-CUT-NNB
596: DO J = 1, NNB
597: A( CUT+NNB+I, CUT+J ) = WORK( I, J )
598: END DO
599: END DO
600: *
601: ELSE
602: *
603: * L11 = L11**T * invD1 * L11 (last block; repeats the copy made above)
604: *
605: DO I = 1, NNB
606: DO J = 1, I
607: A( CUT+I, CUT+J ) = WORK( U11+I, J )
608: END DO
609: END DO
610: END IF
611: *
612: * Next Block
613: *
614: CUT = CUT + NNB
615: *
616: END DO
617: *
618: * Apply PERMUTATIONS P and P**T:
619: * P * inv(L**T) * inv(D) * inv(L) * P**T.
620: * Interchange rows and columns I and IPIV(I) in reverse order
621: * from the formation order of IPIV vector for Lower case.
622: *
623: * ( We can use a loop over IPIV with increment -1,
624: * since the ABS value of IPIV(I) represents the row (column)
625: * index of the interchange with row (column) i in both 1x1
626: * and 2x2 pivot cases, i.e. we don't need separate code branches
627: * for 1x1 and 2x2 pivot cases )
628: * ZSYSWAPR is always called with the smaller index first.
629: DO I = N, 1, -1
630: IP = ABS( IPIV( I ) )
631: IF( IP.NE.I ) THEN
632: IF (I .LT. IP) CALL ZSYSWAPR( UPLO, N, A, LDA, I ,IP )
633: IF (I .GT. IP) CALL ZSYSWAPR( UPLO, N, A, LDA, IP ,I )
634: END IF
635: END DO
636: *
637: END IF
638: *
639: RETURN
640: *
641: * End of ZSYTRI_3X
642: *
643: END
644:
CVSweb interface <joel.bertrand@systella.fr>