1: *> \brief \b ZHETRI_3X
2: *
3: * =========== DOCUMENTATION ===========
4: *
5: * Online html documentation available at
6: * http://www.netlib.org/lapack/explore-html/
7: *
8: *> \htmlonly
9: *> Download ZHETRI_3X + dependencies
10: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhetri_3x.f">
11: *> [TGZ]</a>
12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhetri_3x.f">
13: *> [ZIP]</a>
14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhetri_3x.f">
15: *> [TXT]</a>
16: *> \endhtmlonly
17: *
18: * Definition:
19: * ===========
20: *
21: * SUBROUTINE ZHETRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO )
22: *
23: * .. Scalar Arguments ..
24: * CHARACTER UPLO
25: * INTEGER INFO, LDA, N, NB
26: * ..
27: * .. Array Arguments ..
28: * INTEGER IPIV( * )
29: * COMPLEX*16 A( LDA, * ), E( * ), WORK( N+NB+1, * )
30: * ..
31: *
32: *
33: *> \par Purpose:
34: * =============
35: *>
36: *> \verbatim
37: *> ZHETRI_3X computes the inverse of a complex Hermitian indefinite
38: *> matrix A using the factorization computed by ZHETRF_RK or ZHETRF_BK:
39: *>
40: *> A = P*U*D*(U**H)*(P**T) or A = P*L*D*(L**H)*(P**T),
41: *>
42: *> where U (or L) is unit upper (or lower) triangular matrix,
43: *> U**H (or L**H) is the conjugate of U (or L), P is a permutation
44: *> matrix, P**T is the transpose of P, and D is Hermitian and block
45: *> diagonal with 1-by-1 and 2-by-2 diagonal blocks.
46: *>
47: *> This is the blocked version of the algorithm, calling Level 3 BLAS.
48: *> \endverbatim
49: *
50: * Arguments:
51: * ==========
52: *
53: *> \param[in] UPLO
54: *> \verbatim
55: *> UPLO is CHARACTER*1
56: *> Specifies whether the details of the factorization are
57: *> stored as an upper or lower triangular matrix.
58: *> = 'U': Upper triangle of A is stored;
59: *> = 'L': Lower triangle of A is stored.
60: *> \endverbatim
61: *>
62: *> \param[in] N
63: *> \verbatim
64: *> N is INTEGER
65: *> The order of the matrix A. N >= 0.
66: *> \endverbatim
67: *>
68: *> \param[in,out] A
69: *> \verbatim
70: *> A is COMPLEX*16 array, dimension (LDA,N)
71: *> On entry, diagonal of the block diagonal matrix D and
72: *> factors U or L as computed by ZHETRF_RK and ZHETRF_BK:
73: *> a) ONLY diagonal elements of the Hermitian block diagonal
74: *> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k);
75: *> (superdiagonal (or subdiagonal) elements of D
76: *> should be provided on entry in array E), and
77: *> b) If UPLO = 'U': factor U in the superdiagonal part of A.
78: *> If UPLO = 'L': factor L in the subdiagonal part of A.
79: *>
80: *> On exit, if INFO = 0, the Hermitian inverse of the original
81: *> matrix.
82: *> If UPLO = 'U': the upper triangular part of the inverse
83: *> is formed and the part of A below the diagonal is not
84: *> referenced;
85: *> If UPLO = 'L': the lower triangular part of the inverse
86: *> is formed and the part of A above the diagonal is not
87: *> referenced.
88: *> \endverbatim
89: *>
90: *> \param[in] LDA
91: *> \verbatim
92: *> LDA is INTEGER
93: *> The leading dimension of the array A. LDA >= max(1,N).
94: *> \endverbatim
95: *>
96: *> \param[in] E
97: *> \verbatim
98: *> E is COMPLEX*16 array, dimension (N)
99: *> On entry, contains the superdiagonal (or subdiagonal)
100: *> elements of the Hermitian block diagonal matrix D
101: *> with 1-by-1 or 2-by-2 diagonal blocks, where
102: *> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) not referenced;
103: *> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) not referenced.
104: *>
105: *> NOTE: For 1-by-1 diagonal block D(k), where
106: *> 1 <= k <= N, the element E(k) is not referenced in both
107: *> UPLO = 'U' or UPLO = 'L' cases.
108: *> \endverbatim
109: *>
110: *> \param[in] IPIV
111: *> \verbatim
112: *> IPIV is INTEGER array, dimension (N)
113: *> Details of the interchanges and the block structure of D
114: *> as determined by ZHETRF_RK or ZHETRF_BK.
115: *> \endverbatim
116: *>
117: *> \param[out] WORK
118: *> \verbatim
119: *> WORK is COMPLEX*16 array, dimension (N+NB+1,NB+3).
120: *> \endverbatim
121: *>
122: *> \param[in] NB
123: *> \verbatim
124: *> NB is INTEGER
125: *> Block size.
126: *> \endverbatim
127: *>
128: *> \param[out] INFO
129: *> \verbatim
130: *> INFO is INTEGER
131: *> = 0: successful exit
132: *> < 0: if INFO = -i, the i-th argument had an illegal value
133: *> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its
134: *> inverse could not be computed.
135: *> \endverbatim
136: *
137: * Authors:
138: * ========
139: *
140: *> \author Univ. of Tennessee
141: *> \author Univ. of California Berkeley
142: *> \author Univ. of Colorado Denver
143: *> \author NAG Ltd.
144: *
145: *> \date June 2017
146: *
147: *> \ingroup complex16HEcomputational
148: *
149: *> \par Contributors:
150: * ==================
151: *> \verbatim
152: *>
153: *> June 2017, Igor Kozachenko,
154: *> Computer Science Division,
155: *> University of California, Berkeley
156: *>
157: *> \endverbatim
158: *
159: * =====================================================================
160: SUBROUTINE ZHETRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO )
161: *
162: * -- LAPACK computational routine (version 3.7.1) --
163: * -- LAPACK is a software package provided by Univ. of Tennessee, --
164: * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
165: * June 2017
166: *
167: * .. Scalar Arguments ..
168: CHARACTER UPLO
169: INTEGER INFO, LDA, N, NB
170: * ..
171: * .. Array Arguments ..
172: INTEGER IPIV( * )
173: COMPLEX*16 A( LDA, * ), E( * ), WORK( N+NB+1, * )
174: * ..
175: *
176: * =====================================================================
177: *
178: * .. Parameters ..
179: DOUBLE PRECISION ONE
180: PARAMETER ( ONE = 1.0D+0 )
181: COMPLEX*16 CONE, CZERO
182: PARAMETER ( CONE = ( 1.0D+0, 0.0D+0 ),
183: $ CZERO = ( 0.0D+0, 0.0D+0 ) )
184: * ..
185: * .. Local Scalars ..
186: LOGICAL UPPER
187: INTEGER CUT, I, ICOUNT, INVD, IP, K, NNB, J, U11
188: DOUBLE PRECISION AK, AKP1, T
189: COMPLEX*16 AKKP1, D, U01_I_J, U01_IP1_J, U11_I_J,
190: $ U11_IP1_J
191: * ..
192: * .. External Functions ..
193: LOGICAL LSAME
194: EXTERNAL LSAME
195: * ..
196: * .. External Subroutines ..
197: EXTERNAL ZGEMM, ZHESWAPR, ZTRTRI, ZTRMM, XERBLA
198: * ..
199: * .. Intrinsic Functions ..
200: INTRINSIC ABS, DCONJG, DBLE, MAX
201: * ..
202: * .. Executable Statements ..
203: *
204: * Test the input parameters.
205: *
206: INFO = 0
207: UPPER = LSAME( UPLO, 'U' )
208: IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
209: INFO = -1
210: ELSE IF( N.LT.0 ) THEN
211: INFO = -2
212: ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
213: INFO = -4
214: END IF
215: *
216: * Quick return if possible
217: *
218: IF( INFO.NE.0 ) THEN
219: CALL XERBLA( 'ZHETRI_3X', -INFO )
220: RETURN
221: END IF
222: IF( N.EQ.0 )
223: $ RETURN
224: *
225: * Workspace got Non-diag elements of D
226: *
227: DO K = 1, N
228: WORK( K, 1 ) = E( K )
229: END DO
230: *
231: * Check that the diagonal matrix D is nonsingular.
232: *
233: IF( UPPER ) THEN
234: *
235: * Upper triangular storage: examine D from bottom to top
236: *
237: DO INFO = N, 1, -1
238: IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.CZERO )
239: $ RETURN
240: END DO
241: ELSE
242: *
243: * Lower triangular storage: examine D from top to bottom.
244: *
245: DO INFO = 1, N
246: IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.CZERO )
247: $ RETURN
248: END DO
249: END IF
250: *
251: INFO = 0
252: *
253: * Splitting Workspace
254: * U01 is a block ( N, NB+1 )
255: * The first element of U01 is in WORK( 1, 1 )
256: * U11 is a block ( NB+1, NB+1 )
257: * The first element of U11 is in WORK( N+1, 1 )
258: *
259: U11 = N
260: *
261: * INVD is a block ( N, 2 )
262: * The first element of INVD is in WORK( 1, INVD )
263: *
264: INVD = NB + 2
265:
266: IF( UPPER ) THEN
267: *
268: * Begin Upper
269: *
270: * invA = P * inv(U**H) * inv(D) * inv(U) * P**T.
271: *
272: CALL ZTRTRI( UPLO, 'U', N, A, LDA, INFO )
273: *
274: * inv(D) and inv(D) * inv(U)
275: *
276: K = 1
277: DO WHILE( K.LE.N )
278: IF( IPIV( K ).GT.0 ) THEN
279: * 1 x 1 diagonal NNB
280: WORK( K, INVD ) = ONE / DBLE( A( K, K ) )
281: WORK( K, INVD+1 ) = CZERO
282: ELSE
283: * 2 x 2 diagonal NNB
284: T = ABS( WORK( K+1, 1 ) )
285: AK = DBLE( A( K, K ) ) / T
286: AKP1 = DBLE( A( K+1, K+1 ) ) / T
287: AKKP1 = WORK( K+1, 1 ) / T
288: D = T*( AK*AKP1-CONE )
289: WORK( K, INVD ) = AKP1 / D
290: WORK( K+1, INVD+1 ) = AK / D
291: WORK( K, INVD+1 ) = -AKKP1 / D
292: WORK( K+1, INVD ) = DCONJG( WORK( K, INVD+1 ) )
293: K = K + 1
294: END IF
295: K = K + 1
296: END DO
297: *
298: * inv(U**H) = (inv(U))**H
299: *
300: * inv(U**H) * inv(D) * inv(U)
301: *
302: CUT = N
303: DO WHILE( CUT.GT.0 )
304: NNB = NB
305: IF( CUT.LE.NNB ) THEN
306: NNB = CUT
307: ELSE
308: ICOUNT = 0
309: * count negative elements,
310: DO I = CUT+1-NNB, CUT
311: IF( IPIV( I ).LT.0 ) ICOUNT = ICOUNT + 1
312: END DO
313: * need a even number for a clear cut
314: IF( MOD( ICOUNT, 2 ).EQ.1 ) NNB = NNB + 1
315: END IF
316:
317: CUT = CUT - NNB
318: *
319: * U01 Block
320: *
321: DO I = 1, CUT
322: DO J = 1, NNB
323: WORK( I, J ) = A( I, CUT+J )
324: END DO
325: END DO
326: *
327: * U11 Block
328: *
329: DO I = 1, NNB
330: WORK( U11+I, I ) = CONE
331: DO J = 1, I-1
332: WORK( U11+I, J ) = CZERO
333: END DO
334: DO J = I+1, NNB
335: WORK( U11+I, J ) = A( CUT+I, CUT+J )
336: END DO
337: END DO
338: *
339: * invD * U01
340: *
341: I = 1
342: DO WHILE( I.LE.CUT )
343: IF( IPIV( I ).GT.0 ) THEN
344: DO J = 1, NNB
345: WORK( I, J ) = WORK( I, INVD ) * WORK( I, J )
346: END DO
347: ELSE
348: DO J = 1, NNB
349: U01_I_J = WORK( I, J )
350: U01_IP1_J = WORK( I+1, J )
351: WORK( I, J ) = WORK( I, INVD ) * U01_I_J
352: $ + WORK( I, INVD+1 ) * U01_IP1_J
353: WORK( I+1, J ) = WORK( I+1, INVD ) * U01_I_J
354: $ + WORK( I+1, INVD+1 ) * U01_IP1_J
355: END DO
356: I = I + 1
357: END IF
358: I = I + 1
359: END DO
360: *
361: * invD1 * U11
362: *
363: I = 1
364: DO WHILE ( I.LE.NNB )
365: IF( IPIV( CUT+I ).GT.0 ) THEN
366: DO J = I, NNB
367: WORK( U11+I, J ) = WORK(CUT+I,INVD) * WORK(U11+I,J)
368: END DO
369: ELSE
370: DO J = I, NNB
371: U11_I_J = WORK(U11+I,J)
372: U11_IP1_J = WORK(U11+I+1,J)
373: WORK( U11+I, J ) = WORK(CUT+I,INVD) * WORK(U11+I,J)
374: $ + WORK(CUT+I,INVD+1) * WORK(U11+I+1,J)
375: WORK( U11+I+1, J ) = WORK(CUT+I+1,INVD) * U11_I_J
376: $ + WORK(CUT+I+1,INVD+1) * U11_IP1_J
377: END DO
378: I = I + 1
379: END IF
380: I = I + 1
381: END DO
382: *
383: * U11**H * invD1 * U11 -> U11
384: *
385: CALL ZTRMM( 'L', 'U', 'C', 'U', NNB, NNB,
386: $ CONE, A( CUT+1, CUT+1 ), LDA, WORK( U11+1, 1 ),
387: $ N+NB+1 )
388: *
389: DO I = 1, NNB
390: DO J = I, NNB
391: A( CUT+I, CUT+J ) = WORK( U11+I, J )
392: END DO
393: END DO
394: *
395: * U01**H * invD * U01 -> A( CUT+I, CUT+J )
396: *
397: CALL ZGEMM( 'C', 'N', NNB, NNB, CUT, CONE, A( 1, CUT+1 ),
398: $ LDA, WORK, N+NB+1, CZERO, WORK(U11+1,1),
399: $ N+NB+1 )
400:
401: *
402: * U11 = U11**H * invD1 * U11 + U01**H * invD * U01
403: *
404: DO I = 1, NNB
405: DO J = I, NNB
406: A( CUT+I, CUT+J ) = A( CUT+I, CUT+J ) + WORK(U11+I,J)
407: END DO
408: END DO
409: *
410: * U01 = U00**H * invD0 * U01
411: *
412: CALL ZTRMM( 'L', UPLO, 'C', 'U', CUT, NNB,
413: $ CONE, A, LDA, WORK, N+NB+1 )
414:
415: *
416: * Update U01
417: *
418: DO I = 1, CUT
419: DO J = 1, NNB
420: A( I, CUT+J ) = WORK( I, J )
421: END DO
422: END DO
423: *
424: * Next Block
425: *
426: END DO
427: *
428: * Apply PERMUTATIONS P and P**T:
429: * P * inv(U**H) * inv(D) * inv(U) * P**T.
430: * Interchange rows and columns I and IPIV(I) in reverse order
431: * from the formation order of IPIV vector for Upper case.
432: *
433: * ( We can use a loop over IPIV with increment 1,
434: * since the ABS value of IPIV(I) represents the row (column)
435: * index of the interchange with row (column) i in both 1x1
436: * and 2x2 pivot cases, i.e. we don't need separate code branches
437: * for 1x1 and 2x2 pivot cases )
438: *
439: DO I = 1, N
440: IP = ABS( IPIV( I ) )
441: IF( IP.NE.I ) THEN
442: IF (I .LT. IP) CALL ZHESWAPR( UPLO, N, A, LDA, I ,IP )
443: IF (I .GT. IP) CALL ZHESWAPR( UPLO, N, A, LDA, IP ,I )
444: END IF
445: END DO
446: *
447: ELSE
448: *
449: * Begin Lower
450: *
451: * inv A = P * inv(L**H) * inv(D) * inv(L) * P**T.
452: *
453: CALL ZTRTRI( UPLO, 'U', N, A, LDA, INFO )
454: *
455: * inv(D) and inv(D) * inv(L)
456: *
457: K = N
458: DO WHILE ( K .GE. 1 )
459: IF( IPIV( K ).GT.0 ) THEN
460: * 1 x 1 diagonal NNB
461: WORK( K, INVD ) = ONE / DBLE( A( K, K ) )
462: WORK( K, INVD+1 ) = CZERO
463: ELSE
464: * 2 x 2 diagonal NNB
465: T = ABS( WORK( K-1, 1 ) )
466: AK = DBLE( A( K-1, K-1 ) ) / T
467: AKP1 = DBLE( A( K, K ) ) / T
468: AKKP1 = WORK( K-1, 1 ) / T
469: D = T*( AK*AKP1-CONE )
470: WORK( K-1, INVD ) = AKP1 / D
471: WORK( K, INVD ) = AK / D
472: WORK( K, INVD+1 ) = -AKKP1 / D
473: WORK( K-1, INVD+1 ) = DCONJG( WORK( K, INVD+1 ) )
474: K = K - 1
475: END IF
476: K = K - 1
477: END DO
478: *
479: * inv(L**H) = (inv(L))**H
480: *
481: * inv(L**H) * inv(D) * inv(L)
482: *
483: CUT = 0
484: DO WHILE( CUT.LT.N )
485: NNB = NB
486: IF( (CUT + NNB).GT.N ) THEN
487: NNB = N - CUT
488: ELSE
489: ICOUNT = 0
490: * count negative elements,
491: DO I = CUT + 1, CUT+NNB
492: IF ( IPIV( I ).LT.0 ) ICOUNT = ICOUNT + 1
493: END DO
494: * need a even number for a clear cut
495: IF( MOD( ICOUNT, 2 ).EQ.1 ) NNB = NNB + 1
496: END IF
497: *
498: * L21 Block
499: *
500: DO I = 1, N-CUT-NNB
501: DO J = 1, NNB
502: WORK( I, J ) = A( CUT+NNB+I, CUT+J )
503: END DO
504: END DO
505: *
506: * L11 Block
507: *
508: DO I = 1, NNB
509: WORK( U11+I, I) = CONE
510: DO J = I+1, NNB
511: WORK( U11+I, J ) = CZERO
512: END DO
513: DO J = 1, I-1
514: WORK( U11+I, J ) = A( CUT+I, CUT+J )
515: END DO
516: END DO
517: *
518: * invD*L21
519: *
520: I = N-CUT-NNB
521: DO WHILE( I.GE.1 )
522: IF( IPIV( CUT+NNB+I ).GT.0 ) THEN
523: DO J = 1, NNB
524: WORK( I, J ) = WORK( CUT+NNB+I, INVD) * WORK( I, J)
525: END DO
526: ELSE
527: DO J = 1, NNB
528: U01_I_J = WORK(I,J)
529: U01_IP1_J = WORK(I-1,J)
530: WORK(I,J)=WORK(CUT+NNB+I,INVD)*U01_I_J+
531: $ WORK(CUT+NNB+I,INVD+1)*U01_IP1_J
532: WORK(I-1,J)=WORK(CUT+NNB+I-1,INVD+1)*U01_I_J+
533: $ WORK(CUT+NNB+I-1,INVD)*U01_IP1_J
534: END DO
535: I = I - 1
536: END IF
537: I = I - 1
538: END DO
539: *
540: * invD1*L11
541: *
542: I = NNB
543: DO WHILE( I.GE.1 )
544: IF( IPIV( CUT+I ).GT.0 ) THEN
545: DO J = 1, NNB
546: WORK( U11+I, J ) = WORK( CUT+I, INVD)*WORK(U11+I,J)
547: END DO
548:
549: ELSE
550: DO J = 1, NNB
551: U11_I_J = WORK( U11+I, J )
552: U11_IP1_J = WORK( U11+I-1, J )
553: WORK( U11+I, J ) = WORK(CUT+I,INVD) * WORK(U11+I,J)
554: $ + WORK(CUT+I,INVD+1) * U11_IP1_J
555: WORK( U11+I-1, J ) = WORK(CUT+I-1,INVD+1) * U11_I_J
556: $ + WORK(CUT+I-1,INVD) * U11_IP1_J
557: END DO
558: I = I - 1
559: END IF
560: I = I - 1
561: END DO
562: *
563: * L11**H * invD1 * L11 -> L11
564: *
565: CALL ZTRMM( 'L', UPLO, 'C', 'U', NNB, NNB, CONE,
566: $ A( CUT+1, CUT+1 ), LDA, WORK( U11+1, 1 ),
567: $ N+NB+1 )
568:
569: *
570: DO I = 1, NNB
571: DO J = 1, I
572: A( CUT+I, CUT+J ) = WORK( U11+I, J )
573: END DO
574: END DO
575: *
576: IF( (CUT+NNB).LT.N ) THEN
577: *
578: * L21**H * invD2*L21 -> A( CUT+I, CUT+J )
579: *
580: CALL ZGEMM( 'C', 'N', NNB, NNB, N-NNB-CUT, CONE,
581: $ A( CUT+NNB+1, CUT+1 ), LDA, WORK, N+NB+1,
582: $ CZERO, WORK( U11+1, 1 ), N+NB+1 )
583:
584: *
585: * L11 = L11**H * invD1 * L11 + U01**H * invD * U01
586: *
587: DO I = 1, NNB
588: DO J = 1, I
589: A( CUT+I, CUT+J ) = A( CUT+I, CUT+J )+WORK(U11+I,J)
590: END DO
591: END DO
592: *
593: * L01 = L22**H * invD2 * L21
594: *
595: CALL ZTRMM( 'L', UPLO, 'C', 'U', N-NNB-CUT, NNB, CONE,
596: $ A( CUT+NNB+1, CUT+NNB+1 ), LDA, WORK,
597: $ N+NB+1 )
598: *
599: * Update L21
600: *
601: DO I = 1, N-CUT-NNB
602: DO J = 1, NNB
603: A( CUT+NNB+I, CUT+J ) = WORK( I, J )
604: END DO
605: END DO
606: *
607: ELSE
608: *
609: * L11 = L11**H * invD1 * L11
610: *
611: DO I = 1, NNB
612: DO J = 1, I
613: A( CUT+I, CUT+J ) = WORK( U11+I, J )
614: END DO
615: END DO
616: END IF
617: *
618: * Next Block
619: *
620: CUT = CUT + NNB
621: *
622: END DO
623: *
624: * Apply PERMUTATIONS P and P**T:
625: * P * inv(L**H) * inv(D) * inv(L) * P**T.
626: * Interchange rows and columns I and IPIV(I) in reverse order
627: * from the formation order of IPIV vector for Lower case.
628: *
629: * ( We can use a loop over IPIV with increment -1,
630: * since the ABS value of IPIV(I) represents the row (column)
631: * index of the interchange with row (column) i in both 1x1
632: * and 2x2 pivot cases, i.e. we don't need separate code branches
633: * for 1x1 and 2x2 pivot cases )
634: *
635: DO I = N, 1, -1
636: IP = ABS( IPIV( I ) )
637: IF( IP.NE.I ) THEN
638: IF (I .LT. IP) CALL ZHESWAPR( UPLO, N, A, LDA, I ,IP )
639: IF (I .GT. IP) CALL ZHESWAPR( UPLO, N, A, LDA, IP ,I )
640: END IF
641: END DO
642: *
643: END IF
644: *
645: RETURN
646: *
647: * End of ZHETRI_3X
648: *
649: END
CVSweb interface <joel.bertrand@systella.fr>