1: *> \brief \b DSYTRI_3X
2: *
3: * =========== DOCUMENTATION ===========
4: *
5: * Online html documentation available at
6: * http://www.netlib.org/lapack/explore-html/
7: *
8: *> \htmlonly
9: *> Download DSYTRI_3X + dependencies
10: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsytri_3x.f">
11: *> [TGZ]</a>
12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsytri_3x.f">
13: *> [ZIP]</a>
14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsytri_3x.f">
15: *> [TXT]</a>
16: *> \endhtmlonly
17: *
18: * Definition:
19: * ===========
20: *
21: * SUBROUTINE DSYTRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO )
22: *
23: * .. Scalar Arguments ..
24: * CHARACTER UPLO
25: * INTEGER INFO, LDA, N, NB
26: * ..
27: * .. Array Arguments ..
28: * INTEGER IPIV( * )
29: * DOUBLE PRECISION A( LDA, * ), E( * ), WORK( N+NB+1, * )
30: * ..
31: *
32: *
33: *> \par Purpose:
34: * =============
35: *>
36: *> \verbatim
37: *> DSYTRI_3X computes the inverse of a real symmetric indefinite
38: *> matrix A using the factorization computed by DSYTRF_RK or DSYTRF_BK:
39: *>
40: *> A = P*U*D*(U**T)*(P**T) or A = P*L*D*(L**T)*(P**T),
41: *>
42: *> where U (or L) is unit upper (or lower) triangular matrix,
43: *> U**T (or L**T) is the transpose of U (or L), P is a permutation
44: *> matrix, P**T is the transpose of P, and D is symmetric and block
45: *> diagonal with 1-by-1 and 2-by-2 diagonal blocks.
46: *>
47: *> This is the blocked version of the algorithm, calling Level 3 BLAS.
48: *> \endverbatim
49: *
50: * Arguments:
51: * ==========
52: *
53: *> \param[in] UPLO
54: *> \verbatim
55: *> UPLO is CHARACTER*1
56: *> Specifies whether the details of the factorization are
57: *> stored as an upper or lower triangular matrix.
58: *> = 'U': Upper triangle of A is stored;
59: *> = 'L': Lower triangle of A is stored.
60: *> \endverbatim
61: *>
62: *> \param[in] N
63: *> \verbatim
64: *> N is INTEGER
65: *> The order of the matrix A. N >= 0.
66: *> \endverbatim
67: *>
68: *> \param[in,out] A
69: *> \verbatim
70: *> A is DOUBLE PRECISION array, dimension (LDA,N)
71: *> On entry, diagonal of the block diagonal matrix D and
72: *> factors U or L as computed by DSYTRF_RK and DSYTRF_BK:
73: *> a) ONLY diagonal elements of the symmetric block diagonal
74: *> matrix D on the diagonal of A, i.e. D(k,k) = A(k,k);
75: *> (superdiagonal (or subdiagonal) elements of D
76: *> should be provided on entry in array E), and
77: *> b) If UPLO = 'U': factor U in the superdiagonal part of A.
78: *> If UPLO = 'L': factor L in the subdiagonal part of A.
79: *>
80: *> On exit, if INFO = 0, the symmetric inverse of the original
81: *> matrix.
82: *> If UPLO = 'U': the upper triangular part of the inverse
83: *> is formed and the part of A below the diagonal is not
84: *> referenced;
85: *> If UPLO = 'L': the lower triangular part of the inverse
86: *> is formed and the part of A above the diagonal is not
87: *> referenced.
88: *> \endverbatim
89: *>
90: *> \param[in] LDA
91: *> \verbatim
92: *> LDA is INTEGER
93: *> The leading dimension of the array A. LDA >= max(1,N).
94: *> \endverbatim
95: *>
96: *> \param[in] E
97: *> \verbatim
98: *> E is DOUBLE PRECISION array, dimension (N)
99: *> On entry, contains the superdiagonal (or subdiagonal)
100: *> elements of the symmetric block diagonal matrix D
101: *> with 1-by-1 or 2-by-2 diagonal blocks, where
102: *> If UPLO = 'U': E(i) = D(i-1,i), i=2:N, E(1) not referenced;
103: *> If UPLO = 'L': E(i) = D(i+1,i), i=1:N-1, E(N) not referenced.
104: *>
105: *> NOTE: For 1-by-1 diagonal block D(k), where
106: *> 1 <= k <= N, the element E(k) is not referenced in both
107: *> UPLO = 'U' or UPLO = 'L' cases.
108: *> \endverbatim
109: *>
110: *> \param[in] IPIV
111: *> \verbatim
112: *> IPIV is INTEGER array, dimension (N)
113: *> Details of the interchanges and the block structure of D
114: *> as determined by DSYTRF_RK or DSYTRF_BK.
115: *> \endverbatim
116: *>
117: *> \param[out] WORK
118: *> \verbatim
119: *> WORK is DOUBLE PRECISION array, dimension (N+NB+1,NB+3).
120: *> \endverbatim
121: *>
122: *> \param[in] NB
123: *> \verbatim
124: *> NB is INTEGER
125: *> Block size.
126: *> \endverbatim
127: *>
128: *> \param[out] INFO
129: *> \verbatim
130: *> INFO is INTEGER
131: *> = 0: successful exit
132: *> < 0: if INFO = -i, the i-th argument had an illegal value
133: *> > 0: if INFO = i, D(i,i) = 0; the matrix is singular and its
134: *> inverse could not be computed.
135: *> \endverbatim
136: *
137: * Authors:
138: * ========
139: *
140: *> \author Univ. of Tennessee
141: *> \author Univ. of California Berkeley
142: *> \author Univ. of Colorado Denver
143: *> \author NAG Ltd.
144: *
145: *> \date June 2017
146: *
147: *> \ingroup doubleSYcomputational
148: *
149: *> \par Contributors:
150: * ==================
151: *> \verbatim
152: *>
153: *> June 2017, Igor Kozachenko,
154: *> Computer Science Division,
155: *> University of California, Berkeley
156: *>
157: *> \endverbatim
158: *
159: * =====================================================================
160: SUBROUTINE DSYTRI_3X( UPLO, N, A, LDA, E, IPIV, WORK, NB, INFO )
161: *
162: * -- LAPACK computational routine (version 3.7.1) --
163: * -- LAPACK is a software package provided by Univ. of Tennessee, --
164: * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
165: * June 2017
166: *
167: * .. Scalar Arguments ..
168: CHARACTER UPLO
169: INTEGER INFO, LDA, N, NB
170: * ..
171: * .. Array Arguments ..
172: INTEGER IPIV( * )
173: DOUBLE PRECISION A( LDA, * ), E( * ), WORK( N+NB+1, * )
174: * ..
175: *
176: * =====================================================================
177: *
178: * .. Parameters ..
179: DOUBLE PRECISION ONE, ZERO
180: PARAMETER ( ONE = 1.0D+0, ZERO = 0.0D+0 )
181: * ..
182: * .. Local Scalars ..
183: LOGICAL UPPER
184: INTEGER CUT, I, ICOUNT, INVD, IP, K, NNB, J, U11
185: DOUBLE PRECISION AK, AKKP1, AKP1, D, T, U01_I_J, U01_IP1_J,
186: $ U11_I_J, U11_IP1_J
187: * ..
188: * .. External Functions ..
189: LOGICAL LSAME
190: EXTERNAL LSAME
191: * ..
192: * .. External Subroutines ..
193: EXTERNAL DGEMM, DSYSWAPR, DTRTRI, DTRMM, XERBLA
194: * ..
195: * .. Intrinsic Functions ..
196: INTRINSIC ABS, MAX, MOD
197: * ..
198: * .. Executable Statements ..
199: *
200: * Test the input parameters.
201: *
202: INFO = 0
203: UPPER = LSAME( UPLO, 'U' )
204: IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
205: INFO = -1
206: ELSE IF( N.LT.0 ) THEN
207: INFO = -2
208: ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
209: INFO = -4
210: END IF
211: *
212: * Quick return if possible
213: *
214: IF( INFO.NE.0 ) THEN
215: CALL XERBLA( 'DSYTRI_3X', -INFO )
216: RETURN
217: END IF
218: IF( N.EQ.0 )
219: $ RETURN
220: *
221: * Workspace got Non-diag elements of D
222: *
223: DO K = 1, N
224: WORK( K, 1 ) = E( K )
225: END DO
226: *
227: * Check that the diagonal matrix D is nonsingular.
228: *
229: IF( UPPER ) THEN
230: *
231: * Upper triangular storage: examine D from bottom to top
232: *
233: DO INFO = N, 1, -1
234: IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO )
235: $ RETURN
236: END DO
237: ELSE
238: *
239: * Lower triangular storage: examine D from top to bottom.
240: *
241: DO INFO = 1, N
242: IF( IPIV( INFO ).GT.0 .AND. A( INFO, INFO ).EQ.ZERO )
243: $ RETURN
244: END DO
245: END IF
246: *
247: INFO = 0
248: *
249: * Splitting Workspace
250: * U01 is a block ( N, NB+1 )
251: * The first element of U01 is in WORK( 1, 1 )
252: * U11 is a block ( NB+1, NB+1 )
253: * The first element of U11 is in WORK( N+1, 1 )
254: *
255: U11 = N
256: *
257: * INVD is a block ( N, 2 )
258: * The first element of INVD is in WORK( 1, INVD )
259: *
260: INVD = NB + 2
261:
262: IF( UPPER ) THEN
263: *
264: * Begin Upper
265: *
266: * invA = P * inv(U**T) * inv(D) * inv(U) * P**T.
267: *
268: CALL DTRTRI( UPLO, 'U', N, A, LDA, INFO )
269: *
270: * inv(D) and inv(D) * inv(U)
271: *
272: K = 1
273: DO WHILE( K.LE.N )
274: IF( IPIV( K ).GT.0 ) THEN
275: * 1 x 1 diagonal NNB
276: WORK( K, INVD ) = ONE / A( K, K )
277: WORK( K, INVD+1 ) = ZERO
278: ELSE
279: * 2 x 2 diagonal NNB
280: T = WORK( K+1, 1 )
281: AK = A( K, K ) / T
282: AKP1 = A( K+1, K+1 ) / T
283: AKKP1 = WORK( K+1, 1 ) / T
284: D = T*( AK*AKP1-ONE )
285: WORK( K, INVD ) = AKP1 / D
286: WORK( K+1, INVD+1 ) = AK / D
287: WORK( K, INVD+1 ) = -AKKP1 / D
288: WORK( K+1, INVD ) = WORK( K, INVD+1 )
289: K = K + 1
290: END IF
291: K = K + 1
292: END DO
293: *
294: * inv(U**T) = (inv(U))**T
295: *
296: * inv(U**T) * inv(D) * inv(U)
297: *
298: CUT = N
299: DO WHILE( CUT.GT.0 )
300: NNB = NB
301: IF( CUT.LE.NNB ) THEN
302: NNB = CUT
303: ELSE
304: ICOUNT = 0
305: * count negative elements,
306: DO I = CUT+1-NNB, CUT
307: IF( IPIV( I ).LT.0 ) ICOUNT = ICOUNT + 1
308: END DO
309: * need a even number for a clear cut
310: IF( MOD( ICOUNT, 2 ).EQ.1 ) NNB = NNB + 1
311: END IF
312:
313: CUT = CUT - NNB
314: *
315: * U01 Block
316: *
317: DO I = 1, CUT
318: DO J = 1, NNB
319: WORK( I, J ) = A( I, CUT+J )
320: END DO
321: END DO
322: *
323: * U11 Block
324: *
325: DO I = 1, NNB
326: WORK( U11+I, I ) = ONE
327: DO J = 1, I-1
328: WORK( U11+I, J ) = ZERO
329: END DO
330: DO J = I+1, NNB
331: WORK( U11+I, J ) = A( CUT+I, CUT+J )
332: END DO
333: END DO
334: *
335: * invD * U01
336: *
337: I = 1
338: DO WHILE( I.LE.CUT )
339: IF( IPIV( I ).GT.0 ) THEN
340: DO J = 1, NNB
341: WORK( I, J ) = WORK( I, INVD ) * WORK( I, J )
342: END DO
343: ELSE
344: DO J = 1, NNB
345: U01_I_J = WORK( I, J )
346: U01_IP1_J = WORK( I+1, J )
347: WORK( I, J ) = WORK( I, INVD ) * U01_I_J
348: $ + WORK( I, INVD+1 ) * U01_IP1_J
349: WORK( I+1, J ) = WORK( I+1, INVD ) * U01_I_J
350: $ + WORK( I+1, INVD+1 ) * U01_IP1_J
351: END DO
352: I = I + 1
353: END IF
354: I = I + 1
355: END DO
356: *
357: * invD1 * U11
358: *
359: I = 1
360: DO WHILE ( I.LE.NNB )
361: IF( IPIV( CUT+I ).GT.0 ) THEN
362: DO J = I, NNB
363: WORK( U11+I, J ) = WORK(CUT+I,INVD) * WORK(U11+I,J)
364: END DO
365: ELSE
366: DO J = I, NNB
367: U11_I_J = WORK(U11+I,J)
368: U11_IP1_J = WORK(U11+I+1,J)
369: WORK( U11+I, J ) = WORK(CUT+I,INVD) * WORK(U11+I,J)
370: $ + WORK(CUT+I,INVD+1) * WORK(U11+I+1,J)
371: WORK( U11+I+1, J ) = WORK(CUT+I+1,INVD) * U11_I_J
372: $ + WORK(CUT+I+1,INVD+1) * U11_IP1_J
373: END DO
374: I = I + 1
375: END IF
376: I = I + 1
377: END DO
378: *
379: * U11**T * invD1 * U11 -> U11
380: *
381: CALL DTRMM( 'L', 'U', 'T', 'U', NNB, NNB,
382: $ ONE, A( CUT+1, CUT+1 ), LDA, WORK( U11+1, 1 ),
383: $ N+NB+1 )
384: *
385: DO I = 1, NNB
386: DO J = I, NNB
387: A( CUT+I, CUT+J ) = WORK( U11+I, J )
388: END DO
389: END DO
390: *
391: * U01**T * invD * U01 -> A( CUT+I, CUT+J )
392: *
393: CALL DGEMM( 'T', 'N', NNB, NNB, CUT, ONE, A( 1, CUT+1 ),
394: $ LDA, WORK, N+NB+1, ZERO, WORK(U11+1,1), N+NB+1 )
395:
396: *
397: * U11 = U11**T * invD1 * U11 + U01**T * invD * U01
398: *
399: DO I = 1, NNB
400: DO J = I, NNB
401: A( CUT+I, CUT+J ) = A( CUT+I, CUT+J ) + WORK(U11+I,J)
402: END DO
403: END DO
404: *
405: * U01 = U00**T * invD0 * U01
406: *
407: CALL DTRMM( 'L', UPLO, 'T', 'U', CUT, NNB,
408: $ ONE, A, LDA, WORK, N+NB+1 )
409:
410: *
411: * Update U01
412: *
413: DO I = 1, CUT
414: DO J = 1, NNB
415: A( I, CUT+J ) = WORK( I, J )
416: END DO
417: END DO
418: *
419: * Next Block
420: *
421: END DO
422: *
423: * Apply PERMUTATIONS P and P**T:
424: * P * inv(U**T) * inv(D) * inv(U) * P**T.
425: * Interchange rows and columns I and IPIV(I) in reverse order
426: * from the formation order of IPIV vector for Upper case.
427: *
428: * ( We can use a loop over IPIV with increment 1,
429: * since the ABS value of IPIV(I) represents the row (column)
430: * index of the interchange with row (column) i in both 1x1
431: * and 2x2 pivot cases, i.e. we don't need separate code branches
432: * for 1x1 and 2x2 pivot cases )
433: *
434: DO I = 1, N
435: IP = ABS( IPIV( I ) )
436: IF( IP.NE.I ) THEN
437: IF (I .LT. IP) CALL DSYSWAPR( UPLO, N, A, LDA, I ,IP )
438: IF (I .GT. IP) CALL DSYSWAPR( UPLO, N, A, LDA, IP ,I )
439: END IF
440: END DO
441: *
442: ELSE
443: *
444: * Begin Lower
445: *
446: * inv A = P * inv(L**T) * inv(D) * inv(L) * P**T.
447: *
448: CALL DTRTRI( UPLO, 'U', N, A, LDA, INFO )
449: *
450: * inv(D) and inv(D) * inv(L)
451: *
452: K = N
453: DO WHILE ( K .GE. 1 )
454: IF( IPIV( K ).GT.0 ) THEN
455: * 1 x 1 diagonal NNB
456: WORK( K, INVD ) = ONE / A( K, K )
457: WORK( K, INVD+1 ) = ZERO
458: ELSE
459: * 2 x 2 diagonal NNB
460: T = WORK( K-1, 1 )
461: AK = A( K-1, K-1 ) / T
462: AKP1 = A( K, K ) / T
463: AKKP1 = WORK( K-1, 1 ) / T
464: D = T*( AK*AKP1-ONE )
465: WORK( K-1, INVD ) = AKP1 / D
466: WORK( K, INVD ) = AK / D
467: WORK( K, INVD+1 ) = -AKKP1 / D
468: WORK( K-1, INVD+1 ) = WORK( K, INVD+1 )
469: K = K - 1
470: END IF
471: K = K - 1
472: END DO
473: *
474: * inv(L**T) = (inv(L))**T
475: *
476: * inv(L**T) * inv(D) * inv(L)
477: *
478: CUT = 0
479: DO WHILE( CUT.LT.N )
480: NNB = NB
481: IF( (CUT + NNB).GT.N ) THEN
482: NNB = N - CUT
483: ELSE
484: ICOUNT = 0
485: * count negative elements,
486: DO I = CUT + 1, CUT+NNB
487: IF ( IPIV( I ).LT.0 ) ICOUNT = ICOUNT + 1
488: END DO
489: * need a even number for a clear cut
490: IF( MOD( ICOUNT, 2 ).EQ.1 ) NNB = NNB + 1
491: END IF
492: *
493: * L21 Block
494: *
495: DO I = 1, N-CUT-NNB
496: DO J = 1, NNB
497: WORK( I, J ) = A( CUT+NNB+I, CUT+J )
498: END DO
499: END DO
500: *
501: * L11 Block
502: *
503: DO I = 1, NNB
504: WORK( U11+I, I) = ONE
505: DO J = I+1, NNB
506: WORK( U11+I, J ) = ZERO
507: END DO
508: DO J = 1, I-1
509: WORK( U11+I, J ) = A( CUT+I, CUT+J )
510: END DO
511: END DO
512: *
513: * invD*L21
514: *
515: I = N-CUT-NNB
516: DO WHILE( I.GE.1 )
517: IF( IPIV( CUT+NNB+I ).GT.0 ) THEN
518: DO J = 1, NNB
519: WORK( I, J ) = WORK( CUT+NNB+I, INVD) * WORK( I, J)
520: END DO
521: ELSE
522: DO J = 1, NNB
523: U01_I_J = WORK(I,J)
524: U01_IP1_J = WORK(I-1,J)
525: WORK(I,J)=WORK(CUT+NNB+I,INVD)*U01_I_J+
526: $ WORK(CUT+NNB+I,INVD+1)*U01_IP1_J
527: WORK(I-1,J)=WORK(CUT+NNB+I-1,INVD+1)*U01_I_J+
528: $ WORK(CUT+NNB+I-1,INVD)*U01_IP1_J
529: END DO
530: I = I - 1
531: END IF
532: I = I - 1
533: END DO
534: *
535: * invD1*L11
536: *
537: I = NNB
538: DO WHILE( I.GE.1 )
539: IF( IPIV( CUT+I ).GT.0 ) THEN
540: DO J = 1, NNB
541: WORK( U11+I, J ) = WORK( CUT+I, INVD)*WORK(U11+I,J)
542: END DO
543:
544: ELSE
545: DO J = 1, NNB
546: U11_I_J = WORK( U11+I, J )
547: U11_IP1_J = WORK( U11+I-1, J )
548: WORK( U11+I, J ) = WORK(CUT+I,INVD) * WORK(U11+I,J)
549: $ + WORK(CUT+I,INVD+1) * U11_IP1_J
550: WORK( U11+I-1, J ) = WORK(CUT+I-1,INVD+1) * U11_I_J
551: $ + WORK(CUT+I-1,INVD) * U11_IP1_J
552: END DO
553: I = I - 1
554: END IF
555: I = I - 1
556: END DO
557: *
558: * L11**T * invD1 * L11 -> L11
559: *
560: CALL DTRMM( 'L', UPLO, 'T', 'U', NNB, NNB, ONE,
561: $ A( CUT+1, CUT+1 ), LDA, WORK( U11+1, 1 ),
562: $ N+NB+1 )
563:
564: *
565: DO I = 1, NNB
566: DO J = 1, I
567: A( CUT+I, CUT+J ) = WORK( U11+I, J )
568: END DO
569: END DO
570: *
571: IF( (CUT+NNB).LT.N ) THEN
572: *
573: * L21**T * invD2*L21 -> A( CUT+I, CUT+J )
574: *
575: CALL DGEMM( 'T', 'N', NNB, NNB, N-NNB-CUT, ONE,
576: $ A( CUT+NNB+1, CUT+1 ), LDA, WORK, N+NB+1,
577: $ ZERO, WORK( U11+1, 1 ), N+NB+1 )
578:
579: *
580: * L11 = L11**T * invD1 * L11 + U01**T * invD * U01
581: *
582: DO I = 1, NNB
583: DO J = 1, I
584: A( CUT+I, CUT+J ) = A( CUT+I, CUT+J )+WORK(U11+I,J)
585: END DO
586: END DO
587: *
588: * L01 = L22**T * invD2 * L21
589: *
590: CALL DTRMM( 'L', UPLO, 'T', 'U', N-NNB-CUT, NNB, ONE,
591: $ A( CUT+NNB+1, CUT+NNB+1 ), LDA, WORK,
592: $ N+NB+1 )
593: *
594: * Update L21
595: *
596: DO I = 1, N-CUT-NNB
597: DO J = 1, NNB
598: A( CUT+NNB+I, CUT+J ) = WORK( I, J )
599: END DO
600: END DO
601: *
602: ELSE
603: *
604: * L11 = L11**T * invD1 * L11
605: *
606: DO I = 1, NNB
607: DO J = 1, I
608: A( CUT+I, CUT+J ) = WORK( U11+I, J )
609: END DO
610: END DO
611: END IF
612: *
613: * Next Block
614: *
615: CUT = CUT + NNB
616: *
617: END DO
618: *
619: * Apply PERMUTATIONS P and P**T:
620: * P * inv(L**T) * inv(D) * inv(L) * P**T.
621: * Interchange rows and columns I and IPIV(I) in reverse order
622: * from the formation order of IPIV vector for Lower case.
623: *
624: * ( We can use a loop over IPIV with increment -1,
625: * since the ABS value of IPIV(I) represents the row (column)
626: * index of the interchange with row (column) i in both 1x1
627: * and 2x2 pivot cases, i.e. we don't need separate code branches
628: * for 1x1 and 2x2 pivot cases )
629: *
630: DO I = N, 1, -1
631: IP = ABS( IPIV( I ) )
632: IF( IP.NE.I ) THEN
633: IF (I .LT. IP) CALL DSYSWAPR( UPLO, N, A, LDA, I ,IP )
634: IF (I .GT. IP) CALL DSYSWAPR( UPLO, N, A, LDA, IP ,I )
635: END IF
636: END DO
637: *
638: END IF
639: *
640: RETURN
641: *
642: * End of DSYTRI_3X
643: *
644: END
645:
CVSweb interface <joel.bertrand@systella.fr>