1: *> \brief \b DTRSYL3
2: *
3: * Definition:
4: * ===========
5: *
6: *
7: *> \par Purpose
8: * =============
9: *>
10: *> \verbatim
11: *>
12: *> DTRSYL3 solves the real Sylvester matrix equation:
13: *>
14: *> op(A)*X + X*op(B) = scale*C or
15: *> op(A)*X - X*op(B) = scale*C,
16: *>
17: *> where op(A) = A or A**T, and A and B are both upper quasi-
18: *> triangular. A is M-by-M and B is N-by-N; the right hand side C and
19: *> the solution X are M-by-N; and scale is an output scale factor, set
20: *> <= 1 to avoid overflow in X.
21: *>
22: *> A and B must be in Schur canonical form (as returned by DHSEQR), that
23: *> is, block upper triangular with 1-by-1 and 2-by-2 diagonal blocks;
24: *> each 2-by-2 diagonal block has its diagonal elements equal and its
25: *> off-diagonal elements of opposite sign.
26: *>
27: *> This is the block version of the algorithm.
28: *> \endverbatim
29: *
30: * Arguments
31: * =========
32: *
33: *> \param[in] TRANA
34: *> \verbatim
35: *> TRANA is CHARACTER*1
36: *> Specifies the option op(A):
37: *> = 'N': op(A) = A (No transpose)
38: *> = 'T': op(A) = A**T (Transpose)
39: *> = 'C': op(A) = A**H (Conjugate transpose = Transpose)
40: *> \endverbatim
41: *>
42: *> \param[in] TRANB
43: *> \verbatim
44: *> TRANB is CHARACTER*1
45: *> Specifies the option op(B):
46: *> = 'N': op(B) = B (No transpose)
47: *> = 'T': op(B) = B**T (Transpose)
48: *> = 'C': op(B) = B**H (Conjugate transpose = Transpose)
49: *> \endverbatim
50: *>
51: *> \param[in] ISGN
52: *> \verbatim
53: *> ISGN is INTEGER
54: *> Specifies the sign in the equation:
55: *> = +1: solve op(A)*X + X*op(B) = scale*C
56: *> = -1: solve op(A)*X - X*op(B) = scale*C
57: *> \endverbatim
58: *>
59: *> \param[in] M
60: *> \verbatim
61: *> M is INTEGER
62: *> The order of the matrix A, and the number of rows in the
63: *> matrices X and C. M >= 0.
64: *> \endverbatim
65: *>
66: *> \param[in] N
67: *> \verbatim
68: *> N is INTEGER
69: *> The order of the matrix B, and the number of columns in the
70: *> matrices X and C. N >= 0.
71: *> \endverbatim
72: *>
73: *> \param[in] A
74: *> \verbatim
75: *> A is DOUBLE PRECISION array, dimension (LDA,M)
76: *> The upper quasi-triangular matrix A, in Schur canonical form.
77: *> \endverbatim
78: *>
79: *> \param[in] LDA
80: *> \verbatim
81: *> LDA is INTEGER
82: *> The leading dimension of the array A. LDA >= max(1,M).
83: *> \endverbatim
84: *>
85: *> \param[in] B
86: *> \verbatim
87: *> B is DOUBLE PRECISION array, dimension (LDB,N)
88: *> The upper quasi-triangular matrix B, in Schur canonical form.
89: *> \endverbatim
90: *>
91: *> \param[in] LDB
92: *> \verbatim
93: *> LDB is INTEGER
94: *> The leading dimension of the array B. LDB >= max(1,N).
95: *> \endverbatim
96: *>
97: *> \param[in,out] C
98: *> \verbatim
99: *> C is DOUBLE PRECISION array, dimension (LDC,N)
100: *> On entry, the M-by-N right hand side matrix C.
101: *> On exit, C is overwritten by the solution matrix X.
102: *> \endverbatim
103: *>
104: *> \param[in] LDC
105: *> \verbatim
106: *> LDC is INTEGER
107: *> The leading dimension of the array C. LDC >= max(1,M)
108: *> \endverbatim
109: *>
110: *> \param[out] SCALE
111: *> \verbatim
112: *> SCALE is DOUBLE PRECISION
113: *> The scale factor, scale, set <= 1 to avoid overflow in X.
114: *> \endverbatim
115: *>
116: *> \param[out] IWORK
117: *> \verbatim
118: *> IWORK is INTEGER array, dimension (MAX(1,LIWORK))
119: *> On exit, if INFO = 0, IWORK(1) returns the optimal LIWORK.
120: *> \endverbatim
121: *>
122: *> \param[in] LIWORK
123: *> \verbatim
124: *> IWORK is INTEGER
125: *> The dimension of the array IWORK. LIWORK >= ((M + NB - 1) / NB + 1)
126: *> + ((N + NB - 1) / NB + 1), where NB is the optimal block size.
127: *>
128: *> If LIWORK = -1, then a workspace query is assumed; the routine
129: *> only calculates the optimal dimension of the IWORK array,
130: *> returns this value as the first entry of the IWORK array, and
131: *> no error message related to LIWORK is issued by XERBLA.
132: *> \endverbatim
133: *>
134: *> \param[out] SWORK
135: *> \verbatim
136: *> SWORK is DOUBLE PRECISION array, dimension (MAX(2, ROWS),
137: *> MAX(1,COLS)).
138: *> On exit, if INFO = 0, SWORK(1) returns the optimal value ROWS
139: *> and SWORK(2) returns the optimal COLS.
140: *> \endverbatim
141: *>
142: *> \param[in] LDSWORK
143: *> \verbatim
144: *> LDSWORK is INTEGER
145: *> LDSWORK >= MAX(2,ROWS), where ROWS = ((M + NB - 1) / NB + 1)
146: *> and NB is the optimal block size.
147: *>
148: *> If LDSWORK = -1, then a workspace query is assumed; the routine
149: *> only calculates the optimal dimensions of the SWORK matrix,
150: *> returns these values as the first and second entry of the SWORK
151: *> matrix, and no error message related LWORK is issued by XERBLA.
152: *> \endverbatim
153: *>
154: *> \param[out] INFO
155: *> \verbatim
156: *> INFO is INTEGER
157: *> = 0: successful exit
158: *> < 0: if INFO = -i, the i-th argument had an illegal value
159: *> = 1: A and B have common or very close eigenvalues; perturbed
160: *> values were used to solve the equation (but the matrices
161: *> A and B are unchanged).
162: *> \endverbatim
163: *
164: * =====================================================================
165: * References:
166: * E. S. Quintana-Orti and R. A. Van De Geijn (2003). Formal derivation of
167: * algorithms: The triangular Sylvester equation, ACM Transactions
168: * on Mathematical Software (TOMS), volume 29, pages 218--243.
169: *
170: * A. Schwarz and C. C. Kjelgaard Mikkelsen (2020). Robust Task-Parallel
171: * Solution of the Triangular Sylvester Equation. Lecture Notes in
172: * Computer Science, vol 12043, pages 82--92, Springer.
173: *
174: * Contributor:
175: * Angelika Schwarz, Umea University, Sweden.
176: *
177: * =====================================================================
178: SUBROUTINE DTRSYL3( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB, C,
179: $ LDC, SCALE, IWORK, LIWORK, SWORK, LDSWORK,
180: $ INFO )
181: IMPLICIT NONE
182: *
183: * .. Scalar Arguments ..
184: CHARACTER TRANA, TRANB
185: INTEGER INFO, ISGN, LDA, LDB, LDC, M, N,
186: $ LIWORK, LDSWORK
187: DOUBLE PRECISION SCALE
188: * ..
189: * .. Array Arguments ..
190: INTEGER IWORK( * )
191: DOUBLE PRECISION A( LDA, * ), B( LDB, * ), C( LDC, * ),
192: $ SWORK( LDSWORK, * )
193: * ..
194: * .. Parameters ..
195: DOUBLE PRECISION ZERO, ONE
196: PARAMETER ( ZERO = 0.0D+0, ONE = 1.0D+0 )
197: * ..
198: * .. Local Scalars ..
199: LOGICAL NOTRNA, NOTRNB, LQUERY, SKIP
200: INTEGER AWRK, BWRK, I, I1, I2, IINFO, J, J1, J2, JJ,
201: $ K, K1, K2, L, L1, L2, LL, NBA, NB, NBB, PC
202: DOUBLE PRECISION ANRM, BIGNUM, BNRM, CNRM, SCAL, SCALOC,
203: $ SCAMIN, SGN, XNRM, BUF, SMLNUM
204: * ..
205: * .. Local Arrays ..
206: DOUBLE PRECISION WNRM( MAX( M, N ) )
207: * ..
208: * .. External Functions ..
209: LOGICAL LSAME
210: INTEGER ILAENV
211: DOUBLE PRECISION DLANGE, DLAMCH, DLARMM
212: EXTERNAL DLANGE, DLAMCH, DLARMM, ILAENV, LSAME
213: * ..
214: * .. External Subroutines ..
215: EXTERNAL DGEMM, DLASCL, DSCAL, DTRSYL, XERBLA
216: * ..
217: * .. Intrinsic Functions ..
218: INTRINSIC ABS, DBLE, EXPONENT, MAX, MIN
219: * ..
220: * .. Executable Statements ..
221: *
222: * Decode and Test input parameters
223: *
224: NOTRNA = LSAME( TRANA, 'N' )
225: NOTRNB = LSAME( TRANB, 'N' )
226: *
227: * Use the same block size for all matrices.
228: *
229: NB = MAX(8, ILAENV( 1, 'DTRSYL', '', M, N, -1, -1) )
230: *
231: * Compute number of blocks in A and B
232: *
233: NBA = MAX( 1, (M + NB - 1) / NB )
234: NBB = MAX( 1, (N + NB - 1) / NB )
235: *
236: * Compute workspace
237: *
238: INFO = 0
239: LQUERY = ( LIWORK.EQ.-1 .OR. LDSWORK.EQ.-1 )
240: IWORK( 1 ) = NBA + NBB + 2
241: IF( LQUERY ) THEN
242: LDSWORK = 2
243: SWORK( 1, 1 ) = MAX( NBA, NBB )
244: SWORK( 2, 1 ) = 2 * NBB + NBA
245: END IF
246: *
247: * Test the input arguments
248: *
249: IF( .NOT.NOTRNA .AND. .NOT.LSAME( TRANA, 'T' ) .AND. .NOT.
250: $ LSAME( TRANA, 'C' ) ) THEN
251: INFO = -1
252: ELSE IF( .NOT.NOTRNB .AND. .NOT.LSAME( TRANB, 'T' ) .AND. .NOT.
253: $ LSAME( TRANB, 'C' ) ) THEN
254: INFO = -2
255: ELSE IF( ISGN.NE.1 .AND. ISGN.NE.-1 ) THEN
256: INFO = -3
257: ELSE IF( M.LT.0 ) THEN
258: INFO = -4
259: ELSE IF( N.LT.0 ) THEN
260: INFO = -5
261: ELSE IF( LDA.LT.MAX( 1, M ) ) THEN
262: INFO = -7
263: ELSE IF( LDB.LT.MAX( 1, N ) ) THEN
264: INFO = -9
265: ELSE IF( LDC.LT.MAX( 1, M ) ) THEN
266: INFO = -11
267: END IF
268: IF( INFO.NE.0 ) THEN
269: CALL XERBLA( 'DTRSYL3', -INFO )
270: RETURN
271: ELSE IF( LQUERY ) THEN
272: RETURN
273: END IF
274: *
275: * Quick return if possible
276: *
277: SCALE = ONE
278: IF( M.EQ.0 .OR. N.EQ.0 )
279: $ RETURN
280: *
281: * Use unblocked code for small problems or if insufficient
282: * workspaces are provided
283: *
284: IF( MIN( NBA, NBB ).EQ.1 .OR. LDSWORK.LT.MAX( NBA, NBB ) .OR.
285: $ LIWORK.LT.IWORK(1) ) THEN
286: CALL DTRSYL( TRANA, TRANB, ISGN, M, N, A, LDA, B, LDB,
287: $ C, LDC, SCALE, INFO )
288: RETURN
289: END IF
290: *
291: * Set constants to control overflow
292: *
293: SMLNUM = DLAMCH( 'S' )
294: BIGNUM = ONE / SMLNUM
295: *
296: * Partition A such that 2-by-2 blocks on the diagonal are not split
297: *
298: SKIP = .FALSE.
299: DO I = 1, NBA
300: IWORK( I ) = ( I - 1 ) * NB + 1
301: END DO
302: IWORK( NBA + 1 ) = M + 1
303: DO K = 1, NBA
304: L1 = IWORK( K )
305: L2 = IWORK( K + 1 ) - 1
306: DO L = L1, L2
307: IF( SKIP ) THEN
308: SKIP = .FALSE.
309: CYCLE
310: END IF
311: IF( L.GE.M ) THEN
312: * A( M, M ) is a 1-by-1 block
313: CYCLE
314: END IF
315: IF( A( L, L+1 ).NE.ZERO .AND. A( L+1, L ).NE.ZERO ) THEN
316: * Check if 2-by-2 block is split
317: IF( L + 1 .EQ. IWORK( K + 1 ) ) THEN
318: IWORK( K + 1 ) = IWORK( K + 1 ) + 1
319: CYCLE
320: END IF
321: SKIP = .TRUE.
322: END IF
323: END DO
324: END DO
325: IWORK( NBA + 1 ) = M + 1
326: IF( IWORK( NBA ).GE.IWORK( NBA + 1 ) ) THEN
327: IWORK( NBA ) = IWORK( NBA + 1 )
328: NBA = NBA - 1
329: END IF
330: *
331: * Partition B such that 2-by-2 blocks on the diagonal are not split
332: *
333: PC = NBA + 1
334: SKIP = .FALSE.
335: DO I = 1, NBB
336: IWORK( PC + I ) = ( I - 1 ) * NB + 1
337: END DO
338: IWORK( PC + NBB + 1 ) = N + 1
339: DO K = 1, NBB
340: L1 = IWORK( PC + K )
341: L2 = IWORK( PC + K + 1 ) - 1
342: DO L = L1, L2
343: IF( SKIP ) THEN
344: SKIP = .FALSE.
345: CYCLE
346: END IF
347: IF( L.GE.N ) THEN
348: * B( N, N ) is a 1-by-1 block
349: CYCLE
350: END IF
351: IF( B( L, L+1 ).NE.ZERO .AND. B( L+1, L ).NE.ZERO ) THEN
352: * Check if 2-by-2 block is split
353: IF( L + 1 .EQ. IWORK( PC + K + 1 ) ) THEN
354: IWORK( PC + K + 1 ) = IWORK( PC + K + 1 ) + 1
355: CYCLE
356: END IF
357: SKIP = .TRUE.
358: END IF
359: END DO
360: END DO
361: IWORK( PC + NBB + 1 ) = N + 1
362: IF( IWORK( PC + NBB ).GE.IWORK( PC + NBB + 1 ) ) THEN
363: IWORK( PC + NBB ) = IWORK( PC + NBB + 1 )
364: NBB = NBB - 1
365: END IF
366: *
367: * Set local scaling factors - must never attain zero.
368: *
369: DO L = 1, NBB
370: DO K = 1, NBA
371: SWORK( K, L ) = ONE
372: END DO
373: END DO
374: *
375: * Fallback scaling factor to prevent flushing of SWORK( K, L ) to zero.
376: * This scaling is to ensure compatibility with TRSYL and may get flushed.
377: *
378: BUF = ONE
379: *
380: * Compute upper bounds of blocks of A and B
381: *
382: AWRK = NBB
383: DO K = 1, NBA
384: K1 = IWORK( K )
385: K2 = IWORK( K + 1 )
386: DO L = K, NBA
387: L1 = IWORK( L )
388: L2 = IWORK( L + 1 )
389: IF( NOTRNA ) THEN
390: SWORK( K, AWRK + L ) = DLANGE( 'I', K2-K1, L2-L1,
391: $ A( K1, L1 ), LDA, WNRM )
392: ELSE
393: SWORK( L, AWRK + K ) = DLANGE( '1', K2-K1, L2-L1,
394: $ A( K1, L1 ), LDA, WNRM )
395: END IF
396: END DO
397: END DO
398: BWRK = NBB + NBA
399: DO K = 1, NBB
400: K1 = IWORK( PC + K )
401: K2 = IWORK( PC + K + 1 )
402: DO L = K, NBB
403: L1 = IWORK( PC + L )
404: L2 = IWORK( PC + L + 1 )
405: IF( NOTRNB ) THEN
406: SWORK( K, BWRK + L ) = DLANGE( 'I', K2-K1, L2-L1,
407: $ B( K1, L1 ), LDB, WNRM )
408: ELSE
409: SWORK( L, BWRK + K ) = DLANGE( '1', K2-K1, L2-L1,
410: $ B( K1, L1 ), LDB, WNRM )
411: END IF
412: END DO
413: END DO
414: *
415: SGN = DBLE( ISGN )
416: *
417: IF( NOTRNA .AND. NOTRNB ) THEN
418: *
419: * Solve A*X + ISGN*X*B = scale*C.
420: *
421: * The (K,L)th block of X is determined starting from
422: * bottom-left corner column by column by
423: *
424: * A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L)
425: *
426: * Where
427: * M L-1
428: * R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(J,L)].
429: * I=K+1 J=1
430: *
431: * Start loop over block rows (index = K) and block columns (index = L)
432: *
433: DO K = NBA, 1, -1
434: *
435: * K1: row index of the first row in X( K, L )
436: * K2: row index of the first row in X( K+1, L )
437: * so the K2 - K1 is the column count of the block X( K, L )
438: *
439: K1 = IWORK( K )
440: K2 = IWORK( K + 1 )
441: DO L = 1, NBB
442: *
443: * L1: column index of the first column in X( K, L )
444: * L2: column index of the first column in X( K, L + 1)
445: * so that L2 - L1 is the row count of the block X( K, L )
446: *
447: L1 = IWORK( PC + L )
448: L2 = IWORK( PC + L + 1 )
449: *
450: CALL DTRSYL( TRANA, TRANB, ISGN, K2-K1, L2-L1,
451: $ A( K1, K1 ), LDA,
452: $ B( L1, L1 ), LDB,
453: $ C( K1, L1 ), LDC, SCALOC, IINFO )
454: INFO = MAX( INFO, IINFO )
455: *
456: IF ( SCALOC * SWORK( K, L ) .EQ. ZERO ) THEN
457: IF( SCALOC .EQ. ZERO ) THEN
458: * The magnitude of the largest entry of X(K1:K2-1, L1:L2-1)
459: * is larger than the product of BIGNUM**2 and cannot be
460: * represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1).
461: * Mark the computation as pointless.
462: BUF = ZERO
463: ELSE
464: * Use second scaling factor to prevent flushing to zero.
465: BUF = BUF*2.D0**EXPONENT( SCALOC )
466: END IF
467: DO JJ = 1, NBB
468: DO LL = 1, NBA
469: * Bound by BIGNUM to not introduce Inf. The value
470: * is irrelevant; corresponding entries of the
471: * solution will be flushed in consistency scaling.
472: SWORK( LL, JJ ) = MIN( BIGNUM,
473: $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) )
474: END DO
475: END DO
476: END IF
477: SWORK( K, L ) = SCALOC * SWORK( K, L )
478: XNRM = DLANGE( 'I', K2-K1, L2-L1, C( K1, L1 ), LDC,
479: $ WNRM )
480: *
481: DO I = K - 1, 1, -1
482: *
483: * C( I, L ) := C( I, L ) - A( I, K ) * C( K, L )
484: *
485: I1 = IWORK( I )
486: I2 = IWORK( I + 1 )
487: *
488: * Compute scaling factor to survive the linear update
489: * simulating consistent scaling.
490: *
491: CNRM = DLANGE( 'I', I2-I1, L2-L1, C( I1, L1 ),
492: $ LDC, WNRM )
493: SCAMIN = MIN( SWORK( I, L ), SWORK( K, L ) )
494: CNRM = CNRM * ( SCAMIN / SWORK( I, L ) )
495: XNRM = XNRM * ( SCAMIN / SWORK( K, L ) )
496: ANRM = SWORK( I, AWRK + K )
497: SCALOC = DLARMM( ANRM, XNRM, CNRM )
498: IF( SCALOC * SCAMIN .EQ. ZERO ) THEN
499: * Use second scaling factor to prevent flushing to zero.
500: BUF = BUF*2.D0**EXPONENT( SCALOC )
501: DO JJ = 1, NBB
502: DO LL = 1, NBA
503: SWORK( LL, JJ ) = MIN( BIGNUM,
504: $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) )
505: END DO
506: END DO
507: SCAMIN = SCAMIN / 2.D0**EXPONENT( SCALOC )
508: SCALOC = SCALOC / 2.D0**EXPONENT( SCALOC )
509: END IF
510: CNRM = CNRM * SCALOC
511: XNRM = XNRM * SCALOC
512: *
513: * Simultaneously apply the robust update factor and the
514: * consistency scaling factor to C( I, L ) and C( K, L ).
515: *
516: SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC
517: IF (SCAL .NE. ONE) THEN
518: DO JJ = L1, L2-1
519: CALL DSCAL( K2-K1, SCAL, C( K1, JJ ), 1)
520: END DO
521: ENDIF
522: *
523: SCAL = ( SCAMIN / SWORK( I, L ) ) * SCALOC
524: IF (SCAL .NE. ONE) THEN
525: DO LL = L1, L2-1
526: CALL DSCAL( I2-I1, SCAL, C( I1, LL ), 1)
527: END DO
528: ENDIF
529: *
530: * Record current scaling factor
531: *
532: SWORK( K, L ) = SCAMIN * SCALOC
533: SWORK( I, L ) = SCAMIN * SCALOC
534: *
535: CALL DGEMM( 'N', 'N', I2-I1, L2-L1, K2-K1, -ONE,
536: $ A( I1, K1 ), LDA, C( K1, L1 ), LDC,
537: $ ONE, C( I1, L1 ), LDC )
538: *
539: END DO
540: *
541: DO J = L + 1, NBB
542: *
543: * C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( L, J )
544: *
545: J1 = IWORK( PC + J )
546: J2 = IWORK( PC + J + 1 )
547: *
548: * Compute scaling factor to survive the linear update
549: * simulating consistent scaling.
550: *
551: CNRM = DLANGE( 'I', K2-K1, J2-J1, C( K1, J1 ),
552: $ LDC, WNRM )
553: SCAMIN = MIN( SWORK( K, J ), SWORK( K, L ) )
554: CNRM = CNRM * ( SCAMIN / SWORK( K, J ) )
555: XNRM = XNRM * ( SCAMIN / SWORK( K, L ) )
556: BNRM = SWORK(L, BWRK + J)
557: SCALOC = DLARMM( BNRM, XNRM, CNRM )
558: IF( SCALOC * SCAMIN .EQ. ZERO ) THEN
559: * Use second scaling factor to prevent flushing to zero.
560: BUF = BUF*2.D0**EXPONENT( SCALOC )
561: DO JJ = 1, NBB
562: DO LL = 1, NBA
563: SWORK( LL, JJ ) = MIN( BIGNUM,
564: $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) )
565: END DO
566: END DO
567: SCAMIN = SCAMIN / 2.D0**EXPONENT( SCALOC )
568: SCALOC = SCALOC / 2.D0**EXPONENT( SCALOC )
569: END IF
570: CNRM = CNRM * SCALOC
571: XNRM = XNRM * SCALOC
572: *
573: * Simultaneously apply the robust update factor and the
574: * consistency scaling factor to C( K, J ) and C( K, L).
575: *
576: SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC
577: IF( SCAL .NE. ONE ) THEN
578: DO LL = L1, L2-1
579: CALL DSCAL( K2-K1, SCAL, C( K1, LL ), 1 )
580: END DO
581: ENDIF
582: *
583: SCAL = ( SCAMIN / SWORK( K, J ) ) * SCALOC
584: IF( SCAL .NE. ONE ) THEN
585: DO JJ = J1, J2-1
586: CALL DSCAL( K2-K1, SCAL, C( K1, JJ ), 1 )
587: END DO
588: ENDIF
589: *
590: * Record current scaling factor
591: *
592: SWORK( K, L ) = SCAMIN * SCALOC
593: SWORK( K, J ) = SCAMIN * SCALOC
594: *
595: CALL DGEMM( 'N', 'N', K2-K1, J2-J1, L2-L1, -SGN,
596: $ C( K1, L1 ), LDC, B( L1, J1 ), LDB,
597: $ ONE, C( K1, J1 ), LDC )
598: END DO
599: END DO
600: END DO
601: ELSE IF( .NOT.NOTRNA .AND. NOTRNB ) THEN
602: *
603: * Solve A**T*X + ISGN*X*B = scale*C.
604: *
605: * The (K,L)th block of X is determined starting from
606: * upper-left corner column by column by
607: *
608: * A(K,K)**T*X(K,L) + ISGN*X(K,L)*B(L,L) = C(K,L) - R(K,L)
609: *
610: * Where
611: * K-1 L-1
612: * R(K,L) = SUM [A(I,K)**T*X(I,L)] +ISGN*SUM [X(K,J)*B(J,L)]
613: * I=1 J=1
614: *
615: * Start loop over block rows (index = K) and block columns (index = L)
616: *
617: DO K = 1, NBA
618: *
619: * K1: row index of the first row in X( K, L )
620: * K2: row index of the first row in X( K+1, L )
621: * so the K2 - K1 is the column count of the block X( K, L )
622: *
623: K1 = IWORK( K )
624: K2 = IWORK( K + 1 )
625: DO L = 1, NBB
626: *
627: * L1: column index of the first column in X( K, L )
628: * L2: column index of the first column in X( K, L + 1)
629: * so that L2 - L1 is the row count of the block X( K, L )
630: *
631: L1 = IWORK( PC + L )
632: L2 = IWORK( PC + L + 1 )
633: *
634: CALL DTRSYL( TRANA, TRANB, ISGN, K2-K1, L2-L1,
635: $ A( K1, K1 ), LDA,
636: $ B( L1, L1 ), LDB,
637: $ C( K1, L1 ), LDC, SCALOC, IINFO )
638: INFO = MAX( INFO, IINFO )
639: *
640: IF( SCALOC * SWORK( K, L ) .EQ. ZERO ) THEN
641: IF( SCALOC .EQ. ZERO ) THEN
642: * The magnitude of the largest entry of X(K1:K2-1, L1:L2-1)
643: * is larger than the product of BIGNUM**2 and cannot be
644: * represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1).
645: * Mark the computation as pointless.
646: BUF = ZERO
647: ELSE
648: * Use second scaling factor to prevent flushing to zero.
649: BUF = BUF*2.D0**EXPONENT( SCALOC )
650: END IF
651: DO JJ = 1, NBB
652: DO LL = 1, NBA
653: * Bound by BIGNUM to not introduce Inf. The value
654: * is irrelevant; corresponding entries of the
655: * solution will be flushed in consistency scaling.
656: SWORK( LL, JJ ) = MIN( BIGNUM,
657: $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) )
658: END DO
659: END DO
660: END IF
661: SWORK( K, L ) = SCALOC * SWORK( K, L )
662: XNRM = DLANGE( 'I', K2-K1, L2-L1, C( K1, L1 ), LDC,
663: $ WNRM )
664: *
665: DO I = K + 1, NBA
666: *
667: * C( I, L ) := C( I, L ) - A( K, I )**T * C( K, L )
668: *
669: I1 = IWORK( I )
670: I2 = IWORK( I + 1 )
671: *
672: * Compute scaling factor to survive the linear update
673: * simulating consistent scaling.
674: *
675: CNRM = DLANGE( 'I', I2-I1, L2-L1, C( I1, L1 ),
676: $ LDC, WNRM )
677: SCAMIN = MIN( SWORK( I, L ), SWORK( K, L ) )
678: CNRM = CNRM * ( SCAMIN / SWORK( I, L ) )
679: XNRM = XNRM * ( SCAMIN / SWORK( K, L ) )
680: ANRM = SWORK( I, AWRK + K )
681: SCALOC = DLARMM( ANRM, XNRM, CNRM )
682: IF( SCALOC * SCAMIN .EQ. ZERO ) THEN
683: * Use second scaling factor to prevent flushing to zero.
684: BUF = BUF*2.D0**EXPONENT( SCALOC )
685: DO JJ = 1, NBB
686: DO LL = 1, NBA
687: SWORK( LL, JJ ) = MIN( BIGNUM,
688: $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) )
689: END DO
690: END DO
691: SCAMIN = SCAMIN / 2.D0**EXPONENT( SCALOC )
692: SCALOC = SCALOC / 2.D0**EXPONENT( SCALOC )
693: END IF
694: CNRM = CNRM * SCALOC
695: XNRM = XNRM * SCALOC
696: *
697: * Simultaneously apply the robust update factor and the
698: * consistency scaling factor to to C( I, L ) and C( K, L ).
699: *
700: SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC
701: IF (SCAL .NE. ONE) THEN
702: DO LL = L1, L2-1
703: CALL DSCAL( K2-K1, SCAL, C( K1, LL ), 1 )
704: END DO
705: ENDIF
706: *
707: SCAL = ( SCAMIN / SWORK( I, L ) ) * SCALOC
708: IF (SCAL .NE. ONE) THEN
709: DO LL = L1, L2-1
710: CALL DSCAL( I2-I1, SCAL, C( I1, LL ), 1 )
711: END DO
712: ENDIF
713: *
714: * Record current scaling factor
715: *
716: SWORK( K, L ) = SCAMIN * SCALOC
717: SWORK( I, L ) = SCAMIN * SCALOC
718: *
719: CALL DGEMM( 'T', 'N', I2-I1, L2-L1, K2-K1, -ONE,
720: $ A( K1, I1 ), LDA, C( K1, L1 ), LDC,
721: $ ONE, C( I1, L1 ), LDC )
722: END DO
723: *
724: DO J = L + 1, NBB
725: *
726: * C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( L, J )
727: *
728: J1 = IWORK( PC + J )
729: J2 = IWORK( PC + J + 1 )
730: *
731: * Compute scaling factor to survive the linear update
732: * simulating consistent scaling.
733: *
734: CNRM = DLANGE( 'I', K2-K1, J2-J1, C( K1, J1 ),
735: $ LDC, WNRM )
736: SCAMIN = MIN( SWORK( K, J ), SWORK( K, L ) )
737: CNRM = CNRM * ( SCAMIN / SWORK( K, J ) )
738: XNRM = XNRM * ( SCAMIN / SWORK( K, L ) )
739: BNRM = SWORK( L, BWRK + J )
740: SCALOC = DLARMM( BNRM, XNRM, CNRM )
741: IF( SCALOC * SCAMIN .EQ. ZERO ) THEN
742: * Use second scaling factor to prevent flushing to zero.
743: BUF = BUF*2.D0**EXPONENT( SCALOC )
744: DO JJ = 1, NBB
745: DO LL = 1, NBA
746: SWORK( LL, JJ ) = MIN( BIGNUM,
747: $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) )
748: END DO
749: END DO
750: SCAMIN = SCAMIN / 2.D0**EXPONENT( SCALOC )
751: SCALOC = SCALOC / 2.D0**EXPONENT( SCALOC )
752: END IF
753: CNRM = CNRM * SCALOC
754: XNRM = XNRM * SCALOC
755: *
756: * Simultaneously apply the robust update factor and the
757: * consistency scaling factor to to C( K, J ) and C( K, L ).
758: *
759: SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC
760: IF( SCAL .NE. ONE ) THEN
761: DO LL = L1, L2-1
762: CALL DSCAL( K2-K1, SCAL, C( K1, LL ), 1 )
763: END DO
764: ENDIF
765: *
766: SCAL = ( SCAMIN / SWORK( K, J ) ) * SCALOC
767: IF( SCAL .NE. ONE ) THEN
768: DO JJ = J1, J2-1
769: CALL DSCAL( K2-K1, SCAL, C( K1, JJ ), 1 )
770: END DO
771: ENDIF
772: *
773: * Record current scaling factor
774: *
775: SWORK( K, L ) = SCAMIN * SCALOC
776: SWORK( K, J ) = SCAMIN * SCALOC
777: *
778: CALL DGEMM( 'N', 'N', K2-K1, J2-J1, L2-L1, -SGN,
779: $ C( K1, L1 ), LDC, B( L1, J1 ), LDB,
780: $ ONE, C( K1, J1 ), LDC )
781: END DO
782: END DO
783: END DO
784: ELSE IF( .NOT.NOTRNA .AND. .NOT.NOTRNB ) THEN
785: *
786: * Solve A**T*X + ISGN*X*B**T = scale*C.
787: *
788: * The (K,L)th block of X is determined starting from
789: * top-right corner column by column by
790: *
791: * A(K,K)**T*X(K,L) + ISGN*X(K,L)*B(L,L)**T = C(K,L) - R(K,L)
792: *
793: * Where
794: * K-1 N
795: * R(K,L) = SUM [A(I,K)**T*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**T].
796: * I=1 J=L+1
797: *
798: * Start loop over block rows (index = K) and block columns (index = L)
799: *
800: DO K = 1, NBA
801: *
802: * K1: row index of the first row in X( K, L )
803: * K2: row index of the first row in X( K+1, L )
804: * so the K2 - K1 is the column count of the block X( K, L )
805: *
806: K1 = IWORK( K )
807: K2 = IWORK( K + 1 )
808: DO L = NBB, 1, -1
809: *
810: * L1: column index of the first column in X( K, L )
811: * L2: column index of the first column in X( K, L + 1)
812: * so that L2 - L1 is the row count of the block X( K, L )
813: *
814: L1 = IWORK( PC + L )
815: L2 = IWORK( PC + L + 1 )
816: *
817: CALL DTRSYL( TRANA, TRANB, ISGN, K2-K1, L2-L1,
818: $ A( K1, K1 ), LDA,
819: $ B( L1, L1 ), LDB,
820: $ C( K1, L1 ), LDC, SCALOC, IINFO )
821: INFO = MAX( INFO, IINFO )
822: *
823: SWORK( K, L ) = SCALOC * SWORK( K, L )
824: IF( SCALOC * SWORK( K, L ) .EQ. ZERO ) THEN
825: IF( SCALOC .EQ. ZERO ) THEN
826: * The magnitude of the largest entry of X(K1:K2-1, L1:L2-1)
827: * is larger than the product of BIGNUM**2 and cannot be
828: * represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1).
829: * Mark the computation as pointless.
830: BUF = ZERO
831: ELSE
832: * Use second scaling factor to prevent flushing to zero.
833: BUF = BUF*2.D0**EXPONENT( SCALOC )
834: END IF
835: DO JJ = 1, NBB
836: DO LL = 1, NBA
837: * Bound by BIGNUM to not introduce Inf. The value
838: * is irrelevant; corresponding entries of the
839: * solution will be flushed in consistency scaling.
840: SWORK( LL, JJ ) = MIN( BIGNUM,
841: $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) )
842: END DO
843: END DO
844: END IF
845: XNRM = DLANGE( 'I', K2-K1, L2-L1, C( K1, L1 ), LDC,
846: $ WNRM )
847: *
848: DO I = K + 1, NBA
849: *
850: * C( I, L ) := C( I, L ) - A( K, I )**T * C( K, L )
851: *
852: I1 = IWORK( I )
853: I2 = IWORK( I + 1 )
854: *
855: * Compute scaling factor to survive the linear update
856: * simulating consistent scaling.
857: *
858: CNRM = DLANGE( 'I', I2-I1, L2-L1, C( I1, L1 ),
859: $ LDC, WNRM )
860: SCAMIN = MIN( SWORK( I, L ), SWORK( K, L ) )
861: CNRM = CNRM * ( SCAMIN / SWORK( I, L ) )
862: XNRM = XNRM * ( SCAMIN / SWORK( K, L ) )
863: ANRM = SWORK( I, AWRK + K )
864: SCALOC = DLARMM( ANRM, XNRM, CNRM )
865: IF( SCALOC * SCAMIN .EQ. ZERO ) THEN
866: * Use second scaling factor to prevent flushing to zero.
867: BUF = BUF*2.D0**EXPONENT( SCALOC )
868: DO JJ = 1, NBB
869: DO LL = 1, NBA
870: SWORK( LL, JJ ) = MIN( BIGNUM,
871: $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) )
872: END DO
873: END DO
874: SCAMIN = SCAMIN / 2.D0**EXPONENT( SCALOC )
875: SCALOC = SCALOC / 2.D0**EXPONENT( SCALOC )
876: END IF
877: CNRM = CNRM * SCALOC
878: XNRM = XNRM * SCALOC
879: *
880: * Simultaneously apply the robust update factor and the
881: * consistency scaling factor to C( I, L ) and C( K, L ).
882: *
883: SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC
884: IF (SCAL .NE. ONE) THEN
885: DO LL = L1, L2-1
886: CALL DSCAL( K2-K1, SCAL, C( K1, LL ), 1 )
887: END DO
888: ENDIF
889: *
890: SCAL = ( SCAMIN / SWORK( I, L ) ) * SCALOC
891: IF (SCAL .NE. ONE) THEN
892: DO LL = L1, L2-1
893: CALL DSCAL( I2-I1, SCAL, C( I1, LL ), 1 )
894: END DO
895: ENDIF
896: *
897: * Record current scaling factor
898: *
899: SWORK( K, L ) = SCAMIN * SCALOC
900: SWORK( I, L ) = SCAMIN * SCALOC
901: *
902: CALL DGEMM( 'T', 'N', I2-I1, L2-L1, K2-K1, -ONE,
903: $ A( K1, I1 ), LDA, C( K1, L1 ), LDC,
904: $ ONE, C( I1, L1 ), LDC )
905: END DO
906: *
907: DO J = 1, L - 1
908: *
909: * C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( J, L )**T
910: *
911: J1 = IWORK( PC + J )
912: J2 = IWORK( PC + J + 1 )
913: *
914: * Compute scaling factor to survive the linear update
915: * simulating consistent scaling.
916: *
917: CNRM = DLANGE( 'I', K2-K1, J2-J1, C( K1, J1 ),
918: $ LDC, WNRM )
919: SCAMIN = MIN( SWORK( K, J ), SWORK( K, L ) )
920: CNRM = CNRM * ( SCAMIN / SWORK( K, J ) )
921: XNRM = XNRM * ( SCAMIN / SWORK( K, L ) )
922: BNRM = SWORK( L, BWRK + J )
923: SCALOC = DLARMM( BNRM, XNRM, CNRM )
924: IF( SCALOC * SCAMIN .EQ. ZERO ) THEN
925: * Use second scaling factor to prevent flushing to zero.
926: BUF = BUF*2.D0**EXPONENT( SCALOC )
927: DO JJ = 1, NBB
928: DO LL = 1, NBA
929: SWORK( LL, JJ ) = MIN( BIGNUM,
930: $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) )
931: END DO
932: END DO
933: SCAMIN = SCAMIN / 2.D0**EXPONENT( SCALOC )
934: SCALOC = SCALOC / 2.D0**EXPONENT( SCALOC )
935: END IF
936: CNRM = CNRM * SCALOC
937: XNRM = XNRM * SCALOC
938: *
939: * Simultaneously apply the robust update factor and the
940: * consistency scaling factor to C( K, J ) and C( K, L ).
941: *
942: SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC
943: IF( SCAL .NE. ONE ) THEN
944: DO LL = L1, L2-1
945: CALL DSCAL( K2-K1, SCAL, C( K1, LL ), 1)
946: END DO
947: ENDIF
948: *
949: SCAL = ( SCAMIN / SWORK( K, J ) ) * SCALOC
950: IF( SCAL .NE. ONE ) THEN
951: DO JJ = J1, J2-1
952: CALL DSCAL( K2-K1, SCAL, C( K1, JJ ), 1 )
953: END DO
954: ENDIF
955: *
956: * Record current scaling factor
957: *
958: SWORK( K, L ) = SCAMIN * SCALOC
959: SWORK( K, J ) = SCAMIN * SCALOC
960: *
961: CALL DGEMM( 'N', 'T', K2-K1, J2-J1, L2-L1, -SGN,
962: $ C( K1, L1 ), LDC, B( J1, L1 ), LDB,
963: $ ONE, C( K1, J1 ), LDC )
964: END DO
965: END DO
966: END DO
967: ELSE IF( NOTRNA .AND. .NOT.NOTRNB ) THEN
968: *
969: * Solve A*X + ISGN*X*B**T = scale*C.
970: *
971: * The (K,L)th block of X is determined starting from
972: * bottom-right corner column by column by
973: *
974: * A(K,K)*X(K,L) + ISGN*X(K,L)*B(L,L)**T = C(K,L) - R(K,L)
975: *
976: * Where
977: * M N
978: * R(K,L) = SUM [A(K,I)*X(I,L)] + ISGN*SUM [X(K,J)*B(L,J)**T].
979: * I=K+1 J=L+1
980: *
981: * Start loop over block rows (index = K) and block columns (index = L)
982: *
983: DO K = NBA, 1, -1
984: *
985: * K1: row index of the first row in X( K, L )
986: * K2: row index of the first row in X( K+1, L )
987: * so the K2 - K1 is the column count of the block X( K, L )
988: *
989: K1 = IWORK( K )
990: K2 = IWORK( K + 1 )
991: DO L = NBB, 1, -1
992: *
993: * L1: column index of the first column in X( K, L )
994: * L2: column index of the first column in X( K, L + 1)
995: * so that L2 - L1 is the row count of the block X( K, L )
996: *
997: L1 = IWORK( PC + L )
998: L2 = IWORK( PC + L + 1 )
999: *
1000: CALL DTRSYL( TRANA, TRANB, ISGN, K2-K1, L2-L1,
1001: $ A( K1, K1 ), LDA,
1002: $ B( L1, L1 ), LDB,
1003: $ C( K1, L1 ), LDC, SCALOC, IINFO )
1004: INFO = MAX( INFO, IINFO )
1005: *
1006: IF( SCALOC * SWORK( K, L ) .EQ. ZERO ) THEN
1007: IF( SCALOC .EQ. ZERO ) THEN
1008: * The magnitude of the largest entry of X(K1:K2-1, L1:L2-1)
1009: * is larger than the product of BIGNUM**2 and cannot be
1010: * represented in the form (1/SCALE)*X(K1:K2-1, L1:L2-1).
1011: * Mark the computation as pointless.
1012: BUF = ZERO
1013: ELSE
1014: * Use second scaling factor to prevent flushing to zero.
1015: BUF = BUF*2.D0**EXPONENT( SCALOC )
1016: END IF
1017: DO JJ = 1, NBB
1018: DO LL = 1, NBA
1019: * Bound by BIGNUM to not introduce Inf. The value
1020: * is irrelevant; corresponding entries of the
1021: * solution will be flushed in consistency scaling.
1022: SWORK( LL, JJ ) = MIN( BIGNUM,
1023: $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) )
1024: END DO
1025: END DO
1026: END IF
1027: SWORK( K, L ) = SCALOC * SWORK( K, L )
1028: XNRM = DLANGE( 'I', K2-K1, L2-L1, C( K1, L1 ), LDC,
1029: $ WNRM )
1030: *
1031: DO I = 1, K - 1
1032: *
1033: * C( I, L ) := C( I, L ) - A( I, K ) * C( K, L )
1034: *
1035: I1 = IWORK( I )
1036: I2 = IWORK( I + 1 )
1037: *
1038: * Compute scaling factor to survive the linear update
1039: * simulating consistent scaling.
1040: *
1041: CNRM = DLANGE( 'I', I2-I1, L2-L1, C( I1, L1 ),
1042: $ LDC, WNRM )
1043: SCAMIN = MIN( SWORK( I, L ), SWORK( K, L ) )
1044: CNRM = CNRM * ( SCAMIN / SWORK( I, L ) )
1045: XNRM = XNRM * ( SCAMIN / SWORK( K, L ) )
1046: ANRM = SWORK( I, AWRK + K )
1047: SCALOC = DLARMM( ANRM, XNRM, CNRM )
1048: IF( SCALOC * SCAMIN .EQ. ZERO ) THEN
1049: * Use second scaling factor to prevent flushing to zero.
1050: BUF = BUF*2.D0**EXPONENT( SCALOC )
1051: DO JJ = 1, NBB
1052: DO LL = 1, NBA
1053: SWORK( LL, JJ ) = MIN( BIGNUM,
1054: $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) )
1055: END DO
1056: END DO
1057: SCAMIN = SCAMIN / 2.D0**EXPONENT( SCALOC )
1058: SCALOC = SCALOC / 2.D0**EXPONENT( SCALOC )
1059: END IF
1060: CNRM = CNRM * SCALOC
1061: XNRM = XNRM * SCALOC
1062: *
1063: * Simultaneously apply the robust update factor and the
1064: * consistency scaling factor to C( I, L ) and C( K, L ).
1065: *
1066: SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC
1067: IF (SCAL .NE. ONE) THEN
1068: DO LL = L1, L2-1
1069: CALL DSCAL( K2-K1, SCAL, C( K1, LL ), 1 )
1070: END DO
1071: ENDIF
1072: *
1073: SCAL = ( SCAMIN / SWORK( I, L ) ) * SCALOC
1074: IF (SCAL .NE. ONE) THEN
1075: DO LL = L1, L2-1
1076: CALL DSCAL( I2-I1, SCAL, C( I1, LL ), 1 )
1077: END DO
1078: ENDIF
1079: *
1080: * Record current scaling factor
1081: *
1082: SWORK( K, L ) = SCAMIN * SCALOC
1083: SWORK( I, L ) = SCAMIN * SCALOC
1084: *
1085: CALL DGEMM( 'N', 'N', I2-I1, L2-L1, K2-K1, -ONE,
1086: $ A( I1, K1 ), LDA, C( K1, L1 ), LDC,
1087: $ ONE, C( I1, L1 ), LDC )
1088: *
1089: END DO
1090: *
1091: DO J = 1, L - 1
1092: *
1093: * C( K, J ) := C( K, J ) - SGN * C( K, L ) * B( J, L )**T
1094: *
1095: J1 = IWORK( PC + J )
1096: J2 = IWORK( PC + J + 1 )
1097: *
1098: * Compute scaling factor to survive the linear update
1099: * simulating consistent scaling.
1100: *
1101: CNRM = DLANGE( 'I', K2-K1, J2-J1, C( K1, J1 ),
1102: $ LDC, WNRM )
1103: SCAMIN = MIN( SWORK( K, J ), SWORK( K, L ) )
1104: CNRM = CNRM * ( SCAMIN / SWORK( K, J ) )
1105: XNRM = XNRM * ( SCAMIN / SWORK( K, L ) )
1106: BNRM = SWORK( L, BWRK + J )
1107: SCALOC = DLARMM( BNRM, XNRM, CNRM )
1108: IF( SCALOC * SCAMIN .EQ. ZERO ) THEN
1109: * Use second scaling factor to prevent flushing to zero.
1110: BUF = BUF*2.D0**EXPONENT( SCALOC )
1111: DO JJ = 1, NBB
1112: DO LL = 1, NBA
1113: SWORK( LL, JJ ) = MIN( BIGNUM,
1114: $ SWORK( LL, JJ ) / 2.D0**EXPONENT( SCALOC ) )
1115: END DO
1116: END DO
1117: SCAMIN = SCAMIN / 2.D0**EXPONENT( SCALOC )
1118: SCALOC = SCALOC / 2.D0**EXPONENT( SCALOC )
1119: END IF
1120: CNRM = CNRM * SCALOC
1121: XNRM = XNRM * SCALOC
1122: *
1123: * Simultaneously apply the robust update factor and the
1124: * consistency scaling factor to C( K, J ) and C( K, L ).
1125: *
1126: SCAL = ( SCAMIN / SWORK( K, L ) ) * SCALOC
1127: IF( SCAL .NE. ONE ) THEN
1128: DO JJ = L1, L2-1
1129: CALL DSCAL( K2-K1, SCAL, C( K1, JJ ), 1 )
1130: END DO
1131: ENDIF
1132: *
1133: SCAL = ( SCAMIN / SWORK( K, J ) ) * SCALOC
1134: IF( SCAL .NE. ONE ) THEN
1135: DO JJ = J1, J2-1
1136: CALL DSCAL( K2-K1, SCAL, C( K1, JJ ), 1 )
1137: END DO
1138: ENDIF
1139: *
1140: * Record current scaling factor
1141: *
1142: SWORK( K, L ) = SCAMIN * SCALOC
1143: SWORK( K, J ) = SCAMIN * SCALOC
1144: *
1145: CALL DGEMM( 'N', 'T', K2-K1, J2-J1, L2-L1, -SGN,
1146: $ C( K1, L1 ), LDC, B( J1, L1 ), LDB,
1147: $ ONE, C( K1, J1 ), LDC )
1148: END DO
1149: END DO
1150: END DO
1151: *
1152: END IF
1153: *
1154: * Reduce local scaling factors
1155: *
1156: SCALE = SWORK( 1, 1 )
1157: DO K = 1, NBA
1158: DO L = 1, NBB
1159: SCALE = MIN( SCALE, SWORK( K, L ) )
1160: END DO
1161: END DO
1162: *
1163: IF( SCALE .EQ. ZERO ) THEN
1164: *
1165: * The magnitude of the largest entry of the solution is larger
1166: * than the product of BIGNUM**2 and cannot be represented in the
1167: * form (1/SCALE)*X if SCALE is DOUBLE PRECISION. Set SCALE to
1168: * zero and give up.
1169: *
1170: IWORK(1) = NBA + NBB + 2
1171: SWORK(1,1) = MAX( NBA, NBB )
1172: SWORK(2,1) = 2 * NBB + NBA
1173: RETURN
1174: END IF
1175: *
1176: * Realize consistent scaling
1177: *
1178: DO K = 1, NBA
1179: K1 = IWORK( K )
1180: K2 = IWORK( K + 1 )
1181: DO L = 1, NBB
1182: L1 = IWORK( PC + L )
1183: L2 = IWORK( PC + L + 1 )
1184: SCAL = SCALE / SWORK( K, L )
1185: IF( SCAL .NE. ONE ) THEN
1186: DO LL = L1, L2-1
1187: CALL DSCAL( K2-K1, SCAL, C( K1, LL ), 1 )
1188: END DO
1189: ENDIF
1190: END DO
1191: END DO
1192: *
1193: IF( BUF .NE. ONE .AND. BUF.GT.ZERO ) THEN
1194: *
1195: * Decrease SCALE as much as possible.
1196: *
1197: SCALOC = MIN( SCALE / SMLNUM, ONE / BUF )
1198: BUF = BUF * SCALOC
1199: SCALE = SCALE / SCALOC
1200: END IF
1201:
1202: IF( BUF.NE.ONE .AND. BUF.GT.ZERO ) THEN
1203: *
1204: * In case of overly aggressive scaling during the computation,
1205: * flushing of the global scale factor may be prevented by
1206: * undoing some of the scaling. This step is to ensure that
1207: * this routine flushes only scale factors that TRSYL also
1208: * flushes and be usable as a drop-in replacement.
1209: *
1210: * How much can the normwise largest entry be upscaled?
1211: *
1212: SCAL = C( 1, 1 )
1213: DO K = 1, M
1214: DO L = 1, N
1215: SCAL = MAX( SCAL, ABS( C( K, L ) ) )
1216: END DO
1217: END DO
1218: *
1219: * Increase BUF as close to 1 as possible and apply scaling.
1220: *
1221: SCALOC = MIN( BIGNUM / SCAL, ONE / BUF )
1222: BUF = BUF * SCALOC
1223: CALL DLASCL( 'G', -1, -1, ONE, SCALOC, M, N, C, LDC, IWORK )
1224: END IF
1225: *
1226: * Combine with buffer scaling factor. SCALE will be flushed if
1227: * BUF is less than one here.
1228: *
1229: SCALE = SCALE * BUF
1230: *
1231: * Restore workspace dimensions
1232: *
1233: IWORK(1) = NBA + NBB + 2
1234: SWORK(1,1) = MAX( NBA, NBB )
1235: SWORK(2,1) = 2 * NBB + NBA
1236: *
1237: RETURN
1238: *
1239: * End of DTRSYL3
1240: *
1241: END
CVSweb interface <joel.bertrand@systella.fr>