Annotation of rpl/lapack/lapack/dlasd2.f, revision 1.4

1.1       bertrand    1:       SUBROUTINE DLASD2( NL, NR, SQRE, K, D, Z, ALPHA, BETA, U, LDU, VT,
                      2:      $                   LDVT, DSIGMA, U2, LDU2, VT2, LDVT2, IDXP, IDX,
                      3:      $                   IDXC, IDXQ, COLTYP, INFO )
                      4: *
                      5: *  -- LAPACK auxiliary routine (version 3.2) --
                      6: *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
                      7: *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd. --
                      8: *     November 2006
                      9: *
                     10: *     .. Scalar Arguments ..
                     11:       INTEGER            INFO, K, LDU, LDU2, LDVT, LDVT2, NL, NR, SQRE
                     12:       DOUBLE PRECISION   ALPHA, BETA
                     13: *     ..
                     14: *     .. Array Arguments ..
                     15:       INTEGER            COLTYP( * ), IDX( * ), IDXC( * ), IDXP( * ),
                     16:      $                   IDXQ( * )
                     17:       DOUBLE PRECISION   D( * ), DSIGMA( * ), U( LDU, * ),
                     18:      $                   U2( LDU2, * ), VT( LDVT, * ), VT2( LDVT2, * ),
                     19:      $                   Z( * )
                     20: *     ..
                     21: *
                     22: *  Purpose
                     23: *  =======
                     24: *
                     25: *  DLASD2 merges the two sets of singular values together into a single
                     26: *  sorted set.  Then it tries to deflate the size of the problem.
                     27: *  There are two ways in which deflation can occur:  when two or more
                     28: *  singular values are close together or if there is a tiny entry in the
                     29: *  Z vector.  For each such occurrence the order of the related secular
                     30: *  equation problem is reduced by one.
                     31: *
                     32: *  DLASD2 is called from DLASD1.
                     33: *
                     34: *  Arguments
                     35: *  =========
                     36: *
                     37: *  NL     (input) INTEGER
                     38: *         The row dimension of the upper block.  NL >= 1.
                     39: *
                     40: *  NR     (input) INTEGER
                     41: *         The row dimension of the lower block.  NR >= 1.
                     42: *
                     43: *  SQRE   (input) INTEGER
                     44: *         = 0: the lower block is an NR-by-NR square matrix.
                     45: *         = 1: the lower block is an NR-by-(NR+1) rectangular matrix.
                     46: *
                     47: *         The bidiagonal matrix has N = NL + NR + 1 rows and
                     48: *         M = N + SQRE >= N columns.
                     49: *
                     50: *  K      (output) INTEGER
                     51: *         Contains the dimension of the non-deflated matrix.
                     52: *         This is the order of the related secular equation. 1 <= K <=N.
                     53: *
                     54: *  D      (input/output) DOUBLE PRECISION array, dimension(N)
                     55: *         On entry D contains the singular values of the two submatrices
                     56: *         to be combined.  On exit D contains the trailing (N-K) updated
                     57: *         singular values (those which were deflated) sorted into
                     58: *         increasing order.
                     59: *
                     60: *  Z      (output) DOUBLE PRECISION array, dimension(M)
                     61: *         On exit Z contains the updating row vector in the secular
                     62: *         equation.
                     63: *
                     64: *  ALPHA  (input) DOUBLE PRECISION
                     65: *         Contains the diagonal element associated with the added row.
                     66: *
                     67: *  BETA   (input) DOUBLE PRECISION
                     68: *         Contains the off-diagonal element associated with the added
                     69: *         row.
                     70: *
                     71: *  U      (input/output) DOUBLE PRECISION array, dimension(LDU,N)
                     72: *         On entry U contains the left singular vectors of two
                     73: *         submatrices in the two square blocks with corners at (1,1),
                     74: *         (NL, NL), and (NL+2, NL+2), (N,N).
                     75: *         On exit U contains the trailing (N-K) updated left singular
                     76: *         vectors (those which were deflated) in its last N-K columns.
                     77: *
                     78: *  LDU    (input) INTEGER
                     79: *         The leading dimension of the array U.  LDU >= N.
                     80: *
                     81: *  VT     (input/output) DOUBLE PRECISION array, dimension(LDVT,M)
                     82: *         On entry VT' contains the right singular vectors of two
                     83: *         submatrices in the two square blocks with corners at (1,1),
                     84: *         (NL+1, NL+1), and (NL+2, NL+2), (M,M).
                     85: *         On exit VT' contains the trailing (N-K) updated right singular
                     86: *         vectors (those which were deflated) in its last N-K columns.
                     87: *         In case SQRE =1, the last row of VT spans the right null
                     88: *         space.
                     89: *
                     90: *  LDVT   (input) INTEGER
                     91: *         The leading dimension of the array VT.  LDVT >= M.
                     92: *
                     93: *  DSIGMA (output) DOUBLE PRECISION array, dimension (N)
                     94: *         Contains a copy of the diagonal elements (K-1 singular values
                     95: *         and one zero) in the secular equation.
                     96: *
                     97: *  U2     (output) DOUBLE PRECISION array, dimension(LDU2,N)
                     98: *         Contains a copy of the first K-1 left singular vectors which
                     99: *         will be used by DLASD3 in a matrix multiply (DGEMM) to solve
                    100: *         for the new left singular vectors. U2 is arranged into four
                    101: *         blocks. The first block contains a column with 1 at NL+1 and
                    102: *         zero everywhere else; the second block contains non-zero
                    103: *         entries only at and above NL; the third contains non-zero
                    104: *         entries only below NL+1; and the fourth is dense.
                    105: *
                    106: *  LDU2   (input) INTEGER
                    107: *         The leading dimension of the array U2.  LDU2 >= N.
                    108: *
                    109: *  VT2    (output) DOUBLE PRECISION array, dimension(LDVT2,M)
                    110: *         VT2' contains a copy of the first K right singular vectors
                    111: *         which will be used by DLASD3 in a matrix multiply (DGEMM) to
                    112: *         solve for the new right singular vectors. VT2 is arranged into
                    113: *         three blocks. The first block contains a row that corresponds
                    114: *         to the special 0 diagonal element in SIGMA; the second block
                    115: *         contains non-zeros only at and before NL +1; the third block
                    116: *         contains non-zeros only at and after  NL +2.
                    117: *
                    118: *  LDVT2  (input) INTEGER
                    119: *         The leading dimension of the array VT2.  LDVT2 >= M.
                    120: *
                    121: *  IDXP   (workspace) INTEGER array dimension(N)
                    122: *         This will contain the permutation used to place deflated
                    123: *         values of D at the end of the array. On output IDXP(2:K)
                    124: *         points to the nondeflated D-values and IDXP(K+1:N)
                    125: *         points to the deflated singular values.
                    126: *
                    127: *  IDX    (workspace) INTEGER array dimension(N)
                    128: *         This will contain the permutation used to sort the contents of
                    129: *         D into ascending order.
                    130: *
                    131: *  IDXC   (output) INTEGER array dimension(N)
                    132: *         This will contain the permutation used to arrange the columns
                    133: *         of the deflated U matrix into three groups:  the first group
                    134: *         contains non-zero entries only at and above NL, the second
                    135: *         contains non-zero entries only below NL+2, and the third is
                    136: *         dense.
                    137: *
                    138: *  IDXQ   (input/output) INTEGER array dimension(N)
                    139: *         This contains the permutation which separately sorts the two
                    140: *         sub-problems in D into ascending order.  Note that entries in
                    141: *         the first half of this permutation must first be moved one
                    142: *         position backward; and entries in the second half
                    143: *         must first have NL+1 added to their values.
                    144: *
                    145: *  COLTYP (workspace/output) INTEGER array dimension(N)
                    146: *         As workspace, this will contain a label which will indicate
                    147: *         which of the following types a column in the U2 matrix or a
                    148: *         row in the VT2 matrix is:
                    149: *         1 : non-zero in the upper half only
                    150: *         2 : non-zero in the lower half only
                    151: *         3 : dense
                    152: *         4 : deflated
                    153: *
                    154: *         On exit, it is an array of dimension 4, with COLTYP(I) being
                    155: *         the dimension of the I-th type columns.
                    156: *
                    157: *  INFO   (output) INTEGER
                    158: *          = 0:  successful exit.
                    159: *          < 0:  if INFO = -i, the i-th argument had an illegal value.
                    160: *
                    161: *  Further Details
                    162: *  ===============
                    163: *
                    164: *  Based on contributions by
                    165: *     Ming Gu and Huan Ren, Computer Science Division, University of
                    166: *     California at Berkeley, USA
                    167: *
                    168: *  =====================================================================
                    169: *
                    170: *     .. Parameters ..
                    171:       DOUBLE PRECISION   ZERO, ONE, TWO, EIGHT
                    172:       PARAMETER          ( ZERO = 0.0D+0, ONE = 1.0D+0, TWO = 2.0D+0,
                    173:      $                   EIGHT = 8.0D+0 )
                    174: *     ..
                    175: *     .. Local Arrays ..
                    176:       INTEGER            CTOT( 4 ), PSM( 4 )
                    177: *     ..
                    178: *     .. Local Scalars ..
                    179:       INTEGER            CT, I, IDXI, IDXJ, IDXJP, J, JP, JPREV, K2, M,
                    180:      $                   N, NLP1, NLP2
                    181:       DOUBLE PRECISION   C, EPS, HLFTOL, S, TAU, TOL, Z1
                    182: *     ..
                    183: *     .. External Functions ..
                    184:       DOUBLE PRECISION   DLAMCH, DLAPY2
                    185:       EXTERNAL           DLAMCH, DLAPY2
                    186: *     ..
                    187: *     .. External Subroutines ..
                    188:       EXTERNAL           DCOPY, DLACPY, DLAMRG, DLASET, DROT, XERBLA
                    189: *     ..
                    190: *     .. Intrinsic Functions ..
                    191:       INTRINSIC          ABS, MAX
                    192: *     ..
                    193: *     .. Executable Statements ..
                    194: *
                    195: *     Test the input parameters.
                    196: *
                    197:       INFO = 0
                    198: *
                    199:       IF( NL.LT.1 ) THEN
                    200:          INFO = -1
                    201:       ELSE IF( NR.LT.1 ) THEN
                    202:          INFO = -2
                    203:       ELSE IF( ( SQRE.NE.1 ) .AND. ( SQRE.NE.0 ) ) THEN
                    204:          INFO = -3
                    205:       END IF
                    206: *
                    207:       N = NL + NR + 1
                    208:       M = N + SQRE
                    209: *     Note: a failure below overwrites any INFO set by the tests above.
                    210:       IF( LDU.LT.N ) THEN
                    211:          INFO = -10
                    212:       ELSE IF( LDVT.LT.M ) THEN
                    213:          INFO = -12
                    214:       ELSE IF( LDU2.LT.N ) THEN
                    215:          INFO = -15
                    216:       ELSE IF( LDVT2.LT.M ) THEN
                    217:          INFO = -17
                    218:       END IF
                    219:       IF( INFO.NE.0 ) THEN
                    220:          CALL XERBLA( 'DLASD2', -INFO )
                    221:          RETURN
                    222:       END IF
                    223: *
                    224:       NLP1 = NL + 1
                    225:       NLP2 = NL + 2
                    226: *
                    227: *     Generate the first part of the vector Z; and move the singular
                    228: *     values in the first part of D one position backward.
                    229: *
                    230:       Z1 = ALPHA*VT( NLP1, NLP1 )
                    231:       Z( 1 ) = Z1
                    232:       DO 10 I = NL, 1, -1
                    233:          Z( I+1 ) = ALPHA*VT( I, NLP1 )
                    234:          D( I+1 ) = D( I )
                    235:          IDXQ( I+1 ) = IDXQ( I ) + 1
                    236:    10 CONTINUE
                    237: *
                    238: *     Generate the second part of the vector Z.
                    239: *
                    240:       DO 20 I = NLP2, M
                    241:          Z( I ) = BETA*VT( I, NLP2 )
                    242:    20 CONTINUE
                    243: *
                    244: *     Initialize some reference arrays.
                    245: *
                    246:       DO 30 I = 2, NLP1
                    247:          COLTYP( I ) = 1
                    248:    30 CONTINUE
                    249:       DO 40 I = NLP2, N
                    250:          COLTYP( I ) = 2
                    251:    40 CONTINUE
                    252: *
                    253: *     Sort the singular values into increasing order
                    254: *
                    255:       DO 50 I = NLP2, N
                    256:          IDXQ( I ) = IDXQ( I ) + NLP1
                    257:    50 CONTINUE
                    258: *
                    259: *     DSIGMA, IDXC, and the first column of U2
                    260: *     are used as storage space.
                    261: *
                    262:       DO 60 I = 2, N
                    263:          DSIGMA( I ) = D( IDXQ( I ) )
                    264:          U2( I, 1 ) = Z( IDXQ( I ) )
                    265:          IDXC( I ) = COLTYP( IDXQ( I ) )
                    266:    60 CONTINUE
                    267: *
                    268:       CALL DLAMRG( NL, NR, DSIGMA( 2 ), 1, 1, IDX( 2 ) )
                    269: *
                    270:       DO 70 I = 2, N
                    271:          IDXI = 1 + IDX( I )
                    272:          D( I ) = DSIGMA( IDXI )
                    273:          Z( I ) = U2( IDXI, 1 )
                    274:          COLTYP( I ) = IDXC( IDXI )
                    275:    70 CONTINUE
                    276: *
                    277: *     Calculate the allowable deflation tolerance
                    278: *
                    279:       EPS = DLAMCH( 'Epsilon' )
                    280:       TOL = MAX( ABS( ALPHA ), ABS( BETA ) )
                    281:       TOL = EIGHT*EPS*MAX( ABS( D( N ) ), TOL )
                    282: *
                    283: *     There are 2 kinds of deflation -- first a value in the z-vector
                    284: *     is small, second two (or more) singular values are very close
                    285: *     together (their difference is small).
                    286: *
                    287: *     If the value in the z-vector is small, we simply permute the
                    288: *     array so that the corresponding singular value is moved to the
                    289: *     end.
                    290: *
                    291: *     If two values in the D-vector are close, we perform a two-sided
                    292: *     rotation designed to make one of the corresponding z-vector
                    293: *     entries zero, and then permute the array so that the deflated
                    294: *     singular value is moved to the end.
                    295: *
                    296: *     If there are multiple singular values then the problem deflates.
                    297: *     Here the number of equal singular values are found.  As each equal
                    298: *     singular value is found, a Givens rotation is computed to
                    299: *     rotate the corresponding singular subspace so that the
                    300: *     corresponding components of Z are zero in this new basis.
                    301: *     K indexes kept values upward; K2 fills IDXP(K2:N) downward.
                    302:       K = 1
                    303:       K2 = N + 1
                    304:       DO 80 J = 2, N
                    305:          IF( ABS( Z( J ) ).LE.TOL ) THEN
                    306: *
                    307: *           Deflate due to small z component.
                    308: *
                    309:             K2 = K2 - 1
                    310:             IDXP( K2 ) = J
                    311:             COLTYP( J ) = 4
                    312:             IF( J.EQ.N )
                    313:      $         GO TO 120
                    314:          ELSE
                    315:             JPREV = J
                    316:             GO TO 90
                    317:          END IF
                    318:    80 CONTINUE
                    319:    90 CONTINUE
                    320:       J = JPREV
                    321:   100 CONTINUE
                    322:       J = J + 1
                    323:       IF( J.GT.N )
                    324:      $   GO TO 110
                    325:       IF( ABS( Z( J ) ).LE.TOL ) THEN
                    326: *
                    327: *        Deflate due to small z component.
                    328: *
                    329:          K2 = K2 - 1
                    330:          IDXP( K2 ) = J
                    331:          COLTYP( J ) = 4
                    332:       ELSE
                    333: *
                    334: *        Check if singular values are close enough to allow deflation.
                    335: *
                    336:          IF( ABS( D( J )-D( JPREV ) ).LE.TOL ) THEN
                    337: *
                    338: *           Deflation is possible.
                    339: *
                    340:             S = Z( JPREV )
                    341:             C = Z( J )
                    342: *
                    343: *           Find sqrt(a**2+b**2) without overflow or
                    344: *           destructive underflow.
                    345: *
                    346:             TAU = DLAPY2( C, S )
                    347:             C = C / TAU
                    348:             S = -S / TAU
                    349:             Z( J ) = TAU
                    350:             Z( JPREV ) = ZERO
                    351: *
                    352: *           Apply back the Givens rotation to the left and right
                    353: *           singular vector matrices.
                    354: *
                    355:             IDXJP = IDXQ( IDX( JPREV )+1 )
                    356:             IDXJ = IDXQ( IDX( J )+1 )
                    357:             IF( IDXJP.LE.NLP1 ) THEN
                    358:                IDXJP = IDXJP - 1
                    359:             END IF
                    360:             IF( IDXJ.LE.NLP1 ) THEN
                    361:                IDXJ = IDXJ - 1
                    362:             END IF
                    363:             CALL DROT( N, U( 1, IDXJP ), 1, U( 1, IDXJ ), 1, C, S )
                    364:             CALL DROT( M, VT( IDXJP, 1 ), LDVT, VT( IDXJ, 1 ), LDVT, C,
                    365:      $                 S )
                    366:             IF( COLTYP( J ).NE.COLTYP( JPREV ) ) THEN
                    367:                COLTYP( J ) = 3
                    368:             END IF
                    369:             COLTYP( JPREV ) = 4
                    370:             K2 = K2 - 1
                    371:             IDXP( K2 ) = JPREV
                    372:             JPREV = J
                    373:          ELSE
                    374:             K = K + 1
                    375:             U2( K, 1 ) = Z( JPREV )
                    376:             DSIGMA( K ) = D( JPREV )
                    377:             IDXP( K ) = JPREV
                    378:             JPREV = J
                    379:          END IF
                    380:       END IF
                    381:       GO TO 100
                    382:   110 CONTINUE
                    383: *
                    384: *     Record the last singular value.
                    385: *
                    386:       K = K + 1
                    387:       U2( K, 1 ) = Z( JPREV )
                    388:       DSIGMA( K ) = D( JPREV )
                    389:       IDXP( K ) = JPREV
                    390: *
                    391:   120 CONTINUE
                    392: *
                    393: *     Count up the total number of the various types of columns, then
                    394: *     form a permutation which positions the four column types into
                    395: *     four groups of uniform structure (although one or more of these
                    396: *     groups may be empty).
                    397: *
                    398:       DO 130 J = 1, 4
                    399:          CTOT( J ) = 0
                    400:   130 CONTINUE
                    401:       DO 140 J = 2, N
                    402:          CT = COLTYP( J )
                    403:          CTOT( CT ) = CTOT( CT ) + 1
                    404:   140 CONTINUE
                    405: *
                    406: *     PSM(*) = Position in SubMatrix (of types 1 through 4)
                    407: *
                    408:       PSM( 1 ) = 2
                    409:       PSM( 2 ) = 2 + CTOT( 1 )
                    410:       PSM( 3 ) = PSM( 2 ) + CTOT( 2 )
                    411:       PSM( 4 ) = PSM( 3 ) + CTOT( 3 )
                    412: *
                    413: *     Fill out the IDXC array so that the permutation which it induces
                    414: *     will place all type-1 columns first, all type-2 columns next,
                    415: *     then all type-3's, and finally all type-4's, starting from the
                    416: *     second column. This applies similarly to the rows of VT.
                    417: *
                    418:       DO 150 J = 2, N
                    419:          JP = IDXP( J )
                    420:          CT = COLTYP( JP )
                    421:          IDXC( PSM( CT ) ) = J
                    422:          PSM( CT ) = PSM( CT ) + 1
                    423:   150 CONTINUE
                    424: *
                    425: *     Sort the singular values and corresponding singular vectors into
                    426: *     DSIGMA, U2, and VT2 respectively.  The singular values/vectors
                    427: *     which were not deflated go into the first K slots of DSIGMA, U2,
                    428: *     and VT2 respectively, while those which were deflated go into the
                    429: *     last N - K slots, except that the first column/row will be treated
                    430: *     separately.
                    431: *
                    432:       DO 160 J = 2, N
                    433:          JP = IDXP( J )
                    434:          DSIGMA( J ) = D( JP )
                    435:          IDXJ = IDXQ( IDX( IDXP( IDXC( J ) ) )+1 )
                    436:          IF( IDXJ.LE.NLP1 ) THEN
                    437:             IDXJ = IDXJ - 1
                    438:          END IF
                    439:          CALL DCOPY( N, U( 1, IDXJ ), 1, U2( 1, J ), 1 )
                    440:          CALL DCOPY( M, VT( IDXJ, 1 ), LDVT, VT2( J, 1 ), LDVT2 )
                    441:   160 CONTINUE
                    442: *
                    443: *     Determine DSIGMA(1), DSIGMA(2) and Z(1)
                    444: *
                    445:       DSIGMA( 1 ) = ZERO
                    446:       HLFTOL = TOL / TWO
                    447:       IF( ABS( DSIGMA( 2 ) ).LE.HLFTOL )
                    448:      $   DSIGMA( 2 ) = HLFTOL
                    449:       IF( M.GT.N ) THEN
                    450:          Z( 1 ) = DLAPY2( Z1, Z( M ) )
                    451:          IF( Z( 1 ).LE.TOL ) THEN
                    452:             C = ONE
                    453:             S = ZERO
                    454:             Z( 1 ) = TOL
                    455:          ELSE
                    456:             C = Z1 / Z( 1 )
                    457:             S = Z( M ) / Z( 1 )
                    458:          END IF
                    459:       ELSE
                    460:          IF( ABS( Z1 ).LE.TOL ) THEN
                    461:             Z( 1 ) = TOL
                    462:          ELSE
                    463:             Z( 1 ) = Z1
                    464:          END IF
                    465:       END IF
                    466: *
                    467: *     Move the rest of the updating row to Z.
                    468: *
                    469:       CALL DCOPY( K-1, U2( 2, 1 ), 1, Z( 2 ), 1 )
                    470: *
                    471: *     Determine the first column of U2, the first row of VT2 and the
                    472: *     last row of VT.
                    473: *
                    474:       CALL DLASET( 'A', N, 1, ZERO, ZERO, U2, LDU2 )
                    475:       U2( NLP1, 1 ) = ONE
                    476:       IF( M.GT.N ) THEN
                    477:          DO 170 I = 1, NLP1
                    478:             VT( M, I ) = -S*VT( NLP1, I )
                    479:             VT2( 1, I ) = C*VT( NLP1, I )
                    480:   170    CONTINUE
                    481:          DO 180 I = NLP2, M
                    482:             VT2( 1, I ) = S*VT( M, I )
                    483:             VT( M, I ) = C*VT( M, I )
                    484:   180    CONTINUE
                    485:       ELSE
                    486:          CALL DCOPY( M, VT( NLP1, 1 ), LDVT, VT2( 1, 1 ), LDVT2 )
                    487:       END IF
                    488:       IF( M.GT.N ) THEN
                    489:          CALL DCOPY( M, VT( M, 1 ), LDVT, VT2( M, 1 ), LDVT2 )
                    490:       END IF
                    491: *
                    492: *     The deflated singular values and their corresponding vectors go
                    493: *     into the back of D, U, and VT respectively.
                    494: *
                    495:       IF( N.GT.K ) THEN
                    496:          CALL DCOPY( N-K, DSIGMA( K+1 ), 1, D( K+1 ), 1 )
                    497:          CALL DLACPY( 'A', N, N-K, U2( 1, K+1 ), LDU2, U( 1, K+1 ),
                    498:      $                LDU )
                    499:          CALL DLACPY( 'A', N-K, M, VT2( K+1, 1 ), LDVT2, VT( K+1, 1 ),
                    500:      $                LDVT )
                    501:       END IF
                    502: *
                    503: *     Copy CTOT into COLTYP for referencing in DLASD3.
                    504: *
                    505:       DO 190 J = 1, 4
                    506:          COLTYP( J ) = CTOT( J )
                    507:   190 CONTINUE
                    508: *
                    509:       RETURN
                    510: *
                    511: *     End of DLASD2
                    512: *
                    513:       END

CVSweb interface <joel.bertrand@systella.fr>