1: *> \brief \b DSYTRD_SY2SB
2: *
3: * @generated from zhetrd_he2hb.f, fortran z -> d, Wed Dec 7 08:22:39 2016
4: *
5: * =========== DOCUMENTATION ===========
6: *
7: * Online html documentation available at
8: * http://www.netlib.org/lapack/explore-html/
9: *
10: *> \htmlonly
11: *> Download DSYTRD_SY2SB + dependencies
12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsytrd.f">
13: *> [TGZ]</a>
14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsytrd.f">
15: *> [ZIP]</a>
16: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsytrd.f">
17: *> [TXT]</a>
18: *> \endhtmlonly
19: *
20: * Definition:
21: * ===========
22: *
23: * SUBROUTINE DSYTRD_SY2SB( UPLO, N, KD, A, LDA, AB, LDAB, TAU,
24: * WORK, LWORK, INFO )
25: *
26: * IMPLICIT NONE
27: *
28: * .. Scalar Arguments ..
29: * CHARACTER UPLO
30: * INTEGER INFO, LDA, LDAB, LWORK, N, KD
31: * ..
32: * .. Array Arguments ..
33: * DOUBLE PRECISION A( LDA, * ), AB( LDAB, * ),
34: * TAU( * ), WORK( * )
35: * ..
36: *
37: *
38: *> \par Purpose:
39: * =============
40: *>
41: *> \verbatim
42: *>
43: *> DSYTRD_SY2SB reduces a real symmetric matrix A to real symmetric
44: *> band-diagonal form AB by a orthogonal similarity transformation:
45: *> Q**T * A * Q = AB.
46: *> \endverbatim
47: *
48: * Arguments:
49: * ==========
50: *
51: *> \param[in] UPLO
52: *> \verbatim
53: *> UPLO is CHARACTER*1
54: *> = 'U': Upper triangle of A is stored;
55: *> = 'L': Lower triangle of A is stored.
56: *> \endverbatim
57: *>
58: *> \param[in] N
59: *> \verbatim
60: *> N is INTEGER
61: *> The order of the matrix A. N >= 0.
62: *> \endverbatim
63: *>
64: *> \param[in] KD
65: *> \verbatim
66: *> KD is INTEGER
67: *> The number of superdiagonals of the reduced matrix if UPLO = 'U',
68: *> or the number of subdiagonals if UPLO = 'L'. KD >= 0.
69: *> The reduced matrix is stored in the array AB.
70: *> \endverbatim
71: *>
72: *> \param[in,out] A
73: *> \verbatim
74: *> A is DOUBLE PRECISION array, dimension (LDA,N)
75: *> On entry, the symmetric matrix A. If UPLO = 'U', the leading
76: *> N-by-N upper triangular part of A contains the upper
77: *> triangular part of the matrix A, and the strictly lower
78: *> triangular part of A is not referenced. If UPLO = 'L', the
79: *> leading N-by-N lower triangular part of A contains the lower
80: *> triangular part of the matrix A, and the strictly upper
81: *> triangular part of A is not referenced.
82: *> On exit, if UPLO = 'U', the diagonal and first superdiagonal
83: *> of A are overwritten by the corresponding elements of the
84: *> tridiagonal matrix T, and the elements above the first
85: *> superdiagonal, with the array TAU, represent the orthogonal
86: *> matrix Q as a product of elementary reflectors; if UPLO
87: *> = 'L', the diagonal and first subdiagonal of A are over-
88: *> written by the corresponding elements of the tridiagonal
89: *> matrix T, and the elements below the first subdiagonal, with
90: *> the array TAU, represent the orthogonal matrix Q as a product
91: *> of elementary reflectors. See Further Details.
92: *> \endverbatim
93: *>
94: *> \param[in] LDA
95: *> \verbatim
96: *> LDA is INTEGER
97: *> The leading dimension of the array A. LDA >= max(1,N).
98: *> \endverbatim
99: *>
100: *> \param[out] AB
101: *> \verbatim
102: *> AB is DOUBLE PRECISION array, dimension (LDAB,N)
103: *> On exit, the upper or lower triangle of the symmetric band
104: *> matrix A, stored in the first KD+1 rows of the array. The
105: *> j-th column of A is stored in the j-th column of the array AB
106: *> as follows:
107: *> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
108: *> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd).
109: *> \endverbatim
110: *>
111: *> \param[in] LDAB
112: *> \verbatim
113: *> LDAB is INTEGER
114: *> The leading dimension of the array AB. LDAB >= KD+1.
115: *> \endverbatim
116: *>
117: *> \param[out] TAU
118: *> \verbatim
119: *> TAU is DOUBLE PRECISION array, dimension (N-KD)
120: *> The scalar factors of the elementary reflectors (see Further
121: *> Details).
122: *> \endverbatim
123: *>
124: *> \param[out] WORK
125: *> \verbatim
126: *> WORK is DOUBLE PRECISION array, dimension (LWORK)
127: *> On exit, if INFO = 0, or if LWORK=-1,
128: *> WORK(1) returns the size of LWORK.
129: *> \endverbatim
130: *>
131: *> \param[in] LWORK
132: *> \verbatim
133: *> LWORK is INTEGER
134: *> The dimension of the array WORK which should be calculated
135: *> by a workspace query. LWORK = MAX(1, LWORK_QUERY)
136: *> If LWORK = -1, then a workspace query is assumed; the routine
137: *> only calculates the optimal size of the WORK array, returns
138: *> this value as the first entry of the WORK array, and no error
139: *> message related to LWORK is issued by XERBLA.
140: *> LWORK_QUERY = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD
141: *> where FACTOPTNB is the blocking used by the QR or LQ
142: *> algorithm, usually FACTOPTNB=128 is a good choice otherwise
143: *> putting LWORK=-1 will provide the size of WORK.
144: *> \endverbatim
145: *>
146: *> \param[out] INFO
147: *> \verbatim
148: *> INFO is INTEGER
149: *> = 0: successful exit
150: *> < 0: if INFO = -i, the i-th argument had an illegal value
151: *> \endverbatim
152: *
153: * Authors:
154: * ========
155: *
156: *> \author Univ. of Tennessee
157: *> \author Univ. of California Berkeley
158: *> \author Univ. of Colorado Denver
159: *> \author NAG Ltd.
160: *
161: *> \date November 2017
162: *
163: *> \ingroup doubleSYcomputational
164: *
165: *> \par Further Details:
166: * =====================
167: *>
168: *> \verbatim
169: *>
170: *> Implemented by Azzam Haidar.
171: *>
172: *> All details are available on technical report, SC11, SC13 papers.
173: *>
174: *> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
175: *> Parallel reduction to condensed forms for symmetric eigenvalue problems
176: *> using aggregated fine-grained and memory-aware kernels. In Proceedings
177: *> of 2011 International Conference for High Performance Computing,
178: *> Networking, Storage and Analysis (SC '11), New York, NY, USA,
179: *> Article 8 , 11 pages.
180: *> http://doi.acm.org/10.1145/2063384.2063394
181: *>
182: *> A. Haidar, J. Kurzak, P. Luszczek, 2013.
183: *> An improved parallel singular value algorithm and its implementation
184: *> for multicore hardware, In Proceedings of 2013 International Conference
185: *> for High Performance Computing, Networking, Storage and Analysis (SC '13).
186: *> Denver, Colorado, USA, 2013.
187: *> Article 90, 12 pages.
188: *> http://doi.acm.org/10.1145/2503210.2503292
189: *>
190: *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
191: *> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
192: *> calculations based on fine-grained memory aware tasks.
193: *> International Journal of High Performance Computing Applications.
194: *> Volume 28 Issue 2, Pages 196-209, May 2014.
195: *> http://hpc.sagepub.com/content/28/2/196
196: *>
197: *> \endverbatim
198: *>
199: *> \verbatim
200: *>
201: *> If UPLO = 'U', the matrix Q is represented as a product of elementary
202: *> reflectors
203: *>
204: *> Q = H(k)**T . . . H(2)**T H(1)**T, where k = n-kd.
205: *>
206: *> Each H(i) has the form
207: *>
208: *> H(i) = I - tau * v * v**T
209: *>
210: *> where tau is a real scalar, and v is a real vector with
211: *> v(1:i+kd-1) = 0 and v(i+kd) = 1; conjg(v(i+kd+1:n)) is stored on exit in
212: *> A(i,i+kd+1:n), and tau in TAU(i).
213: *>
214: *> If UPLO = 'L', the matrix Q is represented as a product of elementary
215: *> reflectors
216: *>
217: *> Q = H(1) H(2) . . . H(k), where k = n-kd.
218: *>
219: *> Each H(i) has the form
220: *>
221: *> H(i) = I - tau * v * v**T
222: *>
223: *> where tau is a real scalar, and v is a real vector with
224: *> v(kd+1:i) = 0 and v(i+kd+1) = 1; v(i+kd+2:n) is stored on exit in
225: *> A(i+kd+2:n,i), and tau in TAU(i).
226: *>
227: *> The contents of A on exit are illustrated by the following examples
228: *> with n = 5:
229: *>
230: *> if UPLO = 'U': if UPLO = 'L':
231: *>
232: *> ( ab ab/v1 v1 v1 v1 ) ( ab )
233: *> ( ab ab/v2 v2 v2 ) ( ab/v1 ab )
234: *> ( ab ab/v3 v3 ) ( v1 ab/v2 ab )
235: *> ( ab ab/v4 ) ( v1 v2 ab/v3 ab )
236: *> ( ab ) ( v1 v2 v3 ab/v4 ab )
237: *>
238: *> where d and e denote diagonal and off-diagonal elements of T, and vi
239: *> denotes an element of the vector defining H(i).
240: *> \endverbatim
241: *>
242: * =====================================================================
243: SUBROUTINE DSYTRD_SY2SB( UPLO, N, KD, A, LDA, AB, LDAB, TAU,
244: $ WORK, LWORK, INFO )
245: *
246: IMPLICIT NONE
247: *
248: * -- LAPACK computational routine (version 3.8.0) --
249: * -- LAPACK is a software package provided by Univ. of Tennessee, --
250: * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
251: * November 2017
252: *
253: * .. Scalar Arguments ..
254: CHARACTER UPLO
255: INTEGER INFO, LDA, LDAB, LWORK, N, KD
256: * ..
257: * .. Array Arguments ..
258: DOUBLE PRECISION A( LDA, * ), AB( LDAB, * ),
259: $ TAU( * ), WORK( * )
260: * ..
261: *
262: * =====================================================================
263: *
264: * .. Parameters ..
265: DOUBLE PRECISION RONE
266: DOUBLE PRECISION ZERO, ONE, HALF
267: PARAMETER ( RONE = 1.0D+0,
268: $ ZERO = 0.0D+0,
269: $ ONE = 1.0D+0,
270: $ HALF = 0.5D+0 )
271: * ..
272: * .. Local Scalars ..
273: LOGICAL LQUERY, UPPER
274: INTEGER I, J, IINFO, LWMIN, PN, PK, LK,
275: $ LDT, LDW, LDS2, LDS1,
276: $ LS2, LS1, LW, LT,
277: $ TPOS, WPOS, S2POS, S1POS
278: * ..
279: * .. External Subroutines ..
280: EXTERNAL XERBLA, DSYR2K, DSYMM, DGEMM, DCOPY,
281: $ DLARFT, DGELQF, DGEQRF, DLASET
282: * ..
283: * .. Intrinsic Functions ..
284: INTRINSIC MIN, MAX
285: * ..
286: * .. External Functions ..
287: LOGICAL LSAME
288: INTEGER ILAENV2STAGE
289: EXTERNAL LSAME, ILAENV2STAGE
290: * ..
291: * .. Executable Statements ..
292: *
293: * Determine the minimal workspace size required
294: * and test the input parameters
295: *
296: INFO = 0
297: UPPER = LSAME( UPLO, 'U' )
298: LQUERY = ( LWORK.EQ.-1 )
299: LWMIN = ILAENV2STAGE( 4, 'DSYTRD_SY2SB', '', N, KD, -1, -1 )
300:
301: IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
302: INFO = -1
303: ELSE IF( N.LT.0 ) THEN
304: INFO = -2
305: ELSE IF( KD.LT.0 ) THEN
306: INFO = -3
307: ELSE IF( LDA.LT.MAX( 1, N ) ) THEN
308: INFO = -5
309: ELSE IF( LDAB.LT.MAX( 1, KD+1 ) ) THEN
310: INFO = -7
311: ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
312: INFO = -10
313: END IF
314: *
315: IF( INFO.NE.0 ) THEN
316: CALL XERBLA( 'DSYTRD_SY2SB', -INFO )
317: RETURN
318: ELSE IF( LQUERY ) THEN
319: WORK( 1 ) = LWMIN
320: RETURN
321: END IF
322: *
323: * Quick return if possible
324: * Copy the upper/lower portion of A into AB
325: *
326: IF( N.LE.KD+1 ) THEN
327: IF( UPPER ) THEN
328: DO 100 I = 1, N
329: LK = MIN( KD+1, I )
330: CALL DCOPY( LK, A( I-LK+1, I ), 1,
331: $ AB( KD+1-LK+1, I ), 1 )
332: 100 CONTINUE
333: ELSE
334: DO 110 I = 1, N
335: LK = MIN( KD+1, N-I+1 )
336: CALL DCOPY( LK, A( I, I ), 1, AB( 1, I ), 1 )
337: 110 CONTINUE
338: ENDIF
339: WORK( 1 ) = 1
340: RETURN
341: END IF
342: *
343: * Determine the pointer position for the workspace
344: *
345: LDT = KD
346: LDS1 = KD
347: LT = LDT*KD
348: LW = N*KD
349: LS1 = LDS1*KD
350: LS2 = LWMIN - LT - LW - LS1
351: * LS2 = N*MAX(KD,FACTOPTNB)
352: TPOS = 1
353: WPOS = TPOS + LT
354: S1POS = WPOS + LW
355: S2POS = S1POS + LS1
356: IF( UPPER ) THEN
357: LDW = KD
358: LDS2 = KD
359: ELSE
360: LDW = N
361: LDS2 = N
362: ENDIF
363: *
364: *
365: * Set the workspace of the triangular matrix T to zero once such a
366: * way every time T is generated the upper/lower portion will be always zero
367: *
368: CALL DLASET( "A", LDT, KD, ZERO, ZERO, WORK( TPOS ), LDT )
369: *
370: IF( UPPER ) THEN
371: DO 10 I = 1, N - KD, KD
372: PN = N-I-KD+1
373: PK = MIN( N-I-KD+1, KD )
374: *
375: * Compute the LQ factorization of the current block
376: *
377: CALL DGELQF( KD, PN, A( I, I+KD ), LDA,
378: $ TAU( I ), WORK( S2POS ), LS2, IINFO )
379: *
380: * Copy the upper portion of A into AB
381: *
382: DO 20 J = I, I+PK-1
383: LK = MIN( KD, N-J ) + 1
384: CALL DCOPY( LK, A( J, J ), LDA, AB( KD+1, J ), LDAB-1 )
385: 20 CONTINUE
386: *
387: CALL DLASET( 'Lower', PK, PK, ZERO, ONE,
388: $ A( I, I+KD ), LDA )
389: *
390: * Form the matrix T
391: *
392: CALL DLARFT( 'Forward', 'Rowwise', PN, PK,
393: $ A( I, I+KD ), LDA, TAU( I ),
394: $ WORK( TPOS ), LDT )
395: *
396: * Compute W:
397: *
398: CALL DGEMM( 'Conjugate', 'No transpose', PK, PN, PK,
399: $ ONE, WORK( TPOS ), LDT,
400: $ A( I, I+KD ), LDA,
401: $ ZERO, WORK( S2POS ), LDS2 )
402: *
403: CALL DSYMM( 'Right', UPLO, PK, PN,
404: $ ONE, A( I+KD, I+KD ), LDA,
405: $ WORK( S2POS ), LDS2,
406: $ ZERO, WORK( WPOS ), LDW )
407: *
408: CALL DGEMM( 'No transpose', 'Conjugate', PK, PK, PN,
409: $ ONE, WORK( WPOS ), LDW,
410: $ WORK( S2POS ), LDS2,
411: $ ZERO, WORK( S1POS ), LDS1 )
412: *
413: CALL DGEMM( 'No transpose', 'No transpose', PK, PN, PK,
414: $ -HALF, WORK( S1POS ), LDS1,
415: $ A( I, I+KD ), LDA,
416: $ ONE, WORK( WPOS ), LDW )
417: *
418: *
419: * Update the unreduced submatrix A(i+kd:n,i+kd:n), using
420: * an update of the form: A := A - V'*W - W'*V
421: *
422: CALL DSYR2K( UPLO, 'Conjugate', PN, PK,
423: $ -ONE, A( I, I+KD ), LDA,
424: $ WORK( WPOS ), LDW,
425: $ RONE, A( I+KD, I+KD ), LDA )
426: 10 CONTINUE
427: *
428: * Copy the upper band to AB which is the band storage matrix
429: *
430: DO 30 J = N-KD+1, N
431: LK = MIN(KD, N-J) + 1
432: CALL DCOPY( LK, A( J, J ), LDA, AB( KD+1, J ), LDAB-1 )
433: 30 CONTINUE
434: *
435: ELSE
436: *
437: * Reduce the lower triangle of A to lower band matrix
438: *
439: DO 40 I = 1, N - KD, KD
440: PN = N-I-KD+1
441: PK = MIN( N-I-KD+1, KD )
442: *
443: * Compute the QR factorization of the current block
444: *
445: CALL DGEQRF( PN, KD, A( I+KD, I ), LDA,
446: $ TAU( I ), WORK( S2POS ), LS2, IINFO )
447: *
448: * Copy the upper portion of A into AB
449: *
450: DO 50 J = I, I+PK-1
451: LK = MIN( KD, N-J ) + 1
452: CALL DCOPY( LK, A( J, J ), 1, AB( 1, J ), 1 )
453: 50 CONTINUE
454: *
455: CALL DLASET( 'Upper', PK, PK, ZERO, ONE,
456: $ A( I+KD, I ), LDA )
457: *
458: * Form the matrix T
459: *
460: CALL DLARFT( 'Forward', 'Columnwise', PN, PK,
461: $ A( I+KD, I ), LDA, TAU( I ),
462: $ WORK( TPOS ), LDT )
463: *
464: * Compute W:
465: *
466: CALL DGEMM( 'No transpose', 'No transpose', PN, PK, PK,
467: $ ONE, A( I+KD, I ), LDA,
468: $ WORK( TPOS ), LDT,
469: $ ZERO, WORK( S2POS ), LDS2 )
470: *
471: CALL DSYMM( 'Left', UPLO, PN, PK,
472: $ ONE, A( I+KD, I+KD ), LDA,
473: $ WORK( S2POS ), LDS2,
474: $ ZERO, WORK( WPOS ), LDW )
475: *
476: CALL DGEMM( 'Conjugate', 'No transpose', PK, PK, PN,
477: $ ONE, WORK( S2POS ), LDS2,
478: $ WORK( WPOS ), LDW,
479: $ ZERO, WORK( S1POS ), LDS1 )
480: *
481: CALL DGEMM( 'No transpose', 'No transpose', PN, PK, PK,
482: $ -HALF, A( I+KD, I ), LDA,
483: $ WORK( S1POS ), LDS1,
484: $ ONE, WORK( WPOS ), LDW )
485: *
486: *
487: * Update the unreduced submatrix A(i+kd:n,i+kd:n), using
488: * an update of the form: A := A - V*W' - W*V'
489: *
490: CALL DSYR2K( UPLO, 'No transpose', PN, PK,
491: $ -ONE, A( I+KD, I ), LDA,
492: $ WORK( WPOS ), LDW,
493: $ RONE, A( I+KD, I+KD ), LDA )
494: * ==================================================================
495: * RESTORE A FOR COMPARISON AND CHECKING TO BE REMOVED
496: * DO 45 J = I, I+PK-1
497: * LK = MIN( KD, N-J ) + 1
498: * CALL DCOPY( LK, AB( 1, J ), 1, A( J, J ), 1 )
499: * 45 CONTINUE
500: * ==================================================================
501: 40 CONTINUE
502: *
503: * Copy the lower band to AB which is the band storage matrix
504: *
505: DO 60 J = N-KD+1, N
506: LK = MIN(KD, N-J) + 1
507: CALL DCOPY( LK, A( J, J ), 1, AB( 1, J ), 1 )
508: 60 CONTINUE
509:
510: END IF
511: *
512: WORK( 1 ) = LWMIN
513: RETURN
514: *
515: * End of DSYTRD_SY2SB
516: *
517: END
CVSweb interface <joel.bertrand@systella.fr>