Annotation of rpl/lapack/lapack/dsytrd_sb2st.F, revision 1.4
1.1 bertrand 1: *> \brief \b DSYTRD_SB2ST reduces a real symmetric band matrix A to real symmetric tridiagonal form T
2: *
3: * =========== DOCUMENTATION ===========
4: *
5: * Online html documentation available at
6: * http://www.netlib.org/lapack/explore-html/
7: *
8: *> \htmlonly
9: *> Download DSYTRD_SB2ST + dependencies
10: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsytrd_sb2st.f">
11: *> [TGZ]</a>
12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsytrd_sb2st.f">
13: *> [ZIP]</a>
14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsytrd_sb2st.f">
15: *> [TXT]</a>
16: *> \endhtmlonly
17: *
18: * Definition:
19: * ===========
20: *
21: * SUBROUTINE DSYTRD_SB2ST( STAGE1, VECT, UPLO, N, KD, AB, LDAB,
22: * D, E, HOUS, LHOUS, WORK, LWORK, INFO )
23: *
24: * #if defined(_OPENMP)
25: * use omp_lib
26: * #endif
27: *
28: * IMPLICIT NONE
29: *
30: * .. Scalar Arguments ..
31: * CHARACTER STAGE1, UPLO, VECT
32: * INTEGER N, KD, IB, LDAB, LHOUS, LWORK, INFO
33: * ..
34: * .. Array Arguments ..
35: * DOUBLE PRECISION D( * ), E( * )
36: * DOUBLE PRECISION AB( LDAB, * ), HOUS( * ), WORK( * )
37: * ..
38: *
39: *
40: *> \par Purpose:
41: * =============
42: *>
43: *> \verbatim
44: *>
45: *> DSYTRD_SB2ST reduces a real symmetric band matrix A to real symmetric
46: *> tridiagonal form T by a orthogonal similarity transformation:
47: *> Q**T * A * Q = T.
48: *> \endverbatim
49: *
50: * Arguments:
51: * ==========
52: *
1.4 ! bertrand 53: *> \param[in] STAGE1
1.1 bertrand 54: *> \verbatim
1.4 ! bertrand 55: *> STAGE1 is CHARACTER*1
1.1 bertrand 56: *> = 'N': "No": to mention that the stage 1 of the reduction
57: *> from dense to band using the dsytrd_sy2sb routine
58: *> was not called before this routine to reproduce AB.
59: *> In other term this routine is called as standalone.
60: *> = 'Y': "Yes": to mention that the stage 1 of the
61: *> reduction from dense to band using the dsytrd_sy2sb
62: *> routine has been called to produce AB (e.g., AB is
63: *> the output of dsytrd_sy2sb.
64: *> \endverbatim
65: *>
66: *> \param[in] VECT
67: *> \verbatim
68: *> VECT is CHARACTER*1
69: *> = 'N': No need for the Housholder representation,
70: *> and thus LHOUS is of size max(1, 4*N);
71: *> = 'V': the Householder representation is needed to
72: *> either generate or to apply Q later on,
73: *> then LHOUS is to be queried and computed.
74: *> (NOT AVAILABLE IN THIS RELEASE).
75: *> \endverbatim
76: *>
77: *> \param[in] UPLO
78: *> \verbatim
79: *> UPLO is CHARACTER*1
80: *> = 'U': Upper triangle of A is stored;
81: *> = 'L': Lower triangle of A is stored.
82: *> \endverbatim
83: *>
84: *> \param[in] N
85: *> \verbatim
86: *> N is INTEGER
87: *> The order of the matrix A. N >= 0.
88: *> \endverbatim
89: *>
90: *> \param[in] KD
91: *> \verbatim
92: *> KD is INTEGER
93: *> The number of superdiagonals of the matrix A if UPLO = 'U',
94: *> or the number of subdiagonals if UPLO = 'L'. KD >= 0.
95: *> \endverbatim
96: *>
97: *> \param[in,out] AB
98: *> \verbatim
99: *> AB is DOUBLE PRECISION array, dimension (LDAB,N)
100: *> On entry, the upper or lower triangle of the symmetric band
101: *> matrix A, stored in the first KD+1 rows of the array. The
102: *> j-th column of A is stored in the j-th column of the array AB
103: *> as follows:
104: *> if UPLO = 'U', AB(kd+1+i-j,j) = A(i,j) for max(1,j-kd)<=i<=j;
105: *> if UPLO = 'L', AB(1+i-j,j) = A(i,j) for j<=i<=min(n,j+kd).
106: *> On exit, the diagonal elements of AB are overwritten by the
107: *> diagonal elements of the tridiagonal matrix T; if KD > 0, the
108: *> elements on the first superdiagonal (if UPLO = 'U') or the
109: *> first subdiagonal (if UPLO = 'L') are overwritten by the
110: *> off-diagonal elements of T; the rest of AB is overwritten by
111: *> values generated during the reduction.
112: *> \endverbatim
113: *>
114: *> \param[in] LDAB
115: *> \verbatim
116: *> LDAB is INTEGER
117: *> The leading dimension of the array AB. LDAB >= KD+1.
118: *> \endverbatim
119: *>
120: *> \param[out] D
121: *> \verbatim
122: *> D is DOUBLE PRECISION array, dimension (N)
123: *> The diagonal elements of the tridiagonal matrix T.
124: *> \endverbatim
125: *>
126: *> \param[out] E
127: *> \verbatim
128: *> E is DOUBLE PRECISION array, dimension (N-1)
129: *> The off-diagonal elements of the tridiagonal matrix T:
130: *> E(i) = T(i,i+1) if UPLO = 'U'; E(i) = T(i+1,i) if UPLO = 'L'.
131: *> \endverbatim
132: *>
133: *> \param[out] HOUS
134: *> \verbatim
135: *> HOUS is DOUBLE PRECISION array, dimension LHOUS, that
136: *> store the Householder representation.
137: *> \endverbatim
138: *>
139: *> \param[in] LHOUS
140: *> \verbatim
141: *> LHOUS is INTEGER
142: *> The dimension of the array HOUS. LHOUS = MAX(1, dimension)
143: *> If LWORK = -1, or LHOUS=-1,
144: *> then a query is assumed; the routine
145: *> only calculates the optimal size of the HOUS array, returns
146: *> this value as the first entry of the HOUS array, and no error
147: *> message related to LHOUS is issued by XERBLA.
148: *> LHOUS = MAX(1, dimension) where
149: *> dimension = 4*N if VECT='N'
150: *> not available now if VECT='H'
151: *> \endverbatim
152: *>
153: *> \param[out] WORK
154: *> \verbatim
155: *> WORK is DOUBLE PRECISION array, dimension LWORK.
156: *> \endverbatim
157: *>
158: *> \param[in] LWORK
159: *> \verbatim
160: *> LWORK is INTEGER
161: *> The dimension of the array WORK. LWORK = MAX(1, dimension)
162: *> If LWORK = -1, or LHOUS=-1,
163: *> then a workspace query is assumed; the routine
164: *> only calculates the optimal size of the WORK array, returns
165: *> this value as the first entry of the WORK array, and no error
166: *> message related to LWORK is issued by XERBLA.
167: *> LWORK = MAX(1, dimension) where
168: *> dimension = (2KD+1)*N + KD*NTHREADS
169: *> where KD is the blocking size of the reduction,
170: *> FACTOPTNB is the blocking used by the QR or LQ
171: *> algorithm, usually FACTOPTNB=128 is a good choice
172: *> NTHREADS is the number of threads used when
173: *> openMP compilation is enabled, otherwise =1.
174: *> \endverbatim
175: *>
176: *> \param[out] INFO
177: *> \verbatim
178: *> INFO is INTEGER
179: *> = 0: successful exit
180: *> < 0: if INFO = -i, the i-th argument had an illegal value
181: *> \endverbatim
182: *
183: * Authors:
184: * ========
185: *
186: *> \author Univ. of Tennessee
187: *> \author Univ. of California Berkeley
188: *> \author Univ. of Colorado Denver
189: *> \author NAG Ltd.
190: *
1.4 ! bertrand 191: *> \date November 2017
1.1 bertrand 192: *
193: *> \ingroup real16OTHERcomputational
194: *
195: *> \par Further Details:
196: * =====================
197: *>
198: *> \verbatim
199: *>
200: *> Implemented by Azzam Haidar.
201: *>
202: *> All details are available on technical report, SC11, SC13 papers.
203: *>
204: *> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
205: *> Parallel reduction to condensed forms for symmetric eigenvalue problems
206: *> using aggregated fine-grained and memory-aware kernels. In Proceedings
207: *> of 2011 International Conference for High Performance Computing,
208: *> Networking, Storage and Analysis (SC '11), New York, NY, USA,
209: *> Article 8 , 11 pages.
210: *> http://doi.acm.org/10.1145/2063384.2063394
211: *>
212: *> A. Haidar, J. Kurzak, P. Luszczek, 2013.
213: *> An improved parallel singular value algorithm and its implementation
214: *> for multicore hardware, In Proceedings of 2013 International Conference
215: *> for High Performance Computing, Networking, Storage and Analysis (SC '13).
216: *> Denver, Colorado, USA, 2013.
217: *> Article 90, 12 pages.
218: *> http://doi.acm.org/10.1145/2503210.2503292
219: *>
220: *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
221: *> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
222: *> calculations based on fine-grained memory aware tasks.
223: *> International Journal of High Performance Computing Applications.
224: *> Volume 28 Issue 2, Pages 196-209, May 2014.
225: *> http://hpc.sagepub.com/content/28/2/196
226: *>
227: *> \endverbatim
228: *>
229: * =====================================================================
230: SUBROUTINE DSYTRD_SB2ST( STAGE1, VECT, UPLO, N, KD, AB, LDAB,
231: $ D, E, HOUS, LHOUS, WORK, LWORK, INFO )
232: *
233: #if defined(_OPENMP)
234: use omp_lib
235: #endif
236: *
237: IMPLICIT NONE
238: *
1.4 ! bertrand 239: * -- LAPACK computational routine (version 3.8.0) --
1.1 bertrand 240: * -- LAPACK is a software package provided by Univ. of Tennessee, --
241: * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
1.4 ! bertrand 242: * November 2017
1.1 bertrand 243: *
244: * .. Scalar Arguments ..
245: CHARACTER STAGE1, UPLO, VECT
246: INTEGER N, KD, LDAB, LHOUS, LWORK, INFO
247: * ..
248: * .. Array Arguments ..
249: DOUBLE PRECISION D( * ), E( * )
250: DOUBLE PRECISION AB( LDAB, * ), HOUS( * ), WORK( * )
251: * ..
252: *
253: * =====================================================================
254: *
255: * .. Parameters ..
256: DOUBLE PRECISION RZERO
257: DOUBLE PRECISION ZERO, ONE
258: PARAMETER ( RZERO = 0.0D+0,
259: $ ZERO = 0.0D+0,
260: $ ONE = 1.0D+0 )
261: * ..
262: * .. Local Scalars ..
263: LOGICAL LQUERY, WANTQ, UPPER, AFTERS1
264: INTEGER I, M, K, IB, SWEEPID, MYID, SHIFT, STT, ST,
265: $ ED, STIND, EDIND, BLKLASTIND, COLPT, THED,
266: $ STEPERCOL, GRSIZ, THGRSIZ, THGRNB, THGRID,
267: $ NBTILES, TTYPE, TID, NTHREADS, DEBUG,
268: $ ABDPOS, ABOFDPOS, DPOS, OFDPOS, AWPOS,
269: $ INDA, INDW, APOS, SIZEA, LDA, INDV, INDTAU,
270: $ SIDEV, SIZETAU, LDV, LHMIN, LWMIN
271: * ..
272: * .. External Subroutines ..
1.4 ! bertrand 273: EXTERNAL DSB2ST_KERNELS, DLACPY, DLASET, XERBLA
1.1 bertrand 274: * ..
275: * .. Intrinsic Functions ..
276: INTRINSIC MIN, MAX, CEILING, REAL
277: * ..
278: * .. External Functions ..
279: LOGICAL LSAME
1.4 ! bertrand 280: INTEGER ILAENV2STAGE
! 281: EXTERNAL LSAME, ILAENV2STAGE
1.1 bertrand 282: * ..
283: * .. Executable Statements ..
284: *
285: * Determine the minimal workspace size required.
286: * Test the input parameters
287: *
288: DEBUG = 0
289: INFO = 0
290: AFTERS1 = LSAME( STAGE1, 'Y' )
291: WANTQ = LSAME( VECT, 'V' )
292: UPPER = LSAME( UPLO, 'U' )
293: LQUERY = ( LWORK.EQ.-1 ) .OR. ( LHOUS.EQ.-1 )
294: *
295: * Determine the block size, the workspace size and the hous size.
296: *
1.4 ! bertrand 297: IB = ILAENV2STAGE( 2, 'DSYTRD_SB2ST', VECT, N, KD, -1, -1 )
! 298: LHMIN = ILAENV2STAGE( 3, 'DSYTRD_SB2ST', VECT, N, KD, IB, -1 )
! 299: LWMIN = ILAENV2STAGE( 4, 'DSYTRD_SB2ST', VECT, N, KD, IB, -1 )
1.1 bertrand 300: *
301: IF( .NOT.AFTERS1 .AND. .NOT.LSAME( STAGE1, 'N' ) ) THEN
302: INFO = -1
303: ELSE IF( .NOT.LSAME( VECT, 'N' ) ) THEN
304: INFO = -2
305: ELSE IF( .NOT.UPPER .AND. .NOT.LSAME( UPLO, 'L' ) ) THEN
306: INFO = -3
307: ELSE IF( N.LT.0 ) THEN
308: INFO = -4
309: ELSE IF( KD.LT.0 ) THEN
310: INFO = -5
311: ELSE IF( LDAB.LT.(KD+1) ) THEN
312: INFO = -7
313: ELSE IF( LHOUS.LT.LHMIN .AND. .NOT.LQUERY ) THEN
314: INFO = -11
315: ELSE IF( LWORK.LT.LWMIN .AND. .NOT.LQUERY ) THEN
316: INFO = -13
317: END IF
318: *
319: IF( INFO.EQ.0 ) THEN
320: HOUS( 1 ) = LHMIN
321: WORK( 1 ) = LWMIN
322: END IF
323: *
324: IF( INFO.NE.0 ) THEN
325: CALL XERBLA( 'DSYTRD_SB2ST', -INFO )
326: RETURN
327: ELSE IF( LQUERY ) THEN
328: RETURN
329: END IF
330: *
331: * Quick return if possible
332: *
333: IF( N.EQ.0 ) THEN
334: HOUS( 1 ) = 1
335: WORK( 1 ) = 1
336: RETURN
337: END IF
338: *
339: * Determine pointer position
340: *
341: LDV = KD + IB
342: SIZETAU = 2 * N
343: SIDEV = 2 * N
344: INDTAU = 1
345: INDV = INDTAU + SIZETAU
346: LDA = 2 * KD + 1
347: SIZEA = LDA * N
348: INDA = 1
349: INDW = INDA + SIZEA
350: NTHREADS = 1
351: TID = 0
352: *
353: IF( UPPER ) THEN
354: APOS = INDA + KD
355: AWPOS = INDA
356: DPOS = APOS + KD
357: OFDPOS = DPOS - 1
358: ABDPOS = KD + 1
359: ABOFDPOS = KD
360: ELSE
361: APOS = INDA
362: AWPOS = INDA + KD + 1
363: DPOS = APOS
364: OFDPOS = DPOS + 1
365: ABDPOS = 1
366: ABOFDPOS = 2
367:
368: ENDIF
369: *
370: * Case KD=0:
371: * The matrix is diagonal. We just copy it (convert to "real" for
372: * real because D is double and the imaginary part should be 0)
373: * and store it in D. A sequential code here is better or
374: * in a parallel environment it might need two cores for D and E
375: *
376: IF( KD.EQ.0 ) THEN
377: DO 30 I = 1, N
378: D( I ) = ( AB( ABDPOS, I ) )
379: 30 CONTINUE
380: DO 40 I = 1, N-1
381: E( I ) = RZERO
382: 40 CONTINUE
383: *
384: HOUS( 1 ) = 1
385: WORK( 1 ) = 1
386: RETURN
387: END IF
388: *
389: * Case KD=1:
390: * The matrix is already Tridiagonal. We have to make diagonal
391: * and offdiagonal elements real, and store them in D and E.
392: * For that, for real precision just copy the diag and offdiag
393: * to D and E while for the COMPLEX case the bulge chasing is
394: * performed to convert the hermetian tridiagonal to symmetric
395: * tridiagonal. A simpler coversion formula might be used, but then
396: * updating the Q matrix will be required and based if Q is generated
397: * or not this might complicate the story.
398: *
399: IF( KD.EQ.1 ) THEN
400: DO 50 I = 1, N
401: D( I ) = ( AB( ABDPOS, I ) )
402: 50 CONTINUE
403: *
404: IF( UPPER ) THEN
405: DO 60 I = 1, N-1
406: E( I ) = ( AB( ABOFDPOS, I+1 ) )
407: 60 CONTINUE
408: ELSE
409: DO 70 I = 1, N-1
410: E( I ) = ( AB( ABOFDPOS, I ) )
411: 70 CONTINUE
412: ENDIF
413: *
414: HOUS( 1 ) = 1
415: WORK( 1 ) = 1
416: RETURN
417: END IF
418: *
419: * Main code start here.
420: * Reduce the symmetric band of A to a tridiagonal matrix.
421: *
422: THGRSIZ = N
423: GRSIZ = 1
424: SHIFT = 3
425: NBTILES = CEILING( REAL(N)/REAL(KD) )
426: STEPERCOL = CEILING( REAL(SHIFT)/REAL(GRSIZ) )
427: THGRNB = CEILING( REAL(N-1)/REAL(THGRSIZ) )
428: *
429: CALL DLACPY( "A", KD+1, N, AB, LDAB, WORK( APOS ), LDA )
430: CALL DLASET( "A", KD, N, ZERO, ZERO, WORK( AWPOS ), LDA )
431: *
432: *
433: * openMP parallelisation start here
434: *
435: #if defined(_OPENMP)
436: !$OMP PARALLEL PRIVATE( TID, THGRID, BLKLASTIND )
437: !$OMP$ PRIVATE( THED, I, M, K, ST, ED, STT, SWEEPID )
438: !$OMP$ PRIVATE( MYID, TTYPE, COLPT, STIND, EDIND )
439: !$OMP$ SHARED ( UPLO, WANTQ, INDV, INDTAU, HOUS, WORK)
440: !$OMP$ SHARED ( N, KD, IB, NBTILES, LDA, LDV, INDA )
441: !$OMP$ SHARED ( STEPERCOL, THGRNB, THGRSIZ, GRSIZ, SHIFT )
442: !$OMP MASTER
443: #endif
444: *
445: * main bulge chasing loop
446: *
447: DO 100 THGRID = 1, THGRNB
448: STT = (THGRID-1)*THGRSIZ+1
449: THED = MIN( (STT + THGRSIZ -1), (N-1))
450: DO 110 I = STT, N-1
451: ED = MIN( I, THED )
452: IF( STT.GT.ED ) EXIT
453: DO 120 M = 1, STEPERCOL
454: ST = STT
455: DO 130 SWEEPID = ST, ED
456: DO 140 K = 1, GRSIZ
457: MYID = (I-SWEEPID)*(STEPERCOL*GRSIZ)
458: $ + (M-1)*GRSIZ + K
459: IF ( MYID.EQ.1 ) THEN
460: TTYPE = 1
461: ELSE
462: TTYPE = MOD( MYID, 2 ) + 2
463: ENDIF
464:
465: IF( TTYPE.EQ.2 ) THEN
466: COLPT = (MYID/2)*KD + SWEEPID
467: STIND = COLPT-KD+1
468: EDIND = MIN(COLPT,N)
469: BLKLASTIND = COLPT
470: ELSE
471: COLPT = ((MYID+1)/2)*KD + SWEEPID
472: STIND = COLPT-KD+1
473: EDIND = MIN(COLPT,N)
474: IF( ( STIND.GE.EDIND-1 ).AND.
475: $ ( EDIND.EQ.N ) ) THEN
476: BLKLASTIND = N
477: ELSE
478: BLKLASTIND = 0
479: ENDIF
480: ENDIF
481: *
482: * Call the kernel
483: *
484: #if defined(_OPENMP)
485: IF( TTYPE.NE.1 ) THEN
486: !$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1))
487: !$OMP$ DEPEND(in:WORK(MYID-1))
488: !$OMP$ DEPEND(out:WORK(MYID))
489: TID = OMP_GET_THREAD_NUM()
490: CALL DSB2ST_KERNELS( UPLO, WANTQ, TTYPE,
491: $ STIND, EDIND, SWEEPID, N, KD, IB,
492: $ WORK ( INDA ), LDA,
493: $ HOUS( INDV ), HOUS( INDTAU ), LDV,
494: $ WORK( INDW + TID*KD ) )
495: !$OMP END TASK
496: ELSE
497: !$OMP TASK DEPEND(in:WORK(MYID+SHIFT-1))
498: !$OMP$ DEPEND(out:WORK(MYID))
499: TID = OMP_GET_THREAD_NUM()
500: CALL DSB2ST_KERNELS( UPLO, WANTQ, TTYPE,
501: $ STIND, EDIND, SWEEPID, N, KD, IB,
502: $ WORK ( INDA ), LDA,
503: $ HOUS( INDV ), HOUS( INDTAU ), LDV,
504: $ WORK( INDW + TID*KD ) )
505: !$OMP END TASK
506: ENDIF
507: #else
508: CALL DSB2ST_KERNELS( UPLO, WANTQ, TTYPE,
509: $ STIND, EDIND, SWEEPID, N, KD, IB,
510: $ WORK ( INDA ), LDA,
511: $ HOUS( INDV ), HOUS( INDTAU ), LDV,
512: $ WORK( INDW + TID*KD ) )
513: #endif
514: IF ( BLKLASTIND.GE.(N-1) ) THEN
515: STT = STT + 1
516: EXIT
517: ENDIF
518: 140 CONTINUE
519: 130 CONTINUE
520: 120 CONTINUE
521: 110 CONTINUE
522: 100 CONTINUE
523: *
524: #if defined(_OPENMP)
525: !$OMP END MASTER
526: !$OMP END PARALLEL
527: #endif
528: *
529: * Copy the diagonal from A to D. Note that D is REAL thus only
530: * the Real part is needed, the imaginary part should be zero.
531: *
532: DO 150 I = 1, N
533: D( I ) = ( WORK( DPOS+(I-1)*LDA ) )
534: 150 CONTINUE
535: *
536: * Copy the off diagonal from A to E. Note that E is REAL thus only
537: * the Real part is needed, the imaginary part should be zero.
538: *
539: IF( UPPER ) THEN
540: DO 160 I = 1, N-1
541: E( I ) = ( WORK( OFDPOS+I*LDA ) )
542: 160 CONTINUE
543: ELSE
544: DO 170 I = 1, N-1
545: E( I ) = ( WORK( OFDPOS+(I-1)*LDA ) )
546: 170 CONTINUE
547: ENDIF
548: *
549: HOUS( 1 ) = LHMIN
550: WORK( 1 ) = LWMIN
551: RETURN
552: *
553: * End of DSYTRD_SB2ST
554: *
555: END
556:
CVSweb interface <joel.bertrand@systella.fr>