1: *> \brief \b DSB2ST_KERNELS
2: *
3: * @generated from zhb2st_kernels.f, fortran z -> d, Wed Dec 7 08:22:39 2016
4: *
5: * =========== DOCUMENTATION ===========
6: *
7: * Online html documentation available at
8: * http://www.netlib.org/lapack/explore-html/
9: *
10: *> \htmlonly
11: *> Download DSB2ST_KERNELS + dependencies
12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsb2st_kernels.f">
13: *> [TGZ]</a>
14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsb2st_kernels.f">
15: *> [ZIP]</a>
16: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsb2st_kernels.f">
17: *> [TXT]</a>
18: *> \endhtmlonly
19: *
20: * Definition:
21: * ===========
22: *
23: * SUBROUTINE DSB2ST_KERNELS( UPLO, WANTZ, TTYPE,
24: * ST, ED, SWEEP, N, NB, IB,
25: * A, LDA, V, TAU, LDVT, WORK)
26: *
27: * IMPLICIT NONE
28: *
29: * .. Scalar Arguments ..
30: * CHARACTER UPLO
31: * LOGICAL WANTZ
32: * INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT
33: * ..
34: * .. Array Arguments ..
35: * DOUBLE PRECISION A( LDA, * ), V( * ),
36: * TAU( * ), WORK( * )
37: *
38: *> \par Purpose:
39: * =============
40: *>
41: *> \verbatim
42: *>
43: *> DSB2ST_KERNELS is an internal routine used by the DSYTRD_SB2ST
44: *> subroutine.
45: *> \endverbatim
46: *
47: * Arguments:
48: * ==========
49: *
50: *> \param[in] UPLO
51: *> \verbatim
52: *> UPLO is CHARACTER*1
53: *> \endverbatim
54: *>
55: *> \param[in] WANTZ
56: *> \verbatim
57: *> WANTZ is LOGICAL which indicate if Eigenvalue are requested or both
58: *> Eigenvalue/Eigenvectors.
59: *> \endverbatim
60: *>
61: *> \param[in] TTYPE
62: *> \verbatim
63: *> TTYPE is INTEGER
64: *> \endverbatim
65: *>
66: *> \param[in] ST
67: *> \verbatim
68: *> ST is INTEGER
69: *> internal parameter for indices.
70: *> \endverbatim
71: *>
72: *> \param[in] ED
73: *> \verbatim
74: *> ED is INTEGER
75: *> internal parameter for indices.
76: *> \endverbatim
77: *>
78: *> \param[in] SWEEP
79: *> \verbatim
80: *> SWEEP is INTEGER
81: *> internal parameter for indices.
82: *> \endverbatim
83: *>
84: *> \param[in] N
85: *> \verbatim
86: *> N is INTEGER. The order of the matrix A.
87: *> \endverbatim
88: *>
89: *> \param[in] NB
90: *> \verbatim
91: *> NB is INTEGER. The size of the band.
92: *> \endverbatim
93: *>
94: *> \param[in] IB
95: *> \verbatim
96: *> IB is INTEGER.
97: *> \endverbatim
98: *>
99: *> \param[in, out] A
100: *> \verbatim
101: *> A is DOUBLE PRECISION array. A pointer to the matrix A.
102: *> \endverbatim
103: *>
104: *> \param[in] LDA
105: *> \verbatim
106: *> LDA is INTEGER. The leading dimension of the matrix A.
107: *> \endverbatim
108: *>
109: *> \param[out] V
110: *> \verbatim
111: *> V is DOUBLE PRECISION array, dimension 2*n if eigenvalues only are
112: *> requested or to be queried for vectors.
113: *> \endverbatim
114: *>
115: *> \param[out] TAU
116: *> \verbatim
117: *> TAU is DOUBLE PRECISION array, dimension (2*n).
118: *> The scalar factors of the Householder reflectors are stored
119: *> in this array.
120: *> \endverbatim
121: *>
122: *> \param[in] LDVT
123: *> \verbatim
124: *> LDVT is INTEGER.
125: *> \endverbatim
126: *>
127: *> \param[in] WORK
128: *> \verbatim
129: *> WORK is DOUBLE PRECISION array. Workspace of size nb.
130: *> \endverbatim
131: *>
132: *> \par Further Details:
133: * =====================
134: *>
135: *> \verbatim
136: *>
137: *> Implemented by Azzam Haidar.
138: *>
139: *> All details are available on technical report, SC11, SC13 papers.
140: *>
141: *> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
142: *> Parallel reduction to condensed forms for symmetric eigenvalue problems
143: *> using aggregated fine-grained and memory-aware kernels. In Proceedings
144: *> of 2011 International Conference for High Performance Computing,
145: *> Networking, Storage and Analysis (SC '11), New York, NY, USA,
146: *> Article 8 , 11 pages.
147: *> http://doi.acm.org/10.1145/2063384.2063394
148: *>
149: *> A. Haidar, J. Kurzak, P. Luszczek, 2013.
150: *> An improved parallel singular value algorithm and its implementation
151: *> for multicore hardware, In Proceedings of 2013 International Conference
152: *> for High Performance Computing, Networking, Storage and Analysis (SC '13).
153: *> Denver, Colorado, USA, 2013.
154: *> Article 90, 12 pages.
155: *> http://doi.acm.org/10.1145/2503210.2503292
156: *>
157: *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
158: *> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
159: *> calculations based on fine-grained memory aware tasks.
160: *> International Journal of High Performance Computing Applications.
161: *> Volume 28 Issue 2, Pages 196-209, May 2014.
162: *> http://hpc.sagepub.com/content/28/2/196
163: *>
164: *> \endverbatim
165: *>
166: * =====================================================================
167: SUBROUTINE DSB2ST_KERNELS( UPLO, WANTZ, TTYPE,
168: $ ST, ED, SWEEP, N, NB, IB,
169: $ A, LDA, V, TAU, LDVT, WORK)
170: *
171: IMPLICIT NONE
172: *
173: * -- LAPACK computational routine (version 3.7.1) --
174: * -- LAPACK is a software package provided by Univ. of Tennessee, --
175: * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
176: * June 2017
177: *
178: * .. Scalar Arguments ..
179: CHARACTER UPLO
180: LOGICAL WANTZ
181: INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT
182: * ..
183: * .. Array Arguments ..
184: DOUBLE PRECISION A( LDA, * ), V( * ),
185: $ TAU( * ), WORK( * )
186: * ..
187: *
188: * =====================================================================
189: *
190: * .. Parameters ..
191: DOUBLE PRECISION ZERO, ONE
192: PARAMETER ( ZERO = 0.0D+0,
193: $ ONE = 1.0D+0 )
194: * ..
195: * .. Local Scalars ..
196: LOGICAL UPPER
197: INTEGER I, J1, J2, LM, LN, VPOS, TAUPOS,
198: $ DPOS, OFDPOS, AJETER
199: DOUBLE PRECISION CTMP
200: * ..
201: * .. External Subroutines ..
202: EXTERNAL DLARFG, DLARFX, DLARFY
203: * ..
204: * .. Intrinsic Functions ..
205: INTRINSIC MOD
206: * .. External Functions ..
207: LOGICAL LSAME
208: EXTERNAL LSAME
209: * ..
210: * ..
211: * .. Executable Statements ..
212: *
213: AJETER = IB + LDVT
214: UPPER = LSAME( UPLO, 'U' )
215:
216: IF( UPPER ) THEN
217: DPOS = 2 * NB + 1
218: OFDPOS = 2 * NB
219: ELSE
220: DPOS = 1
221: OFDPOS = 2
222: ENDIF
223:
224: *
225: * Upper case
226: *
227: IF( UPPER ) THEN
228: *
229: IF( WANTZ ) THEN
230: VPOS = MOD( SWEEP-1, 2 ) * N + ST
231: TAUPOS = MOD( SWEEP-1, 2 ) * N + ST
232: ELSE
233: VPOS = MOD( SWEEP-1, 2 ) * N + ST
234: TAUPOS = MOD( SWEEP-1, 2 ) * N + ST
235: ENDIF
236: *
237: IF( TTYPE.EQ.1 ) THEN
238: LM = ED - ST + 1
239: *
240: V( VPOS ) = ONE
241: DO 10 I = 1, LM-1
242: V( VPOS+I ) = ( A( OFDPOS-I, ST+I ) )
243: A( OFDPOS-I, ST+I ) = ZERO
244: 10 CONTINUE
245: CTMP = ( A( OFDPOS, ST ) )
246: CALL DLARFG( LM, CTMP, V( VPOS+1 ), 1,
247: $ TAU( TAUPOS ) )
248: A( OFDPOS, ST ) = CTMP
249: *
250: LM = ED - ST + 1
251: CALL DLARFY( UPLO, LM, V( VPOS ), 1,
252: $ ( TAU( TAUPOS ) ),
253: $ A( DPOS, ST ), LDA-1, WORK)
254: ENDIF
255: *
256: IF( TTYPE.EQ.3 ) THEN
257: *
258: LM = ED - ST + 1
259: CALL DLARFY( UPLO, LM, V( VPOS ), 1,
260: $ ( TAU( TAUPOS ) ),
261: $ A( DPOS, ST ), LDA-1, WORK)
262: ENDIF
263: *
264: IF( TTYPE.EQ.2 ) THEN
265: J1 = ED+1
266: J2 = MIN( ED+NB, N )
267: LN = ED-ST+1
268: LM = J2-J1+1
269: IF( LM.GT.0) THEN
270: CALL DLARFX( 'Left', LN, LM, V( VPOS ),
271: $ ( TAU( TAUPOS ) ),
272: $ A( DPOS-NB, J1 ), LDA-1, WORK)
273: *
274: IF( WANTZ ) THEN
275: VPOS = MOD( SWEEP-1, 2 ) * N + J1
276: TAUPOS = MOD( SWEEP-1, 2 ) * N + J1
277: ELSE
278: VPOS = MOD( SWEEP-1, 2 ) * N + J1
279: TAUPOS = MOD( SWEEP-1, 2 ) * N + J1
280: ENDIF
281: *
282: V( VPOS ) = ONE
283: DO 30 I = 1, LM-1
284: V( VPOS+I ) =
285: $ ( A( DPOS-NB-I, J1+I ) )
286: A( DPOS-NB-I, J1+I ) = ZERO
287: 30 CONTINUE
288: CTMP = ( A( DPOS-NB, J1 ) )
289: CALL DLARFG( LM, CTMP, V( VPOS+1 ), 1, TAU( TAUPOS ) )
290: A( DPOS-NB, J1 ) = CTMP
291: *
292: CALL DLARFX( 'Right', LN-1, LM, V( VPOS ),
293: $ TAU( TAUPOS ),
294: $ A( DPOS-NB+1, J1 ), LDA-1, WORK)
295: ENDIF
296: ENDIF
297: *
298: * Lower case
299: *
300: ELSE
301: *
302: IF( WANTZ ) THEN
303: VPOS = MOD( SWEEP-1, 2 ) * N + ST
304: TAUPOS = MOD( SWEEP-1, 2 ) * N + ST
305: ELSE
306: VPOS = MOD( SWEEP-1, 2 ) * N + ST
307: TAUPOS = MOD( SWEEP-1, 2 ) * N + ST
308: ENDIF
309: *
310: IF( TTYPE.EQ.1 ) THEN
311: LM = ED - ST + 1
312: *
313: V( VPOS ) = ONE
314: DO 20 I = 1, LM-1
315: V( VPOS+I ) = A( OFDPOS+I, ST-1 )
316: A( OFDPOS+I, ST-1 ) = ZERO
317: 20 CONTINUE
318: CALL DLARFG( LM, A( OFDPOS, ST-1 ), V( VPOS+1 ), 1,
319: $ TAU( TAUPOS ) )
320: *
321: LM = ED - ST + 1
322: *
323: CALL DLARFY( UPLO, LM, V( VPOS ), 1,
324: $ ( TAU( TAUPOS ) ),
325: $ A( DPOS, ST ), LDA-1, WORK)
326:
327: ENDIF
328: *
329: IF( TTYPE.EQ.3 ) THEN
330: LM = ED - ST + 1
331: *
332: CALL DLARFY( UPLO, LM, V( VPOS ), 1,
333: $ ( TAU( TAUPOS ) ),
334: $ A( DPOS, ST ), LDA-1, WORK)
335:
336: ENDIF
337: *
338: IF( TTYPE.EQ.2 ) THEN
339: J1 = ED+1
340: J2 = MIN( ED+NB, N )
341: LN = ED-ST+1
342: LM = J2-J1+1
343: *
344: IF( LM.GT.0) THEN
345: CALL DLARFX( 'Right', LM, LN, V( VPOS ),
346: $ TAU( TAUPOS ), A( DPOS+NB, ST ),
347: $ LDA-1, WORK)
348: *
349: IF( WANTZ ) THEN
350: VPOS = MOD( SWEEP-1, 2 ) * N + J1
351: TAUPOS = MOD( SWEEP-1, 2 ) * N + J1
352: ELSE
353: VPOS = MOD( SWEEP-1, 2 ) * N + J1
354: TAUPOS = MOD( SWEEP-1, 2 ) * N + J1
355: ENDIF
356: *
357: V( VPOS ) = ONE
358: DO 40 I = 1, LM-1
359: V( VPOS+I ) = A( DPOS+NB+I, ST )
360: A( DPOS+NB+I, ST ) = ZERO
361: 40 CONTINUE
362: CALL DLARFG( LM, A( DPOS+NB, ST ), V( VPOS+1 ), 1,
363: $ TAU( TAUPOS ) )
364: *
365: CALL DLARFX( 'Left', LM, LN-1, V( VPOS ),
366: $ ( TAU( TAUPOS ) ),
367: $ A( DPOS+NB-1, ST+1 ), LDA-1, WORK)
368:
369: ENDIF
370: ENDIF
371: ENDIF
372: *
373: RETURN
374: *
375: * END OF DSB2ST_KERNELS
376: *
377: END
CVSweb interface <joel.bertrand@systella.fr>