1: *> \brief \b DSB2ST_KERNELS
2: *
3: * @generated from zhb2st_kernels.f, fortran z -> d, Wed Dec 7 08:22:39 2016
4: *
5: * =========== DOCUMENTATION ===========
6: *
7: * Online html documentation available at
8: * http://www.netlib.org/lapack/explore-html/
9: *
10: *> \htmlonly
11: *> Download DSB2ST_KERNELS + dependencies
12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/dsb2st_kernels.f">
13: *> [TGZ]</a>
14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/dsb2st_kernels.f">
15: *> [ZIP]</a>
16: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/dsb2st_kernels.f">
17: *> [TXT]</a>
18: *> \endhtmlonly
19: *
20: * Definition:
21: * ===========
22: *
23: * SUBROUTINE DSB2ST_KERNELS( UPLO, WANTZ, TTYPE,
24: * ST, ED, SWEEP, N, NB, IB,
25: * A, LDA, V, TAU, LDVT, WORK)
26: *
27: * IMPLICIT NONE
28: *
29: * .. Scalar Arguments ..
30: * CHARACTER UPLO
31: * LOGICAL WANTZ
32: * INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT
33: * ..
34: * .. Array Arguments ..
35: * DOUBLE PRECISION A( LDA, * ), V( * ),
36: * TAU( * ), WORK( * )
37: *
38: *> \par Purpose:
39: * =============
40: *>
41: *> \verbatim
42: *>
43: *> DSB2ST_KERNELS is an internal routine used by the DSYTRD_SB2ST
44: *> subroutine.
45: *> \endverbatim
46: *
47: * Arguments:
48: * ==========
49: *
50: *> @param[in] n
51: *> The order of the matrix A.
52: *>
53: *> @param[in] nb
54: *> The size of the band.
55: *>
56: *> @param[in, out] A
57: *> A pointer to the matrix A.
58: *>
59: *> @param[in] lda
60: *> The leading dimension of the matrix A.
61: *>
62: *> @param[out] V
63: *> DOUBLE PRECISION array, dimension 2*n if eigenvalues only are
64: *> requested or to be queried for vectors.
65: *>
66: *> @param[out] TAU
67: *> DOUBLE PRECISION array, dimension (2*n).
68: *> The scalar factors of the Householder reflectors are stored
69: *> in this array.
70: *>
71: *> @param[in] st
72: *> internal parameter for indices.
73: *>
74: *> @param[in] ed
75: *> internal parameter for indices.
76: *>
77: *> @param[in] sweep
78: *> internal parameter for indices.
79: *>
80: *> @param[in] Vblksiz
81: *> internal parameter for indices.
82: *>
83: *> @param[in] wantz
84: *> logical which indicate if Eigenvalue are requested or both
85: *> Eigenvalue/Eigenvectors.
86: *>
87: *> @param[in] work
88: *> Workspace of size nb.
89: *>
90: *> \par Further Details:
91: * =====================
92: *>
93: *> \verbatim
94: *>
95: *> Implemented by Azzam Haidar.
96: *>
97: *> All details are available on technical report, SC11, SC13 papers.
98: *>
99: *> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
100: *> Parallel reduction to condensed forms for symmetric eigenvalue problems
101: *> using aggregated fine-grained and memory-aware kernels. In Proceedings
102: *> of 2011 International Conference for High Performance Computing,
103: *> Networking, Storage and Analysis (SC '11), New York, NY, USA,
104: *> Article 8 , 11 pages.
105: *> http://doi.acm.org/10.1145/2063384.2063394
106: *>
107: *> A. Haidar, J. Kurzak, P. Luszczek, 2013.
108: *> An improved parallel singular value algorithm and its implementation
109: *> for multicore hardware, In Proceedings of 2013 International Conference
110: *> for High Performance Computing, Networking, Storage and Analysis (SC '13).
111: *> Denver, Colorado, USA, 2013.
112: *> Article 90, 12 pages.
113: *> http://doi.acm.org/10.1145/2503210.2503292
114: *>
115: *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
116: *> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
117: *> calculations based on fine-grained memory aware tasks.
118: *> International Journal of High Performance Computing Applications.
119: *> Volume 28 Issue 2, Pages 196-209, May 2014.
120: *> http://hpc.sagepub.com/content/28/2/196
121: *>
122: *> \endverbatim
123: *>
124: * =====================================================================
125: SUBROUTINE DSB2ST_KERNELS( UPLO, WANTZ, TTYPE,
126: $ ST, ED, SWEEP, N, NB, IB,
127: $ A, LDA, V, TAU, LDVT, WORK)
128: *
129: IMPLICIT NONE
130: *
131: * -- LAPACK computational routine (version 3.7.0) --
132: * -- LAPACK is a software package provided by Univ. of Tennessee, --
133: * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
134: * December 2016
135: *
136: * .. Scalar Arguments ..
137: CHARACTER UPLO
138: LOGICAL WANTZ
139: INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT
140: * ..
141: * .. Array Arguments ..
142: DOUBLE PRECISION A( LDA, * ), V( * ),
143: $ TAU( * ), WORK( * )
144: * ..
145: *
146: * =====================================================================
147: *
148: * .. Parameters ..
149: DOUBLE PRECISION ZERO, ONE
150: PARAMETER ( ZERO = 0.0D+0,
151: $ ONE = 1.0D+0 )
152: * ..
153: * .. Local Scalars ..
154: LOGICAL UPPER
155: INTEGER I, J1, J2, LM, LN, VPOS, TAUPOS,
156: $ DPOS, OFDPOS, AJETER
157: DOUBLE PRECISION CTMP
158: * ..
159: * .. External Subroutines ..
160: EXTERNAL DLARFG, DLARFX, DLARFY
161: * ..
162: * .. Intrinsic Functions ..
163: INTRINSIC MOD
164: * .. External Functions ..
165: LOGICAL LSAME
166: EXTERNAL LSAME
167: * ..
168: * ..
169: * .. Executable Statements ..
170: *
171: AJETER = IB + LDVT
172: UPPER = LSAME( UPLO, 'U' )
173:
174: IF( UPPER ) THEN
175: DPOS = 2 * NB + 1
176: OFDPOS = 2 * NB
177: ELSE
178: DPOS = 1
179: OFDPOS = 2
180: ENDIF
181:
182: *
183: * Upper case
184: *
185: IF( UPPER ) THEN
186: *
187: IF( WANTZ ) THEN
188: VPOS = MOD( SWEEP-1, 2 ) * N + ST
189: TAUPOS = MOD( SWEEP-1, 2 ) * N + ST
190: ELSE
191: VPOS = MOD( SWEEP-1, 2 ) * N + ST
192: TAUPOS = MOD( SWEEP-1, 2 ) * N + ST
193: ENDIF
194: *
195: IF( TTYPE.EQ.1 ) THEN
196: LM = ED - ST + 1
197: *
198: V( VPOS ) = ONE
199: DO 10 I = 1, LM-1
200: V( VPOS+I ) = ( A( OFDPOS-I, ST+I ) )
201: A( OFDPOS-I, ST+I ) = ZERO
202: 10 CONTINUE
203: CTMP = ( A( OFDPOS, ST ) )
204: CALL DLARFG( LM, CTMP, V( VPOS+1 ), 1,
205: $ TAU( TAUPOS ) )
206: A( OFDPOS, ST ) = CTMP
207: *
208: LM = ED - ST + 1
209: CALL DLARFY( UPLO, LM, V( VPOS ), 1,
210: $ ( TAU( TAUPOS ) ),
211: $ A( DPOS, ST ), LDA-1, WORK)
212: ENDIF
213: *
214: IF( TTYPE.EQ.3 ) THEN
215: *
216: LM = ED - ST + 1
217: CALL DLARFY( UPLO, LM, V( VPOS ), 1,
218: $ ( TAU( TAUPOS ) ),
219: $ A( DPOS, ST ), LDA-1, WORK)
220: ENDIF
221: *
222: IF( TTYPE.EQ.2 ) THEN
223: J1 = ED+1
224: J2 = MIN( ED+NB, N )
225: LN = ED-ST+1
226: LM = J2-J1+1
227: IF( LM.GT.0) THEN
228: CALL DLARFX( 'Left', LN, LM, V( VPOS ),
229: $ ( TAU( TAUPOS ) ),
230: $ A( DPOS-NB, J1 ), LDA-1, WORK)
231: *
232: IF( WANTZ ) THEN
233: VPOS = MOD( SWEEP-1, 2 ) * N + J1
234: TAUPOS = MOD( SWEEP-1, 2 ) * N + J1
235: ELSE
236: VPOS = MOD( SWEEP-1, 2 ) * N + J1
237: TAUPOS = MOD( SWEEP-1, 2 ) * N + J1
238: ENDIF
239: *
240: V( VPOS ) = ONE
241: DO 30 I = 1, LM-1
242: V( VPOS+I ) =
243: $ ( A( DPOS-NB-I, J1+I ) )
244: A( DPOS-NB-I, J1+I ) = ZERO
245: 30 CONTINUE
246: CTMP = ( A( DPOS-NB, J1 ) )
247: CALL DLARFG( LM, CTMP, V( VPOS+1 ), 1, TAU( TAUPOS ) )
248: A( DPOS-NB, J1 ) = CTMP
249: *
250: CALL DLARFX( 'Right', LN-1, LM, V( VPOS ),
251: $ TAU( TAUPOS ),
252: $ A( DPOS-NB+1, J1 ), LDA-1, WORK)
253: ENDIF
254: ENDIF
255: *
256: * Lower case
257: *
258: ELSE
259: *
260: IF( WANTZ ) THEN
261: VPOS = MOD( SWEEP-1, 2 ) * N + ST
262: TAUPOS = MOD( SWEEP-1, 2 ) * N + ST
263: ELSE
264: VPOS = MOD( SWEEP-1, 2 ) * N + ST
265: TAUPOS = MOD( SWEEP-1, 2 ) * N + ST
266: ENDIF
267: *
268: IF( TTYPE.EQ.1 ) THEN
269: LM = ED - ST + 1
270: *
271: V( VPOS ) = ONE
272: DO 20 I = 1, LM-1
273: V( VPOS+I ) = A( OFDPOS+I, ST-1 )
274: A( OFDPOS+I, ST-1 ) = ZERO
275: 20 CONTINUE
276: CALL DLARFG( LM, A( OFDPOS, ST-1 ), V( VPOS+1 ), 1,
277: $ TAU( TAUPOS ) )
278: *
279: LM = ED - ST + 1
280: *
281: CALL DLARFY( UPLO, LM, V( VPOS ), 1,
282: $ ( TAU( TAUPOS ) ),
283: $ A( DPOS, ST ), LDA-1, WORK)
284:
285: ENDIF
286: *
287: IF( TTYPE.EQ.3 ) THEN
288: LM = ED - ST + 1
289: *
290: CALL DLARFY( UPLO, LM, V( VPOS ), 1,
291: $ ( TAU( TAUPOS ) ),
292: $ A( DPOS, ST ), LDA-1, WORK)
293:
294: ENDIF
295: *
296: IF( TTYPE.EQ.2 ) THEN
297: J1 = ED+1
298: J2 = MIN( ED+NB, N )
299: LN = ED-ST+1
300: LM = J2-J1+1
301: *
302: IF( LM.GT.0) THEN
303: CALL DLARFX( 'Right', LM, LN, V( VPOS ),
304: $ TAU( TAUPOS ), A( DPOS+NB, ST ),
305: $ LDA-1, WORK)
306: *
307: IF( WANTZ ) THEN
308: VPOS = MOD( SWEEP-1, 2 ) * N + J1
309: TAUPOS = MOD( SWEEP-1, 2 ) * N + J1
310: ELSE
311: VPOS = MOD( SWEEP-1, 2 ) * N + J1
312: TAUPOS = MOD( SWEEP-1, 2 ) * N + J1
313: ENDIF
314: *
315: V( VPOS ) = ONE
316: DO 40 I = 1, LM-1
317: V( VPOS+I ) = A( DPOS+NB+I, ST )
318: A( DPOS+NB+I, ST ) = ZERO
319: 40 CONTINUE
320: CALL DLARFG( LM, A( DPOS+NB, ST ), V( VPOS+1 ), 1,
321: $ TAU( TAUPOS ) )
322: *
323: CALL DLARFX( 'Left', LM, LN-1, V( VPOS ),
324: $ ( TAU( TAUPOS ) ),
325: $ A( DPOS+NB-1, ST+1 ), LDA-1, WORK)
326:
327: ENDIF
328: ENDIF
329: ENDIF
330: *
331: RETURN
332: *
333: * END OF DSB2ST_KERNELS
334: *
335: END
CVSweb interface <joel.bertrand@systella.fr>