1: *> \brief \b ZHB2ST_KERNELS
2: *
3: * @precisions fortran z -> s d c
4: *
5: * =========== DOCUMENTATION ===========
6: *
7: * Online html documentation available at
8: * http://www.netlib.org/lapack/explore-html/
9: *
10: *> \htmlonly
11: *> Download ZHB2ST_KERNELS + dependencies
12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/zhb2st_kernels.f">
13: *> [TGZ]</a>
14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/zhb2st_kernels.f">
15: *> [ZIP]</a>
16: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/zhb2st_kernels.f">
17: *> [TXT]</a>
18: *> \endhtmlonly
19: *
20: * Definition:
21: * ===========
22: *
23: * SUBROUTINE ZHB2ST_KERNELS( UPLO, WANTZ, TTYPE,
24: * ST, ED, SWEEP, N, NB, IB,
25: * A, LDA, V, TAU, LDVT, WORK)
26: *
27: * IMPLICIT NONE
28: *
29: * .. Scalar Arguments ..
30: * CHARACTER UPLO
31: * LOGICAL WANTZ
32: * INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT
33: * ..
34: * .. Array Arguments ..
35: * COMPLEX*16 A( LDA, * ), V( * ),
36: * TAU( * ), WORK( * )
37: *
38: *> \par Purpose:
39: * =============
40: *>
41: *> \verbatim
42: *>
43: *> ZHB2ST_KERNELS is an internal routine used by the ZHETRD_HB2ST
44: *> subroutine.
45: *> \endverbatim
46: *
47: * Arguments:
48: * ==========
49: *
50: *> \param[in] UPLO
51: *> \verbatim
52: *> UPLO is CHARACTER*1
53: *> \endverbatim
54: *>
55: *> \param[in] WANTZ
56: *> \verbatim
57: *> WANTZ is LOGICAL which indicate if Eigenvalue are requested or both
58: *> Eigenvalue/Eigenvectors.
59: *> \endverbatim
60: *>
61: *> \param[in] TTYPE
62: *> \verbatim
63: *> TTYPE is INTEGER
64: *> \endverbatim
65: *>
66: *> \param[in] ST
67: *> \verbatim
68: *> ST is INTEGER
69: *> internal parameter for indices.
70: *> \endverbatim
71: *>
72: *> \param[in] ED
73: *> \verbatim
74: *> ED is INTEGER
75: *> internal parameter for indices.
76: *> \endverbatim
77: *>
78: *> \param[in] SWEEP
79: *> \verbatim
80: *> SWEEP is INTEGER
81: *> internal parameter for indices.
82: *> \endverbatim
83: *>
84: *> \param[in] N
85: *> \verbatim
86: *> N is INTEGER. The order of the matrix A.
87: *> \endverbatim
88: *>
89: *> \param[in] NB
90: *> \verbatim
91: *> NB is INTEGER. The size of the band.
92: *> \endverbatim
93: *>
94: *> \param[in] IB
95: *> \verbatim
96: *> IB is INTEGER.
97: *> \endverbatim
98: *>
99: *> \param[in, out] A
100: *> \verbatim
101: *> A is COMPLEX*16 array. A pointer to the matrix A.
102: *> \endverbatim
103: *>
104: *> \param[in] LDA
105: *> \verbatim
106: *> LDA is INTEGER. The leading dimension of the matrix A.
107: *> \endverbatim
108: *>
109: *> \param[out] V
110: *> \verbatim
111: *> V is COMPLEX*16 array, dimension 2*n if eigenvalues only are
112: *> requested or to be queried for vectors.
113: *> \endverbatim
114: *>
115: *> \param[out] TAU
116: *> \verbatim
117: *> TAU is COMPLEX*16 array, dimension (2*n).
118: *> The scalar factors of the Householder reflectors are stored
119: *> in this array.
120: *> \endverbatim
121: *>
122: *> \param[in] LDVT
123: *> \verbatim
124: *> LDVT is INTEGER.
125: *> \endverbatim
126: *>
127: *> \param[out] WORK
128: *> \verbatim
129: *> WORK is COMPLEX*16 array. Workspace of size nb.
130: *> \endverbatim
131: *>
132: *> \par Further Details:
133: * =====================
134: *>
135: *> \verbatim
136: *>
137: *> Implemented by Azzam Haidar.
138: *>
139: *> All details are available on technical report, SC11, SC13 papers.
140: *>
141: *> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
142: *> Parallel reduction to condensed forms for symmetric eigenvalue problems
143: *> using aggregated fine-grained and memory-aware kernels. In Proceedings
144: *> of 2011 International Conference for High Performance Computing,
145: *> Networking, Storage and Analysis (SC '11), New York, NY, USA,
146: *> Article 8 , 11 pages.
147: *> http://doi.acm.org/10.1145/2063384.2063394
148: *>
149: *> A. Haidar, J. Kurzak, P. Luszczek, 2013.
150: *> An improved parallel singular value algorithm and its implementation
151: *> for multicore hardware, In Proceedings of 2013 International Conference
152: *> for High Performance Computing, Networking, Storage and Analysis (SC '13).
153: *> Denver, Colorado, USA, 2013.
154: *> Article 90, 12 pages.
155: *> http://doi.acm.org/10.1145/2503210.2503292
156: *>
157: *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
158: *> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
159: *> calculations based on fine-grained memory aware tasks.
160: *> International Journal of High Performance Computing Applications.
161: *> Volume 28 Issue 2, Pages 196-209, May 2014.
162: *> http://hpc.sagepub.com/content/28/2/196
163: *>
164: *> \endverbatim
165: *>
166: * =====================================================================
167: SUBROUTINE ZHB2ST_KERNELS( UPLO, WANTZ, TTYPE,
168: $ ST, ED, SWEEP, N, NB, IB,
169: $ A, LDA, V, TAU, LDVT, WORK)
170: *
171: IMPLICIT NONE
172: *
173: * -- LAPACK computational routine --
174: * -- LAPACK is a software package provided by Univ. of Tennessee, --
175: * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
176: *
177: * .. Scalar Arguments ..
178: CHARACTER UPLO
179: LOGICAL WANTZ
180: INTEGER TTYPE, ST, ED, SWEEP, N, NB, IB, LDA, LDVT
181: * ..
182: * .. Array Arguments ..
183: COMPLEX*16 A( LDA, * ), V( * ),
184: $ TAU( * ), WORK( * )
185: * ..
186: *
187: * =====================================================================
188: *
189: * .. Parameters ..
190: COMPLEX*16 ZERO, ONE
191: PARAMETER ( ZERO = ( 0.0D+0, 0.0D+0 ),
192: $ ONE = ( 1.0D+0, 0.0D+0 ) )
193: * ..
194: * .. Local Scalars ..
195: LOGICAL UPPER
196: INTEGER I, J1, J2, LM, LN, VPOS, TAUPOS,
197: $ DPOS, OFDPOS, AJETER
198: COMPLEX*16 CTMP
199: * ..
200: * .. External Subroutines ..
201: EXTERNAL ZLARFG, ZLARFX, ZLARFY
202: * ..
203: * .. Intrinsic Functions ..
204: INTRINSIC DCONJG, MOD
205: * .. External Functions ..
206: LOGICAL LSAME
207: EXTERNAL LSAME
208: * ..
209: * ..
210: * .. Executable Statements ..
211: *
212: AJETER = IB + LDVT
213: UPPER = LSAME( UPLO, 'U' )
214:
215: IF( UPPER ) THEN
216: DPOS = 2 * NB + 1
217: OFDPOS = 2 * NB
218: ELSE
219: DPOS = 1
220: OFDPOS = 2
221: ENDIF
222:
223: *
224: * Upper case
225: *
226: IF( UPPER ) THEN
227: *
228: IF( WANTZ ) THEN
229: VPOS = MOD( SWEEP-1, 2 ) * N + ST
230: TAUPOS = MOD( SWEEP-1, 2 ) * N + ST
231: ELSE
232: VPOS = MOD( SWEEP-1, 2 ) * N + ST
233: TAUPOS = MOD( SWEEP-1, 2 ) * N + ST
234: ENDIF
235: *
236: IF( TTYPE.EQ.1 ) THEN
237: LM = ED - ST + 1
238: *
239: V( VPOS ) = ONE
240: DO 10 I = 1, LM-1
241: V( VPOS+I ) = DCONJG( A( OFDPOS-I, ST+I ) )
242: A( OFDPOS-I, ST+I ) = ZERO
243: 10 CONTINUE
244: CTMP = DCONJG( A( OFDPOS, ST ) )
245: CALL ZLARFG( LM, CTMP, V( VPOS+1 ), 1,
246: $ TAU( TAUPOS ) )
247: A( OFDPOS, ST ) = CTMP
248: *
249: LM = ED - ST + 1
250: CALL ZLARFY( UPLO, LM, V( VPOS ), 1,
251: $ DCONJG( TAU( TAUPOS ) ),
252: $ A( DPOS, ST ), LDA-1, WORK)
253: ENDIF
254: *
255: IF( TTYPE.EQ.3 ) THEN
256: *
257: LM = ED - ST + 1
258: CALL ZLARFY( UPLO, LM, V( VPOS ), 1,
259: $ DCONJG( TAU( TAUPOS ) ),
260: $ A( DPOS, ST ), LDA-1, WORK)
261: ENDIF
262: *
263: IF( TTYPE.EQ.2 ) THEN
264: J1 = ED+1
265: J2 = MIN( ED+NB, N )
266: LN = ED-ST+1
267: LM = J2-J1+1
268: IF( LM.GT.0) THEN
269: CALL ZLARFX( 'Left', LN, LM, V( VPOS ),
270: $ DCONJG( TAU( TAUPOS ) ),
271: $ A( DPOS-NB, J1 ), LDA-1, WORK)
272: *
273: IF( WANTZ ) THEN
274: VPOS = MOD( SWEEP-1, 2 ) * N + J1
275: TAUPOS = MOD( SWEEP-1, 2 ) * N + J1
276: ELSE
277: VPOS = MOD( SWEEP-1, 2 ) * N + J1
278: TAUPOS = MOD( SWEEP-1, 2 ) * N + J1
279: ENDIF
280: *
281: V( VPOS ) = ONE
282: DO 30 I = 1, LM-1
283: V( VPOS+I ) =
284: $ DCONJG( A( DPOS-NB-I, J1+I ) )
285: A( DPOS-NB-I, J1+I ) = ZERO
286: 30 CONTINUE
287: CTMP = DCONJG( A( DPOS-NB, J1 ) )
288: CALL ZLARFG( LM, CTMP, V( VPOS+1 ), 1, TAU( TAUPOS ) )
289: A( DPOS-NB, J1 ) = CTMP
290: *
291: CALL ZLARFX( 'Right', LN-1, LM, V( VPOS ),
292: $ TAU( TAUPOS ),
293: $ A( DPOS-NB+1, J1 ), LDA-1, WORK)
294: ENDIF
295: ENDIF
296: *
297: * Lower case
298: *
299: ELSE
300: *
301: IF( WANTZ ) THEN
302: VPOS = MOD( SWEEP-1, 2 ) * N + ST
303: TAUPOS = MOD( SWEEP-1, 2 ) * N + ST
304: ELSE
305: VPOS = MOD( SWEEP-1, 2 ) * N + ST
306: TAUPOS = MOD( SWEEP-1, 2 ) * N + ST
307: ENDIF
308: *
309: IF( TTYPE.EQ.1 ) THEN
310: LM = ED - ST + 1
311: *
312: V( VPOS ) = ONE
313: DO 20 I = 1, LM-1
314: V( VPOS+I ) = A( OFDPOS+I, ST-1 )
315: A( OFDPOS+I, ST-1 ) = ZERO
316: 20 CONTINUE
317: CALL ZLARFG( LM, A( OFDPOS, ST-1 ), V( VPOS+1 ), 1,
318: $ TAU( TAUPOS ) )
319: *
320: LM = ED - ST + 1
321: *
322: CALL ZLARFY( UPLO, LM, V( VPOS ), 1,
323: $ DCONJG( TAU( TAUPOS ) ),
324: $ A( DPOS, ST ), LDA-1, WORK)
325:
326: ENDIF
327: *
328: IF( TTYPE.EQ.3 ) THEN
329: LM = ED - ST + 1
330: *
331: CALL ZLARFY( UPLO, LM, V( VPOS ), 1,
332: $ DCONJG( TAU( TAUPOS ) ),
333: $ A( DPOS, ST ), LDA-1, WORK)
334:
335: ENDIF
336: *
337: IF( TTYPE.EQ.2 ) THEN
338: J1 = ED+1
339: J2 = MIN( ED+NB, N )
340: LN = ED-ST+1
341: LM = J2-J1+1
342: *
343: IF( LM.GT.0) THEN
344: CALL ZLARFX( 'Right', LM, LN, V( VPOS ),
345: $ TAU( TAUPOS ), A( DPOS+NB, ST ),
346: $ LDA-1, WORK)
347: *
348: IF( WANTZ ) THEN
349: VPOS = MOD( SWEEP-1, 2 ) * N + J1
350: TAUPOS = MOD( SWEEP-1, 2 ) * N + J1
351: ELSE
352: VPOS = MOD( SWEEP-1, 2 ) * N + J1
353: TAUPOS = MOD( SWEEP-1, 2 ) * N + J1
354: ENDIF
355: *
356: V( VPOS ) = ONE
357: DO 40 I = 1, LM-1
358: V( VPOS+I ) = A( DPOS+NB+I, ST )
359: A( DPOS+NB+I, ST ) = ZERO
360: 40 CONTINUE
361: CALL ZLARFG( LM, A( DPOS+NB, ST ), V( VPOS+1 ), 1,
362: $ TAU( TAUPOS ) )
363: *
364: CALL ZLARFX( 'Left', LM, LN-1, V( VPOS ),
365: $ DCONJG( TAU( TAUPOS ) ),
366: $ A( DPOS+NB-1, ST+1 ), LDA-1, WORK)
367:
368: ENDIF
369: ENDIF
370: ENDIF
371: *
372: RETURN
373: *
374: * End of ZHB2ST_KERNELS
375: *
376: END
CVSweb interface <joel.bertrand@systella.fr>