1: *> \brief \b IPARAM2STAGE
2: *
3: * =========== DOCUMENTATION ===========
4: *
5: * Online html documentation available at
6: * http://www.netlib.org/lapack/explore-html/
7: *
8: *> \htmlonly
9: *> Download IPARAM2STAGE + dependencies
10: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/iparam2stage.F">
11: *> [TGZ]</a>
12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/iparam2stage.F">
13: *> [ZIP]</a>
14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/iparam2stage.F">
15: *> [TXT]</a>
16: *> \endhtmlonly
17: *
18: * Definition:
19: * ===========
20: *
21: * INTEGER FUNCTION IPARAM2STAGE( ISPEC, NAME, OPTS,
22: * NI, NBI, IBI, NXI )
23: * #if defined(_OPENMP)
24: * use omp_lib
25: * #endif
26: * IMPLICIT NONE
27: *
28: * .. Scalar Arguments ..
29: * CHARACTER*( * ) NAME, OPTS
30: * INTEGER ISPEC, NI, NBI, IBI, NXI
31: *
32: *> \par Purpose:
33: * =============
34: *>
35: *> \verbatim
36: *>
37: *> This program sets problem and machine dependent parameters
38: *> useful for xHETRD_2STAGE, xHETRD_HE2HB, xHETRD_HB2ST,
39: *> xGEBRD_2STAGE, xGEBRD_GE2GB, xGEBRD_GB2BD
40: *> and related subroutines for eigenvalue problems.
41: *> It is called whenever ILAENV is called with 17 <= ISPEC <= 21.
42: *> It is called whenever ILAENV2STAGE is called with 1 <= ISPEC <= 5
43: *> with a direct conversion ISPEC + 16.
44: *> \endverbatim
45: *
46: * Arguments:
47: * ==========
48: *
49: *> \param[in] ISPEC
50: *> \verbatim
51: *> ISPEC is integer scalar
52: *> ISPEC specifies which tunable parameter IPARAM2STAGE should
53: *> return.
54: *>
55: *> ISPEC=17: the optimal blocksize nb for the reduction to
56: *> BAND
57: *>
58: *> ISPEC=18: the optimal blocksize ib for the eigenvectors
59: *> singular vectors update routine
60: *>
61: *> ISPEC=19: The length of the array that store the Housholder
62: *> representation for the second stage
63: *> Band to Tridiagonal or Bidiagonal
64: *>
65: *> ISPEC=20: The workspace needed for the routine in input.
66: *>
67: *> ISPEC=21: For future release.
68: *> \endverbatim
69: *>
70: *> \param[in] NAME
71: *> \verbatim
72: *> NAME is character string
73: *> Name of the calling subroutine
74: *> \endverbatim
75: *>
76: *> \param[in] OPTS
77: *> \verbatim
78: *> OPTS is CHARACTER*(*)
79: *> The character options to the subroutine NAME, concatenated
80: *> into a single character string. For example, UPLO = 'U',
81: *> TRANS = 'T', and DIAG = 'N' for a triangular routine would
82: *> be specified as OPTS = 'UTN'.
83: *> \endverbatim
84: *>
85: *> \param[in] NI
86: *> \verbatim
87: *> NI is INTEGER which is the size of the matrix
88: *> \endverbatim
89: *>
90: *> \param[in] NBI
91: *> \verbatim
92: *> NBI is INTEGER which is the used in the reduciton,
93: *> (e.g., the size of the band), needed to compute workspace
94: *> and LHOUS2.
95: *> \endverbatim
96: *>
97: *> \param[in] IBI
98: *> \verbatim
99: *> IBI is INTEGER which represent the IB of the reduciton,
100: *> needed to compute workspace and LHOUS2.
101: *> \endverbatim
102: *>
103: *> \param[in] NXI
104: *> \verbatim
105: *> NXI is INTEGER needed in the future release.
106: *> \endverbatim
107: *
108: * Authors:
109: * ========
110: *
111: *> \author Univ. of Tennessee
112: *> \author Univ. of California Berkeley
113: *> \author Univ. of Colorado Denver
114: *> \author NAG Ltd.
115: *
116: *> \ingroup auxOTHERauxiliary
117: *
118: *> \par Further Details:
119: * =====================
120: *>
121: *> \verbatim
122: *>
123: *> Implemented by Azzam Haidar.
124: *>
125: *> All detail are available on technical report, SC11, SC13 papers.
126: *>
127: *> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
128: *> Parallel reduction to condensed forms for symmetric eigenvalue problems
129: *> using aggregated fine-grained and memory-aware kernels. In Proceedings
130: *> of 2011 International Conference for High Performance Computing,
131: *> Networking, Storage and Analysis (SC '11), New York, NY, USA,
132: *> Article 8 , 11 pages.
133: *> http://doi.acm.org/10.1145/2063384.2063394
134: *>
135: *> A. Haidar, J. Kurzak, P. Luszczek, 2013.
136: *> An improved parallel singular value algorithm and its implementation
137: *> for multicore hardware, In Proceedings of 2013 International Conference
138: *> for High Performance Computing, Networking, Storage and Analysis (SC '13).
139: *> Denver, Colorado, USA, 2013.
140: *> Article 90, 12 pages.
141: *> http://doi.acm.org/10.1145/2503210.2503292
142: *>
143: *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
144: *> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
145: *> calculations based on fine-grained memory aware tasks.
146: *> International Journal of High Performance Computing Applications.
147: *> Volume 28 Issue 2, Pages 196-209, May 2014.
148: *> http://hpc.sagepub.com/content/28/2/196
149: *>
150: *> \endverbatim
151: *>
152: * =====================================================================
153: INTEGER FUNCTION IPARAM2STAGE( ISPEC, NAME, OPTS,
154: $ NI, NBI, IBI, NXI )
155: #if defined(_OPENMP)
156: use omp_lib
157: #endif
158: IMPLICIT NONE
159: *
160: * -- LAPACK auxiliary routine --
161: * -- LAPACK is a software package provided by Univ. of Tennessee, --
162: * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
163: *
164: * .. Scalar Arguments ..
165: CHARACTER*( * ) NAME, OPTS
166: INTEGER ISPEC, NI, NBI, IBI, NXI
167: *
168: * ================================================================
169: * ..
170: * .. Local Scalars ..
171: INTEGER I, IC, IZ, KD, IB, LHOUS, LWORK, NTHREADS,
172: $ FACTOPTNB, QROPTNB, LQOPTNB
173: LOGICAL RPREC, CPREC
174: CHARACTER PREC*1, ALGO*3, STAG*5, SUBNAM*12, VECT*1
175: * ..
176: * .. Intrinsic Functions ..
177: INTRINSIC CHAR, ICHAR, MAX
178: * ..
179: * .. External Functions ..
180: INTEGER ILAENV
181: EXTERNAL ILAENV
182: * ..
183: * .. Executable Statements ..
184: *
185: * Invalid value for ISPEC
186: *
187: IF( (ISPEC.LT.17).OR.(ISPEC.GT.21) ) THEN
188: IPARAM2STAGE = -1
189: RETURN
190: ENDIF
191: *
192: * Get the number of threads
193: *
194: NTHREADS = 1
195: #if defined(_OPENMP)
196: !$OMP PARALLEL
197: NTHREADS = OMP_GET_NUM_THREADS()
198: !$OMP END PARALLEL
199: #endif
200: * WRITE(*,*) 'IPARAM VOICI NTHREADS ISPEC ',NTHREADS, ISPEC
201: *
202: IF( ISPEC .NE. 19 ) THEN
203: *
204: * Convert NAME to upper case if the first character is lower case.
205: *
206: IPARAM2STAGE = -1
207: SUBNAM = NAME
208: IC = ICHAR( SUBNAM( 1: 1 ) )
209: IZ = ICHAR( 'Z' )
210: IF( IZ.EQ.90 .OR. IZ.EQ.122 ) THEN
211: *
212: * ASCII character set
213: *
214: IF( IC.GE.97 .AND. IC.LE.122 ) THEN
215: SUBNAM( 1: 1 ) = CHAR( IC-32 )
216: DO 100 I = 2, 12
217: IC = ICHAR( SUBNAM( I: I ) )
218: IF( IC.GE.97 .AND. IC.LE.122 )
219: $ SUBNAM( I: I ) = CHAR( IC-32 )
220: 100 CONTINUE
221: END IF
222: *
223: ELSE IF( IZ.EQ.233 .OR. IZ.EQ.169 ) THEN
224: *
225: * EBCDIC character set
226: *
227: IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR.
228: $ ( IC.GE.145 .AND. IC.LE.153 ) .OR.
229: $ ( IC.GE.162 .AND. IC.LE.169 ) ) THEN
230: SUBNAM( 1: 1 ) = CHAR( IC+64 )
231: DO 110 I = 2, 12
232: IC = ICHAR( SUBNAM( I: I ) )
233: IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR.
234: $ ( IC.GE.145 .AND. IC.LE.153 ) .OR.
235: $ ( IC.GE.162 .AND. IC.LE.169 ) )SUBNAM( I:
236: $ I ) = CHAR( IC+64 )
237: 110 CONTINUE
238: END IF
239: *
240: ELSE IF( IZ.EQ.218 .OR. IZ.EQ.250 ) THEN
241: *
242: * Prime machines: ASCII+128
243: *
244: IF( IC.GE.225 .AND. IC.LE.250 ) THEN
245: SUBNAM( 1: 1 ) = CHAR( IC-32 )
246: DO 120 I = 2, 12
247: IC = ICHAR( SUBNAM( I: I ) )
248: IF( IC.GE.225 .AND. IC.LE.250 )
249: $ SUBNAM( I: I ) = CHAR( IC-32 )
250: 120 CONTINUE
251: END IF
252: END IF
253: *
254: PREC = SUBNAM( 1: 1 )
255: ALGO = SUBNAM( 4: 6 )
256: STAG = SUBNAM( 8:12 )
257: RPREC = PREC.EQ.'S' .OR. PREC.EQ.'D'
258: CPREC = PREC.EQ.'C' .OR. PREC.EQ.'Z'
259: *
260: * Invalid value for PRECISION
261: *
262: IF( .NOT.( RPREC .OR. CPREC ) ) THEN
263: IPARAM2STAGE = -1
264: RETURN
265: ENDIF
266: ENDIF
267: * WRITE(*,*),'RPREC,CPREC ',RPREC,CPREC,
268: * $ ' ALGO ',ALGO,' STAGE ',STAG
269: *
270: *
271: IF (( ISPEC .EQ. 17 ) .OR. ( ISPEC .EQ. 18 )) THEN
272: *
273: * ISPEC = 17, 18: block size KD, IB
274: * Could be also dependent from N but for now it
275: * depend only on sequential or parallel
276: *
277: IF( NTHREADS.GT.4 ) THEN
278: IF( CPREC ) THEN
279: KD = 128
280: IB = 32
281: ELSE
282: KD = 160
283: IB = 40
284: ENDIF
285: ELSE IF( NTHREADS.GT.1 ) THEN
286: IF( CPREC ) THEN
287: KD = 64
288: IB = 32
289: ELSE
290: KD = 64
291: IB = 32
292: ENDIF
293: ELSE
294: IF( CPREC ) THEN
295: KD = 16
296: IB = 16
297: ELSE
298: KD = 32
299: IB = 16
300: ENDIF
301: ENDIF
302: IF( ISPEC.EQ.17 ) IPARAM2STAGE = KD
303: IF( ISPEC.EQ.18 ) IPARAM2STAGE = IB
304: *
305: ELSE IF ( ISPEC .EQ. 19 ) THEN
306: *
307: * ISPEC = 19:
308: * LHOUS length of the Houselholder representation
309: * matrix (V,T) of the second stage. should be >= 1.
310: *
311: * Will add the VECT OPTION HERE next release
312: VECT = OPTS(1:1)
313: IF( VECT.EQ.'N' ) THEN
314: LHOUS = MAX( 1, 4*NI )
315: ELSE
316: * This is not correct, it need to call the ALGO and the stage2
317: LHOUS = MAX( 1, 4*NI ) + IBI
318: ENDIF
319: IF( LHOUS.GE.0 ) THEN
320: IPARAM2STAGE = LHOUS
321: ELSE
322: IPARAM2STAGE = -1
323: ENDIF
324: *
325: ELSE IF ( ISPEC .EQ. 20 ) THEN
326: *
327: * ISPEC = 20: (21 for future use)
328: * LWORK length of the workspace for
329: * either or both stages for TRD and BRD. should be >= 1.
330: * TRD:
331: * TRD_stage 1: = LT + LW + LS1 + LS2
332: * = LDT*KD + N*KD + N*MAX(KD,FACTOPTNB) + LDS2*KD
333: * where LDT=LDS2=KD
334: * = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD
335: * TRD_stage 2: = (2NB+1)*N + KD*NTHREADS
336: * TRD_both : = max(stage1,stage2) + AB ( AB=(KD+1)*N )
337: * = N*KD + N*max(KD+1,FACTOPTNB)
338: * + max(2*KD*KD, KD*NTHREADS)
339: * + (KD+1)*N
340: LWORK = -1
341: SUBNAM(1:1) = PREC
342: SUBNAM(2:6) = 'GEQRF'
343: QROPTNB = ILAENV( 1, SUBNAM, ' ', NI, NBI, -1, -1 )
344: SUBNAM(2:6) = 'GELQF'
345: LQOPTNB = ILAENV( 1, SUBNAM, ' ', NBI, NI, -1, -1 )
346: * Could be QR or LQ for TRD and the max for BRD
347: FACTOPTNB = MAX(QROPTNB, LQOPTNB)
348: IF( ALGO.EQ.'TRD' ) THEN
349: IF( STAG.EQ.'2STAG' ) THEN
350: LWORK = NI*NBI + NI*MAX(NBI+1,FACTOPTNB)
351: $ + MAX(2*NBI*NBI, NBI*NTHREADS)
352: $ + (NBI+1)*NI
353: ELSE IF( (STAG.EQ.'HE2HB').OR.(STAG.EQ.'SY2SB') ) THEN
354: LWORK = NI*NBI + NI*MAX(NBI,FACTOPTNB) + 2*NBI*NBI
355: ELSE IF( (STAG.EQ.'HB2ST').OR.(STAG.EQ.'SB2ST') ) THEN
356: LWORK = (2*NBI+1)*NI + NBI*NTHREADS
357: ENDIF
358: ELSE IF( ALGO.EQ.'BRD' ) THEN
359: IF( STAG.EQ.'2STAG' ) THEN
360: LWORK = 2*NI*NBI + NI*MAX(NBI+1,FACTOPTNB)
361: $ + MAX(2*NBI*NBI, NBI*NTHREADS)
362: $ + (NBI+1)*NI
363: ELSE IF( STAG.EQ.'GE2GB' ) THEN
364: LWORK = NI*NBI + NI*MAX(NBI,FACTOPTNB) + 2*NBI*NBI
365: ELSE IF( STAG.EQ.'GB2BD' ) THEN
366: LWORK = (3*NBI+1)*NI + NBI*NTHREADS
367: ENDIF
368: ENDIF
369: LWORK = MAX ( 1, LWORK )
370:
371: IF( LWORK.GT.0 ) THEN
372: IPARAM2STAGE = LWORK
373: ELSE
374: IPARAM2STAGE = -1
375: ENDIF
376: *
377: ELSE IF ( ISPEC .EQ. 21 ) THEN
378: *
379: * ISPEC = 21 for future use
380: IPARAM2STAGE = NXI
381: ENDIF
382: *
383: * ==== End of IPARAM2STAGE ====
384: *
385: END
CVSweb interface <joel.bertrand@systella.fr>