1: *> \brief \b IPARAM2STAGE
2: *
3: * =========== DOCUMENTATION ===========
4: *
5: * Online html documentation available at
6: * http://www.netlib.org/lapack/explore-html/
7: *
8: *> \htmlonly
9: *> Download IPARAM2STAGE + dependencies
10: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/iparam2stage.F">
11: *> [TGZ]</a>
12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/iparam2stage.F">
13: *> [ZIP]</a>
14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/iparam2stage.F">
15: *> [TXT]</a>
16: *> \endhtmlonly
17: *
18: * Definition:
19: * ===========
20: *
21: * INTEGER FUNCTION IPARAM2STAGE( ISPEC, NAME, OPTS,
22: * NI, NBI, IBI, NXI )
23: * #if defined(_OPENMP)
24: * use omp_lib
25: * #endif
26: * IMPLICIT NONE
27: *
28: * .. Scalar Arguments ..
29: * CHARACTER*( * ) NAME, OPTS
30: * INTEGER ISPEC, NI, NBI, IBI, NXI
31: *
32: *> \par Purpose:
33: * =============
34: *>
35: *> \verbatim
36: *>
37: *> This program sets problem and machine dependent parameters
38: *> useful for xHETRD_2STAGE, xHETRD_H@2HB, xHETRD_HB2ST,
39: *> xGEBRD_2STAGE, xGEBRD_GE2GB, xGEBRD_GB2BD
40: *> and related subroutines for eigenvalue problems.
41: *> It is called whenever ILAENV is called with 17 <= ISPEC <= 21
42: *> \endverbatim
43: *
44: * Arguments:
45: * ==========
46: *
47: *> \param[in] ISPEC
48: *> \verbatim
49: *> ISPEC is integer scalar
50: *> ISPEC specifies which tunable parameter IPARAM2STAGE should
51: *> return.
52: *>
53: *> ISPEC=17: the optimal blocksize nb for the reduction to
54: * BAND
55: *>
56: *> ISPEC=18: the optimal blocksize ib for the eigenvectors
57: *> singular vectors update routine
58: *>
59: *> ISPEC=19: The length of the array that store the Housholder
60: *> representation for the second stage
61: *> Band to Tridiagonal or Bidiagonal
62: *>
63: *> ISPEC=20: The workspace needed for the routine in input.
64: *>
65: *> ISPEC=21: For future release.
66: *> \endverbatim
67: *>
68: *> \param[in] NAME
69: *> \verbatim
70: *> NAME is character string
71: *> Name of the calling subroutine
72: *> \endverbatim
73: *>
74: *> \param[in] OPTS
75: *> \verbatim
76: *> OPTS is CHARACTER*(*)
77: *> The character options to the subroutine NAME, concatenated
78: *> into a single character string. For example, UPLO = 'U',
79: *> TRANS = 'T', and DIAG = 'N' for a triangular routine would
80: *> be specified as OPTS = 'UTN'.
81: *> \endverbatim
82: *>
83: *> \param[in] NI
84: *> \verbatim
85: *> NI is INTEGER which is the size of the matrix
86: *> \endverbatim
87: *>
88: *> \param[in] NBI
89: *> \verbatim
90: *> NBI is INTEGER which is the used in the reduciton,
91: * (e.g., the size of the band), needed to compute workspace
92: * and LHOUS2.
93: *> \endverbatim
94: *>
95: *> \param[in] IBI
96: *> \verbatim
97: *> IBI is INTEGER which represent the IB of the reduciton,
98: * needed to compute workspace and LHOUS2.
99: *> \endverbatim
100: *>
101: *> \param[in] NXI
102: *> \verbatim
103: *> NXI is INTEGER needed in the future release.
104: *> \endverbatim
105: *
106: * Authors:
107: * ========
108: *
109: *> \author Univ. of Tennessee
110: *> \author Univ. of California Berkeley
111: *> \author Univ. of Colorado Denver
112: *> \author NAG Ltd.
113: *
114: *> \date June 2016
115: *
116: *> \ingroup auxOTHERauxiliary
117: *
118: *> \par Further Details:
119: * =====================
120: *>
121: *> \verbatim
122: *>
123: *> Implemented by Azzam Haidar.
124: *>
125: *> All detail are available on technical report, SC11, SC13 papers.
126: *>
127: *> Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
128: *> Parallel reduction to condensed forms for symmetric eigenvalue problems
129: *> using aggregated fine-grained and memory-aware kernels. In Proceedings
130: *> of 2011 International Conference for High Performance Computing,
131: *> Networking, Storage and Analysis (SC '11), New York, NY, USA,
132: *> Article 8 , 11 pages.
133: *> http://doi.acm.org/10.1145/2063384.2063394
134: *>
135: *> A. Haidar, J. Kurzak, P. Luszczek, 2013.
136: *> An improved parallel singular value algorithm and its implementation
137: *> for multicore hardware, In Proceedings of 2013 International Conference
138: *> for High Performance Computing, Networking, Storage and Analysis (SC '13).
139: *> Denver, Colorado, USA, 2013.
140: *> Article 90, 12 pages.
141: *> http://doi.acm.org/10.1145/2503210.2503292
142: *>
143: *> A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
144: *> A novel hybrid CPU-GPU generalized eigensolver for electronic structure
145: *> calculations based on fine-grained memory aware tasks.
146: *> International Journal of High Performance Computing Applications.
147: *> Volume 28 Issue 2, Pages 196-209, May 2014.
148: *> http://hpc.sagepub.com/content/28/2/196
149: *>
150: *> \endverbatim
151: *>
152: * =====================================================================
153: INTEGER FUNCTION IPARAM2STAGE( ISPEC, NAME, OPTS,
154: $ NI, NBI, IBI, NXI )
155: #if defined(_OPENMP)
156: use omp_lib
157: #endif
158: IMPLICIT NONE
159: *
160: * -- LAPACK auxiliary routine (version 3.7.0) --
161: * -- LAPACK is a software package provided by Univ. of Tennessee, --
162: * -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
163: * June 2016
164: *
165: * .. Scalar Arguments ..
166: CHARACTER*( * ) NAME, OPTS
167: INTEGER ISPEC, NI, NBI, IBI, NXI
168: *
169: * ================================================================
170: * ..
171: * .. Local Scalars ..
172: INTEGER I, IC, IZ, KD, IB, LHOUS, LWORK, NTHREADS,
173: $ FACTOPTNB, QROPTNB, LQOPTNB
174: LOGICAL RPREC, CPREC
175: CHARACTER PREC*1, ALGO*3, STAG*5, SUBNAM*12, VECT*3
176: * ..
177: * .. Intrinsic Functions ..
178: INTRINSIC CHAR, ICHAR, MAX
179: * ..
180: * .. External Functions ..
181: INTEGER ILAENV
182: EXTERNAL ILAENV
183: * ..
184: * .. Executable Statements ..
185: *
186: * Invalid value for ISPEC
187: *
188: IF( (ISPEC.LT.17).OR.(ISPEC.GT.21) ) THEN
189: IPARAM2STAGE = -1
190: RETURN
191: ENDIF
192: *
193: * Get the number of threads
194: *
195: NTHREADS = 1
196: #if defined(_OPENMP)
197: !$OMP PARALLEL
198: NTHREADS = OMP_GET_NUM_THREADS()
199: !$OMP END PARALLEL
200: #endif
201: * WRITE(*,*) 'IPARAM VOICI NTHREADS ISPEC ',NTHREADS, ISPEC
202: *
203: IF( ISPEC .NE. 19 ) THEN
204: *
205: * Convert NAME to upper case if the first character is lower case.
206: *
207: IPARAM2STAGE = -1
208: SUBNAM = NAME
209: IC = ICHAR( SUBNAM( 1: 1 ) )
210: IZ = ICHAR( 'Z' )
211: IF( IZ.EQ.90 .OR. IZ.EQ.122 ) THEN
212: *
213: * ASCII character set
214: *
215: IF( IC.GE.97 .AND. IC.LE.122 ) THEN
216: SUBNAM( 1: 1 ) = CHAR( IC-32 )
217: DO 100 I = 2, 12
218: IC = ICHAR( SUBNAM( I: I ) )
219: IF( IC.GE.97 .AND. IC.LE.122 )
220: $ SUBNAM( I: I ) = CHAR( IC-32 )
221: 100 CONTINUE
222: END IF
223: *
224: ELSE IF( IZ.EQ.233 .OR. IZ.EQ.169 ) THEN
225: *
226: * EBCDIC character set
227: *
228: IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR.
229: $ ( IC.GE.145 .AND. IC.LE.153 ) .OR.
230: $ ( IC.GE.162 .AND. IC.LE.169 ) ) THEN
231: SUBNAM( 1: 1 ) = CHAR( IC+64 )
232: DO 110 I = 2, 12
233: IC = ICHAR( SUBNAM( I: I ) )
234: IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR.
235: $ ( IC.GE.145 .AND. IC.LE.153 ) .OR.
236: $ ( IC.GE.162 .AND. IC.LE.169 ) )SUBNAM( I:
237: $ I ) = CHAR( IC+64 )
238: 110 CONTINUE
239: END IF
240: *
241: ELSE IF( IZ.EQ.218 .OR. IZ.EQ.250 ) THEN
242: *
243: * Prime machines: ASCII+128
244: *
245: IF( IC.GE.225 .AND. IC.LE.250 ) THEN
246: SUBNAM( 1: 1 ) = CHAR( IC-32 )
247: DO 120 I = 2, 12
248: IC = ICHAR( SUBNAM( I: I ) )
249: IF( IC.GE.225 .AND. IC.LE.250 )
250: $ SUBNAM( I: I ) = CHAR( IC-32 )
251: 120 CONTINUE
252: END IF
253: END IF
254: *
255: PREC = SUBNAM( 1: 1 )
256: ALGO = SUBNAM( 4: 6 )
257: STAG = SUBNAM( 8:12 )
258: RPREC = PREC.EQ.'S' .OR. PREC.EQ.'D'
259: CPREC = PREC.EQ.'C' .OR. PREC.EQ.'Z'
260: *
261: * Invalid value for PRECISION
262: *
263: IF( .NOT.( RPREC .OR. CPREC ) ) THEN
264: IPARAM2STAGE = -1
265: RETURN
266: ENDIF
267: ENDIF
268: * WRITE(*,*),'RPREC,CPREC ',RPREC,CPREC,
269: * $ ' ALGO ',ALGO,' STAGE ',STAG
270: *
271: *
272: IF (( ISPEC .EQ. 17 ) .OR. ( ISPEC .EQ. 18 )) THEN
273: *
274: * ISPEC = 17, 18: block size KD, IB
275: * Could be also dependent from N but for now it
276: * depend only on sequential or parallel
277: *
278: IF( NTHREADS.GT.4 ) THEN
279: IF( CPREC ) THEN
280: KD = 128
281: IB = 32
282: ELSE
283: KD = 160
284: IB = 40
285: ENDIF
286: ELSE IF( NTHREADS.GT.1 ) THEN
287: IF( CPREC ) THEN
288: KD = 64
289: IB = 32
290: ELSE
291: KD = 64
292: IB = 32
293: ENDIF
294: ELSE
295: IF( CPREC ) THEN
296: KD = 16
297: IB = 16
298: ELSE
299: KD = 32
300: IB = 16
301: ENDIF
302: ENDIF
303: IF( ISPEC.EQ.17 ) IPARAM2STAGE = KD
304: IF( ISPEC.EQ.18 ) IPARAM2STAGE = IB
305: *
306: ELSE IF ( ISPEC .EQ. 19 ) THEN
307: *
308: * ISPEC = 19:
309: * LHOUS length of the Houselholder representation
310: * matrix (V,T) of the second stage. should be >= 1.
311: *
312: * Will add the VECT OPTION HERE next release
313: VECT = OPTS(1:1)
314: IF( VECT.EQ.'N' ) THEN
315: LHOUS = MAX( 1, 4*NI )
316: ELSE
317: * This is not correct, it need to call the ALGO and the stage2
318: LHOUS = MAX( 1, 4*NI ) + IBI
319: ENDIF
320: IF( LHOUS.GE.0 ) THEN
321: IPARAM2STAGE = LHOUS
322: ELSE
323: IPARAM2STAGE = -1
324: ENDIF
325: *
326: ELSE IF ( ISPEC .EQ. 20 ) THEN
327: *
328: * ISPEC = 20: (21 for future use)
329: * LWORK length of the workspace for
330: * either or both stages for TRD and BRD. should be >= 1.
331: * TRD:
332: * TRD_stage 1: = LT + LW + LS1 + LS2
333: * = LDT*KD + N*KD + N*MAX(KD,FACTOPTNB) + LDS2*KD
334: * where LDT=LDS2=KD
335: * = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD
336: * TRD_stage 2: = (2NB+1)*N + KD*NTHREADS
337: * TRD_both : = max(stage1,stage2) + AB ( AB=(KD+1)*N )
338: * = N*KD + N*max(KD+1,FACTOPTNB)
339: * + max(2*KD*KD, KD*NTHREADS)
340: * + (KD+1)*N
341: LWORK = -1
342: SUBNAM(1:1) = PREC
343: SUBNAM(2:6) = 'GEQRF'
344: QROPTNB = ILAENV( 1, SUBNAM, ' ', NI, NBI, -1, -1 )
345: SUBNAM(2:6) = 'GELQF'
346: LQOPTNB = ILAENV( 1, SUBNAM, ' ', NBI, NI, -1, -1 )
347: * Could be QR or LQ for TRD and the max for BRD
348: FACTOPTNB = MAX(QROPTNB, LQOPTNB)
349: IF( ALGO.EQ.'TRD' ) THEN
350: IF( STAG.EQ.'2STAG' ) THEN
351: LWORK = NI*NBI + NI*MAX(NBI+1,FACTOPTNB)
352: $ + MAX(2*NBI*NBI, NBI*NTHREADS)
353: $ + (NBI+1)*NI
354: ELSE IF( (STAG.EQ.'HE2HB').OR.(STAG.EQ.'SY2SB') ) THEN
355: LWORK = NI*NBI + NI*MAX(NBI,FACTOPTNB) + 2*NBI*NBI
356: ELSE IF( (STAG.EQ.'HB2ST').OR.(STAG.EQ.'SB2ST') ) THEN
357: LWORK = (2*NBI+1)*NI + NBI*NTHREADS
358: ENDIF
359: ELSE IF( ALGO.EQ.'BRD' ) THEN
360: IF( STAG.EQ.'2STAG' ) THEN
361: LWORK = 2*NI*NBI + NI*MAX(NBI+1,FACTOPTNB)
362: $ + MAX(2*NBI*NBI, NBI*NTHREADS)
363: $ + (NBI+1)*NI
364: ELSE IF( STAG.EQ.'GE2GB' ) THEN
365: LWORK = NI*NBI + NI*MAX(NBI,FACTOPTNB) + 2*NBI*NBI
366: ELSE IF( STAG.EQ.'GB2BD' ) THEN
367: LWORK = (3*NBI+1)*NI + NBI*NTHREADS
368: ENDIF
369: ENDIF
370: LWORK = MAX ( 1, LWORK )
371:
372: IF( LWORK.GT.0 ) THEN
373: IPARAM2STAGE = LWORK
374: ELSE
375: IPARAM2STAGE = -1
376: ENDIF
377: *
378: ELSE IF ( ISPEC .EQ. 21 ) THEN
379: *
380: * ISPEC = 21 for future use
381: IPARAM2STAGE = NXI
382: ENDIF
383: *
384: * ==== End of IPARAM2STAGE ====
385: *
386: END
CVSweb interface <joel.bertrand@systella.fr>