Annotation of rpl/lapack/lapack/iparam2stage.F, revision 1.2

1.1       bertrand    1: *> \brief \b IPARAM2STAGE
                      2: *
                      3: *  =========== DOCUMENTATION ===========
                      4: *
                      5: * Online html documentation available at 
                      6: *            http://www.netlib.org/lapack/explore-html/ 
                      7: *
                      8: *> \htmlonly
                      9: *> Download IPARAM2STAGE + dependencies 
                     10: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/iparam2stage.F"> 
                     11: *> [TGZ]</a> 
                     12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/iparam2stage.F"> 
                     13: *> [ZIP]</a>
                     14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/iparam2stage.F"> 
                     15: *> [TXT]</a>
                     16: *> \endhtmlonly 
                     17: *
                     18: *  Definition:
                     19: *  ===========
                     20: *
                     21: *       INTEGER FUNCTION IPARAM2STAGE( ISPEC, NAME, OPTS, 
                     22: *                                    NI, NBI, IBI, NXI )
                     23: *       #if defined(_OPENMP)
                     24: *           use omp_lib
                     25: *       #endif
                     26: *       IMPLICIT NONE
                     27: *
                     28: *       .. Scalar Arguments ..
                     29: *       CHARACTER*( * )    NAME, OPTS
                     30: *       INTEGER            ISPEC, NI, NBI, IBI, NXI
                     31: *
                     32: *> \par Purpose:
                     33: *  =============
                     34: *>
                     35: *> \verbatim
                     36: *>
                     37: *>      This program sets problem and machine dependent parameters
                     38: *>      useful for xHETRD_2STAGE, xHETRD_H@2HB, xHETRD_HB2ST,
                     39: *>      xGEBRD_2STAGE, xGEBRD_GE2GB, xGEBRD_GB2BD 
                     40: *>      and related subroutines for eigenvalue problems. 
                     41: *>      It is called whenever ILAENV is called with 17 <= ISPEC <= 21
                     42: *> \endverbatim
                     43: *
                     44: *  Arguments:
                     45: *  ==========
                     46: *
                     47: *> \param[in] ISPEC
                     48: *> \verbatim
                     49: *>          ISPEC is integer scalar
                     50: *>              ISPEC specifies which tunable parameter IPARAM2STAGE should
                     51: *>              return.
                     52: *>
                     53: *>              ISPEC=17: the optimal blocksize nb for the reduction to
                     54: *                         BAND
                     55: *>
                     56: *>              ISPEC=18: the optimal blocksize ib for the eigenvectors
                     57: *>                        singular vectors update routine
                     58: *>
                     59: *>              ISPEC=19: The length of the array that store the Housholder 
                     60: *>                        representation for the second stage 
                     61: *>                        Band to Tridiagonal or Bidiagonal
                     62: *>
                     63: *>              ISPEC=20: The workspace needed for the routine in input.
                     64: *>
                     65: *>              ISPEC=21: For future release.
                     66: *> \endverbatim
                     67: *>
                     68: *> \param[in] NAME
                     69: *> \verbatim
                     70: *>          NAME is character string
                     71: *>               Name of the calling subroutine
                     72: *> \endverbatim
                     73: *>
                     74: *> \param[in] OPTS
                     75: *> \verbatim
                     76: *>          OPTS is CHARACTER*(*)
                     77: *>          The character options to the subroutine NAME, concatenated
                     78: *>          into a single character string.  For example, UPLO = 'U',
                     79: *>          TRANS = 'T', and DIAG = 'N' for a triangular routine would
                     80: *>          be specified as OPTS = 'UTN'.
                     81: *> \endverbatim
                     82: *>
                     83: *> \param[in] NI
                     84: *> \verbatim
                     85: *>          NI is INTEGER which is the size of the matrix
                     86: *> \endverbatim
                     87: *>
                     88: *> \param[in] NBI
                     89: *> \verbatim
                     90: *>          NBI is INTEGER which is the used in the reduciton, 
                     91: *           (e.g., the size of the band), needed to compute workspace
                     92: *           and LHOUS2.
                     93: *> \endverbatim
                     94: *>
                     95: *> \param[in] IBI
                     96: *> \verbatim
                     97: *>          IBI is INTEGER which represent the IB of the reduciton,
                     98: *           needed to compute workspace and LHOUS2.
                     99: *> \endverbatim
                    100: *>
                    101: *> \param[in] NXI
                    102: *> \verbatim
                    103: *>          NXI is INTEGER needed in the future release.
                    104: *> \endverbatim
                    105: *
                    106: *  Authors:
                    107: *  ========
                    108: *
                    109: *> \author Univ. of Tennessee 
                    110: *> \author Univ. of California Berkeley 
                    111: *> \author Univ. of Colorado Denver 
                    112: *> \author NAG Ltd. 
                    113: *
                    114: *> \date June 2016
                    115: *
                    116: *> \ingroup auxOTHERauxiliary
                    117: *
                    118: *> \par Further Details:
                    119: *  =====================
                    120: *>
                    121: *> \verbatim
                    122: *>
                    123: *>  Implemented by Azzam Haidar.
                    124: *>
                    125: *>  All detail are available on technical report, SC11, SC13 papers.
                    126: *>
                    127: *>  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
                    128: *>  Parallel reduction to condensed forms for symmetric eigenvalue problems
                    129: *>  using aggregated fine-grained and memory-aware kernels. In Proceedings
                    130: *>  of 2011 International Conference for High Performance Computing,
                    131: *>  Networking, Storage and Analysis (SC '11), New York, NY, USA,
                    132: *>  Article 8 , 11 pages.
                    133: *>  http://doi.acm.org/10.1145/2063384.2063394
                    134: *>
                    135: *>  A. Haidar, J. Kurzak, P. Luszczek, 2013.
                    136: *>  An improved parallel singular value algorithm and its implementation 
                    137: *>  for multicore hardware, In Proceedings of 2013 International Conference
                    138: *>  for High Performance Computing, Networking, Storage and Analysis (SC '13).
                    139: *>  Denver, Colorado, USA, 2013.
                    140: *>  Article 90, 12 pages.
                    141: *>  http://doi.acm.org/10.1145/2503210.2503292
                    142: *>
                    143: *>  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
                    144: *>  A novel hybrid CPU-GPU generalized eigensolver for electronic structure 
                    145: *>  calculations based on fine-grained memory aware tasks.
                    146: *>  International Journal of High Performance Computing Applications.
                    147: *>  Volume 28 Issue 2, Pages 196-209, May 2014.
                    148: *>  http://hpc.sagepub.com/content/28/2/196 
                    149: *>
                    150: *> \endverbatim
                    151: *>
                    152: *  =====================================================================
                    153:       INTEGER FUNCTION IPARAM2STAGE( ISPEC, NAME, OPTS, 
                    154:      $                              NI, NBI, IBI, NXI )
                    155: #if defined(_OPENMP)
                    156:       use omp_lib
                    157: #endif
                    158:       IMPLICIT NONE
                    159: *
                    160: *  -- LAPACK auxiliary routine (version 3.7.0) --
                    161: *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
                    162: *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
                    163: *     June 2016
                    164: *
                    165: *     .. Scalar Arguments ..
                    166:       CHARACTER*( * )    NAME, OPTS
                    167:       INTEGER            ISPEC, NI, NBI, IBI, NXI
                    168: *
                    169: *  ================================================================
                    170: *     ..
                    171: *     .. Local Scalars ..
                    172:       INTEGER            I, IC, IZ, KD, IB, LHOUS, LWORK, NTHREADS,
                    173:      $                   FACTOPTNB, QROPTNB, LQOPTNB
                    174:       LOGICAL            RPREC, CPREC
                    175:       CHARACTER          PREC*1, ALGO*3, STAG*5, SUBNAM*12, VECT*3
                    176: *     ..
                    177: *     .. Intrinsic Functions ..
                    178:       INTRINSIC          CHAR, ICHAR, MAX
                    179: *     ..
                    180: *     .. External Functions ..
                    181:       INTEGER            ILAENV
                    182:       EXTERNAL           ILAENV
                    183: *     ..
                    184: *     .. Executable Statements ..
                    185: *
                    186: *     Invalid value for ISPEC
                    187: *
                    188:       IF( (ISPEC.LT.17).OR.(ISPEC.GT.21) ) THEN
                    189:           IPARAM2STAGE = -1
                    190:           RETURN
                    191:       ENDIF
                    192: *
                    193: *     Get the number of threads
                    194: *      
                    195:       NTHREADS = 1
                    196: #if defined(_OPENMP)
                    197: !$OMP PARALLEL 
                    198:       NTHREADS = OMP_GET_NUM_THREADS()
                    199: !$OMP END PARALLEL
                    200: #endif
                    201: *      WRITE(*,*) 'IPARAM VOICI NTHREADS ISPEC ',NTHREADS, ISPEC
                    202: *
                    203:       IF( ISPEC .NE. 19 ) THEN
                    204: *
                    205: *        Convert NAME to upper case if the first character is lower case.
                    206: *
                    207:          IPARAM2STAGE = -1
                    208:          SUBNAM = NAME
                    209:          IC = ICHAR( SUBNAM( 1: 1 ) )
                    210:          IZ = ICHAR( 'Z' )
                    211:          IF( IZ.EQ.90 .OR. IZ.EQ.122 ) THEN
                    212: *
                    213: *           ASCII character set
                    214: *
                    215:             IF( IC.GE.97 .AND. IC.LE.122 ) THEN
                    216:                SUBNAM( 1: 1 ) = CHAR( IC-32 )
                    217:                DO 100 I = 2, 12
                    218:                   IC = ICHAR( SUBNAM( I: I ) )
                    219:                   IF( IC.GE.97 .AND. IC.LE.122 )
                    220:      $               SUBNAM( I: I ) = CHAR( IC-32 )
                    221:   100          CONTINUE
                    222:             END IF
                    223: *
                    224:          ELSE IF( IZ.EQ.233 .OR. IZ.EQ.169 ) THEN
                    225: *
                    226: *           EBCDIC character set
                    227: *
                    228:             IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR.
                    229:      $          ( IC.GE.145 .AND. IC.LE.153 ) .OR.
                    230:      $          ( IC.GE.162 .AND. IC.LE.169 ) ) THEN
                    231:                SUBNAM( 1: 1 ) = CHAR( IC+64 )
                    232:                DO 110 I = 2, 12
                    233:                   IC = ICHAR( SUBNAM( I: I ) )
                    234:                   IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR.
                    235:      $                ( IC.GE.145 .AND. IC.LE.153 ) .OR.
                    236:      $                ( IC.GE.162 .AND. IC.LE.169 ) )SUBNAM( I:
                    237:      $                I ) = CHAR( IC+64 )
                    238:   110          CONTINUE
                    239:             END IF
                    240: *
                    241:          ELSE IF( IZ.EQ.218 .OR. IZ.EQ.250 ) THEN
                    242: *
                    243: *           Prime machines:  ASCII+128
                    244: *
                    245:             IF( IC.GE.225 .AND. IC.LE.250 ) THEN
                    246:                SUBNAM( 1: 1 ) = CHAR( IC-32 )
                    247:                DO 120 I = 2, 12
                    248:                  IC = ICHAR( SUBNAM( I: I ) )
                    249:                  IF( IC.GE.225 .AND. IC.LE.250 )
                    250:      $             SUBNAM( I: I ) = CHAR( IC-32 )
                    251:   120          CONTINUE
                    252:             END IF
                    253:          END IF
                    254: *
                    255:          PREC  = SUBNAM( 1: 1 )
                    256:          ALGO  = SUBNAM( 4: 6 )
                    257:          STAG  = SUBNAM( 8:12 )
                    258:          RPREC = PREC.EQ.'S' .OR. PREC.EQ.'D'
                    259:          CPREC = PREC.EQ.'C' .OR. PREC.EQ.'Z'
                    260: *
                    261: *        Invalid value for PRECISION
                    262: *      
                    263:          IF( .NOT.( RPREC .OR. CPREC ) ) THEN
                    264:              IPARAM2STAGE = -1
                    265:              RETURN
                    266:          ENDIF
                    267:       ENDIF
                    268: *      WRITE(*,*),'RPREC,CPREC ',RPREC,CPREC,
                    269: *     $           '   ALGO ',ALGO,'    STAGE ',STAG
                    270: *      
                    271: *
                    272:       IF (( ISPEC .EQ. 17 ) .OR. ( ISPEC .EQ. 18 )) THEN 
                    273: *
                    274: *     ISPEC = 17, 18:  block size KD, IB
                    275: *     Could be also dependent from N but for now it
                    276: *     depend only on sequential or parallel
                    277: *
                    278:          IF( NTHREADS.GT.4 ) THEN
                    279:             IF( CPREC ) THEN
                    280:                KD = 128
                    281:                IB = 32
                    282:             ELSE
                    283:                KD = 160
                    284:                IB = 40
                    285:             ENDIF
                    286:          ELSE IF( NTHREADS.GT.1 ) THEN
                    287:             IF( CPREC ) THEN
                    288:                KD = 64
                    289:                IB = 32
                    290:             ELSE
                    291:                KD = 64
                    292:                IB = 32
                    293:             ENDIF
                    294:          ELSE
                    295:             IF( CPREC ) THEN
                    296:                KD = 16
                    297:                IB = 16
                    298:             ELSE
                    299:                KD = 32
                    300:                IB = 16
                    301:             ENDIF
                    302:          ENDIF
                    303:          IF( ISPEC.EQ.17 ) IPARAM2STAGE = KD
                    304:          IF( ISPEC.EQ.18 ) IPARAM2STAGE = IB
                    305: *
                    306:       ELSE IF ( ISPEC .EQ. 19 ) THEN
                    307: *
                    308: *     ISPEC = 19:  
                    309: *     LHOUS length of the Houselholder representation
                    310: *     matrix (V,T) of the second stage. should be >= 1.
                    311: *
                    312: *     Will add the VECT OPTION HERE next release
                    313:          VECT  = OPTS(1:1)
                    314:          IF( VECT.EQ.'N' ) THEN
                    315:             LHOUS = MAX( 1, 4*NI )
                    316:          ELSE
                    317: *           This is not correct, it need to call the ALGO and the stage2
                    318:             LHOUS = MAX( 1, 4*NI ) + IBI
                    319:          ENDIF
                    320:          IF( LHOUS.GE.0 ) THEN
                    321:             IPARAM2STAGE = LHOUS
                    322:          ELSE
                    323:             IPARAM2STAGE = -1
                    324:          ENDIF
                    325: *
                    326:       ELSE IF ( ISPEC .EQ. 20 ) THEN
                    327: *
                    328: *     ISPEC = 20: (21 for future use)  
                    329: *     LWORK length of the workspace for 
                    330: *     either or both stages for TRD and BRD. should be >= 1.
                    331: *     TRD:
                    332: *     TRD_stage 1: = LT + LW + LS1 + LS2
                    333: *                  = LDT*KD + N*KD + N*MAX(KD,FACTOPTNB) + LDS2*KD 
                    334: *                    where LDT=LDS2=KD
                    335: *                  = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD
                    336: *     TRD_stage 2: = (2NB+1)*N + KD*NTHREADS
                    337: *     TRD_both   : = max(stage1,stage2) + AB ( AB=(KD+1)*N )
                    338: *                  = N*KD + N*max(KD+1,FACTOPTNB) 
                    339: *                    + max(2*KD*KD, KD*NTHREADS) 
                    340: *                    + (KD+1)*N
                    341:          LWORK        = -1
                    342:          SUBNAM(1:1)  = PREC
                    343:          SUBNAM(2:6)  = 'GEQRF'
                    344:          QROPTNB      = ILAENV( 1, SUBNAM, ' ', NI, NBI, -1, -1 )
                    345:          SUBNAM(2:6)  = 'GELQF'
                    346:          LQOPTNB      = ILAENV( 1, SUBNAM, ' ', NBI, NI, -1, -1 )
                    347: *        Could be QR or LQ for TRD and the max for BRD
                    348:          FACTOPTNB    = MAX(QROPTNB, LQOPTNB)
                    349:          IF( ALGO.EQ.'TRD' ) THEN
                    350:             IF( STAG.EQ.'2STAG' ) THEN
                    351:                LWORK = NI*NBI + NI*MAX(NBI+1,FACTOPTNB) 
                    352:      $              + MAX(2*NBI*NBI, NBI*NTHREADS) 
                    353:      $              + (NBI+1)*NI
                    354:             ELSE IF( (STAG.EQ.'HE2HB').OR.(STAG.EQ.'SY2SB') ) THEN
                    355:                LWORK = NI*NBI + NI*MAX(NBI,FACTOPTNB) + 2*NBI*NBI
                    356:             ELSE IF( (STAG.EQ.'HB2ST').OR.(STAG.EQ.'SB2ST') ) THEN
                    357:                LWORK = (2*NBI+1)*NI + NBI*NTHREADS
                    358:             ENDIF
                    359:          ELSE IF( ALGO.EQ.'BRD' ) THEN
                    360:             IF( STAG.EQ.'2STAG' ) THEN
                    361:                LWORK = 2*NI*NBI + NI*MAX(NBI+1,FACTOPTNB) 
                    362:      $              + MAX(2*NBI*NBI, NBI*NTHREADS) 
                    363:      $              + (NBI+1)*NI
                    364:             ELSE IF( STAG.EQ.'GE2GB' ) THEN
                    365:                LWORK = NI*NBI + NI*MAX(NBI,FACTOPTNB) + 2*NBI*NBI
                    366:             ELSE IF( STAG.EQ.'GB2BD' ) THEN
                    367:                LWORK = (3*NBI+1)*NI + NBI*NTHREADS
                    368:             ENDIF
                    369:          ENDIF
                    370:          LWORK = MAX ( 1, LWORK )
                    371: 
                    372:          IF( LWORK.GT.0 ) THEN
                    373:             IPARAM2STAGE = LWORK
                    374:          ELSE
                    375:             IPARAM2STAGE = -1
                    376:          ENDIF
                    377: *
                    378:       ELSE IF ( ISPEC .EQ. 21 ) THEN
                    379: *
                    380: *     ISPEC = 21 for future use 
                    381:          IPARAM2STAGE = NXI
                    382:       ENDIF
                    383: *
                    384: *     ==== End of IPARAM2STAGE ====
                    385: *
                    386:       END

CVSweb interface <joel.bertrand@systella.fr>