Annotation of rpl/lapack/lapack/iparam2stage.F, revision 1.4

1.1       bertrand    1: *> \brief \b IPARAM2STAGE
                      2: *
                      3: *  =========== DOCUMENTATION ===========
                      4: *
                      5: * Online html documentation available at 
                      6: *            http://www.netlib.org/lapack/explore-html/ 
                      7: *
                      8: *> \htmlonly
                      9: *> Download IPARAM2STAGE + dependencies 
                     10: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/iparam2stage.F"> 
                     11: *> [TGZ]</a> 
                     12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/iparam2stage.F"> 
                     13: *> [ZIP]</a>
                     14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/iparam2stage.F"> 
                     15: *> [TXT]</a>
                     16: *> \endhtmlonly 
                     17: *
                     18: *  Definition:
                     19: *  ===========
                     20: *
                     21: *       INTEGER FUNCTION IPARAM2STAGE( ISPEC, NAME, OPTS, 
                     22: *                                    NI, NBI, IBI, NXI )
                     23: *       #if defined(_OPENMP)
                     24: *           use omp_lib
                     25: *       #endif
                     26: *       IMPLICIT NONE
                     27: *
                     28: *       .. Scalar Arguments ..
                     29: *       CHARACTER*( * )    NAME, OPTS
                     30: *       INTEGER            ISPEC, NI, NBI, IBI, NXI
                     31: *
                     32: *> \par Purpose:
                     33: *  =============
                     34: *>
                     35: *> \verbatim
                     36: *>
                     37: *>      This program sets problem and machine dependent parameters
1.4     ! bertrand   38: *>      useful for xHETRD_2STAGE, xHETRD_HE2HB, xHETRD_HB2ST,
1.1       bertrand   39: *>      xGEBRD_2STAGE, xGEBRD_GE2GB, xGEBRD_GB2BD 
                     40: *>      and related subroutines for eigenvalue problems. 
1.4     ! bertrand   41: *>      It is called whenever ILAENV is called with 17 <= ISPEC <= 21.
        !            42: *>      It is called whenever ILAENV2STAGE is called with 1 <= ISPEC <= 5
        !            43: *>      with a direct conversion ISPEC + 16.
1.1       bertrand   44: *> \endverbatim
                     45: *
                     46: *  Arguments:
                     47: *  ==========
                     48: *
                     49: *> \param[in] ISPEC
                     50: *> \verbatim
                     51: *>          ISPEC is integer scalar
                     52: *>              ISPEC specifies which tunable parameter IPARAM2STAGE should
                     53: *>              return.
                     54: *>
                     55: *>              ISPEC=17: the optimal blocksize nb for the reduction to
1.4     ! bertrand   56: *>                        BAND
1.1       bertrand   57: *>
                     58: *>              ISPEC=18: the optimal blocksize ib for the eigenvectors
                     59: *>                        singular vectors update routine
                     60: *>
                     61: *>              ISPEC=19: The length of the array that store the Housholder 
                     62: *>                        representation for the second stage 
                     63: *>                        Band to Tridiagonal or Bidiagonal
                     64: *>
                     65: *>              ISPEC=20: The workspace needed for the routine in input.
                     66: *>
                     67: *>              ISPEC=21: For future release.
                     68: *> \endverbatim
                     69: *>
                     70: *> \param[in] NAME
                     71: *> \verbatim
                     72: *>          NAME is character string
                     73: *>               Name of the calling subroutine
                     74: *> \endverbatim
                     75: *>
                     76: *> \param[in] OPTS
                     77: *> \verbatim
                     78: *>          OPTS is CHARACTER*(*)
                     79: *>          The character options to the subroutine NAME, concatenated
                     80: *>          into a single character string.  For example, UPLO = 'U',
                     81: *>          TRANS = 'T', and DIAG = 'N' for a triangular routine would
                     82: *>          be specified as OPTS = 'UTN'.
                     83: *> \endverbatim
                     84: *>
                     85: *> \param[in] NI
                     86: *> \verbatim
                     87: *>          NI is INTEGER which is the size of the matrix
                     88: *> \endverbatim
                     89: *>
                     90: *> \param[in] NBI
                     91: *> \verbatim
                     92: *>          NBI is INTEGER which is the used in the reduciton, 
1.4     ! bertrand   93: *>          (e.g., the size of the band), needed to compute workspace
        !            94: *>          and LHOUS2.
1.1       bertrand   95: *> \endverbatim
                     96: *>
                     97: *> \param[in] IBI
                     98: *> \verbatim
                     99: *>          IBI is INTEGER which represent the IB of the reduciton,
1.4     ! bertrand  100: *>          needed to compute workspace and LHOUS2.
1.1       bertrand  101: *> \endverbatim
                    102: *>
                    103: *> \param[in] NXI
                    104: *> \verbatim
                    105: *>          NXI is INTEGER needed in the future release.
                    106: *> \endverbatim
                    107: *
                    108: *  Authors:
                    109: *  ========
                    110: *
                    111: *> \author Univ. of Tennessee 
                    112: *> \author Univ. of California Berkeley 
                    113: *> \author Univ. of Colorado Denver 
                    114: *> \author NAG Ltd. 
                    115: *
                    116: *> \ingroup auxOTHERauxiliary
                    117: *
                    118: *> \par Further Details:
                    119: *  =====================
                    120: *>
                    121: *> \verbatim
                    122: *>
                    123: *>  Implemented by Azzam Haidar.
                    124: *>
                    125: *>  All detail are available on technical report, SC11, SC13 papers.
                    126: *>
                    127: *>  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
                    128: *>  Parallel reduction to condensed forms for symmetric eigenvalue problems
                    129: *>  using aggregated fine-grained and memory-aware kernels. In Proceedings
                    130: *>  of 2011 International Conference for High Performance Computing,
                    131: *>  Networking, Storage and Analysis (SC '11), New York, NY, USA,
                    132: *>  Article 8 , 11 pages.
                    133: *>  http://doi.acm.org/10.1145/2063384.2063394
                    134: *>
                    135: *>  A. Haidar, J. Kurzak, P. Luszczek, 2013.
                    136: *>  An improved parallel singular value algorithm and its implementation 
                    137: *>  for multicore hardware, In Proceedings of 2013 International Conference
                    138: *>  for High Performance Computing, Networking, Storage and Analysis (SC '13).
                    139: *>  Denver, Colorado, USA, 2013.
                    140: *>  Article 90, 12 pages.
                    141: *>  http://doi.acm.org/10.1145/2503210.2503292
                    142: *>
                    143: *>  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
                    144: *>  A novel hybrid CPU-GPU generalized eigensolver for electronic structure 
                    145: *>  calculations based on fine-grained memory aware tasks.
                    146: *>  International Journal of High Performance Computing Applications.
                    147: *>  Volume 28 Issue 2, Pages 196-209, May 2014.
                    148: *>  http://hpc.sagepub.com/content/28/2/196 
                    149: *>
                    150: *> \endverbatim
                    151: *>
                    152: *  =====================================================================
                    153:       INTEGER FUNCTION IPARAM2STAGE( ISPEC, NAME, OPTS, 
                    154:      $                              NI, NBI, IBI, NXI )
                    155: #if defined(_OPENMP)
                    156:       use omp_lib
                    157: #endif
                    158:       IMPLICIT NONE
                    159: *
1.4     ! bertrand  160: *  -- LAPACK auxiliary routine --
1.1       bertrand  161: *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
                    162: *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
                    163: *
                    164: *     .. Scalar Arguments ..
                    165:       CHARACTER*( * )    NAME, OPTS
                    166:       INTEGER            ISPEC, NI, NBI, IBI, NXI
                    167: *
                    168: *  ================================================================
                    169: *     ..
                    170: *     .. Local Scalars ..
                    171:       INTEGER            I, IC, IZ, KD, IB, LHOUS, LWORK, NTHREADS,
                    172:      $                   FACTOPTNB, QROPTNB, LQOPTNB
                    173:       LOGICAL            RPREC, CPREC
1.4     ! bertrand  174:       CHARACTER          PREC*1, ALGO*3, STAG*5, SUBNAM*12, VECT*1
1.1       bertrand  175: *     ..
                    176: *     .. Intrinsic Functions ..
                    177:       INTRINSIC          CHAR, ICHAR, MAX
                    178: *     ..
                    179: *     .. External Functions ..
                    180:       INTEGER            ILAENV
                    181:       EXTERNAL           ILAENV
                    182: *     ..
                    183: *     .. Executable Statements ..
                    184: *
                    185: *     Invalid value for ISPEC
                    186: *
                    187:       IF( (ISPEC.LT.17).OR.(ISPEC.GT.21) ) THEN
                    188:           IPARAM2STAGE = -1
                    189:           RETURN
                    190:       ENDIF
                    191: *
                    192: *     Get the number of threads
                    193: *      
                    194:       NTHREADS = 1
                    195: #if defined(_OPENMP)
                    196: !$OMP PARALLEL 
                    197:       NTHREADS = OMP_GET_NUM_THREADS()
                    198: !$OMP END PARALLEL
                    199: #endif
                    200: *      WRITE(*,*) 'IPARAM VOICI NTHREADS ISPEC ',NTHREADS, ISPEC
                    201: *
                    202:       IF( ISPEC .NE. 19 ) THEN
                    203: *
                    204: *        Convert NAME to upper case if the first character is lower case.
                    205: *
                    206:          IPARAM2STAGE = -1
                    207:          SUBNAM = NAME
                    208:          IC = ICHAR( SUBNAM( 1: 1 ) )
                    209:          IZ = ICHAR( 'Z' )
                    210:          IF( IZ.EQ.90 .OR. IZ.EQ.122 ) THEN
                    211: *
                    212: *           ASCII character set
                    213: *
                    214:             IF( IC.GE.97 .AND. IC.LE.122 ) THEN
                    215:                SUBNAM( 1: 1 ) = CHAR( IC-32 )
                    216:                DO 100 I = 2, 12
                    217:                   IC = ICHAR( SUBNAM( I: I ) )
                    218:                   IF( IC.GE.97 .AND. IC.LE.122 )
                    219:      $               SUBNAM( I: I ) = CHAR( IC-32 )
                    220:   100          CONTINUE
                    221:             END IF
                    222: *
                    223:          ELSE IF( IZ.EQ.233 .OR. IZ.EQ.169 ) THEN
                    224: *
                    225: *           EBCDIC character set
                    226: *
                    227:             IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR.
                    228:      $          ( IC.GE.145 .AND. IC.LE.153 ) .OR.
                    229:      $          ( IC.GE.162 .AND. IC.LE.169 ) ) THEN
                    230:                SUBNAM( 1: 1 ) = CHAR( IC+64 )
                    231:                DO 110 I = 2, 12
                    232:                   IC = ICHAR( SUBNAM( I: I ) )
                    233:                   IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR.
                    234:      $                ( IC.GE.145 .AND. IC.LE.153 ) .OR.
                    235:      $                ( IC.GE.162 .AND. IC.LE.169 ) )SUBNAM( I:
                    236:      $                I ) = CHAR( IC+64 )
                    237:   110          CONTINUE
                    238:             END IF
                    239: *
                    240:          ELSE IF( IZ.EQ.218 .OR. IZ.EQ.250 ) THEN
                    241: *
                    242: *           Prime machines:  ASCII+128
                    243: *
                    244:             IF( IC.GE.225 .AND. IC.LE.250 ) THEN
                    245:                SUBNAM( 1: 1 ) = CHAR( IC-32 )
                    246:                DO 120 I = 2, 12
                    247:                  IC = ICHAR( SUBNAM( I: I ) )
                    248:                  IF( IC.GE.225 .AND. IC.LE.250 )
                    249:      $             SUBNAM( I: I ) = CHAR( IC-32 )
                    250:   120          CONTINUE
                    251:             END IF
                    252:          END IF
                    253: *
                    254:          PREC  = SUBNAM( 1: 1 )
                    255:          ALGO  = SUBNAM( 4: 6 )
                    256:          STAG  = SUBNAM( 8:12 )
                    257:          RPREC = PREC.EQ.'S' .OR. PREC.EQ.'D'
                    258:          CPREC = PREC.EQ.'C' .OR. PREC.EQ.'Z'
                    259: *
                    260: *        Invalid value for PRECISION
                    261: *      
                    262:          IF( .NOT.( RPREC .OR. CPREC ) ) THEN
                    263:              IPARAM2STAGE = -1
                    264:              RETURN
                    265:          ENDIF
                    266:       ENDIF
                    267: *      WRITE(*,*),'RPREC,CPREC ',RPREC,CPREC,
                    268: *     $           '   ALGO ',ALGO,'    STAGE ',STAG
                    269: *      
                    270: *
                    271:       IF (( ISPEC .EQ. 17 ) .OR. ( ISPEC .EQ. 18 )) THEN 
                    272: *
                    273: *     ISPEC = 17, 18:  block size KD, IB
                    274: *     Could be also dependent from N but for now it
                    275: *     depend only on sequential or parallel
                    276: *
                    277:          IF( NTHREADS.GT.4 ) THEN
                    278:             IF( CPREC ) THEN
                    279:                KD = 128
                    280:                IB = 32
                    281:             ELSE
                    282:                KD = 160
                    283:                IB = 40
                    284:             ENDIF
                    285:          ELSE IF( NTHREADS.GT.1 ) THEN
                    286:             IF( CPREC ) THEN
                    287:                KD = 64
                    288:                IB = 32
                    289:             ELSE
                    290:                KD = 64
                    291:                IB = 32
                    292:             ENDIF
                    293:          ELSE
                    294:             IF( CPREC ) THEN
                    295:                KD = 16
                    296:                IB = 16
                    297:             ELSE
                    298:                KD = 32
                    299:                IB = 16
                    300:             ENDIF
                    301:          ENDIF
                    302:          IF( ISPEC.EQ.17 ) IPARAM2STAGE = KD
                    303:          IF( ISPEC.EQ.18 ) IPARAM2STAGE = IB
                    304: *
                    305:       ELSE IF ( ISPEC .EQ. 19 ) THEN
                    306: *
                    307: *     ISPEC = 19:  
                    308: *     LHOUS length of the Houselholder representation
                    309: *     matrix (V,T) of the second stage. should be >= 1.
                    310: *
                    311: *     Will add the VECT OPTION HERE next release
                    312:          VECT  = OPTS(1:1)
                    313:          IF( VECT.EQ.'N' ) THEN
                    314:             LHOUS = MAX( 1, 4*NI )
                    315:          ELSE
                    316: *           This is not correct, it need to call the ALGO and the stage2
                    317:             LHOUS = MAX( 1, 4*NI ) + IBI
                    318:          ENDIF
                    319:          IF( LHOUS.GE.0 ) THEN
                    320:             IPARAM2STAGE = LHOUS
                    321:          ELSE
                    322:             IPARAM2STAGE = -1
                    323:          ENDIF
                    324: *
                    325:       ELSE IF ( ISPEC .EQ. 20 ) THEN
                    326: *
                    327: *     ISPEC = 20: (21 for future use)  
                    328: *     LWORK length of the workspace for 
                    329: *     either or both stages for TRD and BRD. should be >= 1.
                    330: *     TRD:
                    331: *     TRD_stage 1: = LT + LW + LS1 + LS2
                    332: *                  = LDT*KD + N*KD + N*MAX(KD,FACTOPTNB) + LDS2*KD 
                    333: *                    where LDT=LDS2=KD
                    334: *                  = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD
                    335: *     TRD_stage 2: = (2NB+1)*N + KD*NTHREADS
                    336: *     TRD_both   : = max(stage1,stage2) + AB ( AB=(KD+1)*N )
                    337: *                  = N*KD + N*max(KD+1,FACTOPTNB) 
                    338: *                    + max(2*KD*KD, KD*NTHREADS) 
                    339: *                    + (KD+1)*N
                    340:          LWORK        = -1
                    341:          SUBNAM(1:1)  = PREC
                    342:          SUBNAM(2:6)  = 'GEQRF'
                    343:          QROPTNB      = ILAENV( 1, SUBNAM, ' ', NI, NBI, -1, -1 )
                    344:          SUBNAM(2:6)  = 'GELQF'
                    345:          LQOPTNB      = ILAENV( 1, SUBNAM, ' ', NBI, NI, -1, -1 )
                    346: *        Could be QR or LQ for TRD and the max for BRD
                    347:          FACTOPTNB    = MAX(QROPTNB, LQOPTNB)
                    348:          IF( ALGO.EQ.'TRD' ) THEN
                    349:             IF( STAG.EQ.'2STAG' ) THEN
                    350:                LWORK = NI*NBI + NI*MAX(NBI+1,FACTOPTNB) 
                    351:      $              + MAX(2*NBI*NBI, NBI*NTHREADS) 
                    352:      $              + (NBI+1)*NI
                    353:             ELSE IF( (STAG.EQ.'HE2HB').OR.(STAG.EQ.'SY2SB') ) THEN
                    354:                LWORK = NI*NBI + NI*MAX(NBI,FACTOPTNB) + 2*NBI*NBI
                    355:             ELSE IF( (STAG.EQ.'HB2ST').OR.(STAG.EQ.'SB2ST') ) THEN
                    356:                LWORK = (2*NBI+1)*NI + NBI*NTHREADS
                    357:             ENDIF
                    358:          ELSE IF( ALGO.EQ.'BRD' ) THEN
                    359:             IF( STAG.EQ.'2STAG' ) THEN
                    360:                LWORK = 2*NI*NBI + NI*MAX(NBI+1,FACTOPTNB) 
                    361:      $              + MAX(2*NBI*NBI, NBI*NTHREADS) 
                    362:      $              + (NBI+1)*NI
                    363:             ELSE IF( STAG.EQ.'GE2GB' ) THEN
                    364:                LWORK = NI*NBI + NI*MAX(NBI,FACTOPTNB) + 2*NBI*NBI
                    365:             ELSE IF( STAG.EQ.'GB2BD' ) THEN
                    366:                LWORK = (3*NBI+1)*NI + NBI*NTHREADS
                    367:             ENDIF
                    368:          ENDIF
                    369:          LWORK = MAX ( 1, LWORK )
                    370: 
                    371:          IF( LWORK.GT.0 ) THEN
                    372:             IPARAM2STAGE = LWORK
                    373:          ELSE
                    374:             IPARAM2STAGE = -1
                    375:          ENDIF
                    376: *
                    377:       ELSE IF ( ISPEC .EQ. 21 ) THEN
                    378: *
                    379: *     ISPEC = 21 for future use 
                    380:          IPARAM2STAGE = NXI
                    381:       ENDIF
                    382: *
                    383: *     ==== End of IPARAM2STAGE ====
                    384: *
                    385:       END

CVSweb interface <joel.bertrand@systella.fr>