File:  [local] / rpl / lapack / lapack / iparam2stage.F
Revision 1.4: download - view: text, annotated - select for diffs - revision graph
Mon Aug 7 08:39:14 2023 UTC (9 months, 1 week ago) by bertrand
Branches: MAIN
CVS tags: rpl-4_1_35, rpl-4_1_34, HEAD
Première mise à jour de lapack et blas.

    1: *> \brief \b IPARAM2STAGE
    2: *
    3: *  =========== DOCUMENTATION ===========
    4: *
    5: * Online html documentation available at 
    6: *            http://www.netlib.org/lapack/explore-html/ 
    7: *
    8: *> \htmlonly
    9: *> Download IPARAM2STAGE + dependencies 
   10: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/iparam2stage.F"> 
   11: *> [TGZ]</a> 
   12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/iparam2stage.F"> 
   13: *> [ZIP]</a>
   14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/iparam2stage.F"> 
   15: *> [TXT]</a>
   16: *> \endhtmlonly 
   17: *
   18: *  Definition:
   19: *  ===========
   20: *
   21: *       INTEGER FUNCTION IPARAM2STAGE( ISPEC, NAME, OPTS, 
   22: *                                    NI, NBI, IBI, NXI )
   23: *       #if defined(_OPENMP)
   24: *           use omp_lib
   25: *       #endif
   26: *       IMPLICIT NONE
   27: *
   28: *       .. Scalar Arguments ..
   29: *       CHARACTER*( * )    NAME, OPTS
   30: *       INTEGER            ISPEC, NI, NBI, IBI, NXI
   31: *
   32: *> \par Purpose:
   33: *  =============
   34: *>
   35: *> \verbatim
   36: *>
   37: *>      This program sets problem and machine dependent parameters
   38: *>      useful for xHETRD_2STAGE, xHETRD_HE2HB, xHETRD_HB2ST,
   39: *>      xGEBRD_2STAGE, xGEBRD_GE2GB, xGEBRD_GB2BD 
   40: *>      and related subroutines for eigenvalue problems. 
   41: *>      It is called whenever ILAENV is called with 17 <= ISPEC <= 21.
   42: *>      It is called whenever ILAENV2STAGE is called with 1 <= ISPEC <= 5
   43: *>      with a direct conversion ISPEC + 16.
   44: *> \endverbatim
   45: *
   46: *  Arguments:
   47: *  ==========
   48: *
   49: *> \param[in] ISPEC
   50: *> \verbatim
   51: *>          ISPEC is integer scalar
   52: *>              ISPEC specifies which tunable parameter IPARAM2STAGE should
   53: *>              return.
   54: *>
   55: *>              ISPEC=17: the optimal blocksize nb for the reduction to
   56: *>                        BAND
   57: *>
   58: *>              ISPEC=18: the optimal blocksize ib for the eigenvectors
   59: *>                        singular vectors update routine
   60: *>
   61: *>              ISPEC=19: The length of the array that stores the Householder
   62: *>                        representation for the second stage 
   63: *>                        Band to Tridiagonal or Bidiagonal
   64: *>
   65: *>              ISPEC=20: The workspace needed for the routine in input.
   66: *>
   67: *>              ISPEC=21: For future release.
   68: *> \endverbatim
   69: *>
   70: *> \param[in] NAME
   71: *> \verbatim
   72: *>          NAME is character string
   73: *>               Name of the calling subroutine
   74: *> \endverbatim
   75: *>
   76: *> \param[in] OPTS
   77: *> \verbatim
   78: *>          OPTS is CHARACTER*(*)
   79: *>          The character options to the subroutine NAME, concatenated
   80: *>          into a single character string.  For example, UPLO = 'U',
   81: *>          TRANS = 'T', and DIAG = 'N' for a triangular routine would
   82: *>          be specified as OPTS = 'UTN'.
   83: *> \endverbatim
   84: *>
   85: *> \param[in] NI
   86: *> \verbatim
   87: *>          NI is INTEGER which is the size of the matrix
   88: *> \endverbatim
   89: *>
   90: *> \param[in] NBI
   91: *> \verbatim
   92: *>          NBI is INTEGER which is the block size used in the reduction,
   93: *>          (e.g., the size of the band), needed to compute workspace
   94: *>          and LHOUS2.
   95: *> \endverbatim
   96: *>
   97: *> \param[in] IBI
   98: *> \verbatim
   99: *>          IBI is INTEGER which represents the IB of the reduction,
  100: *>          needed to compute workspace and LHOUS2.
  101: *> \endverbatim
  102: *>
  103: *> \param[in] NXI
  104: *> \verbatim
  105: *>          NXI is INTEGER needed in the future release.
  106: *> \endverbatim
  107: *
  108: *  Authors:
  109: *  ========
  110: *
  111: *> \author Univ. of Tennessee 
  112: *> \author Univ. of California Berkeley 
  113: *> \author Univ. of Colorado Denver 
  114: *> \author NAG Ltd. 
  115: *
  116: *> \ingroup auxOTHERauxiliary
  117: *
  118: *> \par Further Details:
  119: *  =====================
  120: *>
  121: *> \verbatim
  122: *>
  123: *>  Implemented by Azzam Haidar.
  124: *>
  125: *>  All details are available in the technical report and the SC11, SC13 papers.
  126: *>
  127: *>  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
  128: *>  Parallel reduction to condensed forms for symmetric eigenvalue problems
  129: *>  using aggregated fine-grained and memory-aware kernels. In Proceedings
  130: *>  of 2011 International Conference for High Performance Computing,
  131: *>  Networking, Storage and Analysis (SC '11), New York, NY, USA,
  132: *>  Article 8 , 11 pages.
  133: *>  http://doi.acm.org/10.1145/2063384.2063394
  134: *>
  135: *>  A. Haidar, J. Kurzak, P. Luszczek, 2013.
  136: *>  An improved parallel singular value algorithm and its implementation 
  137: *>  for multicore hardware, In Proceedings of 2013 International Conference
  138: *>  for High Performance Computing, Networking, Storage and Analysis (SC '13).
  139: *>  Denver, Colorado, USA, 2013.
  140: *>  Article 90, 12 pages.
  141: *>  http://doi.acm.org/10.1145/2503210.2503292
  142: *>
  143: *>  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
  144: *>  A novel hybrid CPU-GPU generalized eigensolver for electronic structure 
  145: *>  calculations based on fine-grained memory aware tasks.
  146: *>  International Journal of High Performance Computing Applications.
  147: *>  Volume 28 Issue 2, Pages 196-209, May 2014.
  148: *>  http://hpc.sagepub.com/content/28/2/196 
  149: *>
  150: *> \endverbatim
  151: *>
  152: *  =====================================================================
  153:       INTEGER FUNCTION IPARAM2STAGE( ISPEC, NAME, OPTS,
  154:      $                              NI, NBI, IBI, NXI )
  155: #if defined(_OPENMP)
  156:       use omp_lib
  157: #endif
  158:       IMPLICIT NONE
  159: *
  160: *  -- LAPACK auxiliary routine --
  161: *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
  162: *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  163: *
  164: *     Returns the tunable parameter selected by ISPEC (17 to 21).
  165: *     The result is -1 when ISPEC is outside that range, or when
  166: *     ISPEC is not 19 and the first character of NAME (after the
  167: *     upper case conversion below) is not S, D, C or Z.
  168: *
  169: *     .. Scalar Arguments ..
  170:       CHARACTER*( * )    NAME, OPTS
  171:       INTEGER            ISPEC, NI, NBI, IBI, NXI
  172: *
  173: *  ================================================================
  174: *     ..
  175: *     .. Local Scalars ..
  176:       INTEGER            I, IC, IZ, KD, IB, LHOUS, LWORK, NTHREADS,
  177:      $                   FACTOPTNB, QROPTNB, LQOPTNB
  178:       LOGICAL            RPREC, CPREC
  179:       CHARACTER          PREC*1, ALGO*3, STAG*5, SUBNAM*12, VECT*1
  180: *     ..
  181: *     .. Intrinsic Functions ..
  182:       INTRINSIC          CHAR, ICHAR, MAX
  183: *     ..
  184: *     .. External Functions ..
  185:       INTEGER            ILAENV
  186:       EXTERNAL           ILAENV
  187: *     ..
  188: *     .. Executable Statements ..
  189: *
  190: *     Invalid value for ISPEC
  191: *
  192:       IF( (ISPEC.LT.17).OR.(ISPEC.GT.21) ) THEN
  193:           IPARAM2STAGE = -1
  194:           RETURN
  195:       ENDIF
  196: *
  197: *     Get the number of threads.  A single thread of the team stores
  198: *     the team size, so that the shared variable NTHREADS is not
  199: *     written concurrently by every thread of the parallel region.
  200: *
  201:       NTHREADS = 1
  202: #if defined(_OPENMP)
  203: !$OMP PARALLEL
  204: !$OMP SINGLE
  205:       NTHREADS = OMP_GET_NUM_THREADS()
  206: !$OMP END SINGLE
  207: !$OMP END PARALLEL
  208: #endif
  209: *
  210:       IF( ISPEC .NE. 19 ) THEN
  211: *
  212: *        Convert NAME to upper case if the first character is lower case.
  213: *
  214:          IPARAM2STAGE = -1
  215:          SUBNAM = NAME
  216:          IC = ICHAR( SUBNAM( 1: 1 ) )
  217:          IZ = ICHAR( 'Z' )
  218:          IF( IZ.EQ.90 .OR. IZ.EQ.122 ) THEN
  219: *
  220: *           ASCII character set
  221: *
  222:             IF( IC.GE.97 .AND. IC.LE.122 ) THEN
  223:                SUBNAM( 1: 1 ) = CHAR( IC-32 )
  224:                DO 100 I = 2, 12
  225:                   IC = ICHAR( SUBNAM( I: I ) )
  226:                   IF( IC.GE.97 .AND. IC.LE.122 )
  227:      $               SUBNAM( I: I ) = CHAR( IC-32 )
  228:   100          CONTINUE
  229:             END IF
  230: *
  231:          ELSE IF( IZ.EQ.233 .OR. IZ.EQ.169 ) THEN
  232: *
  233: *           EBCDIC character set
  234: *
  235:             IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR.
  236:      $          ( IC.GE.145 .AND. IC.LE.153 ) .OR.
  237:      $          ( IC.GE.162 .AND. IC.LE.169 ) ) THEN
  238:                SUBNAM( 1: 1 ) = CHAR( IC+64 )
  239:                DO 110 I = 2, 12
  240:                   IC = ICHAR( SUBNAM( I: I ) )
  241:                   IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR.
  242:      $                ( IC.GE.145 .AND. IC.LE.153 ) .OR.
  243:      $                ( IC.GE.162 .AND. IC.LE.169 ) )SUBNAM( I:
  244:      $                I ) = CHAR( IC+64 )
  245:   110          CONTINUE
  246:             END IF
  247: *
  248:          ELSE IF( IZ.EQ.218 .OR. IZ.EQ.250 ) THEN
  249: *
  250: *           Prime machines:  ASCII+128
  251: *
  252:             IF( IC.GE.225 .AND. IC.LE.250 ) THEN
  253:                SUBNAM( 1: 1 ) = CHAR( IC-32 )
  254:                DO 120 I = 2, 12
  255:                  IC = ICHAR( SUBNAM( I: I ) )
  256:                  IF( IC.GE.225 .AND. IC.LE.250 )
  257:      $             SUBNAM( I: I ) = CHAR( IC-32 )
  258:   120          CONTINUE
  259:             END IF
  260:          END IF
  261: *
  262: *        Split the name into the precision letter (character 1), the
  263: *        algorithm (characters 4-6) and the stage (characters 8-12).
  264: *
  265:          PREC  = SUBNAM( 1: 1 )
  266:          ALGO  = SUBNAM( 4: 6 )
  267:          STAG  = SUBNAM( 8:12 )
  268:          RPREC = PREC.EQ.'S' .OR. PREC.EQ.'D'
  269:          CPREC = PREC.EQ.'C' .OR. PREC.EQ.'Z'
  270: *
  271: *        Invalid value for PRECISION
  272: *
  273:          IF( .NOT.( RPREC .OR. CPREC ) ) THEN
  274:              IPARAM2STAGE = -1
  275:              RETURN
  276:          ENDIF
  277:       ENDIF
  278: *
  279:       IF (( ISPEC .EQ. 17 ) .OR. ( ISPEC .EQ. 18 )) THEN
  280: *
  281: *     ISPEC = 17, 18:  block size KD, IB
  282: *     This could also depend on N, but for now it depends only on
  283: *     the number of threads and on real versus complex precision.
  284: *
  285:          IF( NTHREADS.GT.4 ) THEN
  286:             IF( CPREC ) THEN
  287:                KD = 128
  288:                IB = 32
  289:             ELSE
  290:                KD = 160
  291:                IB = 40
  292:             ENDIF
  293:          ELSE IF( NTHREADS.GT.1 ) THEN
  294:             IF( CPREC ) THEN
  295:                KD = 64
  296:                IB = 32
  297:             ELSE
  298:                KD = 64
  299:                IB = 32
  300:             ENDIF
  301:          ELSE
  302:             IF( CPREC ) THEN
  303:                KD = 16
  304:                IB = 16
  305:             ELSE
  306:                KD = 32
  307:                IB = 16
  308:             ENDIF
  309:          ENDIF
  310:          IF( ISPEC.EQ.17 ) IPARAM2STAGE = KD
  311:          IF( ISPEC.EQ.18 ) IPARAM2STAGE = IB
  312: *
  313:       ELSE IF ( ISPEC .EQ. 19 ) THEN
  314: *
  315: *     ISPEC = 19:
  316: *     LHOUS length of the Householder representation
  317: *     matrix (V,T) of the second stage. should be >= 1.
  318: *
  319: *     Will add the VECT OPTION HERE next release
  320:          VECT  = OPTS(1:1)
  321:          IF( VECT.EQ.'N' ) THEN
  322:             LHOUS = MAX( 1, 4*NI )
  323:          ELSE
  324: *           This is not correct, it needs to call the ALGO and the stage2
  325:             LHOUS = MAX( 1, 4*NI ) + IBI
  326:          ENDIF
  327:          IF( LHOUS.GE.0 ) THEN
  328:             IPARAM2STAGE = LHOUS
  329:          ELSE
  330:             IPARAM2STAGE = -1
  331:          ENDIF
  332: *
  333:       ELSE IF ( ISPEC .EQ. 20 ) THEN
  334: *
  335: *     ISPEC = 20: (21 for future use)
  336: *     LWORK length of the workspace for
  337: *     either or both stages for TRD and BRD. should be >= 1.
  338: *     TRD:
  339: *     TRD_stage 1: = LT + LW + LS1 + LS2
  340: *                  = LDT*KD + N*KD + N*MAX(KD,FACTOPTNB) + LDS2*KD
  341: *                    where LDT=LDS2=KD
  342: *                  = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD
  343: *     TRD_stage 2: = (2NB+1)*N + KD*NTHREADS
  344: *     TRD_both   : = max(stage1,stage2) + AB ( AB=(KD+1)*N )
  345: *                  = N*KD + N*max(KD+1,FACTOPTNB)
  346: *                    + max(2*KD*KD, KD*NTHREADS)
  347: *                    + (KD+1)*N
  348:          LWORK        = -1
  349: *
  350: *        Reuse SUBNAM to call ILAENV with ISPEC = 1 for xGEQRF and
  351: *        xGELQF of the same precision.
  352: *
  353:          SUBNAM(1:1)  = PREC
  354:          SUBNAM(2:6)  = 'GEQRF'
  355:          QROPTNB      = ILAENV( 1, SUBNAM, ' ', NI, NBI, -1, -1 )
  356:          SUBNAM(2:6)  = 'GELQF'
  357:          LQOPTNB      = ILAENV( 1, SUBNAM, ' ', NBI, NI, -1, -1 )
  358: *        Could be QR or LQ for TRD and the max for BRD
  359:          FACTOPTNB    = MAX(QROPTNB, LQOPTNB)
  360:          IF( ALGO.EQ.'TRD' ) THEN
  361:             IF( STAG.EQ.'2STAG' ) THEN
  362:                LWORK = NI*NBI + NI*MAX(NBI+1,FACTOPTNB)
  363:      $              + MAX(2*NBI*NBI, NBI*NTHREADS)
  364:      $              + (NBI+1)*NI
  365:             ELSE IF( (STAG.EQ.'HE2HB').OR.(STAG.EQ.'SY2SB') ) THEN
  366:                LWORK = NI*NBI + NI*MAX(NBI,FACTOPTNB) + 2*NBI*NBI
  367:             ELSE IF( (STAG.EQ.'HB2ST').OR.(STAG.EQ.'SB2ST') ) THEN
  368:                LWORK = (2*NBI+1)*NI + NBI*NTHREADS
  369:             ENDIF
  370:          ELSE IF( ALGO.EQ.'BRD' ) THEN
  371:             IF( STAG.EQ.'2STAG' ) THEN
  372:                LWORK = 2*NI*NBI + NI*MAX(NBI+1,FACTOPTNB)
  373:      $              + MAX(2*NBI*NBI, NBI*NTHREADS)
  374:      $              + (NBI+1)*NI
  375:             ELSE IF( STAG.EQ.'GE2GB' ) THEN
  376:                LWORK = NI*NBI + NI*MAX(NBI,FACTOPTNB) + 2*NBI*NBI
  377:             ELSE IF( STAG.EQ.'GB2BD' ) THEN
  378:                LWORK = (3*NBI+1)*NI + NBI*NTHREADS
  379:             ENDIF
  380:          ENDIF
  381: *
  382: *        The result is never smaller than 1.  LWORK is still -1 here
  383: *        when ALGO and STAG matched none of the cases above.
  384: *
  385:          IPARAM2STAGE = MAX( 1, LWORK )
  386: *
  387:       ELSE IF ( ISPEC .EQ. 21 ) THEN
  388: *
  389: *     ISPEC = 21 for future use
  390:          IPARAM2STAGE = NXI
  391:       ENDIF
  392: *
  393: *     ==== End of IPARAM2STAGE ====
  394: *
  395:       END

CVSweb interface <joel.bertrand@systella.fr>