File:  [local] / rpl / lapack / lapack / iparam2stage.F
Revision 1.2: download - view: text, annotated - select for diffs - revision graph
Sat Jun 17 11:06:40 2017 UTC (6 years, 10 months ago) by bertrand
Branches: MAIN
CVS tags: rpl-4_1_27, rpl-4_1_26, HEAD
Cohérence.

    1: *> \brief \b IPARAM2STAGE
    2: *
    3: *  =========== DOCUMENTATION ===========
    4: *
    5: * Online html documentation available at 
    6: *            http://www.netlib.org/lapack/explore-html/ 
    7: *
    8: *> \htmlonly
    9: *> Download IPARAM2STAGE + dependencies 
   10: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.tgz?format=tgz&filename=/lapack/lapack_routine/iparam2stage.F"> 
   11: *> [TGZ]</a> 
   12: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.zip?format=zip&filename=/lapack/lapack_routine/iparam2stage.F"> 
   13: *> [ZIP]</a>
   14: *> <a href="http://www.netlib.org/cgi-bin/netlibfiles.txt?format=txt&filename=/lapack/lapack_routine/iparam2stage.F"> 
   15: *> [TXT]</a>
   16: *> \endhtmlonly 
   17: *
   18: *  Definition:
   19: *  ===========
   20: *
   21: *       INTEGER FUNCTION IPARAM2STAGE( ISPEC, NAME, OPTS, 
   22: *                                    NI, NBI, IBI, NXI )
   23: *       #if defined(_OPENMP)
   24: *           use omp_lib
   25: *       #endif
   26: *       IMPLICIT NONE
   27: *
   28: *       .. Scalar Arguments ..
   29: *       CHARACTER*( * )    NAME, OPTS
   30: *       INTEGER            ISPEC, NI, NBI, IBI, NXI
   31: *
   32: *> \par Purpose:
   33: *  =============
   34: *>
   35: *> \verbatim
   36: *>
   37: *>      This program sets problem and machine dependent parameters
   38: *>      useful for xHETRD_2STAGE, xHETRD_HE2HB, xHETRD_HB2ST,
   39: *>      xGEBRD_2STAGE, xGEBRD_GE2GB, xGEBRD_GB2BD 
   40: *>      and related subroutines for eigenvalue problems. 
   41: *>      It is called whenever ILAENV is called with 17 <= ISPEC <= 21
   42: *> \endverbatim
   43: *
   44: *  Arguments:
   45: *  ==========
   46: *
   47: *> \param[in] ISPEC
   48: *> \verbatim
   49: *>          ISPEC is integer scalar
   50: *>              ISPEC specifies which tunable parameter IPARAM2STAGE should
   51: *>              return.
   52: *>
   53: *>              ISPEC=17: the optimal blocksize nb for the reduction to
   54: *>                        BAND
   55: *>
   56: *>              ISPEC=18: the optimal blocksize ib for the eigenvectors
   57: *>                        singular vectors update routine
   58: *>
   59: *>              ISPEC=19: The length of the array that stores the Householder
   60: *>                        representation for the second stage 
   61: *>                        Band to Tridiagonal or Bidiagonal
   62: *>
   63: *>              ISPEC=20: The workspace needed for the routine in input.
   64: *>
   65: *>              ISPEC=21: For future release.
   66: *> \endverbatim
   67: *>
   68: *> \param[in] NAME
   69: *> \verbatim
   70: *>          NAME is character string
   71: *>               Name of the calling subroutine
   72: *> \endverbatim
   73: *>
   74: *> \param[in] OPTS
   75: *> \verbatim
   76: *>          OPTS is CHARACTER*(*)
   77: *>          The character options to the subroutine NAME, concatenated
   78: *>          into a single character string.  For example, UPLO = 'U',
   79: *>          TRANS = 'T', and DIAG = 'N' for a triangular routine would
   80: *>          be specified as OPTS = 'UTN'.
   81: *> \endverbatim
   82: *>
   83: *> \param[in] NI
   84: *> \verbatim
   85: *>          NI is INTEGER which is the size of the matrix
   86: *> \endverbatim
   87: *>
   88: *> \param[in] NBI
   89: *> \verbatim
   90: *>          NBI is INTEGER which is the blocksize used in the reduction,
   91: *>          (e.g., the size of the band), needed to compute workspace
   92: *>          and LHOUS2.
   93: *> \endverbatim
   94: *>
   95: *> \param[in] IBI
   96: *> \verbatim
   97: *>          IBI is INTEGER which represents the IB of the reduction,
   98: *>          needed to compute workspace and LHOUS2.
   99: *> \endverbatim
  100: *>
  101: *> \param[in] NXI
  102: *> \verbatim
  103: *>          NXI is INTEGER needed in the future release.
  104: *> \endverbatim
  105: *
  106: *  Authors:
  107: *  ========
  108: *
  109: *> \author Univ. of Tennessee 
  110: *> \author Univ. of California Berkeley 
  111: *> \author Univ. of Colorado Denver 
  112: *> \author NAG Ltd. 
  113: *
  114: *> \date June 2016
  115: *
  116: *> \ingroup auxOTHERauxiliary
  117: *
  118: *> \par Further Details:
  119: *  =====================
  120: *>
  121: *> \verbatim
  122: *>
  123: *>  Implemented by Azzam Haidar.
  124: *>
  125: *>  All details are available in the technical report and the SC11 and SC13 papers.
  126: *>
  127: *>  Azzam Haidar, Hatem Ltaief, and Jack Dongarra.
  128: *>  Parallel reduction to condensed forms for symmetric eigenvalue problems
  129: *>  using aggregated fine-grained and memory-aware kernels. In Proceedings
  130: *>  of 2011 International Conference for High Performance Computing,
  131: *>  Networking, Storage and Analysis (SC '11), New York, NY, USA,
  132: *>  Article 8 , 11 pages.
  133: *>  http://doi.acm.org/10.1145/2063384.2063394
  134: *>
  135: *>  A. Haidar, J. Kurzak, P. Luszczek, 2013.
  136: *>  An improved parallel singular value algorithm and its implementation 
  137: *>  for multicore hardware, In Proceedings of 2013 International Conference
  138: *>  for High Performance Computing, Networking, Storage and Analysis (SC '13).
  139: *>  Denver, Colorado, USA, 2013.
  140: *>  Article 90, 12 pages.
  141: *>  http://doi.acm.org/10.1145/2503210.2503292
  142: *>
  143: *>  A. Haidar, R. Solca, S. Tomov, T. Schulthess and J. Dongarra.
  144: *>  A novel hybrid CPU-GPU generalized eigensolver for electronic structure 
  145: *>  calculations based on fine-grained memory aware tasks.
  146: *>  International Journal of High Performance Computing Applications.
  147: *>  Volume 28 Issue 2, Pages 196-209, May 2014.
  148: *>  http://hpc.sagepub.com/content/28/2/196 
  149: *>
  150: *> \endverbatim
  151: *>
  152: *  =====================================================================
  153:       INTEGER FUNCTION IPARAM2STAGE( ISPEC, NAME, OPTS,
  154:      $                              NI, NBI, IBI, NXI )
  155: #if defined(_OPENMP)
  156:       use omp_lib
  157: #endif
  158:       IMPLICIT NONE
  159: *
  160: *  -- LAPACK auxiliary routine (version 3.7.0) --
  161: *  -- LAPACK is a software package provided by Univ. of Tennessee,    --
  162: *  -- Univ. of California Berkeley, Univ. of Colorado Denver and NAG Ltd..--
  163: *     June 2016
  164: *
  165: *     .. Scalar Arguments ..
  166:       CHARACTER*( * )    NAME, OPTS
  167:       INTEGER            ISPEC, NI, NBI, IBI, NXI
  168: *
  169: *  ================================================================
  170: *     ..
  171: *     .. Local Scalars ..
  172:       INTEGER            I, IC, IZ, KD, IB, LHOUS, LWORK, NTHREADS,
  173:      $                   FACTOPTNB, QROPTNB, LQOPTNB
  174:       LOGICAL            RPREC, CPREC
  175:       CHARACTER          PREC*1, ALGO*3, STAG*5, SUBNAM*12, VECT*1
  176: *     ..
  177: *     .. Intrinsic Functions ..
  178:       INTRINSIC          CHAR, ICHAR, MAX
  179: *     ..
  180: *     .. External Functions ..
  181:       INTEGER            ILAENV
  182:       EXTERNAL           ILAENV
  183: *     ..
  184: *     .. Executable Statements ..
  185: *
  186: *     Invalid value for ISPEC: only 17..21 are accepted.
  187: *
  188:       IF( (ISPEC.LT.17).OR.(ISPEC.GT.21) ) THEN
  189:           IPARAM2STAGE = -1
  190:           RETURN
  191:       ENDIF
  192: *
  193: *     Get the number of threads: 1 unless built with OpenMP, else
  194: *     what OMP_GET_NUM_THREADS returns inside a parallel region.
  195:       NTHREADS = 1
  196: #if defined(_OPENMP)
  197: !$OMP PARALLEL
  198:       NTHREADS = OMP_GET_NUM_THREADS()
  199: !$OMP END PARALLEL
  200: #endif
  201: *      WRITE(*,*) 'IPARAM VOICI NTHREADS ISPEC ',NTHREADS, ISPEC
  202: *
  203:       IF( ISPEC .NE. 19 ) THEN
  204: *        NAME is only parsed when ISPEC is not 19.  Convert it to
  205: *        upper case if its first character is lower case; -1 is
  206: *        the provisional result until a branch below sets one.
  207:          IPARAM2STAGE = -1
  208:          SUBNAM = NAME
  209:          IC = ICHAR( SUBNAM( 1: 1 ) )
  210:          IZ = ICHAR( 'Z' )
  211:          IF( IZ.EQ.90 .OR. IZ.EQ.122 ) THEN
  212: *
  213: *           ASCII character set
  214: *
  215:             IF( IC.GE.97 .AND. IC.LE.122 ) THEN
  216:                SUBNAM( 1: 1 ) = CHAR( IC-32 )
  217:                DO 100 I = 2, 12
  218:                   IC = ICHAR( SUBNAM( I: I ) )
  219:                   IF( IC.GE.97 .AND. IC.LE.122 )
  220:      $               SUBNAM( I: I ) = CHAR( IC-32 )
  221:   100          CONTINUE
  222:             END IF
  223: *
  224:          ELSE IF( IZ.EQ.233 .OR. IZ.EQ.169 ) THEN
  225: *
  226: *           EBCDIC character set
  227: *
  228:             IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR.
  229:      $          ( IC.GE.145 .AND. IC.LE.153 ) .OR.
  230:      $          ( IC.GE.162 .AND. IC.LE.169 ) ) THEN
  231:                SUBNAM( 1: 1 ) = CHAR( IC+64 )
  232:                DO 110 I = 2, 12
  233:                   IC = ICHAR( SUBNAM( I: I ) )
  234:                   IF( ( IC.GE.129 .AND. IC.LE.137 ) .OR.
  235:      $                ( IC.GE.145 .AND. IC.LE.153 ) .OR.
  236:      $                ( IC.GE.162 .AND. IC.LE.169 ) )SUBNAM( I:
  237:      $                I ) = CHAR( IC+64 )
  238:   110          CONTINUE
  239:             END IF
  240: *
  241:          ELSE IF( IZ.EQ.218 .OR. IZ.EQ.250 ) THEN
  242: *
  243: *           Prime machines:  ASCII+128
  244: *
  245:             IF( IC.GE.225 .AND. IC.LE.250 ) THEN
  246:                SUBNAM( 1: 1 ) = CHAR( IC-32 )
  247:                DO 120 I = 2, 12
  248:                  IC = ICHAR( SUBNAM( I: I ) )
  249:                  IF( IC.GE.225 .AND. IC.LE.250 )
  250:      $             SUBNAM( I: I ) = CHAR( IC-32 )
  251:   120          CONTINUE
  252:             END IF
  253:          END IF
  254: *        Name fields: precision (1), algorithm (4:6), stage (8:12)
  255:          PREC  = SUBNAM( 1: 1 )
  256:          ALGO  = SUBNAM( 4: 6 )
  257:          STAG  = SUBNAM( 8:12 )
  258:          RPREC = PREC.EQ.'S' .OR. PREC.EQ.'D'
  259:          CPREC = PREC.EQ.'C' .OR. PREC.EQ.'Z'
  260: *
  261: *        Precision must be one of S, D, C, Z; otherwise return -1.
  262: *
  263:          IF( .NOT.( RPREC .OR. CPREC ) ) THEN
  264:              IPARAM2STAGE = -1
  265:              RETURN
  266:          ENDIF
  267:       ENDIF
  268: *      WRITE(*,*),'RPREC,CPREC ',RPREC,CPREC,
  269: *     $           '   ALGO ',ALGO,'    STAGE ',STAG
  270: *
  271: *
  272:       IF (( ISPEC .EQ. 17 ) .OR. ( ISPEC .EQ. 18 )) THEN
  273: *
  274: *     ISPEC = 17, 18:  block sizes KD (17) and IB (18).
  275: *     They could also depend on N, but for now they depend only on
  276: *     the thread count (NTHREADS) and on real versus complex precision.
  277: *
  278:          IF( NTHREADS.GT.4 ) THEN
  279:             IF( CPREC ) THEN
  280:                KD = 128
  281:                IB = 32
  282:             ELSE
  283:                KD = 160
  284:                IB = 40
  285:             ENDIF
  286:          ELSE IF( NTHREADS.GT.1 ) THEN
  287:             IF( CPREC ) THEN
  288:                KD = 64
  289:                IB = 32
  290:             ELSE
  291:                KD = 64
  292:                IB = 32
  293:             ENDIF
  294:          ELSE
  295:             IF( CPREC ) THEN
  296:                KD = 16
  297:                IB = 16
  298:             ELSE
  299:                KD = 32
  300:                IB = 16
  301:             ENDIF
  302:          ENDIF
  303:          IF( ISPEC.EQ.17 ) IPARAM2STAGE = KD
  304:          IF( ISPEC.EQ.18 ) IPARAM2STAGE = IB
  305: *
  306:       ELSE IF ( ISPEC .EQ. 19 ) THEN
  307: *
  308: *     ISPEC = 19:
  309: *     LHOUS, length of the Householder representation
  310: *     matrix (V,T) of the second stage. Should be >= 1.
  311: *     The first character of OPTS is the VECT option; 'N' is
  312: *     accepted in either case ('N' or 'n').
  313:          VECT  = OPTS(1:1)
  314:          IF( VECT.EQ.'N' .OR. VECT.EQ.'n' ) THEN
  315:             LHOUS = MAX( 1, 4*NI )
  316:          ELSE
  317: *           This is not correct; it needs to call the ALGO and the stage2
  318:             LHOUS = MAX( 1, 4*NI ) + IBI
  319:          ENDIF
  320:          IF( LHOUS.GE.0 ) THEN
  321:             IPARAM2STAGE = LHOUS
  322:          ELSE
  323:             IPARAM2STAGE = -1
  324:          ENDIF
  325: *
  326:       ELSE IF ( ISPEC .EQ. 20 ) THEN
  327: *
  328: *     ISPEC = 20: (21 for future use)
  329: *     LWORK, length of the workspace for
  330: *     either or both stages for TRD and BRD. Should be >= 1.
  331: *     TRD:
  332: *     TRD_stage 1: = LT + LW + LS1 + LS2
  333: *                  = LDT*KD + N*KD + N*MAX(KD,FACTOPTNB) + LDS2*KD
  334: *                    where LDT=LDS2=KD
  335: *                  = N*KD + N*max(KD,FACTOPTNB) + 2*KD*KD
  336: *     TRD_stage 2: = (2NB+1)*N + KD*NTHREADS
  337: *     TRD_both   : = max(stage1,stage2) + AB ( AB=(KD+1)*N )
  338: *                  = N*KD + N*max(KD+1,FACTOPTNB)
  339: *                    + max(2*KD*KD, KD*NTHREADS)
  340: *                    + (KD+1)*N
  341:          LWORK        = -1
  342:          SUBNAM(1:1)  = PREC
  343:          SUBNAM(2:6)  = 'GEQRF'
  344:          QROPTNB      = ILAENV( 1, SUBNAM, ' ', NI, NBI, -1, -1 )
  345:          SUBNAM(2:6)  = 'GELQF'
  346:          LQOPTNB      = ILAENV( 1, SUBNAM, ' ', NBI, NI, -1, -1 )
  347: *        Could be QR or LQ for TRD and the max for BRD
  348:          FACTOPTNB    = MAX(QROPTNB, LQOPTNB)
  349:          IF( ALGO.EQ.'TRD' ) THEN
  350:             IF( STAG.EQ.'2STAG' ) THEN
  351:                LWORK = NI*NBI + NI*MAX(NBI+1,FACTOPTNB)
  352:      $              + MAX(2*NBI*NBI, NBI*NTHREADS)
  353:      $              + (NBI+1)*NI
  354:             ELSE IF( (STAG.EQ.'HE2HB').OR.(STAG.EQ.'SY2SB') ) THEN
  355:                LWORK = NI*NBI + NI*MAX(NBI,FACTOPTNB) + 2*NBI*NBI
  356:             ELSE IF( (STAG.EQ.'HB2ST').OR.(STAG.EQ.'SB2ST') ) THEN
  357:                LWORK = (2*NBI+1)*NI + NBI*NTHREADS
  358:             ENDIF
  359:          ELSE IF( ALGO.EQ.'BRD' ) THEN
  360:             IF( STAG.EQ.'2STAG' ) THEN
  361:                LWORK = 2*NI*NBI + NI*MAX(NBI+1,FACTOPTNB)
  362:      $              + MAX(2*NBI*NBI, NBI*NTHREADS)
  363:      $              + (NBI+1)*NI
  364:             ELSE IF( STAG.EQ.'GE2GB' ) THEN
  365:                LWORK = NI*NBI + NI*MAX(NBI,FACTOPTNB) + 2*NBI*NBI
  366:             ELSE IF( STAG.EQ.'GB2BD' ) THEN
  367:                LWORK = (3*NBI+1)*NI + NBI*NTHREADS
  368:             ENDIF
  369:          ENDIF
  370:          LWORK = MAX ( 1, LWORK )
  371: *        LWORK >= 1 here, so an unrecognized ALGO/STAG yields 1, not -1.
  372:          IF( LWORK.GT.0 ) THEN
  373:             IPARAM2STAGE = LWORK
  374:          ELSE
  375:             IPARAM2STAGE = -1
  376:          ENDIF
  377: *
  378:       ELSE IF ( ISPEC .EQ. 21 ) THEN
  379: *
  380: *     ISPEC = 21:  reserved for future use; NXI is returned as is.
  381:          IPARAM2STAGE = NXI
  382:       ENDIF
  383: *
  384: *     ==== End of IPARAM2STAGE ====
  385: *
  386:       END

CVSweb interface <joel.bertrand@systella.fr>