File:  [local] / rpl / rplawk / lex.c
Revision 1.2: download - view: text, annotated - select for diffs - revision graph
Wed Jun 12 09:47:52 2013 UTC (10 years, 10 months ago) by bertrand
Branches: MAIN
CVS tags: rpl-4_1_35, rpl-4_1_34, rpl-4_1_33, rpl-4_1_32, rpl-4_1_31, rpl-4_1_30, rpl-4_1_29, rpl-4_1_28, rpl-4_1_27, rpl-4_1_26, rpl-4_1_25, rpl-4_1_24, rpl-4_1_23, rpl-4_1_22, rpl-4_1_21, rpl-4_1_20, rpl-4_1_19, rpl-4_1_18, rpl-4_1_17, rpl-4_1_16, rpl-4_1_15, rpl-4_1_14, HEAD
Quelques patches pour rplawk et ncurses.

    1: /****************************************************************
    2: Copyright (C) Lucent Technologies 1997
    3: All Rights Reserved
    4: 
    5: Permission to use, copy, modify, and distribute this software and
    6: its documentation for any purpose and without fee is hereby
    7: granted, provided that the above copyright notice appear in all
    8: copies and that both that the copyright notice and this
    9: permission notice and warranty disclaimer appear in supporting
   10: documentation, and that the name Lucent Technologies or any of
   11: its entities not be used in advertising or publicity pertaining
   12: to distribution of the software without specific, written prior
   13: permission.
   14: 
   15: LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
   16: INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
   17: IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
   18: SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
   19: WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
   20: IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
   21: ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
   22: THIS SOFTWARE.
   23: ****************************************************************/
   24: 
   25: #include <stdio.h>
   26: #include <stdlib.h>
   27: #include <string.h>
   28: #include <ctype.h>
   29: #include "awk.h"
   30: #include "ytab.h"
   31: 
   32: extern YYSTYPE  yylval;
   33: extern int  infunc;
   34: 
   35: int lineno  = 1;
   36: int bracecnt = 0;
   37: int brackcnt  = 0;
   38: int parencnt = 0;
   39: 
   40: typedef struct Keyword {
   41:     const char *word;
   42:     int sub;
   43:     int type;
   44: } Keyword;
   45: 
   46: Keyword keywords[] ={   /* keep sorted: binary searched */
   47:     { "BEGIN",  XBEGIN,     XBEGIN },
   48:     { "END",    XEND,       XEND },
   49:     { "NF",     VARNF,      VARNF },
   50:     { "atan2",  FATAN,      BLTIN },
   51:     { "break",  BREAK,      BREAK },
   52:     { "close",  CLOSE,      CLOSE },
   53:     { "continue",   CONTINUE,   CONTINUE },
   54:     { "cos",    FCOS,       BLTIN },
   55:     { "delete", DELETE,     DELETE },
   56:     { "do",     DO,     DO },
   57:     { "else",   ELSE,       ELSE },
   58:     { "exit",   EXIT,       EXIT },
   59:     { "exp",    FEXP,       BLTIN },
   60:     { "fflush", FFLUSH,     BLTIN },
   61:     { "for",    FOR,        FOR },
   62:     { "func",   FUNC,       FUNC },
   63:     { "function",   FUNC,       FUNC },
   64:     { "getline",    GETLINE,    GETLINE },
   65:     { "gsub",   GSUB,       GSUB },
   66:     { "if",     IF,     IF },
   67:     { "in",     IN,     IN },
   68:     { "index",  INDEX,      INDEX },
   69:     { "int",    FINT,       BLTIN },
   70:     { "length", FLENGTH,    BLTIN },
   71:     { "log",    FLOG,       BLTIN },
   72:     { "match",  MATCHFCN,   MATCHFCN },
   73:     { "next",   NEXT,       NEXT },
   74:     { "nextfile",   NEXTFILE,   NEXTFILE },
   75:     { "print",  PRINT,      PRINT },
   76:     { "printf", PRINTF,     PRINTF },
   77:     { "rand",   FRAND,      BLTIN },
   78:     { "return", RETURN,     RETURN },
   79:     { "sin",    FSIN,       BLTIN },
   80:     { "split",  SPLIT,      SPLIT },
   81:     { "sprintf",    SPRINTF,    SPRINTF },
   82:     { "sqrt",   FSQRT,      BLTIN },
   83:     { "srand",  FSRAND,     BLTIN },
   84:     { "sub",    SUB,        SUB },
   85:     { "substr", SUBSTR,     SUBSTR },
   86:     { "system", FSYSTEM,    BLTIN },
   87:     { "tolower",    FTOLOWER,   BLTIN },
   88:     { "toupper",    FTOUPPER,   BLTIN },
   89:     { "while",  WHILE,      WHILE },
   90: };
   91: 
   92: #define RET(x)  { if(dbg)printf("lex %s\n", tokname(x)); return(x); }
   93: 
   94: int peek(void)
   95: {
   96:     int c = input();
   97:     unput(c);
   98:     return c;
   99: }
  100: 
  101: int gettok(char **pbuf, int *psz)   /* get next input token */
  102: {
  103:     int c, retc;
  104:     char *buf = *pbuf;
  105:     int sz = *psz;
  106:     char *bp = buf;
  107: 
  108:     c = input();
  109:     if (c == 0)
  110:         return 0;
  111:     buf[0] = c;
  112:     buf[1] = 0;
  113:     if (!isalnum(c) && c != '.' && c != '_')
  114:         return c;
  115: 
  116:     *bp++ = c;
  117:     if (isalpha(c) || c == '_') {   /* it's a varname */
  118:         for ( ; (c = input()) != 0; ) {
  119:             if (bp-buf >= sz)
  120:                 if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
  121:                     FATAL( "out of space for name %.10s...", buf );
  122:             if (isalnum(c) || c == '_')
  123:                 *bp++ = c;
  124:             else {
  125:                 *bp = 0;
  126:                 unput(c);
  127:                 break;
  128:             }
  129:         }
  130:         *bp = 0;
  131:         retc = 'a'; /* alphanumeric */
  132:     } else {    /* maybe it's a number, but could be . */
  133:         char *rem;
  134:         /* read input until can't be a number */
  135:         for ( ; (c = input()) != 0; ) {
  136:             if (bp-buf >= sz)
  137:                 if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
  138:                     FATAL( "out of space for number %.10s...", buf );
  139:             if (isdigit(c) || c == 'e' || c == 'E' 
  140:               || c == '.' || c == '+' || c == '-')
  141:                 *bp++ = c;
  142:             else {
  143:                 unput(c);
  144:                 break;
  145:             }
  146:         }
  147:         *bp = 0;
  148:         strtod(buf, &rem);  /* parse the number */
  149:         if (rem == buf) {   /* it wasn't a valid number at all */
  150:             buf[1] = 0; /* return one character as token */
  151:             retc = buf[0];  /* character is its own type */
  152:             unputstr(rem+1); /* put rest back for later */
  153:         } else {    /* some prefix was a number */
  154:             unputstr(rem);  /* put rest back for later */
  155:             rem[0] = 0; /* truncate buf after number part */
  156:             retc = '0'; /* type is number */
  157:         }
  158:     }
  159:     *pbuf = buf;
  160:     *psz = sz;
  161:     return retc;
  162: }
  163: 
  164: int word(char *);
  165: int string(void);
  166: int regexpr(void);
  167: int sc  = 0;    /* 1 => return a } right now */
  168: int reg = 0;    /* 1 => return a REGEXPR now */
  169: 
  170: int yylex(void)
  171: {
  172:     int c;
  173:     static char *buf = 0;
  174:     static int bufsize = 5; /* BUG: setting this small causes core dump! */
  175: 
  176:     if (buf == 0 && (buf = (char *) malloc(bufsize)) == NULL)
  177:         FATAL( "out of space in yylex" );
  178:     if (sc) {
  179:         sc = 0;
  180:         RET('}');
  181:     }
  182:     if (reg) {
  183:         reg = 0;
  184:         return regexpr();
  185:     }
  186:     for (;;) {
  187:         c = gettok(&buf, &bufsize);
  188:         if (c == 0)
  189:             return 0;
  190:         if (isalpha(c) || c == '_')
  191:             return word(buf);
  192:         if (isdigit(c)) {
  193:             yylval.cp = setsymtab(buf, tostring(buf), atof(buf), CON|NUM, symtab);
  194:             /* should this also have STR set? */
  195:             RET(NUMBER);
  196:         }
  197:     
  198:         yylval.i = c;
  199:         switch (c) {
  200:         case '\n':  /* {EOL} */
  201:             RET(NL);
  202:         case '\r':  /* assume \n is coming */
  203:         case ' ':   /* {WS}+ */
  204:         case '\t':
  205:             break;
  206:         case '#':   /* #.* strip comments */
  207:             while ((c = input()) != '\n' && c != 0)
  208:                 ;
  209:             unput(c);
  210:             break;
  211:         case ';':
  212:             RET(';');
  213:         case '\\':
  214:             if (peek() == '\n') {
  215:                 input();
  216:             } else if (peek() == '\r') {
  217:                 input(); input();   /* \n */
  218:                 lineno++;
  219:             } else {
  220:                 RET(c);
  221:             }
  222:             break;
  223:         case '&':
  224:             if (peek() == '&') {
  225:                 input(); RET(AND);
  226:             } else 
  227:                 RET('&');
  228:         case '|':
  229:             if (peek() == '|') {
  230:                 input(); RET(BOR);
  231:             } else
  232:                 RET('|');
  233:         case '!':
  234:             if (peek() == '=') {
  235:                 input(); yylval.i = NE; RET(NE);
  236:             } else if (peek() == '~') {
  237:                 input(); yylval.i = NOTMATCH; RET(MATCHOP);
  238:             } else
  239:                 RET(NOT);
  240:         case '~':
  241:             yylval.i = MATCH;
  242:             RET(MATCHOP);
  243:         case '<':
  244:             if (peek() == '=') {
  245:                 input(); yylval.i = LE; RET(LE);
  246:             } else {
  247:                 yylval.i = LT; RET(LT);
  248:             }
  249:         case '=':
  250:             if (peek() == '=') {
  251:                 input(); yylval.i = EQ; RET(EQ);
  252:             } else {
  253:                 yylval.i = ASSIGN; RET(ASGNOP);
  254:             }
  255:         case '>':
  256:             if (peek() == '=') {
  257:                 input(); yylval.i = GE; RET(GE);
  258:             } else if (peek() == '>') {
  259:                 input(); yylval.i = APPEND; RET(APPEND);
  260:             } else {
  261:                 yylval.i = GT; RET(GT);
  262:             }
  263:         case '+':
  264:             if (peek() == '+') {
  265:                 input(); yylval.i = INCR; RET(INCR);
  266:             } else if (peek() == '=') {
  267:                 input(); yylval.i = ADDEQ; RET(ASGNOP);
  268:             } else
  269:                 RET('+');
  270:         case '-':
  271:             if (peek() == '-') {
  272:                 input(); yylval.i = DECR; RET(DECR);
  273:             } else if (peek() == '=') {
  274:                 input(); yylval.i = SUBEQ; RET(ASGNOP);
  275:             } else
  276:                 RET('-');
  277:         case '*':
  278:             if (peek() == '=') {    /* *= */
  279:                 input(); yylval.i = MULTEQ; RET(ASGNOP);
  280:             } else if (peek() == '*') { /* ** or **= */
  281:                 input();    /* eat 2nd * */
  282:                 if (peek() == '=') {
  283:                     input(); yylval.i = POWEQ; RET(ASGNOP);
  284:                 } else {
  285:                     RET(POWER);
  286:                 }
  287:             } else
  288:                 RET('*');
  289:         case '/':
  290:             RET('/');
  291:         case '%':
  292:             if (peek() == '=') {
  293:                 input(); yylval.i = MODEQ; RET(ASGNOP);
  294:             } else
  295:                 RET('%');
  296:         case '^':
  297:             if (peek() == '=') {
  298:                 input(); yylval.i = POWEQ; RET(ASGNOP);
  299:             } else
  300:                 RET(POWER);
  301: 
  302:         case '$':
  303:             /* BUG: awkward, if not wrong */
  304:             c = gettok(&buf, &bufsize);
  305:             if (isalpha(c)) {
  306:                 if (strcmp(buf, "NF") == 0) {   /* very special */
  307:                     unputstr("(NF)");
  308:                     RET(INDIRECT);
  309:                 }
  310:                 c = peek();
  311:                 if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) {
  312:                     unputstr(buf);
  313:                     RET(INDIRECT);
  314:                 }
  315:                 yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab);
  316:                 RET(IVAR);
  317:             } else if (c == 0) {    /*  */
  318:                 SYNTAX( "unexpected end of input after $" );
  319:                 RET(';');
  320:             } else {
  321:                 unputstr(buf);
  322:                 RET(INDIRECT);
  323:             }
  324:     
  325:         case '}':
  326:             if (--bracecnt < 0)
  327:                 SYNTAX( "extra }" );
  328:             sc = 1;
  329:             RET(';');
  330:         case ']':
  331:             if (--brackcnt < 0)
  332:                 SYNTAX( "extra ]" );
  333:             RET(']');
  334:         case ')':
  335:             if (--parencnt < 0)
  336:                 SYNTAX( "extra )" );
  337:             RET(')');
  338:         case '{':
  339:             bracecnt++;
  340:             RET('{');
  341:         case '[':
  342:             brackcnt++;
  343:             RET('[');
  344:         case '(':
  345:             parencnt++;
  346:             RET('(');
  347:     
  348:         case '"':
  349:             return string();    /* BUG: should be like tran.c ? */
  350:     
  351:         default:
  352:             RET(c);
  353:         }
  354:     }
  355: }
  356: 
  357: int string(void)
  358: {
  359:     int c, n;
  360:     char *s, *bp;
  361:     static char *buf = 0;
  362:     static int bufsz = 500;
  363: 
  364:     if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
  365:         FATAL("out of space for strings");
  366:     for (bp = buf; (c = input()) != '"'; ) {
  367:         if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string"))
  368:             FATAL("out of space for string %.10s...", buf);
  369:         switch (c) {
  370:         case '\n':
  371:         case '\r':
  372:         case 0:
  373:             SYNTAX( "non-terminated string %.10s...", buf );
  374:             lineno++;
  375:             if (c == 0) /* hopeless */
  376:                 FATAL( "giving up" );
  377:             break;
  378:         case '\\':
  379:             c = input();
  380:             switch (c) {
  381:             case '"': *bp++ = '"'; break;
  382:             case 'n': *bp++ = '\n'; break;  
  383:             case 't': *bp++ = '\t'; break;
  384:             case 'f': *bp++ = '\f'; break;
  385:             case 'r': *bp++ = '\r'; break;
  386:             case 'b': *bp++ = '\b'; break;
  387:             case 'v': *bp++ = '\v'; break;
  388:             case 'a': *bp++ = '\007'; break;
  389:             case '\\': *bp++ = '\\'; break;
  390: 
  391:             case '0': case '1': case '2': /* octal: \d \dd \ddd */
  392:             case '3': case '4': case '5': case '6': case '7':
  393:                 n = c - '0';
  394:                 if ((c = peek()) >= '0' && c < '8') {
  395:                     n = 8 * n + input() - '0';
  396:                     if ((c = peek()) >= '0' && c < '8')
  397:                         n = 8 * n + input() - '0';
  398:                 }
  399:                 *bp++ = n;
  400:                 break;
  401: 
  402:             case 'x':   /* hex  \x0-9a-fA-F + */
  403:                 {   char xbuf[100], *px;
  404:                 for (px = xbuf; (c = input()) != 0 && px-xbuf < 100-2; ) {
  405:                     if (isdigit(c)
  406:                      || (c >= 'a' && c <= 'f')
  407:                      || (c >= 'A' && c <= 'F'))
  408:                         *px++ = c;
  409:                     else
  410:                         break;
  411:                 }
  412:                 *px = 0;
  413:                 unput(c);
  414:                 sscanf(xbuf, "%x", (unsigned int *) &n);
  415:                 *bp++ = n;
  416:                 break;
  417:                 }
  418: 
  419:             default: 
  420:                 *bp++ = c;
  421:                 break;
  422:             }
  423:             break;
  424:         default:
  425:             *bp++ = c;
  426:             break;
  427:         }
  428:     }
  429:     *bp = 0; 
  430:     s = tostring(buf);
  431:     *bp++ = ' '; *bp++ = 0;
  432:     yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
  433:     RET(STRING);
  434: }
  435: 
  436: 
  437: int binsearch(char *w, Keyword *kp, int n)
  438: {
  439:     int cond, low, mid, high;
  440: 
  441:     low = 0;
  442:     high = n - 1;
  443:     while (low <= high) {
  444:         mid = (low + high) / 2;
  445:         if ((cond = strcmp(w, kp[mid].word)) < 0)
  446:             high = mid - 1;
  447:         else if (cond > 0)
  448:             low = mid + 1;
  449:         else
  450:             return mid;
  451:     }
  452:     return -1;
  453: }
  454: 
  455: int word(char *w) 
  456: {
  457:     Keyword *kp;
  458:     int c, n;
  459: 
  460:     n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0]));
  461: /* BUG: this ought to be inside the if; in theory could fault (daniel barrett) */
  462:     kp = keywords + n;
  463:     if (n != -1) {  /* found in table */
  464:         yylval.i = kp->sub;
  465:         switch (kp->type) { /* special handling */
  466:         case BLTIN:
  467:             if (kp->sub == FSYSTEM && safe)
  468:                 SYNTAX( "system is unsafe" );
  469:             RET(kp->type);
  470:         case FUNC:
  471:             if (infunc)
  472:                 SYNTAX( "illegal nested function" );
  473:             RET(kp->type);
  474:         case RETURN:
  475:             if (!infunc)
  476:                 SYNTAX( "return not in function" );
  477:             RET(kp->type);
  478:         case VARNF:
  479:             yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab);
  480:             RET(VARNF);
  481:         default:
  482:             RET(kp->type);
  483:         }
  484:     }
  485:     c = peek(); /* look for '(' */
  486:     if (c != '(' && infunc && (n=isarg(w)) >= 0) {
  487:         yylval.i = n;
  488:         RET(ARG);
  489:     } else {
  490:         yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab);
  491:         if (c == '(') {
  492:             RET(CALL);
  493:         } else {
  494:             RET(VAR);
  495:         }
  496:     }
  497: }
  498: 
  499: void startreg(void) /* next call to yylex will return a regular expression */
  500: {
  501:     reg = 1;
  502: }
  503: 
  504: int regexpr(void)
  505: {
  506:     int c;
  507:     static char *buf = 0;
  508:     static int bufsz = 500;
  509:     char *bp;
  510: 
  511:     if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
  512:         FATAL("out of space for rex expr");
  513:     bp = buf;
  514:     for ( ; (c = input()) != '/' && c != 0; ) {
  515:         if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr"))
  516:             FATAL("out of space for reg expr %.10s...", buf);
  517:         if (c == '\n') {
  518:             SYNTAX( "newline in regular expression %.10s...", buf ); 
  519:             unput('\n');
  520:             break;
  521:         } else if (c == '\\') {
  522:             *bp++ = '\\'; 
  523:             *bp++ = input();
  524:         } else {
  525:             *bp++ = c;
  526:         }
  527:     }
  528:     *bp = 0;
  529:     if (c == 0)
  530:         SYNTAX("non-terminated regular expression %.10s...", buf);
  531:     yylval.s = tostring(buf);
  532:     unput('/');
  533:     RET(REGEXPR);
  534: }
  535: 
  536: /* low-level lexical stuff, sort of inherited from lex */
  537: 
  538: char    ebuf[300];
  539: char    *ep = ebuf;
  540: char    yysbuf[100];    /* pushback buffer */
  541: char    *yysptr = yysbuf;
  542: FILE    *yyin = 0;
  543: 
  544: int input(void) /* get next lexical input character */
  545: {
  546:     int c;
  547:     extern char *lexprog;
  548: 
  549:     if (yysptr > yysbuf)
  550:         c = (uschar)*--yysptr;
  551:     else if (lexprog != NULL) { /* awk '...' */
  552:         if ((c = (uschar)*lexprog) != 0)
  553:             lexprog++;
  554:     } else              /* awk -f ... */
  555:         c = pgetc();
  556:     if (c == '\n')
  557:         lineno++;
  558:     else if (c == EOF)
  559:         c = 0;
  560:     if (ep >= ebuf + sizeof ebuf)
  561:         ep = ebuf;
  562:     return *ep++ = c;
  563: }
  564: 
  565: void unput(int c)   /* put lexical character back on input */
  566: {
  567:     if (c == '\n')
  568:         lineno--;
  569:     if (yysptr >= yysbuf + sizeof(yysbuf))
  570:         FATAL("pushed back too much: %.20s...", yysbuf);
  571:     *yysptr++ = c;
  572:     if (--ep < ebuf)
  573:         ep = ebuf + sizeof(ebuf) - 1;
  574: }
  575: 
  576: void unputstr(const char *s)    /* put a string back on input */
  577: {
  578:     int i;
  579: 
  580:     for (i = strlen(s)-1; i >= 0; i--)
  581:         unput(s[i]);
  582: }

CVSweb interface <joel.bertrand@systella.fr>