File:
[local] /
rpl /
rplawk /
lex.c
Revision
1.2:
download - view:
text,
annotated -
select for diffs -
revision graph
Wed Jun 12 09:47:52 2013 UTC (11 years, 10 months ago) by
bertrand
Branches:
MAIN
CVS tags:
rpl-4_1_35,
rpl-4_1_34,
rpl-4_1_33,
rpl-4_1_32,
rpl-4_1_31,
rpl-4_1_30,
rpl-4_1_29,
rpl-4_1_28,
rpl-4_1_27,
rpl-4_1_26,
rpl-4_1_25,
rpl-4_1_24,
rpl-4_1_23,
rpl-4_1_22,
rpl-4_1_21,
rpl-4_1_20,
rpl-4_1_19,
rpl-4_1_18,
rpl-4_1_17,
rpl-4_1_16,
rpl-4_1_15,
rpl-4_1_14,
HEAD
Quelques patches pour rplawk et ncurses.
1: /****************************************************************
2: Copyright (C) Lucent Technologies 1997
3: All Rights Reserved
4:
5: Permission to use, copy, modify, and distribute this software and
6: its documentation for any purpose and without fee is hereby
7: granted, provided that the above copyright notice appear in all
8: copies and that both that the copyright notice and this
9: permission notice and warranty disclaimer appear in supporting
10: documentation, and that the name Lucent Technologies or any of
11: its entities not be used in advertising or publicity pertaining
12: to distribution of the software without specific, written prior
13: permission.
14:
15: LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
16: INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
17: IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
18: SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
19: WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
20: IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
21: ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
22: THIS SOFTWARE.
23: ****************************************************************/
24:
25: #include <stdio.h>
26: #include <stdlib.h>
27: #include <string.h>
28: #include <ctype.h>
29: #include "awk.h"
30: #include "ytab.h"
31:
extern YYSTYPE yylval;	/* semantic value handed to the parser with each token; defined outside this file */
extern int infunc;	/* defined outside this file; word() tests it to diagnose nested "function" and stray "return" */

int lineno = 1;		/* current input line: input() adds 1 per '\n', unput() subtracts 1 per '\n' */
int bracecnt = 0;	/* number of '{' returned minus number of '}' seen by yylex() */
int brackcnt = 0;	/* same for '[' and ']' */
int parencnt = 0;	/* same for '(' and ')' */
39:
/* One reserved word or built-in name recognised by word(). */
typedef struct Keyword {
	const char *word;	/* spelling as it appears in the program text */
	int	sub;		/* value word() stores in yylval.i on a match */
	int	type;		/* token code word() returns on a match */
} Keyword;

/*
 * Table of reserved words and built-in function names.
 * binsearch() compares entries with strcmp(), so the table must stay in
 * strcmp() order (upper-case names sort before lower-case ones).
 */
Keyword keywords[] ={	/* keep sorted: binary searched */
	{ "BEGIN",	XBEGIN,		XBEGIN },
	{ "END",	XEND,		XEND },
	{ "NF",		VARNF,		VARNF },
	{ "atan2",	FATAN,		BLTIN },
	{ "break",	BREAK,		BREAK },
	{ "close",	CLOSE,		CLOSE },
	{ "continue",	CONTINUE,	CONTINUE },
	{ "cos",	FCOS,		BLTIN },
	{ "delete",	DELETE,		DELETE },
	{ "do",		DO,		DO },
	{ "else",	ELSE,		ELSE },
	{ "exit",	EXIT,		EXIT },
	{ "exp",	FEXP,		BLTIN },
	{ "fflush",	FFLUSH,		BLTIN },
	{ "for",	FOR,		FOR },
	{ "func",	FUNC,		FUNC },
	{ "function",	FUNC,		FUNC },
	{ "getline",	GETLINE,	GETLINE },
	{ "gsub",	GSUB,		GSUB },
	{ "if",		IF,		IF },
	{ "in",		IN,		IN },
	{ "index",	INDEX,		INDEX },
	{ "int",	FINT,		BLTIN },
	{ "length",	FLENGTH,	BLTIN },
	{ "log",	FLOG,		BLTIN },
	{ "match",	MATCHFCN,	MATCHFCN },
	{ "next",	NEXT,		NEXT },
	{ "nextfile",	NEXTFILE,	NEXTFILE },
	{ "print",	PRINT,		PRINT },
	{ "printf",	PRINTF,		PRINTF },
	{ "rand",	FRAND,		BLTIN },
	{ "return",	RETURN,		RETURN },
	{ "sin",	FSIN,		BLTIN },
	{ "split",	SPLIT,		SPLIT },
	{ "sprintf",	SPRINTF,	SPRINTF },
	{ "sqrt",	FSQRT,		BLTIN },
	{ "srand",	FSRAND,		BLTIN },
	{ "sub",	SUB,		SUB },
	{ "substr",	SUBSTR,		SUBSTR },
	{ "system",	FSYSTEM,	BLTIN },
	{ "tolower",	FTOLOWER,	BLTIN },
	{ "toupper",	FTOUPPER,	BLTIN },
	{ "while",	WHILE,		WHILE },
};
91:
/*
 * RET(x): return token x from the enclosing function; when dbg is nonzero,
 * first print the token's name (as given by tokname()) to stdout.
 * Expands to a brace block, so it is written like a statement: RET(x);
 */
#define	RET(x)	{ if(dbg)printf("lex %s\n", tokname(x)); return(x); }
93:
/* peek: report the next input character while leaving it unread. */
int peek(void)
{
	int next;

	next = input();		/* consume one character ... */
	unput(next);		/* ... and push it straight back */
	return next;
}
100:
/*
 * gettok: read the next raw token from the input.
 *
 * pbuf/psz: address of the caller's token buffer and of its size.  The
 *	buffer may be reallocated through adjbuf(); the (possibly new) pointer
 *	and size are stored back before a name or number is returned.  The
 *	buffer must hold at least 2 bytes on entry, because buf[0] and buf[1]
 *	are written before any size check.
 *
 * Returns:
 *	0	at end of input;
 *	c	the character itself when it cannot start a name or number
 *		(buf then holds that one character as a string);
 *	'a'	when buf holds a name ([A-Za-z_][A-Za-z0-9_]*);
 *	'0'	when buf holds the longest prefix strtod() accepts as a number;
 *	'.'	when a '.' did not start a valid number (buf holds ".").
 */
int gettok(char **pbuf, int *psz)	/* get next input token */
{
	int c, retc;
	char *buf = *pbuf;
	int sz = *psz;
	char *bp = buf;

	c = input();
	if (c == 0)
		return 0;
	buf[0] = c;
	buf[1] = 0;
	if (!isalnum(c) && c != '.' && c != '_')
		return c;

	*bp++ = c;
	if (isalpha(c) || c == '_') {	/* it's a varname */
		for ( ; (c = input()) != 0; ) {
			/*
			 * Grow while there is not room for this character AND
			 * the terminating NUL; checking only for the character
			 * let "*bp = 0" below write one byte past the buffer
			 * when the input ended exactly at the buffer size.
			 */
			if (bp-buf+1 >= sz)
				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
					FATAL( "out of space for name %.10s...", buf );
			if (isalnum(c) || c == '_')
				*bp++ = c;
			else {
				*bp = 0;
				unput(c);
				break;
			}
		}
		*bp = 0;
		retc = 'a';	/* alphanumeric */
	} else {	/* maybe it's a number, but could be . */
		char *rem;
		/* read input until can't be a number */
		for ( ; (c = input()) != 0; ) {
			/* same reservation as above: character plus NUL */
			if (bp-buf+1 >= sz)
				if (!adjbuf(&buf, &sz, bp-buf+2, 100, &bp, "gettok"))
					FATAL( "out of space for number %.10s...", buf );
			if (isdigit(c) || c == 'e' || c == 'E'
			  || c == '.' || c == '+' || c == '-')
				*bp++ = c;
			else {
				unput(c);
				break;
			}
		}
		*bp = 0;
		strtod(buf, &rem);	/* parse the number */
		if (rem == buf) {	/* it wasn't a valid number at all */
			/*
			 * Push the tail back BEFORE truncating: once buf[1]
			 * is zeroed, rem+1 is an empty string and everything
			 * collected after the first character would be lost.
			 */
			unputstr(rem+1);	/* put rest back for later */
			buf[1] = 0;	/* return one character as token */
			retc = buf[0];	/* character is its own type */
		} else {	/* some prefix was a number */
			unputstr(rem);	/* put rest back for later */
			rem[0] = 0;	/* truncate buf after number part */
			retc = '0';	/* type is number */
		}
	}
	*pbuf = buf;
	*psz = sz;
	return retc;
}
163:
int	word(char *);	/* classify a name: keyword, ARG, CALL or VAR */
int	string(void);	/* lex the remainder of a "..." literal */
int	regexpr(void);	/* lex the remainder of a /.../ regular expression */
int	sc	= 0;	/* 1 => return a } right now */
int	reg	= 0;	/* 1 => return a REGEXPR now */
169:
/*
 * yylex: return the next token to the parser, or 0 at end of input.
 *
 * Two pending-state flags are honoured before any input is read:
 *	sc  - set when a '}' was seen; that call returned ';' and this call
 *	      returns the '}' itself;
 *	reg - set by startreg(); this call returns a regular expression
 *	      via regexpr().
 * Otherwise tokens are read with gettok().  Names go to word(), numbers are
 * entered in symtab and returned as NUMBER, and everything else is handled
 * by the switch below.  White space and comments produce no token; the loop
 * simply reads on.  yylval is set to the token's semantic value where one
 * exists.
 */
int yylex(void)
{
	int c;
	static char *buf = 0;
	/* must be at least 2: gettok() stores buf[0] and buf[1] before any size check */
	static int bufsize = 5; /* BUG: setting this small causes core dump! */

	if (buf == 0 && (buf = (char *) malloc(bufsize)) == NULL)
		FATAL( "out of space in yylex" );
	if (sc) {	/* second half of a '}': the ';' was returned last time */
		sc = 0;
		RET('}');
	}
	if (reg) {	/* parser asked for a regular expression via startreg() */
		reg = 0;
		return regexpr();
	}
	for (;;) {
		c = gettok(&buf, &bufsize);
		if (c == 0)
			return 0;
		if (isalpha(c) || c == '_')
			return word(buf);
		if (isdigit(c)) {
			yylval.cp = setsymtab(buf, tostring(buf), atof(buf), CON|NUM, symtab);
			/* should this also have STR set? */
			RET(NUMBER);
		}

		yylval.i = c;
		switch (c) {
		case '\n':	/* {EOL} */
			RET(NL);
		case '\r':	/* assume \n is coming */
		case ' ':	/* {WS}+ */
		case '\t':
			break;
		case '#':	/* #.* strip comments */
			while ((c = input()) != '\n' && c != 0)
				;
			unput(c);	/* leave the newline (or the 0) for the next gettok() */
			break;
		case ';':
			RET(';');
		case '\\':	/* backslash-newline joins lines; any other backslash is returned as is */
			if (peek() == '\n') {
				input();
			} else if (peek() == '\r') {
				input(); input();	/* \n */
				lineno++;
			} else {
				RET(c);
			}
			break;
		case '&':
			if (peek() == '&') {
				input(); RET(AND);
			} else
				RET('&');
		case '|':
			if (peek() == '|') {
				input(); RET(BOR);
			} else
				RET('|');
		case '!':
			if (peek() == '=') {
				input(); yylval.i = NE; RET(NE);
			} else if (peek() == '~') {
				input(); yylval.i = NOTMATCH; RET(MATCHOP);
			} else
				RET(NOT);
		case '~':
			yylval.i = MATCH;
			RET(MATCHOP);
		case '<':
			if (peek() == '=') {
				input(); yylval.i = LE; RET(LE);
			} else {
				yylval.i = LT; RET(LT);
			}
		case '=':
			if (peek() == '=') {
				input(); yylval.i = EQ; RET(EQ);
			} else {
				yylval.i = ASSIGN; RET(ASGNOP);
			}
		case '>':
			if (peek() == '=') {
				input(); yylval.i = GE; RET(GE);
			} else if (peek() == '>') {
				input(); yylval.i = APPEND; RET(APPEND);
			} else {
				yylval.i = GT; RET(GT);
			}
		case '+':
			if (peek() == '+') {
				input(); yylval.i = INCR; RET(INCR);
			} else if (peek() == '=') {
				input(); yylval.i = ADDEQ; RET(ASGNOP);
			} else
				RET('+');
		case '-':
			if (peek() == '-') {
				input(); yylval.i = DECR; RET(DECR);
			} else if (peek() == '=') {
				input(); yylval.i = SUBEQ; RET(ASGNOP);
			} else
				RET('-');
		case '*':
			if (peek() == '=') {	/* *= */
				input(); yylval.i = MULTEQ; RET(ASGNOP);
			} else if (peek() == '*') {	/* ** or **= */
				input();	/* eat 2nd * */
				if (peek() == '=') {
					input(); yylval.i = POWEQ; RET(ASGNOP);
				} else {
					RET(POWER);
				}
			} else
				RET('*');
		case '/':
			RET('/');
		case '%':
			if (peek() == '=') {
				input(); yylval.i = MODEQ; RET(ASGNOP);
			} else
				RET('%');
		case '^':	/* ^ and ^= yield the same tokens as ** and **= */
			if (peek() == '=') {
				input(); yylval.i = POWEQ; RET(ASGNOP);
			} else
				RET(POWER);

		case '$':
			/* BUG: awkward, if not wrong */
			/* look at the token after '$' to choose between IVAR and INDIRECT */
			c = gettok(&buf, &bufsize);
			if (isalpha(c)) {	/* gettok() returned 'a': a name follows */
				if (strcmp(buf, "NF") == 0) {	/* very special */
					unputstr("(NF)");
					RET(INDIRECT);
				}
				c = peek();
				if (c == '(' || c == '[' || (infunc && isarg(buf) >= 0)) {
					/* call, subscript or function argument: re-lex the name after INDIRECT */
					unputstr(buf);
					RET(INDIRECT);
				}
				yylval.cp = setsymtab(buf, "", 0.0, STR|NUM, symtab);
				RET(IVAR);
			} else if (c == 0) {	/*  */
				SYNTAX( "unexpected end of input after $" );
				RET(';');
			} else {
				/* anything else: push the token text back and let it be re-lexed */
				unputstr(buf);
				RET(INDIRECT);
			}

		case '}':
			if (--bracecnt < 0)
				SYNTAX( "extra }" );
			sc = 1;		/* return ';' now, the '}' itself on the next call */
			RET(';');
		case ']':
			if (--brackcnt < 0)
				SYNTAX( "extra ]" );
			RET(']');
		case ')':
			if (--parencnt < 0)
				SYNTAX( "extra )" );
			RET(')');
		case '{':
			bracecnt++;
			RET('{');
		case '[':
			brackcnt++;
			RET('[');
		case '(':
			parencnt++;
			RET('(');

		case '"':
			return string();	/* BUG: should be like tran.c ? */

		default:
			RET(c);
		}
	}
}
356:
357: int string(void)
358: {
359: int c, n;
360: char *s, *bp;
361: static char *buf = 0;
362: static int bufsz = 500;
363:
364: if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
365: FATAL("out of space for strings");
366: for (bp = buf; (c = input()) != '"'; ) {
367: if (!adjbuf(&buf, &bufsz, bp-buf+2, 500, &bp, "string"))
368: FATAL("out of space for string %.10s...", buf);
369: switch (c) {
370: case '\n':
371: case '\r':
372: case 0:
373: SYNTAX( "non-terminated string %.10s...", buf );
374: lineno++;
375: if (c == 0) /* hopeless */
376: FATAL( "giving up" );
377: break;
378: case '\\':
379: c = input();
380: switch (c) {
381: case '"': *bp++ = '"'; break;
382: case 'n': *bp++ = '\n'; break;
383: case 't': *bp++ = '\t'; break;
384: case 'f': *bp++ = '\f'; break;
385: case 'r': *bp++ = '\r'; break;
386: case 'b': *bp++ = '\b'; break;
387: case 'v': *bp++ = '\v'; break;
388: case 'a': *bp++ = '\007'; break;
389: case '\\': *bp++ = '\\'; break;
390:
391: case '0': case '1': case '2': /* octal: \d \dd \ddd */
392: case '3': case '4': case '5': case '6': case '7':
393: n = c - '0';
394: if ((c = peek()) >= '0' && c < '8') {
395: n = 8 * n + input() - '0';
396: if ((c = peek()) >= '0' && c < '8')
397: n = 8 * n + input() - '0';
398: }
399: *bp++ = n;
400: break;
401:
402: case 'x': /* hex \x0-9a-fA-F + */
403: { char xbuf[100], *px;
404: for (px = xbuf; (c = input()) != 0 && px-xbuf < 100-2; ) {
405: if (isdigit(c)
406: || (c >= 'a' && c <= 'f')
407: || (c >= 'A' && c <= 'F'))
408: *px++ = c;
409: else
410: break;
411: }
412: *px = 0;
413: unput(c);
414: sscanf(xbuf, "%x", (unsigned int *) &n);
415: *bp++ = n;
416: break;
417: }
418:
419: default:
420: *bp++ = c;
421: break;
422: }
423: break;
424: default:
425: *bp++ = c;
426: break;
427: }
428: }
429: *bp = 0;
430: s = tostring(buf);
431: *bp++ = ' '; *bp++ = 0;
432: yylval.cp = setsymtab(buf, s, 0.0, CON|STR|DONTFREE, symtab);
433: RET(STRING);
434: }
435:
436:
437: int binsearch(char *w, Keyword *kp, int n)
438: {
439: int cond, low, mid, high;
440:
441: low = 0;
442: high = n - 1;
443: while (low <= high) {
444: mid = (low + high) / 2;
445: if ((cond = strcmp(w, kp[mid].word)) < 0)
446: high = mid - 1;
447: else if (cond > 0)
448: low = mid + 1;
449: else
450: return mid;
451: }
452: return -1;
453: }
454:
455: int word(char *w)
456: {
457: Keyword *kp;
458: int c, n;
459:
460: n = binsearch(w, keywords, sizeof(keywords)/sizeof(keywords[0]));
461: /* BUG: this ought to be inside the if; in theory could fault (daniel barrett) */
462: kp = keywords + n;
463: if (n != -1) { /* found in table */
464: yylval.i = kp->sub;
465: switch (kp->type) { /* special handling */
466: case BLTIN:
467: if (kp->sub == FSYSTEM && safe)
468: SYNTAX( "system is unsafe" );
469: RET(kp->type);
470: case FUNC:
471: if (infunc)
472: SYNTAX( "illegal nested function" );
473: RET(kp->type);
474: case RETURN:
475: if (!infunc)
476: SYNTAX( "return not in function" );
477: RET(kp->type);
478: case VARNF:
479: yylval.cp = setsymtab("NF", "", 0.0, NUM, symtab);
480: RET(VARNF);
481: default:
482: RET(kp->type);
483: }
484: }
485: c = peek(); /* look for '(' */
486: if (c != '(' && infunc && (n=isarg(w)) >= 0) {
487: yylval.i = n;
488: RET(ARG);
489: } else {
490: yylval.cp = setsymtab(w, "", 0.0, STR|NUM|DONTFREE, symtab);
491: if (c == '(') {
492: RET(CALL);
493: } else {
494: RET(VAR);
495: }
496: }
497: }
498:
/*
 * startreg: set the reg flag so that yylex(), on its next call, skips
 * normal tokenising and returns the result of regexpr() instead.
 */
void startreg(void)	/* next call to yylex will return a regular expression */
{
	reg = 1;
}
503:
504: int regexpr(void)
505: {
506: int c;
507: static char *buf = 0;
508: static int bufsz = 500;
509: char *bp;
510:
511: if (buf == 0 && (buf = (char *) malloc(bufsz)) == NULL)
512: FATAL("out of space for rex expr");
513: bp = buf;
514: for ( ; (c = input()) != '/' && c != 0; ) {
515: if (!adjbuf(&buf, &bufsz, bp-buf+3, 500, &bp, "regexpr"))
516: FATAL("out of space for reg expr %.10s...", buf);
517: if (c == '\n') {
518: SYNTAX( "newline in regular expression %.10s...", buf );
519: unput('\n');
520: break;
521: } else if (c == '\\') {
522: *bp++ = '\\';
523: *bp++ = input();
524: } else {
525: *bp++ = c;
526: }
527: }
528: *bp = 0;
529: if (c == 0)
530: SYNTAX("non-terminated regular expression %.10s...", buf);
531: yylval.s = tostring(buf);
532: unput('/');
533: RET(REGEXPR);
534: }
535:
536: /* low-level lexical stuff, sort of inherited from lex */
537:
char	ebuf[300];		/* circular record of the characters returned by input() */
char	*ep = ebuf;		/* next slot in ebuf; advanced by input(), stepped back by unput() */
char	yysbuf[100];	/* pushback buffer */
char	*yysptr = yysbuf;	/* one past the last pushed-back character; equal to yysbuf when empty */
FILE	*yyin = 0;		/* not referenced by the code visible in this listing */
543:
544: int input(void) /* get next lexical input character */
545: {
546: int c;
547: extern char *lexprog;
548:
549: if (yysptr > yysbuf)
550: c = (uschar)*--yysptr;
551: else if (lexprog != NULL) { /* awk '...' */
552: if ((c = (uschar)*lexprog) != 0)
553: lexprog++;
554: } else /* awk -f ... */
555: c = pgetc();
556: if (c == '\n')
557: lineno++;
558: else if (c == EOF)
559: c = 0;
560: if (ep >= ebuf + sizeof ebuf)
561: ep = ebuf;
562: return *ep++ = c;
563: }
564:
565: void unput(int c) /* put lexical character back on input */
566: {
567: if (c == '\n')
568: lineno--;
569: if (yysptr >= yysbuf + sizeof(yysbuf))
570: FATAL("pushed back too much: %.20s...", yysbuf);
571: *yysptr++ = c;
572: if (--ep < ebuf)
573: ep = ebuf + sizeof(ebuf) - 1;
574: }
575:
/*
 * unputstr: push the whole string s back onto the input.
 * Characters are pushed last-to-first so that subsequent input() calls
 * return them in their original order.
 */
void unputstr(const char *s)	/* put a string back on input */
{
	const char *p = s + strlen(s);

	while (p > s)
		unput(*--p);
}
CVSweb interface <joel.bertrand@systella.fr>