Annotation of rpl/rplawk/tran.c, revision 1.2

1.1       bertrand    1: /****************************************************************
                      2: Copyright (C) Lucent Technologies 1997
                      3: All Rights Reserved
                      4: 
                      5: Permission to use, copy, modify, and distribute this software and
                      6: its documentation for any purpose and without fee is hereby
                      7: granted, provided that the above copyright notice appear in all
                      8: copies and that both that the copyright notice and this
                      9: permission notice and warranty disclaimer appear in supporting
                     10: documentation, and that the name Lucent Technologies or any of
                     11: its entities not be used in advertising or publicity pertaining
                     12: to distribution of the software without specific, written prior
                     13: permission.
                     14: 
                     15: LUCENT DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
                     16: INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS.
                     17: IN NO EVENT SHALL LUCENT OR ANY OF ITS ENTITIES BE LIABLE FOR ANY
                     18: SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
                     19: WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER
                     20: IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
                     21: ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
                     22: THIS SOFTWARE.
                     23: ****************************************************************/
                     24: 
                     25: #define    DEBUG
                     26: #include <stdio.h>
                     27: #include <math.h>
                     28: #include <ctype.h>
                     29: #include <string.h>
                     30: #include <stdlib.h>
                     31: #include "awk.h"
                     32: #include "ytab.h"
                     33: 
                     34: #define    FULLTAB 2   /* rehash when table gets this x full */
                     35: #define    GROWTAB 4   /* grow table by this factor */
                     36: 
                     37: Array  *symtab;    /* main symbol table */
                     38: 
                     39: char   **FS;       /* initial field sep */
                     40: char   **RS;       /* initial record sep */
                     41: char   **OFS;      /* output field sep */
                     42: char   **ORS;      /* output record sep */
                     43: char   **OFMT;     /* output format for numbers */
                     44: char   **CONVFMT;  /* format for conversions in getsval */
                     45: Awkfloat *NF;      /* number of fields in current record */
                     46: Awkfloat *NR;      /* number of current record */
                     47: Awkfloat *FNR;     /* number of current record in current file */
                     48: char   **FILENAME; /* current filename argument */
                     49: Awkfloat *ARGC;        /* number of arguments from command line */
                     50: char   **SUBSEP;   /* subscript separator for a[i,j,k]; default \034 */
                     51: Awkfloat *RSTART;  /* start of re matched with ~; origin 1 (!) */
                     52: Awkfloat *RLENGTH; /* length of same */
                     53: 
                     54: Cell   *fsloc;     /* FS */
                     55: Cell   *nrloc;     /* NR */
                     56: Cell   *nfloc;     /* NF */
                     57: Cell   *fnrloc;    /* FNR */
                     58: Array  *ARGVtab;   /* symbol table containing ARGV[...] */
                     59: Array  *ENVtab;    /* symbol table containing ENVIRON[...] */
                     60: Cell   *rstartloc; /* RSTART */
                     61: Cell   *rlengthloc;    /* RLENGTH */
                     62: Cell   *symtabloc; /* SYMTAB */
                     63: 
                     64: Cell   *nullloc;   /* a guaranteed empty cell */
                     65: Node   *nullnode;  /* zero&null, converted into a node for comparisons */
                     66: Cell   *literal0;
                     67: 
                     68: extern Cell **fldtab;
                     69: 
                     70: void syminit(void) /* initialize symbol table with builtin vars */
                     71: {
                     72:    literal0 = setsymtab("0", "0", 0.0, NUM|STR|CON|DONTFREE, symtab);
                     73:    /* this is used for if(x)... tests: */
                     74:    nullloc = setsymtab("$zero&null", "", 0.0, NUM|STR|CON|DONTFREE, symtab);
                     75:    nullnode = celltonode(nullloc, CCON);
                     76: 
                     77:    fsloc = setsymtab("FS", " ", 0.0, STR|DONTFREE, symtab);
                     78:    FS = &fsloc->sval;
                     79:    RS = &setsymtab("RS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
                     80:    OFS = &setsymtab("OFS", " ", 0.0, STR|DONTFREE, symtab)->sval;
                     81:    ORS = &setsymtab("ORS", "\n", 0.0, STR|DONTFREE, symtab)->sval;
                     82:    OFMT = &setsymtab("OFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
                     83:    CONVFMT = &setsymtab("CONVFMT", "%.6g", 0.0, STR|DONTFREE, symtab)->sval;
                     84:    FILENAME = &setsymtab("FILENAME", "", 0.0, STR|DONTFREE, symtab)->sval;
                     85:    nfloc = setsymtab("NF", "", 0.0, NUM, symtab);
                     86:    NF = &nfloc->fval;
                     87:    nrloc = setsymtab("NR", "", 0.0, NUM, symtab);
                     88:    NR = &nrloc->fval;
                     89:    fnrloc = setsymtab("FNR", "", 0.0, NUM, symtab);
                     90:    FNR = &fnrloc->fval;
                     91:    SUBSEP = &setsymtab("SUBSEP", "\034", 0.0, STR|DONTFREE, symtab)->sval;
                     92:    rstartloc = setsymtab("RSTART", "", 0.0, NUM, symtab);
                     93:    RSTART = &rstartloc->fval;
                     94:    rlengthloc = setsymtab("RLENGTH", "", 0.0, NUM, symtab);
                     95:    RLENGTH = &rlengthloc->fval;
                     96:    symtabloc = setsymtab("SYMTAB", "", 0.0, ARR, symtab);
                     97:    symtabloc->sval = (char *) symtab;
                     98: }
                     99: 
                    100: void arginit(int ac, char **av)    /* set up ARGV and ARGC */
                    101: {
                    102:    Cell *cp;
                    103:    int i;
                    104:    char temp[50];
                    105: 
                    106:    ARGC = &setsymtab("ARGC", "", (Awkfloat) ac, NUM, symtab)->fval;
                    107:    cp = setsymtab("ARGV", "", 0.0, ARR, symtab);
                    108:    ARGVtab = makesymtab(NSYMTAB);  /* could be (int) ARGC as well */
                    109:    cp->sval = (char *) ARGVtab;
                    110:    for (i = 0; i < ac; i++) {
                    111:        sprintf(temp, "%d", i);
                    112:        if (is_number(*av))
                    113:            setsymtab(temp, *av, atof(*av), STR|NUM, ARGVtab);
                    114:        else
                    115:            setsymtab(temp, *av, 0.0, STR, ARGVtab);
                    116:        av++;
                    117:    }
                    118: }
                    119: 
                    120: void envinit(char **envp)  /* set up ENVIRON variable */
                    121: {
                    122:    Cell *cp;
                    123:    char *p;
                    124: 
                    125:    cp = setsymtab("ENVIRON", "", 0.0, ARR, symtab);
                    126:    ENVtab = makesymtab(NSYMTAB);
                    127:    cp->sval = (char *) ENVtab;
                    128:    for ( ; *envp; envp++) {
                    129:        if ((p = strchr(*envp, '=')) == NULL)
                    130:            continue;
                    131:        if( p == *envp ) /* no left hand side name in env string */
                    132:            continue;
                    133:        *p++ = 0;   /* split into two strings at = */
                    134:        if (is_number(p))
                    135:            setsymtab(*envp, p, atof(p), STR|NUM, ENVtab);
                    136:        else
                    137:            setsymtab(*envp, p, 0.0, STR, ENVtab);
                    138:        p[-1] = '=';    /* restore in case env is passed down to a shell */
                    139:    }
                    140: }
                    141: 
                    142: Array *makesymtab(int n)   /* make a new symbol table */
                    143: {
                    144:    Array *ap;
                    145:    Cell **tp;
                    146: 
                    147:    ap = (Array *) malloc(sizeof(Array));
                    148:    tp = (Cell **) calloc(n, sizeof(Cell *));
                    149:    if (ap == NULL || tp == NULL)
                    150:        FATAL("out of space in makesymtab");
                    151:    ap->nelem = 0;
                    152:    ap->size = n;
                    153:    ap->tab = tp;
                    154:    return(ap);
                    155: }
                    156: 
                    157: void freesymtab(Cell *ap)  /* free a symbol table */
                    158: {
                    159:    Cell *cp, *temp;
                    160:    Array *tp;
                    161:    int i;
                    162: 
                    163:    if (!isarr(ap))
                    164:        return;
                    165:    tp = (Array *) ap->sval;
                    166:    if (tp == NULL)
                    167:        return;
                    168:    for (i = 0; i < tp->size; i++) {
                    169:        for (cp = tp->tab[i]; cp != NULL; cp = temp) {
                    170:            xfree(cp->nval);
                    171:            if (freeable(cp))
                    172:                xfree(cp->sval);
                    173:            temp = cp->cnext;   /* avoids freeing then using */
                    174:            free(cp); 
                    175:            tp->nelem--;
                    176:        }
                    177:        tp->tab[i] = 0;
                    178:    }
                    179:    if (tp->nelem != 0)
                    180:        WARNING("can't happen: inconsistent element count freeing %s", ap->nval);
                    181:    free(tp->tab);
                    182:    free(tp);
                    183: }
                    184: 
                    185: void freeelem(Cell *ap, const char *s) /* free elem s from ap (i.e., ap["s"] */
                    186: {
                    187:    Array *tp;
                    188:    Cell *p, *prev = NULL;
                    189:    int h;
                    190:    
                    191:    tp = (Array *) ap->sval;
                    192:    h = hash(s, tp->size);
                    193:    for (p = tp->tab[h]; p != NULL; prev = p, p = p->cnext)
                    194:        if (strcmp(s, p->nval) == 0) {
                    195:            if (prev == NULL)   /* 1st one */
                    196:                tp->tab[h] = p->cnext;
                    197:            else            /* middle somewhere */
                    198:                prev->cnext = p->cnext;
                    199:            if (freeable(p))
                    200:                xfree(p->sval);
                    201:            free(p->nval);
                    202:            free(p);
                    203:            tp->nelem--;
                    204:            return;
                    205:        }
                    206: }
                    207: 
                    208: Cell *setsymtab(const char *n, const char *s, Awkfloat f, unsigned t, Array *tp)
                    209: {
                    210:    int h;
                    211:    Cell *p;
                    212: 
                    213:    if (n != NULL && (p = lookup(n, tp)) != NULL) {
                    214:           dprintf( ("setsymtab found %p: n=%s s=\"%s\" f=%g t=%o\n",
1.2     ! bertrand  215:            (void*)p, NN(p->nval), NN(p->sval), p->fval, p->tval) );
1.1       bertrand  216:        return(p);
                    217:    }
                    218:    p = (Cell *) malloc(sizeof(Cell));
                    219:    if (p == NULL)
                    220:        FATAL("out of space for symbol table at %s", n);
                    221:    p->nval = tostring(n);
                    222:    p->sval = s ? tostring(s) : tostring("");
                    223:    p->fval = f;
                    224:    p->tval = t;
                    225:    p->csub = CUNK;
                    226:    p->ctype = OCELL;
                    227:    tp->nelem++;
                    228:    if (tp->nelem > FULLTAB * tp->size)
                    229:        rehash(tp);
                    230:    h = hash(n, tp->size);
                    231:    p->cnext = tp->tab[h];
                    232:    tp->tab[h] = p;
                    233:       dprintf( ("setsymtab set %p: n=%s s=\"%s\" f=%g t=%o\n",
1.2     ! bertrand  234:        (void*)p, p->nval, p->sval, p->fval, p->tval) );
1.1       bertrand  235:    return(p);
                    236: }
                    237: 
                    238: int hash(const char *s, int n) /* form hash value for string s */
                    239: {
                    240:    unsigned hashval;
                    241: 
                    242:    for (hashval = 0; *s != '\0'; s++)
                    243:        hashval = (*s + 31 * hashval);
                    244:    return hashval % n;
                    245: }
                    246: 
                    247: void rehash(Array *tp) /* rehash items in small table into big one */
                    248: {
                    249:    int i, nh, nsz;
                    250:    Cell *cp, *op, **np;
                    251: 
                    252:    nsz = GROWTAB * tp->size;
                    253:    np = (Cell **) calloc(nsz, sizeof(Cell *));
                    254:    if (np == NULL)     /* can't do it, but can keep running. */
                    255:        return;     /* someone else will run out later. */
                    256:    for (i = 0; i < tp->size; i++) {
                    257:        for (cp = tp->tab[i]; cp; cp = op) {
                    258:            op = cp->cnext;
                    259:            nh = hash(cp->nval, nsz);
                    260:            cp->cnext = np[nh];
                    261:            np[nh] = cp;
                    262:        }
                    263:    }
                    264:    free(tp->tab);
                    265:    tp->tab = np;
                    266:    tp->size = nsz;
                    267: }
                    268: 
                    269: Cell *lookup(const char *s, Array *tp) /* look for s in tp */
                    270: {
                    271:    Cell *p;
                    272:    int h;
                    273: 
                    274:    h = hash(s, tp->size);
                    275:    for (p = tp->tab[h]; p != NULL; p = p->cnext)
                    276:        if (strcmp(s, p->nval) == 0)
                    277:            return(p);  /* found it */
                    278:    return(NULL);           /* not found */
                    279: }
                    280: 
                    281: Awkfloat setfval(Cell *vp, Awkfloat f) /* set float val of a Cell */
                    282: {
                    283:    int fldno;
                    284: 
                    285:    if ((vp->tval & (NUM | STR)) == 0) 
                    286:        funnyvar(vp, "assign to");
                    287:    if (isfld(vp)) {
                    288:        donerec = 0;    /* mark $0 invalid */
                    289:        fldno = atoi(vp->nval);
                    290:        if (fldno > *NF)
                    291:            newfld(fldno);
                    292:           dprintf( ("setting field %d to %g\n", fldno, f) );
                    293:    } else if (isrec(vp)) {
                    294:        donefld = 0;    /* mark $1... invalid */
                    295:        donerec = 1;
                    296:    }
                    297:    if (freeable(vp))
                    298:        xfree(vp->sval); /* free any previous string */
                    299:    vp->tval &= ~STR;   /* mark string invalid */
                    300:    vp->tval |= NUM;    /* mark number ok */
1.2     ! bertrand  301:    if (f == -0)  /* who would have thought this possible? */
        !           302:        f = 0;
        !           303:       dprintf( ("setfval %p: %s = %g, t=%o\n", (void*)vp, NN(vp->nval), f, vp->tval) );
1.1       bertrand  304:    return vp->fval = f;
                    305: }
                    306: 
                    307: void funnyvar(Cell *vp, const char *rw)
                    308: {
                    309:    if (isarr(vp))
                    310:        FATAL("can't %s %s; it's an array name.", rw, vp->nval);
                    311:    if (vp->tval & FCN)
                    312:        FATAL("can't %s %s; it's a function.", rw, vp->nval);
                    313:    WARNING("funny variable %p: n=%s s=\"%s\" f=%g t=%o",
                    314:        vp, vp->nval, vp->sval, vp->fval, vp->tval);
                    315: }
                    316: 
                    317: char *setsval(Cell *vp, const char *s) /* set string val of a Cell */
                    318: {
                    319:    char *t;
                    320:    int fldno;
                    321: 
                    322:       dprintf( ("starting setsval %p: %s = \"%s\", t=%o, r,f=%d,%d\n", 
1.2     ! bertrand  323:        (void*)vp, NN(vp->nval), s, vp->tval, donerec, donefld) );
1.1       bertrand  324:    if ((vp->tval & (NUM | STR)) == 0)
                    325:        funnyvar(vp, "assign to");
                    326:    if (isfld(vp)) {
                    327:        donerec = 0;    /* mark $0 invalid */
                    328:        fldno = atoi(vp->nval);
                    329:        if (fldno > *NF)
                    330:            newfld(fldno);
                    331:           dprintf( ("setting field %d to %s (%p)\n", fldno, s, s) );
                    332:    } else if (isrec(vp)) {
                    333:        donefld = 0;    /* mark $1... invalid */
                    334:        donerec = 1;
                    335:    }
                    336:    t = tostring(s);    /* in case it's self-assign */
                    337:    if (freeable(vp))
                    338:        xfree(vp->sval);
                    339:    vp->tval &= ~NUM;
                    340:    vp->tval |= STR;
                    341:    vp->tval &= ~DONTFREE;
                    342:       dprintf( ("setsval %p: %s = \"%s (%p) \", t=%o r,f=%d,%d\n", 
1.2     ! bertrand  343:        (void*)vp, NN(vp->nval), t,t, vp->tval, donerec, donefld) );
1.1       bertrand  344:    return(vp->sval = t);
                    345: }
                    346: 
                    347: Awkfloat getfval(Cell *vp) /* get float val of a Cell */
                    348: {
                    349:    if ((vp->tval & (NUM | STR)) == 0)
                    350:        funnyvar(vp, "read value of");
                    351:    if (isfld(vp) && donefld == 0)
                    352:        fldbld();
                    353:    else if (isrec(vp) && donerec == 0)
                    354:        recbld();
                    355:    if (!isnum(vp)) {   /* not a number */
                    356:        vp->fval = atof(vp->sval);  /* best guess */
                    357:        if (is_number(vp->sval) && !(vp->tval&CON))
                    358:            vp->tval |= NUM;    /* make NUM only sparingly */
                    359:    }
1.2     ! bertrand  360:       dprintf( ("getfval %p: %s = %g, t=%o\n",
        !           361:        (void*)vp, NN(vp->nval), vp->fval, vp->tval) );
1.1       bertrand  362:    return(vp->fval);
                    363: }
                    364: 
                    365: static char *get_str_val(Cell *vp, char **fmt)        /* get string val of a Cell */
                    366: {
                    367:    char s[100];    /* BUG: unchecked */
                    368:    double dtemp;
                    369: 
                    370:    if ((vp->tval & (NUM | STR)) == 0)
                    371:        funnyvar(vp, "read value of");
                    372:    if (isfld(vp) && donefld == 0)
                    373:        fldbld();
                    374:    else if (isrec(vp) && donerec == 0)
                    375:        recbld();
                    376:    if (isstr(vp) == 0) {
                    377:        if (freeable(vp))
                    378:            xfree(vp->sval);
                    379:        if (modf(vp->fval, &dtemp) == 0)    /* it's integral */
                    380:            sprintf(s, "%.30g", vp->fval);
                    381:        else
                    382:            sprintf(s, *fmt, vp->fval);
                    383:        vp->sval = tostring(s);
                    384:        vp->tval &= ~DONTFREE;
                    385:        vp->tval |= STR;
                    386:    }
1.2     ! bertrand  387:       dprintf( ("getsval %p: %s = \"%s (%p)\", t=%o\n",
        !           388:        (void*)vp, NN(vp->nval), vp->sval, vp->sval, vp->tval) );
1.1       bertrand  389:    return(vp->sval);
                    390: }
                    391: 
                    392: char *getsval(Cell *vp)       /* get string val of a Cell */
                    393: {
                    394:       return get_str_val(vp, CONVFMT);
                    395: }
                    396: 
                    397: char *getpssval(Cell *vp)     /* get string val of a Cell for print */
                    398: {
                    399:       return get_str_val(vp, OFMT);
                    400: }
                    401: 
                    402: 
                    403: char *tostring(const char *s)  /* make a copy of string s */
                    404: {
                    405:    char *p;
                    406: 
                    407:    p = (char *) malloc(strlen(s)+1);
                    408:    if (p == NULL)
                    409:        FATAL("out of space in tostring on %s", s);
                    410:    strcpy(p, s);
                    411:    return(p);
                    412: }
                    413: 
                    414: char *qstring(const char *is, int delim)   /* collect string up to next delim */
                    415: {
                    416:    const char *os = is;
                    417:    int c, n;
                    418:    uschar *s = (uschar *) is;
                    419:    uschar *buf, *bp;
                    420: 
                    421:    if ((buf = (uschar *) malloc(strlen(is)+3)) == NULL)
                    422:        FATAL( "out of space in qstring(%s)", s);
                    423:    for (bp = buf; (c = *s) != delim; s++) {
                    424:        if (c == '\n')
                    425:            SYNTAX( "newline in string %.20s...", os );
                    426:        else if (c != '\\')
                    427:            *bp++ = c;
                    428:        else {  /* \something */
                    429:            c = *++s;
                    430:            if (c == 0) {   /* \ at end */
                    431:                *bp++ = '\\';
                    432:                break;  /* for loop */
                    433:            }   
                    434:            switch (c) {
                    435:            case '\\':  *bp++ = '\\'; break;
                    436:            case 'n':   *bp++ = '\n'; break;
                    437:            case 't':   *bp++ = '\t'; break;
                    438:            case 'b':   *bp++ = '\b'; break;
                    439:            case 'f':   *bp++ = '\f'; break;
                    440:            case 'r':   *bp++ = '\r'; break;
                    441:            default:
                    442:                if (!isdigit(c)) {
                    443:                    *bp++ = c;
                    444:                    break;
                    445:                }
                    446:                n = c - '0';
                    447:                if (isdigit(s[1])) {
                    448:                    n = 8 * n + *++s - '0';
                    449:                    if (isdigit(s[1]))
                    450:                        n = 8 * n + *++s - '0';
                    451:                }
                    452:                *bp++ = n;
                    453:                break;
                    454:            }
                    455:        }
                    456:    }
                    457:    *bp++ = 0;
                    458:    return (char *) buf;
                    459: }

CVSweb interface <joel.bertrand@systella.fr>