|
|
1. Calculator - Next VersionThe next version of the calculator to be described below is substantially much more complex with major changes in the inclusion of if-then-else and while constructs. In addition a syntax tree is constructed during parsing. We can traverse or walk down the tree to get the output. Designing of the tree walk routine can be done in two ways:
To make things more concrete, here is a sample program.
x = 0;
while(x < 3) {
print x;
x = x + 1;
}
The output of the interpretive version will be: 1 2 3 while that of the compiler version will be: push 0 push x LC0: push x push 3 complt jz LC1 push x print push x push 1 add pop x jmp LC0 LC1: ret The include file contains declarations for the syntax tree and symbol table. The symbol table, sym allows for single character variable names. A node in the syntax tree may hold a constant, an identifier or an internal node with an operator. All three variants are encapsulated in a union nodetype and the structure we have can be determined by nodetype.type. The lex input file contains patterns for VARIABLE and INTEGER tokens. In addition, tokens are identified for two-character operators such as EQ and NE. Single character operators are simply returned as themselves. The yacc input file defines YYSTYPE, the type of yylval, as
%union {
int ivalue; /* integer value */
char sIndex; /* symbol table index */
nodeType *nPtr; /* node pointer */
};
This causes the following to be generated in y.tab.h:
typedef union {
int iValue; /* integer value;
char sIndex; /* symbol table index */
nodeType *nPtr; /* node pointer */
}YYSTYPE;
extern YYSTYPE yylval;
Constants, variables and nodes can be represented by yylval in the parser's value stack. Notice the type definitions %token <iValue> INTEGER %token <nPtr> expr This binds expr to nPtr, and INTEGER to iValue in the YYSTYPE union. This is essential so that yacc can generate the correct code. For example, the rule
expr: INTEGER { $$ = con($1); }
should generate the following code. yylval.nPtr = con(yyvsp[0].iValue); Note that yyvsp is the value stack pointer and yyvsp[0] addresses the top of the value stack, or the value associated with INTEGER. The unary minus operator is given higher priority than binary operators as follows: %left GE LE EQ NE '<' '>' %left '+' '-' %left '*' '/' %nonassoc UMINUS The %nonassoc indicates no associativity is implied. It is frequently used in conjunction with %prec to specify precedence as a rule. The bottom-up technique is used to construct the syntax tree. Leaf nodes are allocated as integers and variables are reduced. When operators are encountered, a node is allocated and pointers to previously allocated nodes are entered as operands. As statements are reduced, ex is called to do a depth-first walk of the syntax tree. Since the tree was constructed bottom-up, a depth first walk visits nodes in the order that they were originally allocated. This results in operators being applied in the order that they were encountered during parsing. 2. Include File
typedef enum { typeCon, typeId, typeOpr } nodeEnum;
/* constants */
typedef struct {
nodeEnum type; /* type of node */
int value; /* value of constant */
} conNodeType;
/* identifiers */
typedef struct {
nodeEnum type; /* type of node */
int i; /* subscript to ident array */
} idNodeType;
/* operators */
typedef struct {
nodeEnum type; /* type of node */
int oper; /* operator */
int nops; /* number of operands */
union nodeTypeTag *op[1]; /* operands (expandable) */
} oprNodeType;
typedef union nodeTypeTag {
nodeEnum type; /* type of node */
conNodeType con; /* constants */
idNodeType id; /* identifiers */
oprNodeType opr; /* operators */
} nodeType;
extern int sym[26];
3. Lex Input
%{
#include <stdlib.h>
#include "calc3.h"
#include "y.tab.h"
void yyerror(char *);
%}
%%
[a-z] {
yylval.sIndex = *yytext - 'a';
return VARIABLE;
}
[0-9]+ {
yylval.iValue = atoi(yytext);
return INTEGER;
}
[-()<>=+*/;{}.] {
return *yytext;
}
">=" return GE;
"<=" return LE;
"==" return EQ;
"!=" return NE;
"while" return WHILE;
"if" return IF;
"else" return ELSE;
"print" return PRINT;
[ \t\n]+ ; /* ignore whitespace */
. yyerror("Unknown character");
%%
int yywrap(void) {
return 1;
}
4. Yacc Input
%{
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include "calc3.h"
/* prototypes */
nodeType *opr(int oper, int nops, ...);
nodeType *id(int i);
nodeType *con(int value);
void freeNode(nodeType *p);
int ex(nodeType *p);
int yylex(void);
void yyerror(char *s);
int sym[26]; /* symbol table */
%}
%union {
int iValue; /* integer value */
char sIndex; /* symbol table index */
nodeType *nPtr; /* node pointer */
};
%token <iValue> INTEGER
%token <sIndex> VARIABLE
%token WHILE IF PRINT
%nonassoc IFX
%nonassoc ELSE
%left GE LE EQ NE '>' '<'
%left '+' '-'
%left '*' '/'
%nonassoc UMINUS
%type <nPtr> stmt expr stmt_list
%%
program:
function { exit(0); }
;
function:
function stmt { ex($2); freeNode($2); }
| /* NULL */
;
stmt:
';' { $$ = opr(';', 2, NULL, NULL); }
| expr ';' { $$ = $1; }
| PRINT expr ';' { $$ = opr(PRINT, 1, $2); }
| VARIABLE '=' expr ';' { $$ = opr('=', 2, id($1), $3); }
| WHILE '(' expr ')' stmt
{ $$ = opr(WHILE, 2, $3, $5); }
| IF '(' expr ')' stmt %prec IFX
{ $$ = opr(IF, 2, $3, $5); }
| IF '(' expr ')' stmt ELSE stmt
{ $$ = opr(IF, 3, $3, $5, $7); }
| '{' stmt_list '}' { $$ = $2; }
;
stmt_list:
stmt { $$ = $1; }
| stmt_list stmt { $$ = opr(';', 2, $1, $2); }
;
expr:
INTEGER { $$ = con($1); }
| VARIABLE { $$ = id($1); }
| '-' expr %prec UMINUS { $$ = opr(UMINUS, 1, $2); }
| expr '+' expr { $$ = opr('+', 2, $1, $3); }
| expr '-' expr { $$ = opr('-', 2, $1, $3); }
| expr '*' expr { $$ = opr('*', 2, $1, $3); }
| expr '/' expr { $$ = opr('/', 2, $1, $3); }
| expr '<' expr { $$ = opr('<', 2, $1, $3); }
| expr '>' expr { $$ = opr('>', 2, $1, $3); }
| expr GE expr { $$ = opr(GE, 2, $1, $3); }
| expr LE expr { $$ = opr(LE, 2, $1, $3); }
| expr NE expr { $$ = opr(NE, 2, $1, $3); }
| expr EQ expr { $$ = opr(EQ, 2, $1, $3); }
| '(' expr ')' { $$ = $2; }
;
%%
nodeType *con(int value) {
nodeType *p;
/* allocate node */
if ((p = malloc(sizeof(conNodeType))) == NULL)
yyerror("out of memory");
/* copy information */
p->type = typeCon;
p->con.value = value;
return p;
}
nodeType *id(int i) {
nodeType *p;
/* allocate node */
if ((p = malloc(sizeof(idNodeType))) == NULL)
yyerror("out of memory");
/* copy information */
p->type = typeId;
p->id.i = i;
return p;
}
nodeType *opr(int oper, int nops, ...) {
va_list ap;
nodeType *p;
size_t size;
int i;
/* allocate node */
size = sizeof(oprNodeType) + (nops - 1) * sizeof(nodeType*);
if ((p = malloc(size)) == NULL)
yyerror("out of memory");
/* copy information */
p->type = typeOpr;
p->opr.oper = oper;
p->opr.nops = nops;
va_start(ap, nops);
for (i = 0; i < nops; i++)
p->opr.op[i] = va_arg(ap, nodeType*);
va_end(ap);
return p;
}
void freeNode(nodeType *p) {
int i;
if (!p) return;
if (p->type == typeOpr) {
for (i = 0; i < p->opr.nops; i++)
freeNode(p->opr.op[i]);
}
free (p);
}
void yyerror(char *s) {
fprintf(stdout, "%s\n", s);
}
int main(void) {
yyparse();
return 0;
}
5. Interpreter
#include <stdio.h>
#include "calc3.h"
#include "y.tab.h"
int ex(nodeType *p) {
if (!p) return 0;
switch(p->type) {
case typeCon: return p->con.value;
case typeId: return sym[p->id.i];
case typeOpr:
switch(p->opr.oper) {
case WHILE: while(ex(p->opr.op[0]))
ex(p->opr.op[1]);
return 0;
case IF: if (ex(p->opr.op[0]))
ex(p->opr.op[1]);
else if (p->opr.nops > 2)
ex(p->opr.op[2]);
return 0;
case PRINT: printf("%d\n", ex(p->opr.op[0]));
return 0;
case ';': ex(p->opr.op[0]);
return ex(p->opr.op[1]);
case '=': return sym[p->opr.op[0]->id.i] =
ex(p->opr.op[1]);
case UMINUS: return -ex(p->opr.op[0]);
case '+': return ex(p->opr.op[0]) + ex(p->opr.op[1]);
case '-': return ex(p->opr.op[0]) - ex(p->opr.op[1]);
case '*': return ex(p->opr.op[0]) * ex(p->opr.op[1]);
case '/': return ex(p->opr.op[0]) / ex(p->opr.op[1]);
case '<': return ex(p->opr.op[0]) < ex(p->opr.op[1]);
case '>': return ex(p->opr.op[0]) > ex(p->opr.op[1]);
case GE: return ex(p->opr.op[0]) >= ex(p->opr.op[1]);
case LE: return ex(p->opr.op[0]) <= ex(p->opr.op[1]);
case NE: return ex(p->opr.op[0]) != ex(p->opr.op[1]);
case EQ: return ex(p->opr.op[0]) == ex(p->opr.op[1]);
}
}
}
6. Compiler
#include <stdio.h>
#include "calc3.h"
#include "y.tab.h"
static int lbl;
int ex(nodeType *p) {
int lbl1, lbl2;
if (!p) return 0;
switch(p->type) {
case typeCon:
printf("\tpush\t%d\n", p->con.value);
break;
case typeId:
printf("\tpush\t%c\n", p->id.i + 'a');
break;
case typeOpr:
switch(p->opr.oper) {
case WHILE:
printf("L%03d:\n", lbl1 = lbl++);
ex(p->opr.op[0]);
printf("\tjz\tL%03d\n", lbl2 = lbl++);
ex(p->opr.op[1]);
printf("\tjmp\tL%03d\n", lbl1);
printf("L%03d:\n", lbl2);
break;
case IF:
ex(p->opr.op[0]);
if (p->opr.nops > 2) {
/* if else */
printf("\tjz\tL%03d\n", lbl1 = lbl++);
ex(p->opr.op[1]);
printf("\tjmp\tL%03d\n", lbl2 = lbl++);
printf("L%03d:\n", lbl1);
ex(p->opr.op[2]);
printf("L%03d:\n", lbl2);
} else {
/* if */
printf("\tjz\tL%03d\n", lbl1 = lbl++);
ex(p->opr.op[1]);
printf("L%03d:\n", lbl1);
}
break;
case PRINT:
ex(p->opr.op[0]);
printf("\tprint\n");
break;
case '=':
ex(p->opr.op[1]);
printf("\tpop\t%c\n", p->opr.op[0]->id.i + 'a');
break;
case UMINUS:
ex(p->opr.op[0]);
printf("\tneg\n");
break;
default:
ex(p->opr.op[0]);
ex(p->opr.op[1]);
switch(p->opr.oper) {
case '+': printf("\tadd\n"); break;
case '-': printf("\tsub\n"); break;
case '*': printf("\tmul\n"); break;
case '/': printf("\tdiv\n"); break;
case '<': printf("\tcompLT\n"); break;
case '>': printf("\tcompGT\n"); break;
case GE: printf("\tcompGE\n"); break;
case LE: printf("\tcompLE\n"); break;
case NE: printf("\tcompNE\n"); break;
case EQ: printf("\tcompEQ\n"); break;
}
}
}
}
|
I have just given my final year B.Tech examinations in Computer Science and
Engineering and a native of Kerala, India.