表达式分析中的一元,二元表达式包含了大量的语义分析,这儿要温故一下LCC的类型系统
LCC用一种链接结构来表示类型,这种结构对应类型的前缀表示形式。例如,int* 表示为 (pointer (int))。具体的C代码如下:
typedef strct type* TYPE
struct type { int op; //表示操作码,是枚举常量,由词法分析模块获得 Type type; //类型的前缀 int align; //对齐字节数 int size; //大小 union { Symbol sym; //相应符号表中的标识名称 ,既types struct { unsigned oldstyle:1; Type *proto; } f; //用于处理函数类型 } u; //用联合是为了节约内存,处理多种类型 Xtype x; //后端的接口 };
词法分析模块获得的类型TOKEN的词属性如下,其中第二列(即 xx 的第二个参数)即为枚举值
enum { #define xx(a,b,c,d,e,f,g) a=b, #define yy(a,b,c,d,e,f,g) #include "token.h" LAST };
/* token.h entries: in each xx(...) the first argument is the enumerator name and the second is its numeric code. */
xx(FLOAT, 1, 0, 0, 0, CHAR, "float") xx(DOUBLE, 2, 0, 0, 0, CHAR, "double") xx(CHAR, 3, 0, 0, 0, CHAR, "char") xx(SHORT, 4, 0, 0, 0, CHAR, "short") xx(INT, 5, 0, 0, 0, CHAR, "int") xx(UNSIGNED, 6, 0, 0, 0, CHAR, "unsigned") xx(POINTER, 7, 0, 0, 0, 0, "pointer") xx(VOID, 8, 0, 0, 0, CHAR, "void") xx(STRUCT, 9, 0, 0, 0, CHAR, "struct") xx(UNION, 10, 0, 0, 0, CHAR, "union") xx(FUNCTION, 11, 0, 0, 0, 0, "function") xx(ARRAY, 12, 0, 0, 0, 0, "array") xx(ENUM, 13, 0, 0, 0, CHAR, "enum") xx(LONG, 14, 0, 0, 0, CHAR, "long") xx(CONST, 15, 0, 0, 0, CHAR, "const") xx(VOLATILE, 16, 0, 0, 0, CHAR, "volatile")
整个类型系统,作为一个小模块,提供了一系列的操作函数,如构造,删除,比较兼容性,判断是否是一种类型等等,有较强的内聚性。相应接口如下
static Type type(op, ty, size, align, sym) 构造函数,不同类型的初始化都调用它
void rmtypes(lev) 从类型表中删除类型,类型是存在一个小小的哈希表中
/*
 * Type-testing macros (excerpt): each applies unqual() to the type and
 * tests the operator code of the result.
 */
#define isarray(t)  (unqual(t)->op == ARRAY)
#define isstruct(t) (unqual(t)->op == STRUCT \
                  || unqual(t)->op == UNION)
Type ptr(ty) Type ty; //指针的构造函数
Type array(ty, n, a) //数组构造函数,相应还有函数,枚举,结构的构造函数就不一一列出来了。
eqtype(ty1,ty2,ret) //类型兼容性比较函数。
void typeInit() //类型初始化函数,初始化一些内置类型,如INT,LONG等等,这里跟接口IR有打交道,IR是前后两端的接口
下面以结构类型为例,管中窥豹,了解一下LCC的类型系统
首先是初始构造函数
static Type type(op, ty, size, align, sym) int op, size, align; Type ty; void *sym; { unsigned h = (op^((unsigned long)ty>>3)) &(NELEMS(typetable)-1); struct entry *tn;
if (op != FUNCTION && (op != ARRAY || size > 0)) for (tn = typetable[h]; tn; tn = tn->link) if (tn->type.op == op && tn->type.type == ty && tn->type.size == size && tn->type.align == align && tn->type.u.sym == sym) return &tn->type; //查表,已经存在的类型就不新建了 NEW0(tn, PERM); tn->type.op = op; tn->type.type = ty; tn->type.size = size; tn->type.align = align; tn->type.u.sym = sym; tn->link = typetable[h]; typetable[h] = tn; return &tn->type; }
类型的结构包括域。 struct { unsigned cfields:1; unsigned vfields:1; Table ftab; /* omit */ Field flist; //链表,连接所有的域 } s; //这是标记在符号表中表示域的结构
struct field { char *name; //名 Type type; //类型 int offset; //偏移 short bitsize; //处理位域的 short lsb; Field link; //下个域 }; //域的详细结构
同一个标记不能多次定义,相应代码片段如下
/*
 * Tag redefinition check (the step written as CHECK in newstruct).
 * The tag is looked up in the `types` table.  If it is found at the current
 * level -- or at PARAM scope while the current level is PARAM+1 -- it may be
 * reused only when it has the same operator and is not yet defined;
 * otherwise a redefinition error is reported.
 */
check:
if ((p = lookup(tag, types)) != NULL
&& (p->scope == level || (p->scope == PARAM && level == PARAM+1))) {
	if (p->type->op == op && !p->defined)
		return p->type;		/* declared but not yet defined: return that type */
	error("redefinition of `%s' previously defined at %w\n",
		p->name, &p->src);	/* duplicate definition */
}
结构的全部构造函数如下:
/*
 * newstruct - create a new type with operator op for the given tag and
 * install the tag in the `types` table at the current level.
 *
 * An empty tag is replaced by a name made from a generated label number;
 * a non-empty tag goes through the redefinition check first.
 * Returns the new type (or whatever the redefinition check returns).
 */
Type newstruct(op, tag) int op; char *tag; {
	Symbol p;

	assert(tag);
	if (*tag == 0)
		tag = stringd(genlabel(1));	/* no tag given: generate one */
	else
		CHECK;	/* placeholder for the tag redefinition check shown separately */
	p = install(tag, &types, level, PERM);
	p->type = type(op, NULL, 0, 0, p);
	if (p->scope > maxlevel)
		maxlevel = p->scope;
	p->src = src;	/* source position, supplied by the lexer */
	return p->type;
}
同样一个结构的域也不能同名
/*
 * Duplicate field-name check: walk the field list of ty and report an error
 * for any existing field with the same name.  When the loop ends, q
 * addresses the link slot at the end of the list.
 */
Field p, *q = &ty->u.sym->u.s.flist;

for (p = *q; p; q = &p->link, p = *q)
	if (p->name == name)	/* names are compared as pointers */
		error("duplicate field name `%s' in `%t'\n", name, ty);
域的构造函数如下
Field newfield(name, ty, fty) char *name; Type ty, fty; { Field p, *q = &ty->u.sym->u.s.flist;
if (name == NULL) name = stringd(genlabel(1)); 判断是否重名 NEW0(p, PERM); *q = p; p->name = name; p->type = fty;
return p; }
LCC内置了一些类型
/* Built-in types, one global per type. */
Type chartype;		/* char */
Type doubletype;	/* double */
Type floattype;		/* float */
Type inttype;		/* signed int */
Type longdouble;	/* long double */
Type longtype;		/* long */
Type shorttype;		/* signed short int */
Type signedchar;	/* signed char */
Type unsignedchar;	/* unsigned char */
Type unsignedlong;	/* unsigned long int */
Type unsignedshort;	/* unsigned short int */
Type unsignedtype;	/* unsigned int */
Type voidptype;		/* void* */
Type voidtype;		/* basic types: void */
void typeInit() { #define xx(v,name,op,metrics) { / Symbol p = install(string(name), &types, GLOBAL, PERM);/ //用宏预处理类型 v = type(op, 0, IR->metrics.size, IR->metrics.align, p);/ assert(v->align == 0 || v->size%v->align == 0); / p->type = v; p->addressed = IR->metrics.outofline; } xx(chartype, "char", CHAR, charmetric); xx(doubletype, "double", DOUBLE, doublemetric); xx(floattype, "float", FLOAT, floatmetric); xx(inttype, "int", INT, intmetric); xx(longdouble, "long double", DOUBLE, doublemetric); xx(longtype, "long int", INT, intmetric); xx(shorttype, "short", SHORT, shortmetric); xx(signedchar, "signed char", CHAR, charmetric); xx(unsignedchar, "unsigned char", CHAR, charmetric); xx(unsignedlong, "unsigned long", UNSIGNED,intmetric); xx(unsignedshort,"unsigned short",SHORT, shortmetric); xx(unsignedtype, "unsigned int", UNSIGNED,intmetric); #undef xx
LCC的类型兼容判断函数如下:
/*
 * Body of eqtype(ty1, ty2, ret) -- excerpt; the function header is not shown.
 * Yields 1 when the two types are compatible and 0 when they are not.
 * For arrays with compatible element types where one size is 0, it yields
 * `ret` instead.
 */
if (ty1 == ty2)
	return 1;
if (ty1->op != ty2->op)
	return 0;
switch (ty1->op) {
case CHAR: case SHORT: case UNSIGNED: case INT:
case ENUM: case UNION: case STRUCT:   case DOUBLE:
	/*
	 * Two distinct type nodes with one of these operators are never
	 * compatible.  Author's notes: structures with different names but the
	 * same layout are therefore incompatible, as are long and int; the
	 * author also asks whether char and unsigned char end up incompatible.
	 */
	return 0;
case POINTER:
	return eqtype(ty1->type, ty2->type, 1);	/* compare the pointed-to types */
case ARRAY:
	if (eqtype(ty1->type, ty2->type, 1)) {	/* element types must be compatible first */
		if (ty1->size == ty2->size)
			return 1;
		if (ty1->size == 0 || ty2->size == 0)	/* one of the arrays is incomplete */
			return ret;
	}
	return 0;
case FUNCTION:
	if (eqtype(ty1->type, ty2->type, 1)) {	/* return types must be compatible first */
		Type *p1 = ty1->u.f.proto, *p2 = ty2->u.f.proto;
		if (p1 == p2)
			return 1;
		if (p1 && p2) {
			/* Both have prototypes: same length, pairwise compatible parameters. */
			for ( ; *p1 && *p2; p1++, p2++)
				if (eqtype(unqual(*p1), unqual(*p2), 1) == 0)
					return 0;
			if (*p1 == NULL && *p2 == NULL)
				return 1;
		} else {
			/* Only one has a prototype: a variadic one is incompatible. */
			if (variadic(p1 ? ty1 : ty2))
				return 0;
			if (p1 == NULL)
				p1 = p2;
			for ( ; *p1; p1++) {
				Type ty = unqual(*p1);
				/* A parameter whose type changes under promotion is incompatible, as is float. */
				if (promote(ty) != (isenum(ty) ? ty->type : ty)
				|| ty == floattype)
					return 0;
			}
			return 1;
		}
	}
	return 0;
}
函数的参数提升,规定浮点提升为双精度,小整数和枚举提升为整数或无符号数
Type promote(ty) Type ty; { ty = unqual(ty); if (isunsigned(ty) || ty == longtype) //unsigned int ,unsigned long,long不提升 return ty; else if (isint(ty) || isenum(ty)) //int,char,unsigned char,shor,unsigned short,enum提升为整数 return inttype; return ty; //其他不提升 }
LCC的类型系统就说到这儿吧,感兴趣的读者可以到 http://www.cs.princeton.edu/software/lcc/ 下载 lcc 的源码。