lcc 源码读书笔记2之类型系统

论坛 期权论坛 脚本     
已经匿名di用户   2022-5-29 19:38   2242   0

表达式分析中的一元,二元表达式包含了大量的语义分析,这儿要温故一下LCC的类型系统

LCC通过能反映类型的前缀规范的链接结构来表示类型。具体就是int* 表示为(pointer (int)),具体的C代码如下

typedef strct type* TYPE

struct type {
int op; //表示操作码,是枚举常量,由词法分析模块获得
Type type; //类型的前缀
int align; //对齐字节数
int size; //大小
union {
Symbol sym; //相应符号表中的标识名称 ,既types
struct {
unsigned oldstyle:1;
Type *proto;
} f; //用于处理函数类型
} u; //用联合是为了节约内存,处理多种类型
Xtype x; //后端的接口
};

词法分析模块中类型TOKEN的属性定义如下,其中xx宏的第二个参数(第二列)即为枚举值

/*
 * Enumeration built from token.h. Each xx(...) entry expands to `a=b,`, so its
 * first argument becomes an enumerator whose value is the second argument.
 * yy(...) entries expand to nothing and contribute no enumerator.
 * LAST follows the final entry and so takes the next value after it.
 */
enum {
#define xx(a,b,c,d,e,f,g) a=b,
#define yy(a,b,c,d,e,f,g)
#include "token.h"
LAST
};

/*
 * Excerpt of the type entries in token.h. With the xx definition used by the
 * enum (a=b,), the first argument is the enumerator name and the second its
 * value. The last argument looks like the spelling of the type; the meaning
 * of the remaining columns is not shown in this excerpt.
 */
xx(FLOAT, 1, 0, 0, 0, CHAR, "float")
xx(DOUBLE, 2, 0, 0, 0, CHAR, "double")
xx(CHAR, 3, 0, 0, 0, CHAR, "char")
xx(SHORT, 4, 0, 0, 0, CHAR, "short")
xx(INT, 5, 0, 0, 0, CHAR, "int")
xx(UNSIGNED, 6, 0, 0, 0, CHAR, "unsigned")
xx(POINTER, 7, 0, 0, 0, 0, "pointer")
xx(VOID, 8, 0, 0, 0, CHAR, "void")
xx(STRUCT, 9, 0, 0, 0, CHAR, "struct")
xx(UNION, 10, 0, 0, 0, CHAR, "union")
xx(FUNCTION, 11, 0, 0, 0, 0, "function")
xx(ARRAY, 12, 0, 0, 0, 0, "array")
xx(ENUM, 13, 0, 0, 0, CHAR, "enum")
xx(LONG, 14, 0, 0, 0, CHAR, "long")
xx(CONST, 15, 0, 0, 0, CHAR, "const")
xx(VOLATILE, 16, 0, 0, 0, CHAR, "volatile")

整个类型系统,作为一个小模块,提供了一系列的操作函数,如构造,删除,比较兼容性,判断是否是一种类型等等,有较强的内聚性。相应接口如下

static Type type(op, ty, size, align, sym) 构造函数,不同类型的初始化都调用它

void rmtypes(lev) 从类型表中删除类型,类型是存在一个小小的哈希表中

/* Type-testing macros (excerpt): each inspects the operator of unqual(t). */
#define isarray(t)  (unqual(t)->op == ARRAY)
#define isstruct(t) (unqual(t)->op == STRUCT \
                  || unqual(t)->op == UNION)

Type ptr(ty) Type ty; //指针的构造函数

Type array(ty, n, a) //数组构造函数,相应还有函数,枚举,结构的构造函数就不一一列出来了。

eqtype(ty1,ty2,ret) //类型兼容性比较函数。

void typeInit() //类型初始化函数,初始化一些内置类型,如INT,LONG等等;这里要和接口IR打交道,IR是前端与后端之间的接口

下面以结构类型为例,管中窥豹,看一看LCC的类型系统

首先是初始构造函数

/*
 * type - return the type node for (op, ty, size, align, sym).
 *
 * Nodes are kept in typetable, in chains selected by a hash of op and the
 * operand pointer. A node whose op, operand, size, align and symbol all match
 * is reused. FUNCTION types, and ARRAY types whose size is not positive, skip
 * the search and always get a new node. A new node is allocated with
 * NEW0(tn, PERM), filled in, and pushed on the front of its chain.
 */
static Type type(op, ty, size, align, sym)
int op, size, align; Type ty; void *sym; {
    struct entry *tn;
    unsigned h = (op^((unsigned long)ty>>3)) & (NELEMS(typetable)-1);
    int shareable = !(op == FUNCTION || (op == ARRAY && size <= 0));

    if (shareable) {
        tn = typetable[h];
        while (tn) {
            if (tn->type.op == op
                && tn->type.type == ty
                && tn->type.size == size
                && tn->type.align == align
                && tn->type.u.sym == sym) {
                /* already in the table: do not build a duplicate */
                return &tn->type;
            }
            tn = tn->link;
        }
    }

    NEW0(tn, PERM);
    tn->type.op = op;
    tn->type.type = ty;
    tn->type.size = size;
    tn->type.align = align;
    tn->type.u.sym = sym;

    /* link the new node in at the head of its hash chain */
    tn->link = typetable[h];
    typetable[h] = tn;
    return &tn->type;
}

类型的结构包括域。

struct {
unsigned cfields:1;
unsigned vfields:1;
Table ftab; /* omit */
Field flist; // linked list chaining all of the fields
} s; // how a tag's symbol-table entry records its fields

/* One field (member) of a structure or union type. */
struct field {
char *name; // field name
Type type; // field type
int offset; // offset of the field
short bitsize; // used when handling bit-fields
short lsb;
Field link; // next field in the list
}; // detailed layout of a field

同一个标记不能多次定义,相应代码片段如下

check:

/*
 * Redefinition check for a tag: look the tag up in the types table and react
 * only when the existing entry is in the current scope, or was declared at
 * PARAM scope while the current level is PARAM+1.
 */
if ((p = lookup(tag, types)) != NULL
    && (p->scope == level
        || (p->scope == PARAM && level == PARAM+1))) {
    /* same kind of type, declared but not yet defined: reuse it */
    if (p->type->op == op && !p->defined)
        return p->type;
    /* otherwise the tag is being defined a second time */
    error("redefinition of `%s' previously defined at %w\n",
          p->name, &p->src);
}

结构的全部构造函数如下:

/*
 * newstruct - create the type for a struct/union/enum tag and return it.
 *
 * op is the type operator and tag the tag name (must be non-NULL; an empty
 * string means the type is anonymous). The tag is installed in the types
 * table at the current level, and its type node is built by type() with the
 * new symbol as its identity.
 */
Type newstruct(op, tag) int op; char *tag; {
Symbol p;

assert(tag);
if (*tag == 0)
/* anonymous: presumably makes up a unique name from a fresh label number */
tag = stringd(genlabel(1));
else
/* NOTE(review): CHECK is not defined in this excerpt; it presumably expands to
   the tag-redefinition check shown earlier, which uses p, tag, op and level */
CHECK;
p = install(tag, &types, level, PERM);
p->type = type(op, NULL, 0, 0, p);
/* keep maxlevel at the largest scope seen so far */
if (p->scope > maxlevel)
maxlevel = p->scope;
p->src = src; // src is provided by the lexical-analysis module
return p->type;
}

同样一个结构的域也不能同名

Field p, *q = &ty->u.sym->u.s.flist;

/*
 * Walk the field list of ty. q always points at the link that holds p, so
 * when the loop ends q points at the list's terminating link.
 * Names are compared by pointer, which presumably relies on the name strings
 * being interned.
 */
for (p = *q; p; q = &p->link, p = *q)
    if (p->name == name)
        error("duplicate field name `%s' in `%t'\n",
              name, ty);   /* report the duplicate name */

域的构造函数如下

Field newfield(name, ty, fty) char *name; Type ty, fty; {
Field p, *q = &ty->u.sym->u.s.flist;

if (name == NULL)
name = stringd(genlabel(1));
判断是否重名
NEW0(p, PERM);
*q = p;
p->name = name;
p->type = fty;

return p;
}

LCC内置了一些类型

/*
 * Global handles for the built-in types. The xx(...) calls in typeInit assign
 * chartype through unsignedtype; where voidtype and voidptype are set is not
 * shown in this excerpt.
 */
Type chartype; /* char */
Type doubletype; /* double */
Type floattype; /* float */
Type inttype; /* signed int */
Type longdouble; /* long double */
Type longtype; /* long */
Type shorttype; /* signed short int */
Type signedchar; /* signed char */
Type unsignedchar; /* unsigned char */
Type unsignedlong; /* unsigned long int */
Type unsignedshort; /* unsigned short int */
Type unsignedtype; /* unsigned int */
Type voidptype; /* void* */
Type voidtype; /* basic types: void */

void typeInit() {
#define xx(v,name,op,metrics) { /
Symbol p = install(string(name), &types, GLOBAL, PERM);/ //用宏预处理类型
v = type(op, 0, IR->metrics.size, IR->metrics.align, p);/
assert(v->align == 0 || v->size%v->align == 0); /
p->type = v; p->addressed = IR->metrics.outofline; }
xx(chartype, "char", CHAR, charmetric);
xx(doubletype, "double", DOUBLE, doublemetric);
xx(floattype, "float", FLOAT, floatmetric);
xx(inttype, "int", INT, intmetric);
xx(longdouble, "long double", DOUBLE, doublemetric);
xx(longtype, "long int", INT, intmetric);
xx(shorttype, "short", SHORT, shortmetric);
xx(signedchar, "signed char", CHAR, charmetric);
xx(unsignedchar, "unsigned char", CHAR, charmetric);
xx(unsignedlong, "unsigned long", UNSIGNED,intmetric);
xx(unsignedshort,"unsigned short",SHORT, shortmetric);
xx(unsignedtype, "unsigned int", UNSIGNED,intmetric);
#undef xx

LCC的类型兼容判断函数如下:

/*
 * Body excerpt of eqtype(ty1, ty2, ret): returns 1 when the two types are
 * compatible and 0 when they are not; ret is the answer given when two
 * otherwise compatible arrays differ only because one has size 0.
 * The function header and the closing braces are not part of this excerpt.
 */
if (ty1 == ty2)
return 1;
if (ty1->op != ty2->op)
return 0;
switch (ty1->op) {
case CHAR: case SHORT: case UNSIGNED: case INT:
case ENUM: case UNION: case STRUCT: case DOUBLE:
return 0; // two distinct nodes of these kinds are never compatible, e.g. differently named structs with the same layout; the author also reads this as long vs int (and, with question marks, char vs unsigned char) being incompatible

case POINTER: return eqtype(ty1->type, ty2->type, 1); // compatible when the pointed-to types are

case ARRAY: if (eqtype(ty1->type, ty2->type, 1)) { // the element types must be compatible first
if (ty1->size == ty2->size)
return 1;
if (ty1->size == 0 || ty2->size == 0) // sizes must agree, unless one of the arrays is incomplete (size 0)
return ret;
}
return 0;

case FUNCTION: if (eqtype(ty1->type, ty2->type, 1)) { // the return types must be compatible first
Type *p1 = ty1->u.f.proto, *p2 = ty2->u.f.proto;
if (p1 == p2)
return 1;
if (p1 && p2) {
for ( ; *p1 && *p2; p1++, p2++)
if (eqtype(unqual(*p1), unqual(*p2), 1) == 0) // both have prototypes: parameters must be pairwise compatible, and the lists the same length
return 0;
if (*p1 == NULL && *p2 == NULL)
return 1;
} else {
if (variadic(p1 ? ty1 : ty2)) // only one side has a prototype and it is variadic: incompatible
return 0;
if (p1 == NULL)
p1 = p2;
for ( ; *p1; p1++) {
Type ty = unqual(*p1);
if (promote(ty) != (isenum(ty) ? ty->type : ty) // a parameter whose type changes under promotion makes the types incompatible, and so does float
|| ty == floattype)
return 0;
}
return 1;
}
}
return 0;

函数的参数提升,规定浮点提升为双精度,小整数和枚举提升为整数或无符号数

/*
 * promote - return the promoted form of ty, ignoring qualifiers.
 *
 * Types for which isunsigned() holds, and longtype, are returned unchanged.
 * Any other type satisfying isint() or isenum() becomes inttype.
 * Every remaining type is returned unchanged (in unqualified form).
 */
Type promote(ty) Type ty; {
    Type t = unqual(ty);
    int widens = !isunsigned(t) && t != longtype
              && (isint(t) || isenum(t));

    return widens ? inttype : t;
}

LCC的类型就说到这儿吧,大家感兴趣可以去下载http://www.cs.princeton.edu/software/lcc/ lcc的源码。

分享到 :
0 人收藏
您需要登录后才可以回帖 登录 | 立即注册

本版积分规则

积分:81
帖子:4969
精华:0
期权论坛 期权论坛
发布
内容

下载期权论坛手机APP