Skip to content

Commit f5eeb1f

Browse files
committed
Support variable typing & integer resizing
In this patch, all variables (including IR-generated variables) are now guaranteed to have the default type "int", to be later used for integer resizing; this includes truncation and sign extension. Currently, only the following cases will generate resizing opcodes (OP_trunc & OP_sign_ext): - variable assignment to registers - function parameter preparation; variadic function parameters default to the size of the int type. Additionally, several data structures and memory management routines are adjusted for extensibility; this includes local variable and syntactic block allocation. Close #166.
1 parent 5ecfd87 commit f5eeb1f

File tree

15 files changed

+419
-129
lines changed

15 files changed

+419
-129
lines changed

src/arm-codegen.c

Lines changed: 24 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -119,9 +119,14 @@ void update_elf_offset(ph2_ir_t *ph2_ir)
119119
case OP_return:
120120
elf_offset += 24;
121121
return;
122+
case OP_trunc:
123+
elf_offset += 4;
124+
return;
125+
case OP_sign_ext:
126+
elf_offset += 4;
127+
return;
122128
default:
123-
printf("Unknown opcode\n");
124-
abort();
129+
fatal("Unknown opcode");
125130
}
126131
}
127132

@@ -421,9 +426,24 @@ void emit_ph2_ir(ph2_ir_t *ph2_ir)
421426
emit(__mov_i(__NE, rd, 0));
422427
emit(__mov_i(__EQ, rd, 1));
423428
return;
429+
case OP_trunc:
430+
if (rm == 1)
431+
rm = 0xFF;
432+
else if (rm == 2)
433+
rm = 0xFFFF;
434+
else if (rm == 4)
435+
rm = 0xFFFFFFFF;
436+
else
437+
fatal("Unsupported truncation operation with invalid target size");
438+
439+
emit(__and_i(__AL, rd, rn, rm));
440+
return;
441+
case OP_sign_ext:
442+
/* TODO: Allow sign extension to other types */
443+
emit(__sxtb(__AL, rd, rn, 0));
444+
return;
424445
default:
425-
printf("Unknown opcode\n");
426-
abort();
446+
fatal("Unknown opcode");
427447
}
428448
}
429449

src/arm.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,11 @@ int __sub_r(arm_cond_t cond, arm_reg rd, arm_reg rs, arm_reg ro)
251251
return __mov(cond, 0, arm_sub, 0, rs, rd, ro);
252252
}
253253

254+
int __and_i(arm_cond_t cond, arm_reg rd, arm_reg rs, int imm)
255+
{
256+
return __mov(cond, 1, arm_and, 0, rs, rd, imm);
257+
}
258+
254259
int __zero(int rd)
255260
{
256261
return __mov_i(__AL, rd, 0);
@@ -349,3 +354,12 @@ int __teq(arm_reg rd)
349354
{
350355
return __mov(__AL, 1, arm_teq, 1, rd, 0, 0);
351356
}
357+
358+
int __sxtb(arm_cond_t cond, arm_reg rd, arm_reg rm, int rotation)
359+
{
360+
if (rotation != 0 && rotation != 8 && rotation != 16 && rotation != 24)
361+
fatal("SXTB rotation must be 0, 8, 16, or 24");
362+
363+
return arm_encode(cond, 106, 0xF, rd,
364+
rm | ((rotation >> 3) << 10) | (0x7 << 4));
365+
}

src/defs.h

Lines changed: 15 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,10 @@ typedef enum {
248248
OP_bit_not,
249249
OP_negate,
250250

251+
/* data type conversion */
252+
OP_trunc,
253+
OP_sign_ext,
254+
251255
/* entry point of the state machine */
252256
OP_start
253257
} opcode_t;
@@ -276,8 +280,17 @@ typedef struct use_chain_node {
276280
struct use_chain_node *prev;
277281
} use_chain_t;
278282

283+
typedef struct var var_t;
284+
typedef struct type type_t;
285+
286+
typedef struct var_list {
287+
int capacity;
288+
int size;
289+
var_t **elements;
290+
} var_list_t;
291+
279292
struct var {
280-
char type_name[MAX_TYPE_LEN];
293+
type_t *type;
281294
char var_name[MAX_VAR_LEN];
282295
int is_ptr;
283296
bool is_func;
@@ -302,8 +315,6 @@ struct var {
302315
bool is_const; /* whether a constant representation or not */
303316
};
304317

305-
typedef struct var var_t;
306-
307318
typedef struct {
308319
char name[MAX_VAR_LEN];
309320
bool is_variadic;
@@ -319,22 +330,14 @@ typedef struct func func_t;
319330

320331
/* block definition */
321332
struct block {
322-
var_t locals[MAX_LOCALS];
323-
int next_local;
333+
var_list_t locals;
324334
struct block *parent;
325335
func_t *func;
326336
macro_t *macro;
327-
int locals_size;
328337
struct block *next;
329338
};
330339

331340
typedef struct block block_t;
332-
333-
typedef struct {
334-
block_t *head;
335-
block_t *tail;
336-
} block_list_t;
337-
338341
typedef struct basic_block basic_block_t;
339342

340343
/* Definition of a growable buffer for a mutable null-terminated string
@@ -374,8 +377,6 @@ struct type {
374377
int num_fields;
375378
};
376379

377-
typedef struct type type_t;
378-
379380
/* lvalue details */
380381
typedef struct {
381382
int size;

src/elf.c

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@
77

88
/* ELF file manipulation */
99

10+
#include "../config"
11+
#include "defs.h"
12+
#include "globals.c"
13+
1014
int elf_symbol_index;
1115

1216
void elf_write_str(strbuf_t *elf_array, char *vals)

src/globals.c

Lines changed: 54 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -28,8 +28,6 @@ int macro_return_idx;
2828

2929
/* Global objects */
3030

31-
block_list_t BLOCKS;
32-
3331
macro_t *MACROS;
3432
int macros_idx = 0;
3533

@@ -41,11 +39,23 @@ hashmap_t *FUNC_MAP;
4139
hashmap_t *ALIASES_MAP;
4240
hashmap_t *CONSTANTS_MAP;
4341

42+
/* Types */
43+
4444
type_t *TYPES;
4545
int types_idx = 0;
4646

47+
type_t *TY_void;
48+
type_t *TY_char;
49+
type_t *TY_bool;
50+
type_t *TY_int;
51+
52+
/* Arenas */
53+
4754
arena_t *INSN_ARENA;
4855

56+
/* BLOCK_ARENA is responsible for block_t / var_t allocation */
57+
arena_t *BLOCK_ARENA;
58+
4959
/* BB_ARENA is responsible for basic_block_t / ph2_ir_t allocation */
5060
arena_t *BB_ARENA;
5161

@@ -56,6 +66,7 @@ int ph2_ir_idx = 0;
5666

5767
func_list_t FUNC_LIST;
5868
func_t *GLOBAL_FUNC;
69+
block_t *GLOBAL_BLOCK;
5970
basic_block_t *MAIN_BB;
6071
int elf_offset = 0;
6172

@@ -499,20 +510,14 @@ void set_var_liveout(var_t *var, int end)
499510

500511
block_t *add_block(block_t *parent, func_t *func, macro_t *macro)
501512
{
502-
block_t *blk = malloc(sizeof(block_t));
503-
504-
if (!BLOCKS.head) {
505-
BLOCKS.head = blk;
506-
BLOCKS.tail = BLOCKS.head;
507-
} else {
508-
BLOCKS.tail->next = blk;
509-
BLOCKS.tail = blk;
510-
}
513+
block_t *blk = arena_alloc(BLOCK_ARENA, sizeof(block_t));
511514

512515
blk->parent = parent;
513516
blk->func = func;
514517
blk->macro = macro;
515-
blk->next_local = 0;
518+
blk->locals.capacity = 16;
519+
blk->locals.elements =
520+
arena_alloc(BLOCK_ARENA, blk->locals.capacity * sizeof(var_t *));
516521
return blk;
517522
}
518523

@@ -645,9 +650,10 @@ var_t *find_local_var(char *token, block_t *block)
645650
func_t *func = block->func;
646651

647652
for (; block; block = block->parent) {
648-
for (int i = 0; i < block->next_local; i++) {
649-
if (!strcmp(block->locals[i].var_name, token))
650-
return &block->locals[i];
653+
var_list_t *var_list = &block->locals;
654+
for (int i = 0; i < var_list->size; i++) {
655+
if (!strcmp(var_list->elements[i]->var_name, token))
656+
return var_list->elements[i];
651657
}
652658
}
653659

@@ -662,11 +668,11 @@ var_t *find_local_var(char *token, block_t *block)
662668

663669
var_t *find_global_var(char *token)
664670
{
665-
block_t *block = BLOCKS.head;
671+
var_list_t *var_list = &GLOBAL_BLOCK->locals;
666672

667-
for (int i = 0; i < block->next_local; i++) {
668-
if (!strcmp(block->locals[i].var_name, token))
669-
return &block->locals[i];
673+
for (int i = 0; i < var_list->size; i++) {
674+
if (!strcmp(var_list->elements[i]->var_name, token))
675+
return var_list->elements[i];
670676
}
671677
return NULL;
672678
}
@@ -685,9 +691,7 @@ int size_var(var_t *var)
685691
if (var->is_ptr > 0 || var->is_func) {
686692
size = 4;
687693
} else {
688-
type_t *type = find_type(var->type_name, 0);
689-
if (!type)
690-
error("Incomplete type");
694+
type_t *type = var->type;
691695
if (type->size == 0)
692696
size = type->base_struct->size;
693697
else
@@ -970,16 +974,14 @@ void global_init()
970974
{
971975
elf_code_start = ELF_START + elf_header_len;
972976

973-
BLOCKS.head = NULL;
974-
BLOCKS.tail = NULL;
975-
976977
MACROS = malloc(MAX_ALIASES * sizeof(macro_t));
977-
FUNC_MAP = hashmap_create(DEFAULT_FUNCS_SIZE);
978978
TYPES = malloc(MAX_TYPES * sizeof(type_t));
979+
BLOCK_ARENA = arena_init(DEFAULT_ARENA_SIZE);
979980
INSN_ARENA = arena_init(DEFAULT_ARENA_SIZE);
980981
BB_ARENA = arena_init(DEFAULT_ARENA_SIZE);
981982
PH2_IR_FLATTEN = malloc(MAX_IR_INSTR * sizeof(ph2_ir_t *));
982983
SOURCE = strbuf_create(MAX_SOURCE);
984+
FUNC_MAP = hashmap_create(DEFAULT_FUNCS_SIZE);
983985
INCLUSION_MAP = hashmap_create(DEFAULT_INCLUSIONS_SIZE);
984986
ALIASES_MAP = hashmap_create(MAX_ALIASES);
985987
CONSTANTS_MAP = hashmap_create(MAX_CONSTANTS);
@@ -994,18 +996,14 @@ void global_init()
994996

995997
void global_release()
996998
{
997-
while (BLOCKS.head) {
998-
block_t *next = BLOCKS.head->next;
999-
free(BLOCKS.head);
1000-
BLOCKS.head = next;
1001-
}
1002999
free(MACROS);
1003-
hashmap_free(FUNC_MAP);
10041000
free(TYPES);
1001+
arena_free(BLOCK_ARENA);
10051002
arena_free(INSN_ARENA);
10061003
arena_free(BB_ARENA);
10071004
free(PH2_IR_FLATTEN);
10081005
strbuf_free(SOURCE);
1006+
hashmap_free(FUNC_MAP);
10091007
hashmap_free(INCLUSION_MAP);
10101008
hashmap_free(ALIASES_MAP);
10111009
hashmap_free(CONSTANTS_MAP);
@@ -1018,6 +1016,14 @@ void global_release()
10181016
strbuf_free(elf_section);
10191017
}
10201018

1019+
/* Reports an error without specifying a position */
1020+
void fatal(char *msg)
1021+
{
1022+
printf("[Error]: %s\n", msg);
1023+
abort();
1024+
}
1025+
1026+
/* Reports an error and specifies a position */
10211027
void error(char *msg)
10221028
{
10231029
/* Construct error source diagnostics, enabling precise identification of
@@ -1048,8 +1054,8 @@ void error(char *msg)
10481054
/* TODO: figure out the corresponding C source file path and report line
10491055
* number.
10501056
*/
1051-
printf("Error %s at source location %d\n%s\n", msg, SOURCE->size,
1052-
diagnostic);
1057+
printf("[Error]: %s\nOccurs at source location %d.\n%s\n", msg,
1058+
SOURCE->size, diagnostic);
10531059
abort();
10541060
}
10551061

@@ -1081,7 +1087,7 @@ void dump_bb_insn(func_t *func, basic_block_t *bb, bool *at_func_start)
10811087
continue;
10821088
case OP_allocat:
10831089
print_indent(1);
1084-
printf("allocat %s", rd->type_name);
1090+
printf("allocat %s", rd->type->type_name);
10851091

10861092
for (int i = 0; i < rd->is_ptr; i++)
10871093
printf("*");
@@ -1251,6 +1257,16 @@ void dump_bb_insn(func_t *func, basic_block_t *bb, bool *at_func_start)
12511257
printf("%%%s = lshift %%%s, %%%s", rd->var_name, rs1->var_name,
12521258
rs2->var_name);
12531259
break;
1260+
case OP_trunc:
1261+
print_indent(1);
1262+
printf("%%%s = trunc %%%s, %d", rd->var_name, rs1->var_name,
1263+
insn->sz);
1264+
break;
1265+
case OP_sign_ext:
1266+
print_indent(1);
1267+
printf("%%%s = sign_ext %%%s, %d", rd->var_name, rs1->var_name,
1268+
insn->sz);
1269+
break;
12541270
default:
12551271
printf("<Unsupported opcode: %d>", insn->opcode);
12561272
break;
@@ -1277,7 +1293,7 @@ void dump_insn()
12771293
for (func_t *func = FUNC_LIST.head; func; func = func->next) {
12781294
bool at_func_start = true;
12791295

1280-
printf("def %s", func->return_def.type_name);
1296+
printf("def %s", func->return_def.type->type_name);
12811297

12821298
for (int i = 0; i < func->return_def.is_ptr; i++)
12831299
printf("*");
@@ -1286,7 +1302,7 @@ void dump_insn()
12861302
for (int i = 0; i < func->num_params; i++) {
12871303
if (i != 0)
12881304
printf(", ");
1289-
printf("%s", func->param_defs[i].type_name);
1305+
printf("%s", func->param_defs[i].type->type_name);
12901306

12911307
for (int k = 0; k < func->param_defs[i].is_ptr; k++)
12921308
printf("*");
@@ -1302,7 +1318,7 @@ void dump_insn()
13021318
if (!bb)
13031319
continue;
13041320

1305-
if (strcmp(func->return_def.type_name, "void"))
1321+
if (func->return_def.type != TY_void)
13061322
continue;
13071323

13081324
if (bb->insn_list.tail)

0 commit comments

Comments
 (0)