Skip to content

Commit 04c6b99

Browse files
committed
py/emitnative: Improve Viper register-indexed code for Thumb.
This commit lets the Viper code generator use optimised code sequences for register-indexed load and store operations when generating Thumb code. Register-indexed load and store operations for Thumb can now take at most two machine opcodes for halfword and word values, and just a single machine opcode for byte values. The original implementation could generate up to four opcodes in the worst case (when dealing with word values). Signed-off-by: Alessandro Gatti <[email protected]>
1 parent 1d37caa commit 04c6b99

File tree

3 files changed

+87
-0
lines changed

3 files changed

+87
-0
lines changed

py/asmthumb.c

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -491,6 +491,26 @@ void asm_thumb_ldrh_reg_reg_i12_optimised(asm_thumb_t *as, uint reg_dest, uint r
491491
}
492492
}
493493

494+
// Load a halfword using a register-provided index:
//   reg_dest = *(uint16_t *)(reg_base + (reg_index << 1))
// The index is in halfword units.  NOTE: reg_index is clobbered — it is
// shifted in place to become a byte offset — so callers must treat it as a
// scratch register after this call.  All registers must be low (R0-R7);
// the rlo helpers assert this.
void asm_thumb_ldrh_reg_reg_reg(asm_thumb_t *as, uint reg_dest, uint reg_base, uint reg_index) {
    asm_thumb_lsl_rlo_rlo_i5(as, reg_index, reg_index, 1); // index * 2 -> byte offset
    asm_thumb_ldrh_rlo_rlo_rlo(as, reg_dest, reg_base, reg_index);
}
499+
// Load a word using a register-provided index:
//   reg_dest = *(uint32_t *)(reg_base + (reg_index << 2))
// The index is in word units.  NOTE: reg_index is clobbered — it is shifted
// in place to become a byte offset — so callers must treat it as a scratch
// register after this call.  All registers must be low (R0-R7); the rlo
// helpers assert this.
void asm_thumb_ldr_reg_reg_reg(asm_thumb_t *as, uint reg_dest, uint reg_base, uint reg_index) {
    asm_thumb_lsl_rlo_rlo_i5(as, reg_index, reg_index, 2); // index * 4 -> byte offset
    asm_thumb_ldr_rlo_rlo_rlo(as, reg_dest, reg_base, reg_index);
}
504+
// Store a halfword using a register-provided index:
//   *(uint16_t *)(reg_base + (reg_index << 1)) = reg_val
// The index is in halfword units.  NOTE: reg_index is clobbered — it is
// shifted in place to become a byte offset — so callers must treat it as a
// scratch register after this call.  All registers must be low (R0-R7);
// the rlo helpers assert this.
void asm_thumb_strh_reg_reg_reg(asm_thumb_t *as, uint reg_val, uint reg_base, uint reg_index) {
    asm_thumb_lsl_rlo_rlo_i5(as, reg_index, reg_index, 1); // index * 2 -> byte offset
    asm_thumb_strh_rlo_rlo_rlo(as, reg_val, reg_base, reg_index);
}
509+
// Store a word using a register-provided index:
//   *(uint32_t *)(reg_base + (reg_index << 2)) = reg_val
// The index is in word units.  NOTE: reg_index is clobbered — it is shifted
// in place to become a byte offset — so callers must treat it as a scratch
// register after this call.  All registers must be low (R0-R7); the rlo
// helpers assert this.
void asm_thumb_str_reg_reg_reg(asm_thumb_t *as, uint reg_val, uint reg_base, uint reg_index) {
    asm_thumb_lsl_rlo_rlo_i5(as, reg_index, reg_index, 2); // index * 4 -> byte offset
    asm_thumb_str_rlo_rlo_rlo(as, reg_val, reg_base, reg_index);
}
494514
// this could be wrong, because it should have a range of +/- 16MiB...
495515
#define OP_BW_HI(byte_offset) (0xf000 | (((byte_offset) >> 12) & 0x07ff))
496516
#define OP_BW_LO(byte_offset) (0xb800 | (((byte_offset) >> 1) & 0x07ff))

py/asmthumb.h

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -251,6 +251,50 @@ static inline void asm_thumb_bx_reg(asm_thumb_t *as, uint r_src) {
251251
asm_thumb_format_5(as, ASM_THUMB_FORMAT_5_BX, 0, r_src);
252252
}
253253

254+
// FORMAT 7: load/store with register offset
// FORMAT 8: load/store sign-extended byte/halfword
// These are single 16-bit opcodes that access memory at reg_base + reg_index;
// the hardware applies no scaling to the index (the callers in asmthumb.c
// shift the index into a byte offset first).

#define ASM_THUMB_FORMAT_7_LDR (0x5800)
#define ASM_THUMB_FORMAT_7_STR (0x5000)
#define ASM_THUMB_FORMAT_7_WORD_TRANSFER (0x0000)
#define ASM_THUMB_FORMAT_7_BYTE_TRANSFER (0x0400) // OR into a format 7 opcode for byte-sized transfers
#define ASM_THUMB_FORMAT_8_LDRH (0x5A00)
#define ASM_THUMB_FORMAT_8_STRH (0x5200)

// Pack the opcode and the three low-register operands into one 16-bit
// instruction: bits 8-6 = index register, bits 5-3 = base register,
// bits 2-0 = destination/source register.
#define ASM_THUMB_FORMAT_7_8_ENCODE(op, rlo_dest, rlo_base, rlo_index) \
    ((op) | ((rlo_index) << 6) | ((rlo_base) << 3) | ((rlo_dest)))
267+
// Emit a Thumb format 7/8 (register-offset load/store) instruction.
// op is one of the ASM_THUMB_FORMAT_7_* / ASM_THUMB_FORMAT_8_* opcodes,
// optionally OR'd with a transfer-size flag.
static inline void asm_thumb_format_7_8(asm_thumb_t *as, uint op, uint rlo_dest, uint rlo_base, uint rlo_index) {
    // Each operand field is only 3 bits wide, so just R0-R7 can be encoded.
    assert(rlo_dest < ASM_THUMB_REG_R8);
    assert(rlo_base < ASM_THUMB_REG_R8);
    assert(rlo_index < ASM_THUMB_REG_R8);
    asm_thumb_op16(as, ASM_THUMB_FORMAT_7_8_ENCODE(op, rlo_dest, rlo_base, rlo_index));
}
274+
// Emit LDRB rlo_dest, [rlo_base, rlo_index]: zero-extended byte load from
// rlo_base + rlo_index (index is a byte offset; the CPU applies no scaling).
static inline void asm_thumb_ldrb_rlo_rlo_rlo(asm_thumb_t *as, uint rlo_dest, uint rlo_base, uint rlo_index) {
    const uint op = ASM_THUMB_FORMAT_7_LDR | ASM_THUMB_FORMAT_7_BYTE_TRANSFER;
    asm_thumb_format_7_8(as, op, rlo_dest, rlo_base, rlo_index);
}
278+
// Emit LDRH rlo_dest, [rlo_base, rlo_index]: halfword load from
// rlo_base + rlo_index (index is a byte offset; the CPU applies no scaling).
static inline void asm_thumb_ldrh_rlo_rlo_rlo(asm_thumb_t *as, uint rlo_dest, uint rlo_base, uint rlo_index) {
    // Format 8 opcodes encode the transfer size directly; no size flag needed.
    const uint op = ASM_THUMB_FORMAT_8_LDRH;
    asm_thumb_format_7_8(as, op, rlo_dest, rlo_base, rlo_index);
}
282+
// Emit LDR rlo_dest, [rlo_base, rlo_index]: word load from
// rlo_base + rlo_index (index is a byte offset; the CPU applies no scaling).
static inline void asm_thumb_ldr_rlo_rlo_rlo(asm_thumb_t *as, uint rlo_dest, uint rlo_base, uint rlo_index) {
    const uint op = ASM_THUMB_FORMAT_7_LDR | ASM_THUMB_FORMAT_7_WORD_TRANSFER;
    asm_thumb_format_7_8(as, op, rlo_dest, rlo_base, rlo_index);
}
286+
// Emit STRB rlo_src, [rlo_base, rlo_index]: byte store to
// rlo_base + rlo_index (index is a byte offset; the CPU applies no scaling).
static inline void asm_thumb_strb_rlo_rlo_rlo(asm_thumb_t *as, uint rlo_src, uint rlo_base, uint rlo_index) {
    const uint op = ASM_THUMB_FORMAT_7_STR | ASM_THUMB_FORMAT_7_BYTE_TRANSFER;
    asm_thumb_format_7_8(as, op, rlo_src, rlo_base, rlo_index);
}
290+
// Emit STRH rlo_src, [rlo_base, rlo_index]: halfword store to
// rlo_base + rlo_index (index is a byte offset; the CPU applies no scaling).
// The first parameter is named rlo_src for consistency with the other store
// emitters (strb/str): it is the value being stored, not a destination.
// (C callers are positional, so the rename is fully caller-compatible.)
static inline void asm_thumb_strh_rlo_rlo_rlo(asm_thumb_t *as, uint rlo_src, uint rlo_base, uint rlo_index) {
    asm_thumb_format_7_8(as, ASM_THUMB_FORMAT_8_STRH, rlo_src, rlo_base, rlo_index);
}
294+
// Emit STR rlo_src, [rlo_base, rlo_index]: word store to
// rlo_base + rlo_index (index is a byte offset; the CPU applies no scaling).
static inline void asm_thumb_str_rlo_rlo_rlo(asm_thumb_t *as, uint rlo_src, uint rlo_base, uint rlo_index) {
    const uint op = ASM_THUMB_FORMAT_7_STR | ASM_THUMB_FORMAT_7_WORD_TRANSFER;
    asm_thumb_format_7_8(as, op, rlo_src, rlo_base, rlo_index);
}
254298
// FORMAT 9: load/store with immediate offset
255299
// For word transfers the offset must be aligned, and >>2
256300

@@ -341,6 +385,11 @@ void asm_thumb_mov_reg_pcrel(asm_thumb_t *as, uint rlo_dest, uint label);
341385
void asm_thumb_ldr_reg_reg_i12_optimised(asm_thumb_t *as, uint reg_dest, uint reg_base, uint word_offset); // convenience
342386
void asm_thumb_ldrh_reg_reg_i12_optimised(asm_thumb_t *as, uint reg_dest, uint reg_base, uint uint16_offset); // convenience
343387

388+
// Register-indexed load/store emitters; the index is in element units
// (halfwords or words), not bytes.  NOTE: these clobber reg_index by
// shifting it in place into a byte offset.  All registers must be low
// registers (R0-R7).
void asm_thumb_ldrh_reg_reg_reg(asm_thumb_t *as, uint reg_dest, uint reg_base, uint reg_index);
void asm_thumb_ldr_reg_reg_reg(asm_thumb_t *as, uint reg_dest, uint reg_base, uint reg_index);
void asm_thumb_strh_reg_reg_reg(asm_thumb_t *as, uint reg_val, uint reg_base, uint reg_index);
void asm_thumb_str_reg_reg_reg(asm_thumb_t *as, uint reg_val, uint reg_base, uint reg_index);
344393
void asm_thumb_b_label(asm_thumb_t *as, uint label); // convenience: picks narrow or wide branch
345394
void asm_thumb_bcc_label(asm_thumb_t *as, int cc, uint label); // convenience: picks narrow or wide branch
346395
void asm_thumb_bl_ind(asm_thumb_t *as, uint fun_id, uint reg_temp); // convenience

py/emitnative.c

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1641,6 +1641,9 @@ static void emit_native_load_subscr(emit_t *emit) {
16411641
#if N_ARM
16421642
asm_arm_ldrb_reg_reg_reg(emit->as, REG_RET, REG_ARG_1, reg_index);
16431643
break;
1644+
#elif N_THUMB
1645+
asm_thumb_ldrb_rlo_rlo_rlo(emit->as, REG_RET, REG_ARG_1, reg_index);
1646+
break;
16441647
#endif
16451648
// TODO optimise to use thumb ldrb r1, [r2, r3]
16461649
ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
@@ -1652,6 +1655,9 @@ static void emit_native_load_subscr(emit_t *emit) {
16521655
#if N_ARM
16531656
asm_arm_ldrh_reg_reg_reg(emit->as, REG_RET, REG_ARG_1, reg_index);
16541657
break;
1658+
#elif N_THUMB
1659+
asm_thumb_ldrh_reg_reg_reg(emit->as, REG_RET, REG_ARG_1, reg_index);
1660+
break;
16551661
#elif N_XTENSA || N_XTENSAWIN
16561662
asm_xtensa_op_addx2(emit->as, REG_ARG_1, reg_index, REG_ARG_1);
16571663
asm_xtensa_op_l16ui(emit->as, REG_RET, REG_ARG_1, 0);
@@ -1667,6 +1673,9 @@ static void emit_native_load_subscr(emit_t *emit) {
16671673
#if N_ARM
16681674
asm_arm_ldr_reg_reg_reg(emit->as, REG_RET, REG_ARG_1, reg_index);
16691675
break;
1676+
#elif N_THUMB
1677+
asm_thumb_ldr_reg_reg_reg(emit->as, REG_RET, REG_ARG_1, reg_index);
1678+
break;
16701679
#elif N_RV32
16711680
asm_rv32_opcode_slli(emit->as, REG_TEMP2, reg_index, 2);
16721681
asm_rv32_opcode_cadd(emit->as, REG_ARG_1, REG_TEMP2);
@@ -1944,6 +1953,9 @@ static void emit_native_store_subscr(emit_t *emit) {
19441953
#if N_ARM
19451954
asm_arm_strb_reg_reg_reg(emit->as, reg_value, REG_ARG_1, reg_index);
19461955
break;
1956+
#elif N_THUMB
1957+
asm_thumb_strb_rlo_rlo_rlo(emit->as, reg_value, REG_ARG_1, reg_index);
1958+
break;
19471959
#endif
19481960
ASM_ADD_REG_REG(emit->as, REG_ARG_1, reg_index); // add index to base
19491961
ASM_STORE8_REG_REG(emit->as, reg_value, REG_ARG_1); // store value to (base+index)
@@ -1954,6 +1966,9 @@ static void emit_native_store_subscr(emit_t *emit) {
19541966
#if N_ARM
19551967
asm_arm_strh_reg_reg_reg(emit->as, reg_value, REG_ARG_1, reg_index);
19561968
break;
1969+
#elif N_THUMB
1970+
asm_thumb_strh_reg_reg_reg(emit->as, reg_value, REG_ARG_1, reg_index);
1971+
break;
19571972
#elif N_XTENSA || N_XTENSAWIN
19581973
asm_xtensa_op_addx2(emit->as, REG_ARG_1, reg_index, REG_ARG_1);
19591974
asm_xtensa_op_s16i(emit->as, reg_value, REG_ARG_1, 0);
@@ -1969,6 +1984,9 @@ static void emit_native_store_subscr(emit_t *emit) {
19691984
#if N_ARM
19701985
asm_arm_str_reg_reg_reg(emit->as, reg_value, REG_ARG_1, reg_index);
19711986
break;
1987+
#elif N_THUMB
1988+
asm_thumb_str_reg_reg_reg(emit->as, reg_value, REG_ARG_1, reg_index);
1989+
break;
19721990
#elif N_RV32
19731991
asm_rv32_opcode_slli(emit->as, REG_TEMP2, reg_index, 2);
19741992
asm_rv32_opcode_cadd(emit->as, REG_ARG_1, REG_TEMP2);

0 commit comments

Comments
 (0)