[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH 09/10 v10] target-tilegx: Generate tcg instructi
From: |
Richard Henderson |
Subject: |
Re: [Qemu-devel] [PATCH 09/10 v10] target-tilegx: Generate tcg instructions to execute to _init_malloc in glib |
Date: |
Mon, 11 May 2015 09:55:27 -0700 |
User-agent: |
Mozilla/5.0 (X11; Linux x86_64; rv:31.0) Gecko/20100101 Thunderbird/31.6.0 |
On 05/10/2015 03:45 PM, Chen Gang wrote:
> +static void gen_cmpltsi(struct DisasContext *dc,
> + uint8_t rdst, uint8_t rsrc, int8_t imm8)
> +{
> + qemu_log_mask(CPU_LOG_TB_IN_ASM, "cmpltsi r%d, r%d, %d\n",
> + rdst, rsrc, imm8);
> + tcg_gen_setcondi_i64(TCG_COND_LTU, dest_gr(dc, rdst), load_gr(dc, rsrc),
> + (int64_t)imm8);
> +}
> +
> +static void gen_cmpltui(struct DisasContext *dc,
> + uint8_t rdst, uint8_t rsrc, uint8_t imm8)
> +{
> + qemu_log_mask(CPU_LOG_TB_IN_ASM, "cmpltui r%d, r%d, %d\n",
> + rdst, rsrc, imm8);
> + tcg_gen_setcondi_i64(TCG_COND_LTU,
> + dest_gr(dc, rdst), load_gr(dc, rsrc), imm8);
> +}
> +
> +static void gen_cmpeqi(struct DisasContext *dc,
> + uint8_t rdst, uint8_t rsrc, uint8_t imm8)
> +{
> + qemu_log_mask(CPU_LOG_TB_IN_ASM, "cmpeqi r%d, r%d, %d\n", rdst, rsrc,
> imm8);
> + tcg_gen_setcondi_i64(TCG_COND_EQ,
> + dest_gr(dc, rdst), load_gr(dc, rsrc), imm8);
> +}
Merge these.
> +
> +static void gen_cmp(struct DisasContext *dc,
> + uint8_t rdst, uint8_t rsrc, uint8_t rsrcb, TCGCond cond)
> +{
> + const char *prefix;
> +
> + switch (cond) {
> + case TCG_COND_EQ:
> + prefix = "eq";
> + break;
> + case TCG_COND_LE:
> + prefix = "les";
> + break;
> + case TCG_COND_LEU:
> + prefix = "leu";
> + break;
> + case TCG_COND_LT:
> + prefix = "lts";
> + break;
> + case TCG_COND_LTU:
> + prefix = "ltu";
> + break;
> + case TCG_COND_NE:
> + prefix = "ne";
> + break;
> + default:
> + dc->exception = TILEGX_EXCP_OPCODE_UNKNOWN;
> + return;
> + }
> +
> + qemu_log_mask(CPU_LOG_TB_IN_ASM, "cmp%s r%d, r%d, r%d\n",
> + prefix, rdst, rsrc, rsrcb);
Better to just pass down the opcode name with the TCGCond rather than trying to
recreate it. Then there's no need for a switch, nor a need for a confusing
TILEGX_EXCP_OPCODE_UNKNOWN path.
> +static void gen_exch(struct DisasContext *dc,
> + uint8_t rdst, uint8_t rsrc, uint8_t rsrcb, int excp)
> +{
> + const char *prefix, *width;
> +
> + switch (excp) {
> + case TILEGX_EXCP_OPCODE_EXCH4:
> + prefix = "";
> + width = "4";
> + break;
> + case TILEGX_EXCP_OPCODE_EXCH:
> + prefix = "";
> + width = "";
> + break;
> + case TILEGX_EXCP_OPCODE_CMPEXCH4:
> + prefix = "cmp";
> + width = "4";
> + break;
> + case TILEGX_EXCP_OPCODE_CMPEXCH:
> + prefix = "cmp";
> + width = "";
> + break;
> + default:
> + dc->exception = TILEGX_EXCP_OPCODE_UNKNOWN;
> + return;
> + }
Likewise.
> +static void gen_v1cmpeqi(struct DisasContext *dc,
> + uint8_t rdst, uint8_t rsrc, uint8_t imm8)
> +{
> + int count;
> + TCGv vdst = dest_gr(dc, rdst);
> + TCGv tmp = tcg_temp_new_i64();
> +
> + qemu_log_mask(CPU_LOG_TB_IN_ASM, "v1cmpeqi r%d, r%d, %d\n",
> + rdst, rsrc, imm8);
> +
> + tcg_gen_movi_i64(vdst, 0);
> +
> + for (count = 0; count < 8; count++) {
> + tcg_gen_shri_i64(tmp, load_gr(dc, rsrc), (8 - count - 1) * 8);
> + tcg_gen_andi_i64(tmp, tmp, 0xff);
> + tcg_gen_setcondi_i64(TCG_COND_EQ, tmp, tmp, imm8);
> + tcg_gen_or_i64(vdst, vdst, tmp);
> + tcg_gen_shli_i64(vdst, vdst, 8);
For all of these vector instructions, I would encourage you to use helpers to
extract and insert values. Extraction has little choice but to use a shift and
a mask, as you use here. But insertion can use tcg_gen_deposit_i64. I think
that is a lot easier to reason with than your construction here which
sequentially shifts vdst.
E.g.
static inline void
extract_v1(TCGv out, TCGv in, unsigned byte)
{
tcg_gen_shri_i64(out, in, byte * 8);
tcg_gen_ext8u_i64(out, out);
}
static inline void
insert_v1(TCGv out, TCGv in, unsigned byte)
{
tcg_gen_deposit_i64(out, out, in, byte * 8, 8);
}
This loop then becomes
TCGv vsrc = load_gr(dc, src);
for (count = 0; count < 8; ++count) {
extract_v1(tmp, vsrc, count);
tcg_gen_setcondi_i64(TCG_COND_EQ, tmp, tmp, imm8);
insert_v1(vdst, tmp, count);
}
> +static void gen_v1int_l(struct DisasContext *dc,
> + uint8_t rdst, uint8_t rsrc, uint8_t rsrcb)
> +{
> + int count;
> + TCGv vdst = dest_gr(dc, rdst);
> + TCGv tmp = tcg_temp_new_i64();
> +
> + qemu_log_mask(CPU_LOG_TB_IN_ASM, "v1int_l r%d, r%d, r%d\n",
> + rdst, rsrc, rsrcb);
> +
> + tcg_gen_movi_i64(vdst, 0);
> + for (count = 0; count < 4; count++) {
> +
> + tcg_gen_shli_i64(vdst, vdst, 8);
> +
> + tcg_gen_shri_i64(tmp, load_gr(dc, rsrc), (4 - count - 1) * 8);
> + tcg_gen_andi_i64(tmp, tmp, 0xff);
> + tcg_gen_or_i64(vdst, vdst, tmp);
> + tcg_gen_shli_i64(vdst, vdst, 8);
> +
> + tcg_gen_shri_i64(tmp, load_gr(dc, rsrcb), (4 - count - 1) * 8);
> + tcg_gen_andi_i64(tmp, tmp, 0xff);
> + tcg_gen_or_i64(vdst, vdst, tmp);
> + }
> + tcg_temp_free_i64(tmp);
TCGv vsrc = load_gr(dc, rsrc);
TCGv vsrcb = load_gr(dc, rsrcb);
for (count = 0; count < 4; ++count) {
extract_v1(tmp, vsrc, count);
insert_v1(vdst, tmp, count * 2);
extract_v1(tmp, vsrcb, count);
insert_v1(vdst, tmp, count * 2 + 1);
}
> +}
> +
> +/*
> + * Functional Description
> + *
> + * uint64_t output = 0;
> + * uint32_t counter;
> + * for (counter = 0; counter < (WORD_SIZE / 32); counter++)
> + * {
> + * bool asel = ((counter & 1) == 1);
> + * int in_sel = 0 + counter / 2;
> + * int32_t srca = get4Byte (rf[SrcA], in_sel);
> + * int32_t srcb = get4Byte (rf[SrcB], in_sel);
> + * output = set4Byte (output, counter, (asel ? srca : srcb));
> + * }
> + * rf[Dest] = output;
> +*/
> +
> +static void gen_v4int_l(struct DisasContext *dc,
> + uint8_t rdst, uint8_t rsrc, uint8_t rsrcb)
> +{
> + TCGv vdst = dest_gr(dc, rdst);
> + TCGv tmp = tcg_temp_new_i64();
> +
> + qemu_log_mask(CPU_LOG_TB_IN_ASM, "v4int_l r%d, r%d, r%d\n",
> + rdst, rsrc, rsrcb);
> +
> + tcg_gen_andi_i64(vdst, load_gr(dc, rsrc), 0xffffffff);
> + tcg_gen_shli_i64(vdst, vdst, 8);
> + tcg_gen_andi_i64(tmp, load_gr(dc, rsrcb), 0xffffffff);
> + tcg_gen_or_i64(vdst, vdst, tmp);
And herein is a bug, that I'd hope using the helper functions would avoid: you
shift by 8 instead of 32. This function simplifies to
tcg_gen_deposit_i64(vdst, load_gr(dc, rsrc), load_gr(dc, rsrcb),
32, 32);
> +/*
> + * uint64_t mask = 0;
> + * int64_t background = ((rf[SrcA] >> BFEnd) & 1) ? -1ULL : 0ULL;
> + * mask = ((-1ULL) ^ ((-1ULL << ((BFEnd - BFStart) & 63)) << 1));
> + * uint64_t rot_src = (((uint64_t) rf[SrcA]) >> BFStart)
> + * | (rf[SrcA] << (64 - BFStart));
> + * rf[Dest] = (rot_src & mask) | (background & ~mask);
> + */
> +static void gen_bfexts(struct DisasContext *dc, uint8_t rdst, uint8_t rsrc,
> + uint8_t start, uint8_t end)
> +{
> + uint64_t mask = (-1ULL) ^ ((-1ULL << ((end - start) & 63)) << 1);
> + TCGv vldst = tcg_temp_local_new_i64();
> + TCGv tmp = tcg_temp_local_new_i64();
> + TCGv pmsk = tcg_const_i64(-1ULL);
Why the local temps?
> +
> + qemu_log_mask(CPU_LOG_TB_IN_ASM, "bfexts r%d, r%d, %d, %d\n",
> + rdst, rsrc, start, end);
> +
> + tcg_gen_rotri_i64(vldst, load_gr(dc, rsrc), start);
> + tcg_gen_andi_i64(vldst, vldst, mask);
> +
> + tcg_gen_shri_i64(tmp, load_gr(dc, rsrc), end);
> + tcg_gen_andi_i64(tmp, tmp, 1);
> + tcg_gen_movcond_i64(TCG_COND_EQ, tmp, tmp, load_zero(dc),
> + load_zero(dc), pmsk);
This movcond is equivalent to negation.
> +/*
> + Functional Description
> + uint64_t a = rf[SrcA];
> + uint64_t b = rf[SrcB];
> + uint64_t d = rf[Dest];
> + uint64_t output = 0;
> + unsigned int counter;
> + for (counter = 0; counter < (WORD_SIZE / BYTE_SIZE); counter++)
> + {
> + int sel = getByte (b, counter) & 0xf;
> + uint8_t byte = (sel < 8) ? getByte (d, sel) : getByte (a, (sel -
> 8));
> + output = setByte (output, counter, byte);
> + }
> + rf[Dest] = output;
> + */
> +static void gen_shufflebytes(struct DisasContext *dc,
> + uint8_t rdst, uint8_t rsrc, uint8_t rsrcb)
I strongly suggest this be moved to op_helper.c. It's too big.
> +/* FIXME: At present, skip unalignment checking */
> +static void gen_ld(struct DisasContext *dc,
> + uint8_t rdst, uint8_t rsrc, TCGMemOp ops)
Alignment checks would never be done here anyway.
Again, pass down the opcode string rather than rebuild.
r~
- [Qemu-devel] [PATCH 00/10 v10] tilegx: Firstly add tilegx target for linux-user, Chen Gang, 2015/05/10
- [Qemu-devel] [PATCH 01/10 v10] linux-user: tilegx: Firstly add architecture related features, Chen Gang, 2015/05/10
- [Qemu-devel] [PATCH 02/10 v10] linux-user: Support tilegx architecture in linux-user, Chen Gang, 2015/05/10
- [Qemu-devel] [PATCH 03/10 v10] linux-user/syscall.c: Conditionalize syscalls which are not defined in tilegx, Chen Gang, 2015/05/10
- [Qemu-devel] [PATCH 04/10 v10] target-tilegx: Add opcode basic implementation from Tilera Corporation, Chen Gang, 2015/05/10
- [Qemu-devel] [PATCH 06/10 v10] target-tilegx: Add special register information from Tilera Corporation, Chen Gang, 2015/05/10
- [Qemu-devel] [PATCH 07/10 v10] target-tilegx: Add cpu basic features for linux-user, Chen Gang, 2015/05/10
- [Qemu-devel] [PATCH 08/10 v10] target-tilegx: Add helper features for linux-user, Chen Gang, 2015/05/10
- [Qemu-devel] [PATCH 09/10 v10] target-tilegx: Generate tcg instructions to execute to _init_malloc in glib, Chen Gang, 2015/05/10
- Re: [Qemu-devel] [PATCH 09/10 v10] target-tilegx: Generate tcg instructions to execute to _init_malloc in glib, Chen Gang, 2015/05/29
[Qemu-devel] [PATCH 10/10 v10] target-tilegx: Add TILE-Gx building files, Chen Gang, 2015/05/10
Message not available