[Top][All Lists]
[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [Qemu-devel] [PATCH 2/2] Support for Cavium-Octeon specific instruction
From: |
Aurelien Jarno |
Subject: |
Re: [Qemu-devel] [PATCH 2/2] Support for Cavium-Octeon specific instruction |
Date: |
Tue, 12 Apr 2011 23:32:21 +0200 |
User-agent: |
Mutt/1.5.20 (2009-06-14) |
On Sat, Apr 09, 2011 at 04:05:45PM +0500, Khansa Butt wrote:
> From 7fd3ef1360b78ad85848e54e36d97ab50d77e6a6 Mon Sep 17 00:00:00 2001
> From: Ehsan-ul-Haq & Khansa Butt <address@hidden>
> Date: Sat, 9 Apr 2011 11:09:18 +0500
> Subject: [PATCH 2/2] Support for Cavium-Octeon specific instruction
>
>
> Signed-off-by: Ehsan-ul-Haq, Abdul Qadeer, Abdul Waheed, Khansa Butt <
> address@hidden>
> ---
> host-utils.c | 14 ++
> host-utils.h | 1 +
> linux-user/elfload.c | 5 +
> linux-user/main.c | 5 +
> linux-user/qemu.h | 1 +
> linux-user/syscall.c | 5 +
> target-mips/cpu.h | 15 ++
> target-mips/helper.h | 5 +
> target-mips/op_helper.c | 70 ++++++
> target-mips/translate.c | 556
> ++++++++++++++++++++++++++++++++++++++++++++++-
> 10 files changed, 666 insertions(+), 11 deletions(-)
>
> diff --git a/host-utils.c b/host-utils.c
> index dc96123..3073ef8 100644
> --- a/host-utils.c
> +++ b/host-utils.c
> @@ -102,4 +102,18 @@ void muls64 (uint64_t *plow, uint64_t *phigh, int64_t
> a, int64_t b)
> a, b, *phigh, *plow);
> #endif
> }
> +
> #endif /* !defined(__x86_64__) */
> +void addc(uint64_t res [], uint64_t a, int i)
> +{
> + uint64_t c = res[i];
> + for (;i < 4;i++) {
> + res[i] = c + a;
> + if (res[i] < a) {
> + c = 1;
> + a = res[i+1];
> + }
> + else
> + break;
> + }
> +}
Is it really something we want in host-utils? It is something very
specific and in any case it violates coding style.
> diff --git a/host-utils.h b/host-utils.h
> index 0ddc176..172c4fd 100644
> --- a/host-utils.h
> +++ b/host-utils.h
> @@ -46,6 +46,7 @@ static inline void muls64(uint64_t *plow, uint64_t *phigh,
> void muls64(uint64_t *phigh, uint64_t *plow, int64_t a, int64_t b);
> void mulu64(uint64_t *phigh, uint64_t *plow, uint64_t a, uint64_t b);
> #endif
> +void addc(uint64_t res [], uint64_t a, int i);
>
> /* Binary search for leading zeros. */
>
> diff --git a/linux-user/elfload.c b/linux-user/elfload.c
> index 2832a33..9399e44 100644
> --- a/linux-user/elfload.c
> +++ b/linux-user/elfload.c
> @@ -1662,6 +1662,11 @@ int load_elf_binary(struct linux_binprm * bprm,
> struct target_pt_regs * regs,
> when we load the interpreter. */
> elf_ex = *(struct elfhdr *)bprm->buf;
>
> +#if defined(TARGET_MIPS64)
> + if ((elf_ex.e_flags & EF_MIPS_MARCH) == E_MIPS_MACH_OCTEON) {
> + info->elf_arch = 1;
> + }
> +#endif
> bprm->p = copy_elf_strings(1, &bprm->filename, bprm->page, bprm->p);
> bprm->p = copy_elf_strings(bprm->envc,bprm->envp,bprm->page,bprm->p);
> bprm->p = copy_elf_strings(bprm->argc,bprm->argv,bprm->page,bprm->p);
> diff --git a/linux-user/main.c b/linux-user/main.c
> index a7f4955..acf9cac 100644
> --- a/linux-user/main.c
> +++ b/linux-user/main.c
> @@ -3348,6 +3348,11 @@ int main(int argc, char **argv, char **envp)
> if (regs->cp0_epc & 1) {
> env->hflags |= MIPS_HFLAG_M16;
> }
> +#if defined(TARGET_MIPS64)
> + if (info->elf_arch) {
> + env->TARGET_OCTEON = 1;
> + }
> +#endif
> }
> #elif defined(TARGET_SH4)
> {
> diff --git a/linux-user/qemu.h b/linux-user/qemu.h
> index 250814d..adef428 100644
> --- a/linux-user/qemu.h
> +++ b/linux-user/qemu.h
> @@ -51,6 +51,7 @@ struct image_info {
> abi_ulong arg_start;
> abi_ulong arg_end;
> int personality;
> + int elf_arch;
> #ifdef CONFIG_USE_FDPIC
> abi_ulong loadmap_addr;
> uint16_t nsegs;
> diff --git a/linux-user/syscall.c b/linux-user/syscall.c
> index bb0999d..d5da0ee 100644
> --- a/linux-user/syscall.c
> +++ b/linux-user/syscall.c
> @@ -7320,6 +7320,11 @@ abi_long do_syscall(void *cpu_env, int num, abi_long
> arg1,
> case TARGET_NR_set_thread_area:
> #if defined(TARGET_MIPS)
> ((CPUMIPSState *) cpu_env)->tls_value = arg1;
> + if (((CPUMIPSState *) cpu_env)->TARGET_OCTEON) {
> + /* tls entry is moved to k0 so that this can be used later
> + * currently this thing is tested only for Octeon */
> + ((CPUMIPSState *) cpu_env)->active_tc.gpr[26] = arg1;
> + }
> ret = 0;
> break;
> #elif defined(TARGET_CRIS)
All the above code is actually specific to linux-user. It would be
better to put it in a separate patch.
> diff --git a/target-mips/cpu.h b/target-mips/cpu.h
> index 0b98d10..8ef8afb 100644
> --- a/target-mips/cpu.h
> +++ b/target-mips/cpu.h
> @@ -5,6 +5,11 @@
>
> #define ELF_MACHINE EM_MIPS
>
> +/* These flags are used to check whether given
> + * user mode binary is octeon specific or not */
> +#define EF_MIPS_MARCH 0xFF0000
> +#define E_MIPS_MACH_OCTEON 0x8B0000
> +
> #define CPUState struct CPUMIPSState
>
> #include "config.h"
> @@ -171,6 +176,15 @@ struct TCState {
> target_ulong CP0_TCSchedule;
> target_ulong CP0_TCScheFBack;
> int32_t CP0_Debug_tcstatus;
> + /* Multiplier registers for Octeon */
> + target_ulong MPL0;
> + target_ulong MPL1;
> + target_ulong MPL2;
> + target_ulong P0;
> + target_ulong P1;
> + target_ulong P2;
> + /* Octeon specific Coprocessor 0 register */
> + target_ulong cvmctl;
> };
>
> typedef struct CPUMIPSState CPUMIPSState;
> @@ -178,6 +192,7 @@ struct CPUMIPSState {
> TCState active_tc;
> CPUMIPSFPUContext active_fpu;
>
> + int TARGET_OCTEON;
We don't want a specific boolean value for octeons. What we want is a
new insn_flags for all octeon specific code. Have a look at mips-defs.h
to see how it's done.
> uint32_t current_tc;
> uint32_t current_fpu;
>
> diff --git a/target-mips/helper.h b/target-mips/helper.h
> index 297ab64..409c08d 100644
> --- a/target-mips/helper.h
> +++ b/target-mips/helper.h
> @@ -8,7 +8,12 @@ DEF_HELPER_3(ldl, tl, tl, tl, int)
> DEF_HELPER_3(ldr, tl, tl, tl, int)
> DEF_HELPER_3(sdl, void, tl, tl, int)
> DEF_HELPER_3(sdr, void, tl, tl, int)
> +DEF_HELPER_3(v3mulu, void, tl, tl, int)
> +DEF_HELPER_3(vmulu, void, tl, tl, int)
> +DEF_HELPER_2(dpop, void, tl, i32)
> #endif
> +DEF_HELPER_2(pop, void, tl, i32)
> +
> DEF_HELPER_3(lwl, tl, tl, tl, int)
> DEF_HELPER_3(lwr, tl, tl, tl, int)
> DEF_HELPER_3(swl, void, tl, tl, int)
> diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c
> index bd16ce3..de793ba 100644
> --- a/target-mips/op_helper.c
> +++ b/target-mips/op_helper.c
> @@ -266,7 +266,77 @@ void helper_dmultu (target_ulong arg1, target_ulong
> arg2)
> {
> mulu64(&(env->active_tc.LO[0]), &(env->active_tc.HI[0]), arg1, arg2);
> }
> +void helper_v3mulu (target_ulong arg1, target_ulong arg2, int rd)
> +{
> + uint64_t hi, lo, res[4];
> + int i;
> + for (i = 0;i < 4; i++) {
> + res[i] = 0;
> + }
> + mulu64(&res[0], &res[1], env->active_tc.MPL0, arg1);
> + mulu64(&lo, &hi, env->active_tc.MPL1, arg1);
> + res[1] = res[1] + lo;
> + if (res[1] < lo)
> + res[2]++;
> + res[2] = res[2] + hi;
> + if (res[2] < hi)
> + res[3]++;
> + mulu64(&lo, &hi, env->active_tc.MPL2, arg1);
> + res[2] = res[2] + lo;
> + if (res[2] < lo)
> + res[3]++;
> + res[3] = res[3] + hi;
> + addc(res, arg2, 0);
> + addc(res, env->active_tc.P0, 0);
> + addc(res, env->active_tc.P1, 1);
> + addc(res, env->active_tc.P2, 2);
> + env->active_tc.gpr[rd] = res[0];
> + env->active_tc.P0 = res[1];
> + env->active_tc.P1 = res[2];
> + env->active_tc.P2 = res[3];
> +}
Please watch the CODING_STYLE (braces, indentation)
> +void helper_vmulu (target_ulong arg1, target_ulong arg2, int rd)
> +{
> + uint64_t hi, lo;
> + mulu64(&lo, &hi, env->active_tc.MPL0, arg1);
> + lo = lo + arg2;
> + if (lo < arg2)
> + hi++;
> + lo = lo + env->active_tc.P0;
> + if (lo < env->active_tc.P0)
> + hi++;
> + env->active_tc.gpr[rd] = lo;
> + env->active_tc.P0 = hi;
> +}
Please watch the CODING_STYLE (braces, indentation)
> +void helper_dpop (target_ulong arg, uint32_t rd)
> +{
> + uint32_t rem, ones = 0;
> + while (arg >= 1) {
> + arg = arg/2;
> + rem = arg % 2;
> + if(rem == 1)
> + ones++;
> + if(arg == 1)
> + ones++;
> + }
Please use ctpop64 from host-utils.h instead
> + env->active_tc.gpr[rd] = ones;
It's better to return a value instead of directly writing the registers,
as further optimizations can be done on the helper.
> +}
> #endif
> +void helper_pop (target_ulong arg, uint32_t rd)
> +{
> + uint32_t rem, i, ones = 0;
> + i = 0;
> + while(i <= 31) {
> + arg = arg/2;
> + rem = arg % 2;
> + if(rem == 1)
> + ones++;
> + if(arg == 1)
> + ones++;
> + i++;
> + }
> + env->active_tc.gpr[rd] = ones;
Same with ctpop32.
> +}
>
> #ifndef CONFIG_USER_ONLY
>
> diff --git a/target-mips/translate.c b/target-mips/translate.c
> index 63c2563..1394e18 100644
> --- a/target-mips/translate.c
> +++ b/target-mips/translate.c
> @@ -36,6 +36,14 @@
> #define GEN_HELPER 1
> #include "helper.h"
>
> +#if defined(TARGET_MIPS64)
> +/* Macros for setting values of cvmctl registers */
> +#define FUSE_START_BIT(cvmctl) (cvmctl | 0x80000000)
> +#define KASUMI(cvmctl) (cvmctl | 0x20000000)
> +#define IPPCI(cvmctl) (cvmctl | 0x380)
> +#define IPTI(cvmctl) (cvmctl | 0x70)
> +#endif
> +
> //#define MIPS_DEBUG_DISAS
> //#define MIPS_DEBUG_SIGN_EXTENSIONS
>
> @@ -70,6 +78,11 @@ enum {
> OPC_JAL = (0x03 << 26),
> OPC_JALS = OPC_JAL | 0x5,
> OPC_BEQ = (0x04 << 26), /* Unconditional if rs = rt = 0 (B) */
> + /* Cavium Specific */
> + OPC_BBIT1 = (0x3a << 26), /* jump on bit set, cavium specific */
> + OPC_BBIT132 = (0x3e << 26), /* jump on bit set(for upper 32 bits) */
> + OPC_BBIT0 = (0x32 << 26), /* jump on bit clear, cavium specific */
> + OPC_BBIT032 = (0x36 << 26), /* jump on bit clear(for upper 32 bits)
> */
> OPC_BEQL = (0x14 << 26),
> OPC_BNE = (0x05 << 26),
> OPC_BNEL = (0x15 << 26),
> @@ -265,6 +278,31 @@ enum {
> OPC_MADD = 0x00 | OPC_SPECIAL2,
> OPC_MADDU = 0x01 | OPC_SPECIAL2,
> OPC_MUL = 0x02 | OPC_SPECIAL2,
> + /* Cavium Specific Instructions */
> + OPC_BADDU = 0x28 | OPC_SPECIAL2,
> + OPC_DMUL = 0x03 | OPC_SPECIAL2,
> + OPC_EXTS = 0x3a | OPC_SPECIAL2,
> + OPC_EXTS32 = 0x3b | OPC_SPECIAL2,
> + OPC_CINS = 0x32 | OPC_SPECIAL2,
> + OPC_CINS32 = 0x33 | OPC_SPECIAL2,
> + OPC_SEQI = 0x2e | OPC_SPECIAL2,
> + OPC_SNEI = 0x2f | OPC_SPECIAL2,
> + OPC_MTM0 = 0x08 | OPC_SPECIAL2,
> + OPC_MTM1 = 0x0c | OPC_SPECIAL2,
> + OPC_MTM2 = 0x0d | OPC_SPECIAL2,
> + OPC_MTP0 = 0x09 | OPC_SPECIAL2,
> + OPC_MTP1 = 0x0a | OPC_SPECIAL2,
> + OPC_MTP2 = 0x0b | OPC_SPECIAL2,
> + OPC_V3MULU = 0x11 | OPC_SPECIAL2,
> + OPC_VMM0 = 0x10 | OPC_SPECIAL2,
> + OPC_VMULU = 0x0f | OPC_SPECIAL2,
> + OPC_POP = 0X2C | OPC_SPECIAL2,
> + OPC_DPOP = 0X2D | OPC_SPECIAL2,
> + OPC_SEQ = 0x2a | OPC_SPECIAL2,
> + OPC_SNE = 0x2b | OPC_SPECIAL2,
> + OPC_SAA = 0x18 | OPC_SPECIAL2,
> + OPC_SAAD = 0x19 | OPC_SPECIAL2,
> +/**************************************/
> OPC_MSUB = 0x04 | OPC_SPECIAL2,
> OPC_MSUBU = 0x05 | OPC_SPECIAL2,
> /* Loongson 2F */
> @@ -483,7 +521,7 @@ enum {
> static TCGv_ptr cpu_env;
> static TCGv cpu_gpr[32], cpu_PC;
> static TCGv cpu_HI[MIPS_DSP_ACC], cpu_LO[MIPS_DSP_ACC],
> cpu_ACX[MIPS_DSP_ACC];
> -static TCGv cpu_dspctrl, btarget, bcond;
> +static TCGv cpu_dspctrl, btarget, bcond, mpl0, mpl1, mpl2, p0, p1, p2;
> static TCGv_i32 hflags;
> static TCGv_i32 fpu_fcr0, fpu_fcr31;
>
> @@ -847,6 +885,13 @@ static inline void check_mips_64(DisasContext *ctx)
> if (unlikely(!(ctx->hflags & MIPS_HFLAG_64)))
> generate_exception(ctx, EXCP_RI);
> }
> +/* This code generates a "reserved instruction" exception if Octeon
> + instructions are not enabled. */
> +static inline void check_octeon(DisasContext *ctx, CPUState *env)
> +{
> + if (!env->TARGET_OCTEON)
> + generate_exception(ctx, EXCP_RI);
> +}
You might want to replace calls to check_octeon(ctx, env) by check_insn(
env, ctx, INSN_OCTEON);
> /* Define small wrappers for gen_load_fpr* so that we have a uniform
> calling interface for 32 and 64-bit FPRs. No sense in changing
> @@ -1419,7 +1464,33 @@ static void gen_arith_imm (CPUState *env,
> DisasContext *ctx, uint32_t opc,
> (void)opn; /* avoid a compiler warning */
> MIPS_DEBUG("%s %s, %s, " TARGET_FMT_lx, opn, regnames[rt],
> regnames[rs], uimm);
> }
> -
> +#if defined(TARGET_MIPS64)
> +/* set on equal immidiate/seton not equal immidiate */
> +static void gen_set_imm(CPUState *env, uint32_t opc, int rt, int rs,
> int16_t imm)
> +{
> + target_ulong uimm;
> + TCGv t0, t1;
> + const char *opn = "imm set";
> + uimm = (uint16_t)imm;
> + t0 = tcg_temp_new();
> + t1 = tcg_temp_new();
> + switch (opc) {
> + case OPC_SEQI:
> + tcg_gen_xori_tl(cpu_gpr[rt], cpu_gpr[rs], uimm);
> + gen_load_gpr(t0, rt);
Doing so just loads cpu_gpr[rt] into t0. Is that really what you want to
do here?
> + tcg_gen_setcondi_tl(TCG_COND_LTU, cpu_gpr[rt], t0, 1);
If you want to access cpu_gpr[rt] and cpu_gpr[rs] directly, you should
handle the fact that rt or rs can be 0. I don't know exactly what the
instruction is supposed to do, but this looks plainly wrong.
> + opn = "seqi";
> + break;
> + case OPC_SNEI:
> + tcg_gen_xori_tl(cpu_gpr[rt], cpu_gpr[rs], uimm);
> + gen_load_gpr(t0, rt);
> + gen_load_gpr(t1, 0);
> + tcg_gen_setcond_tl(TCG_COND_LTU, cpu_gpr[rt], t1, t0);
> + opn = "snei";
> + break;
Ditto.
> + }
> +}
> +#endif
> /* Logic with immediate operand */
> static void gen_logic_imm (CPUState *env, uint32_t opc, int rt, int rs,
> int16_t imm)
> {
> @@ -1583,7 +1654,86 @@ static void gen_shift_imm(CPUState *env, DisasContext
> *ctx, uint32_t opc,
> MIPS_DEBUG("%s %s, %s, " TARGET_FMT_lx, opn, regnames[rt],
> regnames[rs], uimm);
> tcg_temp_free(t0);
> }
> +#if defined(TARGET_MIPS64)
> +static void gen_LMI (CPUMIPSState *env, DisasContext *ctx, uint32_t opc,
> + int rs, int rt, int rd)
> +{
> + const char *opn = "LMI";
> + TCGv t0, t1;
> + t0 = tcg_temp_new();
> + t1 = tcg_temp_new();
> + int nomul = env->active_tc.cvmctl & 0x8000000;
> + if (!nomul) {
> + switch (opc) {
> + case OPC_MTM0:
> + tcg_gen_mov_tl(mpl0, cpu_gpr[rs]);
You should probably handle the case where rs = 0, or use gen_load_gpr().
> + tcg_gen_movi_tl(p0, 0);
> + tcg_gen_movi_tl(p1, 0);
> + tcg_gen_movi_tl(p2, 0);
> + opn = "mtm0";
> + break;
> + case OPC_MTM1:
> + tcg_gen_mov_tl(mpl1, cpu_gpr[rs]);
Ditto here and for the cases below.
> + tcg_gen_movi_tl(p0, 0);
> + tcg_gen_movi_tl(p1, 0);
> + tcg_gen_movi_tl(p2, 0);
> + opn = "mtm1";
> + break;
> + case OPC_MTM2:
> + tcg_gen_mov_tl(mpl2, cpu_gpr[rs]);
> + tcg_gen_movi_tl(p0, 0);
> + tcg_gen_movi_tl(p1, 0);
> + tcg_gen_movi_tl(p2, 0);
> + opn = "mtm2";
> + break;
> + case OPC_MTP0:
> + tcg_gen_mov_tl(p0, cpu_gpr[rs]);
> + opn = "mtp0";
> + break;
> + case OPC_MTP1:
> + tcg_gen_mov_tl(p1, cpu_gpr[rs]);
> + opn = "mtp1";
> + break;
> + case OPC_MTP2:
> + tcg_gen_mov_tl(p2, cpu_gpr[rs]);
> + opn = "mtp2";
> + break;
> + case OPC_VMM0:
> + gen_load_gpr(t1, rs);
> + gen_helper_dmultu(t1, mpl0);
> + gen_load_gpr(t0, rt);
> + tcg_gen_add_tl(t0, t0, cpu_LO[0]);
> + tcg_gen_add_tl(t0, t0, p0);
> + gen_store_gpr(t0, rd);
> + tcg_gen_mov_tl(mpl0, cpu_gpr[rd]);
> + tcg_gen_movi_tl(p0, 0);
> + tcg_gen_movi_tl(p1, 0);
> + tcg_gen_movi_tl(p2, 0);
> + opn = "vmm0";
> + break;
> + case OPC_VMULU:
> + gen_load_gpr(t0, rs);
> + gen_load_gpr(t1, rt);
> + gen_helper_2i(vmulu, t0, t1, rd);
> + opn = "vmulu";
> + break;
> + case OPC_V3MULU:
> + gen_load_gpr(t0, rs);
> + gen_load_gpr(t1, rt);
> + gen_helper_2i(v3mulu, t0, t1, rd);
> + opn = "v3mulu";
> + break;
> + }
> +
> + } else {
> + generate_exception(ctx, EXCP_RI);
> + }
> + tcg_temp_free(t0);
> + tcg_temp_free(t1);
> +}
> +
>
> +#endif
> /* Arithmetic */
> static void gen_arith (CPUState *env, DisasContext *ctx, uint32_t opc,
> int rd, int rs, int rt)
> @@ -1637,6 +1787,25 @@ static void gen_arith (CPUState *env, DisasContext
> *ctx, uint32_t opc,
> }
> opn = "addu";
> break;
> + case OPC_BADDU:
> + {
> + TCGv t0 = tcg_temp_new();
> + TCGv t1 = tcg_temp_new();
> + TCGv t2 = tcg_temp_new();
> + gen_load_gpr(t1, rs);
> + gen_load_gpr(t2, rt);
> + tcg_gen_andi_tl(t1, t1, 0xff);
> + tcg_gen_andi_tl(t2, t2, 0xff);
> + tcg_gen_add_tl(t0, t1, t2);
> + tcg_gen_andi_tl(t0, t0, 0xff);
This looks overly complicated; you can just do the addition and
then do ext8u. You can also use only two registers.
> + gen_store_gpr(t0, rd);
> + tcg_temp_free(t0);
> + tcg_temp_free(t1);
> + tcg_temp_free(t2);
> + }
> +
> + opn = "baddu";
> + break;
> case OPC_SUB:
> {
> TCGv t0 = tcg_temp_local_new();
> @@ -2013,7 +2182,70 @@ static void gen_HILO (DisasContext *ctx, uint32_t
> opc, int reg)
> (void)opn; /* avoid a compiler warning */
> MIPS_DEBUG("%s %s", opn, regnames[reg]);
> }
> +#if defined(TARGET_MIPS64)
> +static void gen_seqsne (DisasContext *ctx, uint32_t opc,
> + int rd, int rs, int rt)
> +{
> + const char *opn = "seq/sne";
> + TCGv t0, t1;
> + t0 = tcg_temp_new();
> + t1 = tcg_temp_new();
> + switch (opc) {
> + case OPC_SEQ:
> + tcg_gen_xor_tl(cpu_gpr[rd], cpu_gpr[rs], cpu_gpr[rt]);
> + gen_load_gpr(t0, rd);
Doing so just loads cpu_gpr[rd] into t0. Is that really what you want to
do here?
> + tcg_gen_setcondi_tl(TCG_COND_LTU, cpu_gpr[rd], t0, 1);
> + opn = "seq";
> + break;
> + case OPC_SNE:
> + tcg_gen_xor_tl(cpu_gpr[rd], cpu_gpr[rs], cpu_gpr[rt]);
> + gen_load_gpr(t0, rd);
Ditto.
> + gen_load_gpr(t1, 0);
> + tcg_gen_setcond_tl(TCG_COND_LTU, cpu_gpr[rd], t1, t0);
> + opn = "sne";
> + break;
> + default:
> + MIPS_INVAL(opn);
> + generate_exception(ctx, EXCP_RI);
> + goto out;
> + }
> +out:
> + tcg_temp_free(t0);
> + tcg_temp_free(t1);
> +
> +}
> +
> +static void gen_saa (CPUState *env, DisasContext *ctx, uint32_t opc,
> + int rt, int base)
> +{
> + const char *opn = "saa";
> + TCGv t0, t1, temp;
> + t0 = tcg_temp_new();
> + t1 = tcg_temp_new();
> + temp = tcg_temp_new();
> + gen_load_gpr(t1, rt);
> + gen_base_offset_addr(ctx, t0, base, 0);
> + switch (opc) {
> + case OPC_SAA:
> + save_cpu_state(ctx, 1);
> + op_ld_lw(temp, t0, ctx);
> + tcg_gen_add_tl(temp, temp, t1);
> + op_st_sw(temp, t0, ctx);
> + opn = "saa";
> + break;
> + case OPC_SAAD:
> + save_cpu_state(ctx, 0);
> + op_ld_ld(temp, t0, ctx);
> + tcg_gen_add_tl(temp, temp, t1);
> + op_st_sd(temp, t0, ctx);
> + opn = "saad";
> + break;
> + }
You should add a comment explaining that the operation should be atomic.
That will be something to do when emulating SMP systems.
> + tcg_temp_free(t0);
> + tcg_temp_free(t1);
> +}
> +#endif
> static void gen_muldiv (DisasContext *ctx, uint32_t opc,
> int rs, int rt)
> {
> @@ -2149,6 +2381,10 @@ static void gen_muldiv (DisasContext *ctx, uint32_t
> opc,
> gen_helper_dmult(t0, t1);
> opn = "dmult";
> break;
> + case OPC_DMUL:
> + gen_helper_dmult(t0, t1);
> + opn = "dmul";
> + break;
While this is correct, you probably don't want to use a helper, but a
simple tcg multiplication.
> case OPC_DMULTU:
> gen_helper_dmultu(t0, t1);
> opn = "dmultu";
> @@ -2368,7 +2604,26 @@ static void gen_cl (DisasContext *ctx, uint32_t opc,
> MIPS_DEBUG("%s %s, %s", opn, regnames[rd], regnames[rs]);
> tcg_temp_free(t0);
> }
> -
> +#if defined(TARGET_MIPS64)
> +static void gen_pop_count (DisasContext *ctx, uint32_t opc, int rd, int rs)
> +{
> + const char *opn = "pop";
> + TCGv t0;
> + t0 = tcg_temp_new();
> + gen_load_gpr(t0, rs);
> + switch (opc) {
> + case OPC_DPOP:
> + gen_helper_1i(dpop, t0, rd);
> + opn = "dpop";
> + break;
> + case OPC_POP:
> + gen_helper_1i(pop, t0, rd);
> + opn = "pop";
> + break;
> + }
> + tcg_temp_free(t0);
> +}
> +#endif
> /* Godson integer instructions */
> static void gen_loongson_integer (DisasContext *ctx, uint32_t opc,
> int rd, int rs, int rt)
> @@ -2705,6 +2960,7 @@ static void gen_compute_branch (DisasContext *ctx,
> uint32_t opc,
> target_ulong btgt = -1;
> int blink = 0;
> int bcond_compute = 0;
> + target_ulong maskb; /* Used in BBIT0 and BBIT1 */
> TCGv t0 = tcg_temp_new();
> TCGv t1 = tcg_temp_new();
>
> @@ -2730,6 +2986,39 @@ static void gen_compute_branch (DisasContext *ctx,
> uint32_t opc,
> }
> btgt = ctx->pc + insn_bytes + offset;
> break;
> + case OPC_BBIT1:
> + gen_load_gpr(t0, rs);
> + gen_load_gpr(t1, 0);
There is no need to load t1 here, as it is not used.
> + maskb = 1ULL << rt;
> + tcg_gen_andi_tl(t0, t0, maskb);
> + bcond_compute = 1;
> + btgt = ctx->pc + insn_bytes + offset;
> + break;
> + case OPC_BBIT132:
> + gen_load_gpr(t0, rs);
> + gen_load_gpr(t1, 0);
Ditto.
> + maskb = 1ULL << (rt + 32);
> + tcg_gen_andi_tl(t0, t0, maskb);
> + bcond_compute = 1;
> + btgt = ctx->pc + insn_bytes + offset;
> + break;
> + case OPC_BBIT0:
> + gen_load_gpr(t0, rs);
> + gen_load_gpr(t1, 0);
Ditto.
> + maskb = 1ULL << rt;
> + tcg_gen_andi_tl(t0, t0, maskb);
> + bcond_compute = 1;
> + btgt = ctx->pc + insn_bytes + offset;
> + break;
> + case OPC_BBIT032:
> + gen_load_gpr(t0, rs);
> + gen_load_gpr(t1, 0);
Ditto.
> + maskb = 1ULL << (rt + 32);
> + tcg_gen_andi_tl(t0, t0, maskb);
> + bcond_compute = 1;
> + btgt = ctx->pc + insn_bytes + offset;
> + break;
> +
These 4 instructions have a lot in common; it's probably possible to
implement them with the same code, instead of having four times very
similar code.
> case OPC_BGEZ:
> case OPC_BGEZAL:
> case OPC_BGEZALS:
> @@ -2888,6 +3177,18 @@ static void gen_compute_branch (DisasContext *ctx,
> uint32_t opc,
> MIPS_DEBUG("bne %s, %s, " TARGET_FMT_lx,
> regnames[rs], regnames[rt], btgt);
> goto not_likely;
> + case OPC_BBIT1:
> + tcg_gen_setcond_tl(TCG_COND_NE, bcond, t0, t1);
> + goto not_likely;
> + case OPC_BBIT132:
> + tcg_gen_setcond_tl(TCG_COND_NE, bcond, t0, t1);
> + goto not_likely;
> + case OPC_BBIT0:
> + tcg_gen_setcond_tl(TCG_COND_EQ, bcond, t0, t1);
> + goto not_likely;
> + case OPC_BBIT032:
> + tcg_gen_setcond_tl(TCG_COND_EQ, bcond, t0, t1);
> + goto not_likely;
> case OPC_BNEL:
> tcg_gen_setcond_tl(TCG_COND_NE, bcond, t0, t1);
> MIPS_DEBUG("bnel %s, %s, " TARGET_FMT_lx,
> @@ -2983,7 +3284,42 @@ static void gen_compute_branch (DisasContext *ctx,
> uint32_t opc,
> tcg_temp_free(t0);
> tcg_temp_free(t1);
> }
> +/* For cavium specific extract instructions */
> +#if defined(TARGET_MIPS64)
> +static void gen_exts (CPUState *env,DisasContext *ctx, uint32_t opc, int
> rt,
> + int rs, int lsb, int msb)
> +{
> + TCGv t0 = tcg_temp_new();
> + TCGv t1 = tcg_temp_new();
> + target_ulong mask;
> + gen_load_gpr(t1, rs);
> + switch (opc) {
> + case OPC_EXTS:
> + tcg_gen_shri_tl(t0, t1, lsb);
> + tcg_gen_andi_tl(t0, t0, (1ULL << (msb + 1)) - 1);
> + /* To sign extened the remaining bits according to
> + the msb of the bit field */
> + mask = 1ULL << msb;
> + tcg_gen_andi_tl(t1, t0, mask);
> + tcg_gen_addi_tl(t1, t1, -1);
> + tcg_gen_orc_tl(t0, t0, t1);
To sign extend a value, you can use tcg_gen_ext32s_tl()
> + gen_store_gpr(t0, rt);
> + break;
> + case OPC_EXTS32:
> + tcg_gen_shri_tl(t0, t1, lsb + 32);
> + tcg_gen_andi_tl(t0, t0, (1ULL << (msb + 1)) - 1);
> + mask = 1ULL << msb;
> + tcg_gen_andi_tl(t1, t0, mask);
> + tcg_gen_addi_tl(t1, t1, -1);
> + tcg_gen_orc_tl(t0, t0, t1);
Ditto
> + gen_store_gpr(t0, rt);
> + break;
>
> + }
> + tcg_temp_free(t0);
> + tcg_temp_free(t1);
> +}
> +#endif
> /* special3 bitfield operations */
> static void gen_bitops (DisasContext *ctx, uint32_t opc, int rt,
> int rs, int lsb, int msb)
> @@ -3063,6 +3399,22 @@ static void gen_bitops (DisasContext *ctx, uint32_t
> opc, int rt,
> tcg_gen_andi_tl(t1, t1, mask);
> tcg_gen_or_tl(t0, t0, t1);
> break;
> + case OPC_CINS:
> + mask = (1ULL << (msb+1))-1;
> + gen_load_gpr(t0, rt);
> + tcg_gen_andi_tl(t0, t0, 0);
> + tcg_gen_andi_tl(t1, t1, mask);
> + tcg_gen_shli_tl(t1, t1, lsb);
> + tcg_gen_or_tl(t0, t0, t1);
> + break;
> + case OPC_CINS32:
> + mask = (1ULL << (msb+1))-1;
> + gen_load_gpr(t0, rt);
> + tcg_gen_andi_tl(t0, t0, 0);
> + tcg_gen_andi_tl(t1, t1, mask);
> + tcg_gen_shli_tl(t1, t1, (lsb+32));
> + tcg_gen_or_tl(t0, t0, t1);
> + break;
> #endif
> default:
> fail:
> @@ -11609,8 +11961,8 @@ static void decode_opc (CPUState *env, DisasContext
> *ctx, int *is_branch)
> int32_t offset;
> int rs, rt, rd, sa;
> uint32_t op, op1, op2;
> - int16_t imm;
> -
> + int16_t imm, imm10;
> + int TARGET_OCTEON = env->TARGET_OCTEON;
> /* make sure instructions are on a word boundary */
> if (ctx->pc & 0x3) {
> env->CP0_BadVAddr = ctx->pc;
> @@ -11638,6 +11990,9 @@ static void decode_opc (CPUState *env, DisasContext
> *ctx, int *is_branch)
> rd = (ctx->opcode >> 11) & 0x1f;
> sa = (ctx->opcode >> 6) & 0x1f;
> imm = (int16_t)ctx->opcode;
> + /* 10 bit Immediate value For SEQI,SNEI */
> + imm10 = (ctx->opcode >> 6) & 0x3ff;
> +
> switch (op) {
> case OPC_SPECIAL:
> op1 = MASK_SPECIAL(ctx->opcode);
> @@ -11863,6 +12218,84 @@ static void decode_opc (CPUState *env, DisasContext
> *ctx, int *is_branch)
> case OPC_MUL:
> gen_arith(env, ctx, op1, rd, rs, rt);
> break;
> +#if defined(TARGET_MIPS64)
> +
> + case OPC_DMUL:
> + check_insn(env, ctx, ISA_MIPS3);
> + check_mips_64(ctx);
> + check_octeon(ctx, env);
You don't need to check for both MIPS3 and Octeon.
> + gen_muldiv(ctx, op1, rs, rt);
> + tcg_gen_mov_tl(cpu_gpr[rd], cpu_LO[0]);
> + break;
> + case OPC_CINS:
> + check_insn(env, ctx, ISA_MIPS64R2);
> + check_mips_64(ctx);
> + check_octeon(ctx, env);
Same here with ISA_MIPS64R2.
> + gen_bitops(ctx, op1, rt, rs, sa, rd);
> + break;
> + case OPC_CINS32:
> + check_mips_64(ctx);
> + check_octeon(ctx, env);
> + gen_bitops(ctx, op1, rt, rs, sa, rd);
> + break;
> + case OPC_MTM0:
> + check_mips_64(ctx);
> + check_octeon(ctx, env);
> + gen_LMI(env, ctx, op1, rs, rt, rd);
> + break;
> + case OPC_MTM1:
> + check_mips_64(ctx);
> + check_octeon(ctx, env);
> + gen_LMI(env, ctx, op1, rs, rt, rd);
> + break;
> + case OPC_MTM2:
> + check_mips_64(ctx);
> + check_octeon(ctx, env);
> + gen_LMI(env, ctx, op1, rs, rt, rd);
> + break;
> + case OPC_MTP0:
> + check_mips_64(ctx);
> + check_octeon(ctx, env);
> + gen_LMI(env, ctx, op1, rs, rt, rd);
> + break;
> + case OPC_MTP1:
> + check_mips_64(ctx);
> + check_octeon(ctx, env);
> + gen_LMI(env, ctx, op1, rs, rt, rd);
> + break;
> + case OPC_MTP2:
> + check_mips_64(ctx);
> + check_octeon(ctx, env);
> + gen_LMI(env, ctx, op1, rs, rt, rd);
> + break;
> + case OPC_VMULU:
> + check_mips_64(ctx);
> + gen_LMI(env, ctx, op1, rs, rt, rd);
> + break;
All the LMI calls can be grouped together.
> + case OPC_BADDU:
> + check_octeon(ctx, env);
> + gen_arith(env, ctx, op1, rd, rs, rt);
> + break;
> + case OPC_EXTS:
> + check_mips_64(ctx);
> + check_octeon(ctx, env);
> + gen_exts(env, ctx, op1, rt, rs, sa, rd);
> + break;
> + case OPC_EXTS32:
> + check_mips_64(ctx);
> + check_octeon(ctx, env);
> + gen_exts(env, ctx, op1, rt, rs, sa, rd);
> + break;
> + case OPC_SAA:
> + check_octeon(ctx, env);
> + gen_saa(env, ctx, op1, rt, rs);
> + break;
> + case OPC_SAAD:
> + check_octeon(ctx, env);
> + check_mips_64(ctx);
> + gen_saa(env, ctx, op1, rt, rs);
> + break;
> +#endif
> case OPC_CLO:
> case OPC_CLZ:
> check_insn(env, ctx, ISA_MIPS32);
> @@ -11882,13 +12315,25 @@ static void decode_opc (CPUState *env,
> DisasContext *ctx, int *is_branch)
> break;
> case OPC_DIV_G_2F:
> case OPC_DIVU_G_2F:
> - case OPC_MULT_G_2F:
> case OPC_MULTU_G_2F:
> case OPC_MOD_G_2F:
> case OPC_MODU_G_2F:
> check_insn(env, ctx, INSN_LOONGSON2F);
> gen_loongson_integer(ctx, op1, rd, rs, rt);
> break;
> + case OPC_MULT_G_2F:
> + if (!TARGET_OCTEON) {
> + check_insn(env, ctx, INSN_LOONGSON2F);
> + gen_loongson_integer(ctx, op1, rd, rs, rt);
> + } else {
> +#if defined(TARGET_MIPS64)
> + /* Cavium Specific vmm0 */
> + check_mips_64(ctx);
> + check_octeon(ctx, env);
> + gen_LMI(env, ctx, op1, rs, rt, rd);
> +#endif
> + }
> + break;
> #if defined(TARGET_MIPS64)
> case OPC_DCLO:
> case OPC_DCLZ:
> @@ -11896,7 +12341,6 @@ static void decode_opc (CPUState *env, DisasContext
> *ctx, int *is_branch)
> check_mips_64(ctx);
> gen_cl(ctx, op1, rd, rs);
> break;
> - case OPC_DMULT_G_2F:
> case OPC_DMULTU_G_2F:
> case OPC_DDIV_G_2F:
> case OPC_DDIVU_G_2F:
> @@ -11905,6 +12349,46 @@ static void decode_opc (CPUState *env, DisasContext
> *ctx, int *is_branch)
> check_insn(env, ctx, INSN_LOONGSON2F);
> gen_loongson_integer(ctx, op1, rd, rs, rt);
> break;
> + case OPC_DMULT_G_2F:
> + if (!TARGET_OCTEON) {
> + check_insn(env, ctx, INSN_LOONGSON2F);
> + gen_loongson_integer(ctx, op1, rd, rs, rt);
> + } else {
> + /* Cavium Specific instruction v3mulu */
> + check_mips_64(ctx);
> + check_octeon(ctx, env);
> + gen_LMI(env, ctx, op1, rs, rt, rd);
> + }
> + break;
> + case OPC_SEQ:
> + check_mips_64(ctx);
> + check_octeon(ctx, env);
> + gen_seqsne(ctx, op1, rd, rs, rt);
> + break;
> + case OPC_SNE:
> + check_mips_64(ctx);
> + check_octeon(ctx, env);
> + gen_seqsne(ctx, op1, rd, rs, rt);
> + break;
> + case OPC_SEQI:
> + check_mips_64(ctx);
> + check_octeon(ctx, env);
> + gen_set_imm(env, op1, rt, rs, imm10);
> + break;
> + case OPC_SNEI:
> + check_mips_64(ctx);
> + check_octeon(ctx, env);
> + gen_set_imm(env, op1, rt, rs, imm10);
> + break;
> + case OPC_POP:
> + check_octeon(ctx, env);
> + gen_pop_count(ctx, op1, rd, rs);
> + break;
> + case OPC_DPOP:
> + check_mips_64(ctx);
> + check_octeon(ctx, env);
> + gen_pop_count(ctx, op1, rd, rs);
> + break;
> #endif
> default: /* Invalid */
> MIPS_INVAL("special2");
> @@ -12196,10 +12680,32 @@ static void decode_opc (CPUState *env,
> DisasContext *ctx, int *is_branch)
> break;
>
> /* COP2. */
> - case OPC_LWC2:
> - case OPC_LDC2:
> - case OPC_SWC2:
> - case OPC_SDC2:
> + /* Conflicting opcodes with Cavium specific branch instructions
> + if TARGET_OCTEON is set these opcodes will belong to Cavium */
> + case OPC_LWC2: /* BBIT0 */
> + if (TARGET_OCTEON) {
> + gen_compute_branch(ctx, op, 4, rs, rt, imm << 2);
> + *is_branch = 1;
> + break;
> + }
> + case OPC_LDC2: /* BBIT032 */
> + if (TARGET_OCTEON) {
> + gen_compute_branch(ctx, op, 4, rs, rt, imm << 2);
> + *is_branch = 1;
> + break;
> + }
> + case OPC_SWC2: /* BBIT1 */
> + if (TARGET_OCTEON) {
> + gen_compute_branch(ctx, op, 4, rs, rt, imm << 2);
> + *is_branch = 1;
> + break;
> + }
> + case OPC_SDC2: /* BBIT132 */
> + if (TARGET_OCTEON) {
> + gen_compute_branch(ctx, op, 4, rs, rt, imm << 2);
> + *is_branch = 1;
> + break;
> + }
All of these can be grouped together.
> case OPC_CP2:
> /* COP2: Not implemented. */
> generate_exception_err(ctx, EXCP_CpU, 2);
> @@ -12588,6 +13094,18 @@ static void mips_tcg_init(void)
> cpu_dspctrl = tcg_global_mem_new(TCG_AREG0,
> offsetof(CPUState,
> active_tc.DSPControl),
> "DSPControl");
> + mpl0 = tcg_global_mem_new(TCG_AREG0,
> + offsetof(CPUState, active_tc.MPL0), "MPL0");
> + mpl1 = tcg_global_mem_new(TCG_AREG0,
> + offsetof(CPUState, active_tc.MPL1), "MPL1");
> + mpl2 = tcg_global_mem_new(TCG_AREG0,
> + offsetof(CPUState, active_tc.MPL2), "MPL2");
> + p0 = tcg_global_mem_new(TCG_AREG0,
> + offsetof(CPUState, active_tc.P0), "P0");
> + p1 = tcg_global_mem_new(TCG_AREG0,
> + offsetof(CPUState, active_tc.P1), "P1");
> + p2 = tcg_global_mem_new(TCG_AREG0,
> + offsetof(CPUState, active_tc.P2), "P2");
> bcond = tcg_global_mem_new(TCG_AREG0,
> offsetof(CPUState, bcond), "bcond");
> btarget = tcg_global_mem_new(TCG_AREG0,
> @@ -12611,6 +13129,18 @@ static void mips_tcg_init(void)
>
> #include "translate_init.c"
>
> +#if defined(TARGET_MIPS64)
> +
> +static void set_cvmctl_register(CPUMIPSState *env)
> +{
> + env->active_tc.cvmctl = env->active_tc.cvmctl
> + ^ env->active_tc.cvmctl;
> + env->active_tc.cvmctl = FUSE_START_BIT(env->active_tc.cvmctl);
> + env->active_tc.cvmctl = KASUMI(env->active_tc.cvmctl);
> + env->active_tc.cvmctl = IPPCI(env->active_tc.cvmctl);
> + env->active_tc.cvmctl = IPTI(env->active_tc.cvmctl);
> +}
> +#endif
> CPUMIPSState *cpu_mips_init (const char *cpu_model)
> {
> CPUMIPSState *env;
> @@ -12623,6 +13153,10 @@ CPUMIPSState *cpu_mips_init (const char *cpu_model)
> env->cpu_model = def;
> env->cpu_model_str = cpu_model;
>
> +#if defined(TARGET_MIPS64)
> + /* Function for setting cvmctl register */
> + set_cvmctl_register(env);
> +#endif
> cpu_exec_init(env);
> #ifndef CONFIG_USER_ONLY
> mmu_init(env, def);
> --
> 1.7.3.4
--
Aurelien Jarno GPG: 1024D/F1BCDB73
address@hidden http://www.aurel32.net