qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PATCH 2/2] Support for Cavium-Octeon specific instruction


From: Aurelien Jarno
Subject: Re: [Qemu-devel] [PATCH 2/2] Support for Cavium-Octeon specific instruction
Date: Tue, 12 Apr 2011 23:32:21 +0200
User-agent: Mutt/1.5.20 (2009-06-14)

On Sat, Apr 09, 2011 at 04:05:45PM +0500, Khansa Butt wrote:
> From 7fd3ef1360b78ad85848e54e36d97ab50d77e6a6 Mon Sep 17 00:00:00 2001
> From: Ehsan-ul-Haq & Khansa Butt <address@hidden>
> Date: Sat, 9 Apr 2011 11:09:18 +0500
> Subject: [PATCH 2/2] Support for Cavium-Octeon specific instruction
> 
> 
> Signed-off-by: Ehsan-ul-Haq, Abdul Qadeer, Abdul Waheed, Khansa Butt <
> address@hidden>
> ---
>  host-utils.c            |   14 ++
>  host-utils.h            |    1 +
>  linux-user/elfload.c    |    5 +
>  linux-user/main.c       |    5 +
>  linux-user/qemu.h       |    1 +
>  linux-user/syscall.c    |    5 +
>  target-mips/cpu.h       |   15 ++
>  target-mips/helper.h    |    5 +
>  target-mips/op_helper.c |   70 ++++++
>  target-mips/translate.c |  556
> ++++++++++++++++++++++++++++++++++++++++++++++-
>  10 files changed, 666 insertions(+), 11 deletions(-)
> 
> diff --git a/host-utils.c b/host-utils.c
> index dc96123..3073ef8 100644
> --- a/host-utils.c
> +++ b/host-utils.c
> @@ -102,4 +102,18 @@ void muls64 (uint64_t *plow, uint64_t *phigh, int64_t
> a, int64_t b)
>             a, b, *phigh, *plow);
>  #endif
>  }
> +
>  #endif /* !defined(__x86_64__) */
> +void addc(uint64_t res [], uint64_t a, int i)
> +{
> +    uint64_t c = res[i];
> +    for (;i < 4;i++) {
> +        res[i] = c + a;
> +        if (res[i] < a) {
> +            c = 1;
> +            a = res[i+1];
> +        }
> +        else
> +            break;
> +    }
> +}

Is it really something we want in host-utils? It is something very
specific and in any case it violates coding style.

> diff --git a/host-utils.h b/host-utils.h
> index 0ddc176..172c4fd 100644
> --- a/host-utils.h
> +++ b/host-utils.h
> @@ -46,6 +46,7 @@ static inline void muls64(uint64_t *plow, uint64_t *phigh,
>  void muls64(uint64_t *phigh, uint64_t *plow, int64_t a, int64_t b);
>  void mulu64(uint64_t *phigh, uint64_t *plow, uint64_t a, uint64_t b);
>  #endif
> +void addc(uint64_t res [], uint64_t a, int i);
> 
>  /* Binary search for leading zeros.  */
> 
> diff --git a/linux-user/elfload.c b/linux-user/elfload.c
> index 2832a33..9399e44 100644
> --- a/linux-user/elfload.c
> +++ b/linux-user/elfload.c
> @@ -1662,6 +1662,11 @@ int load_elf_binary(struct linux_binprm * bprm,
> struct target_pt_regs * regs,
>         when we load the interpreter.  */
>      elf_ex = *(struct elfhdr *)bprm->buf;
> 
> +#if defined(TARGET_MIPS64)
> +    if ((elf_ex.e_flags & EF_MIPS_MARCH) == E_MIPS_MACH_OCTEON) {
> +        info->elf_arch = 1;
> +    }
> +#endif
>      bprm->p = copy_elf_strings(1, &bprm->filename, bprm->page, bprm->p);
>      bprm->p = copy_elf_strings(bprm->envc,bprm->envp,bprm->page,bprm->p);
>      bprm->p = copy_elf_strings(bprm->argc,bprm->argv,bprm->page,bprm->p);
> diff --git a/linux-user/main.c b/linux-user/main.c
> index a7f4955..acf9cac 100644
> --- a/linux-user/main.c
> +++ b/linux-user/main.c
> @@ -3348,6 +3348,11 @@ int main(int argc, char **argv, char **envp)
>          if (regs->cp0_epc & 1) {
>              env->hflags |= MIPS_HFLAG_M16;
>          }
> +#if defined(TARGET_MIPS64)
> +        if (info->elf_arch) {
> +            env->TARGET_OCTEON = 1;
> +        }
> +#endif
>      }
>  #elif defined(TARGET_SH4)
>      {
> diff --git a/linux-user/qemu.h b/linux-user/qemu.h
> index 250814d..adef428 100644
> --- a/linux-user/qemu.h
> +++ b/linux-user/qemu.h
> @@ -51,6 +51,7 @@ struct image_info {
>          abi_ulong       arg_start;
>          abi_ulong       arg_end;
>   int personality;
> + int elf_arch;
>  #ifdef CONFIG_USE_FDPIC
>          abi_ulong       loadmap_addr;
>          uint16_t        nsegs;
> diff --git a/linux-user/syscall.c b/linux-user/syscall.c
> index bb0999d..d5da0ee 100644
> --- a/linux-user/syscall.c
> +++ b/linux-user/syscall.c
> @@ -7320,6 +7320,11 @@ abi_long do_syscall(void *cpu_env, int num, abi_long
> arg1,
>      case TARGET_NR_set_thread_area:
>  #if defined(TARGET_MIPS)
>        ((CPUMIPSState *) cpu_env)->tls_value = arg1;
> +      if (((CPUMIPSState *) cpu_env)->TARGET_OCTEON) {
> +          /* tls entry is moved to k0 so that this can be used later
> +           * currently this thing is tested only for Octeon */
> +          ((CPUMIPSState *) cpu_env)->active_tc.gpr[26] = arg1;
> +      }
>        ret = 0;
>        break;
>  #elif defined(TARGET_CRIS)

All the above code is actually specific to linux-user. It would be
better to put it in a separate patch.

> diff --git a/target-mips/cpu.h b/target-mips/cpu.h
> index 0b98d10..8ef8afb 100644
> --- a/target-mips/cpu.h
> +++ b/target-mips/cpu.h
> @@ -5,6 +5,11 @@
> 
>  #define ELF_MACHINE EM_MIPS
> 
> +/* These flags are used to check whether given
> + * user mode binary is octeon specific or not */
> +#define EF_MIPS_MARCH 0xFF0000
> +#define E_MIPS_MACH_OCTEON 0x8B0000
> +
>  #define CPUState struct CPUMIPSState
> 
>  #include "config.h"
> @@ -171,6 +176,15 @@ struct TCState {
>      target_ulong CP0_TCSchedule;
>      target_ulong CP0_TCScheFBack;
>      int32_t CP0_Debug_tcstatus;
> +    /* Multiplier registers for Octeon */
> +    target_ulong MPL0;
> +    target_ulong MPL1;
> +    target_ulong MPL2;
> +    target_ulong P0;
> +    target_ulong P1;
> +    target_ulong P2;
> +    /* Octeon specific Coprocessor 0 register */
> +    target_ulong cvmctl;
>  };
> 
>  typedef struct CPUMIPSState CPUMIPSState;
> @@ -178,6 +192,7 @@ struct CPUMIPSState {
>      TCState active_tc;
>      CPUMIPSFPUContext active_fpu;
> 
> +    int TARGET_OCTEON;

We don't want a specific boolean value for octeons. What we want is a
new insn_flags for all octeon specific code. Have a look at mips-defs.h
to see how it's done.

>      uint32_t current_tc;
>      uint32_t current_fpu;
> 
> diff --git a/target-mips/helper.h b/target-mips/helper.h
> index 297ab64..409c08d 100644
> --- a/target-mips/helper.h
> +++ b/target-mips/helper.h
> @@ -8,7 +8,12 @@ DEF_HELPER_3(ldl, tl, tl, tl, int)
>  DEF_HELPER_3(ldr, tl, tl, tl, int)
>  DEF_HELPER_3(sdl, void, tl, tl, int)
>  DEF_HELPER_3(sdr, void, tl, tl, int)
> +DEF_HELPER_3(v3mulu, void, tl, tl, int)
> +DEF_HELPER_3(vmulu, void, tl, tl, int)
> +DEF_HELPER_2(dpop, void, tl, i32)
>  #endif
> +DEF_HELPER_2(pop, void, tl, i32)
> +
>  DEF_HELPER_3(lwl, tl, tl, tl, int)
>  DEF_HELPER_3(lwr, tl, tl, tl, int)
>  DEF_HELPER_3(swl, void, tl, tl, int)
> diff --git a/target-mips/op_helper.c b/target-mips/op_helper.c
> index bd16ce3..de793ba 100644
> --- a/target-mips/op_helper.c
> +++ b/target-mips/op_helper.c
> @@ -266,7 +266,77 @@ void helper_dmultu (target_ulong arg1, target_ulong
> arg2)
>  {
>      mulu64(&(env->active_tc.LO[0]), &(env->active_tc.HI[0]), arg1, arg2);
>  }
> +void helper_v3mulu (target_ulong arg1, target_ulong arg2, int rd)
> +{
> +    uint64_t hi, lo, res[4];
> +    int i;
> +    for (i = 0;i < 4; i++) {
> +        res[i] = 0;
> +    }
> +    mulu64(&res[0], &res[1], env->active_tc.MPL0, arg1);
> +    mulu64(&lo, &hi, env->active_tc.MPL1, arg1);
> +    res[1] = res[1] + lo;
> +    if (res[1] < lo)
> +        res[2]++;
> +    res[2] = res[2] + hi;
> +    if (res[2] < hi)
> +        res[3]++;
> +    mulu64(&lo, &hi, env->active_tc.MPL2, arg1);
> +    res[2] = res[2] + lo;
> +    if (res[2] < lo)
> +        res[3]++;
> +    res[3] = res[3] + hi;
> +    addc(res, arg2, 0);
> +    addc(res, env->active_tc.P0, 0);
> +    addc(res, env->active_tc.P1, 1);
> +    addc(res, env->active_tc.P2, 2);
> +    env->active_tc.gpr[rd] = res[0];
> +    env->active_tc.P0 = res[1];
> +    env->active_tc.P1 = res[2];
> +    env->active_tc.P2 = res[3];
> +}

Please watch the CODING_STYLE (braces, indentation)

> +void helper_vmulu (target_ulong arg1, target_ulong arg2, int rd)
> +{
> + uint64_t hi, lo;
> + mulu64(&lo, &hi, env->active_tc.MPL0, arg1);
> + lo = lo + arg2;
> + if (lo < arg2)
> + hi++;
> + lo = lo + env->active_tc.P0;
> + if (lo < env->active_tc.P0)
> + hi++;
> + env->active_tc.gpr[rd] = lo;
> + env->active_tc.P0 = hi;
> +}

Please watch the CODING_STYLE (braces, indentation)

> +void helper_dpop (target_ulong arg, uint32_t rd)
> +{
> +    uint32_t rem, ones = 0;
> +    while (arg >= 1) {
> +        arg = arg/2;
> +        rem = arg % 2;
> +        if(rem == 1)
> +            ones++;
> +        if(arg == 1)
> +            ones++;
> +    }

Please use ctpop64 from host-utils.h instead.

> +    env->active_tc.gpr[rd] = ones;

It's better to return a value instead of directly writing the registers,
as further optimizations can be done on the helper.

> +}
>  #endif
> +void helper_pop (target_ulong arg, uint32_t rd)
> +{
> +    uint32_t rem, i, ones = 0;
> +    i = 0;
> +    while(i <= 31) {
> +        arg = arg/2;
> +        rem = arg % 2;
> +        if(rem == 1)
> +            ones++;
> +        if(arg == 1)
> +            ones++;
> +        i++;
> +    }
> +    env->active_tc.gpr[rd] = ones;

Same with ctpop32.

> +}
> 
>  #ifndef CONFIG_USER_ONLY
> 
> diff --git a/target-mips/translate.c b/target-mips/translate.c
> index 63c2563..1394e18 100644
> --- a/target-mips/translate.c
> +++ b/target-mips/translate.c
> @@ -36,6 +36,14 @@
>  #define GEN_HELPER 1
>  #include "helper.h"
> 
> +#if defined(TARGET_MIPS64)
> +/* Macros for setting values of cvmctl registers */
> +#define FUSE_START_BIT(cvmctl)  (cvmctl | 0x80000000)
> +#define KASUMI(cvmctl)  (cvmctl | 0x20000000)
> +#define IPPCI(cvmctl)  (cvmctl | 0x380)
> +#define IPTI(cvmctl)  (cvmctl | 0x70)
> +#endif
> +
>  //#define MIPS_DEBUG_DISAS
>  //#define MIPS_DEBUG_SIGN_EXTENSIONS
> 
> @@ -70,6 +78,11 @@ enum {
>      OPC_JAL      = (0x03 << 26),
>      OPC_JALS     = OPC_JAL | 0x5,
>      OPC_BEQ      = (0x04 << 26),  /* Unconditional if rs = rt = 0 (B) */
> +    /* Cavium Specific */
> +    OPC_BBIT1    = (0x3a << 26),  /* jump on bit set, cavium specific */
> +    OPC_BBIT132  = (0x3e << 26),  /* jump on bit set(for upper 32 bits) */
> +    OPC_BBIT0    = (0x32 << 26),  /* jump on bit clear, cavium specific */
> +    OPC_BBIT032  = (0x36 << 26),  /* jump on bit clear(for upper 32 bits)
> */
>      OPC_BEQL     = (0x14 << 26),
>      OPC_BNE      = (0x05 << 26),
>      OPC_BNEL     = (0x15 << 26),
> @@ -265,6 +278,31 @@ enum {
>      OPC_MADD     = 0x00 | OPC_SPECIAL2,
>      OPC_MADDU    = 0x01 | OPC_SPECIAL2,
>      OPC_MUL      = 0x02 | OPC_SPECIAL2,
> +    /* Cavium Specific Instructions */
> +    OPC_BADDU    = 0x28 | OPC_SPECIAL2,
> +    OPC_DMUL     = 0x03 | OPC_SPECIAL2,
> +    OPC_EXTS     = 0x3a | OPC_SPECIAL2,
> +    OPC_EXTS32   = 0x3b | OPC_SPECIAL2,
> +    OPC_CINS     = 0x32 | OPC_SPECIAL2,
> +    OPC_CINS32   = 0x33 | OPC_SPECIAL2,
> +    OPC_SEQI     = 0x2e | OPC_SPECIAL2,
> +    OPC_SNEI     = 0x2f | OPC_SPECIAL2,
> +    OPC_MTM0     = 0x08 | OPC_SPECIAL2,
> +    OPC_MTM1     = 0x0c | OPC_SPECIAL2,
> +    OPC_MTM2     = 0x0d | OPC_SPECIAL2,
> +    OPC_MTP0     = 0x09 | OPC_SPECIAL2,
> +    OPC_MTP1     = 0x0a | OPC_SPECIAL2,
> +    OPC_MTP2     = 0x0b | OPC_SPECIAL2,
> +    OPC_V3MULU   = 0x11 | OPC_SPECIAL2,
> +    OPC_VMM0     = 0x10 | OPC_SPECIAL2,
> +    OPC_VMULU    = 0x0f | OPC_SPECIAL2,
> +    OPC_POP      = 0X2C | OPC_SPECIAL2,
> +    OPC_DPOP     = 0X2D | OPC_SPECIAL2,
> +    OPC_SEQ      = 0x2a | OPC_SPECIAL2,
> +    OPC_SNE      = 0x2b | OPC_SPECIAL2,
> +    OPC_SAA      = 0x18 | OPC_SPECIAL2,
> +    OPC_SAAD     = 0x19 | OPC_SPECIAL2,
> +/**************************************/
>      OPC_MSUB     = 0x04 | OPC_SPECIAL2,
>      OPC_MSUBU    = 0x05 | OPC_SPECIAL2,
>      /* Loongson 2F */
> @@ -483,7 +521,7 @@ enum {
>  static TCGv_ptr cpu_env;
>  static TCGv cpu_gpr[32], cpu_PC;
>  static TCGv cpu_HI[MIPS_DSP_ACC], cpu_LO[MIPS_DSP_ACC],
> cpu_ACX[MIPS_DSP_ACC];
> -static TCGv cpu_dspctrl, btarget, bcond;
> +static TCGv cpu_dspctrl, btarget, bcond, mpl0, mpl1, mpl2, p0, p1, p2;
>  static TCGv_i32 hflags;
>  static TCGv_i32 fpu_fcr0, fpu_fcr31;
> 
> @@ -847,6 +885,13 @@ static inline void check_mips_64(DisasContext *ctx)
>      if (unlikely(!(ctx->hflags & MIPS_HFLAG_64)))
>          generate_exception(ctx, EXCP_RI);
>  }
> +/* This code generates a "reserved instruction" exception if Octeon
> +   instructions are not enabled. */
> +static inline void check_octeon(DisasContext *ctx, CPUState *env)
> +{
> +    if (!env->TARGET_OCTEON)
> +        generate_exception(ctx, EXCP_RI);
> +}

You might want to replace calls to check_octeon(ctx, env) with
check_insn(env, ctx, INSN_OCTEON).

>  /* Define small wrappers for gen_load_fpr* so that we have a uniform
>     calling interface for 32 and 64-bit FPRs.  No sense in changing
> @@ -1419,7 +1464,33 @@ static void gen_arith_imm (CPUState *env,
> DisasContext *ctx, uint32_t opc,
>      (void)opn; /* avoid a compiler warning */
>      MIPS_DEBUG("%s %s, %s, " TARGET_FMT_lx, opn, regnames[rt],
> regnames[rs], uimm);
>  }
> -
> +#if defined(TARGET_MIPS64)
> +/* set on equal immidiate/seton not equal immidiate */
> +static void gen_set_imm(CPUState *env, uint32_t opc, int rt, int rs,
> int16_t imm)
> +{
> +    target_ulong uimm;
> +    TCGv t0, t1;
> +    const char *opn = "imm set";
> +    uimm = (uint16_t)imm;
> +    t0 = tcg_temp_new();
> +    t1 = tcg_temp_new();
> +    switch (opc) {
> +    case OPC_SEQI:
> +        tcg_gen_xori_tl(cpu_gpr[rt], cpu_gpr[rs], uimm);
> +        gen_load_gpr(t0, rt);

Doing so just loads cpu_gpr[rt] into t0. Is it really what you want to
do here?

> +        tcg_gen_setcondi_tl(TCG_COND_LTU, cpu_gpr[rt], t0, 1);

If you want to access cpu_gpr[rt] and cpu_gpr[rs] directly, you should
handle the fact that rt or rs can be 0. I don't know exactly what the
instruction is supposed to do, but this looks plainly wrong.

> +        opn = "seqi";
> +        break;
> +    case OPC_SNEI:
> +        tcg_gen_xori_tl(cpu_gpr[rt], cpu_gpr[rs], uimm);
> +        gen_load_gpr(t0, rt);
> +        gen_load_gpr(t1, 0);
> +        tcg_gen_setcond_tl(TCG_COND_LTU, cpu_gpr[rt], t1, t0);
> +        opn = "snei";
> +        break;

Ditto.

> +    }
> +}
> +#endif
>  /* Logic with immediate operand */
>  static void gen_logic_imm (CPUState *env, uint32_t opc, int rt, int rs,
> int16_t imm)
>  {
> @@ -1583,7 +1654,86 @@ static void gen_shift_imm(CPUState *env, DisasContext
> *ctx, uint32_t opc,
>      MIPS_DEBUG("%s %s, %s, " TARGET_FMT_lx, opn, regnames[rt],
> regnames[rs], uimm);
>      tcg_temp_free(t0);
>  }
> +#if defined(TARGET_MIPS64)
> +static void gen_LMI (CPUMIPSState *env, DisasContext *ctx, uint32_t opc,
> +                 int rs, int rt, int rd)
> +{
> +    const char *opn = "LMI";
> +    TCGv t0, t1;
> +    t0 = tcg_temp_new();
> +    t1 = tcg_temp_new();
> +    int nomul = env->active_tc.cvmctl & 0x8000000;
> +    if (!nomul) {
> +        switch (opc) {
> +        case OPC_MTM0:
> +            tcg_gen_mov_tl(mpl0, cpu_gpr[rs]);

You should probably handle the case where rs = 0. Or use gen_load_gpr()

> +            tcg_gen_movi_tl(p0, 0);
> +            tcg_gen_movi_tl(p1, 0);
> +            tcg_gen_movi_tl(p2, 0);
> +            opn = "mtm0";
> +            break;
> +        case OPC_MTM1:
> +            tcg_gen_mov_tl(mpl1, cpu_gpr[rs]);

Ditto here and for the cases below.

> +            tcg_gen_movi_tl(p0, 0);
> +            tcg_gen_movi_tl(p1, 0);
> +            tcg_gen_movi_tl(p2, 0);
> +            opn = "mtm1";
> +            break;
> +        case OPC_MTM2:
> +            tcg_gen_mov_tl(mpl2, cpu_gpr[rs]);
> +            tcg_gen_movi_tl(p0, 0);
> +            tcg_gen_movi_tl(p1, 0);
> +            tcg_gen_movi_tl(p2, 0);
> +            opn = "mtm2";
> +            break;
> +        case OPC_MTP0:
> +            tcg_gen_mov_tl(p0, cpu_gpr[rs]);
> +            opn = "mtp0";
> +            break;
> +        case OPC_MTP1:
> +            tcg_gen_mov_tl(p1, cpu_gpr[rs]);
> +            opn = "mtp1";
> +            break;
> +        case OPC_MTP2:
> +            tcg_gen_mov_tl(p2, cpu_gpr[rs]);
> +            opn = "mtp2";
> +            break;
> +        case OPC_VMM0:
> +            gen_load_gpr(t1, rs);
> +            gen_helper_dmultu(t1, mpl0);
> +            gen_load_gpr(t0, rt);
> +            tcg_gen_add_tl(t0, t0, cpu_LO[0]);
> +            tcg_gen_add_tl(t0, t0, p0);
> +            gen_store_gpr(t0, rd);
> +            tcg_gen_mov_tl(mpl0, cpu_gpr[rd]);
> +            tcg_gen_movi_tl(p0, 0);
> +            tcg_gen_movi_tl(p1, 0);
> +            tcg_gen_movi_tl(p2, 0);
> +            opn = "vmm0";
> +            break;
> +        case OPC_VMULU:
> +         gen_load_gpr(t0, rs);
> +            gen_load_gpr(t1, rt);
> +            gen_helper_2i(vmulu, t0, t1, rd);
> +            opn = "vmulu";
> +            break;
> +        case OPC_V3MULU:
> +            gen_load_gpr(t0, rs);
> +            gen_load_gpr(t1, rt);
> +            gen_helper_2i(v3mulu, t0, t1, rd);
> +            opn = "v3mulu";
> +            break;
> +        }
> +
> +    } else {
> +        generate_exception(ctx, EXCP_RI);
> +    }
> +    tcg_temp_free(t0);
> +    tcg_temp_free(t1);
> +}
> +
> 
> +#endif
>  /* Arithmetic */
>  static void gen_arith (CPUState *env, DisasContext *ctx, uint32_t opc,
>                         int rd, int rs, int rt)
> @@ -1637,6 +1787,25 @@ static void gen_arith (CPUState *env, DisasContext
> *ctx, uint32_t opc,
>          }
>          opn = "addu";
>          break;
> +    case OPC_BADDU:
> +        {
> +            TCGv t0 = tcg_temp_new();
> +            TCGv t1 = tcg_temp_new();
> +            TCGv t2 = tcg_temp_new();
> +            gen_load_gpr(t1, rs);
> +            gen_load_gpr(t2, rt);
> +            tcg_gen_andi_tl(t1, t1, 0xff);
> +            tcg_gen_andi_tl(t2, t2, 0xff);
> +            tcg_gen_add_tl(t0, t1, t2);
> +            tcg_gen_andi_tl(t0, t0, 0xff);

This looks overly complicated; you can just do the addition and
then do ext8u. You can also use only two registers.

> +            gen_store_gpr(t0, rd);
> +            tcg_temp_free(t0);
> +            tcg_temp_free(t1);
> +            tcg_temp_free(t2);
> +        }
> +
> +       opn = "baddu";
> +       break;
>      case OPC_SUB:
>          {
>              TCGv t0 = tcg_temp_local_new();
> @@ -2013,7 +2182,70 @@ static void gen_HILO (DisasContext *ctx, uint32_t
> opc, int reg)
>      (void)opn; /* avoid a compiler warning */
>      MIPS_DEBUG("%s %s", opn, regnames[reg]);
>  }
> +#if defined(TARGET_MIPS64)
> +static void gen_seqsne (DisasContext *ctx, uint32_t opc,
> +                        int rd, int rs, int rt)
> +{
> +    const char *opn = "seq/sne";
> +    TCGv t0, t1;
> +    t0 = tcg_temp_new();
> +    t1 = tcg_temp_new();
> +    switch (opc) {
> +    case OPC_SEQ:
> +        tcg_gen_xor_tl(cpu_gpr[rd], cpu_gpr[rs], cpu_gpr[rt]);
> +        gen_load_gpr(t0, rd);

Doing so just loads cpu_gpr[rd] into t0. Is it really what you want to
do here?

> +        tcg_gen_setcondi_tl(TCG_COND_LTU, cpu_gpr[rd], t0, 1);
> +        opn = "seq";
> +        break;
> +    case OPC_SNE:
> +        tcg_gen_xor_tl(cpu_gpr[rd], cpu_gpr[rs], cpu_gpr[rt]);
> +        gen_load_gpr(t0, rd);

Ditto.

> +        gen_load_gpr(t1, 0);
> +        tcg_gen_setcond_tl(TCG_COND_LTU, cpu_gpr[rd], t1, t0);
> +        opn = "sne";
> +        break;
> +    default:
> +        MIPS_INVAL(opn);
> +        generate_exception(ctx, EXCP_RI);
> +        goto out;
> +    }
> +out:
> +        tcg_temp_free(t0);
> +        tcg_temp_free(t1);
> +
> +}
> +
> +static void gen_saa (CPUState *env, DisasContext *ctx, uint32_t opc,
> +                     int rt, int base)
> +{
> +    const char *opn = "saa";
> +    TCGv t0, t1, temp;
> +    t0 = tcg_temp_new();
> +    t1 = tcg_temp_new();
> +    temp = tcg_temp_new();
> +    gen_load_gpr(t1, rt);
> +    gen_base_offset_addr(ctx, t0, base, 0);
> +    switch (opc) {
> +    case OPC_SAA:
> +        save_cpu_state(ctx, 1);
> +        op_ld_lw(temp, t0, ctx);
> +        tcg_gen_add_tl(temp, temp, t1);
> +        op_st_sw(temp, t0, ctx);
> +        opn = "saa";
> +        break;
> +    case OPC_SAAD:
> +        save_cpu_state(ctx, 0);
> +        op_ld_ld(temp, t0, ctx);
> +        tcg_gen_add_tl(temp, temp, t1);
> +        op_st_sd(temp, t0, ctx);
> +        opn = "saad";
> +        break;
> +    }

You should add a comment explaining that the operation should be atomic.
That will be something to do when emulating SMP systems.

> +    tcg_temp_free(t0);

> +    tcg_temp_free(t1);
> +}
> +#endif
>  static void gen_muldiv (DisasContext *ctx, uint32_t opc,
>                          int rs, int rt)
>  {
> @@ -2149,6 +2381,10 @@ static void gen_muldiv (DisasContext *ctx, uint32_t
> opc,
>          gen_helper_dmult(t0, t1);
>          opn = "dmult";
>          break;
> +    case OPC_DMUL:
> +        gen_helper_dmult(t0, t1);
> +        opn = "dmul";
> +        break;

While this is correct, you probably don't want to use a helper, but a
simple tcg multiplication.

>      case OPC_DMULTU:
>          gen_helper_dmultu(t0, t1);
>          opn = "dmultu";
> @@ -2368,7 +2604,26 @@ static void gen_cl (DisasContext *ctx, uint32_t opc,
>      MIPS_DEBUG("%s %s, %s", opn, regnames[rd], regnames[rs]);
>      tcg_temp_free(t0);
>  }
> -
> +#if defined(TARGET_MIPS64)
> +static void gen_pop_count (DisasContext *ctx, uint32_t opc, int rd, int rs)
> +{
> +    const char *opn = "pop";
> +        TCGv t0;
> +        t0 = tcg_temp_new();
> +        gen_load_gpr(t0, rs);
> +        switch (opc) {
> +        case OPC_DPOP:
> +            gen_helper_1i(dpop, t0, rd);
> +            opn = "dpop";
> +            break;
> +        case OPC_POP:
> +            gen_helper_1i(pop, t0, rd);
> +            opn = "pop";
> +            break;
> +        }
> +        tcg_temp_free(t0);
> +}
> +#endif
>  /* Godson integer instructions */
>  static void gen_loongson_integer (DisasContext *ctx, uint32_t opc,
>                                  int rd, int rs, int rt)
> @@ -2705,6 +2960,7 @@ static void gen_compute_branch (DisasContext *ctx,
> uint32_t opc,
>      target_ulong btgt = -1;
>      int blink = 0;
>      int bcond_compute = 0;
> +    target_ulong maskb; /* Used in BBIT0 and BBIT1 */
>      TCGv t0 = tcg_temp_new();
>      TCGv t1 = tcg_temp_new();
> 
> @@ -2730,6 +2986,39 @@ static void gen_compute_branch (DisasContext *ctx,
> uint32_t opc,
>          }
>          btgt = ctx->pc + insn_bytes + offset;
>          break;
> +    case OPC_BBIT1:
> +        gen_load_gpr(t0, rs);
> +        gen_load_gpr(t1, 0);

There is no need to load t1 here, as it is not used.

> +        maskb = 1ULL << rt;
> +        tcg_gen_andi_tl(t0, t0, maskb);
> +        bcond_compute = 1;
> +        btgt = ctx->pc + insn_bytes + offset;
> +        break;
> +    case OPC_BBIT132:
> +        gen_load_gpr(t0, rs);
> +        gen_load_gpr(t1, 0);

Ditto.

> +        maskb = 1ULL << (rt + 32);
> +        tcg_gen_andi_tl(t0, t0, maskb);
> +        bcond_compute = 1;
> +        btgt = ctx->pc + insn_bytes + offset;
> +        break;
> +    case OPC_BBIT0:
> +        gen_load_gpr(t0, rs);
> +        gen_load_gpr(t1, 0);

Ditto.

> +        maskb = 1ULL << rt;
> +        tcg_gen_andi_tl(t0, t0, maskb);
> +        bcond_compute = 1;
> +        btgt = ctx->pc + insn_bytes + offset;
> +        break;
> +    case OPC_BBIT032:
> +        gen_load_gpr(t0, rs);
> +        gen_load_gpr(t1, 0);

Ditto.

> +        maskb = 1ULL << (rt + 32);
> +        tcg_gen_andi_tl(t0, t0, maskb);
> +        bcond_compute = 1;
> +        btgt = ctx->pc + insn_bytes + offset;
> +        break;
> +

These 4 instructions have a lot in common; it's probably possible to
implement them with the same code, instead of repeating very similar
code four times.

>      case OPC_BGEZ:
>      case OPC_BGEZAL:
>      case OPC_BGEZALS:
> @@ -2888,6 +3177,18 @@ static void gen_compute_branch (DisasContext *ctx,
> uint32_t opc,
>              MIPS_DEBUG("bne %s, %s, " TARGET_FMT_lx,
>                         regnames[rs], regnames[rt], btgt);
>              goto not_likely;
> +        case OPC_BBIT1:
> +            tcg_gen_setcond_tl(TCG_COND_NE, bcond, t0, t1);
> +            goto not_likely;
> +        case OPC_BBIT132:
> +            tcg_gen_setcond_tl(TCG_COND_NE, bcond, t0, t1);
> +            goto not_likely;
> +            case OPC_BBIT0:
> +            tcg_gen_setcond_tl(TCG_COND_EQ, bcond, t0, t1);
> +            goto not_likely;
> +        case OPC_BBIT032:
> +            tcg_gen_setcond_tl(TCG_COND_EQ, bcond, t0, t1);
> +            goto not_likely;
>          case OPC_BNEL:
>              tcg_gen_setcond_tl(TCG_COND_NE, bcond, t0, t1);
>              MIPS_DEBUG("bnel %s, %s, " TARGET_FMT_lx,
> @@ -2983,7 +3284,42 @@ static void gen_compute_branch (DisasContext *ctx,
> uint32_t opc,
>      tcg_temp_free(t0);
>      tcg_temp_free(t1);
>  }
> +/* For cavium specific extract instructions */
> +#if defined(TARGET_MIPS64)
> +static void gen_exts (CPUState *env,DisasContext *ctx, uint32_t opc, int
> rt,
> +                      int rs, int lsb, int msb)
> +{
> +    TCGv t0 = tcg_temp_new();
> +    TCGv t1 = tcg_temp_new();
> +    target_ulong mask;
> +    gen_load_gpr(t1, rs);
> +    switch (opc) {
> +    case OPC_EXTS:
> +        tcg_gen_shri_tl(t0, t1, lsb);
> +        tcg_gen_andi_tl(t0, t0, (1ULL << (msb + 1)) - 1);
> +        /* To sign extened the remaining bits according to
> +           the msb of the bit field */
> +        mask = 1ULL << msb;
> +        tcg_gen_andi_tl(t1, t0, mask);
> +        tcg_gen_addi_tl(t1, t1, -1);
> +        tcg_gen_orc_tl(t0, t0, t1);

To sign extend a value, you can use tcg_gen_ext32s_tl()

> +        gen_store_gpr(t0, rt);
> +        break;
> +    case OPC_EXTS32:
> +        tcg_gen_shri_tl(t0, t1, lsb + 32);
> +        tcg_gen_andi_tl(t0, t0, (1ULL << (msb + 1)) - 1);
> +        mask = 1ULL << msb;
> +        tcg_gen_andi_tl(t1, t0, mask);
> +        tcg_gen_addi_tl(t1, t1, -1);
> +        tcg_gen_orc_tl(t0, t0, t1);

Ditto

> +        gen_store_gpr(t0, rt);
> +        break;
> 
> +    }
> +    tcg_temp_free(t0);
> +    tcg_temp_free(t1);
> +}
> +#endif
>  /* special3 bitfield operations */
>  static void gen_bitops (DisasContext *ctx, uint32_t opc, int rt,
>                          int rs, int lsb, int msb)
> @@ -3063,6 +3399,22 @@ static void gen_bitops (DisasContext *ctx, uint32_t
> opc, int rt,
>          tcg_gen_andi_tl(t1, t1, mask);
>          tcg_gen_or_tl(t0, t0, t1);
>          break;
> +    case OPC_CINS:
> +        mask =  (1ULL << (msb+1))-1;
> +        gen_load_gpr(t0, rt);
> +        tcg_gen_andi_tl(t0, t0, 0);
> +        tcg_gen_andi_tl(t1, t1, mask);
> +        tcg_gen_shli_tl(t1, t1, lsb);
> +        tcg_gen_or_tl(t0, t0, t1);
> +        break;
> +    case OPC_CINS32:
> +        mask =  (1ULL << (msb+1))-1;
> +        gen_load_gpr(t0, rt);
> +        tcg_gen_andi_tl(t0, t0, 0);
> +        tcg_gen_andi_tl(t1, t1, mask);
> +        tcg_gen_shli_tl(t1, t1, (lsb+32));
> +        tcg_gen_or_tl(t0, t0, t1);
> +        break;
>  #endif
>      default:
>  fail:
> @@ -11609,8 +11961,8 @@ static void decode_opc (CPUState *env, DisasContext
> *ctx, int *is_branch)
>      int32_t offset;
>      int rs, rt, rd, sa;
>      uint32_t op, op1, op2;
> -    int16_t imm;
> -
> +    int16_t imm, imm10;
> +    int TARGET_OCTEON = env->TARGET_OCTEON;
>      /* make sure instructions are on a word boundary */
>      if (ctx->pc & 0x3) {
>          env->CP0_BadVAddr = ctx->pc;
> @@ -11638,6 +11990,9 @@ static void decode_opc (CPUState *env, DisasContext
> *ctx, int *is_branch)
>      rd = (ctx->opcode >> 11) & 0x1f;
>      sa = (ctx->opcode >> 6) & 0x1f;
>      imm = (int16_t)ctx->opcode;
> +    /* 10 bit Immediate value For SEQI,SNEI */
> +    imm10 = (ctx->opcode >> 6) & 0x3ff;
> +
>      switch (op) {
>      case OPC_SPECIAL:
>          op1 = MASK_SPECIAL(ctx->opcode);
> @@ -11863,6 +12218,84 @@ static void decode_opc (CPUState *env, DisasContext
> *ctx, int *is_branch)
>          case OPC_MUL:
>              gen_arith(env, ctx, op1, rd, rs, rt);
>              break;
> +#if defined(TARGET_MIPS64)
> +
> +        case OPC_DMUL:
> +            check_insn(env, ctx, ISA_MIPS3);
> +            check_mips_64(ctx);
> +            check_octeon(ctx, env);

You don't need to check for both MIPS3 and Octeon.

> +            gen_muldiv(ctx, op1, rs, rt);
> +            tcg_gen_mov_tl(cpu_gpr[rd], cpu_LO[0]);
> +            break;
> +        case OPC_CINS:
> +            check_insn(env, ctx, ISA_MIPS64R2);
> +            check_mips_64(ctx);
> +            check_octeon(ctx, env);

Same here with ISA_MIPS64R2.

> +            gen_bitops(ctx, op1, rt, rs, sa, rd);
> +            break;
> +        case OPC_CINS32:
> +            check_mips_64(ctx);
> +            check_octeon(ctx, env);
> +            gen_bitops(ctx, op1, rt, rs, sa, rd);
> +            break;
> +        case OPC_MTM0:
> +            check_mips_64(ctx);
> +            check_octeon(ctx, env);
> +            gen_LMI(env, ctx, op1, rs, rt, rd);
> +            break;
> +        case OPC_MTM1:
> +            check_mips_64(ctx);
> +            check_octeon(ctx, env);
> +            gen_LMI(env, ctx, op1, rs, rt, rd);
> +            break;
> +        case OPC_MTM2:
> +            check_mips_64(ctx);
> +            check_octeon(ctx, env);
> +            gen_LMI(env, ctx, op1, rs, rt, rd);
> +            break;
> +        case OPC_MTP0:
> +            check_mips_64(ctx);
> +            check_octeon(ctx, env);
> +            gen_LMI(env, ctx, op1, rs, rt, rd);
> +            break;
> +        case OPC_MTP1:
> +            check_mips_64(ctx);
> +            check_octeon(ctx, env);
> +            gen_LMI(env, ctx, op1, rs, rt, rd);
> +            break;
> +        case OPC_MTP2:
> +            check_mips_64(ctx);
> +            check_octeon(ctx, env);
> +            gen_LMI(env, ctx, op1, rs, rt, rd);
> +            break;
> +        case OPC_VMULU:
> +            check_mips_64(ctx);
> +            gen_LMI(env, ctx, op1, rs, rt, rd);
> +            break;

All the LMI calls can be grouped together.

> +        case OPC_BADDU:
> +            check_octeon(ctx, env);
> +            gen_arith(env, ctx, op1, rd, rs, rt);
> +            break;
> +        case OPC_EXTS:
> +            check_mips_64(ctx);
> +            check_octeon(ctx, env);
> +            gen_exts(env, ctx, op1, rt, rs, sa, rd);
> +            break;
> +        case OPC_EXTS32:
> +            check_mips_64(ctx);
> +            check_octeon(ctx, env);
> +            gen_exts(env, ctx, op1, rt, rs, sa, rd);
> +            break;
> +        case OPC_SAA:
> +            check_octeon(ctx, env);
> +            gen_saa(env, ctx, op1, rt, rs);
> +            break;
> +        case OPC_SAAD:
> +            check_octeon(ctx, env);
> +            check_mips_64(ctx);
> +            gen_saa(env, ctx, op1, rt, rs);
> +            break;
> +#endif
>          case OPC_CLO:
>          case OPC_CLZ:
>              check_insn(env, ctx, ISA_MIPS32);
> @@ -11882,13 +12315,25 @@ static void decode_opc (CPUState *env,
> DisasContext *ctx, int *is_branch)
>              break;
>          case OPC_DIV_G_2F:
>          case OPC_DIVU_G_2F:
> -        case OPC_MULT_G_2F:
>          case OPC_MULTU_G_2F:
>          case OPC_MOD_G_2F:
>          case OPC_MODU_G_2F:
>              check_insn(env, ctx, INSN_LOONGSON2F);
>              gen_loongson_integer(ctx, op1, rd, rs, rt);
>              break;
> +        case OPC_MULT_G_2F:
> +            if (!TARGET_OCTEON) {
> +                check_insn(env, ctx, INSN_LOONGSON2F);
> +                gen_loongson_integer(ctx, op1, rd, rs, rt);
> +            } else {
> +#if defined(TARGET_MIPS64)
> +                /* Cavium Specific vmm0 */
> +                check_mips_64(ctx);
> +                check_octeon(ctx, env);
> +                gen_LMI(env, ctx, op1, rs, rt, rd);
> +#endif
> +            }
> +            break;
>  #if defined(TARGET_MIPS64)
>          case OPC_DCLO:
>          case OPC_DCLZ:
> @@ -11896,7 +12341,6 @@ static void decode_opc (CPUState *env, DisasContext
> *ctx, int *is_branch)
>              check_mips_64(ctx);
>              gen_cl(ctx, op1, rd, rs);
>              break;
> -        case OPC_DMULT_G_2F:
>          case OPC_DMULTU_G_2F:
>          case OPC_DDIV_G_2F:
>          case OPC_DDIVU_G_2F:
> @@ -11905,6 +12349,46 @@ static void decode_opc (CPUState *env, DisasContext
> *ctx, int *is_branch)
>              check_insn(env, ctx, INSN_LOONGSON2F);
>              gen_loongson_integer(ctx, op1, rd, rs, rt);
>              break;
> +        case OPC_DMULT_G_2F:
> +            if (!TARGET_OCTEON) {
> +                check_insn(env, ctx, INSN_LOONGSON2F);
> +                gen_loongson_integer(ctx, op1, rd, rs, rt);
> +            } else {
> +                /* Cavium Specific instruction v3mulu */
> +                check_mips_64(ctx);
> +                check_octeon(ctx, env);
> +                gen_LMI(env, ctx, op1, rs, rt, rd);
> +            }
> +            break;
> +        case OPC_SEQ:
> +            check_mips_64(ctx);
> +            check_octeon(ctx, env);
> +            gen_seqsne(ctx, op1, rd, rs, rt);
> +            break;
> +        case OPC_SNE:
> +            check_mips_64(ctx);
> +            check_octeon(ctx, env);
> +            gen_seqsne(ctx, op1, rd, rs, rt);
> +            break;
> +        case OPC_SEQI:
> +            check_mips_64(ctx);
> +            check_octeon(ctx, env);
> +            gen_set_imm(env, op1, rt, rs, imm10);
> +            break;
> +        case OPC_SNEI:
> +            check_mips_64(ctx);
> +            check_octeon(ctx, env);
> +            gen_set_imm(env, op1, rt, rs, imm10);
> +            break;
> +        case OPC_POP:
> +            check_octeon(ctx, env);
> +            gen_pop_count(ctx, op1, rd, rs);
> +            break;
> +        case OPC_DPOP:
> +            check_mips_64(ctx);
> +            check_octeon(ctx, env);
> +            gen_pop_count(ctx, op1, rd, rs);
> +            break;
>  #endif
>          default:            /* Invalid */
>              MIPS_INVAL("special2");
> @@ -12196,10 +12680,32 @@ static void decode_opc (CPUState *env,
> DisasContext *ctx, int *is_branch)
>          break;
> 
>      /* COP2.  */
> -    case OPC_LWC2:
> -    case OPC_LDC2:
> -    case OPC_SWC2:
> -    case OPC_SDC2:
> +    /* Conflicting opcodes with Cavium specific branch instructions
> +       if TARGET_OCTEON is set these opcodes will belong to Cavium */
> +    case OPC_LWC2: /* BBIT0 */
> +        if (TARGET_OCTEON) {
> +            gen_compute_branch(ctx, op, 4, rs, rt, imm << 2);
> +            *is_branch = 1;
> +            break;
> +        }
> +    case OPC_LDC2: /* BBIT032 */
> +        if (TARGET_OCTEON) {
> +            gen_compute_branch(ctx, op, 4, rs, rt, imm << 2);
> +            *is_branch = 1;
> +            break;
> +        }
> +    case OPC_SWC2: /* BBIT1 */
> +        if (TARGET_OCTEON) {
> +            gen_compute_branch(ctx, op, 4, rs, rt, imm << 2);
> +            *is_branch = 1;
> +            break;
> +        }
> +    case OPC_SDC2: /* BBIT132 */
> +        if (TARGET_OCTEON) {
> +            gen_compute_branch(ctx, op, 4, rs, rt, imm << 2);
> +            *is_branch = 1;
> +            break;
> +        }

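All four of these cases (OPC_LWC2, OPC_LDC2, OPC_SWC2, OPC_SDC2) have an
identical body, so they can be grouped together as stacked case labels with a
single TARGET_OCTEON check.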

>      case OPC_CP2:
>          /* COP2: Not implemented. */
>          generate_exception_err(ctx, EXCP_CpU, 2);
> @@ -12588,6 +13094,18 @@ static void mips_tcg_init(void)
>      cpu_dspctrl = tcg_global_mem_new(TCG_AREG0,
>                                       offsetof(CPUState,
> active_tc.DSPControl),
>                                       "DSPControl");
> +    mpl0 = tcg_global_mem_new(TCG_AREG0,
> +                              offsetof(CPUState, active_tc.MPL0), "MPL0");
> +    mpl1 = tcg_global_mem_new(TCG_AREG0,
> +                              offsetof(CPUState, active_tc.MPL1), "MPL1");
> +    mpl2 = tcg_global_mem_new(TCG_AREG0,
> +                              offsetof(CPUState, active_tc.MPL2), "MPL2");
> +    p0 = tcg_global_mem_new(TCG_AREG0,
> +                            offsetof(CPUState, active_tc.P0), "P0");
> +    p1 = tcg_global_mem_new(TCG_AREG0,
> +                            offsetof(CPUState, active_tc.P1), "P1");
> +    p2 = tcg_global_mem_new(TCG_AREG0,
> +                            offsetof(CPUState, active_tc.P2), "P2");
>      bcond = tcg_global_mem_new(TCG_AREG0,
>                                 offsetof(CPUState, bcond), "bcond");
>      btarget = tcg_global_mem_new(TCG_AREG0,
> @@ -12611,6 +13129,18 @@ static void mips_tcg_init(void)
> 
>  #include "translate_init.c"
> 
> +#if defined(TARGET_MIPS64)
> +
> +static void set_cvmctl_register(CPUMIPSState *env)
> +{
> +    env->active_tc.cvmctl = env->active_tc.cvmctl
> +                                 ^ env->active_tc.cvmctl;
> +    env->active_tc.cvmctl = FUSE_START_BIT(env->active_tc.cvmctl);
> +    env->active_tc.cvmctl = KASUMI(env->active_tc.cvmctl);
> +    env->active_tc.cvmctl = IPPCI(env->active_tc.cvmctl);
> +    env->active_tc.cvmctl = IPTI(env->active_tc.cvmctl);
> +}
> +#endif
>  CPUMIPSState *cpu_mips_init (const char *cpu_model)
>  {
>      CPUMIPSState *env;
> @@ -12623,6 +13153,10 @@ CPUMIPSState *cpu_mips_init (const char *cpu_model)
>      env->cpu_model = def;
>      env->cpu_model_str = cpu_model;
> 
> +#if defined(TARGET_MIPS64)
> +    /* Function for setting cvmctl register */
> +    set_cvmctl_register(env);
> +#endif
>      cpu_exec_init(env);
>  #ifndef CONFIG_USER_ONLY
>      mmu_init(env, def);
> -- 
> 1.7.3.4

-- 
Aurelien Jarno                          GPG: 1024D/F1BCDB73
address@hidden                 http://www.aurel32.net



reply via email to

[Prev in Thread] Current Thread [Next in Thread]