qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [PULL 5/8] tcg: Put opcodes in a linked list


From: Artyom Tarasenko
Subject: Re: [Qemu-devel] [PULL 5/8] tcg: Put opcodes in a linked list
Date: Mon, 17 Aug 2015 13:35:07 +0200

Hi Richard,

this patch seems to break the build when USE_LIVENESS_ANALYSIS is undefined.

Regards,
Artyom

On Fri, Feb 13, 2015 at 6:43 AM, Richard Henderson <address@hidden> wrote:
> The previous setup required ops and args to be completely sequential,
> and was error prone when it came to both iteration and optimization.
>
> Reviewed-by: Bastian Koppelmann <address@hidden>
> Signed-off-by: Richard Henderson <address@hidden>
> ---
>  include/exec/gen-icount.h |  22 ++-
>  tcg/optimize.c            | 286 ++++++++++++++---------------------
>  tcg/tcg-op.c              | 190 ++++++++++++-----------
>  tcg/tcg.c                 | 376 +++++++++++++++++++---------------------------
>  tcg/tcg.h                 |  58 ++++---
>  5 files changed, 431 insertions(+), 501 deletions(-)
>
> diff --git a/include/exec/gen-icount.h b/include/exec/gen-icount.h
> index a37a61d..6e5b012 100644
> --- a/include/exec/gen-icount.h
> +++ b/include/exec/gen-icount.h
> @@ -11,8 +11,8 @@ static int exitreq_label;
>
>  static inline void gen_tb_start(TranslationBlock *tb)
>  {
> -    TCGv_i32 count;
> -    TCGv_i32 flag;
> +    TCGv_i32 count, flag, imm;
> +    int i;
>
>      exitreq_label = gen_new_label();
>      flag = tcg_temp_new_i32();
> @@ -21,16 +21,25 @@ static inline void gen_tb_start(TranslationBlock *tb)
>      tcg_gen_brcondi_i32(TCG_COND_NE, flag, 0, exitreq_label);
>      tcg_temp_free_i32(flag);
>
> -    if (!(tb->cflags & CF_USE_ICOUNT))
> +    if (!(tb->cflags & CF_USE_ICOUNT)) {
>          return;
> +    }
>
>      icount_label = gen_new_label();
>      count = tcg_temp_local_new_i32();
>      tcg_gen_ld_i32(count, cpu_env,
>                     -ENV_OFFSET + offsetof(CPUState, icount_decr.u32));
> +
> +    imm = tcg_temp_new_i32();
> +    tcg_gen_movi_i32(imm, 0xdeadbeef);
> +
>      /* This is a horrid hack to allow fixing up the value later.  */
> -    icount_arg = tcg_ctx.gen_opparam_ptr + 1;
> -    tcg_gen_subi_i32(count, count, 0xdeadbeef);
> +    i = tcg_ctx.gen_last_op_idx;
> +    i = tcg_ctx.gen_op_buf[i].args;
> +    icount_arg = &tcg_ctx.gen_opparam_buf[i + 1];
> +
> +    tcg_gen_sub_i32(count, count, imm);
> +    tcg_temp_free_i32(imm);
>
>      tcg_gen_brcondi_i32(TCG_COND_LT, count, 0, icount_label);
>      tcg_gen_st16_i32(count, cpu_env,
> @@ -49,7 +58,8 @@ static void gen_tb_end(TranslationBlock *tb, int num_insns)
>          tcg_gen_exit_tb((uintptr_t)tb + TB_EXIT_ICOUNT_EXPIRED);
>      }
>
> -    *tcg_ctx.gen_opc_ptr = INDEX_op_end;
> +    /* Terminate the linked list.  */
> +    tcg_ctx.gen_op_buf[tcg_ctx.gen_last_op_idx].next = -1;
>  }
>
>  static inline void gen_io_start(void)
> diff --git a/tcg/optimize.c b/tcg/optimize.c
> index 34ae3c2..f2b8acf 100644
> --- a/tcg/optimize.c
> +++ b/tcg/optimize.c
> @@ -162,13 +162,13 @@ static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
>      return false;
>  }
>
> -static void tcg_opt_gen_mov(TCGContext *s, int op_index, TCGArg *gen_args,
> +static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args,
>                              TCGOpcode old_op, TCGArg dst, TCGArg src)
>  {
>      TCGOpcode new_op = op_to_mov(old_op);
>      tcg_target_ulong mask;
>
> -    s->gen_opc_buf[op_index] = new_op;
> +    op->opc = new_op;
>
>      reset_temp(dst);
>      mask = temps[src].mask;
> @@ -193,17 +193,17 @@ static void tcg_opt_gen_mov(TCGContext *s, int 
> op_index, TCGArg *gen_args,
>          temps[src].next_copy = dst;
>      }
>
> -    gen_args[0] = dst;
> -    gen_args[1] = src;
> +    args[0] = dst;
> +    args[1] = src;
>  }
>
> -static void tcg_opt_gen_movi(TCGContext *s, int op_index, TCGArg *gen_args,
> +static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg *args,
>                               TCGOpcode old_op, TCGArg dst, TCGArg val)
>  {
>      TCGOpcode new_op = op_to_movi(old_op);
>      tcg_target_ulong mask;
>
> -    s->gen_opc_buf[op_index] = new_op;
> +    op->opc = new_op;
>
>      reset_temp(dst);
>      temps[dst].state = TCG_TEMP_CONST;
> @@ -215,8 +215,8 @@ static void tcg_opt_gen_movi(TCGContext *s, int op_index, 
> TCGArg *gen_args,
>      }
>      temps[dst].mask = mask;
>
> -    gen_args[0] = dst;
> -    gen_args[1] = val;
> +    args[0] = dst;
> +    args[1] = val;
>  }
>
>  static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
> @@ -533,11 +533,9 @@ static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
>  }
>
>  /* Propagate constants and copies, fold constant expressions. */
> -static TCGArg *tcg_constant_folding(TCGContext *s, uint16_t *tcg_opc_ptr,
> -                                    TCGArg *args, TCGOpDef *tcg_op_defs)
> +static void tcg_constant_folding(TCGContext *s)
>  {
> -    int nb_ops, op_index, nb_temps, nb_globals;
> -    TCGArg *gen_args;
> +    int oi, oi_next, nb_temps, nb_globals;
>
>      /* Array VALS has an element for each temp.
>         If this temp holds a constant then its value is kept in VALS' element.
> @@ -548,24 +546,23 @@ static TCGArg *tcg_constant_folding(TCGContext *s, 
> uint16_t *tcg_opc_ptr,
>      nb_globals = s->nb_globals;
>      reset_all_temps(nb_temps);
>
> -    nb_ops = tcg_opc_ptr - s->gen_opc_buf;
> -    gen_args = args;
> -    for (op_index = 0; op_index < nb_ops; op_index++) {
> -        TCGOpcode op = s->gen_opc_buf[op_index];
> -        const TCGOpDef *def = &tcg_op_defs[op];
> +    for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
>          tcg_target_ulong mask, partmask, affected;
> -        int nb_oargs, nb_iargs, nb_args, i;
> +        int nb_oargs, nb_iargs, i;
>          TCGArg tmp;
>
> -        if (op == INDEX_op_call) {
> -            *gen_args++ = tmp = *args++;
> -            nb_oargs = tmp >> 16;
> -            nb_iargs = tmp & 0xffff;
> -            nb_args = nb_oargs + nb_iargs + def->nb_cargs;
> +        TCGOp * const op = &s->gen_op_buf[oi];
> +        TCGArg * const args = &s->gen_opparam_buf[op->args];
> +        TCGOpcode opc = op->opc;
> +        const TCGOpDef *def = &tcg_op_defs[opc];
> +
> +        oi_next = op->next;
> +        if (opc == INDEX_op_call) {
> +            nb_oargs = op->callo;
> +            nb_iargs = op->calli;
>          } else {
>              nb_oargs = def->nb_oargs;
>              nb_iargs = def->nb_iargs;
> -            nb_args = def->nb_args;
>          }
>
>          /* Do copy propagation */
> @@ -576,7 +573,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, 
> uint16_t *tcg_opc_ptr,
>          }
>
>          /* For commutative operations make constant second argument */
> -        switch (op) {
> +        switch (opc) {
>          CASE_OP_32_64(add):
>          CASE_OP_32_64(mul):
>          CASE_OP_32_64(and):
> @@ -634,7 +631,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, 
> uint16_t *tcg_opc_ptr,
>
>          /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
>             and "sub r, 0, a => neg r, a" case.  */
> -        switch (op) {
> +        switch (opc) {
>          CASE_OP_32_64(shl):
>          CASE_OP_32_64(shr):
>          CASE_OP_32_64(sar):
> @@ -642,9 +639,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, 
> uint16_t *tcg_opc_ptr,
>          CASE_OP_32_64(rotr):
>              if (temps[args[1]].state == TCG_TEMP_CONST
>                  && temps[args[1]].val == 0) {
> -                tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], 0);
> -                args += 3;
> -                gen_args += 2;
> +                tcg_opt_gen_movi(s, op, args, opc, args[0], 0);
>                  continue;
>              }
>              break;
> @@ -657,7 +652,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, 
> uint16_t *tcg_opc_ptr,
>                      /* Proceed with possible constant folding. */
>                      break;
>                  }
> -                if (op == INDEX_op_sub_i32) {
> +                if (opc == INDEX_op_sub_i32) {
>                      neg_op = INDEX_op_neg_i32;
>                      have_neg = TCG_TARGET_HAS_neg_i32;
>                  } else {
> @@ -669,12 +664,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, 
> uint16_t *tcg_opc_ptr,
>                  }
>                  if (temps[args[1]].state == TCG_TEMP_CONST
>                      && temps[args[1]].val == 0) {
> -                    s->gen_opc_buf[op_index] = neg_op;
> +                    op->opc = neg_op;
>                      reset_temp(args[0]);
> -                    gen_args[0] = args[0];
> -                    gen_args[1] = args[2];
> -                    args += 3;
> -                    gen_args += 2;
> +                    args[1] = args[2];
>                      continue;
>                  }
>              }
> @@ -728,12 +720,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, 
> uint16_t *tcg_opc_ptr,
>                  if (!have_not) {
>                      break;
>                  }
> -                s->gen_opc_buf[op_index] = not_op;
> +                op->opc = not_op;
>                  reset_temp(args[0]);
> -                gen_args[0] = args[0];
> -                gen_args[1] = args[i];
> -                args += 3;
> -                gen_args += 2;
> +                args[1] = args[i];
>                  continue;
>              }
>          default:
> @@ -741,7 +730,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, 
> uint16_t *tcg_opc_ptr,
>          }
>
>          /* Simplify expression for "op r, a, const => mov r, a" cases */
> -        switch (op) {
> +        switch (opc) {
>          CASE_OP_32_64(add):
>          CASE_OP_32_64(sub):
>          CASE_OP_32_64(shl):
> @@ -769,12 +758,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, 
> uint16_t *tcg_opc_ptr,
>              break;
>          do_mov3:
>              if (temps_are_copies(args[0], args[1])) {
> -                s->gen_opc_buf[op_index] = INDEX_op_nop;
> +                op->opc = INDEX_op_nop;
>              } else {
> -                tcg_opt_gen_mov(s, op_index, gen_args, op, args[0], args[1]);
> -                gen_args += 2;
> +                tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]);
>              }
> -            args += 3;
>              continue;
>          default:
>              break;
> @@ -784,7 +771,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, 
> uint16_t *tcg_opc_ptr,
>             output argument is supported. */
>          mask = -1;
>          affected = -1;
> -        switch (op) {
> +        switch (opc) {
>          CASE_OP_32_64(ext8s):
>              if ((temps[args[1]].mask & 0x80) != 0) {
>                  break;
> @@ -923,38 +910,31 @@ static TCGArg *tcg_constant_folding(TCGContext *s, 
> uint16_t *tcg_opc_ptr,
>
>          if (partmask == 0) {
>              assert(nb_oargs == 1);
> -            tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], 0);
> -            args += nb_args;
> -            gen_args += 2;
> +            tcg_opt_gen_movi(s, op, args, opc, args[0], 0);
>              continue;
>          }
>          if (affected == 0) {
>              assert(nb_oargs == 1);
>              if (temps_are_copies(args[0], args[1])) {
> -                s->gen_opc_buf[op_index] = INDEX_op_nop;
> +                op->opc = INDEX_op_nop;
>              } else if (temps[args[1]].state != TCG_TEMP_CONST) {
> -                tcg_opt_gen_mov(s, op_index, gen_args, op, args[0], args[1]);
> -                gen_args += 2;
> +                tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]);
>              } else {
> -                tcg_opt_gen_movi(s, op_index, gen_args, op,
> +                tcg_opt_gen_movi(s, op, args, opc,
>                                   args[0], temps[args[1]].val);
> -                gen_args += 2;
>              }
> -            args += nb_args;
>              continue;
>          }
>
>          /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
> -        switch (op) {
> +        switch (opc) {
>          CASE_OP_32_64(and):
>          CASE_OP_32_64(mul):
>          CASE_OP_32_64(muluh):
>          CASE_OP_32_64(mulsh):
>              if ((temps[args[2]].state == TCG_TEMP_CONST
>                  && temps[args[2]].val == 0)) {
> -                tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], 0);
> -                args += 3;
> -                gen_args += 2;
> +                tcg_opt_gen_movi(s, op, args, opc, args[0], 0);
>                  continue;
>              }
>              break;
> @@ -963,18 +943,15 @@ static TCGArg *tcg_constant_folding(TCGContext *s, 
> uint16_t *tcg_opc_ptr,
>          }
>
>          /* Simplify expression for "op r, a, a => mov r, a" cases */
> -        switch (op) {
> +        switch (opc) {
>          CASE_OP_32_64(or):
>          CASE_OP_32_64(and):
>              if (temps_are_copies(args[1], args[2])) {
>                  if (temps_are_copies(args[0], args[1])) {
> -                    s->gen_opc_buf[op_index] = INDEX_op_nop;
> +                    op->opc = INDEX_op_nop;
>                  } else {
> -                    tcg_opt_gen_mov(s, op_index, gen_args, op,
> -                                    args[0], args[1]);
> -                    gen_args += 2;
> +                    tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]);
>                  }
> -                args += 3;
>                  continue;
>              }
>              break;
> @@ -983,14 +960,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, 
> uint16_t *tcg_opc_ptr,
>          }
>
>          /* Simplify expression for "op r, a, a => movi r, 0" cases */
> -        switch (op) {
> +        switch (opc) {
>          CASE_OP_32_64(andc):
>          CASE_OP_32_64(sub):
>          CASE_OP_32_64(xor):
>              if (temps_are_copies(args[1], args[2])) {
> -                tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], 0);
> -                gen_args += 2;
> -                args += 3;
> +                tcg_opt_gen_movi(s, op, args, opc, args[0], 0);
>                  continue;
>              }
>              break;
> @@ -1001,17 +976,14 @@ static TCGArg *tcg_constant_folding(TCGContext *s, 
> uint16_t *tcg_opc_ptr,
>          /* Propagate constants through copy operations and do constant
>             folding.  Constants will be substituted to arguments by register
>             allocator where needed and possible.  Also detect copies. */
> -        switch (op) {
> +        switch (opc) {
>          CASE_OP_32_64(mov):
>              if (temps_are_copies(args[0], args[1])) {
> -                args += 2;
> -                s->gen_opc_buf[op_index] = INDEX_op_nop;
> +                op->opc = INDEX_op_nop;
>                  break;
>              }
>              if (temps[args[1]].state != TCG_TEMP_CONST) {
> -                tcg_opt_gen_mov(s, op_index, gen_args, op, args[0], args[1]);
> -                gen_args += 2;
> -                args += 2;
> +                tcg_opt_gen_mov(s, op, args, opc, args[0], args[1]);
>                  break;
>              }
>              /* Source argument is constant.  Rewrite the operation and
> @@ -1019,9 +991,7 @@ static TCGArg *tcg_constant_folding(TCGContext *s, 
> uint16_t *tcg_opc_ptr,
>              args[1] = temps[args[1]].val;
>              /* fallthrough */
>          CASE_OP_32_64(movi):
> -            tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], args[1]);
> -            gen_args += 2;
> -            args += 2;
> +            tcg_opt_gen_movi(s, op, args, opc, args[0], args[1]);
>              break;
>
>          CASE_OP_32_64(not):
> @@ -1033,20 +1003,16 @@ static TCGArg *tcg_constant_folding(TCGContext *s, 
> uint16_t *tcg_opc_ptr,
>          case INDEX_op_ext32s_i64:
>          case INDEX_op_ext32u_i64:
>              if (temps[args[1]].state == TCG_TEMP_CONST) {
> -                tmp = do_constant_folding(op, temps[args[1]].val, 0);
> -                tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp);
> -                gen_args += 2;
> -                args += 2;
> +                tmp = do_constant_folding(opc, temps[args[1]].val, 0);
> +                tcg_opt_gen_movi(s, op, args, opc, args[0], tmp);
>                  break;
>              }
>              goto do_default;
>
>          case INDEX_op_trunc_shr_i32:
>              if (temps[args[1]].state == TCG_TEMP_CONST) {
> -                tmp = do_constant_folding(op, temps[args[1]].val, args[2]);
> -                tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp);
> -                gen_args += 2;
> -                args += 3;
> +                tmp = do_constant_folding(opc, temps[args[1]].val, args[2]);
> +                tcg_opt_gen_movi(s, op, args, opc, args[0], tmp);
>                  break;
>              }
>              goto do_default;
> @@ -1075,11 +1041,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, 
> uint16_t *tcg_opc_ptr,
>          CASE_OP_32_64(remu):
>              if (temps[args[1]].state == TCG_TEMP_CONST
>                  && temps[args[2]].state == TCG_TEMP_CONST) {
> -                tmp = do_constant_folding(op, temps[args[1]].val,
> +                tmp = do_constant_folding(opc, temps[args[1]].val,
>                                            temps[args[2]].val);
> -                tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp);
> -                gen_args += 2;
> -                args += 3;
> +                tcg_opt_gen_movi(s, op, args, opc, args[0], tmp);
>                  break;
>              }
>              goto do_default;
> @@ -1089,54 +1053,44 @@ static TCGArg *tcg_constant_folding(TCGContext *s, 
> uint16_t *tcg_opc_ptr,
>                  && temps[args[2]].state == TCG_TEMP_CONST) {
>                  tmp = deposit64(temps[args[1]].val, args[3], args[4],
>                                  temps[args[2]].val);
> -                tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp);
> -                gen_args += 2;
> -                args += 5;
> +                tcg_opt_gen_movi(s, op, args, opc, args[0], tmp);
>                  break;
>              }
>              goto do_default;
>
>          CASE_OP_32_64(setcond):
> -            tmp = do_constant_folding_cond(op, args[1], args[2], args[3]);
> +            tmp = do_constant_folding_cond(opc, args[1], args[2], args[3]);
>              if (tmp != 2) {
> -                tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp);
> -                gen_args += 2;
> -                args += 4;
> +                tcg_opt_gen_movi(s, op, args, opc, args[0], tmp);
>                  break;
>              }
>              goto do_default;
>
>          CASE_OP_32_64(brcond):
> -            tmp = do_constant_folding_cond(op, args[0], args[1], args[2]);
> +            tmp = do_constant_folding_cond(opc, args[0], args[1], args[2]);
>              if (tmp != 2) {
>                  if (tmp) {
>                      reset_all_temps(nb_temps);
> -                    s->gen_opc_buf[op_index] = INDEX_op_br;
> -                    gen_args[0] = args[3];
> -                    gen_args += 1;
> +                    op->opc = INDEX_op_br;
> +                    args[0] = args[3];
>                  } else {
> -                    s->gen_opc_buf[op_index] = INDEX_op_nop;
> +                    op->opc = INDEX_op_nop;
>                  }
> -                args += 4;
>                  break;
>              }
>              goto do_default;
>
>          CASE_OP_32_64(movcond):
> -            tmp = do_constant_folding_cond(op, args[1], args[2], args[5]);
> +            tmp = do_constant_folding_cond(opc, args[1], args[2], args[5]);
>              if (tmp != 2) {
>                  if (temps_are_copies(args[0], args[4-tmp])) {
> -                    s->gen_opc_buf[op_index] = INDEX_op_nop;
> +                    op->opc = INDEX_op_nop;
>                  } else if (temps[args[4-tmp]].state == TCG_TEMP_CONST) {
> -                    tcg_opt_gen_movi(s, op_index, gen_args, op,
> +                    tcg_opt_gen_movi(s, op, args, opc,
>                                       args[0], temps[args[4-tmp]].val);
> -                    gen_args += 2;
>                  } else {
> -                    tcg_opt_gen_mov(s, op_index, gen_args, op,
> -                                    args[0], args[4-tmp]);
> -                    gen_args += 2;
> +                    tcg_opt_gen_mov(s, op, args, opc, args[0], args[4-tmp]);
>                  }
> -                args += 6;
>                  break;
>              }
>              goto do_default;
> @@ -1154,24 +1108,31 @@ static TCGArg *tcg_constant_folding(TCGContext *s, 
> uint16_t *tcg_opc_ptr,
>                  uint64_t a = ((uint64_t)ah << 32) | al;
>                  uint64_t b = ((uint64_t)bh << 32) | bl;
>                  TCGArg rl, rh;
> +                TCGOp *op2;
> +                TCGArg *args2;
>
> -                if (op == INDEX_op_add2_i32) {
> +                if (opc == INDEX_op_add2_i32) {
>                      a += b;
>                  } else {
>                      a -= b;
>                  }
>
>                  /* We emit the extra nop when we emit the add2/sub2.  */
> -                assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
> +                op2 = &s->gen_op_buf[oi_next];
> +                assert(op2->opc == INDEX_op_nop);
> +
> +                /* But we still have to allocate args for the op.  */
> +                op2->args = s->gen_next_parm_idx;
> +                s->gen_next_parm_idx += 2;
> +                args2 = &s->gen_opparam_buf[op2->args];
>
>                  rl = args[0];
>                  rh = args[1];
> -                tcg_opt_gen_movi(s, op_index, &gen_args[0],
> -                                 op, rl, (uint32_t)a);
> -                tcg_opt_gen_movi(s, ++op_index, &gen_args[2],
> -                                 op, rh, (uint32_t)(a >> 32));
> -                gen_args += 4;
> -                args += 6;
> +                tcg_opt_gen_movi(s, op, args, opc, rl, (uint32_t)a);
> +                tcg_opt_gen_movi(s, op2, args2, opc, rh, (uint32_t)(a >> 32));
> +
> +                /* We've done all we need to do with the movi.  Skip it.  */
> +                oi_next = op2->next;
>                  break;
>              }
>              goto do_default;
> @@ -1183,18 +1144,25 @@ static TCGArg *tcg_constant_folding(TCGContext *s, 
> uint16_t *tcg_opc_ptr,
>                  uint32_t b = temps[args[3]].val;
>                  uint64_t r = (uint64_t)a * b;
>                  TCGArg rl, rh;
> +                TCGOp *op2;
> +                TCGArg *args2;
>
>                  /* We emit the extra nop when we emit the mulu2.  */
> -                assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
> +                op2 = &s->gen_op_buf[oi_next];
> +                assert(op2->opc == INDEX_op_nop);
> +
> +                /* But we still have to allocate args for the op.  */
> +                op2->args = s->gen_next_parm_idx;
> +                s->gen_next_parm_idx += 2;
> +                args2 = &s->gen_opparam_buf[op2->args];
>
>                  rl = args[0];
>                  rh = args[1];
> -                tcg_opt_gen_movi(s, op_index, &gen_args[0],
> -                                 op, rl, (uint32_t)r);
> -                tcg_opt_gen_movi(s, ++op_index, &gen_args[2],
> -                                 op, rh, (uint32_t)(r >> 32));
> -                gen_args += 4;
> -                args += 4;
> +                tcg_opt_gen_movi(s, op, args, opc, rl, (uint32_t)r);
> +                tcg_opt_gen_movi(s, op2, args2, opc, rh, (uint32_t)(r >> 32));
> +
> +                /* We've done all we need to do with the movi.  Skip it.  */
> +                oi_next = op2->next;
>                  break;
>              }
>              goto do_default;
> @@ -1205,12 +1173,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, 
> uint16_t *tcg_opc_ptr,
>                  if (tmp) {
>              do_brcond_true:
>                      reset_all_temps(nb_temps);
> -                    s->gen_opc_buf[op_index] = INDEX_op_br;
> -                    gen_args[0] = args[5];
> -                    gen_args += 1;
> +                    op->opc = INDEX_op_br;
> +                    args[0] = args[5];
>                  } else {
>              do_brcond_false:
> -                    s->gen_opc_buf[op_index] = INDEX_op_nop;
> +                    op->opc = INDEX_op_nop;
>                  }
>              } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
>                         && temps[args[2]].state == TCG_TEMP_CONST
> @@ -1221,12 +1188,11 @@ static TCGArg *tcg_constant_folding(TCGContext *s, 
> uint16_t *tcg_opc_ptr,
>                     vs the high word of the input.  */
>              do_brcond_high:
>                  reset_all_temps(nb_temps);
> -                s->gen_opc_buf[op_index] = INDEX_op_brcond_i32;
> -                gen_args[0] = args[1];
> -                gen_args[1] = args[3];
> -                gen_args[2] = args[4];
> -                gen_args[3] = args[5];
> -                gen_args += 4;
> +                op->opc = INDEX_op_brcond_i32;
> +                args[0] = args[1];
> +                args[1] = args[3];
> +                args[2] = args[4];
> +                args[3] = args[5];
>              } else if (args[4] == TCG_COND_EQ) {
>                  /* Simplify EQ comparisons where one of the pairs
>                     can be simplified.  */
> @@ -1246,12 +1212,10 @@ static TCGArg *tcg_constant_folding(TCGContext *s, 
> uint16_t *tcg_opc_ptr,
>                  }
>              do_brcond_low:
>                  reset_all_temps(nb_temps);
> -                s->gen_opc_buf[op_index] = INDEX_op_brcond_i32;
> -                gen_args[0] = args[0];
> -                gen_args[1] = args[2];
> -                gen_args[2] = args[4];
> -                gen_args[3] = args[5];
> -                gen_args += 4;
> +                op->opc = INDEX_op_brcond_i32;
> +                args[1] = args[2];
> +                args[2] = args[4];
> +                args[3] = args[5];
>              } else if (args[4] == TCG_COND_NE) {
>                  /* Simplify NE comparisons where one of the pairs
>                     can be simplified.  */
> @@ -1273,15 +1237,13 @@ static TCGArg *tcg_constant_folding(TCGContext *s, 
> uint16_t *tcg_opc_ptr,
>              } else {
>                  goto do_default;
>              }
> -            args += 6;
>              break;
>
>          case INDEX_op_setcond2_i32:
>              tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]);
>              if (tmp != 2) {
>              do_setcond_const:
> -                tcg_opt_gen_movi(s, op_index, gen_args, op, args[0], tmp);
> -                gen_args += 2;
> +                tcg_opt_gen_movi(s, op, args, opc, args[0], tmp);
>              } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
>                         && temps[args[3]].state == TCG_TEMP_CONST
>                         && temps[args[4]].state == TCG_TEMP_CONST
> @@ -1290,14 +1252,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, 
> uint16_t *tcg_opc_ptr,
>                  /* Simplify LT/GE comparisons vs zero to a single compare
>                     vs the high word of the input.  */
>              do_setcond_high:
> -                s->gen_opc_buf[op_index] = INDEX_op_setcond_i32;
>                  reset_temp(args[0]);
>                  temps[args[0]].mask = 1;
> -                gen_args[0] = args[0];
> -                gen_args[1] = args[2];
> -                gen_args[2] = args[4];
> -                gen_args[3] = args[5];
> -                gen_args += 4;
> +                op->opc = INDEX_op_setcond_i32;
> +                args[1] = args[2];
> +                args[2] = args[4];
> +                args[3] = args[5];
>              } else if (args[5] == TCG_COND_EQ) {
>                  /* Simplify EQ comparisons where one of the pairs
>                     can be simplified.  */
> @@ -1318,12 +1278,9 @@ static TCGArg *tcg_constant_folding(TCGContext *s, 
> uint16_t *tcg_opc_ptr,
>              do_setcond_low:
>                  reset_temp(args[0]);
>                  temps[args[0]].mask = 1;
> -                s->gen_opc_buf[op_index] = INDEX_op_setcond_i32;
> -                gen_args[0] = args[0];
> -                gen_args[1] = args[1];
> -                gen_args[2] = args[3];
> -                gen_args[3] = args[5];
> -                gen_args += 4;
> +                op->opc = INDEX_op_setcond_i32;
> +                args[2] = args[3];
> +                args[3] = args[5];
>              } else if (args[5] == TCG_COND_NE) {
>                  /* Simplify NE comparisons where one of the pairs
>                     can be simplified.  */
> @@ -1345,7 +1302,6 @@ static TCGArg *tcg_constant_folding(TCGContext *s, 
> uint16_t *tcg_opc_ptr,
>              } else {
>                  goto do_default;
>              }
> -            args += 6;
>              break;
>
>          case INDEX_op_call:
> @@ -1377,22 +1333,12 @@ static TCGArg *tcg_constant_folding(TCGContext *s, 
> uint16_t *tcg_opc_ptr,
>                      }
>                  }
>              }
> -            for (i = 0; i < nb_args; i++) {
> -                gen_args[i] = args[i];
> -            }
> -            args += nb_args;
> -            gen_args += nb_args;
>              break;
>          }
>      }
> -
> -    return gen_args;
>  }
>
> -TCGArg *tcg_optimize(TCGContext *s, uint16_t *tcg_opc_ptr,
> -        TCGArg *args, TCGOpDef *tcg_op_defs)
> +void tcg_optimize(TCGContext *s)
>  {
> -    TCGArg *res;
> -    res = tcg_constant_folding(s, tcg_opc_ptr, args, tcg_op_defs);
> -    return res;
> +    tcg_constant_folding(s);
>  }
> diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c
> index 5305f1d..cbaa15c 100644
> --- a/tcg/tcg-op.c
> +++ b/tcg/tcg-op.c
> @@ -35,100 +35,116 @@ extern TCGv_i32 TCGV_HIGH_link_error(TCGv_i64);
>  #define TCGV_HIGH TCGV_HIGH_link_error
>  #endif
>
> +/* Note that this is optimized for sequential allocation during translate.
> +   Up to and including filling in the forward link immediately.  We'll do
> +   proper termination of the end of the list after we finish translation.  */
> +
> +static void tcg_emit_op(TCGContext *ctx, TCGOpcode opc, int args)
> +{
> +    int oi = ctx->gen_next_op_idx;
> +    int ni = oi + 1;
> +    int pi = oi - 1;
> +
> +    tcg_debug_assert(oi < OPC_BUF_SIZE);
> +    ctx->gen_last_op_idx = oi;
> +    ctx->gen_next_op_idx = ni;
> +
> +    ctx->gen_op_buf[oi] = (TCGOp){
> +        .opc = opc,
> +        .args = args,
> +        .prev = pi,
> +        .next = ni
> +    };
> +}
> +
>  void tcg_gen_op0(TCGContext *ctx, TCGOpcode opc)
>  {
> -    *ctx->gen_opc_ptr++ = opc;
> +    tcg_emit_op(ctx, opc, -1);
>  }
>
>  void tcg_gen_op1(TCGContext *ctx, TCGOpcode opc, TCGArg a1)
>  {
> -    uint16_t *op = ctx->gen_opc_ptr;
> -    TCGArg *opp = ctx->gen_opparam_ptr;
> +    int pi = ctx->gen_next_parm_idx;
>
> -    op[0] = opc;
> -    opp[0] = a1;
> +    tcg_debug_assert(pi + 1 <= OPPARAM_BUF_SIZE);
> +    ctx->gen_next_parm_idx = pi + 1;
> +    ctx->gen_opparam_buf[pi] = a1;
>
> -    ctx->gen_opc_ptr = op + 1;
> -    ctx->gen_opparam_ptr = opp + 1;
> +    tcg_emit_op(ctx, opc, pi);
>  }
>
>  void tcg_gen_op2(TCGContext *ctx, TCGOpcode opc, TCGArg a1, TCGArg a2)
>  {
> -    uint16_t *op = ctx->gen_opc_ptr;
> -    TCGArg *opp = ctx->gen_opparam_ptr;
> +    int pi = ctx->gen_next_parm_idx;
>
> -    op[0] = opc;
> -    opp[0] = a1;
> -    opp[1] = a2;
> +    tcg_debug_assert(pi + 2 <= OPPARAM_BUF_SIZE);
> +    ctx->gen_next_parm_idx = pi + 2;
> +    ctx->gen_opparam_buf[pi + 0] = a1;
> +    ctx->gen_opparam_buf[pi + 1] = a2;
>
> -    ctx->gen_opc_ptr = op + 1;
> -    ctx->gen_opparam_ptr = opp + 2;
> +    tcg_emit_op(ctx, opc, pi);
>  }
>
>  void tcg_gen_op3(TCGContext *ctx, TCGOpcode opc, TCGArg a1,
>                   TCGArg a2, TCGArg a3)
>  {
> -    uint16_t *op = ctx->gen_opc_ptr;
> -    TCGArg *opp = ctx->gen_opparam_ptr;
> +    int pi = ctx->gen_next_parm_idx;
>
> -    op[0] = opc;
> -    opp[0] = a1;
> -    opp[1] = a2;
> -    opp[2] = a3;
> +    tcg_debug_assert(pi + 3 <= OPPARAM_BUF_SIZE);
> +    ctx->gen_next_parm_idx = pi + 3;
> +    ctx->gen_opparam_buf[pi + 0] = a1;
> +    ctx->gen_opparam_buf[pi + 1] = a2;
> +    ctx->gen_opparam_buf[pi + 2] = a3;
>
> -    ctx->gen_opc_ptr = op + 1;
> -    ctx->gen_opparam_ptr = opp + 3;
> +    tcg_emit_op(ctx, opc, pi);
>  }
>
>  void tcg_gen_op4(TCGContext *ctx, TCGOpcode opc, TCGArg a1,
>                   TCGArg a2, TCGArg a3, TCGArg a4)
>  {
> -    uint16_t *op = ctx->gen_opc_ptr;
> -    TCGArg *opp = ctx->gen_opparam_ptr;
> +    int pi = ctx->gen_next_parm_idx;
>
> -    op[0] = opc;
> -    opp[0] = a1;
> -    opp[1] = a2;
> -    opp[2] = a3;
> -    opp[3] = a4;
> +    tcg_debug_assert(pi + 4 <= OPPARAM_BUF_SIZE);
> +    ctx->gen_next_parm_idx = pi + 4;
> +    ctx->gen_opparam_buf[pi + 0] = a1;
> +    ctx->gen_opparam_buf[pi + 1] = a2;
> +    ctx->gen_opparam_buf[pi + 2] = a3;
> +    ctx->gen_opparam_buf[pi + 3] = a4;
>
> -    ctx->gen_opc_ptr = op + 1;
> -    ctx->gen_opparam_ptr = opp + 4;
> +    tcg_emit_op(ctx, opc, pi);
>  }
>
>  void tcg_gen_op5(TCGContext *ctx, TCGOpcode opc, TCGArg a1,
>                   TCGArg a2, TCGArg a3, TCGArg a4, TCGArg a5)
>  {
> -    uint16_t *op = ctx->gen_opc_ptr;
> -    TCGArg *opp = ctx->gen_opparam_ptr;
> +    int pi = ctx->gen_next_parm_idx;
>
> -    op[0] = opc;
> -    opp[0] = a1;
> -    opp[1] = a2;
> -    opp[2] = a3;
> -    opp[3] = a4;
> -    opp[4] = a5;
> +    tcg_debug_assert(pi + 5 <= OPPARAM_BUF_SIZE);
> +    ctx->gen_next_parm_idx = pi + 5;
> +    ctx->gen_opparam_buf[pi + 0] = a1;
> +    ctx->gen_opparam_buf[pi + 1] = a2;
> +    ctx->gen_opparam_buf[pi + 2] = a3;
> +    ctx->gen_opparam_buf[pi + 3] = a4;
> +    ctx->gen_opparam_buf[pi + 4] = a5;
>
> -    ctx->gen_opc_ptr = op + 1;
> -    ctx->gen_opparam_ptr = opp + 5;
> +    tcg_emit_op(ctx, opc, pi);
>  }
>
>  void tcg_gen_op6(TCGContext *ctx, TCGOpcode opc, TCGArg a1, TCGArg a2,
>                   TCGArg a3, TCGArg a4, TCGArg a5, TCGArg a6)
>  {
> -    uint16_t *op = ctx->gen_opc_ptr;
> -    TCGArg *opp = ctx->gen_opparam_ptr;
> +    int pi = ctx->gen_next_parm_idx;
>
> -    op[0] = opc;
> -    opp[0] = a1;
> -    opp[1] = a2;
> -    opp[2] = a3;
> -    opp[3] = a4;
> -    opp[4] = a5;
> -    opp[5] = a6;
> +    tcg_debug_assert(pi + 6 <= OPPARAM_BUF_SIZE);
> +    ctx->gen_next_parm_idx = pi + 6;
> +    ctx->gen_opparam_buf[pi + 0] = a1;
> +    ctx->gen_opparam_buf[pi + 1] = a2;
> +    ctx->gen_opparam_buf[pi + 2] = a3;
> +    ctx->gen_opparam_buf[pi + 3] = a4;
> +    ctx->gen_opparam_buf[pi + 4] = a5;
> +    ctx->gen_opparam_buf[pi + 5] = a6;
>
> -    ctx->gen_opc_ptr = op + 1;
> -    ctx->gen_opparam_ptr = opp + 6;
> +    tcg_emit_op(ctx, opc, pi);
>  }
>
>  /* 32 bit ops */
> @@ -1862,53 +1878,57 @@ static inline TCGMemOp tcg_canonicalize_memop(TCGMemOp op, bool is64, bool st)
>      return op;
>  }
>
> -static inline void tcg_add_param_i32(TCGv_i32 val)
> -{
> -    *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(val);
> -}
> -
> -static inline void tcg_add_param_i64(TCGv_i64 val)
> +static void gen_ldst_i32(TCGOpcode opc, TCGv_i32 val, TCGv addr,
> +                         TCGMemOp memop, TCGArg idx)
>  {
> +#if TARGET_LONG_BITS == 32
> +    tcg_gen_op4ii_i32(opc, val, addr, memop, idx);
> +#else
>      if (TCG_TARGET_REG_BITS == 32) {
> -        *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(TCGV_LOW(val));
> -        *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I32(TCGV_HIGH(val));
> +        tcg_gen_op5ii_i32(opc, val, TCGV_LOW(addr), TCGV_HIGH(addr),
> +                          memop, idx);
>      } else {
> -        *tcg_ctx.gen_opparam_ptr++ = GET_TCGV_I64(val);
> +        tcg_gen_op4(&tcg_ctx, opc, GET_TCGV_I32(val), GET_TCGV_I64(addr),
> +                    memop, idx);
>      }
> +#endif
>  }
>
> +static void gen_ldst_i64(TCGOpcode opc, TCGv_i64 val, TCGv addr,
> +                         TCGMemOp memop, TCGArg idx)
> +{
>  #if TARGET_LONG_BITS == 32
> -# define tcg_add_param_tl  tcg_add_param_i32
> +    if (TCG_TARGET_REG_BITS == 32) {
> +        tcg_gen_op5ii_i32(opc, TCGV_LOW(val), TCGV_HIGH(val),
> +                          addr, memop, idx);
> +    } else {
> +        tcg_gen_op4(&tcg_ctx, opc, GET_TCGV_I64(val), GET_TCGV_I32(addr),
> +                    memop, idx);
> +    }
>  #else
> -# define tcg_add_param_tl  tcg_add_param_i64
> +    if (TCG_TARGET_REG_BITS == 32) {
> +        tcg_gen_op6ii_i32(opc, TCGV_LOW(val), TCGV_HIGH(val),
> +                          TCGV_LOW(addr), TCGV_HIGH(addr), memop, idx);
> +    } else {
> +        tcg_gen_op4ii_i64(opc, val, addr, memop, idx);
> +    }
>  #endif
> +}
>
>  void tcg_gen_qemu_ld_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop)
>  {
>      memop = tcg_canonicalize_memop(memop, 0, 0);
> -
> -    *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_ld_i32;
> -    tcg_add_param_i32(val);
> -    tcg_add_param_tl(addr);
> -    *tcg_ctx.gen_opparam_ptr++ = memop;
> -    *tcg_ctx.gen_opparam_ptr++ = idx;
> +    gen_ldst_i32(INDEX_op_qemu_ld_i32, val, addr, memop, idx);
>  }
>
>  void tcg_gen_qemu_st_i32(TCGv_i32 val, TCGv addr, TCGArg idx, TCGMemOp memop)
>  {
>      memop = tcg_canonicalize_memop(memop, 0, 1);
> -
> -    *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_st_i32;
> -    tcg_add_param_i32(val);
> -    tcg_add_param_tl(addr);
> -    *tcg_ctx.gen_opparam_ptr++ = memop;
> -    *tcg_ctx.gen_opparam_ptr++ = idx;
> +    gen_ldst_i32(INDEX_op_qemu_st_i32, val, addr, memop, idx);
>  }
>
>  void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
>  {
> -    memop = tcg_canonicalize_memop(memop, 1, 0);
> -
>      if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
>          tcg_gen_qemu_ld_i32(TCGV_LOW(val), addr, idx, memop);
>          if (memop & MO_SIGN) {
> @@ -1919,25 +1939,17 @@ void tcg_gen_qemu_ld_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
>          return;
>      }
>
> -    *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_ld_i64;
> -    tcg_add_param_i64(val);
> -    tcg_add_param_tl(addr);
> -    *tcg_ctx.gen_opparam_ptr++ = memop;
> -    *tcg_ctx.gen_opparam_ptr++ = idx;
> +    memop = tcg_canonicalize_memop(memop, 1, 0);
> +    gen_ldst_i64(INDEX_op_qemu_ld_i64, val, addr, memop, idx);
>  }
>
>  void tcg_gen_qemu_st_i64(TCGv_i64 val, TCGv addr, TCGArg idx, TCGMemOp memop)
>  {
> -    memop = tcg_canonicalize_memop(memop, 1, 1);
> -
>      if (TCG_TARGET_REG_BITS == 32 && (memop & MO_SIZE) < MO_64) {
>          tcg_gen_qemu_st_i32(TCGV_LOW(val), addr, idx, memop);
>          return;
>      }
>
> -    *tcg_ctx.gen_opc_ptr++ = INDEX_op_qemu_st_i64;
> -    tcg_add_param_i64(val);
> -    tcg_add_param_tl(addr);
> -    *tcg_ctx.gen_opparam_ptr++ = memop;
> -    *tcg_ctx.gen_opparam_ptr++ = idx;
> +    memop = tcg_canonicalize_memop(memop, 1, 1);
> +    gen_ldst_i64(INDEX_op_qemu_st_i64, val, addr, memop, idx);
>  }
> diff --git a/tcg/tcg.c b/tcg/tcg.c
> index 3470500..ee041b9 100644
> --- a/tcg/tcg.c
> +++ b/tcg/tcg.c
> @@ -407,7 +407,6 @@ void tcg_func_start(TCGContext *s)
>      /* No temps have been previously allocated for size or locality.  */
>      memset(s->free_temps, 0, sizeof(s->free_temps));
>
> -    s->labels = tcg_malloc(sizeof(TCGLabel) * TCG_MAX_LABELS);
>      s->nb_labels = 0;
>      s->current_frame_offset = s->frame_start;
>
> @@ -415,8 +414,10 @@ void tcg_func_start(TCGContext *s)
>      s->goto_tb_issue_mask = 0;
>  #endif
>
> -    s->gen_opc_ptr = s->gen_opc_buf;
> -    s->gen_opparam_ptr = s->gen_opparam_buf;
> +    s->gen_first_op_idx = 0;
> +    s->gen_last_op_idx = -1;
> +    s->gen_next_op_idx = 0;
> +    s->gen_next_parm_idx = 0;
>
>      s->be = tcg_malloc(sizeof(TCGBackendData));
>  }
> @@ -703,9 +704,8 @@ int tcg_check_temp_count(void)
>  void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
>                     int nargs, TCGArg *args)
>  {
> -    int i, real_args, nb_rets;
> +    int i, real_args, nb_rets, pi, pi_first;
>      unsigned sizemask, flags;
> -    TCGArg *nparam;
>      TCGHelperInfo *info;
>
>      info = g_hash_table_lookup(s->helpers, (gpointer)func);
> @@ -758,8 +758,7 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
>      }
>  #endif /* TCG_TARGET_EXTEND_ARGS */
>
> -    *s->gen_opc_ptr++ = INDEX_op_call;
> -    nparam = s->gen_opparam_ptr++;
> +    pi_first = pi = s->gen_next_parm_idx;
>      if (ret != TCG_CALL_DUMMY_ARG) {
>  #if defined(__sparc__) && !defined(__arch64__) \
>      && !defined(CONFIG_TCG_INTERPRETER)
> @@ -769,25 +768,25 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
>                 two return temporaries, and reassemble below.  */
>              retl = tcg_temp_new_i64();
>              reth = tcg_temp_new_i64();
> -            *s->gen_opparam_ptr++ = GET_TCGV_I64(reth);
> -            *s->gen_opparam_ptr++ = GET_TCGV_I64(retl);
> +            s->gen_opparam_buf[pi++] = GET_TCGV_I64(reth);
> +            s->gen_opparam_buf[pi++] = GET_TCGV_I64(retl);
>              nb_rets = 2;
>          } else {
> -            *s->gen_opparam_ptr++ = ret;
> +            s->gen_opparam_buf[pi++] = ret;
>              nb_rets = 1;
>          }
>  #else
>          if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
>  #ifdef HOST_WORDS_BIGENDIAN
> -            *s->gen_opparam_ptr++ = ret + 1;
> -            *s->gen_opparam_ptr++ = ret;
> +            s->gen_opparam_buf[pi++] = ret + 1;
> +            s->gen_opparam_buf[pi++] = ret;
>  #else
> -            *s->gen_opparam_ptr++ = ret;
> -            *s->gen_opparam_ptr++ = ret + 1;
> +            s->gen_opparam_buf[pi++] = ret;
> +            s->gen_opparam_buf[pi++] = ret + 1;
>  #endif
>              nb_rets = 2;
>          } else {
> -            *s->gen_opparam_ptr++ = ret;
> +            s->gen_opparam_buf[pi++] = ret;
>              nb_rets = 1;
>          }
>  #endif
> @@ -801,7 +800,7 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
>  #ifdef TCG_TARGET_CALL_ALIGN_ARGS
>              /* some targets want aligned 64 bit args */
>              if (real_args & 1) {
> -                *s->gen_opparam_ptr++ = TCG_CALL_DUMMY_ARG;
> +                s->gen_opparam_buf[pi++] = TCG_CALL_DUMMY_ARG;
>                  real_args++;
>              }
>  #endif
> @@ -816,26 +815,42 @@ void tcg_gen_callN(TCGContext *s, void *func, TCGArg ret,
>                have to get more complicated to differentiate between
>                stack arguments and register arguments.  */
>  #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
> -            *s->gen_opparam_ptr++ = args[i] + 1;
> -            *s->gen_opparam_ptr++ = args[i];
> +            s->gen_opparam_buf[pi++] = args[i] + 1;
> +            s->gen_opparam_buf[pi++] = args[i];
>  #else
> -            *s->gen_opparam_ptr++ = args[i];
> -            *s->gen_opparam_ptr++ = args[i] + 1;
> +            s->gen_opparam_buf[pi++] = args[i];
> +            s->gen_opparam_buf[pi++] = args[i] + 1;
>  #endif
>              real_args += 2;
>              continue;
>          }
>
> -        *s->gen_opparam_ptr++ = args[i];
> +        s->gen_opparam_buf[pi++] = args[i];
>          real_args++;
>      }
> -    *s->gen_opparam_ptr++ = (uintptr_t)func;
> -    *s->gen_opparam_ptr++ = flags;
> +    s->gen_opparam_buf[pi++] = (uintptr_t)func;
> +    s->gen_opparam_buf[pi++] = flags;
> +
> +    i = s->gen_next_op_idx;
> +    tcg_debug_assert(i < OPC_BUF_SIZE);
> +    tcg_debug_assert(pi <= OPPARAM_BUF_SIZE);
> +
> +    /* Set links for sequential allocation during translation.  */
> +    s->gen_op_buf[i] = (TCGOp){
> +        .opc = INDEX_op_call,
> +        .callo = nb_rets,
> +        .calli = real_args,
> +        .args = pi_first,
> +        .prev = i - 1,
> +        .next = i + 1
> +    };
>
> -    *nparam = (nb_rets << 16) | real_args;
> +    /* Make sure the calli field didn't overflow.  */
> +    tcg_debug_assert(s->gen_op_buf[i].calli == real_args);
>
> -    /* total parameters, needed to go backward in the instruction stream */
> -    *s->gen_opparam_ptr++ = 1 + nb_rets + real_args + 3;
> +    s->gen_last_op_idx = i;
> +    s->gen_next_op_idx = i + 1;
> +    s->gen_next_parm_idx = pi;
>
>  #if defined(__sparc__) && !defined(__arch64__) \
>      && !defined(CONFIG_TCG_INTERPRETER)
> @@ -972,20 +987,21 @@ static const char * const ldst_name[] =
>
>  void tcg_dump_ops(TCGContext *s)
>  {
> -    const uint16_t *opc_ptr;
> -    const TCGArg *args;
> -    TCGArg arg;
> -    TCGOpcode c;
> -    int i, k, nb_oargs, nb_iargs, nb_cargs, first_insn;
> -    const TCGOpDef *def;
>      char buf[128];
> +    TCGOp *op;
> +    int oi;
>
> -    first_insn = 1;
> -    opc_ptr = s->gen_opc_buf;
> -    args = s->gen_opparam_buf;
> -    while (opc_ptr < s->gen_opc_ptr) {
> -        c = *opc_ptr++;
> +    for (oi = s->gen_first_op_idx; oi >= 0; oi = op->next) {
> +        int i, k, nb_oargs, nb_iargs, nb_cargs;
> +        const TCGOpDef *def;
> +        const TCGArg *args;
> +        TCGOpcode c;
> +
> +        op = &s->gen_op_buf[oi];
> +        c = op->opc;
>          def = &tcg_op_defs[c];
> +        args = &s->gen_opparam_buf[op->args];
> +
>          if (c == INDEX_op_debug_insn_start) {
>              uint64_t pc;
>  #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
> @@ -993,21 +1009,14 @@ void tcg_dump_ops(TCGContext *s)
>  #else
>              pc = args[0];
>  #endif
> -            if (!first_insn) {
> +            if (oi != s->gen_first_op_idx) {
>                  qemu_log("\n");
>              }
>              qemu_log(" ---- 0x%" PRIx64, pc);
> -            first_insn = 0;
> -            nb_oargs = def->nb_oargs;
> -            nb_iargs = def->nb_iargs;
> -            nb_cargs = def->nb_cargs;
>          } else if (c == INDEX_op_call) {
> -            TCGArg arg;
> -
>              /* variable number of arguments */
> -            arg = *args++;
> -            nb_oargs = arg >> 16;
> -            nb_iargs = arg & 0xffff;
> +            nb_oargs = op->callo;
> +            nb_iargs = op->calli;
>              nb_cargs = def->nb_cargs;
>
>              /* function name, flags, out args */
> @@ -1028,26 +1037,20 @@ void tcg_dump_ops(TCGContext *s)
>              }
>          } else {
>              qemu_log(" %s ", def->name);
> -            if (c == INDEX_op_nopn) {
> -                /* variable number of arguments */
> -                nb_cargs = *args;
> -                nb_oargs = 0;
> -                nb_iargs = 0;
> -            } else {
> -                nb_oargs = def->nb_oargs;
> -                nb_iargs = def->nb_iargs;
> -                nb_cargs = def->nb_cargs;
> -            }
> -
> +
> +            nb_oargs = def->nb_oargs;
> +            nb_iargs = def->nb_iargs;
> +            nb_cargs = def->nb_cargs;
> +
>              k = 0;
> -            for(i = 0; i < nb_oargs; i++) {
> +            for (i = 0; i < nb_oargs; i++) {
>                  if (k != 0) {
>                      qemu_log(",");
>                  }
>                  qemu_log("%s", tcg_get_arg_str_idx(s, buf, sizeof(buf),
>                                                     args[k++]));
>              }
> -            for(i = 0; i < nb_iargs; i++) {
> +            for (i = 0; i < nb_iargs; i++) {
>                  if (k != 0) {
>                      qemu_log(",");
>                  }
> @@ -1085,16 +1088,14 @@ void tcg_dump_ops(TCGContext *s)
>                  i = 0;
>                  break;
>              }
> -            for(; i < nb_cargs; i++) {
> +            for (; i < nb_cargs; i++) {
>                  if (k != 0) {
>                      qemu_log(",");
>                  }
> -                arg = args[k++];
> -                qemu_log("$0x%" TCG_PRIlx, arg);
> +                qemu_log("$0x%" TCG_PRIlx, args[k++]);
>              }
>          }
>          qemu_log("\n");
> -        args += nb_iargs + nb_oargs + nb_cargs;
>      }
>  }
>
> @@ -1244,20 +1245,6 @@ void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs)
>  }
>
>  #ifdef USE_LIVENESS_ANALYSIS
> -
> -/* set a nop for an operation using 'nb_args' */
> -static inline void tcg_set_nop(TCGContext *s, uint16_t *opc_ptr,
> -                               TCGArg *args, int nb_args)
> -{
> -    if (nb_args == 0) {
> -        *opc_ptr = INDEX_op_nop;
> -    } else {
> -        *opc_ptr = INDEX_op_nopn;
> -        args[0] = nb_args;
> -        args[nb_args - 1] = nb_args;
> -    }
> -}
> -
>  /* liveness analysis: end of function: all temps are dead, and globals
>     should be in memory. */
>  static inline void tcg_la_func_end(TCGContext *s, uint8_t *dead_temps,
> @@ -1287,19 +1274,10 @@ static inline void tcg_la_bb_end(TCGContext *s, uint8_t *dead_temps,
>     temporaries are removed. */
>  static void tcg_liveness_analysis(TCGContext *s)
>  {
> -    int i, op_index, nb_args, nb_iargs, nb_oargs, nb_ops;
> -    TCGOpcode op, op_new, op_new2;
> -    TCGArg *args, arg;
> -    const TCGOpDef *def;
>      uint8_t *dead_temps, *mem_temps;
> -    uint16_t dead_args;
> -    uint8_t sync_args;
> -    bool have_op_new2;
> -
> -    s->gen_opc_ptr++; /* skip end */
> -
> -    nb_ops = s->gen_opc_ptr - s->gen_opc_buf;
> +    int oi, oi_prev, nb_ops;
>
> +    nb_ops = s->gen_next_op_idx;
>      s->op_dead_args = tcg_malloc(nb_ops * sizeof(uint16_t));
>      s->op_sync_args = tcg_malloc(nb_ops * sizeof(uint8_t));
>
> @@ -1307,25 +1285,31 @@ static void tcg_liveness_analysis(TCGContext *s)
>      mem_temps = tcg_malloc(s->nb_temps);
>      tcg_la_func_end(s, dead_temps, mem_temps);
>
> -    args = s->gen_opparam_ptr;
> -    op_index = nb_ops - 1;
> -    while (op_index >= 0) {
> -        op = s->gen_opc_buf[op_index];
> -        def = &tcg_op_defs[op];
> -        switch(op) {
> +    for (oi = s->gen_last_op_idx; oi >= 0; oi = oi_prev) {
> +        int i, nb_iargs, nb_oargs;
> +        TCGOpcode opc_new, opc_new2;
> +        bool have_opc_new2;
> +        uint16_t dead_args;
> +        uint8_t sync_args;
> +        TCGArg arg;
> +
> +        TCGOp * const op = &s->gen_op_buf[oi];
> +        TCGArg * const args = &s->gen_opparam_buf[op->args];
> +        TCGOpcode opc = op->opc;
> +        const TCGOpDef *def = &tcg_op_defs[opc];
> +
> +        oi_prev = op->prev;
> +
> +        switch (opc) {
>          case INDEX_op_call:
>              {
>                  int call_flags;
>
> -                nb_args = args[-1];
> -                args -= nb_args;
> -                arg = *args++;
> -                nb_iargs = arg & 0xffff;
> -                nb_oargs = arg >> 16;
> +                nb_oargs = op->callo;
> +                nb_iargs = op->calli;
>                  call_flags = args[nb_oargs + nb_iargs + 1];
>
> -                /* pure functions can be removed if their result is not
> -                   used */
> +                /* pure functions can be removed if their result is unused */
>                  if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
>                      for (i = 0; i < nb_oargs; i++) {
>                          arg = args[i];
> @@ -1333,8 +1317,7 @@ static void tcg_liveness_analysis(TCGContext *s)
>                              goto do_not_remove_call;
>                          }
>                      }
> -                    tcg_set_nop(s, s->gen_opc_buf + op_index,
> -                                args - 1, nb_args);
> +                    goto do_remove;
>                  } else {
>                  do_not_remove_call:
>
> @@ -1373,41 +1356,33 @@ static void tcg_liveness_analysis(TCGContext *s)
>                              dead_temps[arg] = 0;
>                          }
>                      }
> -                    s->op_dead_args[op_index] = dead_args;
> -                    s->op_sync_args[op_index] = sync_args;
> +                    s->op_dead_args[oi] = dead_args;
> +                    s->op_sync_args[oi] = sync_args;
>                  }
> -                args--;
>              }
>              break;
>          case INDEX_op_debug_insn_start:
> -            args -= def->nb_args;
> -            break;
> -        case INDEX_op_nopn:
> -            nb_args = args[-1];
> -            args -= nb_args;
> +        case INDEX_op_nop:
> +        case INDEX_op_end:
>              break;
>          case INDEX_op_discard:
> -            args--;
>              /* mark the temporary as dead */
>              dead_temps[args[0]] = 1;
>              mem_temps[args[0]] = 0;
>              break;
> -        case INDEX_op_end:
> -            break;
>
>          case INDEX_op_add2_i32:
> -            op_new = INDEX_op_add_i32;
> +            opc_new = INDEX_op_add_i32;
>              goto do_addsub2;
>          case INDEX_op_sub2_i32:
> -            op_new = INDEX_op_sub_i32;
> +            opc_new = INDEX_op_sub_i32;
>              goto do_addsub2;
>          case INDEX_op_add2_i64:
> -            op_new = INDEX_op_add_i64;
> +            opc_new = INDEX_op_add_i64;
>              goto do_addsub2;
>          case INDEX_op_sub2_i64:
> -            op_new = INDEX_op_sub_i64;
> +            opc_new = INDEX_op_sub_i64;
>          do_addsub2:
> -            args -= 6;
>              nb_iargs = 4;
>              nb_oargs = 2;
>              /* Test if the high part of the operation is dead, but not
> @@ -1418,12 +1393,11 @@ static void tcg_liveness_analysis(TCGContext *s)
>                  if (dead_temps[args[0]] && !mem_temps[args[0]]) {
>                      goto do_remove;
>                  }
> -                /* Create the single operation plus nop.  */
> -                s->gen_opc_buf[op_index] = op = op_new;
> +                /* Replace the opcode and adjust the args in place,
> +                   leaving 3 unused args at the end.  */
> +                op->opc = opc = opc_new;
>                  args[1] = args[2];
>                  args[2] = args[4];
> -                assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
> -                tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 3);
>                  /* Fall through and mark the single-word operation live.  */
>                  nb_iargs = 2;
>                  nb_oargs = 1;
> @@ -1431,27 +1405,26 @@ static void tcg_liveness_analysis(TCGContext *s)
>              goto do_not_remove;
>
>          case INDEX_op_mulu2_i32:
> -            op_new = INDEX_op_mul_i32;
> -            op_new2 = INDEX_op_muluh_i32;
> -            have_op_new2 = TCG_TARGET_HAS_muluh_i32;
> +            opc_new = INDEX_op_mul_i32;
> +            opc_new2 = INDEX_op_muluh_i32;
> +            have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
>              goto do_mul2;
>          case INDEX_op_muls2_i32:
> -            op_new = INDEX_op_mul_i32;
> -            op_new2 = INDEX_op_mulsh_i32;
> -            have_op_new2 = TCG_TARGET_HAS_mulsh_i32;
> +            opc_new = INDEX_op_mul_i32;
> +            opc_new2 = INDEX_op_mulsh_i32;
> +            have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
>              goto do_mul2;
>          case INDEX_op_mulu2_i64:
> -            op_new = INDEX_op_mul_i64;
> -            op_new2 = INDEX_op_muluh_i64;
> -            have_op_new2 = TCG_TARGET_HAS_muluh_i64;
> +            opc_new = INDEX_op_mul_i64;
> +            opc_new2 = INDEX_op_muluh_i64;
> +            have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
>              goto do_mul2;
>          case INDEX_op_muls2_i64:
> -            op_new = INDEX_op_mul_i64;
> -            op_new2 = INDEX_op_mulsh_i64;
> -            have_op_new2 = TCG_TARGET_HAS_mulsh_i64;
> +            opc_new = INDEX_op_mul_i64;
> +            opc_new2 = INDEX_op_mulsh_i64;
> +            have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
>              goto do_mul2;
>          do_mul2:
> -            args -= 4;
>              nb_iargs = 2;
>              nb_oargs = 2;
>              if (dead_temps[args[1]] && !mem_temps[args[1]]) {
> @@ -1460,28 +1433,25 @@ static void tcg_liveness_analysis(TCGContext *s)
>                      goto do_remove;
>                  }
>                  /* The high part of the operation is dead; generate the low. */
> -                s->gen_opc_buf[op_index] = op = op_new;
> +                op->opc = opc = opc_new;
>                  args[1] = args[2];
>                  args[2] = args[3];
> -            } else if (have_op_new2 && dead_temps[args[0]]
> +            } else if (have_opc_new2 && dead_temps[args[0]]
>                         && !mem_temps[args[0]]) {
> -                /* The low part of the operation is dead; generate the high.  */
> -                s->gen_opc_buf[op_index] = op = op_new2;
> +                /* The low part of the operation is dead; generate the high. */
> +                op->opc = opc = opc_new2;
>                  args[0] = args[1];
>                  args[1] = args[2];
>                  args[2] = args[3];
>              } else {
>                  goto do_not_remove;
>              }
> -            assert(s->gen_opc_buf[op_index + 1] == INDEX_op_nop);
> -            tcg_set_nop(s, s->gen_opc_buf + op_index + 1, args + 3, 1);
>              /* Mark the single-word operation live.  */
>              nb_oargs = 1;
>              goto do_not_remove;
>
>          default:
>              /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
> -            args -= def->nb_args;
>              nb_iargs = def->nb_iargs;
>              nb_oargs = def->nb_oargs;
>
> @@ -1489,24 +1459,23 @@ static void tcg_liveness_analysis(TCGContext *s)
>                 its outputs are dead. We assume that nb_oargs == 0
>                 implies side effects */
>              if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
> -                for(i = 0; i < nb_oargs; i++) {
> +                for (i = 0; i < nb_oargs; i++) {
>                      arg = args[i];
>                      if (!dead_temps[arg] || mem_temps[arg]) {
>                          goto do_not_remove;
>                      }
>                  }
>              do_remove:
> -                tcg_set_nop(s, s->gen_opc_buf + op_index, args, def->nb_args);
> +                op->opc = INDEX_op_nop;
>  #ifdef CONFIG_PROFILER
>                  s->del_op_count++;
>  #endif
>              } else {
>              do_not_remove:
> -
>                  /* output args are dead */
>                  dead_args = 0;
>                  sync_args = 0;
> -                for(i = 0; i < nb_oargs; i++) {
> +                for (i = 0; i < nb_oargs; i++) {
>                      arg = args[i];
>                      if (dead_temps[arg]) {
>                          dead_args |= (1 << i);
> @@ -1527,23 +1496,18 @@ static void tcg_liveness_analysis(TCGContext *s)
>                  }
>
>                  /* input args are live */
> -                for(i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
> +                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
>                      arg = args[i];
>                      if (dead_temps[arg]) {
>                          dead_args |= (1 << i);
>                      }
>                      dead_temps[arg] = 0;
>                  }
> -                s->op_dead_args[op_index] = dead_args;
> -                s->op_sync_args[op_index] = sync_args;
> +                s->op_dead_args[oi] = dead_args;
> +                s->op_sync_args[oi] = sync_args;
>              }
>              break;
>          }
> -        op_index--;
> -    }
> -
> -    if (args != s->gen_opparam_buf) {
> -        tcg_abort();
>      }
>  }
>  #else
> @@ -2110,11 +2074,11 @@ static void tcg_reg_alloc_op(TCGContext *s,
>  #define STACK_DIR(x) (x)
>  #endif
>
> -static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
> -                              TCGOpcode opc, const TCGArg *args,
> -                              uint16_t dead_args, uint8_t sync_args)
> +static void tcg_reg_alloc_call(TCGContext *s, int nb_oargs, int nb_iargs,
> +                               const TCGArg * const args, uint16_t dead_args,
> +                               uint8_t sync_args)
>  {
> -    int nb_iargs, nb_oargs, flags, nb_regs, i, reg, nb_params;
> +    int flags, nb_regs, i, reg;
>      TCGArg arg;
>      TCGTemp *ts;
>      intptr_t stack_offset;
> @@ -2123,22 +2087,16 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
>      int allocate_args;
>      TCGRegSet allocated_regs;
>
> -    arg = *args++;
> -
> -    nb_oargs = arg >> 16;
> -    nb_iargs = arg & 0xffff;
> -    nb_params = nb_iargs;
> -
>      func_addr = (tcg_insn_unit *)(intptr_t)args[nb_oargs + nb_iargs];
>      flags = args[nb_oargs + nb_iargs + 1];
>
>      nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
> -    if (nb_regs > nb_params) {
> -        nb_regs = nb_params;
> +    if (nb_regs > nb_iargs) {
> +        nb_regs = nb_iargs;
>      }
>
>      /* assign stack slots first */
> -    call_stack_size = (nb_params - nb_regs) * sizeof(tcg_target_long);
> +    call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
>      call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
>          ~(TCG_TARGET_STACK_ALIGN - 1);
>      allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
> @@ -2149,7 +2107,7 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
>      }
>
>      stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
> -    for(i = nb_regs; i < nb_params; i++) {
> +    for(i = nb_regs; i < nb_iargs; i++) {
>          arg = args[nb_oargs + i];
>  #ifdef TCG_TARGET_STACK_GROWSUP
>          stack_offset -= sizeof(tcg_target_long);
> @@ -2256,8 +2214,6 @@ static int tcg_reg_alloc_call(TCGContext *s, const TCGOpDef *def,
>              }
>          }
>      }
> -
> -    return nb_iargs + nb_oargs + def->nb_cargs + 1;
>  }
>
>  #ifdef CONFIG_PROFILER
> @@ -2285,10 +2241,7 @@ static inline int tcg_gen_code_common(TCGContext *s,
>                                        tcg_insn_unit *gen_code_buf,
>                                        long search_pc)
>  {
> -    TCGOpcode opc;
> -    int op_index;
> -    const TCGOpDef *def;
> -    const TCGArg *args;
> +    int oi, oi_next;
>
>  #ifdef DEBUG_DISAS
>      if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP))) {
> @@ -2303,8 +2256,7 @@ static inline int tcg_gen_code_common(TCGContext *s,
>  #endif
>
>  #ifdef USE_TCG_OPTIMIZATIONS
> -    s->gen_opparam_ptr =
> -        tcg_optimize(s, s->gen_opc_ptr, s->gen_opparam_buf, tcg_op_defs);
> +    tcg_optimize(s);
>  #endif
>
>  #ifdef CONFIG_PROFILER
> @@ -2333,42 +2285,31 @@ static inline int tcg_gen_code_common(TCGContext *s,
>
>      tcg_out_tb_init(s);
>
> -    args = s->gen_opparam_buf;
> -    op_index = 0;
> +    for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
> +        TCGOp * const op = &s->gen_op_buf[oi];
> +        TCGArg * const args = &s->gen_opparam_buf[op->args];
> +        TCGOpcode opc = op->opc;
> +        const TCGOpDef *def = &tcg_op_defs[opc];
> +        uint16_t dead_args = s->op_dead_args[oi];
> +        uint8_t sync_args = s->op_sync_args[oi];
>
> -    for(;;) {
> -        opc = s->gen_opc_buf[op_index];
> +        oi_next = op->next;
>  #ifdef CONFIG_PROFILER
>          tcg_table_op_count[opc]++;
>  #endif
> -        def = &tcg_op_defs[opc];
> -#if 0
> -        printf("%s: %d %d %d\n", def->name,
> -               def->nb_oargs, def->nb_iargs, def->nb_cargs);
> -        //        dump_regs(s);
> -#endif
> -        switch(opc) {
> +
> +        switch (opc) {
>          case INDEX_op_mov_i32:
>          case INDEX_op_mov_i64:
> -            tcg_reg_alloc_mov(s, def, args, s->op_dead_args[op_index],
> -                              s->op_sync_args[op_index]);
> +            tcg_reg_alloc_mov(s, def, args, dead_args, sync_args);
>              break;
>          case INDEX_op_movi_i32:
>          case INDEX_op_movi_i64:
> -            tcg_reg_alloc_movi(s, args, s->op_dead_args[op_index],
> -                               s->op_sync_args[op_index]);
> +            tcg_reg_alloc_movi(s, args, dead_args, sync_args);
>              break;
>          case INDEX_op_debug_insn_start:
> -            /* debug instruction */
> -            break;
>          case INDEX_op_nop:
> -        case INDEX_op_nop1:
> -        case INDEX_op_nop2:
> -        case INDEX_op_nop3:
>              break;
> -        case INDEX_op_nopn:
> -            args += args[0];
> -            goto next;
>          case INDEX_op_discard:
>              temp_dead(s, args[0]);
>              break;
> @@ -2377,12 +2318,9 @@ static inline int tcg_gen_code_common(TCGContext *s,
>              tcg_out_label(s, args[0], s->code_ptr);
>              break;
>          case INDEX_op_call:
> -            args += tcg_reg_alloc_call(s, def, opc, args,
> -                                       s->op_dead_args[op_index],
> -                                       s->op_sync_args[op_index]);
> -            goto next;
> -        case INDEX_op_end:
> -            goto the_end;
> +            tcg_reg_alloc_call(s, op->callo, op->calli, args,
> +                               dead_args, sync_args);
> +            break;
>          default:
>              /* Sanity check that we've not introduced any unhandled opcodes. */
>              if (def->flags & TCG_OPF_NOT_PRESENT) {
> @@ -2391,21 +2329,17 @@ static inline int tcg_gen_code_common(TCGContext *s,
>              /* Note: in order to speed up the code, it would be much
>                 faster to have specialized register allocator functions for
>                 some common argument patterns */
> -            tcg_reg_alloc_op(s, def, opc, args, s->op_dead_args[op_index],
> -                             s->op_sync_args[op_index]);
> +            tcg_reg_alloc_op(s, def, opc, args, dead_args, sync_args);
>              break;
>          }
> -        args += def->nb_args;
> -    next:
>          if (search_pc >= 0 && search_pc < tcg_current_code_size(s)) {
> -            return op_index;
> +            return oi;
>          }
> -        op_index++;
>  #ifndef NDEBUG
>          check_regs(s);
>  #endif
>      }
> - the_end:
> +
>      /* Generate TB finalization at the end of block */
>      tcg_out_tb_finalize(s);
>      return -1;
> @@ -2416,14 +2350,18 @@ int tcg_gen_code(TCGContext *s, tcg_insn_unit *gen_code_buf)
>  #ifdef CONFIG_PROFILER
>      {
>          int n;
> -        n = (s->gen_opc_ptr - s->gen_opc_buf);
> +
> +        n = s->gen_last_op_idx + 1;
>          s->op_count += n;
> -        if (n > s->op_count_max)
> +        if (n > s->op_count_max) {
>              s->op_count_max = n;
> +        }
>
> -        s->temp_count += s->nb_temps;
> -        if (s->nb_temps > s->temp_count_max)
> -            s->temp_count_max = s->nb_temps;
> +        n = s->nb_temps;
> +        s->temp_count += n;
> +        if (n > s->temp_count_max) {
> +            s->temp_count_max = n;
> +        }
>      }
>  #endif
>
> diff --git a/tcg/tcg.h b/tcg/tcg.h
> index 95f1aad..596e30a 100644
> --- a/tcg/tcg.h
> +++ b/tcg/tcg.h
> @@ -448,10 +448,28 @@ typedef struct TCGTempSet {
>      unsigned long l[BITS_TO_LONGS(TCG_MAX_TEMPS)];
>  } TCGTempSet;
>
> +typedef struct TCGOp {
> +    TCGOpcode opc   : 8;
> +
> +    /* The number of out and in parameters for a call.  */
> +    unsigned callo  : 2;
> +    unsigned calli  : 6;
> +
> +    /* Index of the arguments for this op, or -1 for zero-operand ops.  */
> +    signed args     : 16;
> +
> +    /* Index of the prev/next op, or -1 for the end of the list.  */
> +    signed prev     : 16;
> +    signed next     : 16;
> +} TCGOp;
> +
> +QEMU_BUILD_BUG_ON(NB_OPS > 0xff);
> +QEMU_BUILD_BUG_ON(OPC_BUF_SIZE >= 0x7fff);
> +QEMU_BUILD_BUG_ON(OPPARAM_BUF_SIZE >= 0x7fff);
> +
>  struct TCGContext {
>      uint8_t *pool_cur, *pool_end;
>      TCGPool *pool_first, *pool_current, *pool_first_large;
> -    TCGLabel *labels;
>      int nb_labels;
>      int nb_globals;
>      int nb_temps;
> @@ -469,9 +487,6 @@ struct TCGContext {
>                                 corresponding output argument needs to be
>                                 sync to memory. */
>
> -    /* tells in which temporary a given register is. It does not take
> -       into account fixed registers */
> -    int reg_to_temp[TCG_TARGET_NB_REGS];
>      TCGRegSet reserved_regs;
>      intptr_t current_frame_offset;
>      intptr_t frame_start;
> @@ -479,8 +494,6 @@ struct TCGContext {
>      int frame_reg;
>
>      tcg_insn_unit *code_ptr;
> -    TCGTemp temps[TCG_MAX_TEMPS]; /* globals first, temps after */
> -    TCGTempSet free_temps[TCG_TYPE_COUNT * 2];
>
>      GHashTable *helpers;
>
> @@ -508,14 +521,10 @@ struct TCGContext {
>      int goto_tb_issue_mask;
>  #endif
>
> -    uint16_t gen_opc_buf[OPC_BUF_SIZE];
> -    TCGArg gen_opparam_buf[OPPARAM_BUF_SIZE];
> -
> -    uint16_t *gen_opc_ptr;
> -    TCGArg *gen_opparam_ptr;
> -    target_ulong gen_opc_pc[OPC_BUF_SIZE];
> -    uint16_t gen_opc_icount[OPC_BUF_SIZE];
> -    uint8_t gen_opc_instr_start[OPC_BUF_SIZE];
> +    int gen_first_op_idx;
> +    int gen_last_op_idx;
> +    int gen_next_op_idx;
> +    int gen_next_parm_idx;
>
>      /* Code generation.  Note that we specifically do not use tcg_insn_unit
>         here, because there's too much arithmetic throughout that relies
> @@ -533,6 +542,22 @@ struct TCGContext {
>
>      /* The TCGBackendData structure is private to tcg-target.c.  */
>      struct TCGBackendData *be;
> +
> +    TCGTempSet free_temps[TCG_TYPE_COUNT * 2];
> +    TCGTemp temps[TCG_MAX_TEMPS]; /* globals first, temps after */
> +
> +    /* tells in which temporary a given register is. It does not take
> +       into account fixed registers */
> +    int reg_to_temp[TCG_TARGET_NB_REGS];
> +
> +    TCGOp gen_op_buf[OPC_BUF_SIZE];
> +    TCGArg gen_opparam_buf[OPPARAM_BUF_SIZE];
> +
> +    target_ulong gen_opc_pc[OPC_BUF_SIZE];
> +    uint16_t gen_opc_icount[OPC_BUF_SIZE];
> +    uint8_t gen_opc_instr_start[OPC_BUF_SIZE];
> +
> +    TCGLabel labels[TCG_MAX_LABELS];
>  };
>
>  extern TCGContext tcg_ctx;
> @@ -540,7 +565,7 @@ extern TCGContext tcg_ctx;
>  /* The number of opcodes emitted so far.  */
>  static inline int tcg_op_buf_count(void)
>  {
> -    return tcg_ctx.gen_opc_ptr - tcg_ctx.gen_opc_buf;
> +    return tcg_ctx.gen_next_op_idx;
>  }
>
>  /* Test for whether to terminate the TB for using too many opcodes.  */
> @@ -718,8 +743,7 @@ void tcg_add_target_add_op_defs(const TCGTargetOpDef *tdefs);
>  void tcg_gen_callN(TCGContext *s, void *func,
>                     TCGArg ret, int nargs, TCGArg *args);
>
> -TCGArg *tcg_optimize(TCGContext *s, uint16_t *tcg_opc_ptr, TCGArg *args,
> -                     TCGOpDef *tcg_op_def);
> +void tcg_optimize(TCGContext *s);
>
>  /* only used for debugging purposes */
>  void tcg_dump_ops(TCGContext *s);
> --
> 2.1.0
>
>



-- 
Regards,
Artyom Tarasenko

SPARC and PPC PReP under qemu blog: http://tyom.blogspot.com/search/label/qemu



reply via email to

[Prev in Thread] Current Thread [Next in Thread]