qemu-s390x
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: patch s390x/tcg: Implement Miscellaneous-Instruction-Extensions Faci


From: David Hildenbrand
Subject: Re: patch s390x/tcg: Implement Miscellaneous-Instruction-Extensions Facility 3 for the s390x
Date: Mon, 14 Feb 2022 19:47:57 +0100
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:91.0) Gecko/20100101 Thunderbird/91.4.0

On 08.02.22 23:52, David Miller wrote:
> From c21eaa57a40ed22f10087a1019dd456d99e3fb03 Mon Sep 17 00:00:00 2001
> From: David Miller <dmiller423@gmail.com>
> Date: Tue, 8 Feb 2022 22:33:48 -0500
> Subject: [PATCH] s390x/tcg: Implement Miscellaneous-Instruction-Extensions
>  Facility 3 for the s390x

Thanks for working on this!


a) I fail to apply the patch for some reason:

Applying: patch s390x/tcg: Implement
Miscellaneous-Instruction-Extensions Facility 3 for the s390x
error: corrupt patch at line 90
Patch failed at 0001 patch s390x/tcg: Implement
Miscellaneous-Instruction-Extensions Facility 3 for the s390x


Do you use "git format-patch" + "git send-email" to generate+send patches?

b) Can you add a short patch description?

c) Can you split off the tests into a separate patch?

[...]

> +/* OR WITH COMPLEMENT */
> +    C(0xb975, OCRK,    RRF_a, MIE3, r2, r3, new, r1_32, orc, nz32)
> +    C(0xb965, OCGRK,   RRF_a, MIE3, r2, r3, r1, 0, orc, nz64)
> +
> +/* NAND */
> +    C(0xb974, NNRK,    RRF_a, MIE3, r2, r3, new, r1_32, nand, nz32)
> +    C(0xb964, NNGRK,   RRF_a, MIE3, r2, r3, r1, 0, nand, nz64)
> +
> +/* NOR */
> +    C(0xb976, NORK,    RRF_a, MIE3, r2, r3, new, r1_32, nor, nz32)
> +    C(0xb966, NOGRK,   RRF_a, MIE3, r2, r3, r1, 0, nor, nz64)
> +
> +/* NOT EXCLUSIVE OR */
> +    C(0xb977, NXRK,    RRF_a, MIE3, r2, r3, new, r1_32, nxor, nz32)
> +    C(0xb967, NXGRK,   RRF_a, MIE3, r2, r3, r1, 0, nxor, nz64)
> 

The N* ones should go to the proper previous section, no?

>  /* PACK */
>      /* Really format SS_b, but we pack both lengths into one argument
> @@ -765,6 +785,12 @@
>  /* SEARCH STRING UNICODE */
>      C(0xb9be, SRSTU,   RRE,   ETF3, 0, 0, 0, 0, srstu, 0)
> 
> +/* SELECT */
> +    C(0xb9f0, SELR,    RRF_a, MIE3, r2, r3, new, r1_32, sel, 0)
> +    C(0xb9e3, SELGR,   RRF_a, MIE3, r2, r3, r1, 0, sel, 0)
> +/* SELECT HIGH */
> +    C(0xb9c0, SELFHR,  RRF_a, MIE3, r2, r3, new, r1_32h, sel, 0)
> +
>  /* SET ACCESS */
>      C(0xb24e, SAR,     RRE,   Z,   0, r2_o, 0, 0, sar, 0)
>  /* SET ADDRESSING MODE */
> diff --git a/target/s390x/tcg/mem_helper.c b/target/s390x/tcg/mem_helper.c
> index 406578d105..9275f1349f 100644
> --- a/target/s390x/tcg/mem_helper.c
> +++ b/target/s390x/tcg/mem_helper.c
> @@ -546,6 +546,48 @@ void HELPER(mvc)(CPUS390XState *env, uint32_t l,
> uint64_t dest, uint64_t src)
>      do_helper_mvc(env, l, dest, src, GETPC());
>  }
> 
> +/* move right to left */
> +static uint32_t do_helper_mvcrl(CPUS390XState *env, uint64_t l, uint64_t 
> dest,
> +                                uint64_t src, uintptr_t ra)
> +{
> +    const int mmu_idx = cpu_mmu_index(env, false);
> +    S390Access srca, desta;
> +    uint32_t i;
> +
> +    HELPER_LOG("%s l %d dest %" PRIx64 " src %" PRIx64 "\n",
> +               __func__, l, dest, src);
> +
> +    /* MVCRL always copies one more byte than specified - maximum is 256 */
> +    l++;
> +
> +    srca = access_prepare(env, src, l, MMU_DATA_LOAD, mmu_idx, ra);
> +    desta = access_prepare(env, dest, l, MMU_DATA_STORE, mmu_idx, ra);
> +
> +    /*
> +     * "When the operands overlap, the result is obtained as if the operands
> +     * were processed one byte at a time". Only non-destructive overlaps
> +     * behave like memmove().
> +     */
> +    if (dest == src + l - 1) {

Shouldn't this be "src = dest + 1"?

SRC  23456789
DST 12345678

IIUC would have to result in
->  999999999

Because we first copy the I to the 9 to the 8, then the new 9 (old 8) to
the 7 and so on.


BUT

"
Destructive overlap occurs and results are unpredict-
able when the byte location designated by the sec-
ond-operand address (the leftmost byte of the
source) overlaps with any byte location of the first
operand, other than the byte location designated by
the first-operand address (the leftmost byte of the tar-
get).
"
IIUC, it says that on any destructive overlap the result is
unpredictable. Which makes sense, because we want to optimize for the
array use case which is e.g.,

SRC  12345678
DST   23456789
->   112345678

So I'd suggest you simply always do the proper loop. No need to optimize
for something with unpredictable behavior.

You could think about

if (!is_destructive_overlap(env, dest, src, l)) {
        access_memmove(env, &desta, &srca, ra);
} else {
        ... full loop
}

because it would cover the valid src==dest case "faster". (note that we
have to read+write even if src==dest to trigger proper access)

> +        access_memset(env, &desta, access_get_byte(env, &srca, 0, ra), ra);
> +    } else if (!is_destructive_overlap(env, dest, src, l)) {
> +        access_memmove(env, &desta, &srca, ra);
> +    } else {
> +        for (i = 0; i < l; i++) {
> +            uint32_t ti = l - i - 1;
> +            uint8_t byte = access_get_byte(env, &srca, ti, ra);
> +            access_set_byte(env, &desta, ti, byte, ra);
> +        }

Or simpler

for (i = l - 1; i >= 0; i--) {
    uint8_t byte = access_get_byte(env, &srca, i, ra);
    access_set_byte(env, &desta, i, byte, ra);
}

> +    }
> +
> +    return env->cc_op;
> +}
> +
> +void HELPER(mvcrl)(CPUS390XState *env, uint64_t l, uint64_t dest, uint64_t 
> src)
> +{
> +    do_helper_mvcrl(env, l, dest, src, GETPC());

Unless you intend to reuse do_helper_mvcrl() somewhere else , just
inline do_helper_mvcrl() here. Then you can also drop the "return
env->cc_op;"

> +}
> +
>  /* move inverse  */
>  void HELPER(mvcin)(CPUS390XState *env, uint32_t l, uint64_t dest, uint64_t 
> src)
>  {
> diff --git a/target/s390x/tcg/translate.c b/target/s390x/tcg/translate.c
> index 46dea73357..c0a89e2787 100644
> --- a/target/s390x/tcg/translate.c
> +++ b/target/s390x/tcg/translate.c
> @@ -1498,6 +1498,48 @@ static DisasJumpType op_andi(DisasContext *s,
> DisasOps *o)
>      return DISAS_NEXT;
>  }
> 
> +static DisasJumpType op_andc(DisasContext *s, DisasOps *o)
> +{
> +    tcg_gen_andc_i64(o->out, o->in1, o->in2);
> +    return DISAS_NEXT;
> +}
> +
> +static DisasJumpType op_orc(DisasContext *s, DisasOps *o)
> +{
> +    tcg_gen_orc_i64(o->out, o->in1, o->in2);
> +    return DISAS_NEXT;
> +}
> +
> +static DisasJumpType op_nand(DisasContext *s, DisasOps *o)
> +{
> +    tcg_gen_nand_i64(o->out, o->in1, o->in2);
> +    return DISAS_NEXT;
> +}
> +
> +static DisasJumpType op_nor(DisasContext *s, DisasOps *o)
> +{
> +    tcg_gen_nor_i64(o->out, o->in1, o->in2);
> +    return DISAS_NEXT;
> +}
> +
> +static DisasJumpType op_nxor(DisasContext *s, DisasOps *o)
> +{
> +    tcg_gen_xor_i64(o->out, o->in1, o->in2);
> +    tcg_gen_not_i64(o->out, o->out);

That can be simplified to

tcg_gen_eqv_i64(o->out, o->in1, o->in2);

> +    return DISAS_NEXT;
> +}
> +
> +static DisasJumpType op_sel(DisasContext *s, DisasOps *o)
> +{
> +    DisasCompare c;
> +    disas_jcc(s, &c, get_field(s, m4));
> +    tcg_gen_movcond_i64(c.cond, o->out, c.u.s64.a, c.u.s64.b,
> +                        o->in1, o->in2);
> +    free_compare(&c);
> +    return DISAS_NEXT;
> +}
> +
> +

Drop one \n

[...]

>  static const DisasInsn insn_info[] = {
>  #include "insn-data.def"
> diff --git a/tests/tcg/s390x/Makefile.target b/tests/tcg/s390x/Makefile.target
> index 1a7238b4eb..16b9d45307 100644
> --- a/tests/tcg/s390x/Makefile.target
> +++ b/tests/tcg/s390x/Makefile.target
> @@ -1,6 +1,6 @@
>  S390X_SRC=$(SRC_PATH)/tests/tcg/s390x
>  VPATH+=$(S390X_SRC)
> -CFLAGS+=-march=zEC12 -m64
> +CFLAGS+=-march=z15 -m64


Hmm, that might be dangerous in general before we "fully" support the
z15 in TCG. It would be better to encode the instructions via inline
assembly until TCG supports the z15.

(most probably MIE3 is the only thing we really need that gcc might
automatically use, but I haven't double-checked yet)

-- 
Thanks,

David / dhildenb




reply via email to

[Prev in Thread] Current Thread [Next in Thread]