qemu-arm
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [PATCH RFC v2] target/arm: Implement SVE2 MATCH, NMATCH


From: Richard Henderson
Subject: Re: [PATCH RFC v2] target/arm: Implement SVE2 MATCH, NMATCH
Date: Tue, 14 Apr 2020 19:05:10 -0700
User-agent: Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Thunderbird/68.4.1

On 4/14/20 4:16 PM, Stephen Long wrote:
> Signed-off-by: Stephen Long <address@hidden>
> ---
>  target/arm/helper-sve.h    | 10 ++++++++
>  target/arm/sve.decode      |  5 ++++
>  target/arm/sve_helper.c    | 51 ++++++++++++++++++++++++++++++++++++++
>  target/arm/translate-sve.c | 22 ++++++++++++++++
>  4 files changed, 88 insertions(+)
> 
> diff --git a/target/arm/helper-sve.h b/target/arm/helper-sve.h
> index 5dd880cf6d..bc4a463bc7 100644
> --- a/target/arm/helper-sve.h
> +++ b/target/arm/helper-sve.h
> @@ -2516,6 +2516,16 @@ DEF_HELPER_FLAGS_3(sve2_uqrshrnt_h, TCG_CALL_NO_RWG, 
> void, ptr, ptr, i32)
>  DEF_HELPER_FLAGS_3(sve2_uqrshrnt_s, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
>  DEF_HELPER_FLAGS_3(sve2_uqrshrnt_d, TCG_CALL_NO_RWG, void, ptr, ptr, i32)
>  
> +DEF_HELPER_FLAGS_5(sve2_match_ppzz_b, TCG_CALL_NO_RWG,
> +                   i32, ptr, ptr, ptr, ptr, i32)
> +DEF_HELPER_FLAGS_5(sve2_match_ppzz_h, TCG_CALL_NO_RWG,
> +                   i32, ptr, ptr, ptr, ptr, i32)
> +
> +DEF_HELPER_FLAGS_5(sve2_nmatch_ppzz_b, TCG_CALL_NO_RWG,
> +                   i32, ptr, ptr, ptr, ptr, i32)
> +DEF_HELPER_FLAGS_5(sve2_nmatch_ppzz_h, TCG_CALL_NO_RWG,
> +                   i32, ptr, ptr, ptr, ptr, i32)
> +
>  DEF_HELPER_FLAGS_6(sve2_faddp_zpzz_h, TCG_CALL_NO_RWG,
>                     void, ptr, ptr, ptr, ptr, ptr, i32)
>  DEF_HELPER_FLAGS_6(sve2_faddp_zpzz_s, TCG_CALL_NO_RWG,
> diff --git a/target/arm/sve.decode b/target/arm/sve.decode
> index 374e47fb05..652668df02 100644
> --- a/target/arm/sve.decode
> +++ b/target/arm/sve.decode
> @@ -1305,6 +1305,11 @@ UQSHRNT         01000101 .. 1 ..... 00 1101 ..... 
> .....  @rd_rn_tszimm_shr
>  UQRSHRNB        01000101 .. 1 ..... 00 1110 ..... .....  @rd_rn_tszimm_shr
>  UQRSHRNT        01000101 .. 1 ..... 00 1111 ..... .....  @rd_rn_tszimm_shr
>  
> +### SVE2 Character Match
> +
> +MATCH           01000101 .. 1 ..... 100 ... ..... 0 .... @pd_pg_rn_rm
> +NMATCH          01000101 .. 1 ..... 100 ... ..... 1 .... @pd_pg_rn_rm
> +
>  ## SVE2 floating-point pairwise operations
>  
>  FADDP           01100100 .. 010 00 0 100 ... ..... ..... @rdn_pg_rm
> diff --git a/target/arm/sve_helper.c b/target/arm/sve_helper.c
> index b68f62cd7f..78abd8b62a 100644
> --- a/target/arm/sve_helper.c
> +++ b/target/arm/sve_helper.c
> @@ -6890,3 +6890,54 @@ DO_ST1_ZPZ_D(dd_be, zd, MO_64)
>  
>  #undef DO_ST1_ZPZ_S
>  #undef DO_ST1_ZPZ_D
> +
> +#define DO_PPZZ_CHAR_MATCH(NAME, TYPE, OP, H, MASK, DEFAULT_VAL)             
>  \
> +static inline bool NAME##_inner_loop(TYPE nn, void *segmentbase)             
>  \
> +{                                                                            
>  \
> +    intptr_t i = 128;                                                        
>  \
> +    do {                                                                     
>  \
> +        do {                                                                 
>  \
> +            i -= sizeof(TYPE) * 8;                                           
>  \
> +            TYPE mm = *(TYPE *)(segmentbase + H1(i));                        
>  \
> +            if (nn OP mm) {                                                  
>  \
> +                return !DEFAULT_VAL;                                         
>  \
> +            }                                                                
>  \
> +        } while (i & 63);                                                    
>  \
> +    } while (i > 0);                                                         
>  \
> +    return DEFAULT_VAL;                                                      
>  \
> +}                                                                            
>  \

You seem to be mixing up bits and bytes here: the loop counts down from 128
(bits) in steps of sizeof(TYPE) * 8, yet H1(i) uses that value as a byte index.

I note that we don't need to keep re-loading the Zm segment elements from
memory.  Perhaps something like:

/*
 * Return true if the lowest element of @n is equal to any of the
 * elements packed into the 64-bit word @m.
 *
 * Elements are (8 << esz) bits wide; only the low element of @n is
 * compared, any higher bits of @n are ignored.
 */
static inline bool do_match1(uint64_t n, uint64_t m, int esz)
{
    const int width = 8 << esz;
    const uint64_t needle = extract64(n, 0, width);
    int pos = 0;

    /* Walk the elements of @m from least to most significant. */
    while (pos < 64) {
        if (extract64(m, pos, width) == needle) {
            return true;
        }
        pos += width;
    }
    return false;
}

/*
 * Return true if the lowest element of @n occurs in either of the two
 * 64-bit words @m0 or @m1.  @m1 is examined only when @m0 holds no match.
 */
static inline bool do_match2(uint64_t n, uint64_t m0,
                             uint64_t m1, int esz)
{
    if (do_match1(n, m0, esz)) {
        return true;
    }
    return do_match1(n, m1, esz);
}


As an improvement, we can use the "zero in word" bit trick from

https://graphics.stanford.edu/~seander/bithacks.html#ZeroInWord

to compare against every element of a 64-bit word at once:

/*
 * Return true if the lowest element of @n occurs in either of the two
 * 64-bit words @m0 or @m1, testing all elements of a word in parallel.
 *
 * The element value is XORed against each word, so a matching element
 * becomes all-zero; the "zero in word" trick, (x - ones) & ~x & signs,
 * is then non-zero exactly when some element of x is zero.
 *
 * NOTE(review): relies on dup_const(esz, v) replicating the low
 * (8 << esz) bits of v across the 64-bit result -- confirm against
 * its definition.
 */
static inline bool do_match2(uint64_t n, uint64_t m0,
                             uint64_t m1, int esz)
{
    const int width = 8 << esz;
    /* A 1 in the least / most significant bit of every element. */
    const uint64_t lsb = dup_const(esz, 1);
    const uint64_t msb = lsb << (width - 1);
    const uint64_t splat = dup_const(esz, n);

    /* Elements equal to the needle turn into zero. */
    const uint64_t diff0 = splat ^ m0;
    const uint64_t diff1 = splat ^ m1;

    /* Sign bit of an element survives only where a zero was found. */
    const uint64_t zero0 = (diff0 - lsb) & ~diff0;
    const uint64_t zero1 = (diff1 - lsb) & ~diff1;

    return ((zero0 | zero1) & msb) != 0;
}


> +uint32_t HELPER(NAME)(void *vd, void *vn, void *vm, void *vg, uint32_t desc) 
>  \
> +{                                                                            
>  \
> +    intptr_t opr_sz = simd_oprsz(desc);                                      
>  \
> +    uint32_t flags = PREDTEST_INIT;                                          
>  \
> +    intptr_t i = opr_sz;                                                     
>  \
> +    do {                                                                     
>  \
> +        uint64_t out = 0;                                                    
>  \
> +        uint64_t pg;                                                         
>  \
> +        do {                                                                 
>  \
> +            i -= sizeof(TYPE), out <<= sizeof(TYPE);                         
>  \
> +            TYPE nn = *(TYPE *)(vn + H(i));                                  
>  \
> +            out = (out & ~1ull) | DEFAULT_VAL;                               
>  \
> +            out |= NAME##_inner_loop(nn, vm + (i & -16));                    
>  \
> +        } while (i & 63);                                                    
>  \
> +        pg = *(uint64_t *)(vg + (i >> 3)) & MASK;                            
>  \
> +        out &= pg;                                                           
>  \
> +        *(uint64_t *)(vd + (i >> 3)) = out;                                  
>  \
> +        flags = iter_predtest_bwd(out, pg, flags);                           
>  \
> +    } while (i > 0);                                                         
>  \
> +    return 0;                                                                
>  \

/*
 * Common implementation of the MATCH / NMATCH helpers.
 *
 * The operands are processed in 16-byte segments.  For every active
 * element of @vn, the element is searched for among all elements of the
 * same 16-byte segment of @vm; the result bit is the search outcome,
 * inverted when @nmatch is true.  Inactive elements produce 0.
 *
 * @vd:     destination predicate, written 16 bits per segment
 * @vn:     vector holding the elements to look up
 * @vm:     vector holding the segments to search
 * @vg:     governing predicate, read 16 bits per segment
 * @desc:   descriptor; only simd_oprsz(desc) is used here
 * @esz:    log2 of the element size in bytes
 * @nmatch: true to invert the sense of the comparison
 *
 * Returns the flags accumulated by iter_predtest_fwd() over all segments.
 */
static inline uint32_t do_match(void *vd, void *vn,
    void *vm, void *vg, uint32_t desc,
    int esz, bool nmatch)
{
    /*
     * One significant predicate bit per element: 0xffff for bytes,
     * 0x5555 for halfwords, and so on.  Without this mask, stray
     * predicate bits that do not correspond to an element would be
     * fed into the flags computation below.
     */
    const uint16_t esz_mask = 0xffff / ((1u << (1 << esz)) - 1);
    intptr_t opr_sz = simd_oprsz(desc);
    uint32_t flags = PREDTEST_INIT;
    intptr_t i, j, k;

    for (i = 0; i < opr_sz; i += 16) {
        /* Load the whole Zm segment once; it is reused for every element. */
        uint64_t m0 = *(uint64_t *)(vm + i);
        uint64_t m1 = *(uint64_t *)(vm + i + 8);
        uint16_t pg = *(uint16_t *)(vg + H1_2(i >> 3)) & esz_mask;
        uint16_t out = 0;

        for (j = 0; j < 16; j += 8) {
            uint64_t n = *(uint64_t *)(vn + i + j);

            /* k is the byte offset of the element within this 64-bit word. */
            for (k = 0; k < 8; k += 1 << esz) {
                if (pg & (1 << (j + k))) {
                    bool o = do_match2(n >> (k * 8),
                                       m0, m1, esz);
                    out |= (o ^ nmatch) << (j + k);
                }
            }
        }
        *(uint16_t *)(vd + H1_2(i >> 3)) = out;
        flags = iter_predtest_fwd(out, pg, flags);
    }
    return flags;
}

/*
 * Define the helper NAME as a thin wrapper around do_match(), passing
 * ESZ as the element size and INV as the flag that inverts the sense
 * of the comparison.
 */
#define DO_PPZZ_MATCH(NAME, ESZ, INV) \
uint32_t HELPER(NAME)(void *vd, void *vn, void *vm,   \
                      void *vg, uint32_t desc)        \
{                                                     \
    return do_match(vd, vn, vm, vg, desc, ESZ, INV);  \
}

/* MATCH, byte and halfword elements: result not inverted. */
DO_PPZZ_MATCH(sve2_match_ppzz_b, MO_8, false)
DO_PPZZ_MATCH(sve2_match_ppzz_h, MO_16, false)

/* NMATCH, byte and halfword elements: result inverted. */
DO_PPZZ_MATCH(sve2_nmatch_ppzz_b, MO_8, true)
DO_PPZZ_MATCH(sve2_nmatch_ppzz_h, MO_16, true)


r~



reply via email to

[Prev in Thread] Current Thread [Next in Thread]