qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [RFC PATCH v2 15/44] target/loongarch: Implement vmul/vmuh/vmulw{ev/od}


From: gaosong
Subject: Re: [RFC PATCH v2 15/44] target/loongarch: Implement vmul/vmuh/vmulw{ev/od}
Date: Thu, 6 Apr 2023 20:09:22 +0800
User-agent: Mozilla/5.0 (X11; Linux loongarch64; rv:68.0) Gecko/20100101 Thunderbird/68.7.0

Hi, Richard

在 2023/3/29 上午4:46, Richard Henderson 写道:
+static void do_vmuh_s(unsigned vece, uint32_t vd_ofs, uint32_t vj_ofs,
+                      uint32_t vk_ofs, uint32_t oprsz, uint32_t maxsz)
+{
+    static const GVecGen3 op[4] = {
+        {
+            .fno = gen_helper_vmuh_b,
+            .vece = MO_8
+        },
+        {
+            .fno = gen_helper_vmuh_h,
+            .vece = MO_16
+        },
+        {
+            .fno = gen_helper_vmuh_w,
+            .vece = MO_32
+        },
+        {
+            .fno = gen_helper_vmuh_d,
+            .vece = MO_64
+        },
+    };

Could be worth integer expansion, especially for MO_32/MO_64?
Should be trivial...
For the integer expansion, how about the following code?

/*
 * Integer (i64) expansion of the signed byte multiply-high.
 *
 * a and b are handled as eight packed 8-bit lanes.  For every lane the two
 * bytes are sign-extended and multiplied, and bits [15:8] of the product
 * are stored into the same lane of t.
 *
 * NOTE(review): t is cleared before a and b are read, so this presumably
 * relies on t not aliasing either input -- confirm against the caller.
 */
static void gen_vmuh_b(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 lhs = tcg_temp_new_i64();
    TCGv_i64 rhs = tcg_temp_new_i64();
    int shift;

    /* Start from an all-zero result so each lane can simply be OR-ed in. */
    tcg_gen_mov_i64(t, tcg_constant_i64(0));

    for (shift = 0; shift < 64; shift += 8) {
        /* Move the current lane down to bit 0 and sign-extend it. */
        tcg_gen_shri_i64(lhs, a, shift);
        tcg_gen_shri_i64(rhs, b, shift);
        tcg_gen_ext8s_i64(lhs, lhs);
        tcg_gen_ext8s_i64(rhs, rhs);

        tcg_gen_mul_i64(lhs, lhs, rhs);

        /* Keep only the 16-bit product, then take its upper byte. */
        tcg_gen_andi_i64(lhs, lhs, 0xffff);
        tcg_gen_shri_i64(lhs, lhs, 8);

        /* Put the byte back at the lane's position in the result. */
        tcg_gen_shli_i64(lhs, lhs, shift);
        tcg_gen_or_i64(t, t, lhs);
    }
}

/*
 * Integer (i64) expansion of the signed halfword multiply-high.
 *
 * a and b are handled as four packed 16-bit lanes.  For every lane the two
 * halfwords are sign-extended and multiplied, and bits [31:16] of the
 * product are stored into the same lane of t.
 *
 * NOTE(review): t is cleared before a and b are read, so this presumably
 * relies on t not aliasing either input -- confirm against the caller.
 */
static void gen_vmuh_h(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 lhs = tcg_temp_new_i64();
    TCGv_i64 rhs = tcg_temp_new_i64();
    int shift;

    /* Start from an all-zero result so each lane can simply be OR-ed in. */
    tcg_gen_mov_i64(t, tcg_constant_i64(0));

    for (shift = 0; shift < 64; shift += 16) {
        /* Move the current lane down to bit 0 and sign-extend it. */
        tcg_gen_shri_i64(lhs, a, shift);
        tcg_gen_shri_i64(rhs, b, shift);
        tcg_gen_ext16s_i64(lhs, lhs);
        tcg_gen_ext16s_i64(rhs, rhs);

        tcg_gen_mul_i64(lhs, lhs, rhs);

        /* Keep only the 32-bit product, then take its upper halfword. */
        tcg_gen_andi_i64(lhs, lhs, 0xffffffff);
        tcg_gen_shri_i64(lhs, lhs, 16);

        /* Put the halfword back at the lane's position in the result. */
        tcg_gen_shli_i64(lhs, lhs, shift);
        tcg_gen_or_i64(t, t, lhs);
    }
}

/*
 * Integer (i32) expansion of the signed word multiply-high.
 *
 * Both 32-bit inputs are widened to 64 bits with ext_i32_i64, multiplied
 * as 64-bit values, and the upper 32 bits of the product are written to t.
 */
static void gen_vmuh_w(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i64 wide_a = tcg_temp_new_i64();
    TCGv_i64 wide_b = tcg_temp_new_i64();

    tcg_gen_ext_i32_i64(wide_a, a);
    tcg_gen_ext_i32_i64(wide_b, b);

    /* The full product overwrites wide_b; only its high half is kept. */
    tcg_gen_mul_i64(wide_b, wide_a, wide_b);
    tcg_gen_extrh_i64_i32(t, wide_b);
}

/*
 * Integer (i64) expansion of the signed doubleword multiply-high.
 *
 * A single muls2 op produces both halves of the product of a and b; t is
 * passed as the second output and a scratch temporary as the first.
 * Presumably the first output is the low half and the second the high
 * half -- confirm against the TCG muls2 documentation.
 */
static void gen_vmuh_d(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
{
    /* Scratch destination for the half of the product that is not used. */
    TCGv_i64 unused_half = tcg_temp_new_i64();

    tcg_gen_muls2_i64(unused_half, t, a, b);
}

/*
 * Integer (i64) expansion of the unsigned byte multiply-high.
 *
 * a and b are handled as eight packed 8-bit lanes.  For every lane the two
 * bytes are zero-extended and multiplied, and the product shifted right by
 * 8 is stored into the same lane of t.  No mask is applied before the
 * shift: the product of two zero-extended bytes already fits in 16 bits.
 *
 * NOTE(review): t is cleared before a and b are read, so this presumably
 * relies on t not aliasing either input -- confirm against the caller.
 */
static void gen_vmuh_bu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 lhs = tcg_temp_new_i64();
    TCGv_i64 rhs = tcg_temp_new_i64();
    int shift;

    /* Start from an all-zero result so each lane can simply be OR-ed in. */
    tcg_gen_mov_i64(t, tcg_constant_i64(0));

    for (shift = 0; shift < 64; shift += 8) {
        /* Move the current lane down to bit 0 and zero-extend it. */
        tcg_gen_shri_i64(lhs, a, shift);
        tcg_gen_shri_i64(rhs, b, shift);
        tcg_gen_ext8u_i64(lhs, lhs);
        tcg_gen_ext8u_i64(rhs, rhs);

        /* Multiply and drop the low byte of the product. */
        tcg_gen_mul_i64(lhs, lhs, rhs);
        tcg_gen_shri_i64(lhs, lhs, 8);

        /* Put the byte back at the lane's position in the result. */
        tcg_gen_shli_i64(lhs, lhs, shift);
        tcg_gen_or_i64(t, t, lhs);
    }
}

/*
 * Integer (i64) expansion of the unsigned halfword multiply-high.
 *
 * a and b are handled as four packed 16-bit lanes.  For every lane the two
 * halfwords are zero-extended and multiplied, and the product shifted
 * right by 16 is stored into the same lane of t.  No mask is applied
 * before the shift: the product of two zero-extended halfwords already
 * fits in 32 bits.
 *
 * NOTE(review): t is cleared before a and b are read, so this presumably
 * relies on t not aliasing either input -- confirm against the caller.
 */
static void gen_vmuh_hu(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
{
    TCGv_i64 lhs = tcg_temp_new_i64();
    TCGv_i64 rhs = tcg_temp_new_i64();
    int shift;

    /* Start from an all-zero result so each lane can simply be OR-ed in. */
    tcg_gen_mov_i64(t, tcg_constant_i64(0));

    for (shift = 0; shift < 64; shift += 16) {
        /* Move the current lane down to bit 0 and zero-extend it. */
        tcg_gen_shri_i64(lhs, a, shift);
        tcg_gen_shri_i64(rhs, b, shift);
        tcg_gen_ext16u_i64(lhs, lhs);
        tcg_gen_ext16u_i64(rhs, rhs);

        /* Multiply and drop the low halfword of the product. */
        tcg_gen_mul_i64(lhs, lhs, rhs);
        tcg_gen_shri_i64(lhs, lhs, 16);

        /* Put the halfword back at the lane's position in the result. */
        tcg_gen_shli_i64(lhs, lhs, shift);
        tcg_gen_or_i64(t, t, lhs);
    }
}

/*
 * Integer (i32) expansion of the unsigned word multiply-high.
 *
 * Both 32-bit inputs are widened to 64 bits with extu_i32_i64, multiplied
 * as 64-bit values, and the upper 32 bits of the product are written to t.
 */
static void gen_vmuh_wu(TCGv_i32 t, TCGv_i32 a, TCGv_i32 b)
{
    TCGv_i64 wide_a = tcg_temp_new_i64();
    TCGv_i64 wide_b = tcg_temp_new_i64();

    tcg_gen_extu_i32_i64(wide_a, a);
    tcg_gen_extu_i32_i64(wide_b, b);

    /* The full product overwrites wide_b; only its high half is kept. */
    tcg_gen_mul_i64(wide_b, wide_a, wide_b);
    tcg_gen_extrh_i64_i32(t, wide_b);
}

/*
 * Integer (i64) expansion of the unsigned doubleword multiply-high.
 *
 * A single mulu2 op produces both halves of the product of a and b; t is
 * passed as the second output and a scratch temporary as the first.
 * Presumably the first output is the low half and the second the high
 * half -- confirm against the TCG mulu2 documentation.
 */
static void gen_vmuh_du(TCGv_i64 t, TCGv_i64 a, TCGv_i64 b)
{
    /* Scratch destination for the half of the product that is not used. */
    TCGv_i64 unused_half = tcg_temp_new_i64();

    tcg_gen_mulu2_i64(unused_half, t, a, b);
}

Thanks.
Song Gao

reply via email to

[Prev in Thread] Current Thread [Next in Thread]