[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Qemu-devel] [PATCH 6/7] target/mips: Unroll loops for MSA float max/min
From: |
Aleksandar Markovic |
Subject: |
[Qemu-devel] [PATCH 6/7] target/mips: Unroll loops for MSA float max/min instructions |
Date: |
Mon, 1 Jul 2019 13:04:39 +0200 |
From: Aleksandar Markovic <address@hidden>
Slight performance improvement for MSA float max/min instructions.
Signed-off-by: Aleksandar Markovic <address@hidden>
---
target/mips/msa_helper.c | 200 +++++++++++++++++++++++++++++------------------
1 file changed, 126 insertions(+), 74 deletions(-)
diff --git a/target/mips/msa_helper.c b/target/mips/msa_helper.c
index 880fc52..9db2ce7 100644
--- a/target/mips/msa_helper.c
+++ b/target/mips/msa_helper.c
@@ -458,7 +458,7 @@
* +---------------+----------------------------------------------------------+
*/
-/* TODO: insert Interleave group helpers here */
+/* TODO: insert Logic group helpers here */
/*
@@ -3850,35 +3850,65 @@ void helper_msa_fmin_df(CPUMIPSState *env, uint32_t df,
uint32_t wd,
wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
wr_t *pws = &(env->active_fpu.fpr[ws].wr);
wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
- uint32_t i;
clear_msacsr_cause(env);
- switch (df) {
- case DF_WORD:
- for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
- if (NUMBER_QNAN_PAIR(pws->w[i], pwt->w[i], 32, status)) {
- MSA_FLOAT_MAXOP(pwx->w[i], min, pws->w[i], pws->w[i], 32);
- } else if (NUMBER_QNAN_PAIR(pwt->w[i], pws->w[i], 32, status)) {
- MSA_FLOAT_MAXOP(pwx->w[i], min, pwt->w[i], pwt->w[i], 32);
- } else {
- MSA_FLOAT_MAXOP(pwx->w[i], min, pws->w[i], pwt->w[i], 32);
- }
+ if (df == DF_WORD) {
+
+ if (NUMBER_QNAN_PAIR(pws->w[0], pwt->w[0], 32, status)) {
+ MSA_FLOAT_MAXOP(pwx->w[0], min, pws->w[0], pws->w[0], 32);
+ } else if (NUMBER_QNAN_PAIR(pwt->w[0], pws->w[0], 32, status)) {
+ MSA_FLOAT_MAXOP(pwx->w[0], min, pwt->w[0], pwt->w[0], 32);
+ } else {
+ MSA_FLOAT_MAXOP(pwx->w[0], min, pws->w[0], pwt->w[0], 32);
}
- break;
- case DF_DOUBLE:
- for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
- if (NUMBER_QNAN_PAIR(pws->d[i], pwt->d[i], 64, status)) {
- MSA_FLOAT_MAXOP(pwx->d[i], min, pws->d[i], pws->d[i], 64);
- } else if (NUMBER_QNAN_PAIR(pwt->d[i], pws->d[i], 64, status)) {
- MSA_FLOAT_MAXOP(pwx->d[i], min, pwt->d[i], pwt->d[i], 64);
- } else {
- MSA_FLOAT_MAXOP(pwx->d[i], min, pws->d[i], pwt->d[i], 64);
- }
+
+ if (NUMBER_QNAN_PAIR(pws->w[1], pwt->w[1], 32, status)) {
+ MSA_FLOAT_MAXOP(pwx->w[1], min, pws->w[1], pws->w[1], 32);
+ } else if (NUMBER_QNAN_PAIR(pwt->w[1], pws->w[1], 32, status)) {
+ MSA_FLOAT_MAXOP(pwx->w[1], min, pwt->w[1], pwt->w[1], 32);
+ } else {
+ MSA_FLOAT_MAXOP(pwx->w[1], min, pws->w[1], pwt->w[1], 32);
}
- break;
- default:
+
+ if (NUMBER_QNAN_PAIR(pws->w[2], pwt->w[2], 32, status)) {
+ MSA_FLOAT_MAXOP(pwx->w[2], min, pws->w[2], pws->w[2], 32);
+ } else if (NUMBER_QNAN_PAIR(pwt->w[2], pws->w[2], 32, status)) {
+ MSA_FLOAT_MAXOP(pwx->w[2], min, pwt->w[2], pwt->w[2], 32);
+ } else {
+ MSA_FLOAT_MAXOP(pwx->w[2], min, pws->w[2], pwt->w[2], 32);
+ }
+
+ if (NUMBER_QNAN_PAIR(pws->w[3], pwt->w[3], 32, status)) {
+ MSA_FLOAT_MAXOP(pwx->w[3], min, pws->w[3], pws->w[3], 32);
+ } else if (NUMBER_QNAN_PAIR(pwt->w[3], pws->w[3], 32, status)) {
+ MSA_FLOAT_MAXOP(pwx->w[3], min, pwt->w[3], pwt->w[3], 32);
+ } else {
+ MSA_FLOAT_MAXOP(pwx->w[3], min, pws->w[3], pwt->w[3], 32);
+ }
+
+ } else if (df == DF_DOUBLE) {
+
+ if (NUMBER_QNAN_PAIR(pws->d[0], pwt->d[0], 64, status)) {
+ MSA_FLOAT_MAXOP(pwx->d[0], min, pws->d[0], pws->d[0], 64);
+ } else if (NUMBER_QNAN_PAIR(pwt->d[0], pws->d[0], 64, status)) {
+ MSA_FLOAT_MAXOP(pwx->d[0], min, pwt->d[0], pwt->d[0], 64);
+ } else {
+ MSA_FLOAT_MAXOP(pwx->d[0], min, pws->d[0], pwt->d[0], 64);
+ }
+
+ if (NUMBER_QNAN_PAIR(pws->d[1], pwt->d[1], 64, status)) {
+ MSA_FLOAT_MAXOP(pwx->d[1], min, pws->d[1], pws->d[1], 64);
+ } else if (NUMBER_QNAN_PAIR(pwt->d[1], pws->d[1], 64, status)) {
+ MSA_FLOAT_MAXOP(pwx->d[1], min, pwt->d[1], pwt->d[1], 64);
+ } else {
+ MSA_FLOAT_MAXOP(pwx->d[1], min, pws->d[1], pwt->d[1], 64);
+ }
+
+ } else {
+
assert(0);
+
}
check_msacsr_cause(env, GETPC());
@@ -3894,22 +3924,18 @@ void helper_msa_fmin_a_df(CPUMIPSState *env, uint32_t
df, uint32_t wd,
wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
wr_t *pws = &(env->active_fpu.fpr[ws].wr);
wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
- uint32_t i;
clear_msacsr_cause(env);
- switch (df) {
- case DF_WORD:
- for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
- FMAXMIN_A(min, max, pwx->w[i], pws->w[i], pwt->w[i], 32, status);
- }
- break;
- case DF_DOUBLE:
- for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
- FMAXMIN_A(min, max, pwx->d[i], pws->d[i], pwt->d[i], 64, status);
- }
- break;
- default:
+ if (df == DF_WORD) {
+ FMAXMIN_A(min, max, pwx->w[0], pws->w[0], pwt->w[0], 32, status);
+ FMAXMIN_A(min, max, pwx->w[1], pws->w[1], pwt->w[1], 32, status);
+ FMAXMIN_A(min, max, pwx->w[2], pws->w[2], pwt->w[2], 32, status);
+ FMAXMIN_A(min, max, pwx->w[3], pws->w[3], pwt->w[3], 32, status);
+ } else if (df == DF_DOUBLE) {
+ FMAXMIN_A(min, max, pwx->d[0], pws->d[0], pwt->d[0], 64, status);
+ FMAXMIN_A(min, max, pwx->d[1], pws->d[1], pwt->d[1], 64, status);
+ } else {
assert(0);
}
@@ -3921,40 +3947,70 @@ void helper_msa_fmin_a_df(CPUMIPSState *env, uint32_t
df, uint32_t wd,
void helper_msa_fmax_df(CPUMIPSState *env, uint32_t df, uint32_t wd,
uint32_t ws, uint32_t wt)
{
- float_status *status = &env->active_tc.msa_fp_status;
+ float_status *status = &env->active_tc.msa_fp_status;
wr_t wx, *pwx = &wx;
wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
wr_t *pws = &(env->active_fpu.fpr[ws].wr);
wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
- uint32_t i;
clear_msacsr_cause(env);
- switch (df) {
- case DF_WORD:
- for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
- if (NUMBER_QNAN_PAIR(pws->w[i], pwt->w[i], 32, status)) {
- MSA_FLOAT_MAXOP(pwx->w[i], max, pws->w[i], pws->w[i], 32);
- } else if (NUMBER_QNAN_PAIR(pwt->w[i], pws->w[i], 32, status)) {
- MSA_FLOAT_MAXOP(pwx->w[i], max, pwt->w[i], pwt->w[i], 32);
- } else {
- MSA_FLOAT_MAXOP(pwx->w[i], max, pws->w[i], pwt->w[i], 32);
- }
+ if (df == DF_WORD) {
+
+ if (NUMBER_QNAN_PAIR(pws->w[0], pwt->w[0], 32, status)) {
+ MSA_FLOAT_MAXOP(pwx->w[0], max, pws->w[0], pws->w[0], 32);
+ } else if (NUMBER_QNAN_PAIR(pwt->w[0], pws->w[0], 32, status)) {
+ MSA_FLOAT_MAXOP(pwx->w[0], max, pwt->w[0], pwt->w[0], 32);
+ } else {
+ MSA_FLOAT_MAXOP(pwx->w[0], max, pws->w[0], pwt->w[0], 32);
}
- break;
- case DF_DOUBLE:
- for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
- if (NUMBER_QNAN_PAIR(pws->d[i], pwt->d[i], 64, status)) {
- MSA_FLOAT_MAXOP(pwx->d[i], max, pws->d[i], pws->d[i], 64);
- } else if (NUMBER_QNAN_PAIR(pwt->d[i], pws->d[i], 64, status)) {
- MSA_FLOAT_MAXOP(pwx->d[i], max, pwt->d[i], pwt->d[i], 64);
- } else {
- MSA_FLOAT_MAXOP(pwx->d[i], max, pws->d[i], pwt->d[i], 64);
- }
+
+ if (NUMBER_QNAN_PAIR(pws->w[1], pwt->w[1], 32, status)) {
+ MSA_FLOAT_MAXOP(pwx->w[1], max, pws->w[1], pws->w[1], 32);
+ } else if (NUMBER_QNAN_PAIR(pwt->w[1], pws->w[1], 32, status)) {
+ MSA_FLOAT_MAXOP(pwx->w[1], max, pwt->w[1], pwt->w[1], 32);
+ } else {
+ MSA_FLOAT_MAXOP(pwx->w[1], max, pws->w[1], pwt->w[1], 32);
}
- break;
- default:
+
+ if (NUMBER_QNAN_PAIR(pws->w[2], pwt->w[2], 32, status)) {
+ MSA_FLOAT_MAXOP(pwx->w[2], max, pws->w[2], pws->w[2], 32);
+ } else if (NUMBER_QNAN_PAIR(pwt->w[2], pws->w[2], 32, status)) {
+ MSA_FLOAT_MAXOP(pwx->w[2], max, pwt->w[2], pwt->w[2], 32);
+ } else {
+ MSA_FLOAT_MAXOP(pwx->w[2], max, pws->w[2], pwt->w[2], 32);
+ }
+
+ if (NUMBER_QNAN_PAIR(pws->w[3], pwt->w[3], 32, status)) {
+ MSA_FLOAT_MAXOP(pwx->w[3], max, pws->w[3], pws->w[3], 32);
+ } else if (NUMBER_QNAN_PAIR(pwt->w[3], pws->w[3], 32, status)) {
+ MSA_FLOAT_MAXOP(pwx->w[3], max, pwt->w[3], pwt->w[3], 32);
+ } else {
+ MSA_FLOAT_MAXOP(pwx->w[3], max, pws->w[3], pwt->w[3], 32);
+ }
+
+ } else if (df == DF_DOUBLE) {
+
+ if (NUMBER_QNAN_PAIR(pws->d[0], pwt->d[0], 64, status)) {
+ MSA_FLOAT_MAXOP(pwx->d[0], max, pws->d[0], pws->d[0], 64);
+ } else if (NUMBER_QNAN_PAIR(pwt->d[0], pws->d[0], 64, status)) {
+ MSA_FLOAT_MAXOP(pwx->d[0], max, pwt->d[0], pwt->d[0], 64);
+ } else {
+ MSA_FLOAT_MAXOP(pwx->d[0], max, pws->d[0], pwt->d[0], 64);
+ }
+
+ if (NUMBER_QNAN_PAIR(pws->d[1], pwt->d[1], 64, status)) {
+ MSA_FLOAT_MAXOP(pwx->d[1], max, pws->d[1], pws->d[1], 64);
+ } else if (NUMBER_QNAN_PAIR(pwt->d[1], pws->d[1], 64, status)) {
+ MSA_FLOAT_MAXOP(pwx->d[1], max, pwt->d[1], pwt->d[1], 64);
+ } else {
+ MSA_FLOAT_MAXOP(pwx->d[1], max, pws->d[1], pwt->d[1], 64);
+ }
+
+ } else {
+
assert(0);
+
}
check_msacsr_cause(env, GETPC());
@@ -3970,22 +4026,18 @@ void helper_msa_fmax_a_df(CPUMIPSState *env, uint32_t
df, uint32_t wd,
wr_t *pwd = &(env->active_fpu.fpr[wd].wr);
wr_t *pws = &(env->active_fpu.fpr[ws].wr);
wr_t *pwt = &(env->active_fpu.fpr[wt].wr);
- uint32_t i;
clear_msacsr_cause(env);
- switch (df) {
- case DF_WORD:
- for (i = 0; i < DF_ELEMENTS(DF_WORD); i++) {
- FMAXMIN_A(max, min, pwx->w[i], pws->w[i], pwt->w[i], 32, status);
- }
- break;
- case DF_DOUBLE:
- for (i = 0; i < DF_ELEMENTS(DF_DOUBLE); i++) {
- FMAXMIN_A(max, min, pwx->d[i], pws->d[i], pwt->d[i], 64, status);
- }
- break;
- default:
+ if (df == DF_WORD) {
+ FMAXMIN_A(max, min, pwx->w[0], pws->w[0], pwt->w[0], 32, status);
+ FMAXMIN_A(max, min, pwx->w[1], pws->w[1], pwt->w[1], 32, status);
+ FMAXMIN_A(max, min, pwx->w[2], pws->w[2], pwt->w[2], 32, status);
+ FMAXMIN_A(max, min, pwx->w[3], pws->w[3], pwt->w[3], 32, status);
+ } else if (df == DF_DOUBLE) {
+ FMAXMIN_A(max, min, pwx->d[0], pws->d[0], pwt->d[0], 64, status);
+ FMAXMIN_A(max, min, pwx->d[1], pws->d[1], pwt->d[1], 64, status);
+ } else {
assert(0);
}
--
2.7.4
- [Qemu-devel] [PATCH 0/7] target/mips: Mips improvements for 4.1, Aleksandar Markovic, 2019/07/01
- [Qemu-devel] [PATCH 3/7] target/mips: Correct comments in msa_helper.c, Aleksandar Markovic, 2019/07/01
- [Qemu-devel] [PATCH 7/7] target/mips: Correct helper for MSA FCLASS.<W|D> instructions, Aleksandar Markovic, 2019/07/01
- [Qemu-devel] [PATCH 6/7] target/mips: Unroll loops for MSA float max/min instructions,
Aleksandar Markovic <=
- [Qemu-devel] [PATCH 5/7] tcg/tests: target/mips: Correct MSA test compilation and execution order, Aleksandar Markovic, 2019/07/01
- [Qemu-devel] [PATCH 4/7] target/mips: Correct comments in translate.c, Aleksandar Markovic, 2019/07/01
- [Qemu-devel] [PATCH 2/7] tcg/tests: target/mips: Amend MSA integer multiply tests, Aleksandar Markovic, 2019/07/01
- [Qemu-devel] [PATCH 1/7] tcg/tests: target/mips: Amend MSA fixed point multiply tests, Aleksandar Markovic, 2019/07/01
- Re: [Qemu-devel] [PATCH 0/7] target/mips: Mips improvements for 4.1, Aleksandar Rikalo, 2019/07/01