qemu-devel
[Top][All Lists]
Advanced

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

Re: [Qemu-devel] [RISU v3 09/11] i386: Add avx512 state to reginfo_t


From: Alex Bennée
Subject: Re: [Qemu-devel] [RISU v3 09/11] i386: Add avx512 state to reginfo_t
Date: Fri, 24 May 2019 10:29:39 +0100
User-agent: mu4e 1.3.2; emacs 26.1

Jan Bobek <address@hidden> writes:

> From: Richard Henderson <address@hidden>
>
> The state expected for a given test must be specifically requested
> with the --xfeatures=mask command-line argument.  This is recorded
> with the saved state so that it is obvious if the apprentice is given
> a different argument.  Any features beyond those present on the
> running CPU will read as zero.
>
> Signed-off-by: Richard Henderson <address@hidden>

Reviewed-by: Alex Bennée <address@hidden>


> ---
>  risu_reginfo_i386.h |  14 +++
>  risu_reginfo_i386.c | 228 ++++++++++++++++++++++++++++++++++++++++++--
>  test_i386.S         |  39 ++++++++
>  3 files changed, 273 insertions(+), 8 deletions(-)
>
> diff --git a/risu_reginfo_i386.h b/risu_reginfo_i386.h
> index e350f01..b468f79 100644
> --- a/risu_reginfo_i386.h
> +++ b/risu_reginfo_i386.h
> @@ -12,6 +12,10 @@
>  #ifndef RISU_REGINFO_I386_H
>  #define RISU_REGINFO_I386_H
>
> +struct avx512_reg {
> +    uint64_t q[8];
> +};
> +
>  /*
>   * This is the data structure we pass over the socket.
>   * It is a simplified and reduced subset of what can
> @@ -19,7 +23,17 @@
>   */
>  struct reginfo {
>      uint32_t faulting_insn;
> +    uint32_t mxcsr;
> +    uint64_t xfeatures;
> +
>      gregset_t gregs;
> +
> +#ifdef __x86_64__
> +    struct avx512_reg vregs[32];
> +#else
> +    struct avx512_reg vregs[8];
> +#endif
> +    uint64_t kregs[8];
>  };
>
>  /*
> diff --git a/risu_reginfo_i386.c b/risu_reginfo_i386.c
> index c4dc14a..83f9541 100644
> --- a/risu_reginfo_i386.c
> +++ b/risu_reginfo_i386.c
> @@ -11,19 +11,32 @@
>
>  #include <stdio.h>
>  #include <stdlib.h>
> +#include <stddef.h>
>  #include <string.h>
>  #include <ucontext.h>
>  #include <assert.h>
> +#include <cpuid.h>
>
>  #include "risu.h"
>  #include "risu_reginfo_i386.h"
>
> -const struct option * const arch_long_opts;
> -const char * const arch_extra_help;
> +#include <asm/sigcontext.h>
> +
> +static uint64_t xfeatures = 3;  /* SSE */
> +
> +static const struct option extra_ops[] = {
> +    {"xfeatures", required_argument, NULL, FIRST_ARCH_OPT },
> +    {0, 0, 0, 0}
> +};
> +
> +const struct option * const arch_long_opts = extra_ops;
> +const char * const arch_extra_help
> +    = "  --xfeatures=<mask>  Use features in mask for XSAVE\n";
>
>  void process_arch_opt(int opt, const char *arg)
>  {
> -    abort();
> +    assert(opt == FIRST_ARCH_OPT);
> +    xfeatures = strtoull(arg, 0, 0);
>  }
>
>  const int reginfo_size(void)
> @@ -31,13 +44,37 @@ const int reginfo_size(void)
>      return sizeof(struct reginfo);
>  }
>
> +static void *xsave_feature_buf(struct _xstate *xs, int feature)
> +{
> +    unsigned int eax, ebx, ecx, edx;
> +    int ok;
> +
> +    /*
> +     * Get the location of the XSAVE feature from the cpuid leaf.
> +     * Given that we know the xfeature bit is set, this must succeed.
> +     */
> +    ok = __get_cpuid_count(0xd, feature, &eax, &ebx, &ecx, &edx);
> +    assert(ok);
> +
> +    /* Sanity check that the frame stored by the kernel contains the data. */
> +    assert(xs->fpstate.sw_reserved.extended_size >= eax + ebx);
> +
> +    return (void *)xs + ebx;
> +}
> +
>  /* reginfo_init: initialize with a ucontext */
>  void reginfo_init(struct reginfo *ri, ucontext_t *uc)
>  {
> -    int i;
> +    int i, nvecregs;
> +    struct _fpstate *fp;
> +    struct _xstate *xs;
> +    uint64_t features;
>
>      memset(ri, 0, sizeof(*ri));
>
> +    /* Require master and apprentice to be given the same arguments.  */
> +    ri->xfeatures = xfeatures;
> +
>      for (i = 0; i < NGREG; i++) {
>          switch (i) {
>          case REG_E(IP):
> @@ -79,12 +116,89 @@ void reginfo_init(struct reginfo *ri, ucontext_t *uc)
>       * distinguish 'do compare' from 'stop'.
>       */
>      ri->faulting_insn = *(uint32_t *)uc->uc_mcontext.gregs[REG_E(IP)];
> +
> +    /*
> +     * FP state is omitted if unused (aka in init state).
> +     * Use the <asm/sigcontext.h> struct for access to AVX state.
> +     */
> +
> +    fp = (struct _fpstate *)uc->uc_mcontext.fpregs;
> +    if (fp == NULL) {
> +        return;
> +    }
> +
> +#ifdef __x86_64__
> +    nvecregs = 16;
> +#else
> +    /* We don't (currently) care about the 80387 state, only SSE+.  */
> +    if (fp->magic != X86_FXSR_MAGIC) {
> +        return;
> +    }
> +    nvecregs = 8;
> +#endif
> +
> +    /*
> +     * Now we know that _fpstate contains FXSAVE data.
> +     */
> +    ri->mxcsr = fp->mxcsr;
> +
> +    for (i = 0; i < nvecregs; ++i) {
> +#ifdef __x86_64__
> +        memcpy(&ri->vregs[i], &fp->xmm_space[i * 4], 16);
> +#else
> +        memcpy(&ri->vregs[i], &fp->_xmm[i], 16);
> +#endif
> +    }
> +
> +    if (fp->sw_reserved.magic1 != FP_XSTATE_MAGIC1) {
> +        return;
> +    }
> +    xs = (struct _xstate *)fp;
> +    features = xfeatures & xs->xstate_hdr.xfeatures;
> +
> +    /*
> +     * Now we know that _fpstate contains XSAVE data.
> +     */
> +
> +    if (features & (1 << 2)) {
> +        /* YMM_Hi128 state */
> +        void *buf = xsave_feature_buf(xs, 2);
> +        for (i = 0; i < nvecregs; ++i) {
> +            memcpy(&ri->vregs[i].q[2], buf + 16 * i, 16);
> +        }
> +    }
> +
> +    if (features & (1 << 5)) {
> +        /* Opmask state */
> +        uint64_t *buf = xsave_feature_buf(xs, 5);
> +        for (i = 0; i < 8; ++i) {
> +            ri->kregs[i] = buf[i];
> +        }
> +    }
> +
> +    if (features & (1 << 6)) {
> +        /* ZMM_Hi256 state */
> +        void *buf = xsave_feature_buf(xs, 6);
> +        for (i = 0; i < nvecregs; ++i) {
> +            memcpy(&ri->vregs[i].q[4], buf + 32 * i, 32);
> +        }
> +    }
> +
> +#ifdef __x86_64__
> +    if (features & (1 << 7)) {
> +        /* Hi16_ZMM state */
> +        void *buf = xsave_feature_buf(xs, 7);
> +        for (i = 0; i < 16; ++i) {
> +            memcpy(&ri->vregs[i + 16], buf + 64 * i, 64);
> +        }
> +    }
> +#endif
>  }
>
>  /* reginfo_is_eq: compare the reginfo structs, returns nonzero if equal */
>  int reginfo_is_eq(struct reginfo *m, struct reginfo *a)
>  {
> -    return 0 == memcmp(m, a, sizeof(*m));
> +    return !memcmp(m, a, sizeof(*m));
>  }
>
>  static const char *const regname[NGREG] = {
> @@ -126,28 +240,126 @@ static const char *const regname[NGREG] = {
>  # define PRIxREG   "%08x"
>  #endif
>
> +static int get_nvecregs(uint64_t features)
> +{
> +#ifdef __x86_64__
> +    return features & (1 << 7) ? 32 : 16;
> +#else
> +    return 8;
> +#endif
> +}
> +
> +static int get_nvecquads(uint64_t features)
> +{
> +    if (features & (1 << 6)) {
> +        return 8;
> +    } else if (features & (1 << 2)) {
> +        return 4;
> +    } else {
> +        return 2;
> +    }
> +}
> +
> +static char get_vecletter(uint64_t features)
> +{
> +    if (features & (1 << 6 | 1 << 7)) {
> +        return 'z';
> +    } else if (features & (1 << 2)) {
> +        return 'y';
> +    } else {
> +        return 'x';
> +    }
> +}
> +
>  /* reginfo_dump: print state to a stream, returns nonzero on success */
>  int reginfo_dump(struct reginfo *ri, FILE *f)
>  {
> -    int i;
> +    uint64_t features;
> +    int i, j, n, w;
> +    char r;
> +
>      fprintf(f, "  faulting insn %x\n", ri->faulting_insn);
>      for (i = 0; i < NGREG; i++) {
>          if (regname[i]) {
>              fprintf(f, "  %-6s: " PRIxREG "\n", regname[i], ri->gregs[i]);
>          }
>      }
> +
> +    fprintf(f, "  mxcsr : %x\n", ri->mxcsr);
> +    fprintf(f, "  xfeat : %" PRIx64 "\n", ri->xfeatures);
> +
> +    features = ri->xfeatures;
> +    n = get_nvecregs(features);
> +    w = get_nvecquads(features);
> +    r = get_vecletter(features);
> +
> +    for (i = 0; i < n; i++) {
> +        fprintf(f, "  %cmm%-3d: ", r, i);
> +        for (j = w - 1; j >= 0; j--) {
> +            fprintf(f, "%016" PRIx64 "%c",
> +                    ri->vregs[i].q[j], j == 0 ? '\n' : ' ');
> +        }
> +    }
> +
> +    if (features & (1 << 5)) {
> +        for (i = 0; i < 8; i++) {
> +            fprintf(f, "  k%-5d: %016" PRIx64 "\n", i, ri->kregs[i]);
> +        }
> +    }
> +
>      return !ferror(f);
>  }
>
>  int reginfo_dump_mismatch(struct reginfo *m, struct reginfo *a, FILE *f)
>  {
> -    int i;
> +    int i, j, n, w;
> +    uint64_t features;
> +    char r;
> +
> +    fprintf(f, "Mismatch (master v apprentice):\n");
> +
>      for (i = 0; i < NGREG; i++) {
>          if (m->gregs[i] != a->gregs[i]) {
>              assert(regname[i]);
> -            fprintf(f, "Mismatch: %s: " PRIxREG " v " PRIxREG "\n",
> +            fprintf(f, "  %-6s: " PRIxREG " v " PRIxREG "\n",
>                      regname[i], m->gregs[i], a->gregs[i]);
>          }
>      }
> +
> +    if (m->mxcsr != a->mxcsr) {
> +        fprintf(f, "  mxcsr : %x v %x\n", m->mxcsr, a->mxcsr);
> +    }
> +    if (m->xfeatures != a->xfeatures) {
> +        fprintf(f, "  xfeat : %" PRIx64 " v %" PRIx64 "\n",
> +                m->xfeatures, a->xfeatures);
> +    }
> +
> +    features = m->xfeatures;
> +    n = get_nvecregs(features);
> +    w = get_nvecquads(features);
> +    r = get_vecletter(features);
> +
> +    for (i = 0; i < n; i++) {
> +        if (memcmp(&m->vregs[i], &a->vregs[i], w * 8)) {
> +            fprintf(f, "  %cmm%-3d: ", r, i);
> +            for (j = w - 1; j >= 0; j--) {
> +                fprintf(f, "%016" PRIx64 "%c",
> +                        m->vregs[i].q[j], j == 0 ? '\n' : ' ');
> +            }
> +            fprintf(f, "       v: ");
> +            for (j = w - 1; j >= 0; j--) {
> +                fprintf(f, "%016" PRIx64 "%c",
> +                        a->vregs[i].q[j], j == 0 ? '\n' : ' ');
> +            }
> +        }
> +    }
> +
> +    for (i = 0; i < 8; i++) {
> +        if (m->kregs[i] != a->kregs[i]) {
> +            fprintf(f, "  k%-5d: %016" PRIx64 " v %016" PRIx64 "\n",
> +                    i, m->kregs[i], a->kregs[i]);
> +        }
> +    }
> +
>      return !ferror(f);
>  }
> diff --git a/test_i386.S b/test_i386.S
> index 456b99c..05344d7 100644
> --- a/test_i386.S
> +++ b/test_i386.S
> @@ -12,6 +12,37 @@
>  /* A trivial test image for x86 */
>
>  /* Initialise the registers to avoid spurious mismatches */
> +
> +#ifdef __x86_64__
> +#define BASE %rax
> +     lea     2f(%rip), BASE
> +#else
> +#define BASE %eax
> +     call    1f
> +1:   pop     BASE
> +     add     $2f-1b, BASE
> +#endif
> +
> +     movdqa  0(BASE), %xmm0
> +     movdqa  1*16(BASE), %xmm1
> +     movdqa  2*16(BASE), %xmm2
> +     movdqa  3*16(BASE), %xmm3
> +     movdqa  4*16(BASE), %xmm4
> +     movdqa  5*16(BASE), %xmm5
> +     movdqa  6*16(BASE), %xmm6
> +     movdqa  7*16(BASE), %xmm7
> +
> +#ifdef __x86_64__
> +     movdqa  8*16(BASE), %xmm8
> +     movdqa  9*16(BASE), %xmm9
> +     movdqa  10*16(BASE), %xmm10
> +     movdqa  11*16(BASE), %xmm11
> +     movdqa  12*16(BASE), %xmm12
> +     movdqa  13*16(BASE), %xmm13
> +     movdqa  14*16(BASE), %xmm14
> +     movdqa  15*16(BASE), %xmm15
> +#endif
> +
>       xor     %eax, %eax
>       sahf                            /* init eflags */
>
> @@ -39,3 +70,11 @@
>
>  /* exit test */
>       ud1     %ecx, %eax
> +
> +     .p2align 16
> +2:
> +     .set    i, 0
> +     .rept   256
> +     .byte   i
> +     .set    i, i + 1
> +     .endr


--
Alex Bennée



reply via email to

[Prev in Thread] Current Thread [Next in Thread]