[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
Re: [PATCH v2 3/8] x86: Grant AMX permission for guest
From: |
Yang Zhong |
Subject: |
Re: [PATCH v2 3/8] x86: Grant AMX permission for guest |
Date: |
Thu, 17 Feb 2022 13:58:36 +0800 |
User-agent: |
Mutt/1.5.21 (2010-09-15) |
On Wed, Feb 16, 2022 at 10:04:29PM -0800, Yang Zhong wrote:
> Kernel allocates 4K xstate buffer by default. For XSAVE features
> which require large state component (e.g. AMX), Linux kernel
> dynamically expands the xstate buffer only after the process has
> acquired the necessary permissions. Those are called dynamically-
> enabled XSAVE features (or dynamic xfeatures).
>
> There are separate permissions for native tasks and guests.
>
> Qemu should request the guest permissions for dynamic xfeatures
> which will be exposed to the guest. This only needs to be done
> once before the first vcpu is created.
>
> KVM implemented one new ARCH_GET_XCOMP_SUPP system attribute API to
> get host side supported_xcr0 and Qemu can decide if it can request
> dynamically enabled XSAVE features permission.
> https://lore.kernel.org/all/20220126152210.3044876-1-pbonzini@redhat.com/
>
> Suggested-by: Paolo Bonzini <pbonzini@redhat.com>
> Signed-off-by: Yang Zhong <yang.zhong@intel.com>
> Signed-off-by: Jing Liu <jing2.liu@intel.com>
> ---
> target/i386/cpu.h | 7 +++++++
> target/i386/cpu.c | 43 +++++++++++++++++++++++++++++++++++++++
> target/i386/kvm/kvm-cpu.c | 12 +++++------
> target/i386/kvm/kvm.c | 20 ++++++++++++++++++
> 4 files changed, 76 insertions(+), 6 deletions(-)
>
> diff --git a/target/i386/cpu.h b/target/i386/cpu.h
> index 06d2d6bccf..d4ad0f56bd 100644
> --- a/target/i386/cpu.h
> +++ b/target/i386/cpu.h
> @@ -549,6 +549,13 @@ typedef enum X86Seg {
> #define XSTATE_ZMM_Hi256_MASK (1ULL << XSTATE_ZMM_Hi256_BIT)
> #define XSTATE_Hi16_ZMM_MASK (1ULL << XSTATE_Hi16_ZMM_BIT)
> #define XSTATE_PKRU_MASK (1ULL << XSTATE_PKRU_BIT)
> +#define XSTATE_XTILE_CFG_MASK (1ULL << XSTATE_XTILE_CFG_BIT)
> +#define XSTATE_XTILE_DATA_MASK (1ULL << XSTATE_XTILE_DATA_BIT)
> +#define XFEATURE_XTILE_MASK (XSTATE_XTILE_CFG_MASK \
> + | XSTATE_XTILE_DATA_MASK)
> +
> +#define ARCH_GET_XCOMP_GUEST_PERM 0x1024
> +#define ARCH_REQ_XCOMP_GUEST_PERM 0x1025
>
> #define ESA_FEATURE_ALIGN64_BIT 1
>
> diff --git a/target/i386/cpu.c b/target/i386/cpu.c
> index ea7e8f9081..377d993438 100644
> --- a/target/i386/cpu.c
> +++ b/target/i386/cpu.c
> @@ -43,6 +43,8 @@
> #include "disas/capstone.h"
> #include "cpu-internal.h"
>
> +#include <sys/syscall.h>
> +
> /* Helpers for building CPUID[2] descriptors: */
>
> struct CPUID2CacheDescriptorInfo {
> @@ -6000,12 +6002,47 @@ static void x86_cpu_adjust_feat_level(X86CPU *cpu,
> FeatureWord w)
> }
> }
>
> +static void kvm_request_xsave_components(X86CPU *cpu, uint64_t mask)
> +{
> + KVMState *s = kvm_state;
> + uint64_t bitmask;
> + long rc;
> +
> + if ((mask & XSTATE_XTILE_DATA_MASK) == XSTATE_XTILE_DATA_MASK) {
> + bitmask = kvm_arch_get_supported_cpuid(s, 0xd, 0, R_EAX);
> + if (!(bitmask & XSTATE_XTILE_DATA_MASK)) {
Paolo, last time you suggested below changes for here:
rc = kvm_arch_get_supported_cpuid(s, 0xd, 0,
(xdata_bit < 32 ? R_EAX : R_EDX));
if (!(rc & BIT(xdata_bit & 31)) {
...
}
Since I used "mask" as parameter here, so I had to directly use R_EAX here.
Please review and if need change it to like "(xdata_bit < 32 ? R_EAX :
R_EDX)",
I will change this in next version, thanks!
Yang
> + warn_report("no amx support from supported_xcr0, "
> + "bitmask:0x%lx", bitmask);
> + return;
> + }
> +
> + rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM,
> + XSTATE_XTILE_DATA_BIT);
> + if (rc) {
> + /*
> + * The older kernel version(<5.15) can't support
> + * ARCH_REQ_XCOMP_GUEST_PERM and directly return.
> + */
> + return;
> + }
> +
> + rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask);
> + if (rc) {
> + warn_report("prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc);
> + } else if (!(bitmask & XFEATURE_XTILE_MASK)) {
> + warn_report("prctl(ARCH_REQ_XCOMP_GUEST_PERM) failure "
> + "and bitmask=0x%lx", bitmask);
> + }
> + }
> +}
> +
> /* Calculate XSAVE components based on the configured CPU feature flags */
> static void x86_cpu_enable_xsave_components(X86CPU *cpu)
> {
> CPUX86State *env = &cpu->env;
> int i;
> uint64_t mask;
> + static bool request_perm;
>
> if (!(env->features[FEAT_1_ECX] & CPUID_EXT_XSAVE)) {
> env->features[FEAT_XSAVE_COMP_LO] = 0;
> @@ -6021,6 +6058,12 @@ static void x86_cpu_enable_xsave_components(X86CPU
> *cpu)
> }
> }
>
> + /* Only request permission for first vcpu */
> + if (kvm_enabled() && !request_perm) {
> + kvm_request_xsave_components(cpu, mask);
> + request_perm = true;
> + }
> +
> env->features[FEAT_XSAVE_COMP_LO] = mask;
> env->features[FEAT_XSAVE_COMP_HI] = mask >> 32;
> }
> diff --git a/target/i386/kvm/kvm-cpu.c b/target/i386/kvm/kvm-cpu.c
> index ce27d3b1df..a35a1bf9fe 100644
> --- a/target/i386/kvm/kvm-cpu.c
> +++ b/target/i386/kvm/kvm-cpu.c
> @@ -84,7 +84,7 @@ static void kvm_cpu_max_instance_init(X86CPU *cpu)
> static void kvm_cpu_xsave_init(void)
> {
> static bool first = true;
> - KVMState *s = kvm_state;
> + uint32_t eax, ebx, ecx, edx;
> int i;
>
> if (!first) {
> @@ -100,11 +100,11 @@ static void kvm_cpu_xsave_init(void)
> ExtSaveArea *esa = &x86_ext_save_areas[i];
>
> if (esa->size) {
> - int sz = kvm_arch_get_supported_cpuid(s, 0xd, i, R_EAX);
> - if (sz != 0) {
> - assert(esa->size == sz);
> - esa->offset = kvm_arch_get_supported_cpuid(s, 0xd, i, R_EBX);
> - esa->ecx = kvm_arch_get_supported_cpuid(s, 0xd, i, R_ECX);
> + host_cpuid(0xd, i, &eax, &ebx, &ecx, &edx);
> + if (eax != 0) {
> + assert(esa->size == eax);
> + esa->offset = ebx;
> + esa->ecx = ecx;
> }
> }
> }
> diff --git a/target/i386/kvm/kvm.c b/target/i386/kvm/kvm.c
> index 2c8feb4a6f..3bdcd724c4 100644
> --- a/target/i386/kvm/kvm.c
> +++ b/target/i386/kvm/kvm.c
> @@ -348,6 +348,7 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s,
> uint32_t function,
> struct kvm_cpuid2 *cpuid;
> uint32_t ret = 0;
> uint32_t cpuid_1_edx;
> + uint64_t bitmask;
>
> cpuid = get_supported_cpuid(s);
>
> @@ -405,6 +406,25 @@ uint32_t kvm_arch_get_supported_cpuid(KVMState *s,
> uint32_t function,
> if (!has_msr_arch_capabs) {
> ret &= ~CPUID_7_0_EDX_ARCH_CAPABILITIES;
> }
> + } else if (function == 0xd && index == 0 &&
> + (reg == R_EAX || reg == R_EDX)) {
> + struct kvm_device_attr attr = {
> + .group = 0,
> + .attr = KVM_X86_XCOMP_GUEST_SUPP,
> + .addr = (unsigned long) &bitmask
> + };
> +
> + bool sys_attr = kvm_check_extension(s, KVM_CAP_SYS_ATTRIBUTES);
> + if (!sys_attr) {
> + warn_report("cannot get sys attribute capabilities %d",
> sys_attr);
> + }
> +
> + int rc = kvm_ioctl(s, KVM_GET_DEVICE_ATTR, &attr);
> + if (rc == -1 && (errno == ENXIO || errno == EINVAL)) {
> + warn_report("KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) "
> + "error: %d", rc);
> + }
> + ret = (reg == R_EAX) ? bitmask : bitmask >> 32;
> } else if (function == 0x80000001 && reg == R_ECX) {
> /*
> * It's safe to enable TOPOEXT even if it's not returned by