mirror of https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git synced 2026-01-11 17:10:13 +00:00

Compare commits


6 Commits

Author SHA1 Message Date
Linus Torvalds
8b690556d8 Arm:
- Only adjust the ID registers for the no-irqchip case once per VM,
   when the first vcpu runs, instead of doing it once per vcpu, as this
   otherwise triggers a pretty bad consistency check failure in the
   sysreg code.
 
 - Make sure the per-vcpu Fine Grain Traps are computed before we load
   the system registers on the HW, as we otherwise start running without
   anything set until the first preemption of the vcpu.
 
 x86:
 
 - Fix a selftest failure on AMD; the test was checking for an
   optimization that had stopped happening.
 -----BEGIN PGP SIGNATURE-----
 
 iQFIBAABCgAyFiEE8TM4V0tmI4mGbHaCv/vSX3jHroMFAmkcpToUHHBib256aW5p
 QHJlZGhhdC5jb20ACgkQv/vSX3jHroMG4AgAl68nLWOdc6iRq/JRiiTOEx/urykc
 /nKqUz7YlefEbOB64F/ZADvmluIOTs85/bdutCTO/VzJ8lkimyhISygZzA/y6Gav
 XgAIekZ/QhriIqqfcrMGET+ug3EnOxCAo/M8kWmBtra7EPTrejUJhKtensBd4TXv
 GTcU+yxZJF7jLE84a8CuWVbHdSfyiLYP5V6cDeMtuqvZiR5cxqyzuL+KFri4jZ72
 2jxovTRpxjOh4n759Oe+eqEhl6tgWBEfOVvDMOP7hkcFGlFpsIkKGL5zR0b+xFNr
 jAm5AGbJMY8n8qSM2s0OS0+Md/3kiyhvL121XmV0iGRRc1Ceq3SWFB5pRg==
 =5CNi
 -----END PGP SIGNATURE-----

Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm

Pull kvm fixes from Paolo Bonzini:
 "Arm:

   - Only adjust the ID registers for the no-irqchip case once per VM,
     when the first vcpu runs, instead of doing it once per vcpu, as
     this otherwise triggers a pretty bad consistency check failure in
     the sysreg code

   - Make sure the per-vcpu Fine Grain Traps are computed before we load
     the system registers on the HW, as we otherwise start running
     without anything set until the first preemption of the vcpu

  x86:

   - Fix a selftest failure on AMD; the test was checking for an
     optimization that had stopped happening"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm:
  KVM: SVM: Fix redundant updates of LBR MSR intercepts
  KVM: arm64: VHE: Compute fgt traps before activating them
  KVM: arm64: Finalize ID registers only once per VM
2025-11-18 10:02:22 -08:00
Yosry Ahmed
3fa05f96fc KVM: SVM: Fix redundant updates of LBR MSR intercepts
Don't update the LBR MSR intercept bitmaps if they're already up-to-date,
as unconditionally updating the intercepts forces KVM to recalculate the
MSR bitmaps for vmcb02 on every nested VMRUN.  The redundant updates are
functionally okay; however, they neuter an optimization in Hyper-V
nested virtualization enlightenments, and this manifests as a selftest
failure.

In particular, Hyper-V lets L1 mark "nested enlightenments" as clean, i.e.
tell KVM that no changes were made to the MSR bitmap since the last VMRUN.
The hyperv_svm_test KVM selftest intentionally changes the MSR bitmap
"without telling KVM about it" to verify that KVM honors the clean hint,
and correctly fails because KVM notices the changed bitmap anyway:

  ==== Test Assertion Failure ====
  x86/hyperv_svm_test.c:120: vmcb->control.exit_code == 0x081
  pid=193558 tid=193558 errno=4 - Interrupted system call
     1	0x0000000000411361: assert_on_unhandled_exception at processor.c:659
     2	0x0000000000406186: _vcpu_run at kvm_util.c:1699
     3	 (inlined by) vcpu_run at kvm_util.c:1710
     4	0x0000000000401f2a: main at hyperv_svm_test.c:175
     5	0x000000000041d0d3: __libc_start_call_main at libc-start.o:?
     6	0x000000000041f27c: __libc_start_main_impl at ??:?
     7	0x00000000004021a0: _start at ??:?
  vmcb->control.exit_code == SVM_EXIT_VMMCALL

Do *not* fix this by skipping svm_hv_vmcb_dirty_nested_enlightenments()
when svm_set_intercept_for_msr() performs a no-op change.  Changes to
the L0 MSR interception bitmap are only triggered by full CPUID updates
and MSR filter updates, both of which should be rare.  Changing
svm_set_intercept_for_msr() risks hiding unintended pessimizations
like this one, and is actually more complex than this change.

Fixes: fbe5e5f030c2 ("KVM: nSVM: Always recalculate LBR MSR intercepts in svm_update_lbrv()")
Cc: stable@vger.kernel.org
Signed-off-by: Yosry Ahmed <yosry.ahmed@linux.dev>
Link: https://patch.msgid.link/20251112013017.1836863-1-yosry.ahmed@linux.dev
[Rewritten commit message based on mailing list discussion. - Paolo]
Reviewed-by: Sean Christopherson <seanjc@google.com>
Tested-by: Sean Christopherson <seanjc@google.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2025-11-18 17:52:20 +01:00
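
The fix reduces to caching the last intercept state that was programmed and
bailing out of no-op recalculations. A minimal sketch of the pattern,
mirroring the arch/x86/kvm/svm/svm.c hunk in the diff section below (the
individual svm_set_intercept_for_msr() calls are elided):

  static void svm_recalc_lbr_msr_intercepts(struct kvm_vcpu *vcpu)
  {
      struct vcpu_svm *svm = to_svm(vcpu);
      bool intercept = !(svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK);

      /* Nothing changed: leave the bitmaps alone, so the Hyper-V
       * "MSR bitmap clean" enlightenment stays intact. */
      if (intercept == svm->lbr_msrs_intercepted)
          return;

      /* ... svm_set_intercept_for_msr() calls for the LBR MSRs ... */

      svm->lbr_msrs_intercepted = intercept; /* remember what was programmed */
  }
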
Paolo Bonzini
b82ebaf298 KVM/arm64 fixes for 6.18, take #3
- Only adjust the ID registers for the no-irqchip case once per VM,
   when the first vcpu runs, instead of doing it once per vcpu, as this
   otherwise triggers a pretty bad consistency check failure in the
   sysreg code.
 
 - Make sure the per-vcpu Fine Grain Traps are computed before we load
   the system registers on the HW, as we otherwise start running without
   anything set until the first preemption of the vcpu.
 -----BEGIN PGP SIGNATURE-----
 
 iQIzBAABCgAdFiEEn9UcU+C1Yxj9lZw9I9DQutE9ekMFAmkXEhAACgkQI9DQutE9
 ekNXYRAAs5z912VxL1RrmvVuHhDb1ggVJj+gPJS5cMBVBgZWT4A18EvT9D8pD0Wy
 3gqpxWzi8HlPPip2Lh0b57H24ObpFlfJ2Le+i+A4dVARjHilM2bo9/NfoBO0EC+I
 pm3MbLGw9fQyP7TbIQ7uVSrRMtyVvrQT/Z4g8GkJ/QidY32Rp6CkhJID3uHMuraG
 GzuB6VOUGDk7LPvKyMvMPvQ5IctckSylcZkAr+2lmKMUYrtwKRIbnYBiHrSLcPfH
 RQ7iekzDEQoFZppt96ucPiNgO22ZYA72hrbHig9+YLz7kU6/X4LlQDkm7vzgGSJm
 5zUJD7+BkLhapUulVtNbl6TzKkH/uo3PsXK0F+kvJ5AMOFW+kWaUs868LL1/I4O6
 ruMOPtZ8s3hjC+cOyxhrYxJ++rtoHe3Lyp8C7zqXwFVbStqdyNcr78SQyMthoyGz
 UJRr9FMw7aGkuaS8JWSCWI+Dw2VFQsYgFm/5LCZ5QFpWKdGX3gi2jZvr22/8m+6a
 nk1u9OmbAgqI+vhRwiXWWh4KyKUHq9cTUTWZ5ytpCrRaPMuf4Ixpv+9Ysb7SUcdu
 CpYrBg/67ntb9bFwxJAb9ZwqKmjJMdTSN6SGx6pbM62eoXMTpjko9rJxLPee9ad8
 yO9RSvyYj3F9adw+g0GdkKYDudqdpFV34ZTsvttuUEzCAMk3L1Q=
 =MCeE
 -----END PGP SIGNATURE-----

Merge tag 'kvmarm-fixes-6.18-3' of git://git.kernel.org/pub/scm/linux/kernel/git/kvmarm/kvmarm into HEAD

KVM/arm64 fixes for 6.18, take #3

- Only adjust the ID registers for the no-irqchip case once per VM,
  when the first vcpu runs, instead of doing it once per vcpu, as this
  otherwise triggers a pretty bad consistency check failure in the
  sysreg code.

- Make sure the per-vcpu Fine Grain Traps are computed before we load
  the system registers on the HW, as we otherwise start running without
  anything set until the first preemption of the vcpu.
2025-11-18 17:38:01 +01:00
Linus Torvalds
5bebe8de19 mm/huge_memory: Fix initialization of huge zero folio
The recent fix to properly initialize the tags of the huge zero folio
had an unfortunate not-so-subtle side effect: it caused the actual
*contents* of the huge zero folio to not be initialized at all when the
hardware didn't support memory tagging.

The reason was the unfortunate semantics of tag_clear_highpage(): on
hardware that didn't do the tagging, it would silently just not do
anything at all.  And since this is done only on arm64 with MTE support,
that basically meant most hardware.

It wasn't immediately obvious since the huge zero page isn't very
heavily used - or because the memory might already happen to be zero,
all-zeroes being the most common pattern.  But it ends up causing
random odd user space failures when you do hit it.

The unfortunate semantics have been around for a while, but became a
real bug only when we started actively using __GFP_ZEROTAGS in the
generic get_huge_zero_folio() function - before that, it had only ever
been used in code that checked that the hardware supported it.

Fix this by simply changing the semantics of tag_clear_highpage() to
return whether it actually successfully did something or not.  While at
it, also make it initialize multiple pages in one go, since that's
actually what the only caller wants it to do and it simplifies the whole
logic.

Fixes: adfb6609c680 ("mm/huge_memory: initialise the tags of the huge zero folio")
Link: https://lore.kernel.org/all/20251117082023.90176-1-00107082@163.com/
Reviewed-by: David Hildenbrand (Red Hat) <david@kernel.org>
Reported-and-tested-by: David Wang <00107082@163.com>
Reported-and-tested-by: Carlos Llamas <cmllamas@google.com>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
2025-11-18 08:21:27 -08:00
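
The semantic change is easiest to see from the caller's side. A minimal
sketch of the new contract (the fallback loop stands in for the caller's
existing zeroing path; in post_alloc_hook() in the diff below it is the
generic init flag rather than an explicit loop):

  /* tag_clear_highpages() now reports whether it initialized anything:
   * on hardware without MTE it returns false instead of silently doing
   * nothing, so the caller must keep its own zeroing path. */
  if (!tag_clear_highpages(page, numpages)) {
      int i;

      for (i = 0; i < numpages; i++)
          clear_highpage(page + i); /* plain zeroing fallback */
  }
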
Alexandru Elisei
85592114ff KVM: arm64: VHE: Compute fgt traps before activating them
On VHE, the Fine Grain Traps registers are written to hardware in
kvm_arch_vcpu_load()->..->__activate_traps_hfgxtr(), but the fgt array is
computed later, in kvm_vcpu_load_fgt(). This can lead to zero being written
to the FGT registers the first time a VCPU is loaded. Also, any changes to
the fgt array will be visible only after the VCPU is scheduled out, and
then back in, which is not the intended behaviour.

Fix it by computing the fgt array just before the fgt traps are written
to hardware.

Fixes: fb10ddf35c1c ("KVM: arm64: Compute per-vCPU FGTs at vcpu_load()")
Signed-off-by: Alexandru Elisei <alexandru.elisei@arm.com>
Reviewed-by: Oliver Upton <oupton@kernel.org>
Link: https://patch.msgid.link/20251112102853.47759-1-alexandru.elisei@arm.com
Signed-off-by: Marc Zyngier <maz@kernel.org>
2025-11-12 10:52:58 +00:00
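
The fix is purely an ordering change inside kvm_arch_vcpu_load(). A minimal
sketch matching the arch/arm64/kvm/arm.c hunks in the diff section below:

  /* Compute the per-vCPU FGT values *before* anything writes them to
   * hardware; on VHE, kvm_vcpu_load_vhe() ends up in
   * __activate_traps_hfgxtr(), which programs whatever the fgt array
   * holds at that point. */
  kvm_vcpu_load_fgt(vcpu);          /* fill the fgt array first */
  if (has_vhe())
      kvm_vcpu_load_vhe(vcpu);      /* now programs valid FGT values */
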
Marc Zyngier
0f559cd91e KVM: arm64: Finalize ID registers only once per VM
Owing to the ID registers being global to the VM, there is no point
in computing them more than once.  However, recent changes making
use of kvm_set_vm_id_reg() highlighted that we repeatedly hammer
the ID registers when we shouldn't.

Gate the ID reg update on the VM having never run.

Fixes: 50e7cce81b9b2 ("KVM: arm64: Limit clearing of ID_{AA64PFR0,PFR1}_EL1.GIC to userspace irqchip")
Fixes: 5cb57a1aff755 ("KVM: arm64: Zero ID_AA64PFR0_EL1.GIC when no GICv3 is presented to the guest")
Closes: https://lore.kernel.org/r/aRHf6x5umkTYhYJ3@finisterre.sirena.org.uk
Reported-by: Mark Brown <broonie@kernel.org>
Tested-by: Mark Brown <broonie@kernel.org>
Link: https://patch.msgid.link/20251110173010.1918424-1-maz@kernel.org
Signed-off-by: Marc Zyngier <maz@kernel.org>
2025-11-11 12:24:22 +00:00
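
The resulting gate is a single condition. A minimal sketch matching the
arch/arm64/kvm/sys_regs.c hunk in the diff section below:

  /* The ID registers are VM-global, so rewrite them only while no vcpu
   * has ever run; after that, kvm_set_vm_id_reg() would trip its
   * consistency check. */
  if (!irqchip_in_kernel(kvm) && !kvm_vm_has_ran_once(kvm)) {
      /* ... adjust ID_AA64PFR0_EL1.GIC and friends ... */
  }
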
8 changed files with 34 additions and 24 deletions

arch/arm64/include/asm/page.h

@@ -33,8 +33,8 @@ struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma,
                                              unsigned long vaddr);
 #define vma_alloc_zeroed_movable_folio vma_alloc_zeroed_movable_folio
 
-void tag_clear_highpage(struct page *to);
-#define __HAVE_ARCH_TAG_CLEAR_HIGHPAGE
+bool tag_clear_highpages(struct page *to, int numpages);
+#define __HAVE_ARCH_TAG_CLEAR_HIGHPAGES
 
 #define clear_user_page(page, vaddr, pg)    clear_page(page)
 #define copy_user_page(to, from, vaddr, pg) copy_page(to, from)

arch/arm64/kvm/arm.c

@@ -624,6 +624,7 @@ nommu:
     kvm_timer_vcpu_load(vcpu);
     kvm_vgic_load(vcpu);
     kvm_vcpu_load_debug(vcpu);
+    kvm_vcpu_load_fgt(vcpu);
     if (has_vhe())
         kvm_vcpu_load_vhe(vcpu);
     kvm_arch_vcpu_load_fp(vcpu);
@@ -642,7 +643,6 @@ nommu:
     vcpu->arch.hcr_el2 |= HCR_TWI;
 
     vcpu_set_pauth_traps(vcpu);
-    kvm_vcpu_load_fgt(vcpu);
 
     if (is_protected_kvm_enabled()) {
         kvm_call_hyp_nvhe(__pkvm_vcpu_load,

arch/arm64/kvm/sys_regs.c

@@ -5609,7 +5609,11 @@ int kvm_finalize_sys_regs(struct kvm_vcpu *vcpu)
 
     guard(mutex)(&kvm->arch.config_lock);
 
-    if (!irqchip_in_kernel(kvm)) {
+    /*
+     * This hacks into the ID registers, so only perform it when the
+     * first vcpu runs, or the kvm_set_vm_id_reg() helper will scream.
+     */
+    if (!irqchip_in_kernel(kvm) && !kvm_vm_has_ran_once(kvm)) {
         u64 val;
 
         val = kvm_read_vm_id_reg(kvm, SYS_ID_AA64PFR0_EL1) & ~ID_AA64PFR0_EL1_GIC;

arch/arm64/mm/fault.c

@@ -967,20 +967,21 @@ struct folio *vma_alloc_zeroed_movable_folio(struct vm_area_struct *vma,
     return vma_alloc_folio(flags, 0, vma, vaddr);
 }
 
-void tag_clear_highpage(struct page *page)
+bool tag_clear_highpages(struct page *page, int numpages)
 {
     /*
      * Check if MTE is supported and fall back to clear_highpage().
      * get_huge_zero_folio() unconditionally passes __GFP_ZEROTAGS and
-     * post_alloc_hook() will invoke tag_clear_highpage().
+     * post_alloc_hook() will invoke tag_clear_highpages().
      */
-    if (!system_supports_mte()) {
-        clear_highpage(page);
-        return;
-    }
+    if (!system_supports_mte())
+        return false;
 
-    /* Newly allocated page, shouldn't have been tagged yet */
-    WARN_ON_ONCE(!try_page_mte_tagging(page));
-    mte_zero_clear_page_tags(page_address(page));
-    set_page_mte_tagged(page);
+    /* Newly allocated pages, shouldn't have been tagged yet */
+    for (int i = 0; i < numpages; i++, page++) {
+        WARN_ON_ONCE(!try_page_mte_tagging(page));
+        mte_zero_clear_page_tags(page_address(page));
+        set_page_mte_tagged(page);
+    }
+    return true;
 }

arch/x86/kvm/svm/svm.c

@@ -705,7 +705,11 @@ void *svm_alloc_permissions_map(unsigned long size, gfp_t gfp_mask)
 
 static void svm_recalc_lbr_msr_intercepts(struct kvm_vcpu *vcpu)
 {
-    bool intercept = !(to_svm(vcpu)->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK);
+    struct vcpu_svm *svm = to_svm(vcpu);
+    bool intercept = !(svm->vmcb->control.virt_ext & LBR_CTL_ENABLE_MASK);
+
+    if (intercept == svm->lbr_msrs_intercepted)
+        return;
 
     svm_set_intercept_for_msr(vcpu, MSR_IA32_LASTBRANCHFROMIP, MSR_TYPE_RW, intercept);
     svm_set_intercept_for_msr(vcpu, MSR_IA32_LASTBRANCHTOIP, MSR_TYPE_RW, intercept);
@@ -714,6 +718,8 @@ static void svm_recalc_lbr_msr_intercepts(struct kvm_vcpu *vcpu)
 
     if (sev_es_guest(vcpu->kvm))
         svm_set_intercept_for_msr(vcpu, MSR_IA32_DEBUGCTLMSR, MSR_TYPE_RW, intercept);
+
+    svm->lbr_msrs_intercepted = intercept;
 }
 
 void svm_vcpu_free_msrpm(void *msrpm)
@@ -1221,6 +1227,7 @@ static int svm_vcpu_create(struct kvm_vcpu *vcpu)
     }
 
     svm->x2avic_msrs_intercepted = true;
+    svm->lbr_msrs_intercepted = true;
 
     svm->vmcb01.ptr = page_address(vmcb01_page);
     svm->vmcb01.pa = __sme_set(page_to_pfn(vmcb01_page) << PAGE_SHIFT);

arch/x86/kvm/svm/svm.h

@@ -336,6 +336,7 @@ struct vcpu_svm {
     bool guest_state_loaded;
 
     bool x2avic_msrs_intercepted;
+    bool lbr_msrs_intercepted;
 
     /* Guest GIF value, used when vGIF is not enabled */
     bool guest_gif;

include/linux/highmem.h

@@ -249,10 +249,12 @@ static inline void clear_highpage_kasan_tagged(struct page *page)
     kunmap_local(kaddr);
 }
 
-#ifndef __HAVE_ARCH_TAG_CLEAR_HIGHPAGE
+#ifndef __HAVE_ARCH_TAG_CLEAR_HIGHPAGES
 
-static inline void tag_clear_highpage(struct page *page)
+/* Return false to let people know we did not initialize the pages */
+static inline bool tag_clear_highpages(struct page *page, int numpages)
 {
+    return false;
 }
 
 #endif

mm/page_alloc.c

@@ -1822,14 +1822,9 @@ inline void post_alloc_hook(struct page *page, unsigned int order,
      * If memory tags should be zeroed
      * (which happens only when memory should be initialized as well).
      */
-    if (zero_tags) {
-        /* Initialize both memory and memory tags. */
-        for (i = 0; i != 1 << order; ++i)
-            tag_clear_highpage(page + i);
-        /* Take note that memory was initialized by the loop above. */
-        init = false;
-    }
+    if (zero_tags)
+        init = !tag_clear_highpages(page, 1 << order);
 
     if (!should_skip_kasan_unpoison(gfp_flags) &&
         kasan_unpoison_pages(page, order, init)) {
         /* Take note that memory was initialized by KASAN. */