Skip to content

Commit 982ed0d

Browse files
dwmw2 authored and bonzini committed
KVM: Reinstate gfn_to_pfn_cache with invalidation support
This can be used in two modes. There is an atomic mode where the cached mapping is accessed while holding the rwlock, and a mode where the physical address is used by a vCPU in guest mode. For the latter case, an invalidation will wake the vCPU with the new KVM_REQ_GPC_INVALIDATE, and the architecture will need to refresh any caches it still needs to access before entering guest mode again. Only one vCPU can be targeted by the wake requests; it's simple enough to make it wake all vCPUs or even a mask but I don't see a use case for that additional complexity right now. Invalidation happens from the invalidate_range_start MMU notifier, which needs to be able to sleep in order to wake the vCPU and wait for it. This means that revalidation potentially needs to "wait" for the MMU operation to complete and the invalidate_range_end notifier to be invoked. Like the vCPU when it takes a page fault in that period, we just spin — fixing that in a future patch by implementing an actual *wait* may be another part of shaving this particularly hirsute yak. As noted in the comments in the function itself, the only case where the invalidate_range_start notifier is expected to be called *without* being able to sleep is when the OOM reaper is killing the process. In that case, we expect the vCPU threads already to have exited, and thus there will be nothing to wake, and no reason to wait. So we clear the KVM_REQUEST_WAIT bit and send the request anyway, then complain loudly if there actually *was* anything to wake up. Signed-off-by: David Woodhouse <[email protected]> Message-Id: <[email protected]> Signed-off-by: Paolo Bonzini <[email protected]>
1 parent 2efd61a commit 982ed0d

File tree

10 files changed

+517
-27
lines changed

10 files changed

+517
-27
lines changed

arch/x86/kvm/Kconfig

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ config KVM
2626
select PREEMPT_NOTIFIERS
2727
select MMU_NOTIFIER
2828
select HAVE_KVM_IRQCHIP
29+
select HAVE_KVM_PFNCACHE
2930
select HAVE_KVM_IRQFD
3031
select HAVE_KVM_DIRTY_RING
3132
select IRQ_BYPASS_MANAGER

include/linux/kvm_host.h

Lines changed: 103 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,7 @@ static inline bool is_error_page(struct page *page)
155155
#define KVM_REQ_UNBLOCK 2
156156
#define KVM_REQ_UNHALT 3
157157
#define KVM_REQ_VM_DEAD (4 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
158+
#define KVM_REQ_GPC_INVALIDATE (5 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
158159
#define KVM_REQUEST_ARCH_BASE 8
159160

160161
#define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \
@@ -593,6 +594,10 @@ struct kvm {
593594
unsigned long mn_active_invalidate_count;
594595
struct rcuwait mn_memslots_update_rcuwait;
595596

597+
/* For management / invalidation of gfn_to_pfn_caches */
598+
spinlock_t gpc_lock;
599+
struct list_head gpc_list;
600+
596601
/*
597602
* created_vcpus is protected by kvm->lock, and is incremented
598603
* at the beginning of KVM_CREATE_VCPU. online_vcpus is only
@@ -1099,6 +1104,104 @@ int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data,
10991104
unsigned long len);
11001105
void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn);
11011106

1107+
/**
1108+
* kvm_gfn_to_pfn_cache_init - prepare a cached kernel mapping and HPA for a
1109+
* given guest physical address.
1110+
*
1111+
* @kvm: pointer to kvm instance.
1112+
* @gpc: struct gfn_to_pfn_cache object.
1113+
* @vcpu: vCPU to be used for marking pages dirty and to be woken on
1114+
* invalidation.
1115+
* @guest_uses_pa: indicates that the resulting host physical PFN is used while
1116+
* @vcpu is IN_GUEST_MODE so invalidations should wake it.
1117+
* @kernel_map: requests a kernel virtual mapping (kmap / memremap).
1118+
* @gpa: guest physical address to map.
1119+
* @len: sanity check; the range being accessed must fit a single page.
1120+
* @dirty: mark the cache dirty immediately.
1121+
*
1122+
* @return: 0 for success.
1123+
* -EINVAL for a mapping which would cross a page boundary.
1124+
* -EFAULT for an untranslatable guest physical address.
1125+
*
1126+
* This primes a gfn_to_pfn_cache and links it into the @kvm's list for
1127+
* invalidations to be processed. Invalidation callbacks to @vcpu using
1128+
* %KVM_REQ_GPC_INVALIDATE will occur only for MMU notifiers, not for KVM
1129+
* memslot changes. Callers are required to use kvm_gfn_to_pfn_cache_check()
1130+
* to ensure that the cache is valid before accessing the target page.
1131+
*/
1132+
int kvm_gfn_to_pfn_cache_init(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
1133+
struct kvm_vcpu *vcpu, bool guest_uses_pa,
1134+
bool kernel_map, gpa_t gpa, unsigned long len,
1135+
bool dirty);
1136+
1137+
/**
1138+
* kvm_gfn_to_pfn_cache_check - check validity of a gfn_to_pfn_cache.
1139+
*
1140+
* @kvm: pointer to kvm instance.
1141+
* @gpc: struct gfn_to_pfn_cache object.
1142+
* @gpa: current guest physical address to map.
1143+
* @len: sanity check; the range being accessed must fit a single page.
1145+
*
1146+
* @return: %true if the cache is still valid and the address matches.
1147+
* %false if the cache is not valid.
1148+
*
1149+
* Callers outside IN_GUEST_MODE context should hold a read lock on @gpc->lock
1150+
* while calling this function, and then continue to hold the lock until the
1151+
* access is complete.
1152+
*
1153+
* Callers in IN_GUEST_MODE may do so without locking, although they should
1154+
* still hold a read lock on kvm->srcu for the memslot checks.
1155+
*/
1156+
bool kvm_gfn_to_pfn_cache_check(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
1157+
gpa_t gpa, unsigned long len);
1158+
1159+
/**
1160+
* kvm_gfn_to_pfn_cache_refresh - update a previously initialized cache.
1161+
*
1162+
* @kvm: pointer to kvm instance.
1163+
* @gpc: struct gfn_to_pfn_cache object.
1164+
* @gpa: updated guest physical address to map.
1165+
* @len: sanity check; the range being accessed must fit a single page.
1166+
* @dirty: mark the cache dirty immediately.
1167+
*
1168+
* @return: 0 for success.
1169+
* -EINVAL for a mapping which would cross a page boundary.
1170+
* -EFAULT for an untranslatable guest physical address.
1171+
*
1172+
* This will attempt to refresh a gfn_to_pfn_cache. Note that a successful
1173+
* return from this function does not mean the page can be immediately
1174+
* accessed because it may have raced with an invalidation. Callers must
1175+
* still lock and check the cache status, as this function does not return
1176+
* with the lock still held to permit access.
1177+
*/
1178+
int kvm_gfn_to_pfn_cache_refresh(struct kvm *kvm, struct gfn_to_pfn_cache *gpc,
1179+
gpa_t gpa, unsigned long len, bool dirty);
1180+
1181+
/**
1182+
* kvm_gfn_to_pfn_cache_unmap - temporarily unmap a gfn_to_pfn_cache.
1183+
*
1184+
* @kvm: pointer to kvm instance.
1185+
* @gpc: struct gfn_to_pfn_cache object.
1186+
*
1187+
* This unmaps the referenced page and marks it dirty, if appropriate. The
1188+
* cache is left in the invalid state but at least the mapping from GPA to
1189+
* userspace HVA will remain cached and can be reused on a subsequent
1190+
* refresh.
1191+
*/
1192+
void kvm_gfn_to_pfn_cache_unmap(struct kvm *kvm, struct gfn_to_pfn_cache *gpc);
1193+
1194+
/**
1195+
* kvm_gfn_to_pfn_cache_destroy - destroy and unlink a gfn_to_pfn_cache.
1196+
*
1197+
* @kvm: pointer to kvm instance.
1198+
* @gpc: struct gfn_to_pfn_cache object.
1199+
*
1200+
* This removes a cache from the @kvm's list to be processed on MMU notifier
1201+
* invocation.
1202+
*/
1203+
void kvm_gfn_to_pfn_cache_destroy(struct kvm *kvm, struct gfn_to_pfn_cache *gpc);
1204+
11021205
void kvm_sigset_activate(struct kvm_vcpu *vcpu);
11031206
void kvm_sigset_deactivate(struct kvm_vcpu *vcpu);
11041207

include/linux/kvm_types.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ struct kvm_memslots;
1919
enum kvm_mr_change;
2020

2121
#include <linux/types.h>
22+
#include <linux/spinlock_types.h>
2223

2324
#include <asm/kvm_types.h>
2425

@@ -53,6 +54,23 @@ struct gfn_to_hva_cache {
5354
struct kvm_memory_slot *memslot;
5455
};
5556

57+
struct gfn_to_pfn_cache {
58+
u64 generation;
59+
gpa_t gpa;
60+
unsigned long uhva;
61+
struct kvm_memory_slot *memslot;
62+
struct kvm_vcpu *vcpu;
63+
struct list_head list;
64+
rwlock_t lock;
65+
void *khva;
66+
kvm_pfn_t pfn;
67+
bool active;
68+
bool valid;
69+
bool dirty;
70+
bool kernel_map;
71+
bool guest_uses_pa;
72+
};
73+
5674
#ifdef KVM_ARCH_NR_OBJS_PER_MEMORY_CACHE
5775
/*
5876
* Memory caches are used to preallocate memory ahead of various MMU flows,

virt/kvm/Kconfig

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,9 @@
44
config HAVE_KVM
55
bool
66

7+
config HAVE_KVM_PFNCACHE
8+
bool
9+
710
config HAVE_KVM_IRQCHIP
811
bool
912

virt/kvm/Makefile.kvm

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,3 +11,4 @@ kvm-$(CONFIG_KVM_MMIO) += $(KVM)/coalesced_mmio.o
1111
kvm-$(CONFIG_KVM_ASYNC_PF) += $(KVM)/async_pf.o
1212
kvm-$(CONFIG_HAVE_KVM_IRQ_ROUTING) += $(KVM)/irqchip.o
1313
kvm-$(CONFIG_HAVE_KVM_DIRTY_RING) += $(KVM)/dirty_ring.o
14+
kvm-$(CONFIG_HAVE_KVM_PFNCACHE) += $(KVM)/pfncache.o

virt/kvm/dirty_ring.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
#include <linux/vmalloc.h>
1010
#include <linux/kvm_dirty_ring.h>
1111
#include <trace/events/kvm.h>
12-
#include "mmu_lock.h"
12+
#include "kvm_mm.h"
1313

1414
int __weak kvm_cpu_dirty_log_size(void)
1515
{

virt/kvm/kvm_main.c

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,7 @@
5959

6060
#include "coalesced_mmio.h"
6161
#include "async_pf.h"
62-
#include "mmu_lock.h"
62+
#include "kvm_mm.h"
6363
#include "vfio.h"
6464

6565
#define CREATE_TRACE_POINTS
@@ -711,6 +711,9 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
711711
kvm->mn_active_invalidate_count++;
712712
spin_unlock(&kvm->mn_invalidate_lock);
713713

714+
gfn_to_pfn_cache_invalidate_start(kvm, range->start, range->end,
715+
hva_range.may_block);
716+
714717
__kvm_handle_hva_range(kvm, &hva_range);
715718

716719
return 0;
@@ -1071,6 +1074,9 @@ static struct kvm *kvm_create_vm(unsigned long type)
10711074
rcuwait_init(&kvm->mn_memslots_update_rcuwait);
10721075
xa_init(&kvm->vcpu_array);
10731076

1077+
INIT_LIST_HEAD(&kvm->gpc_list);
1078+
spin_lock_init(&kvm->gpc_lock);
1079+
10741080
INIT_LIST_HEAD(&kvm->devices);
10751081

10761082
BUILD_BUG_ON(KVM_MEM_SLOTS_NUM > SHRT_MAX);
@@ -2539,8 +2545,8 @@ static int hva_to_pfn_remapped(struct vm_area_struct *vma,
25392545
* 2): @write_fault = false && @writable, @writable will tell the caller
25402546
* whether the mapping is writable.
25412547
*/
2542-
static kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
2543-
bool write_fault, bool *writable)
2548+
kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
2549+
bool write_fault, bool *writable)
25442550
{
25452551
struct vm_area_struct *vma;
25462552
kvm_pfn_t pfn = 0;

virt/kvm/kvm_mm.h

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
/* SPDX-License-Identifier: GPL-2.0-only */
2+
3+
#ifndef __KVM_MM_H__
4+
#define __KVM_MM_H__ 1
5+
6+
/*
7+
* Architectures can choose whether to use an rwlock or spinlock
8+
* for the mmu_lock. These macros, for use in common code
9+
* only, avoid using #ifdefs in places that must deal with
10+
* multiple architectures.
11+
*/
12+
13+
#ifdef KVM_HAVE_MMU_RWLOCK
14+
#define KVM_MMU_LOCK_INIT(kvm) rwlock_init(&(kvm)->mmu_lock)
15+
#define KVM_MMU_LOCK(kvm) write_lock(&(kvm)->mmu_lock)
16+
#define KVM_MMU_UNLOCK(kvm) write_unlock(&(kvm)->mmu_lock)
17+
#define KVM_MMU_READ_LOCK(kvm) read_lock(&(kvm)->mmu_lock)
18+
#define KVM_MMU_READ_UNLOCK(kvm) read_unlock(&(kvm)->mmu_lock)
19+
#else
20+
#define KVM_MMU_LOCK_INIT(kvm) spin_lock_init(&(kvm)->mmu_lock)
21+
#define KVM_MMU_LOCK(kvm) spin_lock(&(kvm)->mmu_lock)
22+
#define KVM_MMU_UNLOCK(kvm) spin_unlock(&(kvm)->mmu_lock)
23+
#define KVM_MMU_READ_LOCK(kvm) spin_lock(&(kvm)->mmu_lock)
24+
#define KVM_MMU_READ_UNLOCK(kvm) spin_unlock(&(kvm)->mmu_lock)
25+
#endif /* KVM_HAVE_MMU_RWLOCK */
26+
27+
kvm_pfn_t hva_to_pfn(unsigned long addr, bool atomic, bool *async,
28+
bool write_fault, bool *writable);
29+
30+
#ifdef CONFIG_HAVE_KVM_PFNCACHE
31+
void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm,
32+
unsigned long start,
33+
unsigned long end,
34+
bool may_block);
35+
#else
36+
static inline void gfn_to_pfn_cache_invalidate_start(struct kvm *kvm,
37+
unsigned long start,
38+
unsigned long end,
39+
bool may_block)
40+
{
41+
}
42+
#endif /* CONFIG_HAVE_KVM_PFNCACHE */
43+
44+
#endif /* __KVM_MM_H__ */

virt/kvm/mmu_lock.h

Lines changed: 0 additions & 23 deletions
This file was deleted.

0 commit comments

Comments
 (0)