
Commit af585b9

Gleb Natapov authored and avikivity committed
KVM: Halt vcpu if page it tries to access is swapped out
If a guest accesses swapped out memory do not swap it in from vcpu thread context. Schedule work to do the swapping and put the vcpu into a halted state instead.

Interrupts will still be delivered to the guest, and if an interrupt causes a reschedule the guest will continue to run another task.

[avi: remove call to get_user_pages_noio(), nacked by Linus; this makes everything synchronous again]

Acked-by: Rik van Riel <riel@redhat.com>
Signed-off-by: Gleb Natapov <gleb@redhat.com>
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
1 parent 010c520 commit af585b9
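
The commit message describes the intended flow: a fault on swapped-out guest memory no longer blocks the vcpu thread; the vcpu is put into a synthetic halt and woken once the page has been brought in. The fragment below is a minimal, self-contained userspace model of that flow, not kernel code; the struct and function names (model_vcpu, guest_access, async_pf_completion) are invented for illustration, and the real implementation is in the diff that follows.

/*
 * Toy model of the synthetic-halt flow described in the commit message.
 * Illustrative sketch only; names are invented and nothing here is kernel API.
 */
#include <stdbool.h>
#include <stdio.h>

struct model_vcpu {
        bool apf_halted;        /* mirrors vcpu->arch.apf.halted in the patch */
        bool work_pending;      /* a swap-in work item is still outstanding   */
        int  pending_gfn;       /* gfn that the outstanding work will bring in */
};

/* Guest touches a gfn: if it is "swapped out", queue work and request halt. */
static bool guest_access(struct model_vcpu *vcpu, int gfn, bool swapped_out)
{
        if (!swapped_out)
                return true;            /* page present, guest keeps running */
        vcpu->work_pending = true;      /* plays the role of kvm_setup_async_pf() */
        vcpu->pending_gfn = gfn;
        vcpu->apf_halted = true;        /* plays the role of KVM_REQ_APF_HALT */
        return false;
}

/* The worker finished swapping the page in: un-halt the vcpu. */
static void async_pf_completion(struct model_vcpu *vcpu)
{
        if (!vcpu->work_pending)
                return;
        printf("gfn %d swapped in, un-halting vcpu\n", vcpu->pending_gfn);
        vcpu->work_pending = false;
        vcpu->apf_halted = false;
}

int main(void)
{
        struct model_vcpu vcpu = {0};

        /* First access hits a swapped-out page: the vcpu halts instead of blocking. */
        if (!guest_access(&vcpu, 42, true))
                printf("vcpu halted while gfn %d is swapped in\n", vcpu.pending_gfn);

        /* Interrupts could still be injected here; the run loop simply
         * skips guest entry while apf_halted is set. */
        async_pf_completion(&vcpu);

        /* Second access succeeds and the guest resumes. */
        if (guest_access(&vcpu, 42, false))
                printf("guest resumes\n");
        return 0;
}

The point of the synthetic halt is that the vcpu stays schedulable: interrupts can still be delivered, and kvm_arch_vcpu_runnable() treats a completed async-PF work item as a wake-up condition, as the x86.c hunks below show.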


12 files changed: +570 -16 lines changed


arch/x86/include/asm/kvm_host.h

+18
@@ -83,11 +83,14 @@
 #define KVM_NR_FIXED_MTRR_REGION 88
 #define KVM_NR_VAR_MTRR 8
 
+#define ASYNC_PF_PER_VCPU 64
+
 extern spinlock_t kvm_lock;
 extern struct list_head vm_list;
 
 struct kvm_vcpu;
 struct kvm;
+struct kvm_async_pf;
 
 enum kvm_reg {
         VCPU_REGS_RAX = 0,
@@ -412,6 +415,11 @@ struct kvm_vcpu_arch {
         u64 hv_vapic;
 
         cpumask_var_t wbinvd_dirty_mask;
+
+        struct {
+                bool halted;
+                gfn_t gfns[roundup_pow_of_two(ASYNC_PF_PER_VCPU)];
+        } apf;
 };
 
 struct kvm_arch {
@@ -585,6 +593,10 @@ struct kvm_x86_ops {
         const struct trace_print_flags *exit_reasons_str;
 };
 
+struct kvm_arch_async_pf {
+        gfn_t gfn;
+};
+
 extern struct kvm_x86_ops *kvm_x86_ops;
 
 int kvm_mmu_module_init(void);
@@ -799,4 +811,10 @@ void kvm_set_shared_msr(unsigned index, u64 val, u64 mask);
 
 bool kvm_is_linear_rip(struct kvm_vcpu *vcpu, unsigned long linear_rip);
 
+void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
+                                     struct kvm_async_pf *work);
+void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
+                                 struct kvm_async_pf *work);
+extern bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn);
+
 #endif /* _ASM_X86_KVM_HOST_H */

arch/x86/kvm/Kconfig

+1
@@ -28,6 +28,7 @@ config KVM
         select HAVE_KVM_IRQCHIP
         select HAVE_KVM_EVENTFD
         select KVM_APIC_ARCHITECTURE
+        select KVM_ASYNC_PF
         select USER_RETURN_NOTIFIER
         select KVM_MMIO
         ---help---

arch/x86/kvm/Makefile

+1
@@ -9,6 +9,7 @@ kvm-y += $(addprefix ../../../virt/kvm/, kvm_main.o ioapic.o \
                                 coalesced_mmio.o irq_comm.o eventfd.o \
                                 assigned-dev.o)
 kvm-$(CONFIG_IOMMU_API)        += $(addprefix ../../../virt/kvm/, iommu.o)
+kvm-$(CONFIG_KVM_ASYNC_PF)     += $(addprefix ../../../virt/kvm/, async_pf.o)
 
 kvm-y                          += x86.o mmu.o emulate.o i8259.o irq.o lapic.o \
                                   i8254.o timer.o

arch/x86/kvm/mmu.c

+51 -1
@@ -18,9 +18,11 @@
  *
  */
 
+#include "irq.h"
 #include "mmu.h"
 #include "x86.h"
 #include "kvm_cache_regs.h"
+#include "x86.h"
 
 #include <linux/kvm_host.h>
 #include <linux/types.h>
@@ -2587,6 +2589,50 @@ static int nonpaging_page_fault(struct kvm_vcpu *vcpu, gva_t gva,
                                      error_code & PFERR_WRITE_MASK, gfn);
 }
 
+int kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gva_t gva, gfn_t gfn)
+{
+        struct kvm_arch_async_pf arch;
+        arch.gfn = gfn;
+
+        return kvm_setup_async_pf(vcpu, gva, gfn, &arch);
+}
+
+static bool can_do_async_pf(struct kvm_vcpu *vcpu)
+{
+        if (unlikely(!irqchip_in_kernel(vcpu->kvm) ||
+                     kvm_event_needs_reinjection(vcpu)))
+                return false;
+
+        return kvm_x86_ops->interrupt_allowed(vcpu);
+}
+
+static bool try_async_pf(struct kvm_vcpu *vcpu, gfn_t gfn, gva_t gva,
+                         pfn_t *pfn)
+{
+        bool async;
+
+        *pfn = gfn_to_pfn_async(vcpu->kvm, gfn, &async);
+
+        if (!async)
+                return false; /* *pfn has correct page already */
+
+        put_page(pfn_to_page(*pfn));
+
+        if (can_do_async_pf(vcpu)) {
+                trace_kvm_try_async_get_page(async, *pfn);
+                if (kvm_find_async_pf_gfn(vcpu, gfn)) {
+                        trace_kvm_async_pf_doublefault(gva, gfn);
+                        kvm_make_request(KVM_REQ_APF_HALT, vcpu);
+                        return true;
+                } else if (kvm_arch_setup_async_pf(vcpu, gva, gfn))
+                        return true;
+        }
+
+        *pfn = gfn_to_pfn(vcpu->kvm, gfn);
+
+        return false;
+}
+
 static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
                           u32 error_code)
 {
@@ -2609,7 +2655,11 @@ static int tdp_page_fault(struct kvm_vcpu *vcpu, gva_t gpa,
 
         mmu_seq = vcpu->kvm->mmu_notifier_seq;
         smp_rmb();
-        pfn = gfn_to_pfn(vcpu->kvm, gfn);
+
+        if (try_async_pf(vcpu, gfn, gpa, &pfn))
+                return 0;
+
+        /* mmio */
         if (is_error_pfn(pfn))
                 return kvm_handle_bad_page(vcpu->kvm, gfn, pfn);
         spin_lock(&vcpu->kvm->mmu_lock);

arch/x86/kvm/paging_tmpl.h

+3 -1
@@ -568,7 +568,9 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gva_t addr,
 
         mmu_seq = vcpu->kvm->mmu_notifier_seq;
         smp_rmb();
-        pfn = gfn_to_pfn(vcpu->kvm, walker.gfn);
+
+        if (try_async_pf(vcpu, walker.gfn, addr, &pfn))
+                return 0;
 
         /* mmio */
         if (is_error_pfn(pfn))

arch/x86/kvm/x86.c

+109 -3
@@ -43,6 +43,7 @@
 #include <linux/slab.h>
 #include <linux/perf_event.h>
 #include <linux/uaccess.h>
+#include <linux/hash.h>
 #include <trace/events/kvm.h>
 
 #define CREATE_TRACE_POINTS
@@ -155,6 +156,13 @@ struct kvm_stats_debugfs_item debugfs_entries[] = {
 
 u64 __read_mostly host_xcr0;
 
+static inline void kvm_async_pf_hash_reset(struct kvm_vcpu *vcpu)
+{
+        int i;
+        for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU); i++)
+                vcpu->arch.apf.gfns[i] = ~0;
+}
+
 static void kvm_on_user_return(struct user_return_notifier *urn)
 {
         unsigned slot;
@@ -5115,6 +5123,12 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                         vcpu->fpu_active = 0;
                         kvm_x86_ops->fpu_deactivate(vcpu);
                 }
+                if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
+                        /* Page is swapped out. Do synthetic halt */
+                        vcpu->arch.apf.halted = true;
+                        r = 1;
+                        goto out;
+                }
         }
 
         r = kvm_mmu_reload(vcpu);
@@ -5243,7 +5257,8 @@ static int __vcpu_run(struct kvm_vcpu *vcpu)
 
         r = 1;
         while (r > 0) {
-                if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE)
+                if (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
+                    !vcpu->arch.apf.halted)
                         r = vcpu_enter_guest(vcpu);
                 else {
                         srcu_read_unlock(&kvm->srcu, vcpu->srcu_idx);
@@ -5256,6 +5271,7 @@
                                         vcpu->arch.mp_state =
                                                 KVM_MP_STATE_RUNNABLE;
                                 case KVM_MP_STATE_RUNNABLE:
+                                        vcpu->arch.apf.halted = false;
                                         break;
                                 case KVM_MP_STATE_SIPI_RECEIVED:
                                 default:
@@ -5277,6 +5293,9 @@
                         vcpu->run->exit_reason = KVM_EXIT_INTR;
                         ++vcpu->stat.request_irq_exits;
                 }
+
+                kvm_check_async_pf_completion(vcpu);
+
                 if (signal_pending(current)) {
                         r = -EINTR;
                         vcpu->run->exit_reason = KVM_EXIT_INTR;
@@ -5792,6 +5811,10 @@ int kvm_arch_vcpu_reset(struct kvm_vcpu *vcpu)
 
         kvm_make_request(KVM_REQ_EVENT, vcpu);
 
+        kvm_clear_async_pf_completion_queue(vcpu);
+        kvm_async_pf_hash_reset(vcpu);
+        vcpu->arch.apf.halted = false;
+
         return kvm_x86_ops->vcpu_reset(vcpu);
 }
 
@@ -5880,6 +5903,8 @@ int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu)
         if (!zalloc_cpumask_var(&vcpu->arch.wbinvd_dirty_mask, GFP_KERNEL))
                 goto fail_free_mce_banks;
 
+        kvm_async_pf_hash_reset(vcpu);
+
         return 0;
 fail_free_mce_banks:
         kfree(vcpu->arch.mce_banks);
@@ -5938,8 +5963,10 @@ static void kvm_free_vcpus(struct kvm *kvm)
         /*
          * Unpin any mmu pages first.
          */
-        kvm_for_each_vcpu(i, vcpu, kvm)
+        kvm_for_each_vcpu(i, vcpu, kvm) {
+                kvm_clear_async_pf_completion_queue(vcpu);
                 kvm_unload_vcpu_mmu(vcpu);
+        }
         kvm_for_each_vcpu(i, vcpu, kvm)
                 kvm_arch_vcpu_free(vcpu);
 
@@ -6050,7 +6077,9 @@ void kvm_arch_flush_shadow(struct kvm *kvm)
 
 int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu)
 {
-        return vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE
+        return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
+                !vcpu->arch.apf.halted)
+                || !list_empty_careful(&vcpu->async_pf.done)
                 || vcpu->arch.mp_state == KVM_MP_STATE_SIPI_RECEIVED
                 || vcpu->arch.nmi_pending ||
                 (kvm_arch_interrupt_allowed(vcpu) &&
61096138
}
61106139
EXPORT_SYMBOL_GPL(kvm_set_rflags);
61116140

6141+
static inline u32 kvm_async_pf_hash_fn(gfn_t gfn)
6142+
{
6143+
return hash_32(gfn & 0xffffffff, order_base_2(ASYNC_PF_PER_VCPU));
6144+
}
6145+
6146+
static inline u32 kvm_async_pf_next_probe(u32 key)
6147+
{
6148+
return (key + 1) & (roundup_pow_of_two(ASYNC_PF_PER_VCPU) - 1);
6149+
}
6150+
6151+
static void kvm_add_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
6152+
{
6153+
u32 key = kvm_async_pf_hash_fn(gfn);
6154+
6155+
while (vcpu->arch.apf.gfns[key] != ~0)
6156+
key = kvm_async_pf_next_probe(key);
6157+
6158+
vcpu->arch.apf.gfns[key] = gfn;
6159+
}
6160+
6161+
static u32 kvm_async_pf_gfn_slot(struct kvm_vcpu *vcpu, gfn_t gfn)
6162+
{
6163+
int i;
6164+
u32 key = kvm_async_pf_hash_fn(gfn);
6165+
6166+
for (i = 0; i < roundup_pow_of_two(ASYNC_PF_PER_VCPU) &&
6167+
(vcpu->arch.apf.gfns[key] != gfn ||
6168+
vcpu->arch.apf.gfns[key] == ~0); i++)
6169+
key = kvm_async_pf_next_probe(key);
6170+
6171+
return key;
6172+
}
6173+
6174+
bool kvm_find_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
6175+
{
6176+
return vcpu->arch.apf.gfns[kvm_async_pf_gfn_slot(vcpu, gfn)] == gfn;
6177+
}
6178+
6179+
static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn)
6180+
{
6181+
u32 i, j, k;
6182+
6183+
i = j = kvm_async_pf_gfn_slot(vcpu, gfn);
6184+
while (true) {
6185+
vcpu->arch.apf.gfns[i] = ~0;
6186+
do {
6187+
j = kvm_async_pf_next_probe(j);
6188+
if (vcpu->arch.apf.gfns[j] == ~0)
6189+
return;
6190+
k = kvm_async_pf_hash_fn(vcpu->arch.apf.gfns[j]);
6191+
/*
6192+
* k lies cyclically in ]i,j]
6193+
* | i.k.j |
6194+
* |....j i.k.| or |.k..j i...|
6195+
*/
6196+
} while ((i <= j) ? (i < k && k <= j) : (i < k || k <= j));
6197+
vcpu->arch.apf.gfns[i] = vcpu->arch.apf.gfns[j];
6198+
i = j;
6199+
}
6200+
}
6201+
6202+
void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu,
6203+
struct kvm_async_pf *work)
6204+
{
6205+
trace_kvm_async_pf_not_present(work->gva);
6206+
6207+
kvm_make_request(KVM_REQ_APF_HALT, vcpu);
6208+
kvm_add_async_pf_gfn(vcpu, work->arch.gfn);
6209+
}
6210+
6211+
void kvm_arch_async_page_present(struct kvm_vcpu *vcpu,
6212+
struct kvm_async_pf *work)
6213+
{
6214+
trace_kvm_async_pf_ready(work->gva);
6215+
kvm_del_async_pf_gfn(vcpu, work->arch.gfn);
6216+
}
6217+
61126218
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_exit);
61136219
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_inj_virq);
61146220
EXPORT_TRACEPOINT_SYMBOL_GPL(kvm_page_fault);
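
The last hunk tracks outstanding async faults in a small per-vcpu table: open addressing with linear probing, ~0 as the empty marker, lookups that stop at the first empty slot, and a deletion that back-shifts later entries instead of leaving tombstones; the "k lies cyclically in ]i,j]" check decides which entries may be pulled back. The standalone sketch below reproduces that logic in userspace so it can be compiled and tested on its own. It is an illustration, not the patch: the table size, hash multiplier, and all names (add_gfn, gfn_slot, del_gfn, ...) are chosen for the example.

/*
 * Userspace sketch of the gfn tracking table added above.
 * Illustrative only; sizes, hash function and names are invented.
 */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define TABLE_SIZE 8                    /* must be a power of two */
#define EMPTY      (~(uint64_t)0)       /* plays the role of ~0 in the patch */

static uint64_t table[TABLE_SIZE];

static uint32_t hash_fn(uint64_t gfn)
{
        return (uint32_t)(gfn * 2654435761u) & (TABLE_SIZE - 1);
}

static uint32_t next_probe(uint32_t key)
{
        return (key + 1) & (TABLE_SIZE - 1);
}

static void add_gfn(uint64_t gfn)
{
        uint32_t key = hash_fn(gfn);

        while (table[key] != EMPTY)     /* linear probing to the next free slot */
                key = next_probe(key);
        table[key] = gfn;
}

static uint32_t gfn_slot(uint64_t gfn)
{
        uint32_t key = hash_fn(gfn);
        int i;

        /* stop at the entry itself or at the first empty slot */
        for (i = 0; i < TABLE_SIZE && table[key] != gfn && table[key] != EMPTY; i++)
                key = next_probe(key);
        return key;
}

static int find_gfn(uint64_t gfn)
{
        return table[gfn_slot(gfn)] == gfn;
}

static void del_gfn(uint64_t gfn)
{
        uint32_t i, j, k;

        i = j = gfn_slot(gfn);
        while (1) {
                table[i] = EMPTY;
                do {
                        j = next_probe(j);
                        if (table[j] == EMPTY)
                                return;
                        k = hash_fn(table[j]);
                        /* keep scanning while k lies cyclically in ]i,j]:
                         * such an entry must not move in front of its home slot */
                } while ((i <= j) ? (i < k && k <= j) : (i < k || k <= j));
                table[i] = table[j];    /* pull the entry back to fill the hole */
                i = j;
        }
}

int main(void)
{
        uint64_t g;

        for (g = 0; g < TABLE_SIZE; g++)
                table[g] = EMPTY;

        add_gfn(100); add_gfn(108); add_gfn(116);   /* these collide in this toy table */
        assert(find_gfn(100) && find_gfn(108) && find_gfn(116));

        del_gfn(108);                               /* later entries are re-packed */
        assert(!find_gfn(108) && find_gfn(100) && find_gfn(116));
        printf("gfn table ok\n");
        return 0;
}

Back-shift deletion keeps probe chains contiguous, which is why kvm_find_async_pf_gfn() can stop at the first empty slot without missing an entry that was inserted before a later deletion.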
