[Intel-gfx] [RFC PATCH v3 13/19] KVM: x86: nSVM: wire nested AVIC to nested guest entry/exit

Maxim Levitsky mlevitsk at redhat.com
Wed Apr 27 20:03:08 UTC 2022


  * Passthrough guest's avic pages that can be passed through
     - logical id table
     - avic backing page

  * Passthrough AVIC's mmio range
     - nested guest is responsible for marking it RW
       in its NPT tables.

  * Write track physical id page
     - all peer's avic backing pages are pinned
       as long as the shadow table is not invalidated/
       freed.

  * Cache guest AVIC settings.

  * Add SDM mandated changes to emulated VM enter/exit.

Note that nested AVIC still can't be enabled, thus this
code has no effect yet.

Signed-off-by: Maxim Levitsky <mlevitsk at redhat.com>
---
 arch/x86/kvm/svm/avic.c   |  51 ++++++++++++++-
 arch/x86/kvm/svm/nested.c | 127 +++++++++++++++++++++++++++++++++++++-
 arch/x86/kvm/svm/svm.c    |   2 +
 arch/x86/kvm/svm/svm.h    |  24 +++++++
 4 files changed, 199 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/svm/avic.c b/arch/x86/kvm/svm/avic.c
index 34da9fabd5194..e6ec525a88625 100644
--- a/arch/x86/kvm/svm/avic.c
+++ b/arch/x86/kvm/svm/avic.c
@@ -59,6 +59,18 @@ static inline struct kvm_vcpu *avic_vcpu_by_l1_apicid(struct kvm *kvm,
 	return kvm_get_vcpu_by_id(kvm, l1_apicid);
 }
 
+static u32 nested_avic_get_reg(struct kvm_vcpu *vcpu, int reg_off)
+{
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	void *nested_apic_regs = svm->nested.l2_apic_access_page.hva;
+
+	if (WARN_ON_ONCE(!nested_apic_regs))
+		return 0;
+
+	return *((u32 *) (nested_apic_regs + reg_off));
+}
+
 static void avic_physid_shadow_entry_set_vcpu(struct kvm *kvm,
 					      struct avic_physid_table *t,
 					      int n,
@@ -531,6 +543,20 @@ static void avic_physid_shadow_table_flush_memslot(struct kvm *kvm,
 	mutex_unlock(&kvm_svm->avic.tables_lock);
 }
 
+void avic_free_nested(struct kvm_vcpu *vcpu)
+{
+	struct avic_physid_table *t;
+	struct vcpu_svm *svm = to_svm(vcpu);
+
+	t = svm->nested.l2_physical_id_table;
+	if (t) {
+		avic_physid_shadow_table_put(vcpu->kvm, t);
+		svm->nested.l2_physical_id_table = NULL;
+	}
+
+	kvm_vcpu_unmap(vcpu, &svm->nested.l2_apic_access_page, true);
+	kvm_vcpu_unmap(vcpu, &svm->nested.l2_logical_id_table, true);
+}
 
 /*
  * This is a wrapper of struct amd_iommu_ir_data.
@@ -586,10 +612,18 @@ void avic_vm_destroy(struct kvm *kvm)
 {
 	unsigned long flags;
 	struct kvm_svm_avic *avic = &to_kvm_svm(kvm)->avic;
+	unsigned long i;
+	struct kvm_vcpu *vcpu;
 
 	if (!enable_apicv)
 		return;
 
+	kvm_for_each_vcpu(i, vcpu, kvm) {
+		vcpu_load(vcpu);
+		avic_free_nested(vcpu);
+		vcpu_put(vcpu);
+	}
+
 	if (avic->logical_id_table_page)
 		__free_page(avic->logical_id_table_page);
 	if (avic->physical_id_table_page)
@@ -1501,7 +1535,7 @@ void __nested_avic_load(struct kvm_vcpu *vcpu, int cpu)
 	if (kvm_vcpu_is_blocking(vcpu))
 		return;
 
-	if (svm->nested.initialized)
+	if (svm->nested.initialized && svm->avic_enabled)
 		avic_update_peer_physid_entries(vcpu, cpu);
 }
 
@@ -1511,7 +1545,7 @@ void __nested_avic_put(struct kvm_vcpu *vcpu)
 
 	lockdep_assert_preemption_disabled();
 
-	if (svm->nested.initialized)
+	if (svm->nested.initialized && svm->avic_enabled)
 		avic_update_peer_physid_entries(vcpu, -1);
 }
 
@@ -1591,3 +1625,16 @@ void avic_vcpu_unblocking(struct kvm_vcpu *vcpu)
 
 	nested_avic_load(vcpu);
 }
+
+bool avic_nested_has_interrupt(struct kvm_vcpu *vcpu)
+{
+	int off;
+
+	if (!nested_avic_in_use(vcpu))
+		return false;
+
+	for (off = 0x10; off < 0x80; off += 0x10)
+		if (nested_avic_get_reg(vcpu, APIC_IRR + off))
+			return true;
+	return false;
+}
diff --git a/arch/x86/kvm/svm/nested.c b/arch/x86/kvm/svm/nested.c
index bed5e1692cef0..eb5e9b600e052 100644
--- a/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@ -387,6 +387,14 @@ void __nested_copy_vmcb_control_to_cache(struct kvm_vcpu *vcpu,
 		memcpy(to->reserved_sw, from->reserved_sw,
 		       sizeof(struct hv_enlightenments));
 	}
+
+	/* copy avic related settings only when it is enabled */
+	if (from->int_ctl & AVIC_ENABLE_MASK) {
+		to->avic_vapic_bar      = from->avic_vapic_bar;
+		to->avic_backing_page   = from->avic_backing_page;
+		to->avic_logical_id     = from->avic_logical_id;
+		to->avic_physical_id    = from->avic_physical_id;
+	}
 }
 
 void nested_copy_vmcb_control_to_cache(struct vcpu_svm *svm,
@@ -539,6 +547,79 @@ void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm)
 	svm->nested.vmcb02.ptr->save.g_pat = svm->vmcb01.ptr->save.g_pat;
 }
 
+
+static bool nested_vmcb02_prepare_avic(struct vcpu_svm *svm)
+{
+	struct vmcb *vmcb02 = svm->nested.vmcb02.ptr;
+	struct avic_physid_table *t = svm->nested.l2_physical_id_table;
+	gfn_t physid_gfn;
+	int physid_nentries;
+
+	if (!nested_avic_in_use(&svm->vcpu))
+		return true;
+
+	if (svm->vcpu.kvm->arch.apic_id_changed) {
+		/* if the guest played with apic id, it will keep both pieces */
+		kvm_vm_bugged(svm->vcpu.kvm);
+		return false;
+	}
+
+	if (kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->nested.ctl.avic_backing_page & AVIC_HPA_MASK),
+			  &svm->nested.l2_apic_access_page))
+		goto error;
+
+	if (kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->nested.ctl.avic_logical_id & AVIC_HPA_MASK),
+			  &svm->nested.l2_logical_id_table))
+		goto error_unmap_backing_page;
+
+	physid_gfn = gpa_to_gfn(svm->nested.ctl.avic_physical_id &
+		     AVIC_HPA_MASK);
+	physid_nentries = svm->nested.ctl.avic_physical_id &
+			AVIC_PHYSICAL_ID_TABLE_SIZE_MASK;
+
+	if (t && t->gfn != physid_gfn) {
+		avic_physid_shadow_table_put(svm->vcpu.kvm, t);
+		svm->nested.l2_physical_id_table = NULL;
+	}
+
+	if (!svm->nested.l2_physical_id_table) {
+		t = avic_physid_shadow_table_get(&svm->vcpu, physid_gfn);
+		if (!t)
+			goto error_unmap_logical_id_table;
+		svm->nested.l2_physical_id_table = t;
+	}
+
+	atomic_inc(&t->usecount);
+
+	if (t->nentries < physid_nentries)
+		if (avic_physid_shadow_table_sync(&svm->vcpu, t, physid_nentries) < 0)
+			goto error_put_table;
+
+	/* Everything is setup, we can enable AVIC */
+	vmcb02->control.avic_vapic_bar =
+		svm->nested.ctl.avic_vapic_bar & VMCB_AVIC_APIC_BAR_MASK;
+	vmcb02->control.avic_backing_page =
+		pfn_to_hpa(svm->nested.l2_apic_access_page.pfn);
+	vmcb02->control.avic_logical_id =
+		pfn_to_hpa(svm->nested.l2_logical_id_table.pfn);
+	vmcb02->control.avic_physical_id =
+		(svm->nested.l2_physical_id_table->shadow_table_hpa) | physid_nentries;
+
+	vmcb02->control.int_ctl |= AVIC_ENABLE_MASK;
+	vmcb_mark_dirty(vmcb02, VMCB_AVIC);
+	return true;
+
+error_put_table:
+	avic_physid_shadow_table_put(svm->vcpu.kvm, t);
+	svm->nested.l2_physical_id_table = NULL;
+error_unmap_logical_id_table:
+	kvm_vcpu_unmap(&svm->vcpu, &svm->nested.l2_logical_id_table, false);
+error_unmap_backing_page:
+	kvm_vcpu_unmap(&svm->vcpu, &svm->nested.l2_apic_access_page, false);
+error:
+	return false;
+}
+
 static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12)
 {
 	bool new_vmcb12 = false;
@@ -627,6 +708,17 @@ static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
 	else
 		int_ctl_vmcb01_bits |= (V_GIF_MASK | V_GIF_ENABLE_MASK);
 
+	if (nested_avic_in_use(vcpu)) {
+
+		/*
+		 * Enabling AVIC implicitly disables the
+		 * V_IRQ, V_INTR_PRIO, V_IGN_TPR, and V_INTR_VECTOR
+		 * fields in the VMCB Control Word
+		 */
+		int_ctl_vmcb12_bits &= ~V_IRQ_INJECTION_BITS_MASK;
+	}
+
+
 	/* Copied from vmcb01.  msrpm_base can be overwritten later.  */
 	vmcb02->control.nested_ctl = vmcb01->control.nested_ctl;
 	vmcb02->control.iopm_base_pa = vmcb01->control.iopm_base_pa;
@@ -829,7 +921,10 @@ int nested_svm_vmrun(struct kvm_vcpu *vcpu)
 	if (enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12, true))
 		goto out_exit_err;
 
-	if (nested_svm_vmrun_msrpm(svm))
+	if (!nested_svm_vmrun_msrpm(svm))
+		goto out_exit_err;
+
+	if (nested_vmcb02_prepare_avic(svm))
 		goto out;
 
 out_exit_err:
@@ -956,6 +1051,15 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
 
 	nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr);
 
+	if (nested_avic_in_use(vcpu)) {
+		struct avic_physid_table *t = svm->nested.l2_physical_id_table;
+
+		kvm_vcpu_unmap(vcpu, &svm->nested.l2_apic_access_page, true);
+		kvm_vcpu_unmap(vcpu, &svm->nested.l2_logical_id_table, true);
+
+		atomic_dec(&t->usecount);
+	}
+
 	svm_switch_vmcb(svm, &svm->vmcb01);
 
 	if (unlikely(svm->lbrv_enabled && (svm->nested.ctl.virt_ext & LBR_CTL_ENABLE_MASK))) {
@@ -1069,6 +1173,7 @@ int svm_allocate_nested(struct vcpu_svm *svm)
 	svm_vcpu_init_msrpm(&svm->vcpu, svm->nested.msrpm);
 
 	svm->nested.initialized = true;
+	nested_avic_load(&svm->vcpu);
 	return 0;
 
 err_free_vmcb02:
@@ -1078,6 +1183,8 @@ int svm_allocate_nested(struct vcpu_svm *svm)
 
 void svm_free_nested(struct vcpu_svm *svm)
 {
+	struct kvm_vcpu *vcpu = &svm->vcpu;
+
 	if (!svm->nested.initialized)
 		return;
 
@@ -1096,6 +1203,11 @@ void svm_free_nested(struct vcpu_svm *svm)
 	 */
 	svm->nested.last_vmcb12_gpa = INVALID_GPA;
 
+	if (svm->avic_enabled) {
+		nested_avic_put(vcpu);
+		avic_free_nested(vcpu);
+	}
+
 	svm->nested.initialized = false;
 }
 
@@ -1116,8 +1228,10 @@ void svm_leave_nested(struct kvm_vcpu *vcpu)
 
 		nested_svm_uninit_mmu_context(vcpu);
 		vmcb_mark_all_dirty(svm->vmcb);
-	}
 
+		kvm_vcpu_unmap(vcpu, &svm->nested.l2_apic_access_page, true);
+		kvm_vcpu_unmap(vcpu, &svm->nested.l2_logical_id_table, true);
+	}
 	kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
 }
 
@@ -1423,6 +1537,13 @@ static void nested_copy_vmcb_cache_to_control(struct vmcb_control_area *dst,
 	dst->pause_filter_count   = from->pause_filter_count;
 	dst->pause_filter_thresh  = from->pause_filter_thresh;
 	/* 'clean' and 'reserved_sw' are not changed by KVM */
+
+	if (from->int_ctl & AVIC_ENABLE_MASK) {
+		dst->avic_vapic_bar      = from->avic_vapic_bar;
+		dst->avic_backing_page   = from->avic_backing_page;
+		dst->avic_logical_id     = from->avic_logical_id;
+		dst->avic_physical_id    = from->avic_physical_id;
+	}
 }
 
 static int svm_get_nested_state(struct kvm_vcpu *vcpu,
@@ -1644,7 +1765,7 @@ static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
 		if (CC(!load_pdptrs(vcpu, vcpu->arch.cr3)))
 			return false;
 
-	if (!nested_svm_vmrun_msrpm(svm)) {
+	if (!nested_svm_vmrun_msrpm(svm) || !nested_vmcb02_prepare_avic(svm)) {
 		vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
 		vcpu->run->internal.suberror =
 			KVM_INTERNAL_ERROR_EMULATION;
diff --git a/arch/x86/kvm/svm/svm.c b/arch/x86/kvm/svm/svm.c
index 76fbee2c8c5d7..a39bb0b27a51d 100644
--- a/arch/x86/kvm/svm/svm.c
+++ b/arch/x86/kvm/svm/svm.c
@@ -4680,6 +4680,7 @@ static struct kvm_x86_ops svm_x86_ops __initdata = {
 	.refresh_apicv_exec_ctrl = avic_refresh_apicv_exec_ctrl,
 	.check_apicv_inhibit_reasons = avic_check_apicv_inhibit_reasons,
 	.apicv_post_state_restore = avic_apicv_post_state_restore,
+	.guest_apic_has_interrupt = avic_nested_has_interrupt,
 
 	.get_mt_mask = svm_get_mt_mask,
 	.get_exit_info = svm_get_exit_info,
@@ -4931,6 +4932,7 @@ static __init int svm_hardware_setup(void)
 		svm_x86_ops.vcpu_blocking = NULL;
 		svm_x86_ops.vcpu_unblocking = NULL;
 		svm_x86_ops.vcpu_get_apicv_inhibit_reasons = NULL;
+		svm_x86_ops.guest_apic_has_interrupt = NULL;
 	}
 
 	if (vls) {
diff --git a/arch/x86/kvm/svm/svm.h b/arch/x86/kvm/svm/svm.h
index 401449dbce65d..17fcc09cf4be1 100644
--- a/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@ -167,6 +167,11 @@ struct vmcb_ctrl_area_cached {
 	u64 virt_ext;
 	u32 clean;
 	u8 reserved_sw[32];
+
+	u64 avic_vapic_bar;
+	u64 avic_backing_page;
+	u64 avic_logical_id;
+	u64 avic_physical_id;
 };
 
 struct avic_physid_entry_descr {
@@ -248,6 +253,10 @@ struct svm_nested_state {
 
 	/* All AVIC shadow PID table entry descriptors that reference this vCPU */
 	struct list_head physid_ref_entries;
+
+	struct kvm_host_map l2_apic_access_page;
+	struct kvm_host_map l2_logical_id_table;
+	struct avic_physid_table *l2_physical_id_table;
 };
 
 struct vcpu_sev_es_state {
@@ -310,6 +319,7 @@ struct vcpu_svm {
 	bool pause_filter_enabled         : 1;
 	bool pause_threshold_enabled      : 1;
 	bool vgif_enabled                 : 1;
+	bool avic_enabled                 : 1;
 
 	u32 ldr_reg;
 	u32 dfr_reg;
@@ -701,6 +711,8 @@ void avic_vcpu_blocking(struct kvm_vcpu *vcpu);
 void avic_vcpu_unblocking(struct kvm_vcpu *vcpu);
 void avic_ring_doorbell(struct kvm_vcpu *vcpu);
 unsigned long avic_vcpu_get_apicv_inhibit_reasons(struct kvm_vcpu *vcpu);
+void avic_free_nested(struct kvm_vcpu *vcpu);
+bool avic_nested_has_interrupt(struct kvm_vcpu *vcpu);
 
 struct avic_physid_table *
 avic_physid_shadow_table_get(struct kvm_vcpu *vcpu, gfn_t gfn);
@@ -708,6 +720,18 @@ void avic_physid_shadow_table_put(struct kvm *kvm, struct avic_physid_table *t);
 int avic_physid_shadow_table_sync(struct kvm_vcpu *vcpu,
 				  struct avic_physid_table *t, int nentries);
 
+static inline bool nested_avic_in_use(struct kvm_vcpu *vcpu)
+{
+	struct vcpu_svm *vcpu_svm = to_svm(vcpu);
+
+	if (!vcpu_svm->avic_enabled)
+		return false;
+
+	if (!nested_npt_enabled(vcpu_svm))
+		return false;
+
+	return vcpu_svm->nested.ctl.int_ctl & AVIC_ENABLE_MASK;
+}
 
 #define INVALID_BACKING_PAGE	(~(u64)0)
 
-- 
2.26.3



More information about the Intel-gfx mailing list