Merge branch 'kvm-tdpmmu-fixes' into HEAD
Merge topic branch with fixes for 5.14-rc6 and 5.15 merge window.
This commit is contained in:
commit
9a63b4517c
@ -31,10 +31,10 @@ On x86:
|
||||
|
||||
- vcpu->mutex is taken outside kvm->arch.hyperv.hv_lock
|
||||
|
||||
- kvm->arch.mmu_lock is an rwlock. kvm->arch.tdp_mmu_pages_lock is
|
||||
taken inside kvm->arch.mmu_lock, and cannot be taken without already
|
||||
holding kvm->arch.mmu_lock (typically with ``read_lock``, otherwise
|
||||
there's no need to take kvm->arch.tdp_mmu_pages_lock at all).
|
||||
- kvm->arch.mmu_lock is an rwlock. kvm->arch.tdp_mmu_pages_lock and
|
||||
kvm->arch.mmu_unsync_pages_lock are taken inside kvm->arch.mmu_lock, and
|
||||
cannot be taken without already holding kvm->arch.mmu_lock (typically with
|
||||
``read_lock`` for the TDP MMU, thus the need for additional spinlocks).
|
||||
|
||||
Everything else is a leaf: no other lock is taken inside the critical
|
||||
sections.
|
||||
|
@ -1038,6 +1038,13 @@ struct kvm_arch {
|
||||
struct list_head lpage_disallowed_mmu_pages;
|
||||
struct kvm_page_track_notifier_node mmu_sp_tracker;
|
||||
struct kvm_page_track_notifier_head track_notifier_head;
|
||||
/*
|
||||
* Protects marking pages unsync during page faults, as TDP MMU page
|
||||
* faults only take mmu_lock for read. For simplicity, the unsync
|
||||
* pages lock is always taken when marking pages unsync regardless of
|
||||
* whether mmu_lock is held for read or write.
|
||||
*/
|
||||
spinlock_t mmu_unsync_pages_lock;
|
||||
|
||||
struct list_head assigned_dev_head;
|
||||
struct iommu_domain *iommu_domain;
|
||||
|
@ -2575,6 +2575,7 @@ static void kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
|
||||
int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn, bool can_unsync)
|
||||
{
|
||||
struct kvm_mmu_page *sp;
|
||||
bool locked = false;
|
||||
|
||||
/*
|
||||
* Force write-protection if the page is being tracked. Note, the page
|
||||
@ -2597,9 +2598,34 @@ int mmu_try_to_unsync_pages(struct kvm_vcpu *vcpu, gfn_t gfn, bool can_unsync)
|
||||
if (sp->unsync)
|
||||
continue;
|
||||
|
||||
/*
|
||||
* TDP MMU page faults require an additional spinlock as they
|
||||
* run with mmu_lock held for read, not write, and the unsync
|
||||
* logic is not thread safe. Take the spinklock regardless of
|
||||
* the MMU type to avoid extra conditionals/parameters, there's
|
||||
* no meaningful penalty if mmu_lock is held for write.
|
||||
*/
|
||||
if (!locked) {
|
||||
locked = true;
|
||||
spin_lock(&vcpu->kvm->arch.mmu_unsync_pages_lock);
|
||||
|
||||
/*
|
||||
* Recheck after taking the spinlock, a different vCPU
|
||||
* may have since marked the page unsync. A false
|
||||
* positive on the unprotected check above is not
|
||||
* possible as clearing sp->unsync _must_ hold mmu_lock
|
||||
* for write, i.e. unsync cannot transition from 0->1
|
||||
* while this CPU holds mmu_lock for read (or write).
|
||||
*/
|
||||
if (READ_ONCE(sp->unsync))
|
||||
continue;
|
||||
}
|
||||
|
||||
WARN_ON(sp->role.level != PG_LEVEL_4K);
|
||||
kvm_unsync_page(vcpu, sp);
|
||||
}
|
||||
if (locked)
|
||||
spin_unlock(&vcpu->kvm->arch.mmu_unsync_pages_lock);
|
||||
|
||||
/*
|
||||
* We need to ensure that the marking of unsync pages is visible
|
||||
@ -5605,6 +5631,8 @@ void kvm_mmu_init_vm(struct kvm *kvm)
|
||||
{
|
||||
struct kvm_page_track_notifier_node *node = &kvm->arch.mmu_sp_tracker;
|
||||
|
||||
spin_lock_init(&kvm->arch.mmu_unsync_pages_lock);
|
||||
|
||||
if (!kvm_mmu_init_tdp_mmu(kvm))
|
||||
/*
|
||||
* No smp_load/store wrappers needed here as we are in
|
||||
|
@ -43,6 +43,7 @@ void kvm_mmu_uninit_tdp_mmu(struct kvm *kvm)
|
||||
if (!kvm->arch.tdp_mmu_enabled)
|
||||
return;
|
||||
|
||||
WARN_ON(!list_empty(&kvm->arch.tdp_mmu_pages));
|
||||
WARN_ON(!list_empty(&kvm->arch.tdp_mmu_roots));
|
||||
|
||||
/*
|
||||
@ -81,8 +82,6 @@ static void tdp_mmu_free_sp_rcu_callback(struct rcu_head *head)
|
||||
void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
|
||||
bool shared)
|
||||
{
|
||||
gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
|
||||
|
||||
kvm_lockdep_assert_mmu_lock_held(kvm, shared);
|
||||
|
||||
if (!refcount_dec_and_test(&root->tdp_mmu_root_count))
|
||||
@ -94,7 +93,7 @@ void kvm_tdp_mmu_put_root(struct kvm *kvm, struct kvm_mmu_page *root,
|
||||
list_del_rcu(&root->link);
|
||||
spin_unlock(&kvm->arch.tdp_mmu_pages_lock);
|
||||
|
||||
zap_gfn_range(kvm, root, 0, max_gfn, false, false, shared);
|
||||
zap_gfn_range(kvm, root, 0, -1ull, false, false, shared);
|
||||
|
||||
call_rcu(&root->rcu_head, tdp_mmu_free_sp_rcu_callback);
|
||||
}
|
||||
@ -753,13 +752,29 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
|
||||
gfn_t start, gfn_t end, bool can_yield, bool flush,
|
||||
bool shared)
|
||||
{
|
||||
gfn_t max_gfn_host = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
|
||||
bool zap_all = (start == 0 && end >= max_gfn_host);
|
||||
struct tdp_iter iter;
|
||||
|
||||
/*
|
||||
* No need to try to step down in the iterator when zapping all SPTEs,
|
||||
* zapping the top-level non-leaf SPTEs will recurse on their children.
|
||||
*/
|
||||
int min_level = zap_all ? root->role.level : PG_LEVEL_4K;
|
||||
|
||||
/*
|
||||
* Bound the walk at host.MAXPHYADDR, guest accesses beyond that will
|
||||
* hit a #PF(RSVD) and never get to an EPT Violation/Misconfig / #NPF,
|
||||
* and so KVM will never install a SPTE for such addresses.
|
||||
*/
|
||||
end = min(end, max_gfn_host);
|
||||
|
||||
kvm_lockdep_assert_mmu_lock_held(kvm, shared);
|
||||
|
||||
rcu_read_lock();
|
||||
|
||||
tdp_root_for_each_pte(iter, root, start, end) {
|
||||
for_each_tdp_pte_min_level(iter, root->spt, root->role.level,
|
||||
min_level, start, end) {
|
||||
retry:
|
||||
if (can_yield &&
|
||||
tdp_mmu_iter_cond_resched(kvm, &iter, flush, shared)) {
|
||||
@ -773,9 +788,10 @@ retry:
|
||||
/*
|
||||
* If this is a non-last-level SPTE that covers a larger range
|
||||
* than should be zapped, continue, and zap the mappings at a
|
||||
* lower level.
|
||||
* lower level, except when zapping all SPTEs.
|
||||
*/
|
||||
if ((iter.gfn < start ||
|
||||
if (!zap_all &&
|
||||
(iter.gfn < start ||
|
||||
iter.gfn + KVM_PAGES_PER_HPAGE(iter.level) > end) &&
|
||||
!is_last_spte(iter.old_spte, iter.level))
|
||||
continue;
|
||||
@ -823,12 +839,11 @@ bool __kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, int as_id, gfn_t start,
|
||||
|
||||
void kvm_tdp_mmu_zap_all(struct kvm *kvm)
|
||||
{
|
||||
gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
|
||||
bool flush = false;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++)
|
||||
flush = kvm_tdp_mmu_zap_gfn_range(kvm, i, 0, max_gfn,
|
||||
flush = kvm_tdp_mmu_zap_gfn_range(kvm, i, 0, -1ull,
|
||||
flush, false);
|
||||
|
||||
if (flush)
|
||||
@ -867,7 +882,6 @@ static struct kvm_mmu_page *next_invalidated_root(struct kvm *kvm,
|
||||
*/
|
||||
void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm)
|
||||
{
|
||||
gfn_t max_gfn = 1ULL << (shadow_phys_bits - PAGE_SHIFT);
|
||||
struct kvm_mmu_page *next_root;
|
||||
struct kvm_mmu_page *root;
|
||||
bool flush = false;
|
||||
@ -883,8 +897,7 @@ void kvm_tdp_mmu_zap_invalidated_roots(struct kvm *kvm)
|
||||
|
||||
rcu_read_unlock();
|
||||
|
||||
flush = zap_gfn_range(kvm, root, 0, max_gfn, true, flush,
|
||||
true);
|
||||
flush = zap_gfn_range(kvm, root, 0, -1ull, true, flush, true);
|
||||
|
||||
/*
|
||||
* Put the reference acquired in
|
||||
|
Loading…
Reference in New Issue
Block a user