summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSean Christopherson <seanjc@google.com>2025-05-22 16:52:15 -0700
committerSean Christopherson <seanjc@google.com>2025-06-23 09:50:56 -0700
commit86e00cd162a727c0b847def89bbd787c20eb8f5d (patch)
tree70bb3f43320af57c714b6cbbc9a0a24038423054
parent5f8ca05ea99183ab2b69c7fd9617961211d194e7 (diff)
KVM: Add irqfd to eventfd's waitqueue while holding irqfds.lock
Add an irqfd to its target eventfd's waitqueue while holding irqfds.lock, which is mildly terrifying but functionally safe. irqfds.lock is taken inside the waitqueue's lock, but if and only if the eventfd is being released, i.e. that path is mutually exclusive with registration as KVM holds a reference to the eventfd (and obviously must do so to avoid UAF). This will allow using the eventfd's waitqueue to enforce KVM's requirement that eventfd is assigned to at most one irqfd, without introducing races. Tested-by: K Prateek Nayak <kprateek.nayak@amd.com> Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org> Link: https://lore.kernel.org/r/20250522235223.3178519-6-seanjc@google.com Signed-off-by: Sean Christopherson <seanjc@google.com>
-rw-r--r--virt/kvm/eventfd.c21
1 files changed, 18 insertions, 3 deletions
diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c
index 170a67aad983..a9a13f919de8 100644
--- a/virt/kvm/eventfd.c
+++ b/virt/kvm/eventfd.c
@@ -204,6 +204,11 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
int ret = 0;
if (flags & EPOLLIN) {
+ /*
+ * WARNING: Do NOT take irqfds.lock in any path except EPOLLHUP,
+ * as KVM holds irqfds.lock when registering the irqfd with the
+ * eventfd.
+ */
u64 cnt;
eventfd_ctx_do_read(irqfd->eventfd, &cnt);
@@ -225,6 +230,11 @@ irqfd_wakeup(wait_queue_entry_t *wait, unsigned mode, int sync, void *key)
/* The eventfd is closing, detach from KVM */
unsigned long iflags;
+ /*
+ * Taking irqfds.lock is safe here, as KVM holds a reference to
+ * the eventfd when registering the irqfd, i.e. this path can't
+ * be reached while kvm_irqfd_add() is running.
+ */
spin_lock_irqsave(&kvm->irqfds.lock, iflags);
/*
@@ -296,16 +306,21 @@ static void kvm_irqfd_register(struct file *file, wait_queue_head_t *wqh,
list_add_tail(&irqfd->list, &kvm->irqfds.items);
- spin_unlock_irq(&kvm->irqfds.lock);
-
/*
* Add the irqfd as a priority waiter on the eventfd, with a custom
* wake-up handler, so that KVM *and only KVM* is notified whenever the
- * underlying eventfd is signaled.
+ * underlying eventfd is signaled. Temporarily lie to lockdep about
+ * holding irqfds.lock to avoid a false positive regarding potential
+ * deadlock with irqfd_wakeup() (see irqfd_wakeup() for details).
*/
init_waitqueue_func_entry(&irqfd->wait, irqfd_wakeup);
+ spin_release(&kvm->irqfds.lock.dep_map, _RET_IP_);
add_wait_queue_priority(wqh, &irqfd->wait);
+ spin_acquire(&kvm->irqfds.lock.dep_map, 0, 0, _RET_IP_);
+
+ spin_unlock_irq(&kvm->irqfds.lock);
+
p->ret = 0;
}