Diffstat (limited to 'kernel/bpf/rqspinlock.c')
| -rw-r--r-- | kernel/bpf/rqspinlock.c | 90 |
1 file changed, 46 insertions(+), 44 deletions(-)
diff --git a/kernel/bpf/rqspinlock.c b/kernel/bpf/rqspinlock.c
index a00561b1d3e5..f7d0c8d4644e 100644
--- a/kernel/bpf/rqspinlock.c
+++ b/kernel/bpf/rqspinlock.c
@@ -89,15 +89,14 @@ struct rqspinlock_timeout {
 DEFINE_PER_CPU_ALIGNED(struct rqspinlock_held, rqspinlock_held_locks);
 EXPORT_SYMBOL_GPL(rqspinlock_held_locks);
 
-static bool is_lock_released(rqspinlock_t *lock, u32 mask, struct rqspinlock_timeout *ts)
+static bool is_lock_released(rqspinlock_t *lock, u32 mask)
 {
 	if (!(atomic_read_acquire(&lock->val) & (mask)))
 		return true;
 	return false;
 }
 
-static noinline int check_deadlock_AA(rqspinlock_t *lock, u32 mask,
-				      struct rqspinlock_timeout *ts)
+static noinline int check_deadlock_AA(rqspinlock_t *lock)
 {
 	struct rqspinlock_held *rqh = this_cpu_ptr(&rqspinlock_held_locks);
 	int cnt = min(RES_NR_HELD, rqh->cnt);
@@ -118,8 +117,7 @@ static noinline int check_deadlock_AA(rqspinlock_t *lock, u32 mask,
  * more locks, which reduce to ABBA). This is not exhaustive, and we rely on
  * timeouts as the final line of defense.
  */
-static noinline int check_deadlock_ABBA(rqspinlock_t *lock, u32 mask,
-					struct rqspinlock_timeout *ts)
+static noinline int check_deadlock_ABBA(rqspinlock_t *lock, u32 mask)
 {
 	struct rqspinlock_held *rqh = this_cpu_ptr(&rqspinlock_held_locks);
 	int rqh_cnt = min(RES_NR_HELD, rqh->cnt);
@@ -142,7 +140,7 @@ static noinline int check_deadlock_ABBA(rqspinlock_t *lock, u32 mask,
 		 * Let's ensure to break out of this loop if the lock is available for
 		 * us to potentially acquire.
 		 */
-		if (is_lock_released(lock, mask, ts))
+		if (is_lock_released(lock, mask))
 			return 0;
 
 		/*
@@ -198,33 +196,21 @@ static noinline int check_deadlock_ABBA(rqspinlock_t *lock, u32 mask,
 	return 0;
 }
 
-static noinline int check_deadlock(rqspinlock_t *lock, u32 mask,
-				   struct rqspinlock_timeout *ts)
-{
-	int ret;
-
-	ret = check_deadlock_AA(lock, mask, ts);
-	if (ret)
-		return ret;
-	ret = check_deadlock_ABBA(lock, mask, ts);
-	if (ret)
-		return ret;
-
-	return 0;
-}
-
 static noinline int check_timeout(rqspinlock_t *lock, u32 mask,
 				  struct rqspinlock_timeout *ts)
 {
-	u64 time = ktime_get_mono_fast_ns();
 	u64 prev = ts->cur;
+	u64 time;
 
 	if (!ts->timeout_end) {
-		ts->cur = time;
-		ts->timeout_end = time + ts->duration;
+		if (check_deadlock_AA(lock))
+			return -EDEADLK;
+		ts->cur = ktime_get_mono_fast_ns();
+		ts->timeout_end = ts->cur + ts->duration;
 		return 0;
 	}
 
+	time = ktime_get_mono_fast_ns();
 	if (time > ts->timeout_end)
 		return -ETIMEDOUT;
 
@@ -234,7 +220,7 @@ static noinline int check_timeout(rqspinlock_t *lock, u32 mask,
 	 */
 	if (prev + NSEC_PER_MSEC < time) {
 		ts->cur = time;
-		return check_deadlock(lock, mask, ts);
+		return check_deadlock_ABBA(lock, mask);
 	}
 
 	return 0;
@@ -278,6 +264,10 @@ int __lockfunc resilient_tas_spin_lock(rqspinlock_t *lock)
 	int val, ret = 0;
 
 	RES_INIT_TIMEOUT(ts);
+	/*
+	 * The fast path is not invoked for the TAS fallback, so we must grab
+	 * the deadlock detection entry here.
+	 */
 	grab_held_lock_entry(lock);
 
 	/*
@@ -400,10 +390,7 @@ int __lockfunc resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val)
 		goto queue;
 	}
 
-	/*
-	 * Grab an entry in the held locks array, to enable deadlock detection.
-	 */
-	grab_held_lock_entry(lock);
+	/* Deadlock detection entry already held after failing fast path. */
 
 	/*
 	 * We're pending, wait for the owner to go away.
@@ -450,12 +437,21 @@ int __lockfunc resilient_queued_spin_lock_slowpath(rqspinlock_t *lock, u32 val)
 	 * queuing.
 	 */
 queue:
-	lockevent_inc(lock_slowpath);
 	/*
-	 * Grab deadlock detection entry for the queue path.
+	 * Do not queue if we're a waiter and someone is attempting this lock on
+	 * the same CPU. In case of NMIs, this prevents long timeouts where we
+	 * interrupt the pending waiter, and the owner, that will eventually
+	 * signal the head of our queue, both of which are logically but not
+	 * physically part of the queue, hence outside the scope of the idx > 0
+	 * check above for the trylock fallback.
 	 */
-	grab_held_lock_entry(lock);
+	if (check_deadlock_AA(lock)) {
+		ret = -EDEADLK;
+		goto err_release_entry;
+	}
 
+	lockevent_inc(lock_slowpath);
+	/* Deadlock detection entry already held after failing fast path. */
 	node = this_cpu_ptr(&rqnodes[0].mcs);
 	idx = node->count++;
 	tail = encode_tail(smp_processor_id(), idx);
@@ -467,19 +463,17 @@ queue:
 	 * not be nested NMIs taking spinlocks. That may not be true in
 	 * some architectures even though the chance of needing more than
 	 * 4 nodes will still be extremely unlikely. When that happens,
-	 * we fall back to spinning on the lock directly without using
-	 * any MCS node. This is not the most elegant solution, but is
-	 * simple enough.
+	 * we fall back to attempting a trylock operation without using
+	 * any MCS node. Unlike qspinlock which cannot fail, we have the
+	 * option of failing the slow path, and under contention, such a
+	 * trylock spinning will likely be treated unfairly due to lack of
+	 * queueing, hence do not spin.
 	 */
-	if (unlikely(idx >= _Q_MAX_NODES || in_nmi())) {
+	if (unlikely(idx >= _Q_MAX_NODES || (in_nmi() && idx > 0))) {
 		lockevent_inc(lock_no_node);
-		RES_RESET_TIMEOUT(ts, RES_DEF_TIMEOUT);
-		while (!queued_spin_trylock(lock)) {
-			if (RES_CHECK_TIMEOUT(ts, ret, ~0u)) {
-				lockevent_inc(rqspinlock_lock_timeout);
-				goto err_release_node;
-			}
-			cpu_relax();
+		if (!queued_spin_trylock(lock)) {
+			ret = -EDEADLK;
+			goto err_release_node;
 		}
 		goto release;
 	}
@@ -540,7 +534,7 @@ queue:
 
 		val = arch_mcs_spin_lock_contended(&node->locked);
 		if (val == RES_TIMEOUT_VAL) {
-			ret = -EDEADLK;
+			ret = -ETIMEDOUT;
 			goto waitq_timeout;
 		}
 
@@ -575,6 +569,14 @@ queue:
 	val = res_atomic_cond_read_acquire(&lock->val, !(VAL & _Q_LOCKED_PENDING_MASK) ||
 					   RES_CHECK_TIMEOUT(ts, ret, _Q_LOCKED_PENDING_MASK));
 
+	/* Disable queue destruction when we detect deadlocks. */
+	if (ret == -EDEADLK) {
+		if (!next)
+			next = smp_cond_load_relaxed(&node->next, (VAL));
+		arch_mcs_spin_unlock_contended(&next->locked);
+		goto err_release_node;
+	}
+
 waitq_timeout:
 	if (ret) {
 		/*
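For readers skimming the hunks above, the control flow that check_timeout() ends up with can be summarized outside the kernel context. The following is a minimal, userspace-compilable C sketch of that structure, not code from the patch: the AA (same-CPU re-acquisition) check runs once when the timeout window is first armed, the timestamp is read only after that check, and the heavier ABBA scan is repeated at most about once per millisecond until the budget expires. The helpers now_ns(), detect_aa() and detect_abba() are hypothetical stand-ins for ktime_get_mono_fast_ns(), check_deadlock_AA() and check_deadlock_ABBA().

#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <time.h>

#define NSEC_PER_MSEC 1000000ULL

struct timeout_state {
	uint64_t cur;         /* timestamp of the last deadlock re-check */
	uint64_t timeout_end; /* 0 until the waiting window is armed */
	uint64_t duration;    /* total waiting budget, in nanoseconds */
};

static uint64_t now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
}

/* Hypothetical detectors; the real ones walk per-CPU held-lock tables. */
static bool detect_aa(void *lock)   { (void)lock; return false; }
static bool detect_abba(void *lock) { (void)lock; return false; }

int check_timeout_sketch(void *lock, struct timeout_state *ts)
{
	uint64_t prev = ts->cur;
	uint64_t time;

	if (!ts->timeout_end) {
		/* First poll: catch AA deadlocks before waiting at all. */
		if (detect_aa(lock))
			return -EDEADLK;
		ts->cur = now_ns();
		ts->timeout_end = ts->cur + ts->duration;
		return 0;
	}

	time = now_ns();
	if (time > ts->timeout_end)
		return -ETIMEDOUT;

	/* Re-run only the expensive ABBA scan, roughly once per millisecond. */
	if (prev + NSEC_PER_MSEC < time) {
		ts->cur = time;
		return detect_abba(lock) ? -EDEADLK : 0;
	}
	return 0;
}

Reading the timestamp only after the AA check mirrors the ordering in the patched check_timeout(), and confining the ABBA scan (which inspects other CPUs' held-lock tables) to millisecond intervals bounds how often that walk runs.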
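Because the slow path can now return -EDEADLK from the AA check at the queue entry point and from the NMI trylock fallback, in addition to -ETIMEDOUT from the waiting loops, callers must treat acquisition as fallible. The sketch below shows the expected error handling; it assumes the raw_res_spin_lock_irqsave()/raw_res_spin_unlock_irqrestore() wrappers declared in include/asm-generic/rqspinlock.h, and the surrounding structure and function are invented for illustration.

#include <asm-generic/rqspinlock.h>
#include <linux/types.h>

/* Illustrative data structure, not part of the patch. */
struct example_bucket {
	rqspinlock_t lock;
	u64 hits;
};

static int example_bucket_add(struct example_bucket *b)
{
	unsigned long flags;
	int ret;

	/* Acquisition may fail; -EDEADLK and -ETIMEDOUT must be handled. */
	ret = raw_res_spin_lock_irqsave(&b->lock, flags);
	if (ret)
		return ret;

	b->hits++; /* critical section */

	raw_res_spin_unlock_irqrestore(&b->lock, flags);
	return 0;
}

Propagating the error to the caller, rather than retrying in place, matches the resilient-lock design: the lock layer already bounds waiting with timeouts and deadlock checks, so the caller only decides how to degrade when acquisition fails.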
