diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2025-03-24 13:19:17 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2025-03-24 13:19:17 -0700 |
commit | 912b82dc0b27abc407c831e74fbcbdebfe19997b (patch) | |
tree | 0a1f074db125a6c6f57bc1a01f9fc97a84bed004 | |
parent | d41066dd76eb86f7fab45cf19d0a04e97e5c339f (diff) | |
parent | 5370b43e4bcf60049bfd44db83ba8d2ec43d0152 (diff) |
Merge tag 'vfs-6.15-rc1.file' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs
Pull vfs file handling updates from Christian Brauner:
"This contains performance improvements for struct file's new refcount
mechanism and various other performance work:
- The stock kernel transitioning the file to no refs held penalizes
the caller with an extra atomic to block any increments. For cases
where the file is highly likely to be going away this is easily
avoidable.
Add file_ref_put_close() to better handle the common case where
closing a file descriptor also operates on the last reference and
build fput_close_sync() and fput_close() on top of it. This brings
about 1% performance improvement by eliding one atomic in the
common case.
- Predict no error in close() since the vast majority of the time
system call returns 0.
- Reduce the work done in fdget_pos() by predicting that the file was
found and by explicitly comparing the reference count to one and
ignoring the dead zone"
* tag 'vfs-6.15-rc1.file' of git://git.kernel.org/pub/scm/linux/kernel/git/vfs/vfs:
fs: reduce work in fdget_pos()
fs: use fput_close() in path_openat()
fs: use fput_close() in filp_close()
fs: use fput_close_sync() in close()
file: add fput and file_ref_put routines optimized for use when closing a fd
fs: predict no error in close()
-rw-r--r-- | fs/file.c | 52 | ||||
-rw-r--r-- | fs/file_table.c | 70 | ||||
-rw-r--r-- | fs/internal.h | 3 | ||||
-rw-r--r-- | fs/namei.c | 2 | ||||
-rw-r--r-- | fs/open.c | 15 | ||||
-rw-r--r-- | include/linux/file_ref.h | 48 |
6 files changed, 141 insertions, 49 deletions
diff --git a/fs/file.c b/fs/file.c index 40fed4501aab..dc3f7e120e3e 100644 --- a/fs/file.c +++ b/fs/file.c @@ -26,6 +26,28 @@ #include "internal.h" +bool __file_ref_put_badval(file_ref_t *ref, unsigned long cnt) +{ + /* + * If the reference count was already in the dead zone, then this + * put() operation is imbalanced. Warn, put the reference count back to + * DEAD and tell the caller to not deconstruct the object. + */ + if (WARN_ONCE(cnt >= FILE_REF_RELEASED, "imbalanced put on file reference count")) { + atomic_long_set(&ref->refcnt, FILE_REF_DEAD); + return false; + } + + /* + * This is a put() operation on a saturated refcount. Restore the + * mean saturation value and tell the caller to not deconstruct the + * object. + */ + if (cnt > FILE_REF_MAXREF) + atomic_long_set(&ref->refcnt, FILE_REF_SATURATED); + return false; +} + /** * __file_ref_put - Slowpath of file_ref_put() * @ref: Pointer to the reference count @@ -67,24 +89,7 @@ bool __file_ref_put(file_ref_t *ref, unsigned long cnt) return true; } - /* - * If the reference count was already in the dead zone, then this - * put() operation is imbalanced. Warn, put the reference count back to - * DEAD and tell the caller to not deconstruct the object. - */ - if (WARN_ONCE(cnt >= FILE_REF_RELEASED, "imbalanced put on file reference count")) { - atomic_long_set(&ref->refcnt, FILE_REF_DEAD); - return false; - } - - /* - * This is a put() operation on a saturated refcount. Restore the - * mean saturation value and tell the caller to not deconstruct the - * object. - */ - if (cnt > FILE_REF_MAXREF) - atomic_long_set(&ref->refcnt, FILE_REF_SATURATED); - return false; + return __file_ref_put_badval(ref, cnt); } EXPORT_SYMBOL_GPL(__file_ref_put); @@ -1179,8 +1184,13 @@ struct fd fdget_raw(unsigned int fd) */ static inline bool file_needs_f_pos_lock(struct file *file) { - return (file->f_mode & FMODE_ATOMIC_POS) && - (file_count(file) > 1 || file->f_op->iterate_shared); + if (!(file->f_mode & FMODE_ATOMIC_POS)) + return false; + if (__file_ref_read_raw(&file->f_ref) != FILE_REF_ONEREF) + return true; + if (file->f_op->iterate_shared) + return true; + return false; } bool file_seek_cur_needs_f_lock(struct file *file) @@ -1198,7 +1208,7 @@ struct fd fdget_pos(unsigned int fd) struct fd f = fdget(fd); struct file *file = fd_file(f); - if (file && file_needs_f_pos_lock(file)) { + if (likely(file) && file_needs_f_pos_lock(file)) { f.word |= FDPUT_POS_UNLOCK; mutex_lock(&file->f_pos_lock); } diff --git a/fs/file_table.c b/fs/file_table.c index 9f0a1a164c82..c04ed94cdc4b 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -512,31 +512,37 @@ void flush_delayed_fput(void) } EXPORT_SYMBOL_GPL(flush_delayed_fput); -void fput(struct file *file) +static void __fput_deferred(struct file *file) { - if (file_ref_put(&file->f_ref)) { - struct task_struct *task = current; + struct task_struct *task = current; + + if (unlikely(!(file->f_mode & (FMODE_BACKING | FMODE_OPENED)))) { + file_free(file); + return; + } - if (unlikely(!(file->f_mode & (FMODE_BACKING | FMODE_OPENED)))) { - file_free(file); + if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) { + init_task_work(&file->f_task_work, ____fput); + if (!task_work_add(task, &file->f_task_work, TWA_RESUME)) return; - } - if (likely(!in_interrupt() && !(task->flags & PF_KTHREAD))) { - init_task_work(&file->f_task_work, ____fput); - if (!task_work_add(task, &file->f_task_work, TWA_RESUME)) - return; - /* - * After this task has run exit_task_work(), - * task_work_add() will fail. Fall through to delayed - * fput to avoid leaking *file. - */ - } - - if (llist_add(&file->f_llist, &delayed_fput_list)) - schedule_delayed_work(&delayed_fput_work, 1); + /* + * After this task has run exit_task_work(), + * task_work_add() will fail. Fall through to delayed + * fput to avoid leaking *file. + */ } + + if (llist_add(&file->f_llist, &delayed_fput_list)) + schedule_delayed_work(&delayed_fput_work, 1); } +void fput(struct file *file) +{ + if (unlikely(file_ref_put(&file->f_ref))) + __fput_deferred(file); +} +EXPORT_SYMBOL(fput); + /* * synchronous analog of fput(); for kernel threads that might be needed * in some umount() (and thus can't use flush_delayed_fput() without @@ -550,10 +556,32 @@ void __fput_sync(struct file *file) if (file_ref_put(&file->f_ref)) __fput(file); } - -EXPORT_SYMBOL(fput); EXPORT_SYMBOL(__fput_sync); +/* + * Equivalent to __fput_sync(), but optimized for being called with the last + * reference. + * + * See file_ref_put_close() for details. + */ +void fput_close_sync(struct file *file) +{ + if (likely(file_ref_put_close(&file->f_ref))) + __fput(file); +} + +/* + * Equivalent to fput(), but optimized for being called with the last + * reference. + * + * See file_ref_put_close() for details. + */ +void fput_close(struct file *file) +{ + if (file_ref_put_close(&file->f_ref)) + __fput_deferred(file); +} + void __init files_init(void) { struct kmem_cache_args args = { diff --git a/fs/internal.h b/fs/internal.h index f1837b77f91e..b9b3e29a73fd 100644 --- a/fs/internal.h +++ b/fs/internal.h @@ -118,6 +118,9 @@ static inline void put_file_access(struct file *file) } } +void fput_close_sync(struct file *); +void fput_close(struct file *); + /* * super.c */ diff --git a/fs/namei.c b/fs/namei.c index ea3653314832..360a86ca1f02 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -4010,7 +4010,7 @@ static struct file *path_openat(struct nameidata *nd, WARN_ON(1); error = -EINVAL; } - fput(file); + fput_close(file); if (error == -EOPENSTALE) { if (flags & LOOKUP_RCU) error = -ECHILD; diff --git a/fs/open.c b/fs/open.c index 201497f65590..a9063cca9911 100644 --- a/fs/open.c +++ b/fs/open.c @@ -1552,7 +1552,7 @@ int filp_close(struct file *filp, fl_owner_t id) int retval; retval = filp_flush(filp, id); - fput(filp); + fput_close(filp); return retval; } @@ -1578,13 +1578,16 @@ SYSCALL_DEFINE1(close, unsigned int, fd) * We're returning to user space. Don't bother * with any delayed fput() cases. */ - __fput_sync(file); + fput_close_sync(file); + + if (likely(retval == 0)) + return 0; /* can't restart close syscall because file table entry was cleared */ - if (unlikely(retval == -ERESTARTSYS || - retval == -ERESTARTNOINTR || - retval == -ERESTARTNOHAND || - retval == -ERESTART_RESTARTBLOCK)) + if (retval == -ERESTARTSYS || + retval == -ERESTARTNOINTR || + retval == -ERESTARTNOHAND || + retval == -ERESTART_RESTARTBLOCK) retval = -EINTR; return retval; diff --git a/include/linux/file_ref.h b/include/linux/file_ref.h index 9b3a8d9b17ab..7db62fbc0500 100644 --- a/include/linux/file_ref.h +++ b/include/linux/file_ref.h @@ -61,6 +61,7 @@ static inline void file_ref_init(file_ref_t *ref, unsigned long cnt) atomic_long_set(&ref->refcnt, cnt - 1); } +bool __file_ref_put_badval(file_ref_t *ref, unsigned long cnt); bool __file_ref_put(file_ref_t *ref, unsigned long cnt); /** @@ -161,6 +162,39 @@ static __always_inline __must_check bool file_ref_put(file_ref_t *ref) } /** + * file_ref_put_close - drop a reference expecting it would transition to FILE_REF_NOREF + * @ref: Pointer to the reference count + * + * Semantically it is equivalent to calling file_ref_put(), but it trades lower + * performance in face of other CPUs also modifying the refcount for higher + * performance when this happens to be the last reference. + * + * For the last reference file_ref_put() issues 2 atomics. One to drop the + * reference and another to transition it to FILE_REF_DEAD. This routine does + * the work in one step, but in order to do it has to pre-read the variable which + * decreases scalability. + * + * Use with close() et al, stick to file_ref_put() by default. + */ +static __always_inline __must_check bool file_ref_put_close(file_ref_t *ref) +{ + long old, new; + + old = atomic_long_read(&ref->refcnt); + do { + if (unlikely(old < 0)) + return __file_ref_put_badval(ref, old); + + if (old == FILE_REF_ONEREF) + new = FILE_REF_DEAD; + else + new = old - 1; + } while (!atomic_long_try_cmpxchg(&ref->refcnt, &old, new)); + + return new == FILE_REF_DEAD; +} + +/** * file_ref_read - Read the number of file references * @ref: Pointer to the reference count * @@ -174,4 +208,18 @@ static inline unsigned long file_ref_read(file_ref_t *ref) return c >= FILE_REF_RELEASED ? 0 : c + 1; } +/* + * __file_ref_read_raw - Return the value stored in ref->refcnt + * @ref: Pointer to the reference count + * + * Return: The raw value found in the counter + * + * A hack for file_needs_f_pos_lock(), you probably want to use + * file_ref_read() instead. + */ +static inline unsigned long __file_ref_read_raw(file_ref_t *ref) +{ + return atomic_long_read(&ref->refcnt); +} + #endif |