summary | refs | log | tree | commit | diff
path: root/mm/memory.c
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2025-01-23 13:36:06 -0800
committer: Linus Torvalds <torvalds@linux-foundation.org> 2025-01-23 13:36:06 -0800
commit: 8883957b3c9de2087fb6cf9691c1188cccf1ac9c (patch)
tree: edd99014c5e520d44d6fe572a0c9d4776307b53a /mm/memory.c
parent: fb6fec6bdd9b16a935a0557773e313262366d071 (diff)
parent: 0c0214df28f0dba8de084cb4dedc0c459dfbc083 (diff)
Merge tag 'fsnotify_hsm_for_v6.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs
Pull fsnotify pre-content notification support from Jan Kara: "This introduces a new fsnotify event (FS_PRE_ACCESS) that gets generated before a file's contents are accessed. The event is synchronous, so if there is a listener for this event, the kernel waits for a reply. On success the execution continues as usual; on failure we propagate the error to userspace. This allows userspace to fill in file content on demand from slow storage. The context in which the events are generated has been picked so that we don't hold any locks and thus there's no risk of a deadlock for the userspace handler. The new pre-content event is available only for users with the global CAP_SYS_ADMIN capability (similarly to other parts of fanotify functionality) and it is the administrator's responsibility to make sure the userspace event handler doesn't do stupid stuff that can DoS the system. Based on your feedback from the last submission, the fsnotify code has been improved and now file->f_mode encodes whether a pre-content event needs to be generated for the file, so the fast path when nobody wants a pre-content event for the file just grows the additional file->f_mode check. As a bonus this also removes from the fast path the checks whether the old FS_ACCESS event needs to be generated. Also the place where the event is generated during a page fault has been moved, so now filemap_fault() generates the event if and only if there is no uptodate folio in the page cache.
Also we have dropped the FS_PRE_MODIFY event as current real-world users of the pre-content functionality don't really use it so let's start with the minimal useful feature set"

* tag 'fsnotify_hsm_for_v6.14-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/jack/linux-fs: (21 commits)
  fanotify: Fix crash in fanotify_init(2)
  fs: don't block write during exec on pre-content watched files
  fs: enable pre-content events on supported file systems
  ext4: add pre-content fsnotify hook for DAX faults
  btrfs: disable defrag on pre-content watched files
  xfs: add pre-content fsnotify hook for DAX faults
  fsnotify: generate pre-content permission event on page fault
  mm: don't allow huge faults for files with pre content watches
  fanotify: disable readahead if we have pre-content watches
  fanotify: allow to set errno in FAN_DENY permission response
  fanotify: report file range info with pre-content events
  fanotify: introduce FAN_PRE_ACCESS permission event
  fsnotify: generate pre-content permission event on truncate
  fsnotify: pass optional file access range in pre-content event
  fsnotify: introduce pre-content permission events
  fanotify: reserve event bit of deprecated FAN_DIR_MODIFY
  fanotify: rename a misnamed constant
  fanotify: don't skip extra event info if no info_mode is set
  fsnotify: check if file is actually being watched for pre-content events on open
  fsnotify: opt-in for permission events at file open time
  ...
Diffstat (limited to 'mm/memory.c')
-rw-r--r-- mm/memory.c 19
1 files changed, 19 insertions, 0 deletions
diff --git a/mm/memory.c b/mm/memory.c
index 398c031be9ba..f8bebec145f5 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -76,6 +76,7 @@
#include <linux/ptrace.h>
#include <linux/vmalloc.h>
#include <linux/sched/sysctl.h>
+#include <linux/fsnotify.h>
#include <trace/events/kmem.h>
@@ -5662,8 +5663,17 @@ out_map:
static inline vm_fault_t create_huge_pmd(struct vm_fault *vmf)
{
struct vm_area_struct *vma = vmf->vma;
+
if (vma_is_anonymous(vma))
return do_huge_pmd_anonymous_page(vmf);
+ /*
+ * Currently we just emit PAGE_SIZE for our fault events, so don't allow
+ * a huge fault if we have a pre content watch on this file. This would
+ * be trivial to support, but there would need to be tests to ensure
+ * this works properly and those don't exist currently.
+ */
+ if (unlikely(FMODE_FSNOTIFY_HSM(vma->vm_file->f_mode)))
+ return VM_FAULT_FALLBACK;
if (vma->vm_ops->huge_fault)
return vma->vm_ops->huge_fault(vmf, PMD_ORDER);
return VM_FAULT_FALLBACK;
@@ -5687,6 +5697,9 @@ static inline vm_fault_t wp_huge_pmd(struct vm_fault *vmf)
}
if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) {
+ /* See comment in create_huge_pmd. */
+ if (unlikely(FMODE_FSNOTIFY_HSM(vma->vm_file->f_mode)))
+ goto split;
if (vma->vm_ops->huge_fault) {
ret = vma->vm_ops->huge_fault(vmf, PMD_ORDER);
if (!(ret & VM_FAULT_FALLBACK))
@@ -5709,6 +5722,9 @@ static vm_fault_t create_huge_pud(struct vm_fault *vmf)
/* No support for anonymous transparent PUD pages yet */
if (vma_is_anonymous(vma))
return VM_FAULT_FALLBACK;
+ /* See comment in create_huge_pmd. */
+ if (unlikely(FMODE_FSNOTIFY_HSM(vma->vm_file->f_mode)))
+ return VM_FAULT_FALLBACK;
if (vma->vm_ops->huge_fault)
return vma->vm_ops->huge_fault(vmf, PUD_ORDER);
#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
@@ -5726,6 +5742,9 @@ static vm_fault_t wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud)
if (vma_is_anonymous(vma))
goto split;
if (vma->vm_flags & (VM_SHARED | VM_MAYSHARE)) {
+ /* See comment in create_huge_pmd. */
+ if (unlikely(FMODE_FSNOTIFY_HSM(vma->vm_file->f_mode)))
+ goto split;
if (vma->vm_ops->huge_fault) {
ret = vma->vm_ops->huge_fault(vmf, PUD_ORDER);
if (!(ret & VM_FAULT_FALLBACK))