summary | refs | log | tree | commit | diff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r-- fs/Kconfig 1
-rw-r--r-- fs/bcachefs/util.h 2
-rw-r--r-- fs/buffer.c 14
-rw-r--r-- fs/dax.c 369
-rw-r--r-- fs/ext4/inline.c 2
-rw-r--r-- fs/ext4/inode.c 18
-rw-r--r-- fs/ext4/move_extent.c 2
-rw-r--r-- fs/fuse/dax.c 30
-rw-r--r-- fs/fuse/dir.c 2
-rw-r--r-- fs/fuse/file.c 4
-rw-r--r-- fs/fuse/virtio_fs.c 3
-rw-r--r-- fs/hugetlbfs/inode.c 28
-rw-r--r-- fs/iomap/buffered-io.c 2
-rw-r--r-- fs/ocfs2/aops.c 4
-rw-r--r-- fs/ocfs2/file.c 2
-rw-r--r-- fs/proc/internal.h 43
-rw-r--r-- fs/proc/meminfo.c 2
-rw-r--r-- fs/proc/page.c 11
-rw-r--r-- fs/proc/task_mmu.c 56
-rw-r--r-- fs/udf/file.c 2
-rw-r--r-- fs/xfs/xfs_inode.c 31
-rw-r--r-- fs/xfs/xfs_inode.h 2
-rw-r--r-- fs/xfs/xfs_super.c 12
23 files changed, 433 insertions, 209 deletions
diff --git a/fs/Kconfig b/fs/Kconfig
index afe21866d6b4..c718b2e2de0e 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -286,6 +286,7 @@ config HUGETLB_PAGE_OPTIMIZE_VMEMMAP
def_bool HUGETLB_PAGE
depends on ARCH_WANT_OPTIMIZE_HUGETLB_VMEMMAP
depends on SPARSEMEM_VMEMMAP
+ select SPARSEMEM_VMEMMAP_PREINIT if ARCH_WANT_HUGETLB_VMEMMAP_PREINIT
config HUGETLB_PMD_PAGE_TABLE_SHARING
def_bool HUGETLB_PAGE
diff --git a/fs/bcachefs/util.h b/fs/bcachefs/util.h
index 1e94f89aabed..6ba5071ab6dd 100644
--- a/fs/bcachefs/util.h
+++ b/fs/bcachefs/util.h
@@ -622,7 +622,7 @@ do { \
#define per_cpu_sum(_p) \
({ \
- typeof(*_p) _ret = 0; \
+ TYPEOF_UNQUAL(*_p) _ret = 0; \
\
int cpu; \
for_each_possible_cpu(cpu) \
diff --git a/fs/buffer.c b/fs/buffer.c
index 194eacbefc95..c7abb4a029dc 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -2166,7 +2166,7 @@ int __block_write_begin(struct folio *folio, loff_t pos, unsigned len,
}
EXPORT_SYMBOL(__block_write_begin);
-static void __block_commit_write(struct folio *folio, size_t from, size_t to)
+void block_commit_write(struct folio *folio, size_t from, size_t to)
{
size_t block_start, block_end;
bool partial = false;
@@ -2204,6 +2204,7 @@ static void __block_commit_write(struct folio *folio, size_t from, size_t to)
if (!partial)
folio_mark_uptodate(folio);
}
+EXPORT_SYMBOL(block_commit_write);
/*
* block_write_begin takes care of the basic task of block allocation and
@@ -2262,7 +2263,7 @@ int block_write_end(struct file *file, struct address_space *mapping,
flush_dcache_folio(folio);
/* This could be a short (even 0-length) commit */
- __block_commit_write(folio, start, start + copied);
+ block_commit_write(folio, start, start + copied);
return copied;
}
@@ -2566,13 +2567,6 @@ int cont_write_begin(struct file *file, struct address_space *mapping,
}
EXPORT_SYMBOL(cont_write_begin);
-void block_commit_write(struct page *page, unsigned from, unsigned to)
-{
- struct folio *folio = page_folio(page);
- __block_commit_write(folio, from, to);
-}
-EXPORT_SYMBOL(block_commit_write);
-
/*
* block_page_mkwrite() is not allowed to change the file size as it gets
* called from a page fault handler when a page is first dirtied. Hence we must
@@ -2618,7 +2612,7 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
if (unlikely(ret))
goto out_unlock;
- __block_commit_write(folio, 0, end);
+ block_commit_write(folio, 0, end);
folio_mark_dirty(folio);
folio_wait_stable(folio);
diff --git a/fs/dax.c b/fs/dax.c
index 7fd4cd9a51f2..af5045b0f476 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -71,6 +71,11 @@ static unsigned long dax_to_pfn(void *entry)
return xa_to_value(entry) >> DAX_SHIFT;
}
+static struct folio *dax_to_folio(void *entry)
+{
+ return page_folio(pfn_to_page(dax_to_pfn(entry)));
+}
+
static void *dax_make_entry(pfn_t pfn, unsigned long flags)
{
return xa_mk_value(flags | (pfn_t_to_pfn(pfn) << DAX_SHIFT));
@@ -206,7 +211,7 @@ static void dax_wake_entry(struct xa_state *xas, void *entry,
*
* Must be called with the i_pages lock held.
*/
-static void *get_unlocked_entry(struct xa_state *xas, unsigned int order)
+static void *get_next_unlocked_entry(struct xa_state *xas, unsigned int order)
{
void *entry;
struct wait_exceptional_entry_queue ewait;
@@ -236,6 +241,37 @@ static void *get_unlocked_entry(struct xa_state *xas, unsigned int order)
}
/*
+ * Wait for the given entry to become unlocked. Caller must hold the i_pages
+ * lock and call either put_unlocked_entry() if it did not lock the entry or
+ * dax_unlock_entry() if it did. Returns an unlocked entry if still present.
+ */
+static void *wait_entry_unlocked_exclusive(struct xa_state *xas, void *entry)
+{
+ struct wait_exceptional_entry_queue ewait;
+ wait_queue_head_t *wq;
+
+ init_wait(&ewait.wait);
+ ewait.wait.func = wake_exceptional_entry_func;
+
+ while (unlikely(dax_is_locked(entry))) {
+ wq = dax_entry_waitqueue(xas, entry, &ewait.key);
+ prepare_to_wait_exclusive(wq, &ewait.wait,
+ TASK_UNINTERRUPTIBLE);
+ xas_pause(xas);
+ xas_unlock_irq(xas);
+ schedule();
+ finish_wait(wq, &ewait.wait);
+ xas_lock_irq(xas);
+ entry = xas_load(xas);
+ }
+
+ if (xa_is_internal(entry))
+ return NULL;
+
+ return entry;
+}
+
+/*
* The only thing keeping the address space around is the i_pages lock
* (it's cycled in clear_inode() after removing the entries from i_pages)
* After we call xas_unlock_irq(), we cannot touch xas->xa.
@@ -250,7 +286,7 @@ static void wait_entry_unlocked(struct xa_state *xas, void *entry)
wq = dax_entry_waitqueue(xas, entry, &ewait.key);
/*
- * Unlike get_unlocked_entry() there is no guarantee that this
+ * Unlike get_next_unlocked_entry() there is no guarantee that this
* path ever successfully retrieves an unlocked entry before an
* inode dies. Perform a non-exclusive wait in case this path
* never successfully performs its own wake up.
@@ -307,109 +343,156 @@ static unsigned long dax_entry_size(void *entry)
return PAGE_SIZE;
}
-static unsigned long dax_end_pfn(void *entry)
+/*
+ * A DAX folio is considered shared if it has no mapping set and ->share (which
+ * shares the ->index field) is non-zero. Note this may return false even if the
+ * page is shared between multiple files but has not yet actually been mapped
+ * into multiple address spaces.
+ */
+static inline bool dax_folio_is_shared(struct folio *folio)
{
- return dax_to_pfn(entry) + dax_entry_size(entry) / PAGE_SIZE;
+ return !folio->mapping && folio->share;
}
/*
- * Iterate through all mapped pfns represented by an entry, i.e. skip
- * 'empty' and 'zero' entries.
+ * When dax_associate_entry() is called by dax_insert_entry(), the shared flag
+ * will indicate whether this entry is shared by multiple files. If the page
+ * has not previously been associated with any mappings the ->mapping and
+ * ->index fields will be set. If it has already been associated with a mapping
+ * the mapping will be cleared and the share count set. It's then up to reverse
+ * map users like memory_failure() to call back into the filesystem to recover
+ * ->mapping and ->index information, for example by implementing
+ * dax_holder_operations.
*/
-#define for_each_mapped_pfn(entry, pfn) \
- for (pfn = dax_to_pfn(entry); \
- pfn < dax_end_pfn(entry); pfn++)
-
-static inline bool dax_page_is_shared(struct page *page)
+static void dax_folio_make_shared(struct folio *folio)
{
- return page->mapping == PAGE_MAPPING_DAX_SHARED;
+ /*
+ * folio is not currently shared so mark it as shared by clearing
+ * folio->mapping.
+ */
+ folio->mapping = NULL;
+
+ /*
+ * folio has previously been mapped into one address space so set the
+ * share count.
+ */
+ folio->share = 1;
}
-/*
- * Set the page->mapping with PAGE_MAPPING_DAX_SHARED flag, increase the
- * refcount.
- */
-static inline void dax_page_share_get(struct page *page)
+static inline unsigned long dax_folio_put(struct folio *folio)
{
- if (page->mapping != PAGE_MAPPING_DAX_SHARED) {
+ unsigned long ref;
+ int order, i;
+
+ if (!dax_folio_is_shared(folio))
+ ref = 0;
+ else
+ ref = --folio->share;
+
+ if (ref)
+ return ref;
+
+ folio->mapping = NULL;
+ order = folio_order(folio);
+ if (!order)
+ return 0;
+
+ for (i = 0; i < (1UL << order); i++) {
+ struct dev_pagemap *pgmap = page_pgmap(&folio->page);
+ struct page *page = folio_page(folio, i);
+ struct folio *new_folio = (struct folio *)page;
+
+ ClearPageHead(page);
+ clear_compound_head(page);
+
+ new_folio->mapping = NULL;
/*
- * Reset the index if the page was already mapped
- * regularly before.
+ * Reset pgmap which was over-written by
+ * prep_compound_page().
*/
- if (page->mapping)
- page->share = 1;
- page->mapping = PAGE_MAPPING_DAX_SHARED;
+ new_folio->pgmap = pgmap;
+ new_folio->share = 0;
+ WARN_ON_ONCE(folio_ref_count(new_folio));
}
- page->share++;
+
+ return ref;
}
-static inline unsigned long dax_page_share_put(struct page *page)
+static void dax_folio_init(void *entry)
{
- return --page->share;
+ struct folio *folio = dax_to_folio(entry);
+ int order = dax_entry_order(entry);
+
+ /*
+ * Folio should have been split back to order-0 pages in
+ * dax_folio_put() when they were removed from their
+ * final mapping.
+ */
+ WARN_ON_ONCE(folio_order(folio));
+
+ if (order > 0) {
+ prep_compound_page(&folio->page, order);
+ if (order > 1)
+ INIT_LIST_HEAD(&folio->_deferred_list);
+ WARN_ON_ONCE(folio_ref_count(folio));
+ }
}
-/*
- * When it is called in dax_insert_entry(), the shared flag will indicate that
- * whether this entry is shared by multiple files. If so, set the page->mapping
- * PAGE_MAPPING_DAX_SHARED, and use page->share as refcount.
- */
static void dax_associate_entry(void *entry, struct address_space *mapping,
- struct vm_area_struct *vma, unsigned long address, bool shared)
+ struct vm_area_struct *vma,
+ unsigned long address, bool shared)
{
- unsigned long size = dax_entry_size(entry), pfn, index;
- int i = 0;
+ unsigned long size = dax_entry_size(entry), index;
+ struct folio *folio = dax_to_folio(entry);
+
+ if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry))
+ return;
if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
return;
index = linear_page_index(vma, address & ~(size - 1));
- for_each_mapped_pfn(entry, pfn) {
- struct page *page = pfn_to_page(pfn);
+ if (shared && (folio->mapping || dax_folio_is_shared(folio))) {
+ if (folio->mapping)
+ dax_folio_make_shared(folio);
- if (shared) {
- dax_page_share_get(page);
- } else {
- WARN_ON_ONCE(page->mapping);
- page->mapping = mapping;
- page->index = index + i++;
- }
+ WARN_ON_ONCE(!folio->share);
+ WARN_ON_ONCE(dax_entry_order(entry) != folio_order(folio));
+ folio->share++;
+ } else {
+ WARN_ON_ONCE(folio->mapping);
+ dax_folio_init(entry);
+ folio = dax_to_folio(entry);
+ folio->mapping = mapping;
+ folio->index = index;
}
}
static void dax_disassociate_entry(void *entry, struct address_space *mapping,
- bool trunc)
+ bool trunc)
{
- unsigned long pfn;
+ struct folio *folio = dax_to_folio(entry);
if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
return;
- for_each_mapped_pfn(entry, pfn) {
- struct page *page = pfn_to_page(pfn);
-
- WARN_ON_ONCE(trunc && page_ref_count(page) > 1);
- if (dax_page_is_shared(page)) {
- /* keep the shared flag if this page is still shared */
- if (dax_page_share_put(page) > 0)
- continue;
- } else
- WARN_ON_ONCE(page->mapping && page->mapping != mapping);
- page->mapping = NULL;
- page->index = 0;
- }
+ if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry))
+ return;
+
+ dax_folio_put(folio);
}
static struct page *dax_busy_page(void *entry)
{
- unsigned long pfn;
+ struct folio *folio = dax_to_folio(entry);
- for_each_mapped_pfn(entry, pfn) {
- struct page *page = pfn_to_page(pfn);
+ if (dax_is_zero_entry(entry) || dax_is_empty_entry(entry))
+ return NULL;
- if (page_ref_count(page) > 1)
- return page;
- }
- return NULL;
+ if (folio_ref_count(folio) - folio_mapcount(folio))
+ return &folio->page;
+ else
+ return NULL;
}
/**
@@ -580,7 +663,7 @@ static void *grab_mapping_entry(struct xa_state *xas,
retry:
pmd_downgrade = false;
xas_lock_irq(xas);
- entry = get_unlocked_entry(xas, order);
+ entry = get_next_unlocked_entry(xas, order);
if (entry) {
if (dax_is_conflict(entry))
@@ -690,7 +773,7 @@ struct page *dax_layout_busy_page_range(struct address_space *mapping,
if (IS_ENABLED(CONFIG_FS_DAX_LIMITED))
return NULL;
- if (!dax_mapping(mapping) || !mapping_mapped(mapping))
+ if (!dax_mapping(mapping))
return NULL;
/* If end == LLONG_MAX, all pages from start to till end of file */
@@ -716,8 +799,7 @@ struct page *dax_layout_busy_page_range(struct address_space *mapping,
xas_for_each(&xas, entry, end_idx) {
if (WARN_ON_ONCE(!xa_is_value(entry)))
continue;
- if (unlikely(dax_is_locked(entry)))
- entry = get_unlocked_entry(&xas, 0);
+ entry = wait_entry_unlocked_exclusive(&xas, entry);
if (entry)
page = dax_busy_page(entry);
put_unlocked_entry(&xas, entry, WAKE_NEXT);
@@ -743,14 +825,14 @@ struct page *dax_layout_busy_page(struct address_space *mapping)
EXPORT_SYMBOL_GPL(dax_layout_busy_page);
static int __dax_invalidate_entry(struct address_space *mapping,
- pgoff_t index, bool trunc)
+ pgoff_t index, bool trunc)
{
XA_STATE(xas, &mapping->i_pages, index);
int ret = 0;
void *entry;
xas_lock_irq(&xas);
- entry = get_unlocked_entry(&xas, 0);
+ entry = get_next_unlocked_entry(&xas, 0);
if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
goto out;
if (!trunc &&
@@ -776,7 +858,9 @@ static int __dax_clear_dirty_range(struct address_space *mapping,
xas_lock_irq(&xas);
xas_for_each(&xas, entry, end) {
- entry = get_unlocked_entry(&xas, 0);
+ entry = wait_entry_unlocked_exclusive(&xas, entry);
+ if (!entry)
+ continue;
xas_clear_mark(&xas, PAGECACHE_TAG_DIRTY);
xas_clear_mark(&xas, PAGECACHE_TAG_TOWRITE);
put_unlocked_entry(&xas, entry, WAKE_NEXT);
@@ -813,6 +897,107 @@ int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index)
return ret;
}
+void dax_delete_mapping_range(struct address_space *mapping,
+ loff_t start, loff_t end)
+{
+ void *entry;
+ pgoff_t start_idx = start >> PAGE_SHIFT;
+ pgoff_t end_idx;
+ XA_STATE(xas, &mapping->i_pages, start_idx);
+
+ /* If end == LLONG_MAX, all pages from start to till end of file */
+ if (end == LLONG_MAX)
+ end_idx = ULONG_MAX;
+ else
+ end_idx = end >> PAGE_SHIFT;
+
+ xas_lock_irq(&xas);
+ xas_for_each(&xas, entry, end_idx) {
+ if (!xa_is_value(entry))
+ continue;
+ entry = wait_entry_unlocked_exclusive(&xas, entry);
+ if (!entry)
+ continue;
+ dax_disassociate_entry(entry, mapping, true);
+ xas_store(&xas, NULL);
+ mapping->nrpages -= 1UL << dax_entry_order(entry);
+ put_unlocked_entry(&xas, entry, WAKE_ALL);
+ }
+ xas_unlock_irq(&xas);
+}
+EXPORT_SYMBOL_GPL(dax_delete_mapping_range);
+
+static int wait_page_idle(struct page *page,
+ void (cb)(struct inode *),
+ struct inode *inode)
+{
+ return ___wait_var_event(page, dax_page_is_idle(page),
+ TASK_INTERRUPTIBLE, 0, 0, cb(inode));
+}
+
+static void wait_page_idle_uninterruptible(struct page *page,
+ struct inode *inode)
+{
+ ___wait_var_event(page, dax_page_is_idle(page),
+ TASK_UNINTERRUPTIBLE, 0, 0, schedule());
+}
+
+/*
+ * Unmaps the inode and waits for any DMA to complete prior to deleting the
+ * DAX mapping entries for the range.
+ *
+ * For NOWAIT behavior, pass @cb as NULL to exit early on the first busy
+ * page found.
+ */
+int dax_break_layout(struct inode *inode, loff_t start, loff_t end,
+ void (cb)(struct inode *))
+{
+ struct page *page;
+ int error = 0;
+
+ if (!dax_mapping(inode->i_mapping))
+ return 0;
+
+ do {
+ page = dax_layout_busy_page_range(inode->i_mapping, start, end);
+ if (!page)
+ break;
+ if (!cb) {
+ error = -ERESTARTSYS;
+ break;
+ }
+
+ error = wait_page_idle(page, cb, inode);
+ } while (error == 0);
+
+ if (!page)
+ dax_delete_mapping_range(inode->i_mapping, start, end);
+
+ return error;
+}
+EXPORT_SYMBOL_GPL(dax_break_layout);
+
+void dax_break_layout_final(struct inode *inode)
+{
+ struct page *page;
+
+ if (!dax_mapping(inode->i_mapping))
+ return;
+
+ do {
+ page = dax_layout_busy_page_range(inode->i_mapping, 0,
+ LLONG_MAX);
+ if (!page)
+ break;
+
+ wait_page_idle_uninterruptible(page, inode);
+ } while (true);
+
+ if (!page)
+ dax_delete_mapping_range(inode->i_mapping, 0, LLONG_MAX);
+}
+EXPORT_SYMBOL_GPL(dax_break_layout_final);
+
/*
* Invalidate DAX entry if it is clean.
*/
@@ -895,8 +1080,9 @@ static void *dax_insert_entry(struct xa_state *xas, struct vm_fault *vmf,
void *old;
dax_disassociate_entry(entry, mapping, false);
- dax_associate_entry(new_entry, mapping, vmf->vma, vmf->address,
- shared);
+ dax_associate_entry(new_entry, mapping, vmf->vma,
+ vmf->address, shared);
+
/*
* Only swap our new entry into the page cache if the current
* entry is a zero page or an empty entry. If a normal PTE or
@@ -940,7 +1126,7 @@ static int dax_writeback_one(struct xa_state *xas, struct dax_device *dax_dev,
if (unlikely(dax_is_locked(entry))) {
void *old_entry = entry;
- entry = get_unlocked_entry(xas, 0);
+ entry = get_next_unlocked_entry(xas, 0);
/* Entry got punched out / reallocated? */
if (!entry || WARN_ON_ONCE(!xa_is_value(entry)))
@@ -1084,9 +1270,7 @@ static int dax_iomap_direct_access(const struct iomap *iomap, loff_t pos,
goto out;
if (pfn_t_to_pfn(*pfnp) & (PHYS_PFN(size)-1))
goto out;
- /* For larger pages we need devmap */
- if (length > 1 && !pfn_t_devmap(*pfnp))
- goto out;
+
rc = 0;
out_check_addr:
@@ -1193,7 +1377,7 @@ static vm_fault_t dax_load_hole(struct xa_state *xas, struct vm_fault *vmf,
*entry = dax_insert_entry(xas, vmf, iter, *entry, pfn, DAX_ZERO_PAGE);
- ret = vmf_insert_mixed(vmf->vma, vaddr, pfn);
+ ret = vmf_insert_page_mkwrite(vmf, pfn_t_to_page(pfn), false);
trace_dax_load_hole(inode, vmf, ret);
return ret;
}
@@ -1664,7 +1848,8 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf,
loff_t pos = (loff_t)xas->xa_index << PAGE_SHIFT;
bool write = iter->flags & IOMAP_WRITE;
unsigned long entry_flags = pmd ? DAX_PMD : 0;
- int err = 0;
+ struct folio *folio;
+ int ret, err = 0;
pfn_t pfn;
void *kaddr;
@@ -1696,17 +1881,19 @@ static vm_fault_t dax_fault_iter(struct vm_fault *vmf,
return dax_fault_return(err);
}
+ folio = dax_to_folio(*entry);
if (dax_fault_is_synchronous(iter, vmf->vma))
return dax_fault_synchronous_pfnp(pfnp, pfn);
- /* insert PMD pfn */
+ folio_ref_inc(folio);
if (pmd)
- return vmf_insert_pfn_pmd(vmf, pfn, write);
+ ret = vmf_insert_folio_pmd(vmf, pfn_folio(pfn_t_to_pfn(pfn)),
+ write);
+ else
+ ret = vmf_insert_page_mkwrite(vmf, pfn_t_to_page(pfn), write);
+ folio_put(folio);
- /* insert PTE pfn */
- if (write)
- return vmf_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);
- return vmf_insert_mixed(vmf->vma, vmf->address, pfn);
+ return ret;
}
static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, pfn_t *pfnp,
@@ -1949,11 +2136,12 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
{
struct address_space *mapping = vmf->vma->vm_file->f_mapping;
XA_STATE_ORDER(xas, &mapping->i_pages, vmf->pgoff, order);
+ struct folio *folio;
void *entry;
vm_fault_t ret;
xas_lock_irq(&xas);
- entry = get_unlocked_entry(&xas, order);
+ entry = get_next_unlocked_entry(&xas, order);
/* Did we race with someone splitting entry or so? */
if (!entry || dax_is_conflict(entry) ||
(order == 0 && !dax_is_pte_entry(entry))) {
@@ -1966,14 +2154,17 @@ dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order)
xas_set_mark(&xas, PAGECACHE_TAG_DIRTY);
dax_lock_entry(&xas, entry);
xas_unlock_irq(&xas);
+ folio = pfn_folio(pfn_t_to_pfn(pfn));
+ folio_ref_inc(folio);
if (order == 0)
- ret = vmf_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn);
+ ret = vmf_insert_page_mkwrite(vmf, &folio->page, true);
#ifdef CONFIG_FS_DAX_PMD
else if (order == PMD_ORDER)
- ret = vmf_insert_pfn_pmd(vmf, pfn, FAULT_FLAG_WRITE);
+ ret = vmf_insert_folio_pmd(vmf, folio, FAULT_FLAG_WRITE);
#endif
else
ret = VM_FAULT_FALLBACK;
+ folio_put(folio);
dax_unlock_entry(&xas, entry);
trace_dax_insert_pfn_mkwrite(mapping->host, vmf, ret);
return ret;
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index f608f6554b95..2c9b762925c7 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -642,7 +642,7 @@ retry:
goto retry;
if (folio)
- block_commit_write(&folio->page, from, to);
+ block_commit_write(folio, from, to);
out:
if (folio) {
folio_unlock(folio);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index bcb96caf77c0..1dc09ed5d403 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -182,6 +182,8 @@ void ext4_evict_inode(struct inode *inode)
trace_ext4_evict_inode(inode);
+ dax_break_layout_final(inode);
+
if (EXT4_I(inode)->i_flags & EXT4_EA_INODE_FL)
ext4_evict_ea_inode(inode);
if (inode->i_nlink) {
@@ -3981,24 +3983,10 @@ static void ext4_wait_dax_page(struct inode *inode)
int ext4_break_layouts(struct inode *inode)
{
- struct page *page;
- int error;
-
if (WARN_ON_ONCE(!rwsem_is_locked(&inode->i_mapping->invalidate_lock)))
return -EINVAL;
- do {
- page = dax_layout_busy_page(inode->i_mapping);
- if (!page)
- return 0;
-
- error = ___wait_var_event(&page->_refcount,
- atomic_read(&page->_refcount) == 1,
- TASK_INTERRUPTIBLE, 0, 0,
- ext4_wait_dax_page(inode));
- } while (error == 0);
-
- return error;
+ return dax_break_layout_inode(inode, ext4_wait_dax_page);
}
/*
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 898443e98efc..48649be64d6a 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -399,7 +399,7 @@ data_copy:
bh = bh->b_this_page;
}
- block_commit_write(&folio[0]->page, from, from + replaced_size);
+ block_commit_write(folio[0], from, from + replaced_size);
/* Even in case of data=writeback it is reasonable to pin
* inode to transaction, to prevent unexpected data loss */
diff --git a/fs/fuse/dax.c b/fs/fuse/dax.c
index 0b6ee6dd1fd6..0502bf3cdf6a 100644
--- a/fs/fuse/dax.c
+++ b/fs/fuse/dax.c
@@ -666,36 +666,12 @@ static void fuse_wait_dax_page(struct inode *inode)
filemap_invalidate_lock(inode->i_mapping);
}
-/* Should be called with mapping->invalidate_lock held exclusively */
-static int __fuse_dax_break_layouts(struct inode *inode, bool *retry,
- loff_t start, loff_t end)
-{
- struct page *page;
-
- page = dax_layout_busy_page_range(inode->i_mapping, start, end);
- if (!page)
- return 0;
-
- *retry = true;
- return ___wait_var_event(&page->_refcount,
- atomic_read(&page->_refcount) == 1, TASK_INTERRUPTIBLE,
- 0, 0, fuse_wait_dax_page(inode));
-}
-
-/* dmap_end == 0 leads to unmapping of whole file */
+/* Should be called with mapping->invalidate_lock held exclusively. */
int fuse_dax_break_layouts(struct inode *inode, u64 dmap_start,
u64 dmap_end)
{
- bool retry;
- int ret;
-
- do {
- retry = false;
- ret = __fuse_dax_break_layouts(inode, &retry, dmap_start,
- dmap_end);
- } while (ret == 0 && retry);
-
- return ret;
+ return dax_break_layout(inode, dmap_start, dmap_end,
+ fuse_wait_dax_page);
}
ssize_t fuse_dax_read_iter(struct kiocb *iocb, struct iov_iter *to)
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c
index fa8f1141ea74..85e4f894a59f 100644
--- a/fs/fuse/dir.c
+++ b/fs/fuse/dir.c
@@ -1954,7 +1954,7 @@ int fuse_do_setattr(struct mnt_idmap *idmap, struct dentry *dentry,
if (FUSE_IS_DAX(inode) && is_truncate) {
filemap_invalidate_lock(mapping);
fault_blocked = true;
- err = fuse_dax_break_layouts(inode, 0, 0);
+ err = fuse_dax_break_layouts(inode, 0, -1);
if (err) {
filemap_invalidate_unlock(mapping);
return err;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index d63e56fd3dd2..754378dd9f71 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -253,7 +253,7 @@ static int fuse_open(struct inode *inode, struct file *file)
if (dax_truncate) {
filemap_invalidate_lock(inode->i_mapping);
- err = fuse_dax_break_layouts(inode, 0, 0);
+ err = fuse_dax_break_layouts(inode, 0, -1);
if (err)
goto out_inode_unlock;
}
@@ -3205,7 +3205,7 @@ static long fuse_file_fallocate(struct file *file, int mode, loff_t offset,
inode_lock(inode);
if (block_faults) {
filemap_invalidate_lock(inode->i_mapping);
- err = fuse_dax_break_layouts(inode, 0, 0);
+ err = fuse_dax_break_layouts(inode, 0, -1);
if (err)
goto out;
}
diff --git a/fs/fuse/virtio_fs.c b/fs/fuse/virtio_fs.c
index 82afe78ec542..2c7b24cb67ad 100644
--- a/fs/fuse/virtio_fs.c
+++ b/fs/fuse/virtio_fs.c
@@ -1017,8 +1017,7 @@ static long virtio_fs_direct_access(struct dax_device *dax_dev, pgoff_t pgoff,
if (kaddr)
*kaddr = fs->window_kaddr + offset;
if (pfn)
- *pfn = phys_to_pfn_t(fs->window_phys_addr + offset,
- PFN_DEV | PFN_MAP);
+ *pfn = phys_to_pfn_t(fs->window_phys_addr + offset, 0);
return nr_pages > max_nr_pages ? max_nr_pages : nr_pages;
}
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index d98caedbb723..e4de5425838d 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -193,19 +193,21 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
}
/*
- * Someone wants to read @bytes from a HWPOISON hugetlb @page from @offset.
+ * Someone wants to read @bytes from a HWPOISON hugetlb @folio from @offset.
* Returns the maximum number of bytes one can read without touching the 1st raw
- * HWPOISON subpage.
+ * HWPOISON page.
*
* The implementation borrows the iteration logic from copy_page_to_iter*.
*/
-static size_t adjust_range_hwpoison(struct page *page, size_t offset, size_t bytes)
+static size_t adjust_range_hwpoison(struct folio *folio, size_t offset,
+ size_t bytes)
{
+ struct page *page;
size_t n = 0;
size_t res = 0;
- /* First subpage to start the loop. */
- page = nth_page(page, offset / PAGE_SIZE);
+ /* First page to start the loop. */
+ page = folio_page(folio, offset / PAGE_SIZE);
offset %= PAGE_SIZE;
while (1) {
if (is_raw_hwpoison_page_in_hugepage(page))
@@ -278,10 +280,10 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
else {
/*
* Adjust how many bytes safe to read without
- * touching the 1st raw HWPOISON subpage after
+ * touching the 1st raw HWPOISON page after
* offset.
*/
- want = adjust_range_hwpoison(&folio->page, offset, nr);
+ want = adjust_range_hwpoison(folio, offset, nr);
if (want == 0) {
folio_put(folio);
retval = -EIO;
@@ -338,8 +340,8 @@ static void hugetlb_delete_from_page_cache(struct folio *folio)
* mutex for the page in the mapping. So, we can not race with page being
* faulted into the vma.
*/
-static bool hugetlb_vma_maps_page(struct vm_area_struct *vma,
- unsigned long addr, struct page *page)
+static bool hugetlb_vma_maps_pfn(struct vm_area_struct *vma,
+ unsigned long addr, unsigned long pfn)
{
pte_t *ptep, pte;
@@ -351,7 +353,7 @@ static bool hugetlb_vma_maps_page(struct vm_area_struct *vma,
if (huge_pte_none(pte) || !pte_present(pte))
return false;
- if (pte_page(pte) == page)
+ if (pte_pfn(pte) == pfn)
return true;
return false;
@@ -396,7 +398,7 @@ static void hugetlb_unmap_file_folio(struct hstate *h,
{
struct rb_root_cached *root = &mapping->i_mmap;
struct hugetlb_vma_lock *vma_lock;
- struct page *page = &folio->page;
+ unsigned long pfn = folio_pfn(folio);
struct vm_area_struct *vma;
unsigned long v_start;
unsigned long v_end;
@@ -412,7 +414,7 @@ retry:
v_start = vma_offset_start(vma, start);
v_end = vma_offset_end(vma, end);
- if (!hugetlb_vma_maps_page(vma, v_start, page))
+ if (!hugetlb_vma_maps_pfn(vma, v_start, pfn))
continue;
if (!hugetlb_vma_trylock_write(vma)) {
@@ -462,7 +464,7 @@ retry:
*/
v_start = vma_offset_start(vma, start);
v_end = vma_offset_end(vma, end);
- if (hugetlb_vma_maps_page(vma, v_start, page))
+ if (hugetlb_vma_maps_pfn(vma, v_start, pfn))
unmap_hugepage_range(vma, v_start, v_end, NULL,
ZAP_FLAG_DROP_MARKER);
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 814b7f679486..31553372b33a 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -1480,7 +1480,7 @@ static int iomap_folio_mkwrite_iter(struct iomap_iter *iter,
&iter->iomap);
if (ret)
return ret;
- block_commit_write(&folio->page, 0, length);
+ block_commit_write(folio, 0, length);
} else {
WARN_ON_ONCE(!folio_test_uptodate(folio));
folio_mark_dirty(folio);
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index 5bbeb6fbb1ac..ee1d92ed950f 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -920,7 +920,7 @@ static void ocfs2_write_failure(struct inode *inode,
ocfs2_jbd2_inode_add_write(wc->w_handle, inode,
user_pos, user_len);
- block_commit_write(&folio->page, from, to);
+ block_commit_write(folio, from, to);
}
}
}
@@ -2012,7 +2012,7 @@ int ocfs2_write_end_nolock(struct address_space *mapping, loff_t pos,
ocfs2_jbd2_inode_add_write(handle, inode,
start_byte, length);
}
- block_commit_write(&folio->page, from, to);
+ block_commit_write(folio, from, to);
}
}
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index e54f2c4b5a90..2056cf08ac1e 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -813,7 +813,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
/* must not update i_size! */
- block_commit_write(&folio->page, block_start + 1, block_start + 1);
+ block_commit_write(folio, block_start + 1, block_start + 1);
}
/*
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index 77a517f91821..96122e91c645 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -157,6 +157,7 @@ unsigned name_to_int(const struct qstr *qstr);
/* Worst case buffer size needed for holding an integer. */
#define PROC_NUMBUF 13
+#ifdef CONFIG_PAGE_MAPCOUNT
/**
* folio_precise_page_mapcount() - Number of mappings of this folio page.
* @folio: The folio.
@@ -187,7 +188,49 @@ static inline int folio_precise_page_mapcount(struct folio *folio,
return mapcount;
}
+#else /* !CONFIG_PAGE_MAPCOUNT */
+static inline int folio_precise_page_mapcount(struct folio *folio,
+ struct page *page)
+{
+ BUILD_BUG();
+}
+#endif /* CONFIG_PAGE_MAPCOUNT */
+/**
+ * folio_average_page_mapcount() - Average number of mappings per page in this
+ * folio
+ * @folio: The folio.
+ *
+ * The average number of user page table entries that reference each page in
+ * this folio as tracked via the RMAP: either referenced directly (PTE) or
+ * as part of a larger area that covers this page (e.g., PMD).
+ *
+ * The average is calculated by rounding to the nearest integer; however,
+ * to avoid duplicated code in current callers, the average is at least
+ * 1 if any page of the folio is mapped.
+ *
+ * Returns: The average number of mappings per page in this folio.
+ */
+static inline int folio_average_page_mapcount(struct folio *folio)
+{
+ int mapcount, entire_mapcount, avg;
+
+ if (!folio_test_large(folio))
+ return atomic_read(&folio->_mapcount) + 1;
+
+ mapcount = folio_large_mapcount(folio);
+ if (unlikely(mapcount <= 0))
+ return 0;
+ entire_mapcount = folio_entire_mapcount(folio);
+ if (mapcount <= entire_mapcount)
+ return entire_mapcount;
+ mapcount -= entire_mapcount;
+
+ /* Round to closest integer ... */
+ avg = ((unsigned int)mapcount + folio_large_nr_pages(folio) / 2) >> folio_large_order(folio);
+ /* ... but return at least 1. */
+ return max_t(int, avg + entire_mapcount, 1);
+}
/*
* array.c
*/
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index 8ba9b1472390..83be312159c9 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -162,6 +162,8 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
show_val_kb(m, "Unaccepted: ",
global_zone_page_state(NR_UNACCEPTED));
#endif
+ show_val_kb(m, "Balloon: ",
+ global_node_page_state(NR_BALLOON_PAGES));
hugetlb_report_meminfo(m);
diff --git a/fs/proc/page.c b/fs/proc/page.c
index a55f5acefa97..23fc771100ae 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -67,9 +67,14 @@ static ssize_t kpagecount_read(struct file *file, char __user *buf,
* memmaps that were actually initialized.
*/
page = pfn_to_online_page(pfn);
- if (page)
- mapcount = folio_precise_page_mapcount(page_folio(page),
- page);
+ if (page) {
+ struct folio *folio = page_folio(page);
+
+ if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT))
+ mapcount = folio_precise_page_mapcount(folio, page);
+ else
+ mapcount = folio_average_page_mapcount(folio);
+ }
if (put_user(mapcount, out)) {
ret = -EFAULT;
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index f02cd362309a..994cde10e3f4 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -707,6 +707,8 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
struct folio *folio = page_folio(page);
int i, nr = compound ? compound_nr(page) : 1;
unsigned long size = nr * PAGE_SIZE;
+ bool exclusive;
+ int mapcount;
/*
* First accumulate quantities that depend only on |size| and the type
@@ -747,18 +749,29 @@ static void smaps_account(struct mem_size_stats *mss, struct page *page,
dirty, locked, present);
return;
}
+
+ if (IS_ENABLED(CONFIG_NO_PAGE_MAPCOUNT)) {
+ mapcount = folio_average_page_mapcount(folio);
+ exclusive = !folio_maybe_mapped_shared(folio);
+ }
+
/*
* We obtain a snapshot of the mapcount. Without holding the folio lock
* this snapshot can be slightly wrong as we cannot always read the
* mapcount atomically.
*/
for (i = 0; i < nr; i++, page++) {
- int mapcount = folio_precise_page_mapcount(folio, page);
unsigned long pss = PAGE_SIZE << PSS_SHIFT;
+
+ if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT)) {
+ mapcount = folio_precise_page_mapcount(folio, page);
+ exclusive = mapcount < 2;
+ }
+
if (mapcount >= 2)
pss /= mapcount;
smaps_page_accumulate(mss, folio, PAGE_SIZE, pss,
- dirty, locked, mapcount < 2);
+ dirty, locked, exclusive);
}
}
@@ -1023,7 +1036,7 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
if (folio) {
/* We treat non-present entries as "maybe shared". */
- if (!present || folio_likely_mapped_shared(folio) ||
+ if (!present || folio_maybe_mapped_shared(folio) ||
hugetlb_pmd_shared(pte))
mss->shared_hugetlb += huge_page_size(hstate_vma(vma));
else
@@ -1632,6 +1645,7 @@ struct pagemapread {
#define PM_SOFT_DIRTY BIT_ULL(55)
#define PM_MMAP_EXCLUSIVE BIT_ULL(56)
#define PM_UFFD_WP BIT_ULL(57)
+#define PM_GUARD_REGION BIT_ULL(58)
#define PM_FILE BIT_ULL(61)
#define PM_SWAP BIT_ULL(62)
#define PM_PRESENT BIT_ULL(63)
@@ -1651,6 +1665,13 @@ static int add_to_pagemap(pagemap_entry_t *pme, struct pagemapread *pm)
return 0;
}
+static bool __folio_page_mapped_exclusively(struct folio *folio, struct page *page)
+{ /* Report whether @page of @folio counts as exclusively mapped. */
+ if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT)) /* with CONFIG_PAGE_MAPCOUNT: exclusive means the precise mapcount of @page is exactly 1 */
+ return folio_precise_page_mapcount(folio, page) == 1;
+ return !folio_maybe_mapped_shared(folio); /* otherwise the answer is per folio and @page is not consulted */
+}
+
static int pagemap_pte_hole(unsigned long start, unsigned long end,
__always_unused int depth, struct mm_walk *walk)
{
@@ -1732,6 +1753,8 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
page = pfn_swap_entry_to_page(entry);
if (pte_marker_entry_uffd_wp(entry))
flags |= PM_UFFD_WP;
+ if (is_guard_swp_entry(entry))
+ flags |= PM_GUARD_REGION;
}
if (page) {
@@ -1739,7 +1762,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm,
if (!folio_test_anon(folio))
flags |= PM_FILE;
if ((flags & PM_PRESENT) &&
- folio_precise_page_mapcount(folio, page) == 1)
+ __folio_page_mapped_exclusively(folio, page))
flags |= PM_MMAP_EXCLUSIVE;
}
if (vma->vm_flags & VM_SOFTDIRTY)
@@ -1814,7 +1837,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
pagemap_entry_t pme;
if (folio && (flags & PM_PRESENT) &&
- folio_precise_page_mapcount(folio, page + idx) == 1)
+ __folio_page_mapped_exclusively(folio, page))
cur_flags |= PM_MMAP_EXCLUSIVE;
pme = make_pme(frame, cur_flags);
@@ -1879,7 +1902,7 @@ static int pagemap_hugetlb_range(pte_t *ptep, unsigned long hmask,
if (!folio_test_anon(folio))
flags |= PM_FILE;
- if (!folio_likely_mapped_shared(folio) &&
+ if (!folio_maybe_mapped_shared(folio) &&
!hugetlb_pmd_shared(ptep))
flags |= PM_MMAP_EXCLUSIVE;
@@ -1931,7 +1954,8 @@ static const struct mm_walk_ops pagemap_ops = {
* Bit 55 pte is soft-dirty (see Documentation/admin-guide/mm/soft-dirty.rst)
* Bit 56 page exclusively mapped
* Bit 57 pte is uffd-wp write-protected
- * Bits 58-60 zero
+ * Bit 58 pte is a guard region
+ * Bits 59-60 zero
* Bit 61 page is file-page or shared-anon
* Bit 62 page swapped
* Bit 63 page present
@@ -2455,22 +2479,19 @@ static int pagemap_scan_pmd_entry(pmd_t *pmd, unsigned long start,
spinlock_t *ptl;
int ret;
- arch_enter_lazy_mmu_mode();
-
ret = pagemap_scan_thp_entry(pmd, start, end, walk);
- if (ret != -ENOENT) {
- arch_leave_lazy_mmu_mode();
+ if (ret != -ENOENT)
return ret;
- }
ret = 0;
start_pte = pte = pte_offset_map_lock(vma->vm_mm, pmd, start, &ptl);
if (!pte) {
- arch_leave_lazy_mmu_mode();
walk->action = ACTION_AGAIN;
return 0;
}
+ arch_enter_lazy_mmu_mode();
+
if ((p->arg.flags & PM_SCAN_WP_MATCHING) && !p->vec_out) {
/* Fast path for performing exclusive WP */
for (addr = start; addr != end; pte++, addr += PAGE_SIZE) {
@@ -2539,8 +2560,8 @@ flush_and_return:
if (flush_end)
flush_tlb_range(vma, start, addr);
- pte_unmap_unlock(start_pte, ptl);
arch_leave_lazy_mmu_mode();
+ pte_unmap_unlock(start_pte, ptl);
cond_resched();
return ret;
@@ -2855,7 +2876,12 @@ static void gather_stats(struct page *page, struct numa_maps *md, int pte_dirty,
unsigned long nr_pages)
{
struct folio *folio = page_folio(page);
- int count = folio_precise_page_mapcount(folio, page);
+ int count;
+
+ if (IS_ENABLED(CONFIG_PAGE_MAPCOUNT))
+ count = folio_precise_page_mapcount(folio, page);
+ else
+ count = folio_average_page_mapcount(folio);
md->pages += nr_pages;
if (pte_dirty || folio_test_dirty(folio))
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 412fe7c4d348..0d76c4f37b3e 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -69,7 +69,7 @@ static vm_fault_t udf_page_mkwrite(struct vm_fault *vmf)
goto out_unlock;
}
- block_commit_write(&folio->page, 0, end);
+ block_commit_write(folio, 0, end);
out_dirty:
folio_mark_dirty(folio);
folio_wait_stable(folio);
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index ce6b8ffbaa2c..ee3e0f284287 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2734,21 +2734,16 @@ xfs_mmaplock_two_inodes_and_break_dax_layout(
struct xfs_inode *ip2)
{
int error;
- bool retry;
- struct page *page;
if (ip1->i_ino > ip2->i_ino)
swap(ip1, ip2);
again:
- retry = false;
/* Lock the first inode */
xfs_ilock(ip1, XFS_MMAPLOCK_EXCL);
- error = xfs_break_dax_layouts(VFS_I(ip1), &retry);
- if (error || retry) {
+ error = xfs_break_dax_layouts(VFS_I(ip1));
+ if (error) {
xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL);
- if (error == 0 && retry)
- goto again;
return error;
}
@@ -2762,8 +2757,8 @@ again:
* need to unlock & lock the XFS_MMAPLOCK_EXCL which is not suitable
* for this nested lock case.
*/
- page = dax_layout_busy_page(VFS_I(ip2)->i_mapping);
- if (page && page_ref_count(page) != 1) {
+ error = dax_break_layout(VFS_I(ip2), 0, -1, NULL);
+ if (error) {
xfs_iunlock(ip2, XFS_MMAPLOCK_EXCL);
xfs_iunlock(ip1, XFS_MMAPLOCK_EXCL);
goto again;
@@ -3007,21 +3002,11 @@ xfs_wait_dax_page(
int
xfs_break_dax_layouts(
- struct inode *inode,
- bool *retry)
+ struct inode *inode)
{
- struct page *page;
-
xfs_assert_ilocked(XFS_I(inode), XFS_MMAPLOCK_EXCL);
- page = dax_layout_busy_page(inode->i_mapping);
- if (!page)
- return 0;
-
- *retry = true;
- return ___wait_var_event(&page->_refcount,
- atomic_read(&page->_refcount) == 1, TASK_INTERRUPTIBLE,
- 0, 0, xfs_wait_dax_page(inode));
+ return dax_break_layout_inode(inode, xfs_wait_dax_page);
}
int
@@ -3039,8 +3024,8 @@ xfs_break_layouts(
retry = false;
switch (reason) {
case BREAK_UNMAP:
- error = xfs_break_dax_layouts(inode, &retry);
- if (error || retry)
+ error = xfs_break_dax_layouts(inode);
+ if (error)
break;
fallthrough;
case BREAK_WRITE:
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index 4bb7a99e0dc4..eae0159983ca 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -603,7 +603,7 @@ xfs_itruncate_extents(
return xfs_itruncate_extents_flags(tpp, ip, whichfork, new_size, 0);
}
-int xfs_break_dax_layouts(struct inode *inode, bool *retry);
+int xfs_break_dax_layouts(struct inode *inode);
int xfs_break_layouts(struct inode *inode, uint *iolock,
enum layout_break_reason reason);
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 53944cc7af24..b2dd0c0bf509 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -768,6 +768,17 @@ xfs_fs_drop_inode(
return generic_drop_inode(inode);
}
+STATIC void
+xfs_fs_evict_inode(
+ struct inode *inode)
+{ /* Evict handler: optional DAX layout break, then truncate the inode's page cache and clear the inode. */
+ if (IS_DAX(inode)) /* DAX inodes only; NOTE(review): presumably waits out remaining DAX page users - confirm in fs/dax.c */
+ dax_break_layout_final(inode);
+
+ truncate_inode_pages_final(&inode->i_data); /* runs after the DAX step above, for DAX and non-DAX inodes alike */
+ clear_inode(inode);
+}
+
static void
xfs_mount_free(
struct xfs_mount *mp)
@@ -1275,6 +1286,7 @@ static const struct super_operations xfs_super_operations = {
.destroy_inode = xfs_fs_destroy_inode,
.dirty_inode = xfs_fs_dirty_inode,
.drop_inode = xfs_fs_drop_inode,
+ .evict_inode = xfs_fs_evict_inode,
.put_super = xfs_fs_put_super,
.sync_fs = xfs_fs_sync_fs,
.freeze_fs = xfs_fs_freeze,