summaryrefslogtreecommitdiff
path: root/mm/swapfile.c
diff options
context:
space:
mode:
authorKairui Song <kasong@tencent.com>2025-01-14 01:57:32 +0800
committerAndrew Morton <akpm@linux-foundation.org>2025-01-25 20:22:37 -0800
commit4f79384a25d57a59e142009e52f40ae1f25102fe (patch)
tree84fb85dcd0ed557715268cfb6c211400f5cf06f9 /mm/swapfile.c
parentbae8a4ef3efb56bb7e83bafd3c0856845aeaf605 (diff)
mm, swap_slots: remove slot cache for freeing path
The slot cache for the freeing path is mostly for reducing the overhead of si->lock. As we have basically eliminated the si->lock usage for the freeing path, it can be removed. This helps simplify the code, and avoids swap entries being held in the cache upon freeing. The delayed freeing of entries has been causing trouble for further optimizations for zswap [1] and in theory will also cause more fragmentation and extra overhead. Tests building the Linux kernel showed that both performance and fragmentation are better without the cache: time make -j96 / 768M memcg, 4K pages, 10G ZRAM, avg of 4 test runs: Before: Sys time: 36047.78, Real time: 472.43 After: (-7.6% sys time, -7.3% real time) Sys time: 33314.76, Real time: 437.67 time make -j96 / 1152M memcg, 64K mTHP, 10G ZRAM, avg of 4 test runs: Before: Sys time: 46859.04, Real time: 562.63 hugepages-64kB/stats/swpout: 1783392 hugepages-64kB/stats/swpout_fallback: 240875 After: (-23.3% sys time, -21.3% real time) Sys time: 35958.87, Real time: 442.69 hugepages-64kB/stats/swpout: 1866267 hugepages-64kB/stats/swpout_fallback: 158330 Sequential SWAP should also be slightly faster; tests didn't show a measurable difference though, at least no regression: Swapin 4G zero page on ZRAM (time in us): Before (avg. 1923756): 1912391 1927023 1927957 1916527 1918263 1914284 1934753 1940813 1921791 After (avg. 
1922290): 1919101 1925743 1916810 1917007 1923930 1935152 1917403 1923549 1921913 Link: https://lore.kernel.org/all/CAMgjq7ACohT_uerSz8E_994ZZCv709Zor+43hdmesW_59W1BWw@mail.gmail.com/[1] Link: https://lkml.kernel.org/r/20250113175732.48099-14-ryncsn@gmail.com Signed-off-by: Kairui Song <kasong@tencent.com> Suggested-by: Chris Li <chrisl@kernel.org> Cc: Baoquan He <bhe@redhat.com> Cc: Barry Song <v-songbaohua@oppo.com> Cc: "Huang, Ying" <ying.huang@linux.alibaba.com> Cc: Hugh Dickens <hughd@google.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Kalesh Singh <kaleshsingh@google.com> Cc: Nhat Pham <nphamcs@gmail.com> Cc: Ryan Roberts <ryan.roberts@arm.com> Cc: Yosry Ahmed <yosryahmed@google.com> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'mm/swapfile.c')
-rw-r--r--mm/swapfile.c89
1 files changed, 34 insertions, 55 deletions
diff --git a/mm/swapfile.c b/mm/swapfile.c
index adf97c9ccb96..6e867c16ea93 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -53,14 +53,15 @@
static bool swap_count_continued(struct swap_info_struct *, pgoff_t,
unsigned char);
static void free_swap_count_continuations(struct swap_info_struct *);
-static void swap_entry_range_free(struct swap_info_struct *si, swp_entry_t entry,
- unsigned int nr_pages);
+static void swap_entry_range_free(struct swap_info_struct *si,
+ struct swap_cluster_info *ci,
+ swp_entry_t entry, unsigned int nr_pages);
static void swap_range_alloc(struct swap_info_struct *si,
unsigned int nr_entries);
static bool folio_swapcache_freeable(struct folio *folio);
static struct swap_cluster_info *lock_cluster(struct swap_info_struct *si,
unsigned long offset);
-static void unlock_cluster(struct swap_cluster_info *ci);
+static inline void unlock_cluster(struct swap_cluster_info *ci);
static DEFINE_SPINLOCK(swap_lock);
static unsigned int nr_swapfiles;
@@ -261,10 +262,9 @@ static int __try_to_reclaim_swap(struct swap_info_struct *si,
folio_ref_sub(folio, nr_pages);
folio_set_dirty(folio);
- /* Only sinple page folio can be backed by zswap */
- if (nr_pages == 1)
- zswap_invalidate(entry);
- swap_entry_range_free(si, entry, nr_pages);
+ ci = lock_cluster(si, offset);
+ swap_entry_range_free(si, ci, entry, nr_pages);
+ unlock_cluster(ci);
ret = nr_pages;
out_unlock:
folio_unlock(folio);
@@ -1128,8 +1128,10 @@ static void swap_range_free(struct swap_info_struct *si, unsigned long offset,
* Use atomic clear_bit operations only on zeromap instead of non-atomic
* bitmap_clear to prevent adjacent bits corruption due to simultaneous writes.
*/
- for (i = 0; i < nr_entries; i++)
+ for (i = 0; i < nr_entries; i++) {
clear_bit(offset + i, si->zeromap);
+ zswap_invalidate(swp_entry(si->type, offset + i));
+ }
if (si->flags & SWP_BLKDEV)
swap_slot_free_notify =
@@ -1434,9 +1436,9 @@ static unsigned char __swap_entry_free(struct swap_info_struct *si,
ci = lock_cluster(si, offset);
usage = __swap_entry_free_locked(si, offset, 1);
- unlock_cluster(ci);
if (!usage)
- free_swap_slot(entry);
+ swap_entry_range_free(si, ci, swp_entry(si->type, offset), 1);
+ unlock_cluster(ci);
return usage;
}
@@ -1464,13 +1466,10 @@ static bool __swap_entries_free(struct swap_info_struct *si,
}
for (i = 0; i < nr; i++)
WRITE_ONCE(si->swap_map[offset + i], SWAP_HAS_CACHE);
+ if (!has_cache)
+ swap_entry_range_free(si, ci, entry, nr);
unlock_cluster(ci);
- if (!has_cache) {
- for (i = 0; i < nr; i++)
- zswap_invalidate(swp_entry(si->type, offset + i));
- swap_entry_range_free(si, entry, nr);
- }
return has_cache;
fallback:
@@ -1490,15 +1489,13 @@ fallback:
* Drop the last HAS_CACHE flag of swap entries, caller have to
* ensure all entries belong to the same cgroup.
*/
-static void swap_entry_range_free(struct swap_info_struct *si, swp_entry_t entry,
- unsigned int nr_pages)
+static void swap_entry_range_free(struct swap_info_struct *si,
+ struct swap_cluster_info *ci,
+ swp_entry_t entry, unsigned int nr_pages)
{
unsigned long offset = swp_offset(entry);
unsigned char *map = si->swap_map + offset;
unsigned char *map_end = map + nr_pages;
- struct swap_cluster_info *ci;
-
- ci = lock_cluster(si, offset);
/* It should never free entries across different clusters */
VM_BUG_ON(ci != offset_to_cluster(si, offset + nr_pages - 1));
@@ -1518,7 +1515,6 @@ static void swap_entry_range_free(struct swap_info_struct *si, swp_entry_t entry
free_cluster(si, ci);
else
partial_free_cluster(si, ci);
- unlock_cluster(ci);
}
static void cluster_swap_free_nr(struct swap_info_struct *si,
@@ -1526,28 +1522,13 @@ static void cluster_swap_free_nr(struct swap_info_struct *si,
unsigned char usage)
{
struct swap_cluster_info *ci;
- DECLARE_BITMAP(to_free, BITS_PER_LONG) = { 0 };
- int i, nr;
+ unsigned long end = offset + nr_pages;
ci = lock_cluster(si, offset);
- while (nr_pages) {
- nr = min(BITS_PER_LONG, nr_pages);
- for (i = 0; i < nr; i++) {
- if (!__swap_entry_free_locked(si, offset + i, usage))
- bitmap_set(to_free, i, 1);
- }
- if (!bitmap_empty(to_free, BITS_PER_LONG)) {
- unlock_cluster(ci);
- for_each_set_bit(i, to_free, BITS_PER_LONG)
- free_swap_slot(swp_entry(si->type, offset + i));
- if (nr == nr_pages)
- return;
- bitmap_clear(to_free, 0, BITS_PER_LONG);
- ci = lock_cluster(si, offset);
- }
- offset += nr;
- nr_pages -= nr;
- }
+ do {
+ if (!__swap_entry_free_locked(si, offset, usage))
+ swap_entry_range_free(si, ci, swp_entry(si->type, offset), 1);
+ } while (++offset < end);
unlock_cluster(ci);
}
@@ -1588,18 +1569,12 @@ void put_swap_folio(struct folio *folio, swp_entry_t entry)
return;
ci = lock_cluster(si, offset);
- if (size > 1 && swap_is_has_cache(si, offset, size)) {
- unlock_cluster(ci);
- swap_entry_range_free(si, entry, size);
- return;
- }
- for (int i = 0; i < size; i++, entry.val++) {
- if (!__swap_entry_free_locked(si, offset + i, SWAP_HAS_CACHE)) {
- unlock_cluster(ci);
- free_swap_slot(entry);
- if (i == size - 1)
- return;
- lock_cluster(si, offset);
+ if (swap_is_has_cache(si, offset, size))
+ swap_entry_range_free(si, ci, entry, size);
+ else {
+ for (int i = 0; i < size; i++, entry.val++) {
+ if (!__swap_entry_free_locked(si, offset + i, SWAP_HAS_CACHE))
+ swap_entry_range_free(si, ci, entry, 1);
}
}
unlock_cluster(ci);
@@ -1608,6 +1583,7 @@ void put_swap_folio(struct folio *folio, swp_entry_t entry)
void swapcache_free_entries(swp_entry_t *entries, int n)
{
int i;
+ struct swap_cluster_info *ci;
struct swap_info_struct *si = NULL;
if (n <= 0)
@@ -1615,8 +1591,11 @@ void swapcache_free_entries(swp_entry_t *entries, int n)
for (i = 0; i < n; ++i) {
si = _swap_info_get(entries[i]);
- if (si)
- swap_entry_range_free(si, entries[i], 1);
+ if (si) {
+ ci = lock_cluster(si, swp_offset(entries[i]));
+ swap_entry_range_free(si, ci, entries[i], 1);
+ unlock_cluster(ci);
+ }
}
}