summaryrefslogtreecommitdiff
path: root/mm/swapfile.c
diff options
context:
space:
mode:
authorKairui Song <kasong@tencent.com>2025-09-17 00:00:58 +0800
committerAndrew Morton <akpm@linux-foundation.org>2025-09-21 14:22:25 -0700
commit685a17fbd35e66ae9b6440979b438caa2ae540cd (patch)
treec4d8765c614e67b0a6490621d4350c1accfee1d6 /mm/swapfile.c
parent8b47299a411a178d572aaac31ff7ab33a8bd27e2 (diff)
mm, swap: remove contention workaround for swap cache
Swap cluster setup will try to shuffle the clusters on initialization. It was helpful to avoid contention for the swap cache space. The cluster size (2M) was much smaller than each swap cache space (64M), so shuffling the cluster means the allocator will try to allocate swap slots that are in different swap cache spaces for each CPU, reducing the chance of two CPUs using the same swap cache space, and hence reducing the contention. Now, swap cache is managed by swap clusters, this shuffle is pointless. Just remove it, and clean up related macros. This also improves the HDD swap performance as shuffling IO is a bad idea for HDD, and now the shuffling is gone. Test have shown a ~40% performance gain for HDD [1]: Doing sequential swap in of 8G data using 8 processes with usemem, average of 3 test runs: Before: 1270.91 KB/s per process After: 1849.54 KB/s per process Link: https://lore.kernel.org/linux-mm/CAMgjq7AdauQ8=X0zeih2r21QoV=-WWj1hyBxLWRzq74n-C=-Ng@mail.gmail.com/ [1] Link: https://lkml.kernel.org/r/20250916160100.31545-14-ryncsn@gmail.com Reported-by: kernel test robot <oliver.sang@intel.com> Closes: https://lore.kernel.org/oe-lkp/202504241621.f27743ec-lkp@intel.com Signed-off-by: Kairui Song <kasong@tencent.com> Acked-by: Chris Li <chrisl@kernel.org> Reviewed-by: Barry Song <baohua@kernel.org> Acked-by: David Hildenbrand <david@redhat.com> Suggested-by: Chris Li <chrisl@kernel.org> Cc: Baolin Wang <baolin.wang@linux.alibaba.com> Cc: Baoquan He <bhe@redhat.com> Cc: "Huang, Ying" <ying.huang@linux.alibaba.com> Cc: Hugh Dickins <hughd@google.com> Cc: Johannes Weiner <hannes@cmpxchg.org> Cc: Kemeng Shi <shikemeng@huaweicloud.com> Cc: Lorenzo Stoakes <lorenzo.stoakes@oracle.com> Cc: Matthew Wilcox (Oracle) <willy@infradead.org> Cc: Nhat Pham <nphamcs@gmail.com> Cc: Yosry Ahmed <yosryahmed@google.com> Cc: Zi Yan <ziy@nvidia.com> Cc: SeongJae Park <sj@kernel.org> Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Diffstat (limited to 'mm/swapfile.c')
-rw-r--r--mm/swapfile.c32
1 files changed, 8 insertions, 24 deletions
diff --git a/mm/swapfile.c b/mm/swapfile.c
index b183e96be289..314c5c10d3bd 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -3204,21 +3204,14 @@ static int setup_swap_map(struct swap_info_struct *si,
return 0;
}
-#define SWAP_CLUSTER_INFO_COLS \
- DIV_ROUND_UP(L1_CACHE_BYTES, sizeof(struct swap_cluster_info))
-#define SWAP_CLUSTER_SPACE_COLS \
- DIV_ROUND_UP(SWAP_ADDRESS_SPACE_PAGES, SWAPFILE_CLUSTER)
-#define SWAP_CLUSTER_COLS \
- max_t(unsigned int, SWAP_CLUSTER_INFO_COLS, SWAP_CLUSTER_SPACE_COLS)
-
static struct swap_cluster_info *setup_clusters(struct swap_info_struct *si,
union swap_header *swap_header,
unsigned long maxpages)
{
unsigned long nr_clusters = DIV_ROUND_UP(maxpages, SWAPFILE_CLUSTER);
struct swap_cluster_info *cluster_info;
- unsigned long i, j, idx;
int err = -ENOMEM;
+ unsigned long i;
cluster_info = kvcalloc(nr_clusters, sizeof(*cluster_info), GFP_KERNEL);
if (!cluster_info)
@@ -3267,22 +3260,13 @@ static struct swap_cluster_info *setup_clusters(struct swap_info_struct *si,
INIT_LIST_HEAD(&si->frag_clusters[i]);
}
- /*
- * Reduce false cache line sharing between cluster_info and
- * sharing same address space.
- */
- for (j = 0; j < SWAP_CLUSTER_COLS; j++) {
- for (i = 0; i < DIV_ROUND_UP(nr_clusters, SWAP_CLUSTER_COLS); i++) {
- struct swap_cluster_info *ci;
- idx = i * SWAP_CLUSTER_COLS + j;
- ci = cluster_info + idx;
- if (idx >= nr_clusters)
- continue;
- if (ci->count) {
- ci->flags = CLUSTER_FLAG_NONFULL;
- list_add_tail(&ci->list, &si->nonfull_clusters[0]);
- continue;
- }
+ for (i = 0; i < nr_clusters; i++) {
+ struct swap_cluster_info *ci = &cluster_info[i];
+
+ if (ci->count) {
+ ci->flags = CLUSTER_FLAG_NONFULL;
+ list_add_tail(&ci->list, &si->nonfull_clusters[0]);
+ } else {
ci->flags = CLUSTER_FLAG_FREE;
list_add_tail(&ci->list, &si->free_clusters);
}