Diffstat (limited to 'include/linux/swap.h')
 include/linux/swap.h | 93 +++++++++++++++++++++++++++++++++++++++------------------------------------------------------
 1 file changed, 39 insertions(+), 54 deletions(-)
diff --git a/include/linux/swap.h b/include/linux/swap.h
index f3e0ac20c2e8..bc0e1c275fc0 100644
--- a/include/linux/swap.h
+++ b/include/linux/swap.h
@@ -24,7 +24,6 @@ struct pagevec;
#define SWAP_FLAG_PREFER 0x8000 /* set if swap priority specified */
#define SWAP_FLAG_PRIO_MASK 0x7fff
-#define SWAP_FLAG_PRIO_SHIFT 0
#define SWAP_FLAG_DISCARD 0x10000 /* enable discard for swap */
#define SWAP_FLAG_DISCARD_ONCE 0x20000 /* discard swap area at swapon-time */
#define SWAP_FLAG_DISCARD_PAGES 0x40000 /* discard page-clusters after use */
@@ -74,14 +73,13 @@ static inline int current_is_kswapd(void)
* to a special SWP_DEVICE_{READ|WRITE} entry.
*
* When a page is mapped by the device for exclusive access we set the CPU page
- * table entries to special SWP_DEVICE_EXCLUSIVE_* entries.
+ * table entries to a special SWP_DEVICE_EXCLUSIVE entry.
*/
#ifdef CONFIG_DEVICE_PRIVATE
-#define SWP_DEVICE_NUM 4
+#define SWP_DEVICE_NUM 3
#define SWP_DEVICE_WRITE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM)
#define SWP_DEVICE_READ (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+1)
-#define SWP_DEVICE_EXCLUSIVE_WRITE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+2)
-#define SWP_DEVICE_EXCLUSIVE_READ (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+3)
+#define SWP_DEVICE_EXCLUSIVE (MAX_SWAPFILES+SWP_HWPOISON_NUM+SWP_MIGRATION_NUM+2)
#else
#define SWP_DEVICE_NUM 0
#endif
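
With device-exclusive entries no longer split into read and write variants, a single type comparison is enough to recognize them. A minimal sketch, modeled on the is_device_exclusive_entry() helper in linux/swapops.h (the helper itself is not part of this hunk):

static inline bool is_device_exclusive_entry(swp_entry_t entry)
{
        /* One merged entry type replaces the former READ/WRITE pair. */
        return swp_type(entry) == SWP_DEVICE_EXCLUSIVE;
}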
@@ -219,10 +217,10 @@ enum {
SWP_STABLE_WRITES = (1 << 11), /* no overwrite PG_writeback pages */
SWP_SYNCHRONOUS_IO = (1 << 12), /* synchronous IO is efficient */
/* add others here before... */
- SWP_SCANNING = (1 << 14), /* refcount in scan_swap_map */
};
#define SWAP_CLUSTER_MAX 32UL
+#define SWAP_CLUSTER_MAX_SKIPPED (SWAP_CLUSTER_MAX << 10)
#define COMPACT_CLUSTER_MAX SWAP_CLUSTER_MAX
/* Bit flag in swap_map */
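
SWAP_CLUSTER_MAX_SKIPPED has no consumer in this header; going by its name and value (SWAP_CLUSTER_MAX << 10), it reads as a budget on how many unusable slots a scan may skip over. A purely hypothetical illustration of such a bound:

/* Hypothetical: return the first free slot in map, but give up after
 * skipping SWAP_CLUSTER_MAX_SKIPPED busy slots so a single scan
 * cannot churn indefinitely. */
static unsigned long scan_bounded(unsigned char *map, unsigned long nr)
{
        unsigned long i, skipped = 0;

        for (i = 0; i < nr && skipped < SWAP_CLUSTER_MAX_SKIPPED; i++) {
                if (!map[i])
                        return i;
                skipped++;
        }
        return nr;      /* nothing found within budget */
}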
@@ -257,18 +255,27 @@ struct swap_cluster_info {
u8 order;
struct list_head list;
};
-#define CLUSTER_FLAG_FREE 1 /* This cluster is free */
-#define CLUSTER_FLAG_NONFULL 2 /* This cluster is on nonfull list */
-#define CLUSTER_FLAG_FRAG 4 /* This cluster is on nonfull list */
-#define CLUSTER_FLAG_FULL 8 /* This cluster is on full list */
+
+/* All on-list clusters must have a non-zero flag. */
+enum swap_cluster_flags {
+ CLUSTER_FLAG_NONE = 0, /* For temporary off-list cluster */
+ CLUSTER_FLAG_FREE,
+ CLUSTER_FLAG_NONFULL,
+ CLUSTER_FLAG_FRAG,
+ /* Clusters with flags above are allocatable */
+ CLUSTER_FLAG_USABLE = CLUSTER_FLAG_FRAG,
+ CLUSTER_FLAG_FULL,
+ CLUSTER_FLAG_DISCARD,
+ CLUSTER_FLAG_MAX,
+};
/*
* The first page in the swap file is the swap header, which is always marked
* bad to prevent it from being allocated as an entry. This also prevents the
 * cluster to which it belongs from being marked free. Therefore 0 is safe to use as
- * a sentinel to indicate next is not valid in percpu_cluster.
+ * a sentinel to indicate an entry is not valid.
*/
-#define SWAP_NEXT_INVALID 0
+#define SWAP_ENTRY_INVALID 0
#ifdef CONFIG_THP_SWAP
#define SWAP_NR_ORDERS (PMD_ORDER + 1)
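
The enum replaces independent flag bits with ordered states: CLUSTER_FLAG_NONE must stay zero so a temporarily off-list cluster is recognizable, and every state up to CLUSTER_FLAG_USABLE is an allocation candidate, so one range check classifies a cluster. A hypothetical helper (not part of this patch, and assuming the cluster's flags field holds one of these values) showing the intended use of the ordering:

static inline bool cluster_is_allocatable(struct swap_cluster_info *ci)
{
        /* Allocatable iff on a list (non-zero flag) and at or below
         * CLUSTER_FLAG_USABLE, i.e. FREE, NONFULL or FRAG. */
        return ci->flags > CLUSTER_FLAG_NONE &&
               ci->flags <= CLUSTER_FLAG_USABLE;
}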
@@ -277,11 +284,10 @@ struct swap_cluster_info {
#endif
/*
- * We assign a cluster to each CPU, so each CPU can allocate swap entry from
- * its own cluster and swapout sequentially. The purpose is to optimize swapout
- * throughput.
+ * We keep using the same cluster for rotational devices so IO will be sequential.
+ * The purpose is to optimize swap throughput on these devices.
*/
-struct percpu_cluster {
+struct swap_sequential_cluster {
unsigned int next[SWAP_NR_ORDERS]; /* Likely next allocation offset */
};
@@ -304,15 +310,11 @@ struct swap_info_struct {
/* list of clusters that contain at least one free slot */
struct list_head frag_clusters[SWAP_NR_ORDERS];
/* list of clusters that are fragmented or contended */
- unsigned int frag_cluster_nr[SWAP_NR_ORDERS];
- unsigned int lowest_bit; /* index of first free in swap_map */
- unsigned int highest_bit; /* index of last free in swap_map */
+ atomic_long_t frag_cluster_nr[SWAP_NR_ORDERS];
unsigned int pages; /* total of usable pages of swap */
- unsigned int inuse_pages; /* number of those currently in use */
- unsigned int cluster_next; /* likely index for next allocation */
- unsigned int cluster_nr; /* countdown to next cluster search */
- unsigned int __percpu *cluster_next_cpu; /*percpu index for next allocation */
- struct percpu_cluster __percpu *percpu_cluster; /* per cpu's swap location */
+ atomic_long_t inuse_pages; /* number of those currently in use */
+ struct swap_sequential_cluster *global_cluster; /* Use one global cluster for rotational devices */
+ spinlock_t global_cluster_lock; /* Serialize usage of global cluster */
struct rb_root swap_extent_root;/* root of the swap extent rbtree */
struct block_device *bdev; /* swap device or bdev of swap file */
struct file *swap_file; /* seldom referenced */
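
Per-CPU clusters disappear from this path: a single global cluster, serialized by global_cluster_lock, keeps allocations (and thus writeout) sequential on rotational devices, while inuse_pages becoming atomic lets readers sample usage without a lock. A hedged sketch of how an allocator might consult the shared cluster (hypothetical helper; the real allocation path lives in mm/swapfile.c):

static unsigned int rot_next_offset(struct swap_info_struct *si, int order)
{
        unsigned int offset;

        /* All access to the shared cluster goes through this lock. */
        spin_lock(&si->global_cluster_lock);
        offset = si->global_cluster->next[order];
        spin_unlock(&si->global_cluster_lock);
        return offset;
}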
@@ -412,6 +414,10 @@ extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
#define MEMCG_RECLAIM_PROACTIVE (1 << 2)
#define MIN_SWAPPINESS 0
#define MAX_SWAPPINESS 200
+
+/* Just reclaim from anon folios in proactive memory reclaim */
+#define SWAPPINESS_ANON_ONLY (MAX_SWAPPINESS + 1)
+
extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
unsigned long nr_pages,
gfp_t gfp_mask,
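
Because SWAPPINESS_ANON_ONLY is defined one above MAX_SWAPPINESS, it can never collide with a user-supplied swappiness value, so reclaim code can overload the same parameter. A hypothetical check a reclaim path could apply:

/* Hypothetical: skip file folios entirely when proactive reclaim
 * requested anon-only mode via the out-of-range swappiness value. */
static inline bool reclaim_anon_only(int swappiness)
{
        return swappiness == SWAPPINESS_ANON_ONLY;
}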
@@ -426,19 +432,10 @@ extern int vm_swappiness;
long remove_mapping(struct address_space *mapping, struct folio *folio);
#ifdef CONFIG_NUMA
-extern int node_reclaim_mode;
extern int sysctl_min_unmapped_ratio;
extern int sysctl_min_slab_ratio;
-#else
-#define node_reclaim_mode 0
#endif
-static inline bool node_reclaim_enabled(void)
-{
- /* Is any node_reclaim_mode bit set? */
- return node_reclaim_mode & (RECLAIM_ZONE|RECLAIM_WRITE|RECLAIM_UNMAP);
-}
-
void check_move_unevictable_folios(struct folio_batch *fbatch);
extern void __meminit kswapd_run(int nid);
@@ -457,13 +454,12 @@ static inline unsigned long total_swapcache_pages(void)
}
void free_swap_cache(struct folio *folio);
-void free_page_and_swap_cache(struct page *);
+void free_folio_and_swap_cache(struct folio *folio);
void free_pages_and_swap_cache(struct encoded_page **, int);
/* linux/mm/swapfile.c */
extern atomic_long_t nr_swap_pages;
extern long total_swap_pages;
extern atomic_t nr_rotate_swap;
-extern bool has_usable_swap(void);
/* Swap 50% full? Release swapcache more aggressively.. */
static inline bool vm_swap_full(void)
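
free_page_and_swap_cache() becomes folio-based here, so any remaining page-based call site would translate first. A hedged sketch of such a conversion (hypothetical caller, using the existing page_folio() accessor):

static void release_table_page(struct page *page)
{
        /* Translate to the folio API before freeing. */
        free_folio_and_swap_cache(page_folio(page));
}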
@@ -477,24 +473,22 @@ static inline long get_nr_swap_pages(void)
}
extern void si_swapinfo(struct sysinfo *);
-swp_entry_t folio_alloc_swap(struct folio *folio);
+int folio_alloc_swap(struct folio *folio, gfp_t gfp_mask);
bool folio_free_swap(struct folio *folio);
void put_swap_folio(struct folio *folio, swp_entry_t entry);
extern swp_entry_t get_swap_page_of_type(int);
-extern int get_swap_pages(int n, swp_entry_t swp_entries[], int order);
extern int add_swap_count_continuation(swp_entry_t, gfp_t);
extern void swap_shmem_alloc(swp_entry_t, int);
extern int swap_duplicate(swp_entry_t);
extern int swapcache_prepare(swp_entry_t entry, int nr);
extern void swap_free_nr(swp_entry_t entry, int nr_pages);
-extern void swapcache_free_entries(swp_entry_t *entries, int n);
extern void free_swap_and_cache_nr(swp_entry_t entry, int nr);
int swap_type_of(dev_t device, sector_t offset);
int find_first_swap(dev_t *device);
extern unsigned int count_swap_pages(int, int);
extern sector_t swapdev_block(int, pgoff_t);
extern int __swap_count(swp_entry_t entry);
-extern int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry);
+extern bool swap_entry_swapped(struct swap_info_struct *si, swp_entry_t entry);
extern int swp_swapcount(swp_entry_t entry);
struct swap_info_struct *swp_swap_info(swp_entry_t entry);
struct backing_dev_info;
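
folio_alloc_swap() now takes a gfp mask and returns an errno-style int instead of a swp_entry_t; the !CONFIG_SWAP stub later in this diff returns -EINVAL accordingly. A hedged usage sketch, assuming (as with existing folio code) that the allocated entry is reachable via folio->swap on success, with an arbitrary example gfp mask:

static bool try_alloc_swap(struct folio *folio, swp_entry_t *entry)
{
        /* Non-zero return means no slot was allocated. */
        if (folio_alloc_swap(folio, __GFP_HIGH | __GFP_NOWARN))
                return false;
        *entry = folio->swap;   /* assumption: set on success */
        return true;
}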
@@ -530,10 +524,8 @@ static inline void put_swap_device(struct swap_info_struct *si)
#define si_swapinfo(val) \
do { (val)->freeswap = (val)->totalswap = 0; } while (0)
-/* only sparc can not include linux/pagemap.h in this file
- * so leave put_page and release_pages undeclared... */
-#define free_page_and_swap_cache(page) \
- put_page(page)
+#define free_folio_and_swap_cache(folio) \
+ folio_put(folio)
#define free_pages_and_swap_cache(pages, nr) \
release_pages((pages), (nr));
@@ -577,9 +569,9 @@ static inline int __swap_count(swp_entry_t entry)
return 0;
}
-static inline int swap_swapcount(struct swap_info_struct *si, swp_entry_t entry)
+static inline bool swap_entry_swapped(struct swap_info_struct *si, swp_entry_t entry)
{
- return 0;
+ return false;
}
static inline int swp_swapcount(swp_entry_t entry)
@@ -587,11 +579,9 @@ static inline int swp_swapcount(swp_entry_t entry)
return 0;
}
-static inline swp_entry_t folio_alloc_swap(struct folio *folio)
+static inline int folio_alloc_swap(struct folio *folio, gfp_t gfp_mask)
{
- swp_entry_t entry;
- entry.val = 0;
- return entry;
+ return -EINVAL;
}
static inline bool folio_free_swap(struct folio *folio)
@@ -652,7 +642,6 @@ static inline void folio_throttle_swaprate(struct folio *folio, gfp_t gfp)
#endif
#if defined(CONFIG_MEMCG) && defined(CONFIG_SWAP)
-void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry);
int __mem_cgroup_try_charge_swap(struct folio *folio, swp_entry_t entry);
static inline int mem_cgroup_try_charge_swap(struct folio *folio,
swp_entry_t entry)
@@ -673,10 +662,6 @@ static inline void mem_cgroup_uncharge_swap(swp_entry_t entry, unsigned int nr_p
extern long mem_cgroup_get_nr_swap_pages(struct mem_cgroup *memcg);
extern bool mem_cgroup_swap_full(struct folio *folio);
#else
-static inline void mem_cgroup_swapout(struct folio *folio, swp_entry_t entry)
-{
-}
-
static inline int mem_cgroup_try_charge_swap(struct folio *folio,
swp_entry_t entry)
{