diff options
Diffstat (limited to 'include/linux')
117 files changed, 2280 insertions, 940 deletions
diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 3f2e93ed9730..f4b3d442b7df 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -772,6 +772,10 @@ int acpi_get_local_u64_address(acpi_handle handle, u64 *addr); int acpi_get_local_address(acpi_handle handle, u32 *addr); const char *acpi_get_subsystem_id(acpi_handle handle); +#ifdef CONFIG_ACPI_MRRM +int acpi_mrrm_max_mem_region(void); +#endif + #else /* !CONFIG_ACPI */ #define acpi_disabled 1 @@ -1092,6 +1096,11 @@ static inline acpi_handle acpi_get_processor_handle(int cpu) return NULL; } +static inline int acpi_mrrm_max_mem_region(void) +{ + return 1; +} + #endif /* !CONFIG_ACPI */ #ifdef CONFIG_ACPI_HMAT @@ -1125,13 +1134,13 @@ void acpi_os_set_prepare_extended_sleep(int (*func)(u8 sleep_state, acpi_status acpi_os_prepare_extended_sleep(u8 sleep_state, u32 val_a, u32 val_b); -#if defined(CONFIG_SUSPEND) && defined(CONFIG_X86) struct acpi_s2idle_dev_ops { struct list_head list_node; void (*prepare)(void); void (*check)(void); void (*restore)(void); }; +#if defined(CONFIG_SUSPEND) && defined(CONFIG_X86) int acpi_register_lps0_dev(struct acpi_s2idle_dev_ops *arg); void acpi_unregister_lps0_dev(struct acpi_s2idle_dev_ops *arg); int acpi_get_lps0_constraint(struct acpi_device *adev); @@ -1140,6 +1149,13 @@ static inline int acpi_get_lps0_constraint(struct device *dev) { return ACPI_STATE_UNKNOWN; } +static inline int acpi_register_lps0_dev(struct acpi_s2idle_dev_ops *arg) +{ + return -ENODEV; +} +static inline void acpi_unregister_lps0_dev(struct acpi_s2idle_dev_ops *arg) +{ +} #endif /* CONFIG_SUSPEND && CONFIG_X86 */ void arch_reserve_mem_area(acpi_physical_address addr, size_t size); #else diff --git a/include/linux/alloc_tag.h b/include/linux/alloc_tag.h index a946e0203e6d..8f7931eb7d16 100644 --- a/include/linux/alloc_tag.h +++ b/include/linux/alloc_tag.h @@ -104,6 +104,16 @@ DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag); #else /* ARCH_NEEDS_WEAK_PER_CPU */ +#ifdef MODULE + +#define DEFINE_ALLOC_TAG(_alloc_tag) \ + static struct alloc_tag _alloc_tag __used __aligned(8) \ + __section(ALLOC_TAG_SECTION_NAME) = { \ + .ct = CODE_TAG_INIT, \ + .counters = NULL }; + +#else /* MODULE */ + #define DEFINE_ALLOC_TAG(_alloc_tag) \ static DEFINE_PER_CPU(struct alloc_tag_counters, _alloc_tag_cntr); \ static struct alloc_tag _alloc_tag __used __aligned(8) \ @@ -111,6 +121,8 @@ DECLARE_PER_CPU(struct alloc_tag_counters, _shared_alloc_tag); .ct = CODE_TAG_INIT, \ .counters = &_alloc_tag_cntr }; +#endif /* MODULE */ + #endif /* ARCH_NEEDS_WEAK_PER_CPU */ DECLARE_STATIC_KEY_MAYBE(CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT, diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index 2222e8b03ff4..d72d6e5aa200 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -14,14 +14,6 @@ int topology_update_cpu_topology(void); struct device_node; bool topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu); -DECLARE_PER_CPU(unsigned long, cpu_scale); - -static inline unsigned long topology_get_cpu_scale(int cpu) -{ - return per_cpu(cpu_scale, cpu); -} - -void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity); DECLARE_PER_CPU(unsigned long, capacity_freq_ref); diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 1625c8529e70..65abd5ab8836 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -90,7 +90,6 @@ struct linux_binfmt { struct list_head lh; struct module *module; int (*load_binary)(struct linux_binprm *); - int (*load_shlib)(struct file *); #ifdef CONFIG_COREDUMP int (*core_dump)(struct coredump_params *cprm); unsigned long min_coredump; /* minimal dump size */ diff --git a/include/linux/bio.h b/include/linux/bio.h index b786ec5bcc81..9c37c66ef9ca 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -403,7 +403,6 @@ static inline int bio_iov_vecs_to_alloc(struct iov_iter *iter, int max_segs) struct request_queue; -extern int submit_bio_wait(struct bio *bio); void bio_init(struct bio *bio, struct block_device *bdev, struct bio_vec *table, unsigned short max_vecs, blk_opf_t opf); extern void bio_uninit(struct bio *); @@ -418,6 +417,30 @@ void __bio_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int off); void bio_add_folio_nofail(struct bio *bio, struct folio *folio, size_t len, size_t off); +void bio_add_virt_nofail(struct bio *bio, void *vaddr, unsigned len); + +/** + * bio_add_max_vecs - number of bio_vecs needed to add data to a bio + * @kaddr: kernel virtual address to add + * @len: length in bytes to add + * + * Calculate how many bio_vecs need to be allocated to add the kernel virtual + * address range in [@kaddr:@len] in the worse case. + */ +static inline unsigned int bio_add_max_vecs(void *kaddr, unsigned int len) +{ + if (is_vmalloc_addr(kaddr)) + return DIV_ROUND_UP(offset_in_page(kaddr) + len, PAGE_SIZE); + return 1; +} + +unsigned int bio_add_vmalloc_chunk(struct bio *bio, void *vaddr, unsigned len); +bool bio_add_vmalloc(struct bio *bio, void *vaddr, unsigned int len); + +int submit_bio_wait(struct bio *bio); +int bdev_rw_virt(struct block_device *bdev, sector_t sector, void *data, + size_t len, enum req_op op); + int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter); void bio_iov_bvec_set(struct bio *bio, const struct iov_iter *iter); void __bio_release_pages(struct bio *bio, bool mark_dirty); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 8eb9b3310167..de8c85a03bb7 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -9,6 +9,7 @@ #include <linux/prefetch.h> #include <linux/srcu.h> #include <linux/rw_hint.h> +#include <linux/rwsem.h> struct blk_mq_tags; struct blk_flush_queue; @@ -506,6 +507,9 @@ enum hctx_type { * request_queue.tag_set_list. * @srcu: Use as lock when type of the request queue is blocking * (BLK_MQ_F_BLOCKING). + * @update_nr_hwq_lock: + * Synchronize updating nr_hw_queues with add/del disk & + * switching elevator. */ struct blk_mq_tag_set { const struct blk_mq_ops *ops; @@ -527,6 +531,8 @@ struct blk_mq_tag_set { struct mutex tag_list_lock; struct list_head tag_list; struct srcu_struct *srcu; + + struct rw_semaphore update_nr_hwq_lock; }; /** @@ -1031,8 +1037,8 @@ int blk_rq_map_user_io(struct request *, struct rq_map_data *, int blk_rq_map_user_iov(struct request_queue *, struct request *, struct rq_map_data *, const struct iov_iter *, gfp_t); int blk_rq_unmap_user(struct bio *); -int blk_rq_map_kern(struct request_queue *, struct request *, void *, - unsigned int, gfp_t); +int blk_rq_map_kern(struct request *rq, void *kbuf, unsigned int len, + gfp_t gfp); int blk_rq_append_bio(struct request *rq, struct bio *bio); void blk_execute_rq_nowait(struct request *rq, bool at_head); blk_status_t blk_execute_rq(struct request *rq, bool at_head); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index dce7615c35e7..3d1577f07c1c 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -220,6 +220,7 @@ struct bio { unsigned short bi_flags; /* BIO_* below */ unsigned short bi_ioprio; enum rw_hint bi_write_hint; + u8 bi_write_stream; blk_status_t bi_status; atomic_t __bi_remaining; @@ -286,7 +287,6 @@ struct bio { enum { BIO_PAGE_PINNED, /* Unpin pages in bio_release_pages() */ BIO_CLONED, /* doesn't own data */ - BIO_BOUNCED, /* bio is a bounce bio */ BIO_QUIET, /* Make BIO Quiet */ BIO_CHAIN, /* chained bio, ->bi_remaining in effect */ BIO_REFFED, /* bio has elevated ->bi_cnt */ @@ -296,6 +296,14 @@ enum { * of this bio. */ BIO_CGROUP_ACCT, /* has been accounted to a cgroup */ BIO_QOS_THROTTLED, /* bio went through rq_qos throttle path */ + /* + * This bio has completed bps throttling at the single tg granularity, + * which is different from BIO_BPS_THROTTLED. When the bio is enqueued + * into the sq->queued of the upper tg, or is about to be dispatched, + * this flag needs to be cleared. Since blk-throttle and rq_qos are not + * on the same hierarchical level, reuse the value. + */ + BIO_TG_BPS_THROTTLED = BIO_QOS_THROTTLED, BIO_QOS_MERGED, /* but went through rq_qos merge path */ BIO_REMAPPED, BIO_ZONE_WRITE_PLUGGING, /* bio handled through zone write plugging */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9a1f0ee40b56..332b56f323d9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -182,7 +182,6 @@ struct gendisk { struct list_head slave_bdevs; #endif struct timer_rand_state *random; - atomic_t sync_io; /* RAID */ struct disk_events *ev; #ifdef CONFIG_BLK_DEV_ZONED @@ -218,6 +217,8 @@ struct gendisk { * devices that do not have multiple independent access ranges. */ struct blk_independent_access_ranges *ia_ranges; + + struct mutex rqos_state_mutex; /* rqos state change mutex */ }; /** @@ -331,9 +332,6 @@ typedef unsigned int __bitwise blk_features_t; /* skip this queue in blk_mq_(un)quiesce_tagset */ #define BLK_FEAT_SKIP_TAGSET_QUIESCE ((__force blk_features_t)(1u << 13)) -/* bounce all highmem pages */ -#define BLK_FEAT_BOUNCE_HIGH ((__force blk_features_t)(1u << 14)) - /* undocumented magic for bcache */ #define BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE \ ((__force blk_features_t)(1u << 15)) @@ -347,7 +345,7 @@ typedef unsigned int __bitwise blk_features_t; */ #define BLK_FEAT_INHERIT_MASK \ (BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA | BLK_FEAT_ROTATIONAL | \ - BLK_FEAT_STABLE_WRITES | BLK_FEAT_ZONED | BLK_FEAT_BOUNCE_HIGH | \ + BLK_FEAT_STABLE_WRITES | BLK_FEAT_ZONED | \ BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE) /* internal flags in queue_limits.flags */ @@ -405,6 +403,9 @@ struct queue_limits { unsigned short max_integrity_segments; unsigned short max_discard_segments; + unsigned short max_write_streams; + unsigned int write_stream_granularity; + unsigned int max_open_zones; unsigned int max_active_zones; @@ -644,6 +645,8 @@ enum { QUEUE_FLAG_RQ_ALLOC_TIME, /* record rq->alloc_time_ns */ QUEUE_FLAG_HCTX_ACTIVE, /* at least one blk-mq hctx is active */ QUEUE_FLAG_SQ_SCHED, /* single queue style io dispatch */ + QUEUE_FLAG_DISABLE_WBT_DEF, /* for sched to disable/enable wbt */ + QUEUE_FLAG_NO_ELV_SWITCH, /* can't switch elevator any more */ QUEUE_FLAG_MAX }; @@ -679,6 +682,10 @@ void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); #define blk_queue_sq_sched(q) test_bit(QUEUE_FLAG_SQ_SCHED, &(q)->queue_flags) #define blk_queue_skip_tagset_quiesce(q) \ ((q)->limits.features & BLK_FEAT_SKIP_TAGSET_QUIESCE) +#define blk_queue_disable_wbt(q) \ + test_bit(QUEUE_FLAG_DISABLE_WBT_DEF, &(q)->queue_flags) +#define blk_queue_no_elv_switch(q) \ + test_bit(QUEUE_FLAG_NO_ELV_SWITCH, &(q)->queue_flags) extern void blk_set_pm_only(struct request_queue *q); extern void blk_clear_pm_only(struct request_queue *q); @@ -1288,6 +1295,13 @@ static inline unsigned int bdev_max_segments(struct block_device *bdev) return queue_max_segments(bdev_get_queue(bdev)); } +static inline unsigned short bdev_max_write_streams(struct block_device *bdev) +{ + if (bdev_is_partition(bdev)) + return 0; + return bdev_limits(bdev)->max_write_streams; +} + static inline unsigned queue_logical_block_size(const struct request_queue *q) { return q->limits.logical_block_size; diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 9de7adb68294..60d1511b4f4d 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -114,8 +114,7 @@ struct bpf_prog_list { u32 flags; }; -int cgroup_bpf_inherit(struct cgroup *cgrp); -void cgroup_bpf_offline(struct cgroup *cgrp); +void __init cgroup_bpf_lifetime_notifier_init(void); int __cgroup_bpf_run_filter_skb(struct sock *sk, struct sk_buff *skb, @@ -431,8 +430,10 @@ const struct bpf_func_proto * cgroup_current_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog); #else -static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } -static inline void cgroup_bpf_offline(struct cgroup *cgrp) {} +static inline void cgroup_bpf_lifetime_notifier_init(void) +{ + return; +} static inline int cgroup_bpf_prog_attach(const union bpf_attr *attr, enum bpf_prog_type ptype, diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 5bc8f55c8cca..e61687d5e496 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -170,6 +170,23 @@ struct cgroup_subsys_state { struct percpu_ref refcnt; /* + * Depending on the context, this field is initialized + * via css_rstat_init() at different places: + * + * when css is associated with cgroup::self + * when css->cgroup is the root cgroup + * performed in cgroup_init() + * when css->cgroup is not the root cgroup + * performed in cgroup_create() + * when css is associated with a subsystem + * when css->cgroup is the root cgroup + * performed in cgroup_init_subsys() in the non-early path + * when css->cgroup is not the root cgroup + * performed in css_create() + */ + struct css_rstat_cpu __percpu *rstat_cpu; + + /* * siblings list anchored at the parent's ->children * * linkage is protected by cgroup_mutex or RCU @@ -177,9 +194,6 @@ struct cgroup_subsys_state { struct list_head sibling; struct list_head children; - /* flush target list anchored at cgrp->rstat_css_list */ - struct list_head rstat_css_node; - /* * PI: Subsys-unique ID. 0 is unused and root is always 1. The * matching css can be looked up using css_from_id(). @@ -219,6 +233,16 @@ struct cgroup_subsys_state { * Protected by cgroup_mutex. */ int nr_descendants; + + /* + * A singly-linked list of css structures to be rstat flushed. + * This is a scratch field to be used exclusively by + * css_rstat_flush(). + * + * Protected by rstat_base_lock when css is cgroup::self. + * Protected by css->ss->rstat_ss_lock otherwise. + */ + struct cgroup_subsys_state *rstat_flush_next; }; /* @@ -329,10 +353,10 @@ struct cgroup_base_stat { /* * rstat - cgroup scalable recursive statistics. Accounting is done - * per-cpu in cgroup_rstat_cpu which is then lazily propagated up the + * per-cpu in css_rstat_cpu which is then lazily propagated up the * hierarchy on reads. * - * When a stat gets updated, the cgroup_rstat_cpu and its ancestors are + * When a stat gets updated, the css_rstat_cpu and its ancestors are * linked into the updated tree. On the following read, propagation only * considers and consumes the updated tree. This makes reading O(the * number of descendants which have been active since last read) instead of @@ -344,10 +368,29 @@ struct cgroup_base_stat { * frequency decreases the cost of each read. * * This struct hosts both the fields which implement the above - - * updated_children and updated_next - and the fields which track basic - * resource statistics on top of it - bsync, bstat and last_bstat. + * updated_children and updated_next. */ -struct cgroup_rstat_cpu { +struct css_rstat_cpu { + /* + * Child cgroups with stat updates on this cpu since the last read + * are linked on the parent's ->updated_children through + * ->updated_next. updated_children is terminated by its container css. + * + * In addition to being more compact, singly-linked list pointing to + * the css makes it unnecessary for each per-cpu struct to point back + * to the associated css. + * + * Protected by per-cpu css->ss->rstat_ss_cpu_lock. + */ + struct cgroup_subsys_state *updated_children; + struct cgroup_subsys_state *updated_next; /* NULL if not on the list */ +}; + +/* + * This struct hosts the fields which track basic resource statistics on + * top of it - bsync, bstat and last_bstat. + */ +struct cgroup_rstat_base_cpu { /* * ->bsync protects ->bstat. These are the only fields which get * updated in the hot path. @@ -374,20 +417,6 @@ struct cgroup_rstat_cpu { * deltas to propagate to the per-cpu subtree_bstat. */ struct cgroup_base_stat last_subtree_bstat; - - /* - * Child cgroups with stat updates on this cpu since the last read - * are linked on the parent's ->updated_children through - * ->updated_next. - * - * In addition to being more compact, singly-linked list pointing - * to the cgroup makes it unnecessary for each per-cpu struct to - * point back to the associated cgroup. - * - * Protected by per-cpu cgroup_rstat_cpu_lock. - */ - struct cgroup *updated_children; /* terminated by self cgroup */ - struct cgroup *updated_next; /* NULL iff not on the list */ }; struct cgroup_freezer_state { @@ -516,23 +545,23 @@ struct cgroup { struct cgroup *dom_cgrp; struct cgroup *old_dom_cgrp; /* used while enabling threaded */ - /* per-cpu recursive resource statistics */ - struct cgroup_rstat_cpu __percpu *rstat_cpu; - struct list_head rstat_css_list; - /* - * Add padding to separate the read mostly rstat_cpu and - * rstat_css_list into a different cacheline from the following - * rstat_flush_next and *bstat fields which can have frequent updates. + * Depending on the context, this field is initialized via + * css_rstat_init() at different places: + * + * when cgroup is the root cgroup + * performed in cgroup_setup_root() + * otherwise + * performed in cgroup_create() */ - CACHELINE_PADDING(_pad_); + struct cgroup_rstat_base_cpu __percpu *rstat_base_cpu; /* - * A singly-linked list of cgroup structures to be rstat flushed. - * This is a scratch field to be used exclusively by - * cgroup_rstat_flush_locked() and protected by cgroup_rstat_lock. + * Add padding to keep the read mostly rstat per-cpu pointer on a + * different cacheline than the following *bstat fields which can have + * frequent updates. */ - struct cgroup *rstat_flush_next; + CACHELINE_PADDING(_pad_); /* cgroup basic resource statistics */ struct cgroup_base_stat last_bstat; @@ -790,6 +819,9 @@ struct cgroup_subsys { * specifies the mask of subsystems that this one depends on. */ unsigned int depends_on; + + spinlock_t rstat_ss_lock; + raw_spinlock_t __percpu *rstat_ss_cpu_lock; }; extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index e7da3c3b098b..b18fb5fcb38e 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -19,6 +19,7 @@ #include <linux/kernfs.h> #include <linux/jump_label.h> #include <linux/types.h> +#include <linux/notifier.h> #include <linux/ns_common.h> #include <linux/nsproxy.h> #include <linux/user_namespace.h> @@ -40,7 +41,7 @@ struct kernel_clone_args; #ifdef CONFIG_CGROUPS -enum { +enum css_task_iter_flags { CSS_TASK_ITER_PROCS = (1U << 0), /* walk only threadgroup leaders */ CSS_TASK_ITER_THREADED = (1U << 1), /* walk all threaded css_sets in the domain */ CSS_TASK_ITER_SKIPPED = (1U << 16), /* internal flags */ @@ -66,10 +67,16 @@ struct css_task_iter { struct list_head iters_node; /* css_set->task_iters */ }; +enum cgroup_lifetime_events { + CGROUP_LIFETIME_ONLINE, + CGROUP_LIFETIME_OFFLINE, +}; + extern struct file_system_type cgroup_fs_type; extern struct cgroup_root cgrp_dfl_root; extern struct css_set init_css_set; extern spinlock_t css_set_lock; +extern struct blocking_notifier_head cgroup_lifetime_notifier; #define SUBSYS(_x) extern struct cgroup_subsys _x ## _cgrp_subsys; #include <linux/cgroup_subsys.h> @@ -347,6 +354,17 @@ static inline bool css_is_dying(struct cgroup_subsys_state *css) return css->flags & CSS_DYING; } +static inline bool css_is_self(struct cgroup_subsys_state *css) +{ + if (css == &css->cgroup->self) { + /* cgroup::self should not have subsystem association */ + WARN_ON(css->ss != NULL); + return true; + } + + return false; +} + static inline void cgroup_get(struct cgroup *cgrp) { css_get(&cgrp->self); @@ -688,8 +706,8 @@ static inline void cgroup_path_from_kernfs_id(u64 id, char *buf, size_t buflen) /* * cgroup scalable recursive statistics. */ -void cgroup_rstat_updated(struct cgroup *cgrp, int cpu); -void cgroup_rstat_flush(struct cgroup *cgrp); +void css_rstat_updated(struct cgroup_subsys_state *css, int cpu); +void css_rstat_flush(struct cgroup_subsys_state *css); /* * Basic resource stats. @@ -785,6 +803,17 @@ struct cgroup_namespace *copy_cgroup_ns(unsigned long flags, int cgroup_path_ns(struct cgroup *cgrp, char *buf, size_t buflen, struct cgroup_namespace *ns); +static inline void get_cgroup_ns(struct cgroup_namespace *ns) +{ + refcount_inc(&ns->ns.count); +} + +static inline void put_cgroup_ns(struct cgroup_namespace *ns) +{ + if (refcount_dec_and_test(&ns->ns.count)) + free_cgroup_ns(ns); +} + #else /* !CONFIG_CGROUPS */ static inline void free_cgroup_ns(struct cgroup_namespace *ns) { } @@ -795,19 +824,10 @@ copy_cgroup_ns(unsigned long flags, struct user_namespace *user_ns, return old_ns; } -#endif /* !CONFIG_CGROUPS */ +static inline void get_cgroup_ns(struct cgroup_namespace *ns) { } +static inline void put_cgroup_ns(struct cgroup_namespace *ns) { } -static inline void get_cgroup_ns(struct cgroup_namespace *ns) -{ - if (ns) - refcount_inc(&ns->ns.count); -} - -static inline void put_cgroup_ns(struct cgroup_namespace *ns) -{ - if (ns && refcount_dec_and_test(&ns->ns.count)) - free_cgroup_ns(ns); -} +#endif /* !CONFIG_CGROUPS */ #ifdef CONFIG_CGROUPS diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index 7e57047e1564..7093e1d08af0 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -216,6 +216,25 @@ const volatile void * __must_check_fn(const volatile void *val) #define return_ptr(p) return no_free_ptr(p) +/* + * Only for situations where an allocation is handed in to another function + * and consumed by that function on success. + * + * struct foo *f __free(kfree) = kzalloc(sizeof(*f), GFP_KERNEL); + * + * setup(f); + * if (some_condition) + * return -EINVAL; + * .... + * ret = bar(f); + * if (!ret) + * retain_and_null_ptr(f); + * return ret; + * + * After retain_and_null_ptr(f) the variable f is NULL and cannot be + * dereferenced anymore. + */ +#define retain_and_null_ptr(p) ((void)__get_and_null(p, NULL)) /* * DEFINE_CLASS(name, type, exit, init, init_args...): diff --git a/include/linux/codetag.h b/include/linux/codetag.h index d14dbd26b370..0ee4c21c6dbc 100644 --- a/include/linux/codetag.h +++ b/include/linux/codetag.h @@ -36,10 +36,10 @@ union codetag_ref { struct codetag_type_desc { const char *section; size_t tag_size; - void (*module_load)(struct codetag_type *cttype, - struct codetag_module *cmod); - void (*module_unload)(struct codetag_type *cttype, - struct codetag_module *cmod); + void (*module_load)(struct module *mod, + struct codetag *start, struct codetag *end); + void (*module_unload)(struct module *mod, + struct codetag *start, struct codetag *end); #ifdef CONFIG_MODULES void (*module_replaced)(struct module *mod, struct module *new_mod); bool (*needs_section_mem)(struct module *mod, unsigned long size); diff --git a/include/linux/compiler-version.h b/include/linux/compiler-version.h index 573fa85b6c0c..ac1665a98a15 100644 --- a/include/linux/compiler-version.h +++ b/include/linux/compiler-version.h @@ -12,3 +12,33 @@ * and add dependency on include/config/CC_VERSION_TEXT, which is touched * by Kconfig when the version string from the compiler changes. */ + +/* Additional tree-wide dependencies start here. */ + +/* + * If any of the GCC plugins change, we need to rebuild everything that + * was built with them, as they may have changed their behavior and those + * behaviors may need to be synchronized across all translation units. + */ +#ifdef GCC_PLUGINS +#include <generated/gcc-plugins.h> +#endif + +/* + * If the randstruct seed itself changes (whether for GCC plugins or + * Clang), the entire tree needs to be rebuilt since the randomization of + * structures may change between compilation units if not. + */ +#ifdef RANDSTRUCT +#include <generated/randstruct_hash.h> +#endif + +/* + * If any external changes affect Clang's integer wrapping sanitizer + * behavior, a full rebuild is needed as the coverage for wrapping types + * may have changed, which may impact the expected behaviors that should + * not differ between compilation units. + */ +#ifdef INTEGER_WRAP +#include <generated/integer-wrap.h> +#endif diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 501cffddc2f4..20881cc761fa 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -449,6 +449,11 @@ struct ftrace_likely_data { /* * When the size of an allocated object is needed, use the best available * mechanism to find it. (For cases where sizeof() cannot be used.) + * + * Optional: only supported since gcc >= 12 + * + * gcc: https://gcc.gnu.org/onlinedocs/gcc/Object-Size-Checking.html + * clang: https://clang.llvm.org/docs/LanguageExtensions.html#evaluating-object-size */ #if __has_builtin(__builtin_dynamic_object_size) #define __struct_size(p) __builtin_dynamic_object_size(p, 0) diff --git a/include/linux/configfs.h b/include/linux/configfs.h index c771e9d0d0b9..698520b1bfdb 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -120,15 +120,19 @@ struct configfs_attribute { ssize_t (*store)(struct config_item *, const char *, size_t); }; -#define CONFIGFS_ATTR(_pfx, _name) \ +#define CONFIGFS_ATTR_PERM(_pfx, _name, _perm) \ static struct configfs_attribute _pfx##attr_##_name = { \ .ca_name = __stringify(_name), \ - .ca_mode = S_IRUGO | S_IWUSR, \ + .ca_mode = _perm, \ .ca_owner = THIS_MODULE, \ .show = _pfx##_name##_show, \ .store = _pfx##_name##_store, \ } +#define CONFIGFS_ATTR(_pfx, _name) CONFIGFS_ATTR_PERM( \ + _pfx, _name, S_IRUGO | S_IWUSR \ +) + #define CONFIGFS_ATTR_RO(_pfx, _name) \ static struct configfs_attribute _pfx##attr_##_name = { \ .ca_name = __stringify(_name), \ diff --git a/include/linux/coredump.h b/include/linux/coredump.h index 77e6e195d1d6..76e41805b92d 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -28,6 +28,7 @@ struct coredump_params { int vma_count; size_t vma_data_size; struct core_vma_metadata *vma_meta; + struct pid *pid; }; extern unsigned int core_file_note_size_limit; diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 3aa955102b34..e6089abc28e2 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -78,6 +78,8 @@ extern ssize_t cpu_show_gds(struct device *dev, extern ssize_t cpu_show_reg_file_data_sampling(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_ghostwrite(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_old_microcode(struct device *dev, + struct device_attribute *attr, char *buf); extern ssize_t cpu_show_indirect_target_selection(struct device *dev, struct device_attribute *attr, char *buf); diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 7a5b391dcc01..95f3807c8c55 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -170,6 +170,12 @@ struct cpufreq_policy { struct notifier_block nb_max; }; +DEFINE_GUARD(cpufreq_policy_write, struct cpufreq_policy *, + down_write(&_T->rwsem), up_write(&_T->rwsem)) + +DEFINE_GUARD(cpufreq_policy_read, struct cpufreq_policy *, + down_read(&_T->rwsem), up_read(&_T->rwsem)) + /* * Used for passing new cpufreq policy data to the cpufreq driver's ->verify() * callback for sanitization. That callback is only expected to modify the min @@ -235,9 +241,6 @@ void disable_cpufreq(void); u64 get_cpu_idle_time(unsigned int cpu, u64 *wall, int io_busy); -struct cpufreq_policy *cpufreq_cpu_acquire(unsigned int cpu); -void cpufreq_cpu_release(struct cpufreq_policy *policy); -int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu); void refresh_frequency_limits(struct cpufreq_policy *policy); void cpufreq_update_policy(unsigned int cpu); void cpufreq_update_limits(unsigned int cpu); @@ -395,7 +398,7 @@ struct cpufreq_driver { unsigned int (*get)(unsigned int cpu); /* Called to update policy limits on firmware notifications. */ - void (*update_limits)(unsigned int cpu); + void (*update_limits)(struct cpufreq_policy *policy); /* optional */ int (*bios_limit)(int cpu, unsigned int *limit); @@ -647,6 +650,15 @@ module_exit(__governor##_exit) struct cpufreq_governor *cpufreq_default_governor(void); struct cpufreq_governor *cpufreq_fallback_governor(void); +#ifdef CONFIG_CPU_FREQ_GOV_SCHEDUTIL +bool sugov_is_governor(struct cpufreq_policy *policy); +#else +static inline bool sugov_is_governor(struct cpufreq_policy *policy) +{ + return false; +} +#endif + static inline void cpufreq_policy_apply_limits(struct cpufreq_policy *policy) { if (policy->max < policy->cur) @@ -1225,6 +1237,8 @@ void cpufreq_generic_init(struct cpufreq_policy *policy, struct cpufreq_frequency_table *table, unsigned int transition_latency); +bool cpufreq_ready_for_eas(const struct cpumask *cpu_mask); + static inline void cpufreq_register_em_with_opp(struct cpufreq_policy *policy) { dev_pm_opp_of_register_em(get_cpu_device(policy->cpu), diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 1987400000b4..df366ee15456 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -60,7 +60,6 @@ enum cpuhp_state { /* PREPARE section invoked on a control CPU */ CPUHP_OFFLINE = 0, CPUHP_CREATE_THREADS, - CPUHP_PERF_PREPARE, CPUHP_PERF_X86_PREPARE, CPUHP_PERF_X86_AMD_UNCORE_PREP, CPUHP_PERF_POWER, diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index f9a868384083..6a569c7534db 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -179,6 +179,19 @@ unsigned int cpumask_first_and(const struct cpumask *srcp1, const struct cpumask } /** + * cpumask_first_andnot - return the first cpu from *srcp1 & ~*srcp2 + * @srcp1: the first input + * @srcp2: the second input + * + * Return: >= nr_cpu_ids if no such cpu found. + */ +static __always_inline +unsigned int cpumask_first_andnot(const struct cpumask *srcp1, const struct cpumask *srcp2) +{ + return find_first_andnot_bit(cpumask_bits(srcp1), cpumask_bits(srcp2), small_cpumask_bits); +} + +/** * cpumask_first_and_and - return the first cpu from *srcp1 & *srcp2 & *srcp3 * @srcp1: the first input * @srcp2: the second input @@ -285,6 +298,25 @@ unsigned int cpumask_next_and(int n, const struct cpumask *src1p, } /** + * cpumask_next_andnot - get the next cpu in *src1p & ~*src2p + * @n: the cpu prior to the place to search (i.e. return will be > @n) + * @src1p: the first cpumask pointer + * @src2p: the second cpumask pointer + * + * Return: >= nr_cpu_ids if no further cpus set in both. + */ +static __always_inline +unsigned int cpumask_next_andnot(int n, const struct cpumask *src1p, + const struct cpumask *src2p) +{ + /* -1 is a legal arg here. */ + if (n != -1) + cpumask_check(n); + return find_next_andnot_bit(cpumask_bits(src1p), cpumask_bits(src2p), + small_cpumask_bits, n + 1); +} + +/** * cpumask_next_and_wrap - get the next cpu in *src1p & *src2p, starting from * @n+1. If nothing found, wrap around and start from * the beginning @@ -413,14 +445,18 @@ unsigned int cpumask_next_wrap(int n, const struct cpumask *src) * @cpu: the cpu to ignore. * * Often used to find any cpu but smp_processor_id() in a mask. + * If @cpu == -1, the function is equivalent to cpumask_any(). * Return: >= nr_cpu_ids if no cpus set. */ static __always_inline -unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) +unsigned int cpumask_any_but(const struct cpumask *mask, int cpu) { unsigned int i; - cpumask_check(cpu); + /* -1 is a legal arg here. */ + if (cpu != -1) + cpumask_check(cpu); + for_each_cpu(i, mask) if (i != cpu) break; @@ -433,16 +469,20 @@ unsigned int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) * @mask2: the second input cpumask * @cpu: the cpu to ignore * + * If @cpu == -1, the function is equivalent to cpumask_any_and(). * Returns >= nr_cpu_ids if no cpus set. */ static __always_inline unsigned int cpumask_any_and_but(const struct cpumask *mask1, const struct cpumask *mask2, - unsigned int cpu) + int cpu) { unsigned int i; - cpumask_check(cpu); + /* -1 is a legal arg here. */ + if (cpu != -1) + cpumask_check(cpu); + i = cpumask_first_and(mask1, mask2); if (i != cpu) return i; @@ -451,6 +491,33 @@ unsigned int cpumask_any_and_but(const struct cpumask *mask1, } /** + * cpumask_any_andnot_but - pick an arbitrary cpu from *mask1 & ~*mask2, but not this one. + * @mask1: the first input cpumask + * @mask2: the second input cpumask + * @cpu: the cpu to ignore + * + * If @cpu == -1, the function returns the first matching cpu. + * Returns >= nr_cpu_ids if no cpus set. + */ +static __always_inline +unsigned int cpumask_any_andnot_but(const struct cpumask *mask1, + const struct cpumask *mask2, + int cpu) +{ + unsigned int i; + + /* -1 is a legal arg here. */ + if (cpu != -1) + cpumask_check(cpu); + + i = cpumask_first_andnot(mask1, mask2); + if (i != cpu) + return i; + + return cpumask_next_andnot(cpu, mask1, mask2); +} + +/** * cpumask_nth - get the Nth cpu in a cpumask * @srcp: the cpumask pointer * @cpu: the Nth cpu to find, starting from 0 diff --git a/include/linux/crc16.h b/include/linux/crc16.h index 9fa74529b317..b861d969b161 100644 --- a/include/linux/crc16.h +++ b/include/linux/crc16.h @@ -15,14 +15,7 @@ #include <linux/types.h> -extern u16 const crc16_table[256]; - -extern u16 crc16(u16 crc, const u8 *buffer, size_t len); - -static inline u16 crc16_byte(u16 crc, const u8 data) -{ - return (crc >> 8) ^ crc16_table[(crc ^ data) & 0xff]; -} +u16 crc16(u16 crc, const u8 *p, size_t len); #endif /* __CRC16_H */ diff --git a/include/linux/crc32.h b/include/linux/crc32.h index 69c2e8bb3782..569dc13f139f 100644 --- a/include/linux/crc32.h +++ b/include/linux/crc32.h @@ -1,7 +1,4 @@ -/* - * crc32.h - * See linux/lib/crc32.c for license and changes - */ +/* SPDX-License-Identifier: GPL-2.0-only */ #ifndef _LINUX_CRC32_H #define _LINUX_CRC32_H diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 1e3809d28abd..b50f1954d1bb 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -14,8 +14,7 @@ #include <linux/completion.h> #include <linux/errno.h> -#include <linux/list.h> -#include <linux/refcount.h> +#include <linux/refcount_types.h> #include <linux/slab.h> #include <linux/types.h> @@ -51,6 +50,15 @@ #define CRYPTO_ALG_NEED_FALLBACK 0x00000100 /* + * Set if the algorithm data structure should be duplicated into + * kmalloc memory before registration. This is useful for hardware + * that can be disconnected at will. Do not use this if the data + * structure is embedded into a bigger one. Duplicate the overall + * data structure in the driver in that case. + */ +#define CRYPTO_ALG_DUP_FIRST 0x00000200 + +/* * Set if the algorithm has passed automated run-time testing. Note that * if there is no run-time testing for a given algorithm it is considered * to have passed. @@ -125,8 +133,10 @@ */ #define CRYPTO_ALG_FIPS_INTERNAL 0x00020000 -/* Set if the algorithm supports request chains and virtual addresses. */ -#define CRYPTO_ALG_REQ_CHAIN 0x00040000 +/* Set if the algorithm supports virtual addresses. */ +#define CRYPTO_ALG_REQ_VIRT 0x00040000 + +/* The high bits 0xff000000 are reserved for type-specific flags. */ /* * Transform masks and values (for crt_flags). @@ -179,7 +189,6 @@ struct crypto_async_request { struct crypto_tfm *tfm; u32 flags; - int err; }; /** @@ -278,6 +287,7 @@ struct cipher_alg { * to the alignmask of the algorithm being used, in order to * avoid the API having to realign them. Note: the alignmask is * not supported for hash algorithms and is always 0 for them. + * @cra_reqsize: Size of the request context for this algorithm. * @cra_priority: Priority of this transformation implementation. In case * multiple transformations with same @cra_name are available to * the Crypto API, the kernel will use the one with highest @@ -302,17 +312,8 @@ struct cipher_alg { * by @cra_type and @cra_flags above, the associated structure must be * filled with callbacks. This field might be empty. This is the case * for ahash, shash. - * @cra_init: Initialize the cryptographic transformation object. This function - * is used to initialize the cryptographic transformation object. - * This function is called only once at the instantiation time, right - * after the transformation context was allocated. In case the - * cryptographic hardware has some special requirements which need to - * be handled by software, this function shall check for the precise - * requirement of the transformation and put any software fallbacks - * in place. - * @cra_exit: Deinitialize the cryptographic transformation object. This is a - * counterpart to @cra_init, used to remove various changes set in - * @cra_init. + * @cra_init: Deprecated, do not use. + * @cra_exit: Deprecated, do not use. * @cra_u.cipher: Union member which contains a single-block symmetric cipher * definition. See @struct @cipher_alg. * @cra_module: Owner of this transformation implementation. Set to THIS_MODULE @@ -333,6 +334,7 @@ struct crypto_alg { unsigned int cra_blocksize; unsigned int cra_ctxsize; unsigned int cra_alignmask; + unsigned int cra_reqsize; int cra_priority; refcount_t cra_refcnt; @@ -409,9 +411,11 @@ struct crypto_tfm { u32 crt_flags; int node; - + + struct crypto_tfm *fb; + void (*exit)(struct crypto_tfm *tfm); - + struct crypto_alg *__crt_alg; void *__crt_ctx[] CRYPTO_MINALIGN_ATTR; @@ -452,6 +456,11 @@ static inline unsigned int crypto_tfm_alg_alignmask(struct crypto_tfm *tfm) return tfm->__crt_alg->cra_alignmask; } +static inline unsigned int crypto_tfm_alg_reqsize(struct crypto_tfm *tfm) +{ + return tfm->__crt_alg->cra_reqsize; +} + static inline u32 crypto_tfm_get_flags(struct crypto_tfm *tfm) { return tfm->crt_flags; @@ -473,22 +482,44 @@ static inline unsigned int crypto_tfm_ctx_alignment(void) return __alignof__(tfm->__crt_ctx); } -static inline void crypto_reqchain_init(struct crypto_async_request *req) +static inline bool crypto_tfm_is_async(struct crypto_tfm *tfm) { - req->err = -EINPROGRESS; - INIT_LIST_HEAD(&req->list); + return tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC; } -static inline void crypto_request_chain(struct crypto_async_request *req, - struct crypto_async_request *head) +static inline bool crypto_req_on_stack(struct crypto_async_request *req) { - req->err = -EINPROGRESS; - list_add_tail(&req->list, &head->list); + return req->flags & CRYPTO_TFM_REQ_ON_STACK; } -static inline bool crypto_tfm_is_async(struct crypto_tfm *tfm) +static inline void crypto_request_set_callback( + struct crypto_async_request *req, u32 flags, + crypto_completion_t compl, void *data) { - return tfm->__crt_alg->cra_flags & CRYPTO_ALG_ASYNC; + u32 keep = CRYPTO_TFM_REQ_ON_STACK; + + req->complete = compl; + req->data = data; + req->flags &= keep; + req->flags |= flags & ~keep; +} + +static inline void crypto_request_set_tfm(struct crypto_async_request *req, + struct crypto_tfm *tfm) +{ + req->tfm = tfm; + req->flags &= ~CRYPTO_TFM_REQ_ON_STACK; +} + +struct crypto_async_request *crypto_request_clone( + struct crypto_async_request *req, size_t total, gfp_t gfp); + +static inline void crypto_stack_request_init(struct crypto_async_request *req, + struct crypto_tfm *tfm) +{ + req->flags = 0; + crypto_request_set_tfm(req, tfm); + req->flags |= CRYPTO_TFM_REQ_ON_STACK; } #endif /* _LINUX_CRYPTO_H */ diff --git a/include/linux/dcache.h b/include/linux/dcache.h index e9f07e37dd6f..e29823c701ac 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -57,7 +57,8 @@ struct qstr { }; #define QSTR_INIT(n,l) { { { .len = l } }, .name = n } -#define QSTR(n) (struct qstr)QSTR_INIT(n, strlen(n)) +#define QSTR_LEN(n,l) (struct qstr)QSTR_INIT(n,l) +#define QSTR(n) QSTR_LEN(n, strlen(n)) extern const struct qstr empty_name; extern const struct qstr slash_name; @@ -281,7 +282,6 @@ extern void d_exchange(struct dentry *, struct dentry *); extern struct dentry *d_ancestor(struct dentry *, struct dentry *); extern struct dentry *d_lookup(const struct dentry *, const struct qstr *); -extern struct dentry *d_hash_and_lookup(struct dentry *, struct qstr *); static inline unsigned d_count(const struct dentry *dentry) { diff --git a/include/linux/device.h b/include/linux/device.h index 79e49fe494b7..4940db137fff 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -281,44 +281,6 @@ int __must_check device_create_bin_file(struct device *dev, void device_remove_bin_file(struct device *dev, const struct bin_attribute *attr); -/* allows to add/remove a custom action to devres stack */ -int devm_remove_action_nowarn(struct device *dev, void (*action)(void *), void *data); - -/** - * devm_remove_action() - removes previously added custom action - * @dev: Device that owns the action - * @action: Function implementing the action - * @data: Pointer to data passed to @action implementation - * - * Removes instance of @action previously added by devm_add_action(). - * Both action and data should match one of the existing entries. - */ -static inline -void devm_remove_action(struct device *dev, void (*action)(void *), void *data) -{ - WARN_ON(devm_remove_action_nowarn(dev, action, data)); -} - -void devm_release_action(struct device *dev, void (*action)(void *), void *data); - -int __devm_add_action(struct device *dev, void (*action)(void *), void *data, const char *name); -#define devm_add_action(dev, action, data) \ - __devm_add_action(dev, action, data, #action) - -static inline int __devm_add_action_or_reset(struct device *dev, void (*action)(void *), - void *data, const char *name) -{ - int ret; - - ret = __devm_add_action(dev, action, data, name); - if (ret) - action(data); - - return ret; -} -#define devm_add_action_or_reset(dev, action, data) \ - __devm_add_action_or_reset(dev, action, data, #action) - /** * devm_alloc_percpu - Resource-managed alloc_percpu * @dev: Device to allocate per-cpu memory for diff --git a/include/linux/device/devres.h b/include/linux/device/devres.h index 9b49f9915850..ae696d10faff 100644 --- a/include/linux/device/devres.h +++ b/include/linux/device/devres.h @@ -8,6 +8,7 @@ #include <linux/overflow.h> #include <linux/stdarg.h> #include <linux/types.h> +#include <asm/bug.h> struct device; struct device_node; @@ -126,4 +127,44 @@ void __iomem *devm_of_iomap(struct device *dev, struct device_node *node, int in #endif +/* allows to add/remove a custom action to devres stack */ +int devm_remove_action_nowarn(struct device *dev, void (*action)(void *), void *data); + +/** + * devm_remove_action() - removes previously added custom action + * @dev: Device that owns the action + * @action: Function implementing the action + * @data: Pointer to data passed to @action implementation + * + * Removes instance of @action previously added by devm_add_action(). + * Both action and data should match one of the existing entries. + */ +static inline +void devm_remove_action(struct device *dev, void (*action)(void *), void *data) +{ + WARN_ON(devm_remove_action_nowarn(dev, action, data)); +} + +void devm_release_action(struct device *dev, void (*action)(void *), void *data); + +int __devm_add_action(struct device *dev, void (*action)(void *), void *data, const char *name); +#define devm_add_action(dev, action, data) \ + __devm_add_action(dev, action, data, #action) + +static inline int __devm_add_action_or_reset(struct device *dev, void (*action)(void *), + void *data, const char *name) +{ + int ret; + + ret = __devm_add_action(dev, action, data, name); + if (ret) + action(data); + + return ret; +} +#define devm_add_action_or_reset(dev, action, data) \ + __devm_add_action_or_reset(dev, action, data, #action) + +bool devm_is_action_added(struct device *dev, void (*action)(void *), void *data); + #endif /* _DEVICE_DEVRES_H_ */ diff --git a/include/linux/device_cgroup.h b/include/linux/device_cgroup.h index d02f32b7514e..0864773a57e8 100644 --- a/include/linux/device_cgroup.h +++ b/include/linux/device_cgroup.h @@ -18,15 +18,16 @@ static inline int devcgroup_inode_permission(struct inode *inode, int mask) { short type, access = 0; + if (likely(!S_ISBLK(inode->i_mode) && !S_ISCHR(inode->i_mode))) + return 0; + if (likely(!inode->i_rdev)) return 0; if (S_ISBLK(inode->i_mode)) type = DEVCG_DEV_BLOCK; - else if (S_ISCHR(inode->i_mode)) + else /* S_ISCHR by the test above */ type = DEVCG_DEV_CHAR; - else - return 0; if (mask & MAY_WRITE) access |= DEVCG_ACC_WRITE; diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index 36216d28d8bd..544f8f8c3f44 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -35,15 +35,6 @@ struct dma_buf_attachment; */ struct dma_buf_ops { /** - * @cache_sgt_mapping: - * - * If true the framework will cache the first mapping made for each - * attachment. This avoids creating mappings for attachments multiple - * times. - */ - bool cache_sgt_mapping; - - /** * @attach: * * This is called from dma_buf_attach() to make sure that a given @@ -493,8 +484,6 @@ struct dma_buf_attach_ops { * @dmabuf: buffer for this attachment. * @dev: device attached to the buffer. * @node: list of dma_buf_attachment, protected by dma_resv lock of the dmabuf. - * @sgt: cached mapping. - * @dir: direction of cached mapping. * @peer2peer: true if the importer can handle peer resources without pages. * @priv: exporter specific attachment data. * @importer_ops: importer operations for this attachment, if provided @@ -514,8 +503,6 @@ struct dma_buf_attachment { struct dma_buf *dmabuf; struct device *dev; struct list_head node; - struct sg_table *sgt; - enum dma_data_direction dir; bool peer2peer; const struct dma_buf_attach_ops *importer_ops; void *importer_priv; @@ -583,20 +570,6 @@ static inline bool dma_buf_is_dynamic(struct dma_buf *dmabuf) return !!dmabuf->ops->pin; } -/** - * dma_buf_attachment_is_dynamic - check if a DMA-buf attachment uses dynamic - * mappings - * @attach: the DMA-buf attachment to check - * - * Returns true if a DMA-buf importer wants to call the map/unmap functions with - * the dma_resv lock held. - */ -static inline bool -dma_buf_attachment_is_dynamic(struct dma_buf_attachment *attach) -{ - return !!attach->importer_ops; -} - struct dma_buf_attachment *dma_buf_attach(struct dma_buf *dmabuf, struct device *dev); struct dma_buf_attachment * diff --git a/include/linux/dma-fence-unwrap.h b/include/linux/dma-fence-unwrap.h index 66b1e56fbb81..62df222fe0f1 100644 --- a/include/linux/dma-fence-unwrap.h +++ b/include/linux/dma-fence-unwrap.h @@ -52,6 +52,8 @@ struct dma_fence *__dma_fence_unwrap_merge(unsigned int num_fences, struct dma_fence **fences, struct dma_fence_unwrap *cursors); +int dma_fence_dedup_array(struct dma_fence **array, int num_fences); + /** * dma_fence_unwrap_merge - unwrap and merge fences * diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index e7ad819962e3..b12776883d14 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -169,8 +169,8 @@ struct dma_fence_ops { * implementation know that there is another driver waiting on the * signal (ie. hw->sw case). * - * This function can be called from atomic context, but not - * from irq context, so normal spinlocks can be used. + * This is called with irq's disabled, so only spinlocks which disable + * IRQ's can be used in the code outside of this callback. * * A return value of false indicates the fence already passed, * or some failure occurred that made it impossible to enable @@ -239,27 +239,6 @@ struct dma_fence_ops { void (*release)(struct dma_fence *fence); /** - * @fence_value_str: - * - * Callback to fill in free-form debug info specific to this fence, like - * the sequence number. - * - * This callback is optional. - */ - void (*fence_value_str)(struct dma_fence *fence, char *str, int size); - - /** - * @timeline_value_str: - * - * Fills in the current value of the timeline as a string, like the - * sequence number. Note that the specific fence passed to this function - * should not matter, drivers should only use it to look up the - * corresponding timeline structures. - */ - void (*timeline_value_str)(struct dma_fence *fence, - char *str, int size); - - /** * @set_deadline: * * Callback to allow a fence waiter to inform the fence signaler of diff --git a/include/linux/dma-map-ops.h b/include/linux/dma-map-ops.h index e172522cd936..f48e5fb88bd5 100644 --- a/include/linux/dma-map-ops.h +++ b/include/linux/dma-map-ops.h @@ -434,58 +434,4 @@ static inline void debug_dma_dump_mappings(struct device *dev) #endif /* CONFIG_DMA_API_DEBUG */ extern const struct dma_map_ops dma_dummy_ops; - -enum pci_p2pdma_map_type { - /* - * PCI_P2PDMA_MAP_UNKNOWN: Used internally for indicating the mapping - * type hasn't been calculated yet. Functions that return this enum - * never return this value. - */ - PCI_P2PDMA_MAP_UNKNOWN = 0, - - /* - * PCI_P2PDMA_MAP_NOT_SUPPORTED: Indicates the transaction will - * traverse the host bridge and the host bridge is not in the - * allowlist. DMA Mapping routines should return an error when - * this is returned. - */ - PCI_P2PDMA_MAP_NOT_SUPPORTED, - - /* - * PCI_P2PDMA_BUS_ADDR: Indicates that two devices can talk to - * each other directly through a PCI switch and the transaction will - * not traverse the host bridge. Such a mapping should program - * the DMA engine with PCI bus addresses. - */ - PCI_P2PDMA_MAP_BUS_ADDR, - - /* - * PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: Indicates two devices can talk - * to each other, but the transaction traverses a host bridge on the - * allowlist. In this case, a normal mapping either with CPU physical - * addresses (in the case of dma-direct) or IOVA addresses (in the - * case of IOMMUs) should be used to program the DMA engine. - */ - PCI_P2PDMA_MAP_THRU_HOST_BRIDGE, -}; - -struct pci_p2pdma_map_state { - struct dev_pagemap *pgmap; - int map; - u64 bus_off; -}; - -#ifdef CONFIG_PCI_P2PDMA -enum pci_p2pdma_map_type -pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev, - struct scatterlist *sg); -#else /* CONFIG_PCI_P2PDMA */ -static inline enum pci_p2pdma_map_type -pci_p2pdma_map_segment(struct pci_p2pdma_map_state *state, struct device *dev, - struct scatterlist *sg) -{ - return PCI_P2PDMA_MAP_NOT_SUPPORTED; -} -#endif /* CONFIG_PCI_P2PDMA */ - #endif /* _LINUX_DMA_MAP_OPS_H */ diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 85ab710ec0e7..55c03e5fe8cb 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -72,6 +72,22 @@ #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1)) +struct dma_iova_state { + dma_addr_t addr; + u64 __size; +}; + +/* + * Use the high bit to mark if we used swiotlb for one or more ranges. + */ +#define DMA_IOVA_USE_SWIOTLB (1ULL << 63) + +static inline size_t dma_iova_size(struct dma_iova_state *state) +{ + /* Casting is needed for 32-bits systems */ + return (size_t)(state->__size & ~DMA_IOVA_USE_SWIOTLB); +} + #ifdef CONFIG_DMA_API_DEBUG void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr); void debug_dma_map_single(struct device *dev, const void *addr, @@ -277,6 +293,70 @@ static inline int dma_mmap_noncontiguous(struct device *dev, } #endif /* CONFIG_HAS_DMA */ +#ifdef CONFIG_IOMMU_DMA +/** + * dma_use_iova - check if the IOVA API is used for this state + * @state: IOVA state + * + * Return %true if the DMA transfers uses the dma_iova_*() calls or %false if + * they can't be used. + */ +static inline bool dma_use_iova(struct dma_iova_state *state) +{ + return state->__size != 0; +} + +bool dma_iova_try_alloc(struct device *dev, struct dma_iova_state *state, + phys_addr_t phys, size_t size); +void dma_iova_free(struct device *dev, struct dma_iova_state *state); +void dma_iova_destroy(struct device *dev, struct dma_iova_state *state, + size_t mapped_len, enum dma_data_direction dir, + unsigned long attrs); +int dma_iova_sync(struct device *dev, struct dma_iova_state *state, + size_t offset, size_t size); +int dma_iova_link(struct device *dev, struct dma_iova_state *state, + phys_addr_t phys, size_t offset, size_t size, + enum dma_data_direction dir, unsigned long attrs); +void dma_iova_unlink(struct device *dev, struct dma_iova_state *state, + size_t offset, size_t size, enum dma_data_direction dir, + unsigned long attrs); +#else /* CONFIG_IOMMU_DMA */ +static inline bool dma_use_iova(struct dma_iova_state *state) +{ + return false; +} +static inline bool dma_iova_try_alloc(struct device *dev, + struct dma_iova_state *state, phys_addr_t phys, size_t size) +{ + return false; +} +static inline void dma_iova_free(struct device *dev, + struct dma_iova_state *state) +{ +} +static inline void dma_iova_destroy(struct device *dev, + struct dma_iova_state *state, size_t mapped_len, + enum dma_data_direction dir, unsigned long attrs) +{ +} +static inline int dma_iova_sync(struct device *dev, + struct dma_iova_state *state, size_t offset, size_t size) +{ + return -EOPNOTSUPP; +} +static inline int dma_iova_link(struct device *dev, + struct dma_iova_state *state, phys_addr_t phys, size_t offset, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + return -EOPNOTSUPP; +} +static inline void dma_iova_unlink(struct device *dev, + struct dma_iova_state *state, size_t offset, size_t size, + enum dma_data_direction dir, unsigned long attrs) +{ +} +#endif /* CONFIG_IOMMU_DMA */ + #if defined(CONFIG_HAS_DMA) && defined(CONFIG_DMA_NEED_SYNC) void __dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir); @@ -326,6 +406,7 @@ static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) { return dma_dev_need_sync(dev) ? __dma_need_sync(dev, dma_addr) : false; } +bool dma_need_unmap(struct device *dev); #else /* !CONFIG_HAS_DMA || !CONFIG_DMA_NEED_SYNC */ static inline bool dma_dev_need_sync(const struct device *dev) { @@ -351,6 +432,10 @@ static inline bool dma_need_sync(struct device *dev, dma_addr_t dma_addr) { return false; } +static inline bool dma_need_unmap(struct device *dev) +{ + return false; +} #endif /* !CONFIG_HAS_DMA || !CONFIG_DMA_NEED_SYNC */ struct page *dma_alloc_pages(struct device *dev, size_t size, diff --git a/include/linux/dmapool.h b/include/linux/dmapool.h index f632ecfb4238..06c4de602b2f 100644 --- a/include/linux/dmapool.h +++ b/include/linux/dmapool.h @@ -11,6 +11,7 @@ #ifndef LINUX_DMAPOOL_H #define LINUX_DMAPOOL_H +#include <linux/nodemask_types.h> #include <linux/scatterlist.h> #include <asm/io.h> @@ -18,8 +19,8 @@ struct device; #ifdef CONFIG_HAS_DMA -struct dma_pool *dma_pool_create(const char *name, struct device *dev, - size_t size, size_t align, size_t allocation); +struct dma_pool *dma_pool_create_node(const char *name, struct device *dev, + size_t size, size_t align, size_t boundary, int node); void dma_pool_destroy(struct dma_pool *pool); @@ -35,9 +36,12 @@ struct dma_pool *dmam_pool_create(const char *name, struct device *dev, void dmam_pool_destroy(struct dma_pool *pool); #else /* !CONFIG_HAS_DMA */ -static inline struct dma_pool *dma_pool_create(const char *name, - struct device *dev, size_t size, size_t align, size_t allocation) -{ return NULL; } +static inline struct dma_pool *dma_pool_create_node(const char *name, + struct device *dev, size_t size, size_t align, size_t boundary, + int node) +{ + return NULL; +} static inline void dma_pool_destroy(struct dma_pool *pool) { } static inline void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags, dma_addr_t *handle) { return NULL; } @@ -49,6 +53,13 @@ static inline struct dma_pool *dmam_pool_create(const char *name, static inline void dmam_pool_destroy(struct dma_pool *pool) { } #endif /* !CONFIG_HAS_DMA */ +static inline struct dma_pool *dma_pool_create(const char *name, + struct device *dev, size_t size, size_t align, size_t boundary) +{ + return dma_pool_create_node(name, dev, size, align, boundary, + NUMA_NO_NODE); +} + static inline void *dma_pool_zalloc(struct dma_pool *pool, gfp_t mem_flags, dma_addr_t *handle) { diff --git a/include/linux/energy_model.h b/include/linux/energy_model.h index d8eabbf86a5b..7fa1eb3cc823 100644 --- a/include/linux/energy_model.h +++ b/include/linux/energy_model.h @@ -179,6 +179,7 @@ int em_dev_compute_costs(struct device *dev, struct em_perf_state *table, int em_dev_update_chip_binning(struct device *dev); int em_update_performance_limits(struct em_perf_domain *pd, unsigned long freq_min_khz, unsigned long freq_max_khz); +void em_adjust_cpu_capacity(unsigned int cpu); void em_rebuild_sched_domains(void); /** @@ -403,6 +404,7 @@ int em_update_performance_limits(struct em_perf_domain *pd, { return -EINVAL; } +static inline void em_adjust_cpu_capacity(unsigned int cpu) {} static inline void em_rebuild_sched_domains(void) {} #endif diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index fc61d0205c97..f94f3fdf15fc 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -14,6 +14,7 @@ #include <linux/kmsan.h> #include <asm/entry-common.h> +#include <asm/syscall.h> /* * Define dummy _TIF work flags if not defined by the architecture or for @@ -367,6 +368,15 @@ static __always_inline void exit_to_user_mode(void) } /** + * syscall_exit_work - Handle work before returning to user mode + * @regs: Pointer to current pt_regs + * @work: Current thread syscall work + * + * Do one-time syscall specific work. + */ +void syscall_exit_work(struct pt_regs *regs, unsigned long work); + +/** * syscall_exit_to_user_mode_work - Handle work before returning to user mode * @regs: Pointer to currents pt_regs * @@ -379,7 +389,30 @@ static __always_inline void exit_to_user_mode(void) * make the final state transitions. Interrupts must stay disabled between * return from this function and the invocation of exit_to_user_mode(). */ -void syscall_exit_to_user_mode_work(struct pt_regs *regs); +static __always_inline void syscall_exit_to_user_mode_work(struct pt_regs *regs) +{ + unsigned long work = READ_ONCE(current_thread_info()->syscall_work); + unsigned long nr = syscall_get_nr(current, regs); + + CT_WARN_ON(ct_state() != CT_STATE_KERNEL); + + if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { + if (WARN(irqs_disabled(), "syscall %lu left IRQs disabled", nr)) + local_irq_enable(); + } + + rseq_syscall(regs); + + /* + * Do one-time syscall specific work. If these work items are + * enabled, we want to run them exactly once per syscall exit with + * interrupts enabled. + */ + if (unlikely(work & SYSCALL_WORK_EXIT)) + syscall_exit_work(regs, work); + local_irq_disable_exit_to_user(); + exit_to_user_mode_prepare(regs); +} /** * syscall_exit_to_user_mode - Handle work before returning to user mode @@ -410,7 +443,13 @@ void syscall_exit_to_user_mode_work(struct pt_regs *regs); * exit_to_user_mode(). This function is preferred unless there is a * compelling architectural reason to use the separate functions. */ -void syscall_exit_to_user_mode(struct pt_regs *regs); +static __always_inline void syscall_exit_to_user_mode(struct pt_regs *regs) +{ + instrumentation_begin(); + syscall_exit_to_user_mode_work(regs); + instrumentation_end(); + exit_to_user_mode(); +} /** * irqentry_enter_from_user_mode - Establish state before invoking the irq handler diff --git a/include/linux/file.h b/include/linux/file.h index 302f11355b10..af1768d934a0 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -59,7 +59,7 @@ static inline struct fd CLONED_FD(struct file *f) static inline void fdput(struct fd fd) { - if (fd.word & FDPUT_FPUT) + if (unlikely(fd.word & FDPUT_FPUT)) fput(fd_file(fd)); } diff --git a/include/linux/find.h b/include/linux/find.h index 68685714bc18..5a2c267ea7f9 100644 --- a/include/linux/find.h +++ b/include/linux/find.h @@ -29,6 +29,8 @@ unsigned long __find_nth_and_andnot_bit(const unsigned long *addr1, const unsign unsigned long n); extern unsigned long _find_first_and_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long size); +unsigned long _find_first_andnot_bit(const unsigned long *addr1, const unsigned long *addr2, + unsigned long size); unsigned long _find_first_and_and_bit(const unsigned long *addr1, const unsigned long *addr2, const unsigned long *addr3, unsigned long size); extern unsigned long _find_first_zero_bit(const unsigned long *addr, unsigned long size); @@ -348,6 +350,29 @@ unsigned long find_first_and_bit(const unsigned long *addr1, #endif /** + * find_first_andnot_bit - find the first bit set in 1st memory region and unset in 2nd + * @addr1: The first address to base the search on + * @addr2: The second address to base the search on + * @size: The bitmap size in bits + * + * Returns the bit number for the first set bit + * If no bits are set, returns >= @size. + */ +static __always_inline +unsigned long find_first_andnot_bit(const unsigned long *addr1, + const unsigned long *addr2, + unsigned long size) +{ + if (small_const_nbits(size)) { + unsigned long val = *addr1 & (~*addr2) & GENMASK(size - 1, 0); + + return val ? __ffs(val) : size; + } + + return _find_first_andnot_bit(addr1, addr2, size); +} + +/** * find_first_and_and_bit - find the first set bit in 3 memory regions * @addr1: The first address to base the search on * @addr2: The second address to base the search on diff --git a/include/linux/fs.h b/include/linux/fs.h index 016b0fe1536e..0db87f8e676c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -408,6 +408,7 @@ struct kiocb { void *private; int ki_flags; u16 ki_ioprio; /* See linux/ioprio.h */ + u8 ki_write_stream; union { /* * Only used for async buffered reads, where it denotes the @@ -433,7 +434,6 @@ static inline bool is_sync_kiocb(struct kiocb *kiocb) } struct address_space_operations { - int (*writepage)(struct page *page, struct writeback_control *wbc); int (*read_folio)(struct file *, struct folio *); /* Write back some dirty pages from this mapping. */ @@ -867,6 +867,11 @@ static inline void inode_lock(struct inode *inode) down_write(&inode->i_rwsem); } +static inline __must_check int inode_lock_killable(struct inode *inode) +{ + return down_write_killable(&inode->i_rwsem); +} + static inline void inode_unlock(struct inode *inode) { up_write(&inode->i_rwsem); @@ -877,6 +882,11 @@ static inline void inode_lock_shared(struct inode *inode) down_read(&inode->i_rwsem); } +static inline __must_check int inode_lock_shared_killable(struct inode *inode) +{ + return down_read_killable(&inode->i_rwsem); +} + static inline void inode_unlock_shared(struct inode *inode) { up_read(&inode->i_rwsem); @@ -1307,6 +1317,7 @@ struct sb_writers { unsigned short frozen; /* Is sb frozen? */ int freeze_kcount; /* How many kernel freeze requests? */ int freeze_ucount; /* How many userspace freeze requests? */ + const void *freeze_owner; /* Owner of the freeze */ struct percpu_rw_semaphore rw_sem[SB_FREEZE_LEVELS]; }; @@ -1780,7 +1791,7 @@ static inline void __sb_end_write(struct super_block *sb, int level) static inline void __sb_start_write(struct super_block *sb, int level) { - percpu_down_read(sb->s_writers.rw_sem + level - 1); + percpu_down_read_freezable(sb->s_writers.rw_sem + level - 1, true); } static inline bool __sb_start_write_trylock(struct super_block *sb, int level) @@ -2071,8 +2082,18 @@ typedef bool (*filldir_t)(struct dir_context *, const char *, int, loff_t, u64, struct dir_context { filldir_t actor; loff_t pos; + /* + * Filesystems MUST NOT MODIFY count, but may use as a hint: + * 0 unknown + * > 0 space in buffer (assume at least one entry) + * INT_MAX unlimited + */ + int count; }; +/* If OR-ed with d_type, pending signals are not checked */ +#define FILLDIR_FLAG_NOINTR 0x1000 + /* * These flags let !MMU mmap() govern direct device mapping vs immediate * copying more easily for MAP_PRIVATE, especially for ROM filesystems. @@ -2186,7 +2207,7 @@ struct file_operations { /* Supports asynchronous lock callbacks */ #define FOP_ASYNC_LOCK ((__force fop_flags_t)(1 << 6)) /* File system supports uncached read/write buffered IO */ -#define FOP_DONTCACHE ((__force fop_flags_t)(1 << 7)) +#define FOP_DONTCACHE 0 /* ((__force fop_flags_t)(1 << 7)) */ /* Wrap a directory iterator that needs exclusive inode access */ int wrap_directory_iterator(struct file *, struct dir_context *, @@ -2269,6 +2290,7 @@ extern loff_t vfs_dedupe_file_range_one(struct file *src_file, loff_t src_pos, * @FREEZE_HOLDER_KERNEL: kernel wants to freeze or thaw filesystem * @FREEZE_HOLDER_USERSPACE: userspace wants to freeze or thaw filesystem * @FREEZE_MAY_NEST: whether nesting freeze and thaw requests is allowed + * @FREEZE_EXCL: a freeze that can only be undone by the owner * * Indicate who the owner of the freeze or thaw request is and whether * the freeze needs to be exclusive or can nest. @@ -2282,6 +2304,7 @@ enum freeze_holder { FREEZE_HOLDER_KERNEL = (1U << 0), FREEZE_HOLDER_USERSPACE = (1U << 1), FREEZE_MAY_NEST = (1U << 2), + FREEZE_EXCL = (1U << 3), }; struct super_operations { @@ -2295,9 +2318,9 @@ struct super_operations { void (*evict_inode) (struct inode *); void (*put_super) (struct super_block *); int (*sync_fs)(struct super_block *sb, int wait); - int (*freeze_super) (struct super_block *, enum freeze_holder who); + int (*freeze_super) (struct super_block *, enum freeze_holder who, const void *owner); int (*freeze_fs) (struct super_block *); - int (*thaw_super) (struct super_block *, enum freeze_holder who); + int (*thaw_super) (struct super_block *, enum freeze_holder who, const void *owner); int (*unfreeze_fs) (struct super_block *); int (*statfs) (struct dentry *, struct kstatfs *); int (*remount_fs) (struct super_block *, int *, char *); @@ -2344,6 +2367,7 @@ struct super_operations { #define S_CASEFOLD (1 << 15) /* Casefolded file */ #define S_VERITY (1 << 16) /* Verity file (using fs/verity/) */ #define S_KERNEL_FILE (1 << 17) /* File is in use by the kernel (eg. fs/cachefiles) */ +#define S_ANON_INODE (1 << 19) /* Inode is an anonymous inode */ /* * Note that nosuid etc flags are inode-specific: setting some file-system @@ -2400,6 +2424,7 @@ static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags #define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \ (inode)->i_rdev == WHITEOUT_DEV) +#define IS_ANON_FILE(inode) ((inode)->i_flags & S_ANON_INODE) static inline bool HAS_UNMAPPED_ID(struct mnt_idmap *idmap, struct inode *inode) @@ -2705,8 +2730,10 @@ extern int unregister_filesystem(struct file_system_type *); extern int vfs_statfs(const struct path *, struct kstatfs *); extern int user_statfs(const char __user *, struct kstatfs *); extern int fd_statfs(int, struct kstatfs *); -int freeze_super(struct super_block *super, enum freeze_holder who); -int thaw_super(struct super_block *super, enum freeze_holder who); +int freeze_super(struct super_block *super, enum freeze_holder who, + const void *freeze_owner); +int thaw_super(struct super_block *super, enum freeze_holder who, + const void *freeze_owner); extern __printf(2, 3) int super_setup_bdi_name(struct super_block *sb, char *fmt, ...); extern int super_setup_bdi(struct super_block *sb); @@ -3475,7 +3502,8 @@ void generic_fillattr(struct mnt_idmap *, u32, struct inode *, struct kstat *); void generic_fill_statx_attr(struct inode *inode, struct kstat *stat); void generic_fill_statx_atomic_writes(struct kstat *stat, unsigned int unit_min, - unsigned int unit_max); + unsigned int unit_max, + unsigned int unit_max_opt); extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int); extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int); void __inode_add_bytes(struct inode *inode, loff_t bytes); @@ -3515,9 +3543,11 @@ extern void put_filesystem(struct file_system_type *fs); extern struct file_system_type *get_fs_type(const char *name); extern void drop_super(struct super_block *sb); extern void drop_super_exclusive(struct super_block *sb); -extern void iterate_supers(void (*)(struct super_block *, void *), void *); +extern void iterate_supers(void (*f)(struct super_block *, void *), void *arg); extern void iterate_supers_type(struct file_system_type *, void (*)(struct super_block *, void *), void *); +void filesystems_freeze(void); +void filesystems_thaw(void); extern int dcache_dir_open(struct inode *, struct file *); extern int dcache_dir_close(struct inode *, struct file *); diff --git a/include/linux/fs_parser.h b/include/linux/fs_parser.h index 53e566efd5fd..5a0e897cae80 100644 --- a/include/linux/fs_parser.h +++ b/include/linux/fs_parser.h @@ -87,14 +87,9 @@ extern int lookup_constant(const struct constant_table tbl[], const char *name, extern const struct constant_table bool_names[]; #ifdef CONFIG_VALIDATE_FS_PARSER -extern bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size, - int low, int high, int special); extern bool fs_validate_description(const char *name, const struct fs_parameter_spec *desc); #else -static inline bool validate_constant_table(const struct constant_table *tbl, size_t tbl_size, - int low, int high, int special) -{ return true; } static inline bool fs_validate_description(const char *name, const struct fs_parameter_spec *desc) { return true; } @@ -125,8 +120,6 @@ static inline bool fs_validate_description(const char *name, #define fsparam_u32(NAME, OPT) __fsparam(fs_param_is_u32, NAME, OPT, 0, NULL) #define fsparam_u32oct(NAME, OPT) \ __fsparam(fs_param_is_u32, NAME, OPT, 0, (void *)8) -#define fsparam_u32hex(NAME, OPT) \ - __fsparam(fs_param_is_u32_hex, NAME, OPT, 0, (void *)16) #define fsparam_s32(NAME, OPT) __fsparam(fs_param_is_s32, NAME, OPT, 0, NULL) #define fsparam_u64(NAME, OPT) __fsparam(fs_param_is_u64, NAME, OPT, 0, NULL) #define fsparam_enum(NAME, OPT, array) __fsparam(fs_param_is_enum, NAME, OPT, 0, array) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index fbabc3d848b3..95851a6fb942 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -569,8 +569,6 @@ static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs, #ifdef CONFIG_STACK_TRACER -extern int stack_tracer_enabled; - int stack_trace_sysctl(const struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos); @@ -1298,16 +1296,9 @@ static inline void unpause_graph_tracing(void) { } #ifdef CONFIG_TRACING enum ftrace_dump_mode; -#define MAX_TRACER_SIZE 100 -extern char ftrace_dump_on_oops[]; extern int ftrace_dump_on_oops_enabled(void); -extern int tracepoint_printk; extern void disable_trace_on_warning(void); -extern int __disable_trace_on_warning; - -int tracepoint_printk_sysctl(const struct ctl_table *table, int write, - void *buffer, size_t *lenp, loff_t *ppos); #else /* CONFIG_TRACING */ static inline void disable_trace_on_warning(void) { } diff --git a/include/linux/futex.h b/include/linux/futex.h index b70df27d7e85..005b040c4791 100644 --- a/include/linux/futex.h +++ b/include/linux/futex.h @@ -4,11 +4,11 @@ #include <linux/sched.h> #include <linux/ktime.h> +#include <linux/mm_types.h> #include <uapi/linux/futex.h> struct inode; -struct mm_struct; struct task_struct; /* @@ -34,6 +34,7 @@ union futex_key { u64 i_seq; unsigned long pgoff; unsigned int offset; + /* unsigned int node; */ } shared; struct { union { @@ -42,11 +43,13 @@ union futex_key { }; unsigned long address; unsigned int offset; + /* unsigned int node; */ } private; struct { u64 ptr; unsigned long word; unsigned int offset; + unsigned int node; /* NOT hashed! */ } both; }; @@ -77,7 +80,25 @@ void futex_exec_release(struct task_struct *tsk); long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout, u32 __user *uaddr2, u32 val2, u32 val3); -#else +int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4); + +#ifdef CONFIG_FUTEX_PRIVATE_HASH +int futex_hash_allocate_default(void); +void futex_hash_free(struct mm_struct *mm); + +static inline void futex_mm_init(struct mm_struct *mm) +{ + RCU_INIT_POINTER(mm->futex_phash, NULL); + mutex_init(&mm->futex_hash_lock); +} + +#else /* !CONFIG_FUTEX_PRIVATE_HASH */ +static inline int futex_hash_allocate_default(void) { return 0; } +static inline void futex_hash_free(struct mm_struct *mm) { } +static inline void futex_mm_init(struct mm_struct *mm) { } +#endif /* CONFIG_FUTEX_PRIVATE_HASH */ + +#else /* !CONFIG_FUTEX */ static inline void futex_init_task(struct task_struct *tsk) { } static inline void futex_exit_recursive(struct task_struct *tsk) { } static inline void futex_exit_release(struct task_struct *tsk) { } @@ -88,6 +109,17 @@ static inline long do_futex(u32 __user *uaddr, int op, u32 val, { return -EINVAL; } +static inline int futex_hash_prctl(unsigned long arg2, unsigned long arg3, unsigned long arg4) +{ + return -EINVAL; +} +static inline int futex_hash_allocate_default(void) +{ + return 0; +} +static inline void futex_hash_free(struct mm_struct *mm) { } +static inline void futex_mm_init(struct mm_struct *mm) { } + #endif #endif diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 8adc8e9cb4a7..f0b1982da0cc 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -181,6 +181,8 @@ struct gpio_desc *devm_fwnode_gpiod_get_index(struct device *dev, enum gpiod_flags flags, const char *label); +bool gpiod_is_equal(struct gpio_desc *desc, struct gpio_desc *other); + #else /* CONFIG_GPIOLIB */ #include <linux/bug.h> @@ -548,6 +550,13 @@ struct gpio_desc *devm_fwnode_gpiod_get_index(struct device *dev, return ERR_PTR(-ENOSYS); } +static inline bool +gpiod_is_equal(struct gpio_desc *desc, struct gpio_desc *other) +{ + WARN_ON(desc || other); + return false; +} + #endif /* CONFIG_GPIOLIB */ #if IS_ENABLED(CONFIG_GPIOLIB) && IS_ENABLED(CONFIG_HTE) @@ -588,7 +597,7 @@ struct gpio_desc *devm_fwnode_gpiod_get(struct device *dev, struct acpi_gpio_params { unsigned int crs_entry_index; - unsigned int line_index; + unsigned short line_index; bool active_low; }; diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 4c0294a9104d..b53233051bee 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -287,8 +287,9 @@ struct gpio_irq_chip { /** * @first: * - * Required for static IRQ allocation. If set, irq_domain_add_simple() - * will allocate and map all IRQs during initialization. + * Required for static IRQ allocation. If set, + * irq_domain_create_simple() will allocate and map all IRQs + * during initialization. */ unsigned int first; diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h index dd100e849f5e..9a7683d79a4b 100644 --- a/include/linux/highmem-internal.h +++ b/include/linux/highmem-internal.h @@ -73,6 +73,14 @@ static inline void *kmap_local_page(struct page *page) return __kmap_local_page_prot(page, kmap_prot); } +static inline void *kmap_local_page_try_from_panic(struct page *page) +{ + if (!PageHighMem(page)) + return page_address(page); + /* If the page is in HighMem, it's not safe to kmap it.*/ + return NULL; +} + static inline void *kmap_local_folio(struct folio *folio, size_t offset) { struct page *page = folio_page(folio, offset / PAGE_SIZE); @@ -180,6 +188,11 @@ static inline void *kmap_local_page(struct page *page) return page_address(page); } +static inline void *kmap_local_page_try_from_panic(struct page *page) +{ + return page_address(page); +} + static inline void *kmap_local_folio(struct folio *folio, size_t offset) { return page_address(&folio->page) + offset; diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 5c6bea81a90e..c698f8415675 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -461,7 +461,7 @@ static inline void memcpy_from_folio(char *to, struct folio *folio, const char *from = kmap_local_folio(folio, offset); size_t chunk = len; - if (folio_test_highmem(folio) && + if (folio_test_partial_kmap(folio) && chunk > PAGE_SIZE - offset_in_page(offset)) chunk = PAGE_SIZE - offset_in_page(offset); memcpy(to, from, chunk); @@ -489,7 +489,7 @@ static inline void memcpy_to_folio(struct folio *folio, size_t offset, char *to = kmap_local_folio(folio, offset); size_t chunk = len; - if (folio_test_highmem(folio) && + if (folio_test_partial_kmap(folio) && chunk > PAGE_SIZE - offset_in_page(offset)) chunk = PAGE_SIZE - offset_in_page(offset); memcpy(to, from, chunk); @@ -522,7 +522,7 @@ static inline __must_check void *folio_zero_tail(struct folio *folio, { size_t len = folio_size(folio) - offset; - if (folio_test_highmem(folio)) { + if (folio_test_partial_kmap(folio)) { size_t max = PAGE_SIZE - offset_in_page(offset); while (len > max) { @@ -560,7 +560,7 @@ static inline void folio_fill_tail(struct folio *folio, size_t offset, VM_BUG_ON(offset + len > folio_size(folio)); - if (folio_test_highmem(folio)) { + if (folio_test_partial_kmap(folio)) { size_t max = PAGE_SIZE - offset_in_page(offset); while (len > max) { @@ -597,7 +597,7 @@ static inline size_t memcpy_from_file_folio(char *to, struct folio *folio, size_t offset = offset_in_folio(folio, pos); char *from = kmap_local_folio(folio, offset); - if (folio_test_highmem(folio)) { + if (folio_test_partial_kmap(folio)) { offset = offset_in_page(offset); len = min_t(size_t, len, PAGE_SIZE - offset); } else diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 8f3ac832ee7f..4861a7e304bb 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -275,6 +275,7 @@ long hugetlb_change_protection(struct vm_area_struct *vma, bool is_hugetlb_entry_migration(pte_t pte); bool is_hugetlb_entry_hwpoisoned(pte_t pte); void hugetlb_unshare_all_pmds(struct vm_area_struct *vma); +void fixup_hugetlb_reservations(struct vm_area_struct *vma); #else /* !CONFIG_HUGETLB_PAGE */ @@ -468,6 +469,10 @@ static inline vm_fault_t hugetlb_fault(struct mm_struct *mm, static inline void hugetlb_unshare_all_pmds(struct vm_area_struct *vma) { } +static inline void fixup_hugetlb_reservations(struct vm_area_struct *vma) +{ +} + #endif /* !CONFIG_HUGETLB_PAGE */ #ifndef pgd_write diff --git a/include/linux/ima.h b/include/linux/ima.h index 0bae61a15b60..8e29cb4e6a01 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -32,6 +32,9 @@ static inline void ima_appraise_parse_cmdline(void) {} #ifdef CONFIG_IMA_KEXEC extern void ima_add_kexec_buffer(struct kimage *image); +extern void ima_kexec_post_load(struct kimage *image); +#else +static inline void ima_kexec_post_load(struct kimage *image) {} #endif #else diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index c782a74d2a30..51b6484c0493 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -140,7 +140,7 @@ extern irqreturn_t no_action(int cpl, void *dev_id); /* * If a (PCI) device interrupt is not connected we set dev->irq to * IRQ_NOTCONNECTED. This causes request_irq() to fail with -ENOTCONN, so we - * can distingiush that case from other error returns. + * can distinguish that case from other error returns. * * 0x80000000 is guaranteed to be outside the available range of interrupts * and easy to distinguish from other possible incorrect values. diff --git a/include/linux/io.h b/include/linux/io.h index 6a6bc4d46d0a..0642c7ee41db 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -183,4 +183,25 @@ static inline void arch_io_free_memtype_wc(resource_size_t base, int devm_arch_io_reserve_memtype_wc(struct device *dev, resource_size_t start, resource_size_t size); +#ifdef CONFIG_STRICT_DEVMEM +static inline int range_is_allowed(unsigned long pfn, unsigned long size) +{ + u64 from = ((u64)pfn) << PAGE_SHIFT; + u64 to = from + size; + u64 cursor = from; + + while (cursor < to) { + if (!devmem_is_allowed(pfn)) + return 0; + cursor += PAGE_SIZE; + pfn++; + } + return 1; +} +#else +static inline int range_is_allowed(unsigned long pfn, unsigned long size) +{ + return 1; +} +#endif #endif /* _LINUX_IO_H */ diff --git a/include/linux/io_uring/cmd.h b/include/linux/io_uring/cmd.h index 0634a3de1782..53408124c1e5 100644 --- a/include/linux/io_uring/cmd.h +++ b/include/linux/io_uring/cmd.h @@ -140,6 +140,15 @@ static inline struct io_uring_cmd_data *io_uring_cmd_get_async_data(struct io_ur return cmd_to_io_kiocb(cmd)->async_data; } +/* + * Return uring_cmd's context reference as its context handle for driver to + * track per-context resource, such as registered kernel IO buffer + */ +static inline void *io_uring_cmd_ctx_handle(struct io_uring_cmd *cmd) +{ + return cmd_to_io_kiocb(cmd)->ctx; +} + int io_buffer_register_bvec(struct io_uring_cmd *cmd, struct request *rq, void (*release)(void *), unsigned int index, unsigned int issue_flags); diff --git a/include/linux/io_uring_types.h b/include/linux/io_uring_types.h index b44d201520d8..2922635986f5 100644 --- a/include/linux/io_uring_types.h +++ b/include/linux/io_uring_types.h @@ -40,8 +40,6 @@ enum io_uring_cmd_flags { IO_URING_F_TASK_DEAD = (1 << 13), }; -struct io_zcrx_ifq; - struct io_wq_work_node { struct io_wq_work_node *next; }; @@ -343,7 +341,6 @@ struct io_ring_ctx { unsigned cached_cq_tail; unsigned cq_entries; struct io_ev_fd __rcu *io_ev_fd; - unsigned cq_extra; void *cq_wait_arg; size_t cq_wait_size; @@ -394,7 +391,8 @@ struct io_ring_ctx { struct wait_queue_head poll_wq; struct io_restriction restrictions; - struct io_zcrx_ifq *ifq; + /* Stores zcrx object pointers of type struct io_zcrx_ifq */ + struct xarray zcrx_ctxs; u32 pers_next; struct xarray personalities; @@ -418,6 +416,7 @@ struct io_ring_ctx { struct callback_head poll_wq_task_work; struct list_head defer_list; + unsigned nr_drained; struct io_alloc_cache msg_cache; spinlock_t msg_lock; @@ -436,6 +435,7 @@ struct io_ring_ctx { /* protected by ->completion_lock */ unsigned evfd_last_cq_tail; + unsigned nr_req_allocated; /* * Protection for resize vs mmap races - both the mmap and resize @@ -448,8 +448,6 @@ struct io_ring_ctx { struct io_mapped_region ring_region; /* used for optimised request parameter and wait argument passing */ struct io_mapped_region param_region; - /* just one zcrx per ring for now, will move to io_zcrx_ifq eventually */ - struct io_mapped_region zcrx_region; }; /* @@ -653,8 +651,7 @@ struct io_kiocb { u8 iopoll_completed; /* * Can be either a fixed buffer index, or used with provided buffers. - * For the latter, before issue it points to the buffer group ID, - * and after selection it points to the buffer ID itself. + * For the latter, it points to the selected buffer ID. */ u16 buf_index; @@ -713,7 +710,7 @@ struct io_kiocb { const struct cred *creds; struct io_wq_work work; - struct { + struct io_big_cqe { u64 extra1; u64 extra2; } big_cqe; diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 3a8d35d41fda..15cdadace993 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -872,6 +872,10 @@ extern struct iommu_domain *iommu_get_domain_for_dev(struct device *dev); extern struct iommu_domain *iommu_get_dma_domain(struct device *dev); extern int iommu_map(struct iommu_domain *domain, unsigned long iova, phys_addr_t paddr, size_t size, int prot, gfp_t gfp); +int iommu_map_nosync(struct iommu_domain *domain, unsigned long iova, + phys_addr_t paddr, size_t size, int prot, gfp_t gfp); +int iommu_sync_map(struct iommu_domain *domain, unsigned long iova, + size_t size); extern size_t iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size); extern size_t iommu_unmap_fast(struct iommu_domain *domain, diff --git a/include/linux/irq.h b/include/linux/irq.h index dd5df1e2d032..1d6b606a81ef 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -597,7 +597,6 @@ enum { struct irqaction; extern int setup_percpu_irq(unsigned int irq, struct irqaction *new); -extern void remove_percpu_irq(unsigned int irq, struct irqaction *act); #ifdef CONFIG_DEPRECATED_IRQ_CPU_ONOFFLINE extern void irq_cpu_online(void); @@ -700,7 +699,7 @@ extern void note_interrupt(struct irq_desc *desc, irqreturn_t action_ret); extern int noirqdebug_setup(char *str); /* Checks whether the interrupt can be requested by request_irq(): */ -extern int can_request_irq(unsigned int irq, unsigned long irqflags); +extern bool can_request_irq(unsigned int irq, unsigned long irqflags); /* Dummy irq-chip implementations: */ extern struct irq_chip no_irq_chip; @@ -1222,31 +1221,6 @@ static inline struct irq_chip_type *irq_data_get_chip_type(struct irq_data *d) #define IRQ_MSK(n) (u32)((n) < 32 ? ((1 << (n)) - 1) : UINT_MAX) -#ifdef CONFIG_SMP -static inline void irq_gc_lock(struct irq_chip_generic *gc) -{ - raw_spin_lock(&gc->lock); -} - -static inline void irq_gc_unlock(struct irq_chip_generic *gc) -{ - raw_spin_unlock(&gc->lock); -} -#else -static inline void irq_gc_lock(struct irq_chip_generic *gc) { } -static inline void irq_gc_unlock(struct irq_chip_generic *gc) { } -#endif - -/* - * The irqsave variants are for usage in non interrupt code. Do not use - * them in irq_chip callbacks. Use irq_gc_lock() instead. - */ -#define irq_gc_lock_irqsave(gc, flags) \ - raw_spin_lock_irqsave(&(gc)->lock, flags) - -#define irq_gc_unlock_irqrestore(gc, flags) \ - raw_spin_unlock_irqrestore(&(gc)->lock, flags) - static inline void irq_reg_writel(struct irq_chip_generic *gc, u32 val, int reg_offset) { diff --git a/include/linux/irqchip/irq-msi-lib.h b/include/linux/irqchip/irq-msi-lib.h new file mode 100644 index 000000000000..dd8d1d138544 --- /dev/null +++ b/include/linux/irqchip/irq-msi-lib.h @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0-only +// Copyright (C) 2022 Linutronix GmbH +// Copyright (C) 2022 Intel + +#ifndef _IRQCHIP_IRQ_MSI_LIB_H +#define _IRQCHIP_IRQ_MSI_LIB_H + +#include <linux/bits.h> +#include <linux/irqdomain.h> +#include <linux/msi.h> + +#ifdef CONFIG_PCI_MSI +#define MATCH_PCI_MSI BIT(DOMAIN_BUS_PCI_MSI) +#else +#define MATCH_PCI_MSI (0) +#endif + +#define MATCH_PLATFORM_MSI BIT(DOMAIN_BUS_PLATFORM_MSI) + +int msi_lib_irq_domain_select(struct irq_domain *d, struct irq_fwspec *fwspec, + enum irq_domain_bus_token bus_token); + +bool msi_lib_init_dev_msi_info(struct device *dev, struct irq_domain *domain, + struct irq_domain *real_parent, + struct msi_domain_info *info); + +#endif /* _IRQCHIP_IRQ_MSI_LIB_H */ diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index bb7111105296..7387d183029b 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -1,30 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 */ /* - * irq_domain - IRQ translation domains + * irq_domain - IRQ Translation Domains * - * Translation infrastructure between hw and linux irq numbers. This is - * helpful for interrupt controllers to implement mapping between hardware - * irq numbers and the Linux irq number space. - * - * irq_domains also have hooks for translating device tree or other - * firmware interrupt representations into a hardware irq number that - * can be mapped back to a Linux irq number without any extra platform - * support code. - * - * Interrupt controller "domain" data structure. This could be defined as a - * irq domain controller. That is, it handles the mapping between hardware - * and virtual interrupt numbers for a given interrupt domain. The domain - * structure is generally created by the PIC code for a given PIC instance - * (though a domain can cover more than one PIC if they have a flat number - * model). It's the domain callbacks that are responsible for setting the - * irq_chip on a given irq_desc after it's been mapped. - * - * The host code and data structures use a fwnode_handle pointer to - * identify the domain. In some cases, and in order to preserve source - * code compatibility, this fwnode pointer is "upgraded" to a DT - * device_node. For those firmware infrastructures that do not provide - * a unique identifier for an interrupt controller, the irq_domain - * code offers a fwnode allocator. + * See Documentation/core-api/irq/irq-domain.rst for the details. */ #ifndef _LINUX_IRQDOMAIN_H @@ -61,9 +39,9 @@ struct msi_parent_ops; * pass a device-specific description of an interrupt. */ struct irq_fwspec { - struct fwnode_handle *fwnode; - int param_count; - u32 param[IRQ_DOMAIN_IRQ_SPEC_PARAMS]; + struct fwnode_handle *fwnode; + int param_count; + u32 param[IRQ_DOMAIN_IRQ_SPEC_PARAMS]; }; /* Conversion function from of_phandle_args fields to fwspec */ @@ -72,26 +50,26 @@ void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args, /** * struct irq_domain_ops - Methods for irq_domain objects - * @match: Match an interrupt controller device node to a domain, returns - * 1 on a match - * @select: Match an interrupt controller fw specification. It is more generic - * than @match as it receives a complete struct irq_fwspec. Therefore, - * @select is preferred if provided. Returns 1 on a match. - * @map: Create or update a mapping between a virtual irq number and a hw - * irq number. This is called only once for a given mapping. - * @unmap: Dispose of such a mapping - * @xlate: Given a device tree node and interrupt specifier, decode - * the hardware irq number and linux irq type value. - * @alloc: Allocate @nr_irqs interrupts starting from @virq. - * @free: Free @nr_irqs interrupts starting from @virq. - * @activate: Activate one interrupt in HW (@irqd). If @reserve is set, only - * reserve the vector. If unset, assign the vector (called from - * request_irq()). - * @deactivate: Disarm one interrupt (@irqd). - * @translate: Given @fwspec, decode the hardware irq number (@out_hwirq) and - * linux irq type value (@out_type). This is a generalised @xlate - * (over struct irq_fwspec) and is preferred if provided. - * @debug_show: For domains to show specific data for an interrupt in debugfs. + * @match: Match an interrupt controller device node to a domain, returns + * 1 on a match + * @select: Match an interrupt controller fw specification. It is more generic + * than @match as it receives a complete struct irq_fwspec. Therefore, + * @select is preferred if provided. Returns 1 on a match. + * @map: Create or update a mapping between a virtual irq number and a hw + * irq number. This is called only once for a given mapping. + * @unmap: Dispose of such a mapping + * @xlate: Given a device tree node and interrupt specifier, decode + * the hardware irq number and linux irq type value. + * @alloc: Allocate @nr_irqs interrupts starting from @virq. + * @free: Free @nr_irqs interrupts starting from @virq. + * @activate: Activate one interrupt in HW (@irqd). If @reserve is set, only + * reserve the vector. If unset, assign the vector (called from + * request_irq()). + * @deactivate: Disarm one interrupt (@irqd). + * @translate: Given @fwspec, decode the hardware irq number (@out_hwirq) and + * linux irq type value (@out_type). This is a generalised @xlate + * (over struct irq_fwspec) and is preferred if provided. + * @debug_show: For domains to show specific data for an interrupt in debugfs. * * Functions below are provided by the driver and called whenever a new mapping * is created or an old mapping is disposed. The driver can then proceed to @@ -99,29 +77,29 @@ void of_phandle_args_to_fwspec(struct device_node *np, const u32 *args, * to setup the irq_desc when returning from map(). */ struct irq_domain_ops { - int (*match)(struct irq_domain *d, struct device_node *node, - enum irq_domain_bus_token bus_token); - int (*select)(struct irq_domain *d, struct irq_fwspec *fwspec, - enum irq_domain_bus_token bus_token); - int (*map)(struct irq_domain *d, unsigned int virq, irq_hw_number_t hw); - void (*unmap)(struct irq_domain *d, unsigned int virq); - int (*xlate)(struct irq_domain *d, struct device_node *node, - const u32 *intspec, unsigned int intsize, - unsigned long *out_hwirq, unsigned int *out_type); + int (*match)(struct irq_domain *d, struct device_node *node, + enum irq_domain_bus_token bus_token); + int (*select)(struct irq_domain *d, struct irq_fwspec *fwspec, + enum irq_domain_bus_token bus_token); + int (*map)(struct irq_domain *d, unsigned int virq, irq_hw_number_t hw); + void (*unmap)(struct irq_domain *d, unsigned int virq); + int (*xlate)(struct irq_domain *d, struct device_node *node, + const u32 *intspec, unsigned int intsize, + unsigned long *out_hwirq, unsigned int *out_type); #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY /* extended V2 interfaces to support hierarchy irq_domains */ - int (*alloc)(struct irq_domain *d, unsigned int virq, - unsigned int nr_irqs, void *arg); - void (*free)(struct irq_domain *d, unsigned int virq, - unsigned int nr_irqs); - int (*activate)(struct irq_domain *d, struct irq_data *irqd, bool reserve); - void (*deactivate)(struct irq_domain *d, struct irq_data *irq_data); - int (*translate)(struct irq_domain *d, struct irq_fwspec *fwspec, - unsigned long *out_hwirq, unsigned int *out_type); + int (*alloc)(struct irq_domain *d, unsigned int virq, + unsigned int nr_irqs, void *arg); + void (*free)(struct irq_domain *d, unsigned int virq, + unsigned int nr_irqs); + int (*activate)(struct irq_domain *d, struct irq_data *irqd, bool reserve); + void (*deactivate)(struct irq_domain *d, struct irq_data *irq_data); + int (*translate)(struct irq_domain *d, struct irq_fwspec *fwspec, + unsigned long *out_hwirq, unsigned int *out_type); #endif #ifdef CONFIG_GENERIC_IRQ_DEBUGFS - void (*debug_show)(struct seq_file *m, struct irq_domain *d, - struct irq_data *irqd, int ind); + void (*debug_show)(struct seq_file *m, struct irq_domain *d, + struct irq_data *irqd, int ind); #endif }; @@ -231,6 +209,9 @@ enum { /* Irq domain must destroy generic chips when removed */ IRQ_DOMAIN_FLAG_DESTROY_GC = (1 << 10), + /* Address and data pair is mutable when irq_set_affinity() */ + IRQ_DOMAIN_FLAG_MSI_IMMUTABLE = (1 << 11), + /* * Flags starting from IRQ_DOMAIN_FLAG_NONCORE are reserved * for implementation specific purposes and ignored by the @@ -244,8 +225,7 @@ static inline struct device_node *irq_domain_get_of_node(struct irq_domain *d) return to_of_node(d->fwnode); } -static inline void irq_domain_set_pm_device(struct irq_domain *d, - struct device *dev) +static inline void irq_domain_set_pm_device(struct irq_domain *d, struct device *dev) { if (d) d->pm_dev = dev; @@ -261,14 +241,12 @@ enum { IRQCHIP_FWNODE_NAMED_ID, }; -static inline -struct fwnode_handle *irq_domain_alloc_named_fwnode(const char *name) +static inline struct fwnode_handle *irq_domain_alloc_named_fwnode(const char *name) { return __irq_domain_alloc_fwnode(IRQCHIP_FWNODE_NAMED, 0, name, NULL); } -static inline -struct fwnode_handle *irq_domain_alloc_named_id_fwnode(const char *name, int id) +static inline struct fwnode_handle *irq_domain_alloc_named_id_fwnode(const char *name, int id) { return __irq_domain_alloc_fwnode(IRQCHIP_FWNODE_NAMED_ID, id, name, NULL); @@ -281,6 +259,8 @@ static inline struct fwnode_handle *irq_domain_alloc_fwnode(phys_addr_t *pa) void irq_domain_free_fwnode(struct fwnode_handle *fwnode); +DEFINE_FREE(irq_domain_free_fwnode, struct fwnode_handle *, if (_T) irq_domain_free_fwnode(_T)) + struct irq_domain_chip_generic_info; /** @@ -333,36 +313,19 @@ struct irq_domain *irq_domain_instantiate(const struct irq_domain_info *info); struct irq_domain *devm_irq_domain_instantiate(struct device *dev, const struct irq_domain_info *info); -struct irq_domain *irq_domain_create_simple(struct fwnode_handle *fwnode, - unsigned int size, +struct irq_domain *irq_domain_create_simple(struct fwnode_handle *fwnode, unsigned int size, unsigned int first_irq, - const struct irq_domain_ops *ops, - void *host_data); -struct irq_domain *irq_domain_add_legacy(struct device_node *of_node, - unsigned int size, - unsigned int first_irq, - irq_hw_number_t first_hwirq, - const struct irq_domain_ops *ops, - void *host_data); -struct irq_domain *irq_domain_create_legacy(struct fwnode_handle *fwnode, - unsigned int size, - unsigned int first_irq, - irq_hw_number_t first_hwirq, - const struct irq_domain_ops *ops, - void *host_data); + const struct irq_domain_ops *ops, void *host_data); +struct irq_domain *irq_domain_create_legacy(struct fwnode_handle *fwnode, unsigned int size, + unsigned int first_irq, irq_hw_number_t first_hwirq, + const struct irq_domain_ops *ops, void *host_data); struct irq_domain *irq_find_matching_fwspec(struct irq_fwspec *fwspec, enum irq_domain_bus_token bus_token); void irq_set_default_domain(struct irq_domain *domain); struct irq_domain *irq_get_default_domain(void); -int irq_domain_alloc_descs(int virq, unsigned int nr_irqs, - irq_hw_number_t hwirq, int node, +int irq_domain_alloc_descs(int virq, unsigned int nr_irqs, irq_hw_number_t hwirq, int node, const struct irq_affinity_desc *affinity); -static inline struct fwnode_handle *of_node_to_fwnode(struct device_node *node) -{ - return node ? &node->fwnode : NULL; -} - extern const struct fwnode_operations irqchip_fwnode_ops; static inline bool is_fwnode_irqchip(const struct fwnode_handle *fwnode) @@ -370,12 +333,10 @@ static inline bool is_fwnode_irqchip(const struct fwnode_handle *fwnode) return fwnode && fwnode->ops == &irqchip_fwnode_ops; } -void irq_domain_update_bus_token(struct irq_domain *domain, - enum irq_domain_bus_token bus_token); +void irq_domain_update_bus_token(struct irq_domain *domain, enum irq_domain_bus_token bus_token); -static inline -struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode, - enum irq_domain_bus_token bus_token) +static inline struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode, + enum irq_domain_bus_token bus_token) { struct irq_fwspec fwspec = { .fwnode = fwnode, @@ -387,7 +348,7 @@ struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode, static inline struct irq_domain *irq_find_matching_host(struct device_node *node, enum irq_domain_bus_token bus_token) { - return irq_find_matching_fwnode(of_node_to_fwnode(node), bus_token); + return irq_find_matching_fwnode(of_fwnode_handle(node), bus_token); } static inline struct irq_domain *irq_find_host(struct device_node *node) @@ -401,128 +362,92 @@ static inline struct irq_domain *irq_find_host(struct device_node *node) return d; } -static inline struct irq_domain *irq_domain_add_simple(struct device_node *of_node, - unsigned int size, - unsigned int first_irq, - const struct irq_domain_ops *ops, - void *host_data) -{ - return irq_domain_create_simple(of_node_to_fwnode(of_node), size, first_irq, ops, host_data); -} - -/** - * irq_domain_add_linear() - Allocate and register a linear revmap irq_domain. - * @of_node: pointer to interrupt controller's device tree node. - * @size: Number of interrupts in the domain. - * @ops: map/unmap domain callbacks - * @host_data: Controller private data pointer - */ -static inline struct irq_domain *irq_domain_add_linear(struct device_node *of_node, - unsigned int size, - const struct irq_domain_ops *ops, - void *host_data) -{ - struct irq_domain_info info = { - .fwnode = of_node_to_fwnode(of_node), - .size = size, - .hwirq_max = size, - .ops = ops, - .host_data = host_data, - }; - struct irq_domain *d; - - d = irq_domain_instantiate(&info); - return IS_ERR(d) ? NULL : d; -} - #ifdef CONFIG_IRQ_DOMAIN_NOMAP -static inline struct irq_domain *irq_domain_add_nomap(struct device_node *of_node, - unsigned int max_irq, - const struct irq_domain_ops *ops, - void *host_data) +static inline struct irq_domain *irq_domain_create_nomap(struct fwnode_handle *fwnode, + unsigned int max_irq, + const struct irq_domain_ops *ops, + void *host_data) { - struct irq_domain_info info = { - .fwnode = of_node_to_fwnode(of_node), + const struct irq_domain_info info = { + .fwnode = fwnode, .hwirq_max = max_irq, .direct_max = max_irq, .ops = ops, .host_data = host_data, }; - struct irq_domain *d; + struct irq_domain *d = irq_domain_instantiate(&info); - d = irq_domain_instantiate(&info); return IS_ERR(d) ? NULL : d; } unsigned int irq_create_direct_mapping(struct irq_domain *domain); #endif -static inline struct irq_domain *irq_domain_add_tree(struct device_node *of_node, - const struct irq_domain_ops *ops, - void *host_data) -{ - struct irq_domain_info info = { - .fwnode = of_node_to_fwnode(of_node), - .hwirq_max = ~0U, - .ops = ops, - .host_data = host_data, - }; - struct irq_domain *d; - - d = irq_domain_instantiate(&info); - return IS_ERR(d) ? NULL : d; -} - +/** + * irq_domain_create_linear - Allocate and register a linear revmap irq_domain. + * @fwnode: pointer to interrupt controller's FW node. + * @size: Number of interrupts in the domain. + * @ops: map/unmap domain callbacks + * @host_data: Controller private data pointer + * + * Returns: Newly created irq_domain + */ static inline struct irq_domain *irq_domain_create_linear(struct fwnode_handle *fwnode, - unsigned int size, - const struct irq_domain_ops *ops, - void *host_data) + unsigned int size, + const struct irq_domain_ops *ops, + void *host_data) { - struct irq_domain_info info = { + const struct irq_domain_info info = { .fwnode = fwnode, .size = size, .hwirq_max = size, .ops = ops, .host_data = host_data, }; - struct irq_domain *d; + struct irq_domain *d = irq_domain_instantiate(&info); - d = irq_domain_instantiate(&info); return IS_ERR(d) ? NULL : d; } static inline struct irq_domain *irq_domain_create_tree(struct fwnode_handle *fwnode, - const struct irq_domain_ops *ops, - void *host_data) + const struct irq_domain_ops *ops, + void *host_data) { - struct irq_domain_info info = { + const struct irq_domain_info info = { .fwnode = fwnode, .hwirq_max = ~0, .ops = ops, .host_data = host_data, }; - struct irq_domain *d; + struct irq_domain *d = irq_domain_instantiate(&info); - d = irq_domain_instantiate(&info); return IS_ERR(d) ? NULL : d; } void irq_domain_remove(struct irq_domain *domain); -int irq_domain_associate(struct irq_domain *domain, unsigned int irq, - irq_hw_number_t hwirq); -void irq_domain_associate_many(struct irq_domain *domain, - unsigned int irq_base, +int irq_domain_associate(struct irq_domain *domain, unsigned int irq, irq_hw_number_t hwirq); +void irq_domain_associate_many(struct irq_domain *domain, unsigned int irq_base, irq_hw_number_t hwirq_base, int count); -unsigned int irq_create_mapping_affinity(struct irq_domain *domain, - irq_hw_number_t hwirq, +unsigned int irq_create_mapping_affinity(struct irq_domain *domain, irq_hw_number_t hwirq, const struct irq_affinity_desc *affinity); unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec); void irq_dispose_mapping(unsigned int virq); -static inline unsigned int irq_create_mapping(struct irq_domain *domain, - irq_hw_number_t hwirq) +/** + * irq_create_mapping - Map a hardware interrupt into linux irq space + * @domain: domain owning this hardware interrupt or NULL for default domain + * @hwirq: hardware irq number in that domain space + * + * Only one mapping per hardware interrupt is permitted. + * + * If the sense/trigger is to be specified, set_irq_type() should be called + * on the number returned from that call. + * + * Returns: Linux irq number or 0 on error + */ +static inline unsigned int irq_create_mapping(struct irq_domain *domain, irq_hw_number_t hwirq) { return irq_create_mapping_affinity(domain, hwirq, NULL); } @@ -531,6 +456,13 @@ struct irq_desc *__irq_resolve_mapping(struct irq_domain *domain, irq_hw_number_t hwirq, unsigned int *irq); +/** + * irq_resolve_mapping - Find a linux irq from a hw irq number. + * @domain: domain owning this hardware interrupt + * @hwirq: hardware irq number in that domain space + * + * Returns: Interrupt descriptor + */ static inline struct irq_desc *irq_resolve_mapping(struct irq_domain *domain, irq_hw_number_t hwirq) { @@ -539,8 +471,10 @@ static inline struct irq_desc *irq_resolve_mapping(struct irq_domain *domain, /** * irq_find_mapping() - Find a linux irq from a hw irq number. - * @domain: domain owning this hardware interrupt - * @hwirq: hardware irq number in that domain space + * @domain: domain owning this hardware interrupt + * @hwirq: hardware irq number in that domain space + * + * Returns: Linux irq number or 0 if not found */ static inline unsigned int irq_find_mapping(struct irq_domain *domain, irq_hw_number_t hwirq) @@ -553,107 +487,115 @@ static inline unsigned int irq_find_mapping(struct irq_domain *domain, return 0; } -static inline unsigned int irq_linear_revmap(struct irq_domain *domain, - irq_hw_number_t hwirq) -{ - return irq_find_mapping(domain, hwirq); -} - extern const struct irq_domain_ops irq_domain_simple_ops; /* stock xlate functions */ int irq_domain_xlate_onecell(struct irq_domain *d, struct device_node *ctrlr, - const u32 *intspec, unsigned int intsize, - irq_hw_number_t *out_hwirq, unsigned int *out_type); + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_type); int irq_domain_xlate_twocell(struct irq_domain *d, struct device_node *ctrlr, - const u32 *intspec, unsigned int intsize, - irq_hw_number_t *out_hwirq, unsigned int *out_type); + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_type); int irq_domain_xlate_onetwocell(struct irq_domain *d, struct device_node *ctrlr, - const u32 *intspec, unsigned int intsize, - irq_hw_number_t *out_hwirq, unsigned int *out_type); - -int irq_domain_translate_twocell(struct irq_domain *d, - struct irq_fwspec *fwspec, - unsigned long *out_hwirq, - unsigned int *out_type); - -int irq_domain_translate_onecell(struct irq_domain *d, - struct irq_fwspec *fwspec, - unsigned long *out_hwirq, - unsigned int *out_type); + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_type); +int irq_domain_xlate_twothreecell(struct irq_domain *d, struct device_node *ctrlr, + const u32 *intspec, unsigned int intsize, + irq_hw_number_t *out_hwirq, unsigned int *out_type); + +int irq_domain_translate_onecell(struct irq_domain *d, struct irq_fwspec *fwspec, + unsigned long *out_hwirq, unsigned int *out_type); +int irq_domain_translate_twocell(struct irq_domain *d, struct irq_fwspec *fwspec, + unsigned long *out_hwirq, unsigned int *out_type); +int irq_domain_translate_twothreecell(struct irq_domain *d, struct irq_fwspec *fwspec, + unsigned long *out_hwirq, unsigned int *out_type); /* IPI functions */ int irq_reserve_ipi(struct irq_domain *domain, const struct cpumask *dest); int irq_destroy_ipi(unsigned int irq, const struct cpumask *dest); /* V2 interfaces to support hierarchy IRQ domains. */ -struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain, - unsigned int virq); -void irq_domain_set_info(struct irq_domain *domain, unsigned int virq, - irq_hw_number_t hwirq, - const struct irq_chip *chip, - void *chip_data, irq_flow_handler_t handler, +struct irq_data *irq_domain_get_irq_data(struct irq_domain *domain, unsigned int virq); +void irq_domain_set_info(struct irq_domain *domain, unsigned int virq, irq_hw_number_t hwirq, + const struct irq_chip *chip, void *chip_data, irq_flow_handler_t handler, void *handler_data, const char *handler_name); void irq_domain_reset_irq_data(struct irq_data *irq_data); #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY -struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent, - unsigned int flags, - unsigned int size, - struct fwnode_handle *fwnode, - const struct irq_domain_ops *ops, - void *host_data); - -static inline struct irq_domain *irq_domain_add_hierarchy(struct irq_domain *parent, - unsigned int flags, - unsigned int size, - struct device_node *node, - const struct irq_domain_ops *ops, - void *host_data) -{ - return irq_domain_create_hierarchy(parent, flags, size, - of_node_to_fwnode(node), - ops, host_data); -} - -int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, - unsigned int nr_irqs, int node, void *arg, - bool realloc, +/** + * irq_domain_create_hierarchy - Add a irqdomain into the hierarchy + * @parent: Parent irq domain to associate with the new domain + * @flags: Irq domain flags associated to the domain + * @size: Size of the domain. See below + * @fwnode: Optional fwnode of the interrupt controller + * @ops: Pointer to the interrupt domain callbacks + * @host_data: Controller private data pointer + * + * If @size is 0 a tree domain is created, otherwise a linear domain. + * + * If successful the parent is associated to the new domain and the + * domain flags are set. + * + * Returns: A pointer to IRQ domain, or %NULL on failure. + */ +static inline struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent, + unsigned int flags, unsigned int size, + struct fwnode_handle *fwnode, + const struct irq_domain_ops *ops, + void *host_data) +{ + const struct irq_domain_info info = { + .fwnode = fwnode, + .size = size, + .hwirq_max = size ? : ~0U, + .ops = ops, + .host_data = host_data, + .domain_flags = flags, + .parent = parent, + }; + struct irq_domain *d = irq_domain_instantiate(&info); + + return IS_ERR(d) ? NULL : d; +} + +int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, unsigned int nr_irqs, + int node, void *arg, bool realloc, const struct irq_affinity_desc *affinity); void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs); int irq_domain_activate_irq(struct irq_data *irq_data, bool early); void irq_domain_deactivate_irq(struct irq_data *irq_data); -static inline int irq_domain_alloc_irqs(struct irq_domain *domain, - unsigned int nr_irqs, int node, void *arg) +/** + * irq_domain_alloc_irqs - Allocate IRQs from domain + * @domain: domain to allocate from + * @nr_irqs: number of IRQs to allocate + * @node: NUMA node id for memory allocation + * @arg: domain specific argument + * + * See __irq_domain_alloc_irqs()' documentation. + */ +static inline int irq_domain_alloc_irqs(struct irq_domain *domain, unsigned int nr_irqs, + int node, void *arg) { - return __irq_domain_alloc_irqs(domain, -1, nr_irqs, node, arg, false, - NULL); + return __irq_domain_alloc_irqs(domain, -1, nr_irqs, node, arg, false, NULL); } -int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, - unsigned int virq, - irq_hw_number_t hwirq, - const struct irq_chip *chip, +int irq_domain_set_hwirq_and_chip(struct irq_domain *domain, unsigned int virq, + irq_hw_number_t hwirq, const struct irq_chip *chip, void *chip_data); -void irq_domain_free_irqs_common(struct irq_domain *domain, - unsigned int virq, +void irq_domain_free_irqs_common(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs); -void irq_domain_free_irqs_top(struct irq_domain *domain, - unsigned int virq, unsigned int nr_irqs); +void irq_domain_free_irqs_top(struct irq_domain *domain, unsigned int virq, unsigned int nr_irqs); int irq_domain_push_irq(struct irq_domain *domain, int virq, void *arg); int irq_domain_pop_irq(struct irq_domain *domain, int virq); -int irq_domain_alloc_irqs_parent(struct irq_domain *domain, - unsigned int irq_base, +int irq_domain_alloc_irqs_parent(struct irq_domain *domain, unsigned int irq_base, unsigned int nr_irqs, void *arg); -void irq_domain_free_irqs_parent(struct irq_domain *domain, - unsigned int irq_base, +void irq_domain_free_irqs_parent(struct irq_domain *domain, unsigned int irq_base, unsigned int nr_irqs); -int irq_domain_disconnect_hierarchy(struct irq_domain *domain, - unsigned int virq); +int irq_domain_disconnect_hierarchy(struct irq_domain *domain, unsigned int virq); static inline bool irq_domain_is_hierarchy(struct irq_domain *domain) { @@ -662,8 +604,7 @@ static inline bool irq_domain_is_hierarchy(struct irq_domain *domain) static inline bool irq_domain_is_ipi(struct irq_domain *domain) { - return domain->flags & - (IRQ_DOMAIN_FLAG_IPI_PER_CPU | IRQ_DOMAIN_FLAG_IPI_SINGLE); + return domain->flags & (IRQ_DOMAIN_FLAG_IPI_PER_CPU | IRQ_DOMAIN_FLAG_IPI_SINGLE); } static inline bool irq_domain_is_ipi_per_cpu(struct irq_domain *domain) @@ -691,15 +632,18 @@ static inline bool irq_domain_is_msi_device(struct irq_domain *domain) return domain->flags & IRQ_DOMAIN_FLAG_MSI_DEVICE; } +static inline bool irq_domain_is_msi_immutable(struct irq_domain *domain) +{ + return domain->flags & IRQ_DOMAIN_FLAG_MSI_IMMUTABLE; +} #else /* CONFIG_IRQ_DOMAIN_HIERARCHY */ -static inline int irq_domain_alloc_irqs(struct irq_domain *domain, - unsigned int nr_irqs, int node, void *arg) +static inline int irq_domain_alloc_irqs(struct irq_domain *domain, unsigned int nr_irqs, + int node, void *arg) { return -1; } -static inline void irq_domain_free_irqs(unsigned int virq, - unsigned int nr_irqs) { } +static inline void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs) { } static inline bool irq_domain_is_hierarchy(struct irq_domain *domain) { @@ -739,8 +683,7 @@ static inline bool irq_domain_is_msi_device(struct irq_domain *domain) #endif /* CONFIG_IRQ_DOMAIN_HIERARCHY */ #ifdef CONFIG_GENERIC_MSI_IRQ -int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq, - unsigned int type); +int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq, unsigned int type); void msi_device_domain_free_wired(struct irq_domain *domain, unsigned int virq); #else static inline int msi_device_domain_alloc_wired(struct irq_domain *domain, unsigned int hwirq, @@ -755,10 +698,50 @@ static inline void msi_device_domain_free_wired(struct irq_domain *domain, unsig } #endif +/* Deprecated functions. Will be removed in the merge window */ +static inline struct fwnode_handle *of_node_to_fwnode(struct device_node *node) +{ + return node ? &node->fwnode : NULL; +} + +static inline struct irq_domain *irq_domain_add_tree(struct device_node *of_node, + const struct irq_domain_ops *ops, + void *host_data) +{ + struct irq_domain_info info = { + .fwnode = of_fwnode_handle(of_node), + .hwirq_max = ~0U, + .ops = ops, + .host_data = host_data, + }; + struct irq_domain *d; + + d = irq_domain_instantiate(&info); + return IS_ERR(d) ? NULL : d; +} + +static inline struct irq_domain *irq_domain_add_linear(struct device_node *of_node, + unsigned int size, + const struct irq_domain_ops *ops, + void *host_data) +{ + struct irq_domain_info info = { + .fwnode = of_fwnode_handle(of_node), + .size = size, + .hwirq_max = size, + .ops = ops, + .host_data = host_data, + }; + struct irq_domain *d; + + d = irq_domain_instantiate(&info); + return IS_ERR(d) ? NULL : d; +} + #else /* CONFIG_IRQ_DOMAIN */ static inline void irq_dispose_mapping(unsigned int virq) { } -static inline struct irq_domain *irq_find_matching_fwnode( - struct fwnode_handle *fwnode, enum irq_domain_bus_token bus_token) +static inline struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode, + enum irq_domain_bus_token bus_token) { return NULL; } diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 0ea8c9887429..91b20788273d 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -59,7 +59,7 @@ /* LATCH is used in the interval timer and ftape setup. */ #define LATCH ((CLOCK_TICK_RATE + HZ/2) / HZ) /* For divider */ -extern int register_refined_jiffies(long clock_tick_rate); +extern void register_refined_jiffies(long clock_tick_rate); /* TICK_USEC is the time between ticks in usec assuming SHIFTED_HZ */ #define TICK_USEC ((USEC_PER_SEC + HZ/2) / HZ) diff --git a/include/linux/kexec.h b/include/linux/kexec.h index c8971861521a..53ef1b6c8712 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -369,6 +369,9 @@ struct kimage { phys_addr_t ima_buffer_addr; size_t ima_buffer_size; + + unsigned long ima_segment_index; + bool is_ima_segment_index_set; #endif /* Core ELF header buffer */ @@ -474,13 +477,19 @@ extern bool kexec_file_dbg_print; #define kexec_dprintk(fmt, arg...) \ do { if (kexec_file_dbg_print) pr_info(fmt, ##arg); } while (0) +extern void *kimage_map_segment(struct kimage *image, unsigned long addr, unsigned long size); +extern void kimage_unmap_segment(void *buffer); #else /* !CONFIG_KEXEC_CORE */ struct pt_regs; struct task_struct; +struct kimage; static inline void __crash_kexec(struct pt_regs *regs) { } static inline void crash_kexec(struct pt_regs *regs) { } static inline int kexec_should_crash(struct task_struct *p) { return 0; } static inline int kexec_crash_loaded(void) { return 0; } +static inline void *kimage_map_segment(struct kimage *image, unsigned long addr, unsigned long size) +{ return NULL; } +static inline void kimage_unmap_segment(void *buffer) { } #define kexec_in_progress false #endif /* CONFIG_KEXEC_CORE */ diff --git a/include/linux/livepatch_sched.h b/include/linux/livepatch_sched.h index 013794fb5da0..065c185f2763 100644 --- a/include/linux/livepatch_sched.h +++ b/include/linux/livepatch_sched.h @@ -3,27 +3,23 @@ #define _LINUX_LIVEPATCH_SCHED_H_ #include <linux/jump_label.h> -#include <linux/static_call_types.h> +#include <linux/sched.h> #ifdef CONFIG_LIVEPATCH void __klp_sched_try_switch(void); -#if !defined(CONFIG_PREEMPT_DYNAMIC) || !defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) - DECLARE_STATIC_KEY_FALSE(klp_sched_try_switch_key); -static __always_inline void klp_sched_try_switch(void) +static __always_inline void klp_sched_try_switch(struct task_struct *curr) { - if (static_branch_unlikely(&klp_sched_try_switch_key)) + if (static_branch_unlikely(&klp_sched_try_switch_key) && + READ_ONCE(curr->__state) & TASK_FREEZABLE) __klp_sched_try_switch(); } -#endif /* !CONFIG_PREEMPT_DYNAMIC || !CONFIG_HAVE_PREEMPT_DYNAMIC_CALL */ - #else /* !CONFIG_LIVEPATCH */ -static inline void klp_sched_try_switch(void) {} -static inline void __klp_sched_try_switch(void) {} +static inline void klp_sched_try_switch(struct task_struct *curr) {} #endif /* CONFIG_LIVEPATCH */ #endif /* _LINUX_LIVEPATCH_SCHED_H_ */ diff --git a/include/linux/mfd/max77759.h b/include/linux/mfd/max77759.h new file mode 100644 index 000000000000..c6face34e385 --- /dev/null +++ b/include/linux/mfd/max77759.h @@ -0,0 +1,165 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright 2020 Google Inc. + * Copyright 2025 Linaro Ltd. + * + * Maxim MAX77759 core driver + */ + +#ifndef __LINUX_MFD_MAX77759_H +#define __LINUX_MFD_MAX77759_H + +#include <linux/completion.h> +#include <linux/mutex.h> +#include <linux/regmap.h> + +#define MAX77759_PMIC_REG_PMIC_ID 0x00 +#define MAX77759_PMIC_REG_PMIC_REVISION 0x01 +#define MAX77759_PMIC_REG_OTP_REVISION 0x02 +#define MAX77759_PMIC_REG_INTSRC 0x22 +#define MAX77759_PMIC_REG_INTSRCMASK 0x23 +#define MAX77759_PMIC_REG_INTSRC_MAXQ BIT(3) +#define MAX77759_PMIC_REG_INTSRC_TOPSYS BIT(1) +#define MAX77759_PMIC_REG_INTSRC_CHGR BIT(0) +#define MAX77759_PMIC_REG_TOPSYS_INT 0x24 +#define MAX77759_PMIC_REG_TOPSYS_INT_MASK 0x26 +#define MAX77759_PMIC_REG_TOPSYS_INT_TSHDN BIT(6) +#define MAX77759_PMIC_REG_TOPSYS_INT_SYSOVLO BIT(5) +#define MAX77759_PMIC_REG_TOPSYS_INT_SYSUVLO BIT(4) +#define MAX77759_PMIC_REG_TOPSYS_INT_FSHIP BIT(0) +#define MAX77759_PMIC_REG_I2C_CNFG 0x40 +#define MAX77759_PMIC_REG_SWRESET 0x50 +#define MAX77759_PMIC_REG_CONTROL_FG 0x51 + +#define MAX77759_MAXQ_REG_UIC_INT1 0x64 +#define MAX77759_MAXQ_REG_UIC_INT1_APCMDRESI BIT(7) +#define MAX77759_MAXQ_REG_UIC_INT1_SYSMSGI BIT(6) +#define MAX77759_MAXQ_REG_UIC_INT1_GPIO6I BIT(1) +#define MAX77759_MAXQ_REG_UIC_INT1_GPIO5I BIT(0) +#define MAX77759_MAXQ_REG_UIC_INT1_GPIOxI(offs, en) (((en) & 1) << (offs)) +#define MAX77759_MAXQ_REG_UIC_INT1_GPIOxI_MASK(offs) \ + MAX77759_MAXQ_REG_UIC_INT1_GPIOxI(offs, ~0) +#define MAX77759_MAXQ_REG_UIC_INT2 0x65 +#define MAX77759_MAXQ_REG_UIC_INT3 0x66 +#define MAX77759_MAXQ_REG_UIC_INT4 0x67 +#define MAX77759_MAXQ_REG_UIC_UIC_STATUS1 0x68 +#define MAX77759_MAXQ_REG_UIC_UIC_STATUS2 0x69 +#define MAX77759_MAXQ_REG_UIC_UIC_STATUS3 0x6a +#define MAX77759_MAXQ_REG_UIC_UIC_STATUS4 0x6b +#define MAX77759_MAXQ_REG_UIC_UIC_STATUS5 0x6c +#define MAX77759_MAXQ_REG_UIC_UIC_STATUS6 0x6d +#define MAX77759_MAXQ_REG_UIC_UIC_STATUS7 0x6f +#define MAX77759_MAXQ_REG_UIC_UIC_STATUS8 0x6f +#define MAX77759_MAXQ_REG_UIC_INT1_M 0x70 +#define MAX77759_MAXQ_REG_UIC_INT2_M 0x71 +#define MAX77759_MAXQ_REG_UIC_INT3_M 0x72 +#define MAX77759_MAXQ_REG_UIC_INT4_M 0x73 +#define MAX77759_MAXQ_REG_AP_DATAOUT0 0x81 +#define MAX77759_MAXQ_REG_AP_DATAOUT32 0xa1 +#define MAX77759_MAXQ_REG_AP_DATAIN0 0xb1 +#define MAX77759_MAXQ_REG_UIC_SWRST 0xe0 + +#define MAX77759_CHGR_REG_CHG_INT 0xb0 +#define MAX77759_CHGR_REG_CHG_INT2 0xb1 +#define MAX77759_CHGR_REG_CHG_INT_MASK 0xb2 +#define MAX77759_CHGR_REG_CHG_INT2_MASK 0xb3 +#define MAX77759_CHGR_REG_CHG_INT_OK 0xb4 +#define MAX77759_CHGR_REG_CHG_DETAILS_00 0xb5 +#define MAX77759_CHGR_REG_CHG_DETAILS_01 0xb6 +#define MAX77759_CHGR_REG_CHG_DETAILS_02 0xb7 +#define MAX77759_CHGR_REG_CHG_DETAILS_03 0xb8 +#define MAX77759_CHGR_REG_CHG_CNFG_00 0xb9 +#define MAX77759_CHGR_REG_CHG_CNFG_01 0xba +#define MAX77759_CHGR_REG_CHG_CNFG_02 0xbb +#define MAX77759_CHGR_REG_CHG_CNFG_03 0xbc +#define MAX77759_CHGR_REG_CHG_CNFG_04 0xbd +#define MAX77759_CHGR_REG_CHG_CNFG_05 0xbe +#define MAX77759_CHGR_REG_CHG_CNFG_06 0xbf +#define MAX77759_CHGR_REG_CHG_CNFG_07 0xc0 +#define MAX77759_CHGR_REG_CHG_CNFG_08 0xc1 +#define MAX77759_CHGR_REG_CHG_CNFG_09 0xc2 +#define MAX77759_CHGR_REG_CHG_CNFG_10 0xc3 +#define MAX77759_CHGR_REG_CHG_CNFG_11 0xc4 +#define MAX77759_CHGR_REG_CHG_CNFG_12 0xc5 +#define MAX77759_CHGR_REG_CHG_CNFG_13 0xc6 +#define MAX77759_CHGR_REG_CHG_CNFG_14 0xc7 +#define MAX77759_CHGR_REG_CHG_CNFG_15 0xc8 +#define MAX77759_CHGR_REG_CHG_CNFG_16 0xc9 +#define MAX77759_CHGR_REG_CHG_CNFG_17 0xca +#define MAX77759_CHGR_REG_CHG_CNFG_18 0xcb +#define MAX77759_CHGR_REG_CHG_CNFG_19 0xcc + +/* MaxQ opcodes for max77759_maxq_command() */ +#define MAX77759_MAXQ_OPCODE_MAXLENGTH (MAX77759_MAXQ_REG_AP_DATAOUT32 - \ + MAX77759_MAXQ_REG_AP_DATAOUT0 + \ + 1) + +#define MAX77759_MAXQ_OPCODE_GPIO_TRIGGER_READ 0x21 +#define MAX77759_MAXQ_OPCODE_GPIO_TRIGGER_WRITE 0x22 +#define MAX77759_MAXQ_OPCODE_GPIO_CONTROL_READ 0x23 +#define MAX77759_MAXQ_OPCODE_GPIO_CONTROL_WRITE 0x24 +#define MAX77759_MAXQ_OPCODE_USER_SPACE_READ 0x81 +#define MAX77759_MAXQ_OPCODE_USER_SPACE_WRITE 0x82 + +/** + * struct max77759 - core max77759 internal data structure + * + * @regmap_top: Regmap for accessing TOP registers + * @maxq_lock: Lock for serializing access to MaxQ + * @regmap_maxq: Regmap for accessing MaxQ registers + * @cmd_done: Used to signal completion of a MaxQ command + * @regmap_charger: Regmap for accessing charger registers + * + * The MAX77759 comprises several sub-blocks, namely TOP, MaxQ, Charger, + * Fuel Gauge, and TCPCI. + */ +struct max77759 { + struct regmap *regmap_top; + + /* This protects MaxQ commands - only one can be active */ + struct mutex maxq_lock; + struct regmap *regmap_maxq; + struct completion cmd_done; + + struct regmap *regmap_charger; +}; + +/** + * struct max77759_maxq_command - structure containing the MaxQ command to + * send + * + * @length: The number of bytes to send. + * @cmd: The data to send. + */ +struct max77759_maxq_command { + u8 length; + u8 cmd[] __counted_by(length); +}; + +/** + * struct max77759_maxq_response - structure containing the MaxQ response + * + * @length: The number of bytes to receive. + * @rsp: The data received. Must have at least @length bytes space. + */ +struct max77759_maxq_response { + u8 length; + u8 rsp[] __counted_by(length); +}; + +/** + * max77759_maxq_command() - issue a MaxQ command and wait for the response + * and associated data + * + * @max77759: The core max77759 device handle. + * @cmd: The command to be sent. + * @rsp: Any response data associated with the command will be copied here; + * can be %NULL if the command has no response (other than ACK). + * + * Return: 0 on success, a negative error number otherwise. + */ +int max77759_maxq_command(struct max77759 *max77759, + const struct max77759_maxq_command *cmd, + struct max77759_maxq_response *rsp); + +#endif /* __LINUX_MFD_MAX77759_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index bf55206935c4..fdda6b16263b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -385,7 +385,7 @@ extern unsigned int kobjsize(const void *objp); #endif #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR -# define VM_UFFD_MINOR_BIT 38 +# define VM_UFFD_MINOR_BIT 41 # define VM_UFFD_MINOR BIT(VM_UFFD_MINOR_BIT) /* UFFD minor faults */ #else /* !CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */ # define VM_UFFD_MINOR VM_NONE diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 56d07edd01f9..32ba5126e221 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -31,6 +31,7 @@ #define INIT_PASID 0 struct address_space; +struct futex_private_hash; struct mem_cgroup; /* @@ -1031,7 +1032,11 @@ struct mm_struct { */ seqcount_t mm_lock_seq; #endif - +#ifdef CONFIG_FUTEX_PRIVATE_HASH + struct mutex futex_hash_lock; + struct futex_private_hash __rcu *futex_phash; + struct futex_private_hash *futex_phash_new; +#endif unsigned long hiwater_rss; /* High-watermark of RSS usage */ unsigned long hiwater_vm; /* High-water virtual memory usage */ diff --git a/include/linux/mman.h b/include/linux/mman.h index bce214fece16..f4c6346a8fcd 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -155,7 +155,9 @@ calc_vm_flag_bits(struct file *file, unsigned long flags) return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) | _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) | _calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) | +#ifdef CONFIG_TRANSPARENT_HUGEPAGE _calc_vm_trans(flags, MAP_STACK, VM_NOHUGEPAGE) | +#endif arch_calc_vm_flag_bits(file, flags); } diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index 4706c6769902..e0eddfd306ef 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -7,6 +7,7 @@ #include <linux/rwsem.h> #include <linux/tracepoint-defs.h> #include <linux/types.h> +#include <linux/cleanup.h> #define MMAP_LOCK_INITIALIZER(name) \ .mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock), @@ -211,6 +212,9 @@ static inline void mmap_read_unlock(struct mm_struct *mm) up_read(&mm->mmap_lock); } +DEFINE_GUARD(mmap_read_lock, struct mm_struct *, + mmap_read_lock(_T), mmap_read_unlock(_T)) + static inline void mmap_read_unlock_non_owner(struct mm_struct *mm) { __mmap_lock_trace_released(mm, false); diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 526fce581657..ddcdf23d731c 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -329,6 +329,7 @@ struct mmc_card { #define MMC_QUIRK_BROKEN_SD_CACHE (1<<15) /* Disable broken SD cache support */ #define MMC_QUIRK_BROKEN_CACHE_FLUSH (1<<16) /* Don't flush cache until the write has occurred */ #define MMC_QUIRK_BROKEN_SD_POWEROFF_NOTIFY (1<<17) /* Disable broken SD poweroff notify support */ +#define MMC_QUIRK_NO_UHS_DDR50_TUNING (1<<18) /* Disable DDR50 tuning */ bool written_flag; /* Indicates eMMC has been written since power on */ bool reenable_cmdq; /* Re-enable Command Queue */ diff --git a/include/linux/mmc/slot-gpio.h b/include/linux/mmc/slot-gpio.h index 1ed7b0d1e4f9..23ac5696fa38 100644 --- a/include/linux/mmc/slot-gpio.h +++ b/include/linux/mmc/slot-gpio.h @@ -24,7 +24,7 @@ int mmc_gpiod_request_ro(struct mmc_host *host, const char *con_id, int mmc_gpiod_set_cd_config(struct mmc_host *host, unsigned long config); int mmc_gpio_set_cd_wake(struct mmc_host *host, bool on); void mmc_gpiod_request_cd_irq(struct mmc_host *host); -bool mmc_can_gpio_cd(struct mmc_host *host); -bool mmc_can_gpio_ro(struct mmc_host *host); +bool mmc_host_can_gpio_cd(struct mmc_host *host); +bool mmc_host_can_gpio_ro(struct mmc_host *host); #endif diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 6ccec1bf2896..b1c459f7a485 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -148,7 +148,6 @@ enum zone_stat_item { NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */ NR_MLOCK, /* mlock()ed pages found and moved off LRU */ /* Second 128 byte cacheline */ - NR_BOUNCE, #if IS_ENABLED(CONFIG_ZSMALLOC) NR_ZSPAGES, /* allocated in zsmalloc */ #endif diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index bd7e60c0b72f..ebcee9328168 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -601,7 +601,7 @@ struct dmi_system_id { #define DMI_MATCH(a, b) { .slot = a, .substr = b } #define DMI_EXACT_MATCH(a, b) { .slot = a, .substr = b, .exact_match = 1 } -#define PLATFORM_NAME_SIZE 20 +#define PLATFORM_NAME_SIZE 24 #define PLATFORM_MODULE_PREFIX "platform:" struct platform_device_id { diff --git a/include/linux/mount.h b/include/linux/mount.h index dcc17ce8a959..6904ad33ee7a 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -22,48 +22,51 @@ struct fs_context; struct file; struct path; -#define MNT_NOSUID 0x01 -#define MNT_NODEV 0x02 -#define MNT_NOEXEC 0x04 -#define MNT_NOATIME 0x08 -#define MNT_NODIRATIME 0x10 -#define MNT_RELATIME 0x20 -#define MNT_READONLY 0x40 /* does the user want this to be r/o? */ -#define MNT_NOSYMFOLLOW 0x80 - -#define MNT_SHRINKABLE 0x100 -#define MNT_WRITE_HOLD 0x200 - -#define MNT_SHARED 0x1000 /* if the vfsmount is a shared mount */ -#define MNT_UNBINDABLE 0x2000 /* if the vfsmount is a unbindable mount */ -/* - * MNT_SHARED_MASK is the set of flags that should be cleared when a - * mount becomes shared. Currently, this is only the flag that says a - * mount cannot be bind mounted, since this is how we create a mount - * that shares events with another mount. If you add a new MNT_* - * flag, consider how it interacts with shared mounts. - */ -#define MNT_SHARED_MASK (MNT_UNBINDABLE) -#define MNT_USER_SETTABLE_MASK (MNT_NOSUID | MNT_NODEV | MNT_NOEXEC \ - | MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME \ - | MNT_READONLY | MNT_NOSYMFOLLOW) -#define MNT_ATIME_MASK (MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME ) - -#define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \ - MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED) - -#define MNT_INTERNAL 0x4000 - -#define MNT_LOCK_ATIME 0x040000 -#define MNT_LOCK_NOEXEC 0x080000 -#define MNT_LOCK_NOSUID 0x100000 -#define MNT_LOCK_NODEV 0x200000 -#define MNT_LOCK_READONLY 0x400000 -#define MNT_LOCKED 0x800000 -#define MNT_DOOMED 0x1000000 -#define MNT_SYNC_UMOUNT 0x2000000 -#define MNT_MARKED 0x4000000 -#define MNT_UMOUNT 0x8000000 +enum mount_flags { + MNT_NOSUID = 0x01, + MNT_NODEV = 0x02, + MNT_NOEXEC = 0x04, + MNT_NOATIME = 0x08, + MNT_NODIRATIME = 0x10, + MNT_RELATIME = 0x20, + MNT_READONLY = 0x40, /* does the user want this to be r/o? */ + MNT_NOSYMFOLLOW = 0x80, + + MNT_SHRINKABLE = 0x100, + MNT_WRITE_HOLD = 0x200, + + MNT_SHARED = 0x1000, /* if the vfsmount is a shared mount */ + MNT_UNBINDABLE = 0x2000, /* if the vfsmount is a unbindable mount */ + + MNT_INTERNAL = 0x4000, + + MNT_LOCK_ATIME = 0x040000, + MNT_LOCK_NOEXEC = 0x080000, + MNT_LOCK_NOSUID = 0x100000, + MNT_LOCK_NODEV = 0x200000, + MNT_LOCK_READONLY = 0x400000, + MNT_LOCKED = 0x800000, + MNT_DOOMED = 0x1000000, + MNT_SYNC_UMOUNT = 0x2000000, + MNT_MARKED = 0x4000000, + MNT_UMOUNT = 0x8000000, + + /* + * MNT_SHARED_MASK is the set of flags that should be cleared when a + * mount becomes shared. Currently, this is only the flag that says a + * mount cannot be bind mounted, since this is how we create a mount + * that shares events with another mount. If you add a new MNT_* + * flag, consider how it interacts with shared mounts. + */ + MNT_SHARED_MASK = MNT_UNBINDABLE, + MNT_USER_SETTABLE_MASK = MNT_NOSUID | MNT_NODEV | MNT_NOEXEC + | MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME + | MNT_READONLY | MNT_NOSYMFOLLOW, + MNT_ATIME_MASK = MNT_NOATIME | MNT_NODIRATIME | MNT_RELATIME, + + MNT_INTERNAL_FLAGS = MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | + MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED, +}; struct vfsmount { struct dentry *mnt_root; /* root of the mounted tree */ diff --git a/include/linux/msi.h b/include/linux/msi.h index 86e42742fd0f..6863540f4b71 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -229,8 +229,11 @@ struct msi_dev_domain { int msi_setup_device_data(struct device *dev); -void msi_lock_descs(struct device *dev); -void msi_unlock_descs(struct device *dev); +void __msi_lock_descs(struct device *dev); +void __msi_unlock_descs(struct device *dev); + +DEFINE_LOCK_GUARD_1(msi_descs_lock, struct device, __msi_lock_descs(_T->lock), + __msi_unlock_descs(_T->lock)); struct msi_desc *msi_domain_first_desc(struct device *dev, unsigned int domid, enum msi_desc_filter filter); @@ -420,6 +423,7 @@ struct msi_domain_info; * @msi_init: Domain specific init function for MSI interrupts * @msi_free: Domain specific function to free a MSI interrupts * @msi_prepare: Prepare the allocation of the interrupts in the domain + * @msi_teardown: Reverse the effects of @msi_prepare * @prepare_desc: Optional function to prepare the allocated MSI descriptor * in the domain * @set_desc: Set the msi descriptor for an interrupt @@ -435,8 +439,9 @@ struct msi_domain_info; * @get_hwirq, @msi_init and @msi_free are callbacks used by the underlying * irqdomain. * - * @msi_check, @msi_prepare, @prepare_desc and @set_desc are callbacks used by the - * msi_domain_alloc/free_irqs*() variants. + * @msi_check, @msi_prepare, @msi_teardown, @prepare_desc and + * @set_desc are callbacks used by the msi_domain_alloc/free_irqs*() + * variants. * * @domain_alloc_irqs, @domain_free_irqs can be used to override the * default allocation/free functions (__msi_domain_alloc/free_irqs). This @@ -458,6 +463,8 @@ struct msi_domain_ops { int (*msi_prepare)(struct irq_domain *domain, struct device *dev, int nvec, msi_alloc_info_t *arg); + void (*msi_teardown)(struct irq_domain *domain, + msi_alloc_info_t *arg); void (*prepare_desc)(struct irq_domain *domain, msi_alloc_info_t *arg, struct msi_desc *desc); void (*set_desc)(msi_alloc_info_t *arg, @@ -486,6 +493,7 @@ struct msi_domain_ops { * @handler: Optional: associated interrupt flow handler * @handler_data: Optional: associated interrupt flow handler data * @handler_name: Optional: associated interrupt flow handler name + * @alloc_data: Optional: associated interrupt allocation data * @data: Optional: domain specific data */ struct msi_domain_info { @@ -498,6 +506,7 @@ struct msi_domain_info { irq_flow_handler_t handler; void *handler_data; const char *handler_name; + msi_alloc_info_t *alloc_data; void *data; }; @@ -507,12 +516,14 @@ struct msi_domain_info { * @chip: Interrupt chip for this domain * @ops: MSI domain ops * @info: MSI domain info data + * @alloc_info: MSI domain allocation data (architecture specific) */ struct msi_domain_template { char name[48]; struct irq_chip chip; struct msi_domain_ops ops; struct msi_domain_info info; + msi_alloc_info_t alloc_info; }; /* @@ -625,6 +636,10 @@ struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent); +struct irq_domain_info; +struct irq_domain *msi_create_parent_irq_domain(struct irq_domain_info *info, + const struct msi_parent_ops *msi_parent_ops); + bool msi_create_device_irq_domain(struct device *dev, unsigned int domid, const struct msi_domain_template *template, unsigned int hwsize, void *domain_data, diff --git a/include/linux/namei.h b/include/linux/namei.h index bbaf55fb3101..5d085428e471 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -70,17 +70,16 @@ int vfs_path_parent_lookup(struct filename *filename, unsigned int flags, int vfs_path_lookup(struct dentry *, struct vfsmount *, const char *, unsigned int, struct path *); -extern struct dentry *try_lookup_one_len(const char *, struct dentry *, int); -extern struct dentry *lookup_one_len(const char *, struct dentry *, int); -extern struct dentry *lookup_one_len_unlocked(const char *, struct dentry *, int); -extern struct dentry *lookup_positive_unlocked(const char *, struct dentry *, int); -struct dentry *lookup_one(struct mnt_idmap *, const char *, struct dentry *, int); +extern struct dentry *try_lookup_noperm(struct qstr *, struct dentry *); +extern struct dentry *lookup_noperm(struct qstr *, struct dentry *); +extern struct dentry *lookup_noperm_unlocked(struct qstr *, struct dentry *); +extern struct dentry *lookup_noperm_positive_unlocked(struct qstr *, struct dentry *); +struct dentry *lookup_one(struct mnt_idmap *, struct qstr *, struct dentry *); struct dentry *lookup_one_unlocked(struct mnt_idmap *idmap, - const char *name, struct dentry *base, - int len); + struct qstr *name, struct dentry *base); struct dentry *lookup_one_positive_unlocked(struct mnt_idmap *idmap, - const char *name, - struct dentry *base, int len); + struct qstr *name, + struct dentry *base); extern int follow_down_one(struct path *); extern int follow_down(struct path *path, unsigned int flags); diff --git a/include/linux/net.h b/include/linux/net.h index 0ff950eecc6b..f60fff91e1df 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -70,6 +70,7 @@ enum sock_type { SOCK_DCCP = 6, SOCK_PACKET = 10, }; +#endif /* ARCH_HAS_SOCKET_TYPES */ #define SOCK_MAX (SOCK_PACKET + 1) /* Mask which covers at least up to SOCK_MASK-1. The @@ -81,8 +82,7 @@ enum sock_type { #ifndef SOCK_NONBLOCK #define SOCK_NONBLOCK O_NONBLOCK #endif - -#endif /* ARCH_HAS_SOCKET_TYPES */ +#define SOCK_COREDUMP O_NOCTTY /** * enum sock_shutdown_cmd - Shutdown types diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 2479ed10f53e..51308f65b72f 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -303,6 +303,7 @@ enum nvme_ctrl_attr { NVME_CTRL_ATTR_TBKAS = (1 << 6), NVME_CTRL_ATTR_ELBAS = (1 << 15), NVME_CTRL_ATTR_RHII = (1 << 18), + NVME_CTRL_ATTR_FDPS = (1 << 19), }; struct nvme_id_ctrl { @@ -689,6 +690,44 @@ struct nvme_rotational_media_log { __u8 rsvd24[488]; }; +struct nvme_fdp_config { + __u8 flags; +#define FDPCFG_FDPE (1U << 0) + __u8 fdpcidx; + __le16 reserved; +}; + +struct nvme_fdp_ruh_desc { + __u8 ruht; + __u8 reserved[3]; +}; + +struct nvme_fdp_config_desc { + __le16 dsze; + __u8 fdpa; + __u8 vss; + __le32 nrg; + __le16 nruh; + __le16 maxpids; + __le32 nns; + __le64 runs; + __le32 erutl; + __u8 rsvd28[36]; + struct nvme_fdp_ruh_desc ruhs[]; +}; + +struct nvme_fdp_config_log { + __le16 numfdpc; + __u8 ver; + __u8 rsvd3; + __le32 sze; + __u8 rsvd8[8]; + /* + * This is followed by variable number of nvme_fdp_config_desc + * structures, but sparse doesn't like nested variable sized arrays. + */ +}; + struct nvme_smart_log { __u8 critical_warning; __u8 temperature[2]; @@ -915,6 +954,7 @@ enum nvme_opcode { nvme_cmd_resv_register = 0x0d, nvme_cmd_resv_report = 0x0e, nvme_cmd_resv_acquire = 0x11, + nvme_cmd_io_mgmt_recv = 0x12, nvme_cmd_resv_release = 0x15, nvme_cmd_zone_mgmt_send = 0x79, nvme_cmd_zone_mgmt_recv = 0x7a, @@ -936,6 +976,7 @@ enum nvme_opcode { nvme_opcode_name(nvme_cmd_resv_register), \ nvme_opcode_name(nvme_cmd_resv_report), \ nvme_opcode_name(nvme_cmd_resv_acquire), \ + nvme_opcode_name(nvme_cmd_io_mgmt_recv), \ nvme_opcode_name(nvme_cmd_resv_release), \ nvme_opcode_name(nvme_cmd_zone_mgmt_send), \ nvme_opcode_name(nvme_cmd_zone_mgmt_recv), \ @@ -1087,6 +1128,7 @@ enum { NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12, NVME_RW_PRINFO_PRACT = 1 << 13, NVME_RW_DTYPE_STREAMS = 1 << 4, + NVME_RW_DTYPE_DPLCMT = 2 << 4, NVME_WZ_DEAC = 1 << 9, }; @@ -1174,6 +1216,38 @@ struct nvme_zone_mgmt_recv_cmd { __le32 cdw14[2]; }; +struct nvme_io_mgmt_recv_cmd { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __le64 rsvd2[2]; + union nvme_data_ptr dptr; + __u8 mo; + __u8 rsvd11; + __u16 mos; + __le32 numd; + __le32 cdw12[4]; +}; + +enum { + NVME_IO_MGMT_RECV_MO_RUHS = 1, +}; + +struct nvme_fdp_ruh_status_desc { + __le16 pid; + __le16 ruhid; + __le32 earutr; + __le64 ruamw; + __u8 reserved[16]; +}; + +struct nvme_fdp_ruh_status { + __u8 rsvd0[14]; + __le16 nruhsd; + struct nvme_fdp_ruh_status_desc ruhsd[]; +}; + enum { NVME_ZRA_ZONE_REPORT = 0, NVME_ZRASF_ZONE_REPORT_ALL = 0, @@ -1309,6 +1383,7 @@ enum { NVME_FEAT_PLM_WINDOW = 0x14, NVME_FEAT_HOST_BEHAVIOR = 0x16, NVME_FEAT_SANITIZE = 0x17, + NVME_FEAT_FDP = 0x1d, NVME_FEAT_SW_PROGRESS = 0x80, NVME_FEAT_HOST_ID = 0x81, NVME_FEAT_RESV_MASK = 0x82, @@ -1329,6 +1404,7 @@ enum { NVME_LOG_ANA = 0x0c, NVME_LOG_FEATURES = 0x12, NVME_LOG_RMI = 0x16, + NVME_LOG_FDP_CONFIGS = 0x20, NVME_LOG_DISC = 0x70, NVME_LOG_RESERVATION = 0x80, NVME_FWACT_REPL = (0 << 3), @@ -1923,6 +1999,7 @@ struct nvme_command { struct nvmf_auth_receive_command auth_receive; struct nvme_dbbuf dbbuf; struct nvme_directive_cmd directive; + struct nvme_io_mgmt_recv_cmd imr; }; }; diff --git a/include/linux/overflow.h b/include/linux/overflow.h index 0c7e3dcfe867..7b7be27ca113 100644 --- a/include/linux/overflow.h +++ b/include/linux/overflow.h @@ -396,7 +396,7 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) * @name: Name for a variable to define. * @member: Name of the array member. * @count: Number of elements in the array; must be compile-time const. - * @initializer: initializer expression (could be empty for no init). + * @initializer: Initializer expression (e.g., pass `= { }` at minimum). */ #define _DEFINE_FLEX(type, name, member, count, initializer...) \ _Static_assert(__builtin_constant_p(count), \ @@ -404,7 +404,7 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) union { \ u8 bytes[struct_size_t(type, member, count)]; \ type obj; \ - } name##_u initializer; \ + } name##_u = { .obj initializer }; \ type *name = (type *)&name##_u /** @@ -419,6 +419,9 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) * Define a zeroed, on-stack, instance of @type structure with a trailing * flexible array member. * Use __struct_size(@name) to get compile-time size of it afterwards. + * Use __member_size(@name->member) to get compile-time size of @name members. + * Use STACK_FLEX_ARRAY_SIZE(@name, @member) to get compile-time number of + * elements in array @member. */ #define DEFINE_RAW_FLEX(type, name, member, count) \ _DEFINE_FLEX(type, name, member, count, = {}) @@ -436,8 +439,22 @@ static inline size_t __must_check size_sub(size_t minuend, size_t subtrahend) * Define a zeroed, on-stack, instance of @TYPE structure with a trailing * flexible array member. * Use __struct_size(@NAME) to get compile-time size of it afterwards. + * Use __member_size(@NAME->member) to get compile-time size of @NAME members. + * Use STACK_FLEX_ARRAY_SIZE(@name, @member) to get compile-time number of + * elements in array @member. */ #define DEFINE_FLEX(TYPE, NAME, MEMBER, COUNTER, COUNT) \ - _DEFINE_FLEX(TYPE, NAME, MEMBER, COUNT, = { .obj.COUNTER = COUNT, }) + _DEFINE_FLEX(TYPE, NAME, MEMBER, COUNT, = { .COUNTER = COUNT, }) + +/** + * STACK_FLEX_ARRAY_SIZE() - helper macro for DEFINE_FLEX() family. + * Returns the number of elements in @array. + * + * @name: Name for a variable defined in DEFINE_RAW_FLEX()/DEFINE_FLEX(). + * @array: Name of the array member. + */ +#define STACK_FLEX_ARRAY_SIZE(name, array) \ + (__member_size((name)->array) / sizeof(*(name)->array) + \ + __must_be_array((name)->array)) #endif /* __LINUX_OVERFLOW_H */ diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index e6a21b62dcce..3b814ce08331 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -615,6 +615,13 @@ FOLIO_FLAG(dropbehind, FOLIO_HEAD_PAGE) PAGEFLAG_FALSE(HighMem, highmem) #endif +/* Does kmap_local_folio() only allow access to one page of the folio? */ +#ifdef CONFIG_DEBUG_KMAP_LOCAL_FORCE_MAP +#define folio_test_partial_kmap(f) true +#else +#define folio_test_partial_kmap(f) folio_test_highmem(f) +#endif + #ifdef CONFIG_SWAP static __always_inline bool folio_test_swapcache(const struct folio *folio) { diff --git a/include/linux/panic.h b/include/linux/panic.h index 2494d51707ef..4adc65766935 100644 --- a/include/linux/panic.h +++ b/include/linux/panic.h @@ -20,8 +20,6 @@ extern bool panic_triggering_all_cpu_backtrace; extern int panic_timeout; extern unsigned long panic_print; extern int panic_on_oops; -extern int panic_on_unrecovered_nmi; -extern int panic_on_io_nmi; extern int panic_on_warn; extern unsigned long panic_on_taint; diff --git a/include/linux/part_stat.h b/include/linux/part_stat.h index c5e9cac0575e..eeeff2a04529 100644 --- a/include/linux/part_stat.h +++ b/include/linux/part_stat.h @@ -79,4 +79,6 @@ static inline void part_stat_set_all(struct block_device *part, int value) #define part_stat_local_read_cpu(part, field, cpu) \ local_read(&(part_stat_get_cpu(part, field, cpu))) +unsigned int bdev_count_inflight(struct block_device *part); + #endif /* _LINUX_PART_STAT_H */ diff --git a/include/linux/pci-p2pdma.h b/include/linux/pci-p2pdma.h index 2c07aa6b7665..075c20b161d9 100644 --- a/include/linux/pci-p2pdma.h +++ b/include/linux/pci-p2pdma.h @@ -104,4 +104,89 @@ static inline struct pci_dev *pci_p2pmem_find(struct device *client) return pci_p2pmem_find_many(&client, 1); } +enum pci_p2pdma_map_type { + /* + * PCI_P2PDMA_MAP_UNKNOWN: Used internally as an initial state before + * the mapping type has been calculated. Exported routines for the API + * will never return this value. + */ + PCI_P2PDMA_MAP_UNKNOWN = 0, + + /* + * Not a PCI P2PDMA transfer. + */ + PCI_P2PDMA_MAP_NONE, + + /* + * PCI_P2PDMA_MAP_NOT_SUPPORTED: Indicates the transaction will + * traverse the host bridge and the host bridge is not in the + * allowlist. DMA Mapping routines should return an error when + * this is returned. + */ + PCI_P2PDMA_MAP_NOT_SUPPORTED, + + /* + * PCI_P2PDMA_MAP_BUS_ADDR: Indicates that two devices can talk to + * each other directly through a PCI switch and the transaction will + * not traverse the host bridge. Such a mapping should program + * the DMA engine with PCI bus addresses. + */ + PCI_P2PDMA_MAP_BUS_ADDR, + + /* + * PCI_P2PDMA_MAP_THRU_HOST_BRIDGE: Indicates two devices can talk + * to each other, but the transaction traverses a host bridge on the + * allowlist. In this case, a normal mapping either with CPU physical + * addresses (in the case of dma-direct) or IOVA addresses (in the + * case of IOMMUs) should be used to program the DMA engine. + */ + PCI_P2PDMA_MAP_THRU_HOST_BRIDGE, +}; + +struct pci_p2pdma_map_state { + struct dev_pagemap *pgmap; + enum pci_p2pdma_map_type map; + u64 bus_off; +}; + +/* helper for pci_p2pdma_state(), do not use directly */ +void __pci_p2pdma_update_state(struct pci_p2pdma_map_state *state, + struct device *dev, struct page *page); + +/** + * pci_p2pdma_state - check the P2P transfer state of a page + * @state: P2P state structure + * @dev: device to transfer to/from + * @page: page to map + * + * Check if @page is a PCI P2PDMA page, and if yes of what kind. Returns the + * map type, and updates @state with all information needed for a P2P transfer. + */ +static inline enum pci_p2pdma_map_type +pci_p2pdma_state(struct pci_p2pdma_map_state *state, struct device *dev, + struct page *page) +{ + if (IS_ENABLED(CONFIG_PCI_P2PDMA) && is_pci_p2pdma_page(page)) { + if (state->pgmap != page_pgmap(page)) + __pci_p2pdma_update_state(state, dev, page); + return state->map; + } + return PCI_P2PDMA_MAP_NONE; +} + +/** + * pci_p2pdma_bus_addr_map - Translate a physical address to a bus address + * for a PCI_P2PDMA_MAP_BUS_ADDR transfer. + * @state: P2P state structure + * @paddr: physical address to map + * + * Map a physically contiguous PCI_P2PDMA_MAP_BUS_ADDR transfer. + */ +static inline dma_addr_t +pci_p2pdma_bus_addr_map(struct pci_p2pdma_map_state *state, phys_addr_t paddr) +{ + WARN_ON_ONCE(state->map != PCI_P2PDMA_MAP_BUS_ADDR); + return paddr + state->bus_off; +} + #endif /* _LINUX_PCI_P2P_H */ diff --git a/include/linux/pci.h b/include/linux/pci.h index 51e2bd6405cd..b231cbc67a35 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1671,7 +1671,7 @@ void pci_disable_msi(struct pci_dev *dev); int pci_msix_vec_count(struct pci_dev *dev); void pci_disable_msix(struct pci_dev *dev); void pci_restore_msi_state(struct pci_dev *dev); -int pci_msi_enabled(void); +bool pci_msi_enabled(void); int pci_enable_msi(struct pci_dev *dev); int pci_enable_msix_range(struct pci_dev *dev, struct msix_entry *entries, int minvec, int maxvec); @@ -1704,7 +1704,7 @@ static inline void pci_disable_msi(struct pci_dev *dev) { } static inline int pci_msix_vec_count(struct pci_dev *dev) { return -ENOSYS; } static inline void pci_disable_msix(struct pci_dev *dev) { } static inline void pci_restore_msi_state(struct pci_dev *dev) { } -static inline int pci_msi_enabled(void) { return 0; } +static inline bool pci_msi_enabled(void) { return false; } static inline int pci_enable_msi(struct pci_dev *dev) { return -ENOSYS; } static inline int pci_enable_msix_range(struct pci_dev *dev, diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 2e28182c3af0..e2d71b6fdd84 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -3049,6 +3049,7 @@ #define PCI_DEVICE_ID_INTEL_HDA_DG1 0x490d #define PCI_DEVICE_ID_INTEL_HDA_EHL_0 0x4b55 #define PCI_DEVICE_ID_INTEL_HDA_EHL_3 0x4b58 +#define PCI_DEVICE_ID_INTEL_HDA_WCL 0x4d28 #define PCI_DEVICE_ID_INTEL_HDA_JSL_N 0x4dc8 #define PCI_DEVICE_ID_INTEL_HDA_DG2_0 0x4f90 #define PCI_DEVICE_ID_INTEL_HDA_DG2_1 0x4f91 @@ -3070,6 +3071,7 @@ #define PCI_DEVICE_ID_INTEL_5100_21 0x65f5 #define PCI_DEVICE_ID_INTEL_5100_22 0x65f6 #define PCI_DEVICE_ID_INTEL_IOAT_SCNB 0x65ff +#define PCI_DEVICE_ID_INTEL_HDA_FCL 0x67a8 #define PCI_DEVICE_ID_INTEL_82371SB_0 0x7000 #define PCI_DEVICE_ID_INTEL_82371SB_1 0x7010 #define PCI_DEVICE_ID_INTEL_82371SB_2 0x7020 diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index af7d75ede619..288f5235649a 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -43,9 +43,10 @@ is_static struct percpu_rw_semaphore name = { \ #define DEFINE_STATIC_PERCPU_RWSEM(name) \ __DEFINE_PERCPU_RWSEM(name, static) -extern bool __percpu_down_read(struct percpu_rw_semaphore *, bool); +extern bool __percpu_down_read(struct percpu_rw_semaphore *, bool, bool); -static inline void percpu_down_read(struct percpu_rw_semaphore *sem) +static inline void percpu_down_read_internal(struct percpu_rw_semaphore *sem, + bool freezable) { might_sleep(); @@ -63,7 +64,7 @@ static inline void percpu_down_read(struct percpu_rw_semaphore *sem) if (likely(rcu_sync_is_idle(&sem->rss))) this_cpu_inc(*sem->read_count); else - __percpu_down_read(sem, false); /* Unconditional memory barrier */ + __percpu_down_read(sem, false, freezable); /* Unconditional memory barrier */ /* * The preempt_enable() prevents the compiler from * bleeding the critical section out. @@ -71,6 +72,17 @@ static inline void percpu_down_read(struct percpu_rw_semaphore *sem) preempt_enable(); } +static inline void percpu_down_read(struct percpu_rw_semaphore *sem) +{ + percpu_down_read_internal(sem, false); +} + +static inline void percpu_down_read_freezable(struct percpu_rw_semaphore *sem, + bool freeze) +{ + percpu_down_read_internal(sem, freeze); +} + static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem) { bool ret = true; @@ -82,7 +94,7 @@ static inline bool percpu_down_read_trylock(struct percpu_rw_semaphore *sem) if (likely(rcu_sync_is_idle(&sem->rss))) this_cpu_inc(*sem->read_count); else - ret = __percpu_down_read(sem, true); /* Unconditional memory barrier */ + ret = __percpu_down_read(sem, true, false); /* Unconditional memory barrier */ preempt_enable(); /* * The barrier() from preempt_enable() prevents the compiler from diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 52b5ea663b9f..85bf8dd9f087 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -15,11 +15,7 @@ /* enough to cover all DEFINE_PER_CPUs in modules */ #ifdef CONFIG_MODULES -#ifdef CONFIG_MEM_ALLOC_PROFILING -#define PERCPU_MODULE_RESERVE (8 << 13) -#else #define PERCPU_MODULE_RESERVE (8 << 10) -#endif #else #define PERCPU_MODULE_RESERVE 0 #endif diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 0069ba6866a4..52dc7cfab0e0 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -26,18 +26,9 @@ # include <asm/local64.h> #endif -#define PERF_GUEST_ACTIVE 0x01 -#define PERF_GUEST_USER 0x02 - -struct perf_guest_info_callbacks { - unsigned int (*state)(void); - unsigned long (*get_ip)(void); - unsigned int (*handle_intel_pt_intr)(void); -}; - #ifdef CONFIG_HAVE_HW_BREAKPOINT -#include <linux/rhashtable-types.h> -#include <asm/hw_breakpoint.h> +# include <linux/rhashtable-types.h> +# include <asm/hw_breakpoint.h> #endif #include <linux/list.h> @@ -62,19 +53,20 @@ struct perf_guest_info_callbacks { #include <linux/security.h> #include <linux/static_call.h> #include <linux/lockdep.h> + #include <asm/local.h> struct perf_callchain_entry { - __u64 nr; - __u64 ip[]; /* /proc/sys/kernel/perf_event_max_stack */ + u64 nr; + u64 ip[]; /* /proc/sys/kernel/perf_event_max_stack */ }; struct perf_callchain_entry_ctx { - struct perf_callchain_entry *entry; - u32 max_stack; - u32 nr; - short contexts; - bool contexts_maxed; + struct perf_callchain_entry *entry; + u32 max_stack; + u32 nr; + short contexts; + bool contexts_maxed; }; typedef unsigned long (*perf_copy_f)(void *dst, const void *src, @@ -121,8 +113,8 @@ static __always_inline bool perf_raw_frag_last(const struct perf_raw_frag *frag) * already stored in age order, the hw_idx should be 0. */ struct perf_branch_stack { - __u64 nr; - __u64 hw_idx; + u64 nr; + u64 hw_idx; struct perf_branch_entry entries[]; }; @@ -132,10 +124,10 @@ struct task_struct; * extra PMU register associated with an event */ struct hw_perf_event_extra { - u64 config; /* register value */ - unsigned int reg; /* register address or index */ - int alloc; /* extra register already allocated */ - int idx; /* index in shared_regs->regs[] */ + u64 config; /* register value */ + unsigned int reg; /* register address or index */ + int alloc; /* extra register already allocated */ + int idx; /* index in shared_regs->regs[] */ }; /** @@ -144,8 +136,8 @@ struct hw_perf_event_extra { * PERF_EVENT_FLAG_ARCH bits are reserved for architecture-specific * usage. */ -#define PERF_EVENT_FLAG_ARCH 0x000fffff -#define PERF_EVENT_FLAG_USER_READ_CNT 0x80000000 +#define PERF_EVENT_FLAG_ARCH 0x0fffffff +#define PERF_EVENT_FLAG_USER_READ_CNT 0x80000000 static_assert((PERF_EVENT_FLAG_USER_READ_CNT & PERF_EVENT_FLAG_ARCH) == 0); @@ -157,7 +149,9 @@ struct hw_perf_event { union { struct { /* hardware */ u64 config; + u64 config1; u64 last_tag; + u64 dyn_constraint; unsigned long config_base; unsigned long event_base; int event_base_rdpmc; @@ -225,9 +219,14 @@ struct hw_perf_event { /* * hw_perf_event::state flags; used to track the PERF_EF_* state. */ -#define PERF_HES_STOPPED 0x01 /* the counter is stopped */ -#define PERF_HES_UPTODATE 0x02 /* event->count up-to-date */ -#define PERF_HES_ARCH 0x04 + +/* the counter is stopped */ +#define PERF_HES_STOPPED 0x01 + +/* event->count up-to-date */ +#define PERF_HES_UPTODATE 0x02 + +#define PERF_HES_ARCH 0x04 int state; @@ -276,7 +275,7 @@ struct hw_perf_event { */ u64 freq_time_stamp; u64 freq_count_stamp; -#endif +#endif /* CONFIG_PERF_EVENTS */ }; struct perf_event; @@ -285,28 +284,33 @@ struct perf_event_pmu_context; /* * Common implementation detail of pmu::{start,commit,cancel}_txn */ -#define PERF_PMU_TXN_ADD 0x1 /* txn to add/schedule event on PMU */ -#define PERF_PMU_TXN_READ 0x2 /* txn to read event group from PMU */ + +/* txn to add/schedule event on PMU */ +#define PERF_PMU_TXN_ADD 0x1 + +/* txn to read event group from PMU */ +#define PERF_PMU_TXN_READ 0x2 /** * pmu::capabilities flags */ -#define PERF_PMU_CAP_NO_INTERRUPT 0x0001 -#define PERF_PMU_CAP_NO_NMI 0x0002 -#define PERF_PMU_CAP_AUX_NO_SG 0x0004 -#define PERF_PMU_CAP_EXTENDED_REGS 0x0008 -#define PERF_PMU_CAP_EXCLUSIVE 0x0010 -#define PERF_PMU_CAP_ITRACE 0x0020 -#define PERF_PMU_CAP_NO_EXCLUDE 0x0040 -#define PERF_PMU_CAP_AUX_OUTPUT 0x0080 -#define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0100 -#define PERF_PMU_CAP_AUX_PAUSE 0x0200 +#define PERF_PMU_CAP_NO_INTERRUPT 0x0001 +#define PERF_PMU_CAP_NO_NMI 0x0002 +#define PERF_PMU_CAP_AUX_NO_SG 0x0004 +#define PERF_PMU_CAP_EXTENDED_REGS 0x0008 +#define PERF_PMU_CAP_EXCLUSIVE 0x0010 +#define PERF_PMU_CAP_ITRACE 0x0020 +#define PERF_PMU_CAP_NO_EXCLUDE 0x0040 +#define PERF_PMU_CAP_AUX_OUTPUT 0x0080 +#define PERF_PMU_CAP_EXTENDED_HW_TYPE 0x0100 +#define PERF_PMU_CAP_AUX_PAUSE 0x0200 +#define PERF_PMU_CAP_AUX_PREFER_LARGE 0x0400 /** * pmu::scope */ enum perf_pmu_scope { - PERF_PMU_SCOPE_NONE = 0, + PERF_PMU_SCOPE_NONE = 0, PERF_PMU_SCOPE_CORE, PERF_PMU_SCOPE_DIE, PERF_PMU_SCOPE_CLUSTER, @@ -325,6 +329,9 @@ struct perf_output_handle; struct pmu { struct list_head entry; + spinlock_t events_lock; + struct list_head events; + struct module *module; struct device *dev; struct device *parent; @@ -387,11 +394,21 @@ struct pmu { * Flags for ->add()/->del()/ ->start()/->stop(). There are * matching hw_perf_event::state flags. */ -#define PERF_EF_START 0x01 /* start the counter when adding */ -#define PERF_EF_RELOAD 0x02 /* reload the counter when starting */ -#define PERF_EF_UPDATE 0x04 /* update the counter when stopping */ -#define PERF_EF_PAUSE 0x08 /* AUX area event, pause tracing */ -#define PERF_EF_RESUME 0x10 /* AUX area event, resume tracing */ + +/* start the counter when adding */ +#define PERF_EF_START 0x01 + +/* reload the counter when starting */ +#define PERF_EF_RELOAD 0x02 + +/* update the counter when stopping */ +#define PERF_EF_UPDATE 0x04 + +/* AUX area event, pause tracing */ +#define PERF_EF_PAUSE 0x08 + +/* AUX area event, resume tracing */ +#define PERF_EF_RESUME 0x10 /* * Adds/Removes a counter to/from the PMU, can be done inside a @@ -590,10 +607,10 @@ enum perf_addr_filter_action_t { * This is a hardware-agnostic filter configuration as specified by the user. */ struct perf_addr_filter { - struct list_head entry; - struct path path; - unsigned long offset; - unsigned long size; + struct list_head entry; + struct path path; + unsigned long offset; + unsigned long size; enum perf_addr_filter_action_t action; }; @@ -608,23 +625,24 @@ struct perf_addr_filter { * bundled together; see perf_event_addr_filters(). */ struct perf_addr_filters_head { - struct list_head list; - raw_spinlock_t lock; - unsigned int nr_file_filters; + struct list_head list; + raw_spinlock_t lock; + unsigned int nr_file_filters; }; struct perf_addr_filter_range { - unsigned long start; - unsigned long size; + unsigned long start; + unsigned long size; }; /** * enum perf_event_state - the states of an event: */ enum perf_event_state { - PERF_EVENT_STATE_DEAD = -4, - PERF_EVENT_STATE_EXIT = -3, - PERF_EVENT_STATE_ERROR = -2, + PERF_EVENT_STATE_DEAD = -5, + PERF_EVENT_STATE_REVOKED = -4, /* pmu gone, must not touch */ + PERF_EVENT_STATE_EXIT = -3, /* task died, still inherit */ + PERF_EVENT_STATE_ERROR = -2, /* scheduling error, can enable */ PERF_EVENT_STATE_OFF = -1, PERF_EVENT_STATE_INACTIVE = 0, PERF_EVENT_STATE_ACTIVE = 1, @@ -662,24 +680,24 @@ struct swevent_hlist { struct rcu_head rcu_head; }; -#define PERF_ATTACH_CONTEXT 0x0001 -#define PERF_ATTACH_GROUP 0x0002 -#define PERF_ATTACH_TASK 0x0004 -#define PERF_ATTACH_TASK_DATA 0x0008 -#define PERF_ATTACH_GLOBAL_DATA 0x0010 -#define PERF_ATTACH_SCHED_CB 0x0020 -#define PERF_ATTACH_CHILD 0x0040 -#define PERF_ATTACH_EXCLUSIVE 0x0080 -#define PERF_ATTACH_CALLCHAIN 0x0100 -#define PERF_ATTACH_ITRACE 0x0200 +#define PERF_ATTACH_CONTEXT 0x0001 +#define PERF_ATTACH_GROUP 0x0002 +#define PERF_ATTACH_TASK 0x0004 +#define PERF_ATTACH_TASK_DATA 0x0008 +#define PERF_ATTACH_GLOBAL_DATA 0x0010 +#define PERF_ATTACH_SCHED_CB 0x0020 +#define PERF_ATTACH_CHILD 0x0040 +#define PERF_ATTACH_EXCLUSIVE 0x0080 +#define PERF_ATTACH_CALLCHAIN 0x0100 +#define PERF_ATTACH_ITRACE 0x0200 struct bpf_prog; struct perf_cgroup; struct perf_buffer; struct pmu_event_list { - raw_spinlock_t lock; - struct list_head list; + raw_spinlock_t lock; + struct list_head list; }; /* @@ -689,12 +707,12 @@ struct pmu_event_list { * disabled is sufficient since it will hold-off the IPIs. */ #ifdef CONFIG_PROVE_LOCKING -#define lockdep_assert_event_ctx(event) \ +# define lockdep_assert_event_ctx(event) \ WARN_ON_ONCE(__lockdep_enabled && \ (this_cpu_read(hardirqs_enabled) && \ lockdep_is_held(&(event)->ctx->mutex) != LOCK_STATE_HELD)) #else -#define lockdep_assert_event_ctx(event) +# define lockdep_assert_event_ctx(event) #endif #define for_each_sibling_event(sibling, event) \ @@ -852,9 +870,9 @@ struct perf_event { #ifdef CONFIG_EVENT_TRACING struct trace_event_call *tp_event; struct event_filter *filter; -#ifdef CONFIG_FUNCTION_TRACER +# ifdef CONFIG_FUNCTION_TRACER struct ftrace_ops ftrace_ops; -#endif +# endif #endif #ifdef CONFIG_CGROUP_PERF @@ -865,6 +883,7 @@ struct perf_event { void *security; #endif struct list_head sb_list; + struct list_head pmu_list; /* * Certain events gets forwarded to another pmu internally by over- @@ -872,7 +891,7 @@ struct perf_event { * of it. event->orig_type contains original 'type' requested by * user. */ - __u32 orig_type; + u32 orig_type; #endif /* CONFIG_PERF_EVENTS */ }; @@ -937,8 +956,8 @@ static inline bool perf_pmu_ctx_is_active(struct perf_event_pmu_context *epc) } struct perf_event_groups { - struct rb_root tree; - u64 index; + struct rb_root tree; + u64 index; }; @@ -1155,7 +1174,7 @@ extern void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags); extern void perf_event_itrace_started(struct perf_event *event); extern int perf_pmu_register(struct pmu *pmu, const char *name, int type); -extern void perf_pmu_unregister(struct pmu *pmu); +extern int perf_pmu_unregister(struct pmu *pmu); extern void __perf_event_task_sched_in(struct task_struct *prev, struct task_struct *task); @@ -1181,16 +1200,18 @@ extern void perf_pmu_resched(struct pmu *pmu); extern int perf_event_refresh(struct perf_event *event, int refresh); extern void perf_event_update_userpage(struct perf_event *event); extern int perf_event_release_kernel(struct perf_event *event); + extern struct perf_event * perf_event_create_kernel_counter(struct perf_event_attr *attr, - int cpu, - struct task_struct *task, - perf_overflow_handler_t callback, - void *context); + int cpu, + struct task_struct *task, + perf_overflow_handler_t callback, + void *context); + extern void perf_pmu_migrate_context(struct pmu *pmu, - int src_cpu, int dst_cpu); -int perf_event_read_local(struct perf_event *event, u64 *value, - u64 *enabled, u64 *running); + int src_cpu, int dst_cpu); +extern int perf_event_read_local(struct perf_event *event, u64 *value, + u64 *enabled, u64 *running); extern u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running); @@ -1407,14 +1428,14 @@ static inline u32 perf_sample_data_size(struct perf_sample_data *data, */ static inline void perf_clear_branch_entry_bitfields(struct perf_branch_entry *br) { - br->mispred = 0; - br->predicted = 0; - br->in_tx = 0; - br->abort = 0; - br->cycles = 0; - br->type = 0; - br->spec = PERF_BR_SPEC_NA; - br->reserved = 0; + br->mispred = 0; + br->predicted = 0; + br->in_tx = 0; + br->abort = 0; + br->cycles = 0; + br->type = 0; + br->spec = PERF_BR_SPEC_NA; + br->reserved = 0; } extern void perf_output_sample(struct perf_output_handle *handle, @@ -1603,7 +1624,17 @@ extern void perf_event_bpf_event(struct bpf_prog *prog, enum perf_bpf_event_type type, u16 flags); +#define PERF_GUEST_ACTIVE 0x01 +#define PERF_GUEST_USER 0x02 + +struct perf_guest_info_callbacks { + unsigned int (*state)(void); + unsigned long (*get_ip)(void); + unsigned int (*handle_intel_pt_intr)(void); +}; + #ifdef CONFIG_GUEST_PERF_EVENTS + extern struct perf_guest_info_callbacks __rcu *perf_guest_cbs; DECLARE_STATIC_CALL(__perf_guest_state, *perf_guest_cbs->state); @@ -1614,21 +1645,27 @@ static inline unsigned int perf_guest_state(void) { return static_call(__perf_guest_state)(); } + static inline unsigned long perf_guest_get_ip(void) { return static_call(__perf_guest_get_ip)(); } + static inline unsigned int perf_guest_handle_intel_pt_intr(void) { return static_call(__perf_guest_handle_intel_pt_intr)(); } + extern void perf_register_guest_info_callbacks(struct perf_guest_info_callbacks *cbs); extern void perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks *cbs); -#else + +#else /* !CONFIG_GUEST_PERF_EVENTS: */ + static inline unsigned int perf_guest_state(void) { return 0; } static inline unsigned long perf_guest_get_ip(void) { return 0; } static inline unsigned int perf_guest_handle_intel_pt_intr(void) { return 0; } -#endif /* CONFIG_GUEST_PERF_EVENTS */ + +#endif /* !CONFIG_GUEST_PERF_EVENTS */ extern void perf_event_exec(void); extern void perf_event_comm(struct task_struct *tsk, bool exec); @@ -1658,6 +1695,7 @@ static inline int perf_callchain_store_context(struct perf_callchain_entry_ctx * { if (ctx->contexts < sysctl_perf_event_max_contexts_per_stack) { struct perf_callchain_entry *entry = ctx->entry; + entry->ip[entry->nr++] = ip; ++ctx->contexts; return 0; @@ -1671,6 +1709,7 @@ static inline int perf_callchain_store(struct perf_callchain_entry_ctx *ctx, u64 { if (ctx->nr < ctx->max_stack && !ctx->contexts_maxed) { struct perf_callchain_entry *entry = ctx->entry; + entry->ip[entry->nr++] = ip; ++ctx->nr; return 0; @@ -1697,7 +1736,7 @@ static inline int perf_is_paranoid(void) return sysctl_perf_event_paranoid > -1; } -int perf_allow_kernel(void); +extern int perf_allow_kernel(void); static inline int perf_allow_cpu(void) { @@ -1760,7 +1799,7 @@ static inline bool needs_branch_stack(struct perf_event *event) static inline bool has_aux(struct perf_event *event) { - return event->pmu->setup_aux; + return event->pmu && event->pmu->setup_aux; } static inline bool has_aux_action(struct perf_event *event) @@ -1819,7 +1858,7 @@ extern int perf_output_begin_backward(struct perf_output_handle *handle, extern void perf_output_end(struct perf_output_handle *handle); extern unsigned int perf_output_copy(struct perf_output_handle *handle, - const void *buf, unsigned int len); + const void *buf, unsigned int len); extern unsigned int perf_output_skip(struct perf_output_handle *handle, unsigned int len); extern long perf_output_copy_aux(struct perf_output_handle *aux_handle, @@ -1836,7 +1875,9 @@ extern void perf_event_task_tick(void); extern int perf_event_account_interrupt(struct perf_event *event); extern int perf_event_period(struct perf_event *event, u64 value); extern u64 perf_event_pause(struct perf_event *event, bool reset); + #else /* !CONFIG_PERF_EVENTS: */ + static inline void * perf_aux_output_begin(struct perf_output_handle *handle, struct perf_event *event) { return NULL; } @@ -1914,19 +1955,14 @@ static inline void perf_event_disable(struct perf_event *event) { } static inline int __perf_event_disable(void *info) { return -1; } static inline void perf_event_task_tick(void) { } static inline int perf_event_release_kernel(struct perf_event *event) { return 0; } -static inline int perf_event_period(struct perf_event *event, u64 value) -{ - return -EINVAL; -} -static inline u64 perf_event_pause(struct perf_event *event, bool reset) -{ - return 0; -} -static inline int perf_exclude_event(struct perf_event *event, struct pt_regs *regs) -{ - return 0; -} -#endif +static inline int +perf_event_period(struct perf_event *event, u64 value) { return -EINVAL; } +static inline u64 +perf_event_pause(struct perf_event *event, bool reset) { return 0; } +static inline int +perf_exclude_event(struct perf_event *event, struct pt_regs *regs) { return 0; } + +#endif /* !CONFIG_PERF_EVENTS */ #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_INTEL) extern void perf_restore_debug_store(void); @@ -1934,31 +1970,31 @@ extern void perf_restore_debug_store(void); static inline void perf_restore_debug_store(void) { } #endif -#define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x)) +#define perf_output_put(handle, x) perf_output_copy((handle), &(x), sizeof(x)) struct perf_pmu_events_attr { - struct device_attribute attr; - u64 id; - const char *event_str; + struct device_attribute attr; + u64 id; + const char *event_str; }; struct perf_pmu_events_ht_attr { - struct device_attribute attr; - u64 id; - const char *event_str_ht; - const char *event_str_noht; + struct device_attribute attr; + u64 id; + const char *event_str_ht; + const char *event_str_noht; }; struct perf_pmu_events_hybrid_attr { - struct device_attribute attr; - u64 id; - const char *event_str; - u64 pmu_type; + struct device_attribute attr; + u64 id; + const char *event_str; + u64 pmu_type; }; struct perf_pmu_format_hybrid_attr { - struct device_attribute attr; - u64 pmu_type; + struct device_attribute attr; + u64 pmu_type; }; ssize_t perf_event_sysfs_show(struct device *dev, struct device_attribute *attr, @@ -2000,11 +2036,11 @@ static struct device_attribute format_attr_##_name = __ATTR_RO(_name) /* Performance counter hotplug functions */ #ifdef CONFIG_PERF_EVENTS -int perf_event_init_cpu(unsigned int cpu); -int perf_event_exit_cpu(unsigned int cpu); +extern int perf_event_init_cpu(unsigned int cpu); +extern int perf_event_exit_cpu(unsigned int cpu); #else -#define perf_event_init_cpu NULL -#define perf_event_exit_cpu NULL +# define perf_event_init_cpu NULL +# define perf_event_exit_cpu NULL #endif extern void arch_perf_update_userpage(struct perf_event *event, diff --git a/include/linux/pid.h b/include/linux/pid.h index 311ecebd7d56..453ae6d8a68d 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -77,7 +77,7 @@ struct file; struct pid *pidfd_pid(const struct file *file); struct pid *pidfd_get_pid(unsigned int fd, unsigned int *flags); struct task_struct *pidfd_get_task(int pidfd, unsigned int *flags); -int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret); +int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret_file); void do_notify_pidfd(struct task_struct *task); static inline struct pid *get_pid(struct pid *pid) diff --git a/include/linux/pidfs.h b/include/linux/pidfs.h index 05e6f8f4a026..77e7db194914 100644 --- a/include/linux/pidfs.h +++ b/include/linux/pidfs.h @@ -2,11 +2,19 @@ #ifndef _LINUX_PID_FS_H #define _LINUX_PID_FS_H +struct coredump_params; + struct file *pidfs_alloc_file(struct pid *pid, unsigned int flags); void __init pidfs_init(void); void pidfs_add_pid(struct pid *pid); void pidfs_remove_pid(struct pid *pid); void pidfs_exit(struct task_struct *tsk); +#ifdef CONFIG_COREDUMP +void pidfs_coredump(const struct coredump_params *cprm); +#endif extern const struct dentry_operations pidfs_dentry_operations; +int pidfs_register_pid(struct pid *pid); +void pidfs_get_pid(struct pid *pid); +void pidfs_put_pid(struct pid *pid); #endif /* _LINUX_PID_FS_H */ diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index d56a78af4af1..0b18160901a2 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -142,6 +142,8 @@ struct genpd_governor_data { bool max_off_time_changed; ktime_t next_wakeup; ktime_t next_hrtimer; + ktime_t last_enter; + bool reflect_residency; bool cached_power_down_ok; bool cached_power_down_state_idx; }; @@ -153,6 +155,8 @@ struct genpd_power_state { s64 residency_ns; u64 usage; u64 rejected; + u64 above; + u64 below; struct fwnode_handle *fwnode; u64 idle_time; void *data; @@ -285,6 +289,8 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, int pm_genpd_init(struct generic_pm_domain *genpd, struct dev_power_governor *gov, bool is_off); int pm_genpd_remove(struct generic_pm_domain *genpd); +void pm_genpd_inc_rejected(struct generic_pm_domain *genpd, + unsigned int state_idx); struct device *dev_to_genpd_dev(struct device *dev); int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state); int dev_pm_genpd_add_notifier(struct device *dev, struct notifier_block *nb); @@ -336,6 +342,10 @@ static inline int pm_genpd_remove(struct generic_pm_domain *genpd) return -EOPNOTSUPP; } +static inline void pm_genpd_inc_rejected(struct generic_pm_domain *genpd, + unsigned int state_idx) +{ } + static inline struct device *dev_to_genpd_dev(struct device *dev) { return ERR_PTR(-EOPNOTSUPP); diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index c247317aae38..cf477beae4bb 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -11,6 +11,7 @@ #ifndef __LINUX_OPP_H__ #define __LINUX_OPP_H__ +#include <linux/cleanup.h> #include <linux/energy_model.h> #include <linux/err.h> #include <linux/notifier.h> @@ -100,7 +101,7 @@ struct dev_pm_opp_data { #if defined(CONFIG_PM_OPP) struct opp_table *dev_pm_opp_get_opp_table(struct device *dev); -void dev_pm_opp_get_opp_table_ref(struct opp_table *opp_table); +struct opp_table *dev_pm_opp_get_opp_table_ref(struct opp_table *opp_table); void dev_pm_opp_put_opp_table(struct opp_table *opp_table); unsigned long dev_pm_opp_get_bw(struct dev_pm_opp *opp, bool peak, int index); @@ -161,7 +162,7 @@ struct dev_pm_opp *dev_pm_opp_find_bw_ceil(struct device *dev, struct dev_pm_opp *dev_pm_opp_find_bw_floor(struct device *dev, unsigned int *bw, int index); -void dev_pm_opp_get(struct dev_pm_opp *opp); +struct dev_pm_opp *dev_pm_opp_get(struct dev_pm_opp *opp); void dev_pm_opp_put(struct dev_pm_opp *opp); int dev_pm_opp_add_dynamic(struct device *dev, struct dev_pm_opp_data *opp); @@ -196,6 +197,7 @@ int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask) void dev_pm_opp_remove_table(struct device *dev); void dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask); int dev_pm_opp_sync_regulators(struct device *dev); + #else static inline struct opp_table *dev_pm_opp_get_opp_table(struct device *dev) { @@ -207,7 +209,10 @@ static inline struct opp_table *dev_pm_opp_get_opp_table_indexed(struct device * return ERR_PTR(-EOPNOTSUPP); } -static inline void dev_pm_opp_get_opp_table_ref(struct opp_table *opp_table) {} +static inline struct opp_table *dev_pm_opp_get_opp_table_ref(struct opp_table *opp_table) +{ + return opp_table; +} static inline void dev_pm_opp_put_opp_table(struct opp_table *opp_table) {} @@ -345,7 +350,10 @@ static inline struct dev_pm_opp *dev_pm_opp_find_bw_floor(struct device *dev, return ERR_PTR(-EOPNOTSUPP); } -static inline void dev_pm_opp_get(struct dev_pm_opp *opp) {} +static inline struct dev_pm_opp *dev_pm_opp_get(struct dev_pm_opp *opp) +{ + return opp; +} static inline void dev_pm_opp_put(struct dev_pm_opp *opp) {} @@ -573,6 +581,12 @@ static inline int dev_pm_opp_of_find_icc_paths(struct device *dev, struct opp_ta } #endif +/* Scope based cleanup macro for OPP reference counting */ +DEFINE_FREE(put_opp, struct dev_pm_opp *, if (!IS_ERR_OR_NULL(_T)) dev_pm_opp_put(_T)) + +/* Scope based cleanup macro for OPP table reference counting */ +DEFINE_FREE(put_opp_table, struct opp_table *, if (!IS_ERR_OR_NULL(_T)) dev_pm_opp_put_opp_table(_T)) + /* OPP Configuration helpers */ static inline int dev_pm_opp_add(struct device *dev, unsigned long freq, @@ -704,4 +718,14 @@ static inline unsigned long dev_pm_opp_get_freq(struct dev_pm_opp *opp) return dev_pm_opp_get_freq_indexed(opp, 0); } +static inline int dev_pm_opp_set_level(struct device *dev, unsigned int level) +{ + struct dev_pm_opp *opp __free(put_opp) = dev_pm_opp_find_level_exact(dev, level); + + if (IS_ERR(opp)) + return PTR_ERR(opp); + + return dev_pm_opp_set_opp(dev, opp); +} + #endif /* __LINUX_OPP_H__ */ diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 7fb5a459847e..756b842dcd30 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -96,7 +96,9 @@ extern void pm_runtime_new_link(struct device *dev); extern void pm_runtime_drop_link(struct device_link *link); extern void pm_runtime_release_supplier(struct device_link *link); +int devm_pm_runtime_set_active_enabled(struct device *dev); extern int devm_pm_runtime_enable(struct device *dev); +int devm_pm_runtime_get_noresume(struct device *dev); /** * pm_suspend_ignore_children - Set runtime PM behavior regarding children. @@ -294,7 +296,9 @@ static inline bool pm_runtime_blocked(struct device *dev) { return true; } static inline void pm_runtime_allow(struct device *dev) {} static inline void pm_runtime_forbid(struct device *dev) {} +static inline int devm_pm_runtime_set_active_enabled(struct device *dev) { return 0; } static inline int devm_pm_runtime_enable(struct device *dev) { return 0; } +static inline int devm_pm_runtime_get_noresume(struct device *dev) { return 0; } static inline void pm_suspend_ignore_children(struct device *dev, bool enable) {} static inline void pm_runtime_get_noresume(struct device *dev) {} diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index 888824592953..c4cb854971f5 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -288,6 +288,7 @@ struct power_supply_desc { struct power_supply_ext { const char *const name; u8 charge_behaviours; + u32 charge_types; const enum power_supply_property *properties; size_t num_properties; diff --git a/include/linux/psp-sev.h b/include/linux/psp-sev.h index f3cad182d4ef..0b3a36bdaa90 100644 --- a/include/linux/psp-sev.h +++ b/include/linux/psp-sev.h @@ -954,6 +954,7 @@ int sev_do_cmd(int cmd, void *data, int *psp_ret); void *psp_copy_user_blob(u64 uaddr, u32 len); void *snp_alloc_firmware_page(gfp_t mask); void snp_free_firmware_page(void *addr); +void sev_platform_shutdown(void); #else /* !CONFIG_CRYPTO_DEV_SP_PSP */ @@ -988,6 +989,8 @@ static inline void *snp_alloc_firmware_page(gfp_t mask) static inline void snp_free_firmware_page(void *addr) { } +static inline void sev_platform_shutdown(void) { } + #endif /* CONFIG_CRYPTO_DEV_SP_PSP */ #endif /* __PSP_SEV_H__ */ diff --git a/include/linux/pwm.h b/include/linux/pwm.h index 9ece4e5d3815..63a17d2b4ec8 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -218,6 +218,8 @@ static inline void pwm_init_state(const struct pwm_device *pwm, * * pwm_get_state(pwm, &state); * duty = pwm_get_relative_duty_cycle(&state, 100); + * + * Returns: rounded relative duty cycle multiplied by @scale */ static inline unsigned int pwm_get_relative_duty_cycle(const struct pwm_state *state, unsigned int scale) @@ -244,8 +246,8 @@ pwm_get_relative_duty_cycle(const struct pwm_state *state, unsigned int scale) * pwm_set_relative_duty_cycle(&state, 50, 100); * pwm_apply_might_sleep(pwm, &state); * - * This functions returns -EINVAL if @duty_cycle and/or @scale are - * inconsistent (@scale == 0 or @duty_cycle > @scale). + * Returns: 0 on success or ``-EINVAL`` if @duty_cycle and/or @scale are + * inconsistent (@scale == 0 or @duty_cycle > @scale) */ static inline int pwm_set_relative_duty_cycle(struct pwm_state *state, unsigned int duty_cycle, @@ -351,7 +353,7 @@ struct pwm_chip { * pwmchip_supports_waveform() - checks if the given chip supports waveform callbacks * @chip: The pwm_chip to test * - * Returns true iff the pwm chip support the waveform functions like + * Returns: true iff the pwm chip support the waveform functions like * pwm_set_waveform_might_sleep() and pwm_round_waveform_might_sleep() */ static inline bool pwmchip_supports_waveform(struct pwm_chip *chip) @@ -369,7 +371,7 @@ static inline struct device *pwmchip_parent(const struct pwm_chip *chip) return chip->dev.parent; } -static inline void *pwmchip_get_drvdata(struct pwm_chip *chip) +static inline void *pwmchip_get_drvdata(const struct pwm_chip *chip) { return dev_get_drvdata(&chip->dev); } diff --git a/include/linux/ratelimit.h b/include/linux/ratelimit.h index b17e0cd0a30c..7aaad158ee37 100644 --- a/include/linux/ratelimit.h +++ b/include/linux/ratelimit.h @@ -22,16 +22,43 @@ static inline void ratelimit_default_init(struct ratelimit_state *rs) DEFAULT_RATELIMIT_BURST); } +static inline void ratelimit_state_inc_miss(struct ratelimit_state *rs) +{ + atomic_inc(&rs->missed); +} + +static inline int ratelimit_state_get_miss(struct ratelimit_state *rs) +{ + return atomic_read(&rs->missed); +} + +static inline int ratelimit_state_reset_miss(struct ratelimit_state *rs) +{ + return atomic_xchg_relaxed(&rs->missed, 0); +} + +static inline void ratelimit_state_reset_interval(struct ratelimit_state *rs, int interval_init) +{ + unsigned long flags; + + raw_spin_lock_irqsave(&rs->lock, flags); + rs->interval = interval_init; + rs->flags &= ~RATELIMIT_INITIALIZED; + atomic_set(&rs->rs_n_left, rs->burst); + ratelimit_state_reset_miss(rs); + raw_spin_unlock_irqrestore(&rs->lock, flags); +} + static inline void ratelimit_state_exit(struct ratelimit_state *rs) { + int m; + if (!(rs->flags & RATELIMIT_MSG_ON_RELEASE)) return; - if (rs->missed) { - pr_warn("%s: %d output lines suppressed due to ratelimiting\n", - current->comm, rs->missed); - rs->missed = 0; - } + m = ratelimit_state_reset_miss(rs); + if (m) + pr_warn("%s: %d output lines suppressed due to ratelimiting\n", current->comm, m); } static inline void diff --git a/include/linux/ratelimit_types.h b/include/linux/ratelimit_types.h index 765232ce0b5e..b19c4354540a 100644 --- a/include/linux/ratelimit_types.h +++ b/include/linux/ratelimit_types.h @@ -11,14 +11,15 @@ /* issue num suppressed message on exit */ #define RATELIMIT_MSG_ON_RELEASE BIT(0) +#define RATELIMIT_INITIALIZED BIT(1) struct ratelimit_state { raw_spinlock_t lock; /* protect the state */ int interval; int burst; - int printed; - int missed; + atomic_t rs_n_left; + atomic_t missed; unsigned int flags; unsigned long begin; }; diff --git a/include/linux/rcuref.h b/include/linux/rcuref.h index 6322d8c1c6b4..2fb2af6d9824 100644 --- a/include/linux/rcuref.h +++ b/include/linux/rcuref.h @@ -30,7 +30,11 @@ static inline void rcuref_init(rcuref_t *ref, unsigned int cnt) * rcuref_read - Read the number of held reference counts of a rcuref * @ref: Pointer to the reference count * - * Return: The number of held references (0 ... N) + * Return: The number of held references (0 ... N). The value 0 does not + * indicate that it is safe to schedule the object, protected by this reference + * counter, for deconstruction. + * If you want to know if the reference counter has been marked DEAD (as + * signaled by rcuref_put()) please use rcuread_is_dead(). */ static inline unsigned int rcuref_read(rcuref_t *ref) { @@ -40,6 +44,22 @@ static inline unsigned int rcuref_read(rcuref_t *ref) return c >= RCUREF_RELEASED ? 0 : c + 1; } +/** + * rcuref_is_dead - Check if the rcuref has been already marked dead + * @ref: Pointer to the reference count + * + * Return: True if the object has been marked DEAD. This signals that a previous + * invocation of rcuref_put() returned true on this reference counter meaning + * the protected object can safely be scheduled for deconstruction. + * Otherwise, returns false. + */ +static inline bool rcuref_is_dead(rcuref_t *ref) +{ + unsigned int c = atomic_read(&ref->refcnt); + + return (c >= RCUREF_RELEASED) && (c < RCUREF_NOREF); +} + extern __must_check bool rcuref_get_slowpath(rcuref_t *ref); /** diff --git a/include/linux/regmap.h b/include/linux/regmap.h index d17c5ea3d55d..02b83f5499b8 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -1641,6 +1641,8 @@ struct regmap_irq_chip_data; * @ack_invert: Inverted ack register: cleared bits for ack. * @clear_ack: Use this to set 1 and 0 or vice-versa to clear interrupts. * @status_invert: Inverted status register: cleared bits are active interrupts. + * @status_is_level: Status register is actuall signal level: Xor status + * register with previous value to get active interrupts. * @wake_invert: Inverted wake register: cleared bits are wake enabled. * @type_in_mask: Use the mask registers for controlling irq type. Use this if * the hardware provides separate bits for rising/falling edge @@ -1704,6 +1706,7 @@ struct regmap_irq_chip { unsigned int ack_invert:1; unsigned int clear_ack:1; unsigned int status_invert:1; + unsigned int status_is_level:1; unsigned int wake_invert:1; unsigned int type_in_mask:1; unsigned int clear_on_unmask:1; diff --git a/include/linux/regulator/max8952.h b/include/linux/regulator/max8952.h index 8712c091abf0..61dcd8e00a2f 100644 --- a/include/linux/regulator/max8952.h +++ b/include/linux/regulator/max8952.h @@ -2,7 +2,7 @@ /* * max8952.h - Voltage regulation for the Maxim 8952 * - * Copyright (C) 2010 Samsung Electrnoics + * Copyright (C) 2010 Samsung Electronics * MyungJoo Ham <myungjoo.ham@samsung.com> */ diff --git a/include/linux/regulator/pca9450.h b/include/linux/regulator/pca9450.h index b427b5873de1..85b4fecc10d8 100644 --- a/include/linux/regulator/pca9450.h +++ b/include/linux/regulator/pca9450.h @@ -35,6 +35,8 @@ enum { PCA9450_DVS_LEVEL_MAX, }; +#define PCA9450_RESTART_HANDLER_PRIORITY 130 + #define PCA9450_BUCK1_VOLTAGE_NUM 0x80 #define PCA9450_BUCK2_VOLTAGE_NUM 0x80 #define PCA9450_BUCK3_VOLTAGE_NUM 0x80 @@ -235,4 +237,7 @@ enum { #define I2C_LT_ON_RUN 0x02 #define I2C_LT_FORCE_ENABLE 0x03 +/* PCA9450_REG_SW_RST command */ +#define SW_RST_COMMAND 0x14 + #endif /* __LINUX_REG_PCA9450_H__ */ diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h index 880351ca3dfc..9ba771f2ddea 100644 --- a/include/linux/resctrl.h +++ b/include/linux/resctrl.h @@ -8,6 +8,10 @@ #include <linux/pid.h> #include <linux/resctrl_types.h> +#ifdef CONFIG_ARCH_HAS_CPU_RESCTRL +#include <asm/resctrl.h> +#endif + /* CLOSID, RMID value used by the default control group */ #define RESCTRL_RESERVED_CLOSID 0 #define RESCTRL_RESERVED_RMID 0 @@ -44,6 +48,16 @@ int proc_resctrl_show(struct seq_file *m, for_each_rdt_resource((r)) \ if ((r)->mon_capable) +enum resctrl_res_level { + RDT_RESOURCE_L3, + RDT_RESOURCE_L2, + RDT_RESOURCE_MBA, + RDT_RESOURCE_SMBA, + + /* Must be the last */ + RDT_NUM_RESOURCES, +}; + /** * enum resctrl_conf_type - The type of configuration. * @CDP_NONE: No prioritisation, both code and data are controlled or monitored. @@ -358,7 +372,7 @@ u32 resctrl_arch_get_num_closid(struct rdt_resource *r); u32 resctrl_arch_system_num_rmid_idx(void); int resctrl_arch_update_domains(struct rdt_resource *r, u32 closid); -__init bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt); +bool resctrl_arch_is_evt_configurable(enum resctrl_event_id evt); /** * resctrl_arch_mon_event_config_write() - Write the config for an event. @@ -399,6 +413,9 @@ static inline u32 resctrl_get_config_index(u32 closid, } } +bool resctrl_arch_get_cdp_enabled(enum resctrl_res_level l); +int resctrl_arch_set_cdp_enabled(enum resctrl_res_level l, bool enable); + /* * Update the ctrl_val and apply this config right now. * Must be called on one of the domain's CPUs. @@ -514,7 +531,20 @@ void resctrl_arch_reset_all_ctrls(struct rdt_resource *r); extern unsigned int resctrl_rmid_realloc_threshold; extern unsigned int resctrl_rmid_realloc_limit; -int __init resctrl_init(void); -void __exit resctrl_exit(void); - +int resctrl_init(void); +void resctrl_exit(void); + +#ifdef CONFIG_RESCTRL_FS_PSEUDO_LOCK +u64 resctrl_arch_get_prefetch_disable_bits(void); +int resctrl_arch_pseudo_lock_fn(void *_plr); +int resctrl_arch_measure_cycles_lat_fn(void *_plr); +int resctrl_arch_measure_l2_residency(void *_plr); +int resctrl_arch_measure_l3_residency(void *_plr); +#else +static inline u64 resctrl_arch_get_prefetch_disable_bits(void) { return 0; } +static inline int resctrl_arch_pseudo_lock_fn(void *_plr) { return 0; } +static inline int resctrl_arch_measure_cycles_lat_fn(void *_plr) { return 0; } +static inline int resctrl_arch_measure_l2_residency(void *_plr) { return 0; } +static inline int resctrl_arch_measure_l3_residency(void *_plr) { return 0; } +#endif /* CONFIG_RESCTRL_FS_PSEUDO_LOCK */ #endif /* _RESCTRL_H */ diff --git a/include/linux/resctrl_types.h b/include/linux/resctrl_types.h index f26450b3326b..a25fb9c4070d 100644 --- a/include/linux/resctrl_types.h +++ b/include/linux/resctrl_types.h @@ -7,6 +7,9 @@ #ifndef __LINUX_RESCTRL_TYPES_H #define __LINUX_RESCTRL_TYPES_H +#define MAX_MBA_BW 100u +#define MBM_OVERFLOW_INTERVAL 1000 + /* Reads to Local DRAM Memory */ #define READS_TO_LOCAL_MEM BIT(0) @@ -31,16 +34,6 @@ /* Max event bits supported */ #define MAX_EVT_CONFIG_BITS GENMASK(6, 0) -enum resctrl_res_level { - RDT_RESOURCE_L3, - RDT_RESOURCE_L2, - RDT_RESOURCE_MBA, - RDT_RESOURCE_SMBA, - - /* Must be the last */ - RDT_NUM_RESOURCES, -}; - /* * Event IDs, the values match those used to program IA32_QM_EVTSEL before * reading IA32_QM_CTR on RDT systems. @@ -49,6 +42,9 @@ enum resctrl_event_id { QOS_L3_OCCUP_EVENT_ID = 0x01, QOS_L3_MBM_TOTAL_EVENT_ID = 0x02, QOS_L3_MBM_LOCAL_EVENT_ID = 0x03, + + /* Must be the last */ + QOS_NUM_EVENTS, }; #endif /* __LINUX_RESCTRL_TYPES_H */ diff --git a/include/linux/restart_block.h b/include/linux/restart_block.h index 13f17676c5f4..7e50bbc94e47 100644 --- a/include/linux/restart_block.h +++ b/include/linux/restart_block.h @@ -26,7 +26,7 @@ struct restart_block { unsigned long arch_data; long (*fn)(struct restart_block *); union { - /* For futex_wait and futex_wait_requeue_pi */ + /* For futex_wait() */ struct { u32 __user *uaddr; u32 val; diff --git a/include/linux/sched.h b/include/linux/sched.h index f96ac1982893..45e5953b8f32 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -44,7 +44,6 @@ #include <linux/seqlock_types.h> #include <linux/kcsan.h> #include <linux/rv.h> -#include <linux/livepatch_sched.h> #include <linux/uidgid_types.h> #include <linux/tracepoint-defs.h> #include <asm/kmap_size.h> @@ -1646,22 +1645,15 @@ struct task_struct { struct user_event_mm *user_event_mm; #endif - /* - * New fields for task_struct should be added above here, so that - * they are included in the randomized portion of task_struct. - */ - randomized_struct_fields_end - /* CPU-specific state of this task: */ struct thread_struct thread; /* - * WARNING: on x86, 'thread_struct' contains a variable-sized - * structure. It *MUST* be at the end of 'task_struct'. - * - * Do not put anything below here! + * New fields for task_struct should be added above here, so that + * they are included in the randomized portion of task_struct. */ -}; + randomized_struct_fields_end +} __attribute__ ((aligned (64))); #define TASK_REPORT_IDLE (TASK_REPORT + 1) #define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1) @@ -2089,9 +2081,6 @@ extern int __cond_resched(void); #if defined(CONFIG_PREEMPT_DYNAMIC) && defined(CONFIG_HAVE_PREEMPT_DYNAMIC_CALL) -void sched_dynamic_klp_enable(void); -void sched_dynamic_klp_disable(void); - DECLARE_STATIC_CALL(cond_resched, __cond_resched); static __always_inline int _cond_resched(void) @@ -2112,7 +2101,6 @@ static __always_inline int _cond_resched(void) static inline int _cond_resched(void) { - klp_sched_try_switch(); return __cond_resched(); } @@ -2122,7 +2110,6 @@ static inline int _cond_resched(void) static inline int _cond_resched(void) { - klp_sched_try_switch(); return 0; } diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 7b4301b7235f..198bb5cc1774 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -195,6 +195,8 @@ struct sched_domain_topology_level { }; extern void __init set_sched_topology(struct sched_domain_topology_level *tl); +extern void sched_update_asym_prefer_cpu(int cpu, int old_prio, int new_prio); + # define SD_INIT_NAME(type) .name = #type @@ -223,6 +225,10 @@ static inline bool cpus_share_resources(int this_cpu, int that_cpu) return true; } +static inline void sched_update_asym_prefer_cpu(int cpu, int old_prio, int new_prio) +{ +} + #endif /* !CONFIG_SMP */ #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) diff --git a/include/linux/screen_info.h b/include/linux/screen_info.h index 6a4a3cec4638..923d68e07679 100644 --- a/include/linux/screen_info.h +++ b/include/linux/screen_info.h @@ -126,8 +126,17 @@ static inline unsigned int screen_info_video_type(const struct screen_info *si) return VIDEO_TYPE_CGA; } +static inline u32 __screen_info_vesapm_info_base(const struct screen_info *si) +{ + if (si->vesapm_seg < 0xc000) + return 0; + return (si->vesapm_seg << 4) + si->vesapm_off; +} + ssize_t screen_info_resources(const struct screen_info *si, struct resource *r, size_t num); +u32 __screen_info_lfb_bits_per_pixel(const struct screen_info *si); + #if defined(CONFIG_PCI) void screen_info_apply_fixups(void); struct pci_dev *screen_info_pci_dev(const struct screen_info *si); diff --git a/include/linux/security.h b/include/linux/security.h index cc9b54d95d22..dba349629229 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -563,7 +563,6 @@ int security_setselfattr(unsigned int attr, struct lsm_ctx __user *ctx, int security_getprocattr(struct task_struct *p, int lsmid, const char *name, char **value); int security_setprocattr(int lsmid, const char *name, void *value, size_t size); -int security_netlink_send(struct sock *sk, struct sk_buff *skb); int security_ismaclabel(const char *name); int security_secid_to_secctx(u32 secid, struct lsm_context *cp); int security_lsmprop_to_secctx(struct lsm_prop *prop, struct lsm_context *cp); @@ -1527,11 +1526,6 @@ static inline int security_setprocattr(int lsmid, char *name, void *value, return -EINVAL; } -static inline int security_netlink_send(struct sock *sk, struct sk_buff *skb) -{ - return 0; -} - static inline int security_ismaclabel(const char *name) { return 0; @@ -1629,6 +1623,7 @@ static inline int security_watch_key(struct key *key) #ifdef CONFIG_SECURITY_NETWORK +int security_netlink_send(struct sock *sk, struct sk_buff *skb); int security_unix_stream_connect(struct sock *sock, struct sock *other, struct sock *newsk); int security_unix_may_send(struct socket *sock, struct socket *other); int security_socket_create(int family, int type, int protocol, int kern); @@ -1684,6 +1679,11 @@ int security_sctp_assoc_established(struct sctp_association *asoc, int security_mptcp_add_subflow(struct sock *sk, struct sock *ssk); #else /* CONFIG_SECURITY_NETWORK */ +static inline int security_netlink_send(struct sock *sk, struct sk_buff *skb) +{ + return 0; +} + static inline int security_unix_stream_connect(struct sock *sock, struct sock *other, struct sock *newsk) diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index 0b273a7b9f01..5f03a39a26f7 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -104,10 +104,11 @@ static inline bool shmem_mapping(struct address_space *mapping) return false; } #endif /* CONFIG_SHMEM */ -extern void shmem_unlock_mapping(struct address_space *mapping); -extern struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, +void shmem_unlock_mapping(struct address_space *mapping); +struct page *shmem_read_mapping_page_gfp(struct address_space *mapping, pgoff_t index, gfp_t gfp_mask); -extern void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); +int shmem_writeout(struct folio *folio, struct writeback_control *wbc); +void shmem_truncate_range(struct inode *inode, loff_t start, loff_t end); int shmem_unuse(unsigned int type); #ifdef CONFIG_TRANSPARENT_HUGEPAGE diff --git a/include/linux/spi/sh_msiof.h b/include/linux/spi/sh_msiof.h index f950d280461b..9fbef3fd4056 100644 --- a/include/linux/spi/sh_msiof.h +++ b/include/linux/spi/sh_msiof.h @@ -2,6 +2,131 @@ #ifndef __SPI_SH_MSIOF_H__ #define __SPI_SH_MSIOF_H__ +#include <linux/bitfield.h> +#include <linux/bits.h> + +#define SITMDR1 0x00 /* Transmit Mode Register 1 */ +#define SITMDR2 0x04 /* Transmit Mode Register 2 */ +#define SITMDR3 0x08 /* Transmit Mode Register 3 */ +#define SIRMDR1 0x10 /* Receive Mode Register 1 */ +#define SIRMDR2 0x14 /* Receive Mode Register 2 */ +#define SIRMDR3 0x18 /* Receive Mode Register 3 */ +#define SITSCR 0x20 /* Transmit Clock Select Register */ +#define SIRSCR 0x22 /* Receive Clock Select Register (SH, A1, APE6) */ +#define SICTR 0x28 /* Control Register */ +#define SIFCTR 0x30 /* FIFO Control Register */ +#define SISTR 0x40 /* Status Register */ +#define SIIER 0x44 /* Interrupt Enable Register */ +#define SITDR1 0x48 /* Transmit Control Data Register 1 (SH, A1) */ +#define SITDR2 0x4c /* Transmit Control Data Register 2 (SH, A1) */ +#define SITFDR 0x50 /* Transmit FIFO Data Register */ +#define SIRDR1 0x58 /* Receive Control Data Register 1 (SH, A1) */ +#define SIRDR2 0x5c /* Receive Control Data Register 2 (SH, A1) */ +#define SIRFDR 0x60 /* Receive FIFO Data Register */ + +/* SITMDR1 and SIRMDR1 */ +#define SIMDR1_TRMD BIT(31) /* Transfer Mode (1 = Master mode) */ +#define SIMDR1_SYNCMD GENMASK(29, 28) /* SYNC Mode */ +#define SIMDR1_SYNCMD_PULSE 0U /* Frame start sync pulse */ +#define SIMDR1_SYNCMD_SPI 2U /* Level mode/SPI */ +#define SIMDR1_SYNCMD_LR 3U /* L/R mode */ +#define SIMDR1_SYNCAC BIT(25) /* Sync Polarity (1 = Active-low) */ +#define SIMDR1_BITLSB BIT(24) /* MSB/LSB First (1 = LSB first) */ +#define SIMDR1_DTDL GENMASK(22, 20) /* Data Pin Bit Delay for MSIOF_SYNC */ +#define SIMDR1_SYNCDL GENMASK(18, 16) /* Frame Sync Signal Timing Delay */ +#define SIMDR1_FLD GENMASK(3, 2) /* Frame Sync Signal Interval (0-3) */ +#define SIMDR1_XXSTP BIT(0) /* Transmission/Reception Stop on FIFO */ +/* SITMDR1 */ +#define SITMDR1_PCON BIT(30) /* Transfer Signal Connection */ +#define SITMDR1_SYNCCH GENMASK(27, 26) /* Sync Signal Channel Select */ + /* 0=MSIOF_SYNC, 1=MSIOF_SS1, 2=MSIOF_SS2 */ + +/* SITMDR2 and SIRMDR2 */ +#define SIMDR2_GRP GENMASK(31, 30) /* Group Count */ +#define SIMDR2_BITLEN1 GENMASK(28, 24) /* Data Size (8-32 bits) */ +#define SIMDR2_WDLEN1 GENMASK(23, 16) /* Word Count (1-64/256 (SH, A1))) */ +#define SIMDR2_GRPMASK GENMASK(3, 0) /* Group Output Mask 1-4 (SH, A1) */ + +/* SITMDR3 and SIRMDR3 */ +#define SIMDR3_BITLEN2 GENMASK(28, 24) /* Data Size (8-32 bits) */ +#define SIMDR3_WDLEN2 GENMASK(23, 16) /* Word Count (1-64/256 (SH, A1))) */ + +/* SITSCR and SIRSCR */ +#define SISCR_BRPS GENMASK(12, 8) /* Prescaler Setting (1-32) */ +#define SISCR_BRDV GENMASK(2, 0) /* Baud Rate Generator's Division Ratio */ + +/* SICTR */ +#define SICTR_TSCKIZ GENMASK(31, 30) /* Transmit Clock I/O Polarity Select */ +#define SICTR_TSCKIZ_SCK BIT(31) /* Disable SCK when TX disabled */ +#define SICTR_TSCKIZ_POL BIT(30) /* Transmit Clock Polarity */ +#define SICTR_RSCKIZ GENMASK(29, 28) /* Receive Clock Polarity Select */ +#define SICTR_RSCKIZ_SCK BIT(29) /* Must match CTR_TSCKIZ_SCK */ +#define SICTR_RSCKIZ_POL BIT(28) /* Receive Clock Polarity */ +#define SICTR_TEDG BIT(27) /* Transmit Timing (1 = falling edge) */ +#define SICTR_REDG BIT(26) /* Receive Timing (1 = falling edge) */ +#define SICTR_TXDIZ GENMASK(23, 22) /* Pin Output When TX is Disabled */ +#define SICTR_TXDIZ_LOW 0U /* 0 */ +#define SICTR_TXDIZ_HIGH 1U /* 1 */ +#define SICTR_TXDIZ_HIZ 2U /* High-impedance */ +#define SICTR_TSCKE BIT(15) /* Transmit Serial Clock Output Enable */ +#define SICTR_TFSE BIT(14) /* Transmit Frame Sync Signal Output Enable */ +#define SICTR_TXE BIT(9) /* Transmit Enable */ +#define SICTR_RXE BIT(8) /* Receive Enable */ +#define SICTR_TXRST BIT(1) /* Transmit Reset */ +#define SICTR_RXRST BIT(0) /* Receive Reset */ + +/* SIFCTR */ +#define SIFCTR_TFWM GENMASK(31, 29) /* Transmit FIFO Watermark */ +#define SIFCTR_TFWM_64 0U /* Transfer Request when 64 empty stages */ +#define SIFCTR_TFWM_32 1U /* Transfer Request when 32 empty stages */ +#define SIFCTR_TFWM_24 2U /* Transfer Request when 24 empty stages */ +#define SIFCTR_TFWM_16 3U /* Transfer Request when 16 empty stages */ +#define SIFCTR_TFWM_12 4U /* Transfer Request when 12 empty stages */ +#define SIFCTR_TFWM_8 5U /* Transfer Request when 8 empty stages */ +#define SIFCTR_TFWM_4 6U /* Transfer Request when 4 empty stages */ +#define SIFCTR_TFWM_1 7U /* Transfer Request when 1 empty stage */ +#define SIFCTR_TFUA GENMASK(28, 20) /* Transmit FIFO Usable Area */ +#define SIFCTR_RFWM GENMASK(15, 13) /* Receive FIFO Watermark */ +#define SIFCTR_RFWM_1 0U /* Transfer Request when 1 valid stages */ +#define SIFCTR_RFWM_4 1U /* Transfer Request when 4 valid stages */ +#define SIFCTR_RFWM_8 2U /* Transfer Request when 8 valid stages */ +#define SIFCTR_RFWM_16 3U /* Transfer Request when 16 valid stages */ +#define SIFCTR_RFWM_32 4U /* Transfer Request when 32 valid stages */ +#define SIFCTR_RFWM_64 5U /* Transfer Request when 64 valid stages */ +#define SIFCTR_RFWM_128 6U /* Transfer Request when 128 valid stages */ +#define SIFCTR_RFWM_256 7U /* Transfer Request when 256 valid stages */ +#define SIFCTR_RFUA GENMASK(12, 4) /* Receive FIFO Usable Area (0x40 = full) */ + +/* SISTR */ +#define SISTR_TFEMP BIT(29) /* Transmit FIFO Empty */ +#define SISTR_TDREQ BIT(28) /* Transmit Data Transfer Request */ +#define SISTR_TEOF BIT(23) /* Frame Transmission End */ +#define SISTR_TFSERR BIT(21) /* Transmit Frame Synchronization Error */ +#define SISTR_TFOVF BIT(20) /* Transmit FIFO Overflow */ +#define SISTR_TFUDF BIT(19) /* Transmit FIFO Underflow */ +#define SISTR_RFFUL BIT(13) /* Receive FIFO Full */ +#define SISTR_RDREQ BIT(12) /* Receive Data Transfer Request */ +#define SISTR_REOF BIT(7) /* Frame Reception End */ +#define SISTR_RFSERR BIT(5) /* Receive Frame Synchronization Error */ +#define SISTR_RFUDF BIT(4) /* Receive FIFO Underflow */ +#define SISTR_RFOVF BIT(3) /* Receive FIFO Overflow */ + +/* SIIER */ +#define SIIER_TDMAE BIT(31) /* Transmit Data DMA Transfer Req. Enable */ +#define SIIER_TFEMPE BIT(29) /* Transmit FIFO Empty Enable */ +#define SIIER_TDREQE BIT(28) /* Transmit Data Transfer Request Enable */ +#define SIIER_TEOFE BIT(23) /* Frame Transmission End Enable */ +#define SIIER_TFSERRE BIT(21) /* Transmit Frame Sync Error Enable */ +#define SIIER_TFOVFE BIT(20) /* Transmit FIFO Overflow Enable */ +#define SIIER_TFUDFE BIT(19) /* Transmit FIFO Underflow Enable */ +#define SIIER_RDMAE BIT(15) /* Receive Data DMA Transfer Req. Enable */ +#define SIIER_RFFULE BIT(13) /* Receive FIFO Full Enable */ +#define SIIER_RDREQE BIT(12) /* Receive Data Transfer Request Enable */ +#define SIIER_REOFE BIT(7) /* Frame Reception End Enable */ +#define SIIER_RFSERRE BIT(5) /* Receive Frame Sync Error Enable */ +#define SIIER_RFUDFE BIT(4) /* Receive FIFO Underflow Enable */ +#define SIIER_RFOVFE BIT(3) /* Receive FIFO Overflow Enable */ + enum { MSIOF_SPI_HOST, MSIOF_SPI_TARGET, diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 6e64f0193777..4789f91dae94 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -136,13 +136,6 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg, * @max_speed_hz: Maximum clock rate to be used with this chip * (on this board); may be changed by the device's driver. * The spi_transfer.speed_hz can override this for each transfer. - * @chip_select: Array of physical chipselect, spi->chipselect[i] gives - * the corresponding physical CS for logical CS i. - * @mode: The spi mode defines how data is clocked out and in. - * This may be changed by the device's driver. - * The "active low" default for chipselect mode can be overridden - * (by specifying SPI_CS_HIGH) as can the "MSB first" default for - * each word in a transfer (by specifying SPI_LSB_FIRST). * @bits_per_word: Data transfers involve one or more words; word sizes * like eight or 12 bits are common. In-memory wordsizes are * powers of two bytes (e.g. 20 bit samples use 32 bits). @@ -150,6 +143,11 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg, * default (0) indicating protocol words are eight bit bytes. * The spi_transfer.bits_per_word can override this for each transfer. * @rt: Make the pump thread real time priority. + * @mode: The spi mode defines how data is clocked out and in. + * This may be changed by the device's driver. + * The "active low" default for chipselect mode can be overridden + * (by specifying SPI_CS_HIGH) as can the "MSB first" default for + * each word in a transfer (by specifying SPI_LSB_FIRST). * @irq: Negative, or the number passed to request_irq() to receive * interrupts from this device. * @controller_state: Controller's runtime state @@ -162,8 +160,7 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg, * the device will bind to the named driver and only the named driver. * Do not set directly, because core frees it; use driver_set_override() to * set or clear it. - * @cs_gpiod: Array of GPIO descriptors of the corresponding chipselect lines - * (optional, NULL when not using a GPIO line) + * @pcpu_statistics: statistics for the spi_device * @word_delay: delay to be inserted between consecutive * words of a transfer * @cs_setup: delay to be introduced by the controller after CS is asserted @@ -171,8 +168,11 @@ extern void spi_transfer_cs_change_delay_exec(struct spi_message *msg, * @cs_inactive: delay to be introduced by the controller after CS is * deasserted. If @cs_change_delay is used from @spi_transfer, then the * two delays will be added up. - * @pcpu_statistics: statistics for the spi_device + * @chip_select: Array of physical chipselect, spi->chipselect[i] gives + * the corresponding physical CS for logical CS i. * @cs_index_mask: Bit mask of the active chipselect(s) in the chipselect array + * @cs_gpiod: Array of GPIO descriptors of the corresponding chipselect lines + * (optional, NULL when not using a GPIO line) * * A @spi_device is used to interchange data between an SPI target device * (usually a discrete chip) and CPU memory. @@ -187,7 +187,6 @@ struct spi_device { struct device dev; struct spi_controller *controller; u32 max_speed_hz; - u8 chip_select[SPI_CS_CNT_MAX]; u8 bits_per_word; bool rt; #define SPI_NO_TX BIT(31) /* No transmit wire */ @@ -218,23 +217,29 @@ struct spi_device { void *controller_data; char modalias[SPI_NAME_SIZE]; const char *driver_override; - struct gpio_desc *cs_gpiod[SPI_CS_CNT_MAX]; /* Chip select gpio desc */ + + /* The statistics */ + struct spi_statistics __percpu *pcpu_statistics; + struct spi_delay word_delay; /* Inter-word delay */ + /* CS delays */ struct spi_delay cs_setup; struct spi_delay cs_hold; struct spi_delay cs_inactive; - /* The statistics */ - struct spi_statistics __percpu *pcpu_statistics; + u8 chip_select[SPI_CS_CNT_MAX]; - /* Bit mask of the chipselect(s) that the driver need to use from - * the chipselect array.When the controller is capable to handle + /* + * Bit mask of the chipselect(s) that the driver need to use from + * the chipselect array. When the controller is capable to handle * multiple chip selects & memories are connected in parallel * then more than one bit need to be set in cs_index_mask. */ u32 cs_index_mask : SPI_CS_CNT_MAX; + struct gpio_desc *cs_gpiod[SPI_CS_CNT_MAX]; /* Chip select gpio desc */ + /* * Likely need more hooks for more protocol options affecting how * the controller talks to each chip, like: @@ -500,6 +505,8 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * found. * @put_offload: release the offload instance acquired by @get_offload. * @mem_caps: controller capabilities for the handling of memory operations. + * @dtr_caps: true if controller has dtr(single/dual transfer rate) capability. + * QSPI based controller should fill this based on controller's capability. * @unprepare_message: undo any work done by prepare_message(). * @target_abort: abort the ongoing transfer request on an SPI target controller * @cs_gpiods: Array of GPIO descriptors to use as chip select lines; one per CS @@ -743,6 +750,9 @@ struct spi_controller { const struct spi_controller_mem_ops *mem_ops; const struct spi_controller_mem_caps *mem_caps; + /* SPI or QSPI controller can set to true if supports SDR/DDR transfer rate */ + bool dtr_caps; + struct spi_offload *(*get_offload)(struct spi_device *spi, const struct spi_offload_config *config); void (*put_offload)(struct spi_offload *offload); @@ -995,6 +1005,7 @@ struct spi_res { * processed the word, i.e. the "pre" timestamp should be taken before * transmitting the "pre" word, and the "post" timestamp after receiving * transmit confirmation from the controller for the "post" word. + * @dtr_mode: true if supports double transfer rate. * @timestamped: true if the transfer has been timestamped * @error: Error status logged by SPI controller driver. * @@ -1046,6 +1057,9 @@ struct spi_res { * two should both be set. User can set transfer mode with SPI_NBITS_SINGLE(1x) * SPI_NBITS_DUAL(2x) and SPI_NBITS_QUAD(4x) to support these three transfer. * + * User may also set dtr_mode to true to use dual transfer mode if desired. if + * not, default considered as single transfer mode. + * * The code that submits an spi_message (and its spi_transfers) * to the lower layers is responsible for managing its memory. * Zero-initialize every field you don't set up explicitly, to @@ -1080,6 +1094,7 @@ struct spi_transfer { unsigned tx_nbits:4; unsigned rx_nbits:4; unsigned timestamped:1; + bool dtr_mode; #define SPI_NBITS_SINGLE 0x01 /* 1-bit transfer */ #define SPI_NBITS_DUAL 0x02 /* 2-bit transfer */ #define SPI_NBITS_QUAD 0x04 /* 4-bit transfer */ @@ -1323,6 +1338,32 @@ static inline bool spi_is_bpw_supported(struct spi_device *spi, u32 bpw) } /** + * spi_bpw_to_bytes - Covert bits per word to bytes + * @bpw: Bits per word + * + * This function converts the given @bpw to bytes. The result is always + * power-of-two, e.g., + * + * =============== ================= + * Input (in bits) Output (in bytes) + * =============== ================= + * 5 1 + * 9 2 + * 21 4 + * 37 8 + * =============== ================= + * + * It will return 0 for the 0 input. + * + * Returns: + * Bytes for the given @bpw. + */ +static inline u32 spi_bpw_to_bytes(u32 bpw) +{ + return roundup_pow_of_two(BITS_TO_BYTES(bpw)); +} + +/** * spi_controller_xfer_timeout - Compute a suitable timeout value * @ctlr: SPI device * @xfer: Transfer descriptor diff --git a/include/linux/stat.h b/include/linux/stat.h index be7496a6a0dd..e3d00e7bb26d 100644 --- a/include/linux/stat.h +++ b/include/linux/stat.h @@ -57,6 +57,7 @@ struct kstat { u32 dio_read_offset_align; u32 atomic_write_unit_min; u32 atomic_write_unit_max; + u32 atomic_write_unit_max_opt; u32 atomic_write_segments_max; }; diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h index e93fbb5b0c01..3fb88a1e9898 100644 --- a/include/linux/string_helpers.h +++ b/include/linux/string_helpers.h @@ -31,6 +31,7 @@ enum string_size_units { int string_get_size(u64 size, u64 blk_size, const enum string_size_units units, char *buf, int len); +int parse_int_array(const char *buf, size_t count, int **array); int parse_int_array_user(const char __user *from, size_t count, int **array); #define UNESCAPE_SPACE BIT(0) diff --git a/include/linux/suspend.h b/include/linux/suspend.h index da6ebca3ff77..b1c76c8f2c82 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -298,6 +298,11 @@ static inline void s2idle_set_ops(const struct platform_s2idle_ops *ops) {} static inline void s2idle_wake(void) {} #endif /* !CONFIG_SUSPEND */ +static inline bool pm_suspend_in_progress(void) +{ + return pm_suspend_target_state != PM_SUSPEND_ON; +} + /* struct pbe is used for creating lists of pages that should be restored * atomically during the resume from disk, because the page frames they have * occupied before the suspend are in use. @@ -470,6 +475,8 @@ extern void pm_print_active_wakeup_sources(void); extern unsigned int lock_system_sleep(void); extern void unlock_system_sleep(unsigned int); +extern bool pm_sleep_transition_in_progress(void); + #else /* !CONFIG_PM_SLEEP */ static inline int register_pm_notifier(struct notifier_block *nb) @@ -498,6 +505,8 @@ static inline void pm_system_irq_wakeup(unsigned int irq_number) {} static inline unsigned int lock_system_sleep(void) { return 0; } static inline void unlock_system_sleep(unsigned int flags) {} +static inline bool pm_sleep_transition_in_progress(void) { return false; } + #endif /* !CONFIG_PM_SLEEP */ #ifdef CONFIG_PM_SLEEP_DEBUG diff --git a/include/linux/tick.h b/include/linux/tick.h index b8ddc8e631a3..ac76ae9fa36d 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -195,12 +195,6 @@ static inline bool tick_nohz_full_enabled(void) __ret; \ }) -static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) -{ - if (tick_nohz_full_enabled()) - cpumask_or(mask, mask, tick_nohz_full_mask); -} - extern void tick_nohz_dep_set(enum tick_dep_bits bit); extern void tick_nohz_dep_clear(enum tick_dep_bits bit); extern void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit); @@ -281,7 +275,6 @@ extern void __init tick_nohz_full_setup(cpumask_var_t cpumask); #else static inline bool tick_nohz_full_enabled(void) { return false; } static inline bool tick_nohz_full_cpu(int cpu) { return false; } -static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { } static inline void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit) { } static inline void tick_nohz_dep_clear_cpu(int cpu, enum tick_dep_bits bit) { } diff --git a/include/linux/timer.h b/include/linux/timer.h index 10596d7c3a34..f636f55c427d 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -67,44 +67,44 @@ /* * LOCKDEP and DEBUG timer interfaces. */ -void init_timer_key(struct timer_list *timer, +void timer_init_key(struct timer_list *timer, void (*func)(struct timer_list *), unsigned int flags, const char *name, struct lock_class_key *key); #ifdef CONFIG_DEBUG_OBJECTS_TIMERS -extern void init_timer_on_stack_key(struct timer_list *timer, +extern void timer_init_key_on_stack(struct timer_list *timer, void (*func)(struct timer_list *), unsigned int flags, const char *name, struct lock_class_key *key); #else -static inline void init_timer_on_stack_key(struct timer_list *timer, +static inline void timer_init_key_on_stack(struct timer_list *timer, void (*func)(struct timer_list *), unsigned int flags, const char *name, struct lock_class_key *key) { - init_timer_key(timer, func, flags, name, key); + timer_init_key(timer, func, flags, name, key); } #endif #ifdef CONFIG_LOCKDEP -#define __init_timer(_timer, _fn, _flags) \ +#define __timer_init(_timer, _fn, _flags) \ do { \ static struct lock_class_key __key; \ - init_timer_key((_timer), (_fn), (_flags), #_timer, &__key);\ + timer_init_key((_timer), (_fn), (_flags), #_timer, &__key);\ } while (0) -#define __init_timer_on_stack(_timer, _fn, _flags) \ +#define __timer_init_on_stack(_timer, _fn, _flags) \ do { \ static struct lock_class_key __key; \ - init_timer_on_stack_key((_timer), (_fn), (_flags), \ + timer_init_key_on_stack((_timer), (_fn), (_flags), \ #_timer, &__key); \ } while (0) #else -#define __init_timer(_timer, _fn, _flags) \ - init_timer_key((_timer), (_fn), (_flags), NULL, NULL) -#define __init_timer_on_stack(_timer, _fn, _flags) \ - init_timer_on_stack_key((_timer), (_fn), (_flags), NULL, NULL) +#define __timer_init(_timer, _fn, _flags) \ + timer_init_key((_timer), (_fn), (_flags), NULL, NULL) +#define __timer_init_on_stack(_timer, _fn, _flags) \ + timer_init_key_on_stack((_timer), (_fn), (_flags), NULL, NULL) #endif /** @@ -115,18 +115,18 @@ static inline void init_timer_on_stack_key(struct timer_list *timer, * * Regular timer initialization should use either DEFINE_TIMER() above, * or timer_setup(). For timers on the stack, timer_setup_on_stack() must - * be used and must be balanced with a call to destroy_timer_on_stack(). + * be used and must be balanced with a call to timer_destroy_on_stack(). */ #define timer_setup(timer, callback, flags) \ - __init_timer((timer), (callback), (flags)) + __timer_init((timer), (callback), (flags)) #define timer_setup_on_stack(timer, callback, flags) \ - __init_timer_on_stack((timer), (callback), (flags)) + __timer_init_on_stack((timer), (callback), (flags)) #ifdef CONFIG_DEBUG_OBJECTS_TIMERS -extern void destroy_timer_on_stack(struct timer_list *timer); +extern void timer_destroy_on_stack(struct timer_list *timer); #else -static inline void destroy_timer_on_stack(struct timer_list *timer) { } +static inline void timer_destroy_on_stack(struct timer_list *timer) { } #endif #define from_timer(var, callback_timer, timer_fieldname) \ @@ -156,28 +156,26 @@ extern int timer_reduce(struct timer_list *timer, unsigned long expires); * The jiffies value which is added to now, when there is no timer * in the timer wheel: */ -#define NEXT_TIMER_MAX_DELTA ((1UL << 30) - 1) +#define TIMER_NEXT_MAX_DELTA ((1UL << 30) - 1) extern void add_timer(struct timer_list *timer); extern void add_timer_local(struct timer_list *timer); extern void add_timer_global(struct timer_list *timer); -extern int try_to_del_timer_sync(struct timer_list *timer); +extern int timer_delete_sync_try(struct timer_list *timer); extern int timer_delete_sync(struct timer_list *timer); extern int timer_delete(struct timer_list *timer); extern int timer_shutdown_sync(struct timer_list *timer); extern int timer_shutdown(struct timer_list *timer); -extern void init_timers(void); +extern void timers_init(void); struct hrtimer; extern enum hrtimer_restart it_real_fn(struct hrtimer *); -unsigned long __round_jiffies(unsigned long j, int cpu); unsigned long __round_jiffies_relative(unsigned long j, int cpu); unsigned long round_jiffies(unsigned long j); unsigned long round_jiffies_relative(unsigned long j); -unsigned long __round_jiffies_up(unsigned long j, int cpu); unsigned long __round_jiffies_up_relative(unsigned long j, int cpu); unsigned long round_jiffies_up(unsigned long j); unsigned long round_jiffies_up_relative(unsigned long j); diff --git a/include/linux/topology.h b/include/linux/topology.h index 24e715f0f6d2..cd6b4bdc9cfd 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -332,4 +332,13 @@ sched_numa_hop_mask(unsigned int node, unsigned int hops) !IS_ERR_OR_NULL(mask); \ __hops++) +DECLARE_PER_CPU(unsigned long, cpu_scale); + +static inline unsigned long topology_get_cpu_scale(int cpu) +{ + return per_cpu(cpu_scale, cpu); +} + +void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity); + #endif /* _LINUX_TOPOLOGY_H */ diff --git a/include/linux/tpm_svsm.h b/include/linux/tpm_svsm.h new file mode 100644 index 000000000000..38e341f9761a --- /dev/null +++ b/include/linux/tpm_svsm.h @@ -0,0 +1,149 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2023 James.Bottomley@HansenPartnership.com + * Copyright (C) 2025 Red Hat, Inc. All Rights Reserved. + * + * Helpers for the SVSM_VTPM_CMD calls used by the vTPM protocol defined by the + * AMD SVSM spec [1]. + * + * The vTPM protocol follows the Official TPM 2.0 Reference Implementation + * (originally by Microsoft, now part of the TCG) simulator protocol. + * + * [1] "Secure VM Service Module for SEV-SNP Guests" + * Publication # 58019 Revision: 1.00 + */ +#ifndef _TPM_SVSM_H_ +#define _TPM_SVSM_H_ + +#include <linux/errno.h> +#include <linux/string.h> +#include <linux/types.h> + +#define SVSM_VTPM_MAX_BUFFER 4096 /* max req/resp buffer size */ + +/** + * struct svsm_vtpm_request - Generic request for single word command + * @cmd: The command to send + * + * Defined by AMD SVSM spec [1] in section "8.2 SVSM_VTPM_CMD Call" - + * Table 15: vTPM Common Request/Response Structure + * Byte Size In/Out Description + * Offset (Bytes) + * 0x000 4 In Platform command + * Out Platform command response size + */ +struct svsm_vtpm_request { + u32 cmd; +}; + +/** + * struct svsm_vtpm_response - Generic response + * @size: The response size (zero if nothing follows) + * + * Defined by AMD SVSM spec [1] in section "8.2 SVSM_VTPM_CMD Call" - + * Table 15: vTPM Common Request/Response Structure + * Byte Size In/Out Description + * Offset (Bytes) + * 0x000 4 In Platform command + * Out Platform command response size + * + * Note: most TCG Simulator commands simply return zero here with no indication + * of success or failure. + */ +struct svsm_vtpm_response { + u32 size; +}; + +/** + * struct svsm_vtpm_cmd_request - Structure for a TPM_SEND_COMMAND request + * @cmd: The command to send (must be TPM_SEND_COMMAND) + * @locality: The locality + * @buf_size: The size of the input buffer following + * @buf: A buffer of size buf_size + * + * Defined by AMD SVSM spec [1] in section "8.2 SVSM_VTPM_CMD Call" - + * Table 16: TPM_SEND_COMMAND Request Structure + * Byte Size Meaning + * Offset (Bytes) + * 0x000 4 Platform command (8) + * 0x004 1 Locality (must-be-0) + * 0x005 4 TPM Command size (in bytes) + * 0x009 Variable TPM Command + * + * Note: the TCG Simulator expects @buf_size to be equal to the size of the + * specific TPM command, otherwise an TPM_RC_COMMAND_SIZE error is returned. + */ +struct svsm_vtpm_cmd_request { + u32 cmd; + u8 locality; + u32 buf_size; + u8 buf[]; +} __packed; + +/** + * struct svsm_vtpm_cmd_response - Structure for a TPM_SEND_COMMAND response + * @buf_size: The size of the output buffer following + * @buf: A buffer of size buf_size + * + * Defined by AMD SVSM spec [1] in section "8.2 SVSM_VTPM_CMD Call" - + * Table 17: TPM_SEND_COMMAND Response Structure + * Byte Size Meaning + * Offset (Bytes) + * 0x000 4 Response size (in bytes) + * 0x004 Variable Response + */ +struct svsm_vtpm_cmd_response { + u32 buf_size; + u8 buf[]; +}; + +/** + * svsm_vtpm_cmd_request_fill() - Fill a TPM_SEND_COMMAND request to be sent to SVSM + * @req: The struct svsm_vtpm_cmd_request to fill + * @locality: The locality + * @buf: The buffer from where to copy the payload of the command + * @len: The size of the buffer + * + * Return: 0 on success, negative error code on failure. + */ +static inline int +svsm_vtpm_cmd_request_fill(struct svsm_vtpm_cmd_request *req, u8 locality, + const u8 *buf, size_t len) +{ + if (len > SVSM_VTPM_MAX_BUFFER - sizeof(*req)) + return -EINVAL; + + req->cmd = 8; /* TPM_SEND_COMMAND */ + req->locality = locality; + req->buf_size = len; + + memcpy(req->buf, buf, len); + + return 0; +} + +/** + * svsm_vtpm_cmd_response_parse() - Parse a TPM_SEND_COMMAND response received from SVSM + * @resp: The struct svsm_vtpm_cmd_response to parse + * @buf: The buffer where to copy the response + * @len: The size of the buffer + * + * Return: buffer size filled with the response on success, negative error + * code on failure. + */ +static inline int +svsm_vtpm_cmd_response_parse(const struct svsm_vtpm_cmd_response *resp, u8 *buf, + size_t len) +{ + if (len < resp->buf_size) + return -E2BIG; + + if (resp->buf_size > SVSM_VTPM_MAX_BUFFER - sizeof(*resp)) + return -EINVAL; // Invalid response from the platform TPM + + memcpy(buf, resp->buf, resp->buf_size); + + return resp->buf_size; +} + +#endif /* _TPM_SVSM_H_ */ diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h index 939ceabcaf06..335c360d4f9b 100644 --- a/include/linux/vermagic.h +++ b/include/linux/vermagic.h @@ -33,7 +33,6 @@ #define MODULE_VERMAGIC_MODVERSIONS "" #endif #ifdef RANDSTRUCT -#include <generated/randstruct_hash.h> #define MODULE_RANDSTRUCT "RANDSTRUCT_" RANDSTRUCT_HASHED_SEED #else #define MODULE_RANDSTRUCT diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 5ca8d4dd149d..a40a905e5e1b 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -169,8 +169,13 @@ void *__vmalloc_node_noprof(unsigned long size, unsigned long align, gfp_t gfp_m int node, const void *caller) __alloc_size(1); #define __vmalloc_node(...) alloc_hooks(__vmalloc_node_noprof(__VA_ARGS__)) -void *vmalloc_huge_noprof(unsigned long size, gfp_t gfp_mask) __alloc_size(1); -#define vmalloc_huge(...) alloc_hooks(vmalloc_huge_noprof(__VA_ARGS__)) +void *vmalloc_huge_node_noprof(unsigned long size, gfp_t gfp_mask, int node) __alloc_size(1); +#define vmalloc_huge_node(...) alloc_hooks(vmalloc_huge_node_noprof(__VA_ARGS__)) + +static inline void *vmalloc_huge(unsigned long size, gfp_t gfp_mask) +{ + return vmalloc_huge_node(size, gfp_mask, NUMA_NO_NODE); +} extern void *__vmalloc_array_noprof(size_t n, size_t size, gfp_t flags) __alloc_size(1, 2); #define __vmalloc_array(...) alloc_hooks(__vmalloc_array_noprof(__VA_ARGS__)) diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index b0dc957c3e56..6e30f275da77 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -316,7 +316,7 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } #define __INIT_DELAYED_WORK(_work, _func, _tflags) \ do { \ INIT_WORK(&(_work)->work, (_func)); \ - __init_timer(&(_work)->timer, \ + __timer_init(&(_work)->timer, \ delayed_work_timer_fn, \ (_tflags) | TIMER_IRQSAFE); \ } while (0) @@ -324,7 +324,7 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } #define __INIT_DELAYED_WORK_ONSTACK(_work, _func, _tflags) \ do { \ INIT_WORK_ONSTACK(&(_work)->work, (_func)); \ - __init_timer_on_stack(&(_work)->timer, \ + __timer_init_on_stack(&(_work)->timer, \ delayed_work_timer_fn, \ (_tflags) | TIMER_IRQSAFE); \ } while (0) @@ -480,7 +480,7 @@ void workqueue_softirq_dead(unsigned int cpu); * executing at most one work item for the workqueue. * * For unbound workqueues, @max_active limits the number of in-flight work items - * for the whole system. e.g. @max_active of 16 indicates that that there can be + * for the whole system. e.g. @max_active of 16 indicates that there can be * at most 16 work items executing for the workqueue in the whole system. * * As sharing the same active counter for an unbound workqueue across multiple |