From 3af8588c77186bf08e55e7281da83d88373481d7 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 8 Jun 2020 17:28:50 +0200 Subject: fork: fold legacy_clone_args_valid() into _do_fork() This separate helper only existed to guarantee the mutual exclusivity of CLONE_PIDFD and CLONE_PARENT_SETTID for legacy clone since CLONE_PIDFD abuses the parent_tid field to return the pidfd. But we can actually handle this uniformly thus removing the helper. For legacy clone we can detect that CLONE_PIDFD is specified in conjunction with CLONE_PARENT_SETTID because they will share the same memory which is invalid and for clone3() setting the separate pidfd and parent_tid fields to the same memory is bogus as well. So fold that helper directly into _do_fork() by detecting this case. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Al Viro Cc: Geert Uytterhoeven Cc: "Matthew Wilcox (Oracle)" Cc: "Peter Zijlstra (Intel)" Cc: linux-m68k@lists.linux-m68k.org Cc: x86@kernel.org Signed-off-by: Christian Brauner --- include/linux/sched/task.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/sched/task.h') diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 38359071236a..ddce0ea515d1 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -96,7 +96,6 @@ extern void exit_files(struct task_struct *); extern void exit_itimers(struct signal_struct *); extern long _do_fork(struct kernel_clone_args *kargs); -extern bool legacy_clone_args_valid(const struct kernel_clone_args *kargs); extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *); struct task_struct *fork_idle(int); struct mm_struct *copy_init_mm(void); -- cgit From ff2a91127b374c75ae024b31d22f23ad49d16eb4 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sun, 24 May 2020 20:57:00 +0200 Subject: fork: remove do_fork() Now that all architectures have been switched to use _do_fork() and the new struct kernel_clone_args calling convention we 
can remove the legacy do_fork() helper completely. The calling convention used to be brittle and do_fork() didn't buy us anything. The only calling convention accepted should be based on struct kernel_clone_args going forward. It's cleaner and uniform. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Al Viro Cc: "Matthew Wilcox (Oracle)" Cc: "Peter Zijlstra (Intel)" Signed-off-by: Christian Brauner --- include/linux/sched/task.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/sched/task.h') diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index ddce0ea515d1..9f03c44941fb 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -96,7 +96,6 @@ extern void exit_files(struct task_struct *); extern void exit_itimers(struct signal_struct *); extern long _do_fork(struct kernel_clone_args *kargs); -extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *); struct task_struct *fork_idle(int); struct mm_struct *copy_init_mm(void); extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); -- cgit From 140c8180eb7c7cbda399f64474788b86db72db32 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Sun, 24 May 2020 23:34:20 +0200 Subject: arch: remove HAVE_COPY_THREAD_TLS All architectures support copy_thread_tls() now, so remove the legacy copy_thread() function and the HAVE_COPY_THREAD_TLS config option. Everyone uses the same process creation calling convention based on copy_thread_tls() and struct kernel_clone_args. This will make it easier to maintain the core process creation code under kernel/, simplifies the callpaths and makes them identical for all architectures. 
Cc: linux-arch@vger.kernel.org Acked-by: Thomas Bogendoerfer Acked-by: Greentime Hu Acked-by: Geert Uytterhoeven Reviewed-by: Kees Cook Signed-off-by: Christian Brauner --- include/linux/sched/task.h | 15 +-------------- 1 file changed, 1 insertion(+), 14 deletions(-) (limited to 'include/linux/sched/task.h') diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 9f03c44941fb..77cbe14c3034 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -65,22 +65,9 @@ extern void fork_init(void); extern void release_task(struct task_struct * p); -#ifdef CONFIG_HAVE_COPY_THREAD_TLS extern int copy_thread_tls(unsigned long, unsigned long, unsigned long, struct task_struct *, unsigned long); -#else -extern int copy_thread(unsigned long, unsigned long, unsigned long, - struct task_struct *); - -/* Architectures that haven't opted into copy_thread_tls get the tls argument - * via pt_regs, so ignore the tls argument passed via C. */ -static inline int copy_thread_tls( - unsigned long clone_flags, unsigned long sp, unsigned long arg, - struct task_struct *p, unsigned long tls) -{ - return copy_thread(clone_flags, sp, arg, p); -} -#endif + extern void flush_thread(void); #ifdef CONFIG_HAVE_EXIT_THREAD -- cgit From 714acdbd1c94e7e3ab90f6b6938f1ccb27b662f0 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Thu, 11 Jun 2020 11:04:15 +0200 Subject: arch: rename copy_thread_tls() back to copy_thread() Now that HAVE_COPY_THREAD_TLS has been removed, rename copy_thread_tls() back to simply copy_thread(). It's a simpler name, and doesn't imply that only tls is copied here. This finishes an outstanding chunk of internal process creation work since we've added clone3(). 
Cc: linux-arch@vger.kernel.org Acked-by: Thomas Bogendoerfer Acked-by: Stafford Horne Acked-by: Greentime Hu Acked-by: Geert Uytterhoeven Reviewed-by: Kees Cook Signed-off-by: Christian Brauner --- include/linux/sched/task.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/sched/task.h') diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 77cbe14c3034..b6253f2ea96a 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -65,8 +65,8 @@ extern void fork_init(void); extern void release_task(struct task_struct * p); -extern int copy_thread_tls(unsigned long, unsigned long, unsigned long, - struct task_struct *, unsigned long); +extern int copy_thread(unsigned long, unsigned long, unsigned long, + struct task_struct *, unsigned long); extern void flush_thread(void); -- cgit From dd6f843a9fca8f225c86fee5f50da429c369c045 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Sat, 18 Jul 2020 11:32:51 +0300 Subject: tasks: add put_task_struct_many() put_task_struct_many() is like put_task_struct() but puts several references at once. Useful for batching. 
Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- include/linux/sched/task.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux/sched/task.h') diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 38359071236a..1301077f9c24 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -126,6 +126,12 @@ static inline void put_task_struct(struct task_struct *t) __put_task_struct(t); } +static inline void put_task_struct_many(struct task_struct *t, int nr) +{ + if (refcount_sub_and_test(nr, &t->usage)) + __put_task_struct(t); +} + void put_task_struct_rcu_user(struct task_struct *task); #ifdef CONFIG_ARCH_WANTS_DYNAMIC_TASK_STRUCT -- cgit From 13685c4a08fca9dd76bf53bfcbadc044ab2a08cb Mon Sep 17 00:00:00 2001 From: Qais Yousef Date: Thu, 16 Jul 2020 12:03:45 +0100 Subject: sched/uclamp: Add a new sysctl to control RT default boost value RT tasks by default run at the highest capacity/performance level. When uclamp is selected this default behavior is retained by enforcing the requested uclamp.min (p->uclamp_req[UCLAMP_MIN]) of the RT tasks to be uclamp_none(UCLAMP_MAX), which is SCHED_CAPACITY_SCALE; the maximum value. This is also referred to as 'the default boost value of RT tasks'. See commit 1a00d999971c ("sched/uclamp: Set default clamps for RT tasks"). On battery powered devices, it is desired to control this default (currently hardcoded) behavior at runtime to reduce energy consumed by RT tasks. For example, a mobile device manufacturer where big.LITTLE architecture is dominant, the performance of the little cores varies across SoCs, and on high end ones the big cores could be too power hungry. Given the diversity of SoCs, the new knob allows manufacturers to tune the best performance/power for RT tasks for the particular hardware they run on. They could opt to further tune the value when the user selects a different power saving mode or when the device is actively charging. 
The runtime aspect of it further helps in creating a single kernel image that can be run on multiple devices that require different tuning. Keep in mind that a lot of RT tasks in the system are created by the kernel. On Android for instance I can see over 50 RT tasks, only a handful of which are created by the Android framework. To control the default behavior globally by system admins and device integrators, introduce the new sysctl_sched_uclamp_util_min_rt_default to change the default boost value of the RT tasks. I anticipate this to be mostly in the form of modifying the init script of a particular device. To avoid polluting the fast path with unnecessary code, the approach taken is to synchronously do the update by traversing all the existing tasks in the system. This could race with a concurrent fork(), which is dealt with by introducing sched_post_fork() function which will ensure the racy fork will get the right update applied. Tested on Juno-r2 in combination with the RT capacity awareness [1]. By default an RT task will go to the highest capacity CPU and run at the maximum frequency, which is particularly energy inefficient on high end mobile devices because the biggest core[s] are 'huge' and power hungry. With this patch the RT task can be controlled to run anywhere by default, and doesn't cause the frequency to be maximum all the time. Yet any task that really needs to be boosted can easily escape this default behavior by modifying its requested uclamp.min value (p->uclamp_req[UCLAMP_MIN]) via sched_setattr() syscall. 
[1] 804d402fb6f6: ("sched/rt: Make RT capacity-aware") Signed-off-by: Qais Yousef Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200716110347.19553-2-qais.yousef@arm.com --- include/linux/sched/task.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/sched/task.h') diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index 38359071236a..e7ddab095baf 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -55,6 +55,7 @@ extern asmlinkage void schedule_tail(struct task_struct *prev); extern void init_idle(struct task_struct *idle, int cpu); extern int sched_fork(unsigned long clone_flags, struct task_struct *p); +extern void sched_post_fork(struct task_struct *p); extern void sched_dead(struct task_struct *p); void __noreturn do_task_dead(void); -- cgit From 8043fc147a97ec2eefc582487f344f2cbe86d12e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 11 Aug 2020 18:34:10 -0700 Subject: kernel: add a kernel_wait helper Add a helper that waits for a pid and stores the status in the passed in kernel pointer. Use it to fix the usage of kernel_wait4 in call_usermodehelper_exec_sync that only happens to work due to the implicit set_fs(KERNEL_DS) for kernel threads. Signed-off-by: Christoph Hellwig Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Acked-by: "Eric W. 
Biederman" Cc: Luis Chamberlain Link: http://lkml.kernel.org/r/20200721130449.5008-1-hch@lst.de Signed-off-by: Linus Torvalds --- include/linux/sched/task.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/sched/task.h') diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h index ae3060f0b0c9..a98965007eef 100644 --- a/include/linux/sched/task.h +++ b/include/linux/sched/task.h @@ -88,6 +88,7 @@ struct task_struct *fork_idle(int); struct mm_struct *copy_init_mm(void); extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); extern long kernel_wait4(pid_t, int __user *, int, struct rusage *); +int kernel_wait(pid_t pid, int *stat); extern void free_task(struct task_struct *tsk); -- cgit