From 35d7bdc86031a2c1ae05ac27dfa93b2acdcbaecc Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 23 Apr 2021 10:20:25 +0200 Subject: kernel/fork: factor out replacing the current MM exe_file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's factor the main logic out into replace_mm_exe_file(), such that all mm->exe_file logic is contained in kernel/fork.c. While at it, perform some simple cleanups that are possible now that we're simplifying the individual functions. Acked-by: Christian Brauner Acked-by: "Eric W. Biederman" Acked-by: Christian König Signed-off-by: David Hildenbrand --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 7ca22e6e694a..48c6fa9ab792 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2581,6 +2581,7 @@ extern int mm_take_all_locks(struct mm_struct *mm); extern void mm_drop_all_locks(struct mm_struct *mm); extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file); +extern int replace_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file); extern struct file *get_mm_exe_file(struct mm_struct *mm); extern struct file *get_task_exe_file(struct task_struct *task); -- cgit From fe69d560b5bd9ec77b5d5749bd7027344daef47e Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 23 Apr 2021 10:29:59 +0200 Subject: kernel/fork: always deny write access to current MM exe_file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We want to remove VM_DENYWRITE only currently only used when mapping the executable during exec. During exec, we already deny_write_access() the executable, however, after exec completes the VMAs mapped with VM_DENYWRITE effectively keeps write access denied via deny_write_access(). Let's deny write access when setting or replacing the MM exe_file. With this change, we can remove VM_DENYWRITE for mapping executables. Make set_mm_exe_file() return an error in case deny_write_access() fails; note that this should never happen, because exec code does a deny_write_access() early and keeps write access denied when calling set_mm_exe_file. However, it makes the code easier to read and makes set_mm_exe_file() and replace_mm_exe_file() look more similar. This represents a minor user space visible change: sys_prctl(PR_SET_MM_MAP/EXE_FILE) can now fail if the file is already opened writable. Also, after sys_prctl(PR_SET_MM_MAP/EXE_FILE) the file cannot be opened writable. Note that we can already fail with -EACCES if the file doesn't have execute permissions. Acked-by: "Eric W. Biederman" Acked-by: Christian König Signed-off-by: David Hildenbrand --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 48c6fa9ab792..56b1cd41db61 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2580,7 +2580,7 @@ static inline int check_data_rlimit(unsigned long rlim, extern int mm_take_all_locks(struct mm_struct *mm); extern void mm_drop_all_locks(struct mm_struct *mm); -extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file); +extern int set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file); extern int replace_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file); extern struct file *get_mm_exe_file(struct mm_struct *mm); extern struct file *get_task_exe_file(struct task_struct *task); -- cgit From 8d0920bde5eb8ec7e567939b85e65a0596c8580d Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Thu, 22 Apr 2021 12:08:20 +0200 Subject: mm: remove VM_DENYWRITE MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit All in-tree users of MAP_DENYWRITE are gone. MAP_DENYWRITE cannot be set from user space, so all users are gone; let's remove it. Acked-by: "Eric W. Biederman" Acked-by: Christian König Signed-off-by: David Hildenbrand --- include/linux/mm.h | 1 - include/linux/mman.h | 1 - 2 files changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 56b1cd41db61..257995f62e83 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -281,7 +281,6 @@ extern unsigned int kobjsize(const void *objp); #define VM_GROWSDOWN 0x00000100 /* general info on the segment */ #define VM_UFFD_MISSING 0x00000200 /* missing pages tracking */ #define VM_PFNMAP 0x00000400 /* Page-ranges managed without "struct page", just pure PFN */ -#define VM_DENYWRITE 0x00000800 /* ETXTBSY on write attempts.. */ #define VM_UFFD_WP 0x00001000 /* wrprotect pages tracking */ #define VM_LOCKED 0x00002000 diff --git a/include/linux/mman.h b/include/linux/mman.h index ebb09a964272..bd9aadda047b 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -153,7 +153,6 @@ static inline unsigned long calc_vm_flag_bits(unsigned long flags) { return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) | - _calc_vm_trans(flags, MAP_DENYWRITE, VM_DENYWRITE ) | _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) | _calc_vm_trans(flags, MAP_SYNC, VM_SYNC ) | arch_calc_vm_flag_bits(flags); -- cgit From 6128b3af2a5e42386aa7faf37609b57f39fb7d00 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 23 Apr 2021 09:38:14 +0200 Subject: mm: ignore MAP_DENYWRITE in ksys_mmap_pgoff() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's also remove masking off MAP_DENYWRITE from ksys_mmap_pgoff(): the last in-tree occurrence of MAP_DENYWRITE is now in LEGACY_MAP_MASK, which accepts the flag e.g., for MAP_SHARED_VALIDATE; however, the flag is ignored throughout the kernel now. Add a comment to LEGACY_MAP_MASK stating that MAP_DENYWRITE is ignored. Acked-by: "Eric W. Biederman" Acked-by: Christian König Signed-off-by: David Hildenbrand --- include/linux/mman.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mman.h b/include/linux/mman.h index bd9aadda047b..b66e91b8176c 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -32,7 +32,8 @@ * The historical set of flags that all mmap implementations implicitly * support when a ->mmap_validate() op is not provided in file_operations. * - * MAP_EXECUTABLE is completely ignored throughout the kernel. + * MAP_EXECUTABLE and MAP_DENYWRITE are completely ignored throughout the + * kernel. */ #define LEGACY_MAP_MASK (MAP_SHARED \ | MAP_PRIVATE \ -- cgit From 592ca09be8333bd226f50100328a905bfc377133 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 23 Apr 2021 09:45:45 +0200 Subject: fs: update documentation of get_write_access() and friends MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As VM_DENYWRITE does no longer exists, let's spring-clean the documentation of get_write_access() and friends. Acked-by: "Eric W. Biederman" Acked-by: Christian König Signed-off-by: David Hildenbrand --- include/linux/fs.h | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 640574294216..e0dc3e96ed72 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3055,15 +3055,20 @@ static inline void file_end_write(struct file *file) } /* + * This is used for regular files where some users -- especially the + * currently executed binary in a process, previously handled via + * VM_DENYWRITE -- cannot handle concurrent write (and maybe mmap + * read-write shared) accesses. + * * get_write_access() gets write permission for a file. * put_write_access() releases this write permission. - * This is used for regular files. - * We cannot support write (and maybe mmap read-write shared) accesses and - * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode - * can have the following values: - * 0: no writers, no VM_DENYWRITE mappings - * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist - * > 0: (i_writecount) users are writing to the file. + * deny_write_access() denies write access to a file. + * allow_write_access() re-enables write access to a file. + * + * The i_writecount field of an inode can have the following values: + * 0: no write access, no denied write access + * < 0: (-i_writecount) users that denied write access to the file. + * > 0: (i_writecount) users that have write access to the file. * * Normally we operate on that counter with atomic_{inc,dec} and it's safe * except for the cases where we don't hold i_writecount yet. Then we need to -- cgit