From cf2e340f4249b781b3d2beb41e891d08581f0e10 Mon Sep 17 00:00:00 2001 From: JANAK DESAI Date: Tue, 7 Feb 2006 12:58:58 -0800 Subject: [PATCH] unshare system call -v5: system call handler function sys_unshare system call handler function accepts the same flags as clone system call, checks constraints on each of the flags and invokes corresponding unshare functions to disassociate respective process context if it was being shared with another task. Here is the link to a program for testing unshare system call. http://prdownloads.sourceforge.net/audit/unshare_test.c?download Please note that because of a problem in rmdir associated with bind mounts and clone with CLONE_NEWNS, the test fails while trying to remove temporary test directory. You can remove that temporary directory by doing rmdir, twice, from the command line. The first will fail with EBUSY, but the second will succeed. I have reported the problem to Ram Pai and Al Viro with a small program which reproduces the problem. Al told us yesterday that he will be looking at the problem soon. I have tried multiple rmdirs from the unshare_test program itself, but for some reason that is not working. Doing two rmdirs from command line does seem to remove the directory. Signed-off-by: Janak Desai Cc: Al Viro Cc: Christoph Hellwig Cc: Michael Kerrisk Cc: Andi Kleen Cc: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 232 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 232 insertions(+) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index 7f0ab5ee948c..6eb9362775f9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1323,3 +1323,235 @@ void __init proc_caches_init(void) sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); } + + +/* + * Check constraints on flags passed to the unshare system call and + * force unsharing of additional process context as appropriate. + */ +static inline void check_unshare_flags(unsigned long *flags_ptr) +{ + /* + * If unsharing a thread from a thread group, must also + * unshare vm. + */ + if (*flags_ptr & CLONE_THREAD) + *flags_ptr |= CLONE_VM; + + /* + * If unsharing vm, must also unshare signal handlers. + */ + if (*flags_ptr & CLONE_VM) + *flags_ptr |= CLONE_SIGHAND; + + /* + * If unsharing signal handlers and the task was created + * using CLONE_THREAD, then must unshare the thread + */ + if ((*flags_ptr & CLONE_SIGHAND) && + (atomic_read(¤t->signal->count) > 1)) + *flags_ptr |= CLONE_THREAD; + + /* + * If unsharing namespace, must also unshare filesystem information. + */ + if (*flags_ptr & CLONE_NEWNS) + *flags_ptr |= CLONE_FS; +} + +/* + * Unsharing of tasks created with CLONE_THREAD is not supported yet + */ +static int unshare_thread(unsigned long unshare_flags) +{ + if (unshare_flags & CLONE_THREAD) + return -EINVAL; + + return 0; +} + +/* + * Unsharing of fs info for tasks created with CLONE_FS is not supported yet + */ +static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp) +{ + struct fs_struct *fs = current->fs; + + if ((unshare_flags & CLONE_FS) && + (fs && atomic_read(&fs->count) > 1)) + return -EINVAL; + + return 0; +} + +/* + * Unsharing of namespace for tasks created without CLONE_NEWNS is not + * supported yet + */ +static int unshare_namespace(unsigned long unshare_flags, struct namespace **new_nsp) +{ + struct namespace *ns = current->namespace; + + if ((unshare_flags & CLONE_NEWNS) && + (ns && atomic_read(&ns->count) > 1)) + return -EINVAL; + + return 0; +} + +/* + * Unsharing of sighand for tasks created with CLONE_SIGHAND is not + * supported yet + */ +static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct **new_sighp) +{ + struct sighand_struct *sigh = current->sighand; + + if ((unshare_flags & CLONE_SIGHAND) && + (sigh && atomic_read(&sigh->count) > 1)) + return -EINVAL; + else + return 0; +} + +/* + * Unsharing of vm for tasks created with CLONE_VM is not supported yet + */ +static int unshare_vm(unsigned long unshare_flags, struct mm_struct **new_mmp) +{ + struct mm_struct *mm = current->mm; + + if ((unshare_flags & CLONE_VM) && + (mm && atomic_read(&mm->mm_users) > 1)) + return -EINVAL; + + return 0; + +} + +/* + * Unsharing of files for tasks created with CLONE_FILES is not supported yet + */ +static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp) +{ + struct files_struct *fd = current->files; + + if ((unshare_flags & CLONE_FILES) && + (fd && atomic_read(&fd->count) > 1)) + return -EINVAL; + + return 0; +} + +/* + * Unsharing of semundo for tasks created with CLONE_SYSVSEM is not + * supported yet + */ +static int unshare_semundo(unsigned long unshare_flags, struct sem_undo_list **new_ulistp) +{ + if (unshare_flags & CLONE_SYSVSEM) + return -EINVAL; + + return 0; +} + +/* + * unshare allows a process to 'unshare' part of the process + * context which was originally shared using clone. copy_* + * functions used by do_fork() cannot be used here directly + * because they modify an inactive task_struct that is being + * constructed. Here we are modifying the current, active, + * task_struct. + */ +asmlinkage long sys_unshare(unsigned long unshare_flags) +{ + int err = 0; + struct fs_struct *fs, *new_fs = NULL; + struct namespace *ns, *new_ns = NULL; + struct sighand_struct *sigh, *new_sigh = NULL; + struct mm_struct *mm, *new_mm = NULL, *active_mm = NULL; + struct files_struct *fd, *new_fd = NULL; + struct sem_undo_list *new_ulist = NULL; + + check_unshare_flags(&unshare_flags); + + if ((err = unshare_thread(unshare_flags))) + goto bad_unshare_out; + if ((err = unshare_fs(unshare_flags, &new_fs))) + goto bad_unshare_cleanup_thread; + if ((err = unshare_namespace(unshare_flags, &new_ns))) + goto bad_unshare_cleanup_fs; + if ((err = unshare_sighand(unshare_flags, &new_sigh))) + goto bad_unshare_cleanup_ns; + if ((err = unshare_vm(unshare_flags, &new_mm))) + goto bad_unshare_cleanup_sigh; + if ((err = unshare_fd(unshare_flags, &new_fd))) + goto bad_unshare_cleanup_vm; + if ((err = unshare_semundo(unshare_flags, &new_ulist))) + goto bad_unshare_cleanup_fd; + + if (new_fs || new_ns || new_sigh || new_mm || new_fd || new_ulist) { + + task_lock(current); + + if (new_fs) { + fs = current->fs; + current->fs = new_fs; + new_fs = fs; + } + + if (new_ns) { + ns = current->namespace; + current->namespace = new_ns; + new_ns = ns; + } + + if (new_sigh) { + sigh = current->sighand; + current->sighand = new_sigh; + new_sigh = sigh; + } + + if (new_mm) { + mm = current->mm; + active_mm = current->active_mm; + current->mm = new_mm; + current->active_mm = new_mm; + activate_mm(active_mm, new_mm); + new_mm = mm; + } + + if (new_fd) { + fd = current->files; + current->files = new_fd; + new_fd = fd; + } + + task_unlock(current); + } + +bad_unshare_cleanup_fd: + if (new_fd) + put_files_struct(new_fd); + +bad_unshare_cleanup_vm: + if (new_mm) + mmput(new_mm); + +bad_unshare_cleanup_sigh: + if (new_sigh) + if (atomic_dec_and_test(&new_sigh->count)) + kmem_cache_free(sighand_cachep, new_sigh); + +bad_unshare_cleanup_ns: + if (new_ns) + put_namespace(new_ns); + +bad_unshare_cleanup_fs: + if (new_fs) + put_fs_struct(new_fs); + +bad_unshare_cleanup_thread: +bad_unshare_out: + return err; +} -- cgit From 99d1419d96d7df9cfa56bc977810be831bd5ef64 Mon Sep 17 00:00:00 2001 From: JANAK DESAI Date: Tue, 7 Feb 2006 12:58:59 -0800 Subject: [PATCH] unshare system call -v5: unshare filesystem info If filesystem structure is being shared, allocate a new one and copy information from the current, shared, structure. Signed-off-by: Janak Desai Cc: Al Viro Cc: Christoph Hellwig Cc: Michael Kerrisk Cc: Andi Kleen Cc: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index 6eb9362775f9..598e5c27242c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1371,15 +1371,18 @@ static int unshare_thread(unsigned long unshare_flags) } /* - * Unsharing of fs info for tasks created with CLONE_FS is not supported yet + * Unshare the filesystem structure if it is being shared */ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp) { struct fs_struct *fs = current->fs; if ((unshare_flags & CLONE_FS) && - (fs && atomic_read(&fs->count) > 1)) - return -EINVAL; + (fs && atomic_read(&fs->count) > 1)) { + *new_fsp = __copy_fs_struct(current->fs); + if (!*new_fsp) + return -ENOMEM; + } return 0; } -- cgit From 741a295130606143edbf9fc740f633dbc1e6225f Mon Sep 17 00:00:00 2001 From: JANAK DESAI Date: Tue, 7 Feb 2006 12:59:00 -0800 Subject: [PATCH] unshare system call -v5: unshare namespace If the namespace structure is being shared, allocate a new one and copy information from the current, shared, structure. Signed-off-by: Janak Desai Cc: Al Viro Cc: Christoph Hellwig Cc: Michael Kerrisk Cc: Andi Kleen Cc: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index 598e5c27242c..07dd241aa1e0 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1388,16 +1388,21 @@ static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp) } /* - * Unsharing of namespace for tasks created without CLONE_NEWNS is not - * supported yet + * Unshare the namespace structure if it is being shared */ -static int unshare_namespace(unsigned long unshare_flags, struct namespace **new_nsp) +static int unshare_namespace(unsigned long unshare_flags, struct namespace **new_nsp, struct fs_struct *new_fs) { struct namespace *ns = current->namespace; if ((unshare_flags & CLONE_NEWNS) && - (ns && atomic_read(&ns->count) > 1)) - return -EINVAL; + (ns && atomic_read(&ns->count) > 1)) { + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + *new_nsp = dup_namespace(current, new_fs ? new_fs : current->fs); + if (!*new_nsp) + return -ENOMEM; + } return 0; } @@ -1482,7 +1487,7 @@ asmlinkage long sys_unshare(unsigned long unshare_flags) goto bad_unshare_out; if ((err = unshare_fs(unshare_flags, &new_fs))) goto bad_unshare_cleanup_thread; - if ((err = unshare_namespace(unshare_flags, &new_ns))) + if ((err = unshare_namespace(unshare_flags, &new_ns, new_fs))) goto bad_unshare_cleanup_fs; if ((err = unshare_sighand(unshare_flags, &new_sigh))) goto bad_unshare_cleanup_ns; -- cgit From a0a7ec308f1be5957b20a1a535d21f683dfd83f0 Mon Sep 17 00:00:00 2001 From: JANAK DESAI Date: Tue, 7 Feb 2006 12:59:01 -0800 Subject: [PATCH] unshare system call -v5: unshare vm If vm structure is being shared, allocate a new one and copy information from the current, shared, structure. Signed-off-by: Janak Desai Cc: Al Viro Cc: Christoph Hellwig Cc: Michael Kerrisk Cc: Andi Kleen Cc: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 87 ++++++++++++++++++++++++++++++++++++++--------------------- 1 file changed, 56 insertions(+), 31 deletions(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index 07dd241aa1e0..d1aceaea3f33 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -446,6 +446,55 @@ void mm_release(struct task_struct *tsk, struct mm_struct *mm) } } +/* + * Allocate a new mm structure and copy contents from the + * mm structure of the passed in task structure. + */ +static struct mm_struct *dup_mm(struct task_struct *tsk) +{ + struct mm_struct *mm, *oldmm = current->mm; + int err; + + if (!oldmm) + return NULL; + + mm = allocate_mm(); + if (!mm) + goto fail_nomem; + + memcpy(mm, oldmm, sizeof(*mm)); + + if (!mm_init(mm)) + goto fail_nomem; + + if (init_new_context(tsk, mm)) + goto fail_nocontext; + + err = dup_mmap(mm, oldmm); + if (err) + goto free_pt; + + mm->hiwater_rss = get_mm_rss(mm); + mm->hiwater_vm = mm->total_vm; + + return mm; + +free_pt: + mmput(mm); + +fail_nomem: + return NULL; + +fail_nocontext: + /* + * If init_new_context() failed, we cannot use mmput() to free the mm + * because it calls destroy_context() + */ + mm_free_pgd(mm); + free_mm(mm); + return NULL; +} + static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) { struct mm_struct * mm, *oldmm; @@ -473,43 +522,17 @@ static int copy_mm(unsigned long clone_flags, struct task_struct * tsk) } retval = -ENOMEM; - mm = allocate_mm(); + mm = dup_mm(tsk); if (!mm) goto fail_nomem; - /* Copy the current MM stuff.. */ - memcpy(mm, oldmm, sizeof(*mm)); - if (!mm_init(mm)) - goto fail_nomem; - - if (init_new_context(tsk,mm)) - goto fail_nocontext; - - retval = dup_mmap(mm, oldmm); - if (retval) - goto free_pt; - - mm->hiwater_rss = get_mm_rss(mm); - mm->hiwater_vm = mm->total_vm; - good_mm: tsk->mm = mm; tsk->active_mm = mm; return 0; -free_pt: - mmput(mm); fail_nomem: return retval; - -fail_nocontext: - /* - * If init_new_context() failed, we cannot use mmput() to free the mm - * because it calls destroy_context() - */ - mm_free_pgd(mm); - free_mm(mm); - return retval; } static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old) @@ -1423,18 +1446,20 @@ static int unshare_sighand(unsigned long unshare_flags, struct sighand_struct ** } /* - * Unsharing of vm for tasks created with CLONE_VM is not supported yet + * Unshare vm if it is being shared */ static int unshare_vm(unsigned long unshare_flags, struct mm_struct **new_mmp) { struct mm_struct *mm = current->mm; if ((unshare_flags & CLONE_VM) && - (mm && atomic_read(&mm->mm_users) > 1)) - return -EINVAL; + (mm && atomic_read(&mm->mm_users) > 1)) { + *new_mmp = dup_mm(current); + if (!*new_mmp) + return -ENOMEM; + } return 0; - } /* -- cgit From a016f3389c06606dd80e687942ff3c71d41823c4 Mon Sep 17 00:00:00 2001 From: JANAK DESAI Date: Tue, 7 Feb 2006 12:59:02 -0800 Subject: [PATCH] unshare system call -v5: unshare files If the file descriptor structure is being shared, allocate a new one and copy information from the current, shared, structure. Signed-off-by: Janak Desai Cc: Al Viro Cc: Christoph Hellwig Cc: Michael Kerrisk Cc: Andi Kleen Cc: Paul Mackerras Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- kernel/fork.c | 81 +++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 51 insertions(+), 30 deletions(-) (limited to 'kernel/fork.c') diff --git a/kernel/fork.c b/kernel/fork.c index d1aceaea3f33..8e88b374cee9 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -620,32 +620,17 @@ out: return newf; } -static int copy_files(unsigned long clone_flags, struct task_struct * tsk) +/* + * Allocate a new files structure and copy contents from the + * passed in files structure. + */ +static struct files_struct *dup_fd(struct files_struct *oldf, int *errorp) { - struct files_struct *oldf, *newf; + struct files_struct *newf; struct file **old_fds, **new_fds; - int open_files, size, i, error = 0, expand; + int open_files, size, i, expand; struct fdtable *old_fdt, *new_fdt; - /* - * A background process may not have any files ... - */ - oldf = current->files; - if (!oldf) - goto out; - - if (clone_flags & CLONE_FILES) { - atomic_inc(&oldf->count); - goto out; - } - - /* - * Note: we may be using current for both targets (See exec.c) - * This works because we cache current->files (old) as oldf. Don't - * break this. - */ - tsk->files = NULL; - error = -ENOMEM; newf = alloc_files(); if (!newf) goto out; @@ -674,9 +659,9 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk) if (expand) { spin_unlock(&oldf->file_lock); spin_lock(&newf->file_lock); - error = expand_files(newf, open_files-1); + *errorp = expand_files(newf, open_files-1); spin_unlock(&newf->file_lock); - if (error < 0) + if (*errorp < 0) goto out_release; new_fdt = files_fdtable(newf); /* @@ -725,10 +710,8 @@ static int copy_files(unsigned long clone_flags, struct task_struct * tsk) memset(&new_fdt->close_on_exec->fds_bits[start], 0, left); } - tsk->files = newf; - error = 0; out: - return error; + return newf; out_release: free_fdset (new_fdt->close_on_exec, new_fdt->max_fdset); @@ -738,6 +721,40 @@ out_release: goto out; } +static int copy_files(unsigned long clone_flags, struct task_struct * tsk) +{ + struct files_struct *oldf, *newf; + int error = 0; + + /* + * A background process may not have any files ... + */ + oldf = current->files; + if (!oldf) + goto out; + + if (clone_flags & CLONE_FILES) { + atomic_inc(&oldf->count); + goto out; + } + + /* + * Note: we may be using current for both targets (See exec.c) + * This works because we cache current->files (old) as oldf. Don't + * break this. + */ + tsk->files = NULL; + error = -ENOMEM; + newf = dup_fd(oldf, &error); + if (!newf) + goto out; + + tsk->files = newf; + error = 0; +out: + return error; +} + /* * Helper to unshare the files of the current task. * We don't want to expose copy_files internals to @@ -1463,15 +1480,19 @@ static int unshare_vm(unsigned long unshare_flags, struct mm_struct **new_mmp) } /* - * Unsharing of files for tasks created with CLONE_FILES is not supported yet + * Unshare file descriptor table if it is being shared */ static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp) { struct files_struct *fd = current->files; + int error = 0; if ((unshare_flags & CLONE_FILES) && - (fd && atomic_read(&fd->count) > 1)) - return -EINVAL; + (fd && atomic_read(&fd->count) > 1)) { + *new_fdp = dup_fd(fd, &error); + if (!*new_fdp) + return error; + } return 0; } -- cgit