diff options
Diffstat (limited to 'arch/um/os-Linux')
-rw-r--r-- | arch/um/os-Linux/Makefile | 2 | ||||
-rw-r--r-- | arch/um/os-Linux/drivers/Makefile | 13 | ||||
-rw-r--r-- | arch/um/os-Linux/drivers/etap.h | 21 | ||||
-rw-r--r-- | arch/um/os-Linux/drivers/ethertap_kern.c | 100 | ||||
-rw-r--r-- | arch/um/os-Linux/drivers/ethertap_user.c | 248 | ||||
-rw-r--r-- | arch/um/os-Linux/drivers/tuntap.h | 21 | ||||
-rw-r--r-- | arch/um/os-Linux/drivers/tuntap_kern.c | 86 | ||||
-rw-r--r-- | arch/um/os-Linux/drivers/tuntap_user.c | 215 | ||||
-rw-r--r-- | arch/um/os-Linux/file.c | 15 | ||||
-rw-r--r-- | arch/um/os-Linux/internal.h | 5 | ||||
-rw-r--r-- | arch/um/os-Linux/process.c | 31 | ||||
-rw-r--r-- | arch/um/os-Linux/registers.c | 4 | ||||
-rw-r--r-- | arch/um/os-Linux/sigio.c | 3 | ||||
-rw-r--r-- | arch/um/os-Linux/signal.c | 19 | ||||
-rw-r--r-- | arch/um/os-Linux/skas/mem.c | 103 | ||||
-rw-r--r-- | arch/um/os-Linux/skas/process.c | 482 | ||||
-rw-r--r-- | arch/um/os-Linux/start_up.c | 195 |
17 files changed, 689 insertions, 874 deletions
diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile index 049dfa5bc9c6..fae836713487 100644 --- a/arch/um/os-Linux/Makefile +++ b/arch/um/os-Linux/Makefile @@ -8,7 +8,7 @@ KCOV_INSTRUMENT := n obj-y = execvp.o file.o helper.o irq.o main.o mem.o process.o \ registers.o sigio.o signal.o start_up.o time.o tty.o \ - umid.o user_syms.o util.o drivers/ skas/ + umid.o user_syms.o util.o skas/ CFLAGS_signal.o += -Wframe-larger-than=4096 diff --git a/arch/um/os-Linux/drivers/Makefile b/arch/um/os-Linux/drivers/Makefile deleted file mode 100644 index cf2d75bb1884..000000000000 --- a/arch/um/os-Linux/drivers/Makefile +++ /dev/null @@ -1,13 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -# -# Copyright (C) 2000, 2002 Jeff Dike (jdike@karaya.com) -# - -ethertap-objs := ethertap_kern.o ethertap_user.o -tuntap-objs := tuntap_kern.o tuntap_user.o - -obj-y = -obj-$(CONFIG_UML_NET_ETHERTAP) += ethertap.o -obj-$(CONFIG_UML_NET_TUNTAP) += tuntap.o - -include $(srctree)/arch/um/scripts/Makefile.rules diff --git a/arch/um/os-Linux/drivers/etap.h b/arch/um/os-Linux/drivers/etap.h deleted file mode 100644 index a475259f90e1..000000000000 --- a/arch/um/os-Linux/drivers/etap.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - */ - -#ifndef __DRIVERS_ETAP_H -#define __DRIVERS_ETAP_H - -#include <net_user.h> - -struct ethertap_data { - char *dev_name; - char *gate_addr; - int data_fd; - int control_fd; - void *dev; -}; - -extern const struct net_user_info ethertap_user_info; - -#endif diff --git a/arch/um/os-Linux/drivers/ethertap_kern.c b/arch/um/os-Linux/drivers/ethertap_kern.c deleted file mode 100644 index 5e5ee40680ce..000000000000 --- a/arch/um/os-Linux/drivers/ethertap_kern.c +++ /dev/null @@ -1,100 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and - * James Leu (jleu@mindspring.net). - * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Copyright (C) 2001 by various other people who didn't put their name here. - */ - -#include <linux/init.h> -#include <linux/netdevice.h> -#include "etap.h" -#include <net_kern.h> - -struct ethertap_init { - char *dev_name; - char *gate_addr; -}; - -static void etap_init(struct net_device *dev, void *data) -{ - struct uml_net_private *pri; - struct ethertap_data *epri; - struct ethertap_init *init = data; - - pri = netdev_priv(dev); - epri = (struct ethertap_data *) pri->user; - epri->dev_name = init->dev_name; - epri->gate_addr = init->gate_addr; - epri->data_fd = -1; - epri->control_fd = -1; - epri->dev = dev; - - printk(KERN_INFO "ethertap backend - %s", epri->dev_name); - if (epri->gate_addr != NULL) - printk(KERN_CONT ", IP = %s", epri->gate_addr); - printk(KERN_CONT "\n"); -} - -static int etap_read(int fd, struct sk_buff *skb, struct uml_net_private *lp) -{ - int len; - - len = net_recvfrom(fd, skb_mac_header(skb), - skb->dev->mtu + 2 + ETH_HEADER_ETHERTAP); - if (len <= 0) - return(len); - - skb_pull(skb, 2); - len -= 2; - return len; -} - -static int etap_write(int fd, struct sk_buff *skb, struct uml_net_private *lp) -{ - skb_push(skb, 2); - return net_send(fd, skb->data, skb->len); -} - -const struct net_kern_info ethertap_kern_info = { - .init = etap_init, - .protocol = eth_protocol, - .read = etap_read, - .write = etap_write, -}; - -static int ethertap_setup(char *str, char **mac_out, void *data) -{ - struct ethertap_init *init = data; - - *init = ((struct ethertap_init) - { .dev_name = NULL, - .gate_addr = NULL }); - if (tap_setup_common(str, "ethertap", &init->dev_name, mac_out, - &init->gate_addr)) - return 0; - if (init->dev_name == NULL) { - printk(KERN_ERR "ethertap_setup : Missing tap device name\n"); - return 0; - } - - return 1; -} - -static struct transport ethertap_transport = { - .list = LIST_HEAD_INIT(ethertap_transport.list), - .name = "ethertap", - .setup = ethertap_setup, - .user = ðertap_user_info, - .kern = ðertap_kern_info, - .private_size = sizeof(struct ethertap_data), - .setup_size = sizeof(struct ethertap_init), -}; - -static int register_ethertap(void) -{ - register_transport(ðertap_transport); - return 0; -} - -late_initcall(register_ethertap); diff --git a/arch/um/os-Linux/drivers/ethertap_user.c b/arch/um/os-Linux/drivers/ethertap_user.c deleted file mode 100644 index bdf215c0eca7..000000000000 --- a/arch/um/os-Linux/drivers/ethertap_user.c +++ /dev/null @@ -1,248 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and - * James Leu (jleu@mindspring.net). - * Copyright (C) 2001 by various other people who didn't put their name here. - */ - -#include <stdio.h> -#include <unistd.h> -#include <errno.h> -#include <string.h> -#include <sys/socket.h> -#include <sys/wait.h> -#include "etap.h" -#include <os.h> -#include <net_user.h> -#include <um_malloc.h> - -#define MAX_PACKET ETH_MAX_PACKET - -static int etap_user_init(void *data, void *dev) -{ - struct ethertap_data *pri = data; - - pri->dev = dev; - return 0; -} - -struct addr_change { - enum { ADD_ADDR, DEL_ADDR } what; - unsigned char addr[4]; - unsigned char netmask[4]; -}; - -static void etap_change(int op, unsigned char *addr, unsigned char *netmask, - int fd) -{ - struct addr_change change; - char *output; - int n; - - change.what = op; - memcpy(change.addr, addr, sizeof(change.addr)); - memcpy(change.netmask, netmask, sizeof(change.netmask)); - CATCH_EINTR(n = write(fd, &change, sizeof(change))); - if (n != sizeof(change)) { - printk(UM_KERN_ERR "etap_change - request failed, err = %d\n", - errno); - return; - } - - output = uml_kmalloc(UM_KERN_PAGE_SIZE, UM_GFP_KERNEL); - if (output == NULL) - printk(UM_KERN_ERR "etap_change : Failed to allocate output " - "buffer\n"); - read_output(fd, output, UM_KERN_PAGE_SIZE); - if (output != NULL) { - printk("%s", output); - kfree(output); - } -} - -static void etap_open_addr(unsigned char *addr, unsigned char *netmask, - void *arg) -{ - etap_change(ADD_ADDR, addr, netmask, *((int *) arg)); -} - -static void etap_close_addr(unsigned char *addr, unsigned char *netmask, - void *arg) -{ - etap_change(DEL_ADDR, addr, netmask, *((int *) arg)); -} - -struct etap_pre_exec_data { - int control_remote; - int control_me; - int data_me; -}; - -static void etap_pre_exec(void *arg) -{ - struct etap_pre_exec_data *data = arg; - - dup2(data->control_remote, 1); - close(data->data_me); - close(data->control_me); -} - -static int etap_tramp(char *dev, char *gate, int control_me, - int control_remote, int data_me, int data_remote) -{ - struct etap_pre_exec_data pe_data; - int pid, err, n; - char version_buf[sizeof("nnnnn\0")]; - char data_fd_buf[sizeof("nnnnnn\0")]; - char gate_buf[sizeof("nnn.nnn.nnn.nnn\0")]; - char *setup_args[] = { "uml_net", version_buf, "ethertap", dev, - data_fd_buf, gate_buf, NULL }; - char *nosetup_args[] = { "uml_net", version_buf, "ethertap", - dev, data_fd_buf, NULL }; - char **args, c; - - sprintf(data_fd_buf, "%d", data_remote); - sprintf(version_buf, "%d", UML_NET_VERSION); - if (gate != NULL) { - strscpy(gate_buf, gate); - args = setup_args; - } - else args = nosetup_args; - - err = 0; - pe_data.control_remote = control_remote; - pe_data.control_me = control_me; - pe_data.data_me = data_me; - pid = run_helper(etap_pre_exec, &pe_data, args); - - if (pid < 0) - err = pid; - close(data_remote); - close(control_remote); - CATCH_EINTR(n = read(control_me, &c, sizeof(c))); - if (n != sizeof(c)) { - err = -errno; - printk(UM_KERN_ERR "etap_tramp : read of status failed, " - "err = %d\n", -err); - return err; - } - if (c != 1) { - printk(UM_KERN_ERR "etap_tramp : uml_net failed\n"); - err = helper_wait(pid); - } - return err; -} - -static int etap_open(void *data) -{ - struct ethertap_data *pri = data; - char *output; - int data_fds[2], control_fds[2], err, output_len; - - err = tap_open_common(pri->dev, pri->gate_addr); - if (err) - return err; - - err = socketpair(AF_UNIX, SOCK_DGRAM, 0, data_fds); - if (err) { - err = -errno; - printk(UM_KERN_ERR "etap_open - data socketpair failed - " - "err = %d\n", errno); - return err; - } - - err = socketpair(AF_UNIX, SOCK_STREAM, 0, control_fds); - if (err) { - err = -errno; - printk(UM_KERN_ERR "etap_open - control socketpair failed - " - "err = %d\n", errno); - goto out_close_data; - } - - err = etap_tramp(pri->dev_name, pri->gate_addr, control_fds[0], - control_fds[1], data_fds[0], data_fds[1]); - output_len = UM_KERN_PAGE_SIZE; - output = uml_kmalloc(output_len, UM_GFP_KERNEL); - read_output(control_fds[0], output, output_len); - - if (output == NULL) - printk(UM_KERN_ERR "etap_open : failed to allocate output " - "buffer\n"); - else { - printk("%s", output); - kfree(output); - } - - if (err < 0) { - printk(UM_KERN_ERR "etap_tramp failed - err = %d\n", -err); - goto out_close_control; - } - - pri->data_fd = data_fds[0]; - pri->control_fd = control_fds[0]; - iter_addresses(pri->dev, etap_open_addr, &pri->control_fd); - return data_fds[0]; - -out_close_control: - close(control_fds[0]); - close(control_fds[1]); -out_close_data: - close(data_fds[0]); - close(data_fds[1]); - return err; -} - -static void etap_close(int fd, void *data) -{ - struct ethertap_data *pri = data; - - iter_addresses(pri->dev, etap_close_addr, &pri->control_fd); - close(fd); - - if (shutdown(pri->data_fd, SHUT_RDWR) < 0) - printk(UM_KERN_ERR "etap_close - shutdown data socket failed, " - "errno = %d\n", errno); - - if (shutdown(pri->control_fd, SHUT_RDWR) < 0) - printk(UM_KERN_ERR "etap_close - shutdown control socket " - "failed, errno = %d\n", errno); - - close(pri->data_fd); - pri->data_fd = -1; - close(pri->control_fd); - pri->control_fd = -1; -} - -static void etap_add_addr(unsigned char *addr, unsigned char *netmask, - void *data) -{ - struct ethertap_data *pri = data; - - tap_check_ips(pri->gate_addr, addr); - if (pri->control_fd == -1) - return; - etap_open_addr(addr, netmask, &pri->control_fd); -} - -static void etap_del_addr(unsigned char *addr, unsigned char *netmask, - void *data) -{ - struct ethertap_data *pri = data; - - if (pri->control_fd == -1) - return; - - etap_close_addr(addr, netmask, &pri->control_fd); -} - -const struct net_user_info ethertap_user_info = { - .init = etap_user_init, - .open = etap_open, - .close = etap_close, - .remove = NULL, - .add_address = etap_add_addr, - .delete_address = etap_del_addr, - .mtu = ETH_MAX_PACKET, - .max_packet = ETH_MAX_PACKET + ETH_HEADER_ETHERTAP, -}; diff --git a/arch/um/os-Linux/drivers/tuntap.h b/arch/um/os-Linux/drivers/tuntap.h deleted file mode 100644 index e364e42abfc5..000000000000 --- a/arch/um/os-Linux/drivers/tuntap.h +++ /dev/null @@ -1,21 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - */ - -#ifndef __UM_TUNTAP_H -#define __UM_TUNTAP_H - -#include <net_user.h> - -struct tuntap_data { - char *dev_name; - int fixed_config; - char *gate_addr; - int fd; - void *dev; -}; - -extern const struct net_user_info tuntap_user_info; - -#endif diff --git a/arch/um/os-Linux/drivers/tuntap_kern.c b/arch/um/os-Linux/drivers/tuntap_kern.c deleted file mode 100644 index ff022d9cf0dd..000000000000 --- a/arch/um/os-Linux/drivers/tuntap_kern.c +++ /dev/null @@ -1,86 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - */ - -#include <linux/netdevice.h> -#include <linux/init.h> -#include <linux/skbuff.h> -#include <asm/errno.h> -#include <net_kern.h> -#include "tuntap.h" - -struct tuntap_init { - char *dev_name; - char *gate_addr; -}; - -static void tuntap_init(struct net_device *dev, void *data) -{ - struct uml_net_private *pri; - struct tuntap_data *tpri; - struct tuntap_init *init = data; - - pri = netdev_priv(dev); - tpri = (struct tuntap_data *) pri->user; - tpri->dev_name = init->dev_name; - tpri->fixed_config = (init->dev_name != NULL); - tpri->gate_addr = init->gate_addr; - tpri->fd = -1; - tpri->dev = dev; - - printk(KERN_INFO "TUN/TAP backend - "); - if (tpri->gate_addr != NULL) - printk(KERN_CONT "IP = %s", tpri->gate_addr); - printk(KERN_CONT "\n"); -} - -static int tuntap_read(int fd, struct sk_buff *skb, struct uml_net_private *lp) -{ - return net_read(fd, skb_mac_header(skb), - skb->dev->mtu + ETH_HEADER_OTHER); -} - -static int tuntap_write(int fd, struct sk_buff *skb, struct uml_net_private *lp) -{ - return net_write(fd, skb->data, skb->len); -} - -const struct net_kern_info tuntap_kern_info = { - .init = tuntap_init, - .protocol = eth_protocol, - .read = tuntap_read, - .write = tuntap_write, -}; - -static int tuntap_setup(char *str, char **mac_out, void *data) -{ - struct tuntap_init *init = data; - - *init = ((struct tuntap_init) - { .dev_name = NULL, - .gate_addr = NULL }); - if (tap_setup_common(str, "tuntap", &init->dev_name, mac_out, - &init->gate_addr)) - return 0; - - return 1; -} - -static struct transport tuntap_transport = { - .list = LIST_HEAD_INIT(tuntap_transport.list), - .name = "tuntap", - .setup = tuntap_setup, - .user = &tuntap_user_info, - .kern = &tuntap_kern_info, - .private_size = sizeof(struct tuntap_data), - .setup_size = sizeof(struct tuntap_init), -}; - -static int register_tuntap(void) -{ - register_transport(&tuntap_transport); - return 0; -} - -late_initcall(register_tuntap); diff --git a/arch/um/os-Linux/drivers/tuntap_user.c b/arch/um/os-Linux/drivers/tuntap_user.c deleted file mode 100644 index 91f0e27ca3a6..000000000000 --- a/arch/um/os-Linux/drivers/tuntap_user.c +++ /dev/null @@ -1,215 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) - */ - -#include <stdio.h> -#include <unistd.h> -#include <errno.h> -#include <string.h> -#include <linux/if_tun.h> -#include <net/if.h> -#include <sys/ioctl.h> -#include <sys/socket.h> -#include <sys/wait.h> -#include <sys/uio.h> -#include <kern_util.h> -#include <os.h> -#include "tuntap.h" - -static int tuntap_user_init(void *data, void *dev) -{ - struct tuntap_data *pri = data; - - pri->dev = dev; - return 0; -} - -static void tuntap_add_addr(unsigned char *addr, unsigned char *netmask, - void *data) -{ - struct tuntap_data *pri = data; - - tap_check_ips(pri->gate_addr, addr); - if ((pri->fd == -1) || pri->fixed_config) - return; - open_addr(addr, netmask, pri->dev_name); -} - -static void tuntap_del_addr(unsigned char *addr, unsigned char *netmask, - void *data) -{ - struct tuntap_data *pri = data; - - if ((pri->fd == -1) || pri->fixed_config) - return; - close_addr(addr, netmask, pri->dev_name); -} - -struct tuntap_pre_exec_data { - int stdout_fd; - int close_me; -}; - -static void tuntap_pre_exec(void *arg) -{ - struct tuntap_pre_exec_data *data = arg; - - dup2(data->stdout_fd, 1); - close(data->close_me); -} - -static int tuntap_open_tramp(char *gate, int *fd_out, int me, int remote, - char *buffer, int buffer_len, int *used_out) -{ - struct tuntap_pre_exec_data data; - char version_buf[sizeof("nnnnn\0")]; - char *argv[] = { "uml_net", version_buf, "tuntap", "up", gate, - NULL }; - char buf[CMSG_SPACE(sizeof(*fd_out))]; - struct msghdr msg; - struct cmsghdr *cmsg; - struct iovec iov; - int pid, n, err; - - sprintf(version_buf, "%d", UML_NET_VERSION); - - data.stdout_fd = remote; - data.close_me = me; - - pid = run_helper(tuntap_pre_exec, &data, argv); - - if (pid < 0) - return pid; - - close(remote); - - msg.msg_name = NULL; - msg.msg_namelen = 0; - if (buffer != NULL) { - iov = ((struct iovec) { buffer, buffer_len }); - msg.msg_iov = &iov; - msg.msg_iovlen = 1; - } - else { - msg.msg_iov = NULL; - msg.msg_iovlen = 0; - } - msg.msg_control = buf; - msg.msg_controllen = sizeof(buf); - msg.msg_flags = 0; - n = recvmsg(me, &msg, 0); - *used_out = n; - if (n < 0) { - err = -errno; - printk(UM_KERN_ERR "tuntap_open_tramp : recvmsg failed - " - "errno = %d\n", errno); - return err; - } - helper_wait(pid); - - cmsg = CMSG_FIRSTHDR(&msg); - if (cmsg == NULL) { - printk(UM_KERN_ERR "tuntap_open_tramp : didn't receive a " - "message\n"); - return -EINVAL; - } - if ((cmsg->cmsg_level != SOL_SOCKET) || - (cmsg->cmsg_type != SCM_RIGHTS)) { - printk(UM_KERN_ERR "tuntap_open_tramp : didn't receive a " - "descriptor\n"); - return -EINVAL; - } - *fd_out = ((int *) CMSG_DATA(cmsg))[0]; - os_set_exec_close(*fd_out); - return 0; -} - -static int tuntap_open(void *data) -{ - struct ifreq ifr; - struct tuntap_data *pri = data; - char *output, *buffer; - int err, fds[2], len, used; - - err = tap_open_common(pri->dev, pri->gate_addr); - if (err < 0) - return err; - - if (pri->fixed_config) { - pri->fd = os_open_file("/dev/net/tun", - of_cloexec(of_rdwr(OPENFLAGS())), 0); - if (pri->fd < 0) { - printk(UM_KERN_ERR "Failed to open /dev/net/tun, " - "err = %d\n", -pri->fd); - return pri->fd; - } - memset(&ifr, 0, sizeof(ifr)); - ifr.ifr_flags = IFF_TAP | IFF_NO_PI; - strscpy(ifr.ifr_name, pri->dev_name); - if (ioctl(pri->fd, TUNSETIFF, &ifr) < 0) { - err = -errno; - printk(UM_KERN_ERR "TUNSETIFF failed, errno = %d\n", - errno); - close(pri->fd); - return err; - } - } - else { - err = socketpair(AF_UNIX, SOCK_DGRAM, 0, fds); - if (err) { - err = -errno; - printk(UM_KERN_ERR "tuntap_open : socketpair failed - " - "errno = %d\n", errno); - return err; - } - - buffer = get_output_buffer(&len); - if (buffer != NULL) - len--; - used = 0; - - err = tuntap_open_tramp(pri->gate_addr, &pri->fd, fds[0], - fds[1], buffer, len, &used); - - output = buffer; - if (err < 0) { - printk("%s", output); - free_output_buffer(buffer); - printk(UM_KERN_ERR "tuntap_open_tramp failed - " - "err = %d\n", -err); - return err; - } - - pri->dev_name = uml_strdup(buffer); - output += IFNAMSIZ; - printk("%s", output); - free_output_buffer(buffer); - - close(fds[0]); - iter_addresses(pri->dev, open_addr, pri->dev_name); - } - - return pri->fd; -} - -static void tuntap_close(int fd, void *data) -{ - struct tuntap_data *pri = data; - - if (!pri->fixed_config) - iter_addresses(pri->dev, close_addr, pri->dev_name); - close(fd); - pri->fd = -1; -} - -const struct net_user_info tuntap_user_info = { - .init = tuntap_user_init, - .open = tuntap_open, - .close = tuntap_close, - .remove = NULL, - .add_address = tuntap_add_addr, - .delete_address = tuntap_del_addr, - .mtu = ETH_MAX_PACKET, - .max_packet = ETH_MAX_PACKET + ETH_HEADER_OTHER, -}; diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c index a0d01c68ce3e..617886d1fb1e 100644 --- a/arch/um/os-Linux/file.c +++ b/arch/um/os-Linux/file.c @@ -106,21 +106,6 @@ int os_get_ifname(int fd, char* namebuf) return 0; } -int os_set_slip(int fd) -{ - int disc, sencap; - - disc = N_SLIP; - if (ioctl(fd, TIOCSETD, &disc) < 0) - return -errno; - - sencap = 0; - if (ioctl(fd, SIOCSIFENCAP, &sencap) < 0) - return -errno; - - return 0; -} - int os_mode_fd(int fd, int mode) { int err; diff --git a/arch/um/os-Linux/internal.h b/arch/um/os-Linux/internal.h index 317fca190c2b..5d8d3b0817a9 100644 --- a/arch/um/os-Linux/internal.h +++ b/arch/um/os-Linux/internal.h @@ -2,6 +2,9 @@ #ifndef __UM_OS_LINUX_INTERNAL_H #define __UM_OS_LINUX_INTERNAL_H +#include <mm_id.h> +#include <stub-data.h> + /* * elf_aux.c */ @@ -16,5 +19,5 @@ void check_tmpexec(void); * skas/process.c */ void wait_stub_done(int pid); - +void wait_stub_done_seccomp(struct mm_id *mm_idp, int running, int wait_sigsys); #endif /* __UM_OS_LINUX_INTERNAL_H */ diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c index 184566edeee9..00b49e90d05f 100644 --- a/arch/um/os-Linux/process.c +++ b/arch/um/os-Linux/process.c @@ -18,17 +18,29 @@ #include <init.h> #include <longjmp.h> #include <os.h> +#include <skas/skas.h> void os_alarm_process(int pid) { + if (pid <= 0) + return; + kill(pid, SIGALRM); } void os_kill_process(int pid, int reap_child) { + if (pid <= 0) + return; + + /* Block signals until child is reaped */ + block_signals(); + kill(pid, SIGKILL); if (reap_child) CATCH_EINTR(waitpid(pid, NULL, __WALL)); + + unblock_signals(); } /* Kill off a ptraced child by all means available. kill it normally first, @@ -38,11 +50,27 @@ void os_kill_process(int pid, int reap_child) void os_kill_ptraced_process(int pid, int reap_child) { + if (pid <= 0) + return; + + /* Block signals until child is reaped */ + block_signals(); + kill(pid, SIGKILL); ptrace(PTRACE_KILL, pid); ptrace(PTRACE_CONT, pid); if (reap_child) CATCH_EINTR(waitpid(pid, NULL, __WALL)); + + unblock_signals(); +} + +pid_t os_reap_child(void) +{ + int status; + + /* Try to reap a child */ + return waitpid(-1, &status, WNOHANG); } /* Don't use the glibc version, which caches the result in TLS. It misses some @@ -151,6 +179,9 @@ void init_new_thread_signals(void) set_handler(SIGBUS); signal(SIGHUP, SIG_IGN); set_handler(SIGIO); + /* We (currently) only use the child reaper IRQ in seccomp mode */ + if (using_seccomp) + set_handler(SIGCHLD); signal(SIGWINCH, SIG_IGN); } diff --git a/arch/um/os-Linux/registers.c b/arch/um/os-Linux/registers.c index d7ca148807b2..bfba2cbc9478 100644 --- a/arch/um/os-Linux/registers.c +++ b/arch/um/os-Linux/registers.c @@ -14,8 +14,8 @@ /* This is set once at boot time and not changed thereafter */ -static unsigned long exec_regs[MAX_REG_NR]; -static unsigned long *exec_fp_regs; +unsigned long exec_regs[MAX_REG_NR]; +unsigned long *exec_fp_regs; int init_pid_registers(int pid) { diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c index a05a6ecee756..6de145f8fe3d 100644 --- a/arch/um/os-Linux/sigio.c +++ b/arch/um/os-Linux/sigio.c @@ -12,6 +12,7 @@ #include <signal.h> #include <string.h> #include <sys/epoll.h> +#include <asm/unistd.h> #include <kern_util.h> #include <init.h> #include <os.h> @@ -46,7 +47,7 @@ static void *write_sigio_thread(void *unused) __func__, errno); } - CATCH_EINTR(r = tgkill(pid, pid, SIGIO)); + CATCH_EINTR(r = syscall(__NR_tgkill, pid, pid, SIGIO)); if (r < 0) printk(UM_KERN_ERR "%s: tgkill failed, errno = %d\n", __func__, errno); diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c index e71e5b4878d1..11f07f498270 100644 --- a/arch/um/os-Linux/signal.c +++ b/arch/um/os-Linux/signal.c @@ -29,6 +29,7 @@ void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *, void *mc) = [SIGBUS] = relay_signal, [SIGSEGV] = segv_handler, [SIGIO] = sigio_handler, + [SIGCHLD] = sigchld_handler, }; static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) @@ -44,7 +45,7 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) } /* enable signals if sig isn't IRQ signal */ - if ((sig != SIGIO) && (sig != SIGWINCH)) + if ((sig != SIGIO) && (sig != SIGWINCH) && (sig != SIGCHLD)) unblock_signals_trace(); (*sig_info[sig])(sig, si, &r, mc); @@ -64,6 +65,9 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc) #define SIGALRM_BIT 1 #define SIGALRM_MASK (1 << SIGALRM_BIT) +#define SIGCHLD_BIT 2 +#define SIGCHLD_MASK (1 << SIGCHLD_BIT) + int signals_enabled; #if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT) static int signals_blocked, signals_blocked_pending; @@ -102,6 +106,11 @@ static void sig_handler(int sig, struct siginfo *si, mcontext_t *mc) return; } + if (!enabled && (sig == SIGCHLD)) { + signals_pending |= SIGCHLD_MASK; + return; + } + block_signals_trace(); sig_handler_common(sig, si, mc); @@ -181,6 +190,8 @@ static void (*handlers[_NSIG])(int sig, struct siginfo *si, mcontext_t *mc) = { [SIGIO] = sig_handler, [SIGWINCH] = sig_handler, + /* SIGCHLD is only actually registered in seccomp mode. */ + [SIGCHLD] = sig_handler, [SIGALRM] = timer_alarm_handler, [SIGUSR1] = sigusr1_handler, @@ -309,6 +320,12 @@ void unblock_signals(void) if (save_pending & SIGIO_MASK) sig_handler_common(SIGIO, NULL, NULL); + if (save_pending & SIGCHLD_MASK) { + struct uml_pt_regs regs = {}; + + sigchld_handler(SIGCHLD, NULL, ®s, NULL); + } + /* Do not reenter the handler */ if ((save_pending & SIGALRM_MASK) && (!(signals_active & SIGALRM_MASK))) diff --git a/arch/um/os-Linux/skas/mem.c b/arch/um/os-Linux/skas/mem.c index d7f1814b0e5a..8b9921ac3ef8 100644 --- a/arch/um/os-Linux/skas/mem.c +++ b/arch/um/os-Linux/skas/mem.c @@ -43,6 +43,16 @@ void syscall_stub_dump_error(struct mm_id *mm_idp) print_hex_dump(UM_KERN_ERR, " syscall data: ", 0, 16, 4, sc, sizeof(*sc), 0); + + if (using_seccomp) { + printk(UM_KERN_ERR "%s: FD map num: %d", __func__, + mm_idp->syscall_fd_num); + print_hex_dump(UM_KERN_ERR, + " FD map: ", 0, 16, + sizeof(mm_idp->syscall_fd_map[0]), + mm_idp->syscall_fd_map, + sizeof(mm_idp->syscall_fd_map), 0); + } } static inline unsigned long *check_init_stack(struct mm_id * mm_idp, @@ -80,27 +90,32 @@ static inline long do_syscall_stub(struct mm_id *mm_idp) int n, i; int err, pid = mm_idp->pid; - n = ptrace_setregs(pid, syscall_regs); - if (n < 0) { - printk(UM_KERN_ERR "Registers - \n"); - for (i = 0; i < MAX_REG_NR; i++) - printk(UM_KERN_ERR "\t%d\t0x%lx\n", i, syscall_regs[i]); - panic("%s : PTRACE_SETREGS failed, errno = %d\n", - __func__, -n); - } - /* Inform process how much we have filled in. */ proc_data->syscall_data_len = mm_idp->syscall_data_len; - err = ptrace(PTRACE_CONT, pid, 0, 0); - if (err) - panic("Failed to continue stub, pid = %d, errno = %d\n", pid, - errno); - - wait_stub_done(pid); + if (using_seccomp) { + proc_data->restart_wait = 1; + wait_stub_done_seccomp(mm_idp, 0, 1); + } else { + n = ptrace_setregs(pid, syscall_regs); + if (n < 0) { + printk(UM_KERN_ERR "Registers -\n"); + for (i = 0; i < MAX_REG_NR; i++) + printk(UM_KERN_ERR "\t%d\t0x%lx\n", i, syscall_regs[i]); + panic("%s : PTRACE_SETREGS failed, errno = %d\n", + __func__, -n); + } + + err = ptrace(PTRACE_CONT, pid, 0, 0); + if (err) + panic("Failed to continue stub, pid = %d, errno = %d\n", + pid, errno); + + wait_stub_done(pid); + } /* - * proc_data->err will be non-zero if there was an (unexpected) error. + * proc_data->err will be negative if there was an (unexpected) error. * In that case, syscall_data_len points to the last executed syscall, * otherwise it will be zero (but we do not need to rely on that). */ @@ -113,6 +128,9 @@ static inline long do_syscall_stub(struct mm_id *mm_idp) mm_idp->syscall_data_len = 0; } + if (using_seccomp) + mm_idp->syscall_fd_num = 0; + return mm_idp->syscall_data_len; } @@ -175,6 +193,44 @@ static struct stub_syscall *syscall_stub_get_previous(struct mm_id *mm_idp, return NULL; } +static int get_stub_fd(struct mm_id *mm_idp, int fd) +{ + int i; + + /* Find an FD slot (or flush and use first) */ + if (!using_seccomp) + return fd; + + /* Already crashed, value does not matter */ + if (mm_idp->syscall_data_len < 0) + return 0; + + /* Find existing FD in map if we can allocate another syscall */ + if (mm_idp->syscall_data_len < + ARRAY_SIZE(((struct stub_data *)NULL)->syscall_data)) { + for (i = 0; i < mm_idp->syscall_fd_num; i++) { + if (mm_idp->syscall_fd_map[i] == fd) + return i; + } + + if (mm_idp->syscall_fd_num < STUB_MAX_FDS) { + i = mm_idp->syscall_fd_num; + mm_idp->syscall_fd_map[i] = fd; + + mm_idp->syscall_fd_num++; + + return i; + } + } + + /* FD map full or no syscall space available, continue after flush */ + do_syscall_stub(mm_idp); + mm_idp->syscall_fd_map[0] = fd; + mm_idp->syscall_fd_num = 1; + + return 0; +} + int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len, int prot, int phys_fd, unsigned long long offset) { @@ -182,12 +238,21 @@ int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len, int prot, /* Compress with previous syscall if that is possible */ sc = syscall_stub_get_previous(mm_idp, STUB_SYSCALL_MMAP, virt); - if (sc && sc->mem.prot == prot && sc->mem.fd == phys_fd && + if (sc && sc->mem.prot == prot && sc->mem.offset == MMAP_OFFSET(offset - sc->mem.length)) { - sc->mem.length += len; - return 0; + int prev_fd = sc->mem.fd; + + if (using_seccomp) + prev_fd = mm_idp->syscall_fd_map[sc->mem.fd]; + + if (phys_fd == prev_fd) { + sc->mem.length += len; + return 0; + } } + phys_fd = get_stub_fd(mm_idp, phys_fd); + sc = syscall_stub_alloc(mm_idp); sc->syscall = STUB_SYSCALL_MMAP; sc->mem.addr = virt; diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c index ae2aea062f06..e42ffac23e3c 100644 --- a/arch/um/os-Linux/skas/process.c +++ b/arch/um/os-Linux/skas/process.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* + * Copyright (C) 2021 Benjamin Berg <benjamin@sipsolutions.net> * Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de) * Copyright (C) 2002- 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) */ @@ -15,6 +16,7 @@ #include <sys/mman.h> #include <sys/wait.h> #include <sys/stat.h> +#include <sys/socket.h> #include <asm/unistd.h> #include <as-layout.h> #include <init.h> @@ -25,8 +27,11 @@ #include <registers.h> #include <skas.h> #include <sysdep/stub.h> +#include <sysdep/mcontext.h> +#include <linux/futex.h> #include <linux/threads.h> #include <timetravel.h> +#include <asm-generic/rwonce.h> #include "../internal.h" int is_skas_winch(int pid, int fd, void *data) @@ -142,6 +147,105 @@ bad_wait: fatal_sigsegv(); } +void wait_stub_done_seccomp(struct mm_id *mm_idp, int running, int wait_sigsys) +{ + struct stub_data *data = (void *)mm_idp->stack; + int ret; + + do { + const char byte = 0; + struct iovec iov = { + .iov_base = (void *)&byte, + .iov_len = sizeof(byte), + }; + union { + char data[CMSG_SPACE(sizeof(mm_idp->syscall_fd_map))]; + struct cmsghdr align; + } ctrl; + struct msghdr msgh = { + .msg_iov = &iov, + .msg_iovlen = 1, + }; + + if (!running) { + if (mm_idp->syscall_fd_num) { + unsigned int fds_size = + sizeof(int) * mm_idp->syscall_fd_num; + struct cmsghdr *cmsg; + + msgh.msg_control = ctrl.data; + msgh.msg_controllen = CMSG_SPACE(fds_size); + cmsg = CMSG_FIRSTHDR(&msgh); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(fds_size); + memcpy(CMSG_DATA(cmsg), mm_idp->syscall_fd_map, + fds_size); + + CATCH_EINTR(syscall(__NR_sendmsg, mm_idp->sock, + &msgh, 0)); + } + + data->signal = 0; + data->futex = FUTEX_IN_CHILD; + CATCH_EINTR(syscall(__NR_futex, &data->futex, + FUTEX_WAKE, 1, NULL, NULL, 0)); + } + + do { + /* + * We need to check whether the child is still alive + * before and after the FUTEX_WAIT call. Before, in + * case it just died but we still updated data->futex + * to FUTEX_IN_CHILD. And after, in case it died while + * we were waiting (and SIGCHLD woke us up, see the + * IRQ handler in mmu.c). + * + * Either way, if PID is negative, then we have no + * choice but to kill the task. + */ + if (__READ_ONCE(mm_idp->pid) < 0) + goto out_kill; + + ret = syscall(__NR_futex, &data->futex, + FUTEX_WAIT, FUTEX_IN_CHILD, + NULL, NULL, 0); + if (ret < 0 && errno != EINTR && errno != EAGAIN) { + printk(UM_KERN_ERR "%s : FUTEX_WAIT failed, errno = %d\n", + __func__, errno); + goto out_kill; + } + } while (data->futex == FUTEX_IN_CHILD); + + if (__READ_ONCE(mm_idp->pid) < 0) + goto out_kill; + + running = 0; + + /* We may receive a SIGALRM before SIGSYS, iterate again. */ + } while (wait_sigsys && data->signal == SIGALRM); + + if (data->mctx_offset > sizeof(data->sigstack) - sizeof(mcontext_t)) { + printk(UM_KERN_ERR "%s : invalid mcontext offset", __func__); + goto out_kill; + } + + if (wait_sigsys && data->signal != SIGSYS) { + printk(UM_KERN_ERR "%s : expected SIGSYS but got %d", + __func__, data->signal); + goto out_kill; + } + + return; + +out_kill: + printk(UM_KERN_ERR "%s : failed to wait for stub, pid = %d, errno = %d\n", + __func__, mm_idp->pid, errno); + /* This is not true inside start_userspace */ + if (current_mm_id() == mm_idp) + fatal_sigsegv(); +} + extern unsigned long current_stub_stack(void); static void get_skas_faultinfo(int pid, struct faultinfo *fi) @@ -163,12 +267,6 @@ static void get_skas_faultinfo(int pid, struct faultinfo *fi) memcpy(fi, (void *)current_stub_stack(), sizeof(*fi)); } -static void handle_segv(int pid, struct uml_pt_regs *regs) -{ - get_skas_faultinfo(pid, ®s->faultinfo); - segv(regs->faultinfo, 0, 1, NULL, NULL); -} - static void handle_trap(int pid, struct uml_pt_regs *regs) { if ((UPT_IP(regs) >= STUB_START) && (UPT_IP(regs) < STUB_END)) @@ -181,29 +279,48 @@ extern char __syscall_stub_start[]; static int stub_exe_fd; +struct tramp_data { + struct stub_data *stub_data; + /* 0 is inherited, 1 is the kernel side */ + int sockpair[2]; +}; + #ifndef CLOSE_RANGE_CLOEXEC #define CLOSE_RANGE_CLOEXEC (1U << 2) #endif -static int userspace_tramp(void *stack) +static int userspace_tramp(void *data) { + struct tramp_data *tramp_data = data; char *const argv[] = { "uml-userspace", NULL }; - int pipe_fds[2]; unsigned long long offset; struct stub_init_data init_data = { + .seccomp = using_seccomp, .stub_start = STUB_START, - .segv_handler = STUB_CODE + - (unsigned long) stub_segv_handler - - (unsigned long) __syscall_stub_start, }; struct iomem_region *iomem; int ret; + if (using_seccomp) { + init_data.signal_handler = STUB_CODE + + (unsigned long) stub_signal_interrupt - + (unsigned long) __syscall_stub_start; + init_data.signal_restorer = STUB_CODE + + (unsigned long) stub_signal_restorer - + (unsigned long) __syscall_stub_start; + } else { + init_data.signal_handler = STUB_CODE + + (unsigned long) stub_segv_handler - + (unsigned long) __syscall_stub_start; + init_data.signal_restorer = 0; + } + init_data.stub_code_fd = phys_mapping(uml_to_phys(__syscall_stub_start), &offset); init_data.stub_code_offset = MMAP_OFFSET(offset); - init_data.stub_data_fd = phys_mapping(uml_to_phys(stack), &offset); + init_data.stub_data_fd = phys_mapping(uml_to_phys(tramp_data->stub_data), + &offset); init_data.stub_data_offset = MMAP_OFFSET(offset); /* @@ -214,20 +331,21 @@ static int userspace_tramp(void *stack) syscall(__NR_close_range, 0, ~0U, CLOSE_RANGE_CLOEXEC); fcntl(init_data.stub_data_fd, F_SETFD, 0); - for (iomem = iomem_regions; iomem; iomem = iomem->next) - fcntl(iomem->fd, F_SETFD, 0); - /* Create a pipe for init_data (no CLOEXEC) and dup2 to STDIN */ - if (pipe(pipe_fds)) - exit(2); + /* In SECCOMP mode, these FDs are passed when needed */ + if (!using_seccomp) { + for (iomem = iomem_regions; iomem; iomem = iomem->next) + fcntl(iomem->fd, F_SETFD, 0); + } - if (dup2(pipe_fds[0], 0) < 0) + /* dup2 signaling FD/socket to STDIN */ + if (dup2(tramp_data->sockpair[0], 0) < 0) exit(3); - close(pipe_fds[0]); + close(tramp_data->sockpair[0]); /* Write init_data and close write side */ - ret = write(pipe_fds[1], &init_data, sizeof(init_data)); - close(pipe_fds[1]); + ret = write(tramp_data->sockpair[1], &init_data, sizeof(init_data)); + close(tramp_data->sockpair[1]); if (ret != sizeof(init_data)) exit(4); @@ -315,11 +433,12 @@ static int __init init_stub_exe_fd(void) } __initcall(init_stub_exe_fd); +int using_seccomp; int userspace_pid[NR_CPUS]; /** * start_userspace() - prepare a new userspace process - * @stub_stack: pointer to the stub stack. + * @mm_id: The corresponding struct mm_id * * Setups a new temporary stack page that is used while userspace_tramp() runs * Clones the kernel process into a new userspace process, with FDs only. @@ -328,11 +447,15 @@ int userspace_pid[NR_CPUS]; * when negative: an error number. * FIXME: can PIDs become negative?! */ -int start_userspace(unsigned long stub_stack) +int start_userspace(struct mm_id *mm_id) { + struct stub_data *proc_data = (void *)mm_id->stack; + struct tramp_data tramp_data = { + .stub_data = proc_data, + }; void *stack; unsigned long sp; - int pid, status, n, err; + int status, n, err; /* setup a temporary stack page */ stack = mmap(NULL, UM_KERN_PAGE_SIZE, @@ -348,40 +471,55 @@ int start_userspace(unsigned long stub_stack) /* set stack pointer to the end of the stack page, so it can grow downwards */ sp = (unsigned long)stack + UM_KERN_PAGE_SIZE; - /* clone into new userspace process */ - pid = clone(userspace_tramp, (void *) sp, + /* socket pair for init data and SECCOMP FD passing (no CLOEXEC here) */ + if (socketpair(AF_UNIX, SOCK_STREAM, 0, tramp_data.sockpair)) { + err = -errno; + printk(UM_KERN_ERR "%s : socketpair failed, errno = %d\n", + __func__, errno); + return err; + } + + if (using_seccomp) + proc_data->futex = FUTEX_IN_CHILD; + + mm_id->pid = clone(userspace_tramp, (void *) sp, CLONE_VFORK | CLONE_VM | SIGCHLD, - (void *)stub_stack); - if (pid < 0) { + (void *)&tramp_data); + if (mm_id->pid < 0) { err = -errno; printk(UM_KERN_ERR "%s : clone failed, errno = %d\n", __func__, errno); - return err; + goto out_close; } - do { - CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED | __WALL)); - if (n < 0) { + if (using_seccomp) { + wait_stub_done_seccomp(mm_id, 1, 1); + } else { + do { + CATCH_EINTR(n = waitpid(mm_id->pid, &status, + WUNTRACED | __WALL)); + if (n < 0) { + err = -errno; + printk(UM_KERN_ERR "%s : wait failed, errno = %d\n", + __func__, errno); + goto out_kill; + } + } while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGALRM)); + + if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) { + err = -EINVAL; + printk(UM_KERN_ERR "%s : expected SIGSTOP, got status = %d\n", + __func__, status); + goto out_kill; + } + + if (ptrace(PTRACE_SETOPTIONS, mm_id->pid, NULL, + (void *) PTRACE_O_TRACESYSGOOD) < 0) { err = -errno; - printk(UM_KERN_ERR "%s : wait failed, errno = %d\n", + printk(UM_KERN_ERR "%s : PTRACE_SETOPTIONS failed, errno = %d\n", __func__, errno); goto out_kill; } - } while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGALRM)); - - if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) { - err = -EINVAL; - printk(UM_KERN_ERR "%s : expected SIGSTOP, got status = %d\n", - __func__, status); - goto out_kill; - } - - if (ptrace(PTRACE_SETOPTIONS, pid, NULL, - (void *) PTRACE_O_TRACESYSGOOD) < 0) { - err = -errno; - printk(UM_KERN_ERR "%s : PTRACE_SETOPTIONS failed, errno = %d\n", - __func__, errno); - goto out_kill; } if (munmap(stack, UM_KERN_PAGE_SIZE) < 0) { @@ -391,10 +529,22 @@ int start_userspace(unsigned long stub_stack) goto out_kill; } - return pid; + close(tramp_data.sockpair[0]); + if (using_seccomp) + mm_id->sock = tramp_data.sockpair[1]; + else + close(tramp_data.sockpair[1]); + + return 0; + +out_kill: + os_kill_ptraced_process(mm_id->pid, 1); +out_close: + close(tramp_data.sockpair[0]); + close(tramp_data.sockpair[1]); + + mm_id->pid = -1; - out_kill: - os_kill_ptraced_process(pid, 1); return err; } @@ -404,7 +554,9 @@ extern unsigned long tt_extra_sched_jiffies; void userspace(struct uml_pt_regs *regs) { int err, status, op, pid = userspace_pid[0]; - siginfo_t si; + siginfo_t si_ptrace; + siginfo_t *si; + int sig; /* Handle any immediate reschedules or signals */ interrupt_end(); @@ -437,103 +589,177 @@ void userspace(struct uml_pt_regs *regs) current_mm_sync(); - /* Flush out any pending syscalls */ - err = syscall_stub_flush(current_mm_id()); - if (err) { - if (err == -ENOMEM) - report_enomem(); + if (using_seccomp) { + struct mm_id *mm_id = current_mm_id(); + struct stub_data *proc_data = (void *) mm_id->stack; + int ret; - printk(UM_KERN_ERR "%s - Error flushing stub syscalls: %d", - __func__, -err); - fatal_sigsegv(); - } + ret = set_stub_state(regs, proc_data, singlestepping()); + if (ret) { + printk(UM_KERN_ERR "%s - failed to set regs: %d", + __func__, ret); + fatal_sigsegv(); + } - /* - * This can legitimately fail if the process loads a - * bogus value into a segment register. It will - * segfault and PTRACE_GETREGS will read that value - * out of the process. However, PTRACE_SETREGS will - * fail. In this case, there is nothing to do but - * just kill the process. - */ - if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp)) { - printk(UM_KERN_ERR "%s - ptrace set regs failed, errno = %d\n", - __func__, errno); - fatal_sigsegv(); - } + /* Must have been reset by the syscall caller */ + if (proc_data->restart_wait != 0) + panic("Programming error: Flag to only run syscalls in child was not cleared!"); - if (put_fp_registers(pid, regs->fp)) { - printk(UM_KERN_ERR "%s - ptrace set fp regs failed, errno = %d\n", - __func__, errno); - fatal_sigsegv(); - } + /* Mark pending syscalls for flushing */ + proc_data->syscall_data_len = mm_id->syscall_data_len; - if (singlestepping()) - op = PTRACE_SYSEMU_SINGLESTEP; - else - op = PTRACE_SYSEMU; + wait_stub_done_seccomp(mm_id, 0, 0); - if (ptrace(op, pid, 0, 0)) { - printk(UM_KERN_ERR "%s - ptrace continue failed, op = %d, errno = %d\n", - __func__, op, errno); - fatal_sigsegv(); - } + sig = proc_data->signal; - CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL)); - if (err < 0) { - printk(UM_KERN_ERR "%s - wait failed, errno = %d\n", - __func__, errno); - fatal_sigsegv(); - } + if (sig == SIGTRAP && proc_data->err != 0) { + printk(UM_KERN_ERR "%s - Error flushing stub syscalls", + __func__); + syscall_stub_dump_error(mm_id); + mm_id->syscall_data_len = proc_data->err; + fatal_sigsegv(); + } - regs->is_user = 1; - if (ptrace(PTRACE_GETREGS, pid, 0, regs->gp)) { - printk(UM_KERN_ERR "%s - PTRACE_GETREGS failed, errno = %d\n", - __func__, errno); - fatal_sigsegv(); - } + mm_id->syscall_data_len = 0; + mm_id->syscall_fd_num = 0; - if (get_fp_registers(pid, regs->fp)) { - printk(UM_KERN_ERR "%s - get_fp_registers failed, errno = %d\n", - __func__, errno); - fatal_sigsegv(); - } + ret = get_stub_state(regs, proc_data, NULL); + if (ret) { + printk(UM_KERN_ERR "%s - failed to get regs: %d", + __func__, ret); + fatal_sigsegv(); + } - UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */ + if (proc_data->si_offset > sizeof(proc_data->sigstack) - sizeof(*si)) + panic("%s - Invalid siginfo offset from child", + __func__); + si = (void *)&proc_data->sigstack[proc_data->si_offset]; + + regs->is_user = 1; + + /* Fill in ORIG_RAX and extract fault information */ + PT_SYSCALL_NR(regs->gp) = si->si_syscall; + if (sig == SIGSEGV) { + mcontext_t *mcontext = (void *)&proc_data->sigstack[proc_data->mctx_offset]; - if (WIFSTOPPED(status)) { - int sig = WSTOPSIG(status); + GET_FAULTINFO_FROM_MC(regs->faultinfo, mcontext); + } + } else { + /* Flush out any pending syscalls */ + err = syscall_stub_flush(current_mm_id()); + if (err) { + if (err == -ENOMEM) + report_enomem(); + + printk(UM_KERN_ERR "%s - Error flushing stub syscalls: %d", + __func__, -err); + fatal_sigsegv(); + } - /* These signal handlers need the si argument. - * The SIGIO and SIGALARM handlers which constitute the - * majority of invocations, do not use it. + /* + * This can legitimately fail if the process loads a + * bogus value into a segment register. It will + * segfault and PTRACE_GETREGS will read that value + * out of the process. However, PTRACE_SETREGS will + * fail. In this case, there is nothing to do but + * just kill the process. */ - switch (sig) { - case SIGSEGV: - case SIGTRAP: - case SIGILL: - case SIGBUS: - case SIGFPE: - case SIGWINCH: - ptrace(PTRACE_GETSIGINFO, pid, 0, (struct siginfo *)&si); - break; + if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp)) { + printk(UM_KERN_ERR "%s - ptrace set regs failed, errno = %d\n", + __func__, errno); + fatal_sigsegv(); } - switch (sig) { - case SIGSEGV: - if (PTRACE_FULL_FAULTINFO) { + if (put_fp_registers(pid, regs->fp)) { + printk(UM_KERN_ERR "%s - ptrace set fp regs failed, errno = %d\n", + __func__, errno); + fatal_sigsegv(); + } + + if (singlestepping()) + op = PTRACE_SYSEMU_SINGLESTEP; + else + op = PTRACE_SYSEMU; + + if (ptrace(op, pid, 0, 0)) { + printk(UM_KERN_ERR "%s - ptrace continue failed, op = %d, errno = %d\n", + __func__, op, errno); + fatal_sigsegv(); + } + + CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL)); + if (err < 0) { + printk(UM_KERN_ERR "%s - wait failed, errno = %d\n", + __func__, errno); + fatal_sigsegv(); + } + + regs->is_user = 1; + if (ptrace(PTRACE_GETREGS, pid, 0, regs->gp)) { + printk(UM_KERN_ERR "%s - PTRACE_GETREGS failed, errno = %d\n", + __func__, errno); + fatal_sigsegv(); + } + + if (get_fp_registers(pid, regs->fp)) { + printk(UM_KERN_ERR "%s - get_fp_registers failed, errno = %d\n", + __func__, errno); + fatal_sigsegv(); + } + + if (WIFSTOPPED(status)) { + sig = WSTOPSIG(status); + + /* + * These signal handlers need the si argument + * and SIGSEGV needs the faultinfo. + * The SIGIO and SIGALARM handlers which constitute + * the majority of invocations, do not use it. + */ + switch (sig) { + case SIGSEGV: get_skas_faultinfo(pid, ®s->faultinfo); - (*sig_info[SIGSEGV])(SIGSEGV, (struct siginfo *)&si, - regs, NULL); + fallthrough; + case SIGTRAP: + case SIGILL: + case SIGBUS: + case SIGFPE: + case SIGWINCH: + ptrace(PTRACE_GETSIGINFO, pid, 0, + (struct siginfo *)&si_ptrace); + si = &si_ptrace; + break; + default: + si = NULL; + break; } - else handle_segv(pid, regs); + } else { + sig = 0; + } + } + + UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */ + + if (sig) { + switch (sig) { + case SIGSEGV: + if (using_seccomp || PTRACE_FULL_FAULTINFO) + (*sig_info[SIGSEGV])(SIGSEGV, + (struct siginfo *)si, + regs, NULL); + else + segv(regs->faultinfo, 0, 1, NULL, NULL); + + break; + case SIGSYS: + handle_syscall(regs); break; case SIGTRAP + 0x80: handle_trap(pid, regs); break; case SIGTRAP: - relay_signal(SIGTRAP, (struct siginfo *)&si, regs, NULL); + relay_signal(SIGTRAP, (struct siginfo *)si, regs, NULL); break; case SIGALRM: break; @@ -543,7 +769,7 @@ void userspace(struct uml_pt_regs *regs) case SIGFPE: case SIGWINCH: block_signals_trace(); - (*sig_info[sig])(sig, (struct siginfo *)&si, regs, NULL); + (*sig_info[sig])(sig, (struct siginfo *)si, regs, NULL); unblock_signals_trace(); break; default: diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c index 93fc82c01aba..a827c2e01aa5 100644 --- a/arch/um/os-Linux/start_up.c +++ b/arch/um/os-Linux/start_up.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 /* + * Copyright (C) 2021 Benjamin Berg <benjamin@sipsolutions.net> * Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com) */ @@ -24,6 +25,13 @@ #include <kern_util.h> #include <mem_user.h> #include <ptrace_user.h> +#include <stdbool.h> +#include <stub-data.h> +#include <sys/prctl.h> +#include <linux/seccomp.h> +#include <linux/filter.h> +#include <sysdep/mcontext.h> +#include <sysdep/stub.h> #include <registers.h> #include <skas.h> #include "internal.h" @@ -224,6 +232,140 @@ static void __init check_ptrace(void) check_sysemu(); } +extern unsigned long host_fp_size; +extern unsigned long exec_regs[MAX_REG_NR]; +extern unsigned long *exec_fp_regs; + +__initdata static struct stub_data *seccomp_test_stub_data; + +static void __init sigsys_handler(int sig, siginfo_t *info, void *p) +{ + ucontext_t *uc = p; + + /* Stow away the location of the mcontext in the stack */ + seccomp_test_stub_data->mctx_offset = (unsigned long)&uc->uc_mcontext - + (unsigned long)&seccomp_test_stub_data->sigstack[0]; + + /* Prevent libc from clearing memory (mctx_offset in particular) */ + syscall(__NR_exit, 0); +} + +static int __init seccomp_helper(void *data) +{ + static struct sock_filter filter[] = { + BPF_STMT(BPF_LD | BPF_W | BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_clock_nanosleep, 1, 0), + BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW), + BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_TRAP), + }; + static struct sock_fprog prog = { + .len = ARRAY_SIZE(filter), + .filter = filter, + }; + struct sigaction sa; + + /* close_range is needed for the stub */ + if (stub_syscall3(__NR_close_range, 1, ~0U, 0)) + exit(1); + + set_sigstack(seccomp_test_stub_data->sigstack, + sizeof(seccomp_test_stub_data->sigstack)); + + sa.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO; + sa.sa_sigaction = (void *) sigsys_handler; + sa.sa_restorer = NULL; + if (sigaction(SIGSYS, &sa, NULL) < 0) + exit(2); + + prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + if (syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER, + SECCOMP_FILTER_FLAG_TSYNC, &prog) != 0) + exit(3); + + sleep(0); + + /* Never reached. */ + _exit(4); +} + +static bool __init init_seccomp(void) +{ + int pid; + int status; + int n; + unsigned long sp; + + /* + * We check that we can install a seccomp filter and then exit(0) + * from a trapped syscall. + * + * Note that we cannot verify that no seccomp filter already exists + * for a syscall that results in the process/thread to be killed. + */ + + os_info("Checking that seccomp filters can be installed..."); + + seccomp_test_stub_data = mmap(0, sizeof(*seccomp_test_stub_data), + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANON, 0, 0); + + /* Use the syscall data area as stack, we just need something */ + sp = (unsigned long)&seccomp_test_stub_data->syscall_data + + sizeof(seccomp_test_stub_data->syscall_data) - + sizeof(void *); + pid = clone(seccomp_helper, (void *)sp, CLONE_VFORK | CLONE_VM, NULL); + + if (pid < 0) + fatal_perror("check_seccomp : clone failed"); + + CATCH_EINTR(n = waitpid(pid, &status, __WCLONE)); + if (n < 0) + fatal_perror("check_seccomp : waitpid failed"); + + if (WIFEXITED(status) && WEXITSTATUS(status) == 0) { + struct uml_pt_regs *regs; + unsigned long fp_size; + int r; + + /* Fill in the host_fp_size from the mcontext. */ + regs = calloc(1, sizeof(struct uml_pt_regs)); + get_stub_state(regs, seccomp_test_stub_data, &fp_size); + host_fp_size = fp_size; + free(regs); + + /* Repeat with the correct size */ + regs = calloc(1, sizeof(struct uml_pt_regs) + host_fp_size); + r = get_stub_state(regs, seccomp_test_stub_data, NULL); + + /* Store as the default startup registers */ + exec_fp_regs = malloc(host_fp_size); + memcpy(exec_regs, regs->gp, sizeof(exec_regs)); + memcpy(exec_fp_regs, regs->fp, host_fp_size); + + munmap(seccomp_test_stub_data, sizeof(*seccomp_test_stub_data)); + + free(regs); + + if (r) { + os_info("failed to fetch registers: %d\n", r); + return false; + } + + os_info("OK\n"); + return true; + } + + if (WIFEXITED(status) && WEXITSTATUS(status) == 2) + os_info("missing\n"); + else + os_info("error\n"); + + munmap(seccomp_test_stub_data, sizeof(*seccomp_test_stub_data)); + return false; +} + + static void __init check_coredump_limit(void) { struct rlimit lim; @@ -278,6 +420,44 @@ void __init get_host_cpu_features( } } +static int seccomp_config __initdata; + +static int __init uml_seccomp_config(char *line, int *add) +{ + *add = 0; + + if (strcmp(line, "off") == 0) + seccomp_config = 0; + else if (strcmp(line, "auto") == 0) + seccomp_config = 1; + else if (strcmp(line, "on") == 0) + seccomp_config = 2; + else + fatal("Invalid seccomp option '%s', expected on/auto/off\n", + line); + + return 0; +} + +__uml_setup("seccomp=", uml_seccomp_config, +"seccomp=<on/auto/off>\n" +" Configure whether or not SECCOMP is used. With SECCOMP, userspace\n" +" processes work collaboratively with the kernel instead of being\n" +" traced using ptrace. All syscalls from the application are caught and\n" +" redirected using a signal. This signal handler in turn is permitted to\n" +" do the selected set of syscalls to communicate with the UML kernel and\n" +" do the required memory management.\n" +"\n" +" This method is overall faster than the ptrace based userspace, primarily\n" +" because it reduces the number of context switches for (minor) page faults.\n" +"\n" +" However, the SECCOMP filter is not (yet) restrictive enough to prevent\n" +" userspace from reading and writing all physical memory. Userspace\n" +" processes could also trick the stub into disabling SIGALRM which\n" +" prevents it from being interrupted for scheduling purposes.\n" +"\n" +" This is insecure and should only be used with a trusted userspace\n\n" +); void __init os_early_checks(void) { @@ -286,13 +466,24 @@ void __init os_early_checks(void) /* Print out the core dump limits early */ check_coredump_limit(); - check_ptrace(); - /* Need to check this early because mmapping happens before the * kernel is running. */ check_tmpexec(); + if (seccomp_config) { + if (init_seccomp()) { + using_seccomp = 1; + return; + } + + if (seccomp_config == 2) + fatal("SECCOMP userspace requested but not functional!\n"); + } + + using_seccomp = 0; + check_ptrace(); + pid = start_ptraced_child(); if (init_pid_registers(pid)) fatal("Failed to initialize default registers"); |