summaryrefslogtreecommitdiff
path: root/arch/um/os-Linux
diff options
context:
space:
mode:
Diffstat (limited to 'arch/um/os-Linux')
-rw-r--r--arch/um/os-Linux/Makefile2
-rw-r--r--arch/um/os-Linux/drivers/Makefile13
-rw-r--r--arch/um/os-Linux/drivers/etap.h21
-rw-r--r--arch/um/os-Linux/drivers/ethertap_kern.c100
-rw-r--r--arch/um/os-Linux/drivers/ethertap_user.c248
-rw-r--r--arch/um/os-Linux/drivers/tuntap.h21
-rw-r--r--arch/um/os-Linux/drivers/tuntap_kern.c86
-rw-r--r--arch/um/os-Linux/drivers/tuntap_user.c215
-rw-r--r--arch/um/os-Linux/file.c15
-rw-r--r--arch/um/os-Linux/internal.h5
-rw-r--r--arch/um/os-Linux/process.c31
-rw-r--r--arch/um/os-Linux/registers.c4
-rw-r--r--arch/um/os-Linux/sigio.c3
-rw-r--r--arch/um/os-Linux/signal.c19
-rw-r--r--arch/um/os-Linux/skas/mem.c103
-rw-r--r--arch/um/os-Linux/skas/process.c482
-rw-r--r--arch/um/os-Linux/start_up.c195
17 files changed, 689 insertions, 874 deletions
diff --git a/arch/um/os-Linux/Makefile b/arch/um/os-Linux/Makefile
index 049dfa5bc9c6..fae836713487 100644
--- a/arch/um/os-Linux/Makefile
+++ b/arch/um/os-Linux/Makefile
@@ -8,7 +8,7 @@ KCOV_INSTRUMENT := n
obj-y = execvp.o file.o helper.o irq.o main.o mem.o process.o \
registers.o sigio.o signal.o start_up.o time.o tty.o \
- umid.o user_syms.o util.o drivers/ skas/
+ umid.o user_syms.o util.o skas/
CFLAGS_signal.o += -Wframe-larger-than=4096
diff --git a/arch/um/os-Linux/drivers/Makefile b/arch/um/os-Linux/drivers/Makefile
deleted file mode 100644
index cf2d75bb1884..000000000000
--- a/arch/um/os-Linux/drivers/Makefile
+++ /dev/null
@@ -1,13 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-#
-# Copyright (C) 2000, 2002 Jeff Dike (jdike@karaya.com)
-#
-
-ethertap-objs := ethertap_kern.o ethertap_user.o
-tuntap-objs := tuntap_kern.o tuntap_user.o
-
-obj-y =
-obj-$(CONFIG_UML_NET_ETHERTAP) += ethertap.o
-obj-$(CONFIG_UML_NET_TUNTAP) += tuntap.o
-
-include $(srctree)/arch/um/scripts/Makefile.rules
diff --git a/arch/um/os-Linux/drivers/etap.h b/arch/um/os-Linux/drivers/etap.h
deleted file mode 100644
index a475259f90e1..000000000000
--- a/arch/um/os-Linux/drivers/etap.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- */
-
-#ifndef __DRIVERS_ETAP_H
-#define __DRIVERS_ETAP_H
-
-#include <net_user.h>
-
-struct ethertap_data {
- char *dev_name;
- char *gate_addr;
- int data_fd;
- int control_fd;
- void *dev;
-};
-
-extern const struct net_user_info ethertap_user_info;
-
-#endif
diff --git a/arch/um/os-Linux/drivers/ethertap_kern.c b/arch/um/os-Linux/drivers/ethertap_kern.c
deleted file mode 100644
index 5e5ee40680ce..000000000000
--- a/arch/um/os-Linux/drivers/ethertap_kern.c
+++ /dev/null
@@ -1,100 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
- * James Leu (jleu@mindspring.net).
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Copyright (C) 2001 by various other people who didn't put their name here.
- */
-
-#include <linux/init.h>
-#include <linux/netdevice.h>
-#include "etap.h"
-#include <net_kern.h>
-
-struct ethertap_init {
- char *dev_name;
- char *gate_addr;
-};
-
-static void etap_init(struct net_device *dev, void *data)
-{
- struct uml_net_private *pri;
- struct ethertap_data *epri;
- struct ethertap_init *init = data;
-
- pri = netdev_priv(dev);
- epri = (struct ethertap_data *) pri->user;
- epri->dev_name = init->dev_name;
- epri->gate_addr = init->gate_addr;
- epri->data_fd = -1;
- epri->control_fd = -1;
- epri->dev = dev;
-
- printk(KERN_INFO "ethertap backend - %s", epri->dev_name);
- if (epri->gate_addr != NULL)
- printk(KERN_CONT ", IP = %s", epri->gate_addr);
- printk(KERN_CONT "\n");
-}
-
-static int etap_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
- int len;
-
- len = net_recvfrom(fd, skb_mac_header(skb),
- skb->dev->mtu + 2 + ETH_HEADER_ETHERTAP);
- if (len <= 0)
- return(len);
-
- skb_pull(skb, 2);
- len -= 2;
- return len;
-}
-
-static int etap_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
- skb_push(skb, 2);
- return net_send(fd, skb->data, skb->len);
-}
-
-const struct net_kern_info ethertap_kern_info = {
- .init = etap_init,
- .protocol = eth_protocol,
- .read = etap_read,
- .write = etap_write,
-};
-
-static int ethertap_setup(char *str, char **mac_out, void *data)
-{
- struct ethertap_init *init = data;
-
- *init = ((struct ethertap_init)
- { .dev_name = NULL,
- .gate_addr = NULL });
- if (tap_setup_common(str, "ethertap", &init->dev_name, mac_out,
- &init->gate_addr))
- return 0;
- if (init->dev_name == NULL) {
- printk(KERN_ERR "ethertap_setup : Missing tap device name\n");
- return 0;
- }
-
- return 1;
-}
-
-static struct transport ethertap_transport = {
- .list = LIST_HEAD_INIT(ethertap_transport.list),
- .name = "ethertap",
- .setup = ethertap_setup,
- .user = &ethertap_user_info,
- .kern = &ethertap_kern_info,
- .private_size = sizeof(struct ethertap_data),
- .setup_size = sizeof(struct ethertap_init),
-};
-
-static int register_ethertap(void)
-{
- register_transport(&ethertap_transport);
- return 0;
-}
-
-late_initcall(register_ethertap);
diff --git a/arch/um/os-Linux/drivers/ethertap_user.c b/arch/um/os-Linux/drivers/ethertap_user.c
deleted file mode 100644
index bdf215c0eca7..000000000000
--- a/arch/um/os-Linux/drivers/ethertap_user.c
+++ /dev/null
@@ -1,248 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- * Copyright (C) 2001 Lennert Buytenhek (buytenh@gnu.org) and
- * James Leu (jleu@mindspring.net).
- * Copyright (C) 2001 by various other people who didn't put their name here.
- */
-
-#include <stdio.h>
-#include <unistd.h>
-#include <errno.h>
-#include <string.h>
-#include <sys/socket.h>
-#include <sys/wait.h>
-#include "etap.h"
-#include <os.h>
-#include <net_user.h>
-#include <um_malloc.h>
-
-#define MAX_PACKET ETH_MAX_PACKET
-
-static int etap_user_init(void *data, void *dev)
-{
- struct ethertap_data *pri = data;
-
- pri->dev = dev;
- return 0;
-}
-
-struct addr_change {
- enum { ADD_ADDR, DEL_ADDR } what;
- unsigned char addr[4];
- unsigned char netmask[4];
-};
-
-static void etap_change(int op, unsigned char *addr, unsigned char *netmask,
- int fd)
-{
- struct addr_change change;
- char *output;
- int n;
-
- change.what = op;
- memcpy(change.addr, addr, sizeof(change.addr));
- memcpy(change.netmask, netmask, sizeof(change.netmask));
- CATCH_EINTR(n = write(fd, &change, sizeof(change)));
- if (n != sizeof(change)) {
- printk(UM_KERN_ERR "etap_change - request failed, err = %d\n",
- errno);
- return;
- }
-
- output = uml_kmalloc(UM_KERN_PAGE_SIZE, UM_GFP_KERNEL);
- if (output == NULL)
- printk(UM_KERN_ERR "etap_change : Failed to allocate output "
- "buffer\n");
- read_output(fd, output, UM_KERN_PAGE_SIZE);
- if (output != NULL) {
- printk("%s", output);
- kfree(output);
- }
-}
-
-static void etap_open_addr(unsigned char *addr, unsigned char *netmask,
- void *arg)
-{
- etap_change(ADD_ADDR, addr, netmask, *((int *) arg));
-}
-
-static void etap_close_addr(unsigned char *addr, unsigned char *netmask,
- void *arg)
-{
- etap_change(DEL_ADDR, addr, netmask, *((int *) arg));
-}
-
-struct etap_pre_exec_data {
- int control_remote;
- int control_me;
- int data_me;
-};
-
-static void etap_pre_exec(void *arg)
-{
- struct etap_pre_exec_data *data = arg;
-
- dup2(data->control_remote, 1);
- close(data->data_me);
- close(data->control_me);
-}
-
-static int etap_tramp(char *dev, char *gate, int control_me,
- int control_remote, int data_me, int data_remote)
-{
- struct etap_pre_exec_data pe_data;
- int pid, err, n;
- char version_buf[sizeof("nnnnn\0")];
- char data_fd_buf[sizeof("nnnnnn\0")];
- char gate_buf[sizeof("nnn.nnn.nnn.nnn\0")];
- char *setup_args[] = { "uml_net", version_buf, "ethertap", dev,
- data_fd_buf, gate_buf, NULL };
- char *nosetup_args[] = { "uml_net", version_buf, "ethertap",
- dev, data_fd_buf, NULL };
- char **args, c;
-
- sprintf(data_fd_buf, "%d", data_remote);
- sprintf(version_buf, "%d", UML_NET_VERSION);
- if (gate != NULL) {
- strscpy(gate_buf, gate);
- args = setup_args;
- }
- else args = nosetup_args;
-
- err = 0;
- pe_data.control_remote = control_remote;
- pe_data.control_me = control_me;
- pe_data.data_me = data_me;
- pid = run_helper(etap_pre_exec, &pe_data, args);
-
- if (pid < 0)
- err = pid;
- close(data_remote);
- close(control_remote);
- CATCH_EINTR(n = read(control_me, &c, sizeof(c)));
- if (n != sizeof(c)) {
- err = -errno;
- printk(UM_KERN_ERR "etap_tramp : read of status failed, "
- "err = %d\n", -err);
- return err;
- }
- if (c != 1) {
- printk(UM_KERN_ERR "etap_tramp : uml_net failed\n");
- err = helper_wait(pid);
- }
- return err;
-}
-
-static int etap_open(void *data)
-{
- struct ethertap_data *pri = data;
- char *output;
- int data_fds[2], control_fds[2], err, output_len;
-
- err = tap_open_common(pri->dev, pri->gate_addr);
- if (err)
- return err;
-
- err = socketpair(AF_UNIX, SOCK_DGRAM, 0, data_fds);
- if (err) {
- err = -errno;
- printk(UM_KERN_ERR "etap_open - data socketpair failed - "
- "err = %d\n", errno);
- return err;
- }
-
- err = socketpair(AF_UNIX, SOCK_STREAM, 0, control_fds);
- if (err) {
- err = -errno;
- printk(UM_KERN_ERR "etap_open - control socketpair failed - "
- "err = %d\n", errno);
- goto out_close_data;
- }
-
- err = etap_tramp(pri->dev_name, pri->gate_addr, control_fds[0],
- control_fds[1], data_fds[0], data_fds[1]);
- output_len = UM_KERN_PAGE_SIZE;
- output = uml_kmalloc(output_len, UM_GFP_KERNEL);
- read_output(control_fds[0], output, output_len);
-
- if (output == NULL)
- printk(UM_KERN_ERR "etap_open : failed to allocate output "
- "buffer\n");
- else {
- printk("%s", output);
- kfree(output);
- }
-
- if (err < 0) {
- printk(UM_KERN_ERR "etap_tramp failed - err = %d\n", -err);
- goto out_close_control;
- }
-
- pri->data_fd = data_fds[0];
- pri->control_fd = control_fds[0];
- iter_addresses(pri->dev, etap_open_addr, &pri->control_fd);
- return data_fds[0];
-
-out_close_control:
- close(control_fds[0]);
- close(control_fds[1]);
-out_close_data:
- close(data_fds[0]);
- close(data_fds[1]);
- return err;
-}
-
-static void etap_close(int fd, void *data)
-{
- struct ethertap_data *pri = data;
-
- iter_addresses(pri->dev, etap_close_addr, &pri->control_fd);
- close(fd);
-
- if (shutdown(pri->data_fd, SHUT_RDWR) < 0)
- printk(UM_KERN_ERR "etap_close - shutdown data socket failed, "
- "errno = %d\n", errno);
-
- if (shutdown(pri->control_fd, SHUT_RDWR) < 0)
- printk(UM_KERN_ERR "etap_close - shutdown control socket "
- "failed, errno = %d\n", errno);
-
- close(pri->data_fd);
- pri->data_fd = -1;
- close(pri->control_fd);
- pri->control_fd = -1;
-}
-
-static void etap_add_addr(unsigned char *addr, unsigned char *netmask,
- void *data)
-{
- struct ethertap_data *pri = data;
-
- tap_check_ips(pri->gate_addr, addr);
- if (pri->control_fd == -1)
- return;
- etap_open_addr(addr, netmask, &pri->control_fd);
-}
-
-static void etap_del_addr(unsigned char *addr, unsigned char *netmask,
- void *data)
-{
- struct ethertap_data *pri = data;
-
- if (pri->control_fd == -1)
- return;
-
- etap_close_addr(addr, netmask, &pri->control_fd);
-}
-
-const struct net_user_info ethertap_user_info = {
- .init = etap_user_init,
- .open = etap_open,
- .close = etap_close,
- .remove = NULL,
- .add_address = etap_add_addr,
- .delete_address = etap_del_addr,
- .mtu = ETH_MAX_PACKET,
- .max_packet = ETH_MAX_PACKET + ETH_HEADER_ETHERTAP,
-};
diff --git a/arch/um/os-Linux/drivers/tuntap.h b/arch/um/os-Linux/drivers/tuntap.h
deleted file mode 100644
index e364e42abfc5..000000000000
--- a/arch/um/os-Linux/drivers/tuntap.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- */
-
-#ifndef __UM_TUNTAP_H
-#define __UM_TUNTAP_H
-
-#include <net_user.h>
-
-struct tuntap_data {
- char *dev_name;
- int fixed_config;
- char *gate_addr;
- int fd;
- void *dev;
-};
-
-extern const struct net_user_info tuntap_user_info;
-
-#endif
diff --git a/arch/um/os-Linux/drivers/tuntap_kern.c b/arch/um/os-Linux/drivers/tuntap_kern.c
deleted file mode 100644
index ff022d9cf0dd..000000000000
--- a/arch/um/os-Linux/drivers/tuntap_kern.c
+++ /dev/null
@@ -1,86 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- */
-
-#include <linux/netdevice.h>
-#include <linux/init.h>
-#include <linux/skbuff.h>
-#include <asm/errno.h>
-#include <net_kern.h>
-#include "tuntap.h"
-
-struct tuntap_init {
- char *dev_name;
- char *gate_addr;
-};
-
-static void tuntap_init(struct net_device *dev, void *data)
-{
- struct uml_net_private *pri;
- struct tuntap_data *tpri;
- struct tuntap_init *init = data;
-
- pri = netdev_priv(dev);
- tpri = (struct tuntap_data *) pri->user;
- tpri->dev_name = init->dev_name;
- tpri->fixed_config = (init->dev_name != NULL);
- tpri->gate_addr = init->gate_addr;
- tpri->fd = -1;
- tpri->dev = dev;
-
- printk(KERN_INFO "TUN/TAP backend - ");
- if (tpri->gate_addr != NULL)
- printk(KERN_CONT "IP = %s", tpri->gate_addr);
- printk(KERN_CONT "\n");
-}
-
-static int tuntap_read(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
- return net_read(fd, skb_mac_header(skb),
- skb->dev->mtu + ETH_HEADER_OTHER);
-}
-
-static int tuntap_write(int fd, struct sk_buff *skb, struct uml_net_private *lp)
-{
- return net_write(fd, skb->data, skb->len);
-}
-
-const struct net_kern_info tuntap_kern_info = {
- .init = tuntap_init,
- .protocol = eth_protocol,
- .read = tuntap_read,
- .write = tuntap_write,
-};
-
-static int tuntap_setup(char *str, char **mac_out, void *data)
-{
- struct tuntap_init *init = data;
-
- *init = ((struct tuntap_init)
- { .dev_name = NULL,
- .gate_addr = NULL });
- if (tap_setup_common(str, "tuntap", &init->dev_name, mac_out,
- &init->gate_addr))
- return 0;
-
- return 1;
-}
-
-static struct transport tuntap_transport = {
- .list = LIST_HEAD_INIT(tuntap_transport.list),
- .name = "tuntap",
- .setup = tuntap_setup,
- .user = &tuntap_user_info,
- .kern = &tuntap_kern_info,
- .private_size = sizeof(struct tuntap_data),
- .setup_size = sizeof(struct tuntap_init),
-};
-
-static int register_tuntap(void)
-{
- register_transport(&tuntap_transport);
- return 0;
-}
-
-late_initcall(register_tuntap);
diff --git a/arch/um/os-Linux/drivers/tuntap_user.c b/arch/um/os-Linux/drivers/tuntap_user.c
deleted file mode 100644
index 91f0e27ca3a6..000000000000
--- a/arch/um/os-Linux/drivers/tuntap_user.c
+++ /dev/null
@@ -1,215 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2001 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
- */
-
-#include <stdio.h>
-#include <unistd.h>
-#include <errno.h>
-#include <string.h>
-#include <linux/if_tun.h>
-#include <net/if.h>
-#include <sys/ioctl.h>
-#include <sys/socket.h>
-#include <sys/wait.h>
-#include <sys/uio.h>
-#include <kern_util.h>
-#include <os.h>
-#include "tuntap.h"
-
-static int tuntap_user_init(void *data, void *dev)
-{
- struct tuntap_data *pri = data;
-
- pri->dev = dev;
- return 0;
-}
-
-static void tuntap_add_addr(unsigned char *addr, unsigned char *netmask,
- void *data)
-{
- struct tuntap_data *pri = data;
-
- tap_check_ips(pri->gate_addr, addr);
- if ((pri->fd == -1) || pri->fixed_config)
- return;
- open_addr(addr, netmask, pri->dev_name);
-}
-
-static void tuntap_del_addr(unsigned char *addr, unsigned char *netmask,
- void *data)
-{
- struct tuntap_data *pri = data;
-
- if ((pri->fd == -1) || pri->fixed_config)
- return;
- close_addr(addr, netmask, pri->dev_name);
-}
-
-struct tuntap_pre_exec_data {
- int stdout_fd;
- int close_me;
-};
-
-static void tuntap_pre_exec(void *arg)
-{
- struct tuntap_pre_exec_data *data = arg;
-
- dup2(data->stdout_fd, 1);
- close(data->close_me);
-}
-
-static int tuntap_open_tramp(char *gate, int *fd_out, int me, int remote,
- char *buffer, int buffer_len, int *used_out)
-{
- struct tuntap_pre_exec_data data;
- char version_buf[sizeof("nnnnn\0")];
- char *argv[] = { "uml_net", version_buf, "tuntap", "up", gate,
- NULL };
- char buf[CMSG_SPACE(sizeof(*fd_out))];
- struct msghdr msg;
- struct cmsghdr *cmsg;
- struct iovec iov;
- int pid, n, err;
-
- sprintf(version_buf, "%d", UML_NET_VERSION);
-
- data.stdout_fd = remote;
- data.close_me = me;
-
- pid = run_helper(tuntap_pre_exec, &data, argv);
-
- if (pid < 0)
- return pid;
-
- close(remote);
-
- msg.msg_name = NULL;
- msg.msg_namelen = 0;
- if (buffer != NULL) {
- iov = ((struct iovec) { buffer, buffer_len });
- msg.msg_iov = &iov;
- msg.msg_iovlen = 1;
- }
- else {
- msg.msg_iov = NULL;
- msg.msg_iovlen = 0;
- }
- msg.msg_control = buf;
- msg.msg_controllen = sizeof(buf);
- msg.msg_flags = 0;
- n = recvmsg(me, &msg, 0);
- *used_out = n;
- if (n < 0) {
- err = -errno;
- printk(UM_KERN_ERR "tuntap_open_tramp : recvmsg failed - "
- "errno = %d\n", errno);
- return err;
- }
- helper_wait(pid);
-
- cmsg = CMSG_FIRSTHDR(&msg);
- if (cmsg == NULL) {
- printk(UM_KERN_ERR "tuntap_open_tramp : didn't receive a "
- "message\n");
- return -EINVAL;
- }
- if ((cmsg->cmsg_level != SOL_SOCKET) ||
- (cmsg->cmsg_type != SCM_RIGHTS)) {
- printk(UM_KERN_ERR "tuntap_open_tramp : didn't receive a "
- "descriptor\n");
- return -EINVAL;
- }
- *fd_out = ((int *) CMSG_DATA(cmsg))[0];
- os_set_exec_close(*fd_out);
- return 0;
-}
-
-static int tuntap_open(void *data)
-{
- struct ifreq ifr;
- struct tuntap_data *pri = data;
- char *output, *buffer;
- int err, fds[2], len, used;
-
- err = tap_open_common(pri->dev, pri->gate_addr);
- if (err < 0)
- return err;
-
- if (pri->fixed_config) {
- pri->fd = os_open_file("/dev/net/tun",
- of_cloexec(of_rdwr(OPENFLAGS())), 0);
- if (pri->fd < 0) {
- printk(UM_KERN_ERR "Failed to open /dev/net/tun, "
- "err = %d\n", -pri->fd);
- return pri->fd;
- }
- memset(&ifr, 0, sizeof(ifr));
- ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
- strscpy(ifr.ifr_name, pri->dev_name);
- if (ioctl(pri->fd, TUNSETIFF, &ifr) < 0) {
- err = -errno;
- printk(UM_KERN_ERR "TUNSETIFF failed, errno = %d\n",
- errno);
- close(pri->fd);
- return err;
- }
- }
- else {
- err = socketpair(AF_UNIX, SOCK_DGRAM, 0, fds);
- if (err) {
- err = -errno;
- printk(UM_KERN_ERR "tuntap_open : socketpair failed - "
- "errno = %d\n", errno);
- return err;
- }
-
- buffer = get_output_buffer(&len);
- if (buffer != NULL)
- len--;
- used = 0;
-
- err = tuntap_open_tramp(pri->gate_addr, &pri->fd, fds[0],
- fds[1], buffer, len, &used);
-
- output = buffer;
- if (err < 0) {
- printk("%s", output);
- free_output_buffer(buffer);
- printk(UM_KERN_ERR "tuntap_open_tramp failed - "
- "err = %d\n", -err);
- return err;
- }
-
- pri->dev_name = uml_strdup(buffer);
- output += IFNAMSIZ;
- printk("%s", output);
- free_output_buffer(buffer);
-
- close(fds[0]);
- iter_addresses(pri->dev, open_addr, pri->dev_name);
- }
-
- return pri->fd;
-}
-
-static void tuntap_close(int fd, void *data)
-{
- struct tuntap_data *pri = data;
-
- if (!pri->fixed_config)
- iter_addresses(pri->dev, close_addr, pri->dev_name);
- close(fd);
- pri->fd = -1;
-}
-
-const struct net_user_info tuntap_user_info = {
- .init = tuntap_user_init,
- .open = tuntap_open,
- .close = tuntap_close,
- .remove = NULL,
- .add_address = tuntap_add_addr,
- .delete_address = tuntap_del_addr,
- .mtu = ETH_MAX_PACKET,
- .max_packet = ETH_MAX_PACKET + ETH_HEADER_OTHER,
-};
diff --git a/arch/um/os-Linux/file.c b/arch/um/os-Linux/file.c
index a0d01c68ce3e..617886d1fb1e 100644
--- a/arch/um/os-Linux/file.c
+++ b/arch/um/os-Linux/file.c
@@ -106,21 +106,6 @@ int os_get_ifname(int fd, char* namebuf)
return 0;
}
-int os_set_slip(int fd)
-{
- int disc, sencap;
-
- disc = N_SLIP;
- if (ioctl(fd, TIOCSETD, &disc) < 0)
- return -errno;
-
- sencap = 0;
- if (ioctl(fd, SIOCSIFENCAP, &sencap) < 0)
- return -errno;
-
- return 0;
-}
-
int os_mode_fd(int fd, int mode)
{
int err;
diff --git a/arch/um/os-Linux/internal.h b/arch/um/os-Linux/internal.h
index 317fca190c2b..5d8d3b0817a9 100644
--- a/arch/um/os-Linux/internal.h
+++ b/arch/um/os-Linux/internal.h
@@ -2,6 +2,9 @@
#ifndef __UM_OS_LINUX_INTERNAL_H
#define __UM_OS_LINUX_INTERNAL_H
+#include <mm_id.h>
+#include <stub-data.h>
+
/*
* elf_aux.c
*/
@@ -16,5 +19,5 @@ void check_tmpexec(void);
* skas/process.c
*/
void wait_stub_done(int pid);
-
+void wait_stub_done_seccomp(struct mm_id *mm_idp, int running, int wait_sigsys);
#endif /* __UM_OS_LINUX_INTERNAL_H */
diff --git a/arch/um/os-Linux/process.c b/arch/um/os-Linux/process.c
index 184566edeee9..00b49e90d05f 100644
--- a/arch/um/os-Linux/process.c
+++ b/arch/um/os-Linux/process.c
@@ -18,17 +18,29 @@
#include <init.h>
#include <longjmp.h>
#include <os.h>
+#include <skas/skas.h>
void os_alarm_process(int pid)
{
+ if (pid <= 0)
+ return;
+
kill(pid, SIGALRM);
}
void os_kill_process(int pid, int reap_child)
{
+ if (pid <= 0)
+ return;
+
+ /* Block signals until child is reaped */
+ block_signals();
+
kill(pid, SIGKILL);
if (reap_child)
CATCH_EINTR(waitpid(pid, NULL, __WALL));
+
+ unblock_signals();
}
/* Kill off a ptraced child by all means available. kill it normally first,
@@ -38,11 +50,27 @@ void os_kill_process(int pid, int reap_child)
void os_kill_ptraced_process(int pid, int reap_child)
{
+ if (pid <= 0)
+ return;
+
+ /* Block signals until child is reaped */
+ block_signals();
+
kill(pid, SIGKILL);
ptrace(PTRACE_KILL, pid);
ptrace(PTRACE_CONT, pid);
if (reap_child)
CATCH_EINTR(waitpid(pid, NULL, __WALL));
+
+ unblock_signals();
+}
+
+pid_t os_reap_child(void)
+{
+ int status;
+
+ /* Try to reap a child */
+ return waitpid(-1, &status, WNOHANG);
}
/* Don't use the glibc version, which caches the result in TLS. It misses some
@@ -151,6 +179,9 @@ void init_new_thread_signals(void)
set_handler(SIGBUS);
signal(SIGHUP, SIG_IGN);
set_handler(SIGIO);
+ /* We (currently) only use the child reaper IRQ in seccomp mode */
+ if (using_seccomp)
+ set_handler(SIGCHLD);
signal(SIGWINCH, SIG_IGN);
}
diff --git a/arch/um/os-Linux/registers.c b/arch/um/os-Linux/registers.c
index d7ca148807b2..bfba2cbc9478 100644
--- a/arch/um/os-Linux/registers.c
+++ b/arch/um/os-Linux/registers.c
@@ -14,8 +14,8 @@
/* This is set once at boot time and not changed thereafter */
-static unsigned long exec_regs[MAX_REG_NR];
-static unsigned long *exec_fp_regs;
+unsigned long exec_regs[MAX_REG_NR];
+unsigned long *exec_fp_regs;
int init_pid_registers(int pid)
{
diff --git a/arch/um/os-Linux/sigio.c b/arch/um/os-Linux/sigio.c
index a05a6ecee756..6de145f8fe3d 100644
--- a/arch/um/os-Linux/sigio.c
+++ b/arch/um/os-Linux/sigio.c
@@ -12,6 +12,7 @@
#include <signal.h>
#include <string.h>
#include <sys/epoll.h>
+#include <asm/unistd.h>
#include <kern_util.h>
#include <init.h>
#include <os.h>
@@ -46,7 +47,7 @@ static void *write_sigio_thread(void *unused)
__func__, errno);
}
- CATCH_EINTR(r = tgkill(pid, pid, SIGIO));
+ CATCH_EINTR(r = syscall(__NR_tgkill, pid, pid, SIGIO));
if (r < 0)
printk(UM_KERN_ERR "%s: tgkill failed, errno = %d\n",
__func__, errno);
diff --git a/arch/um/os-Linux/signal.c b/arch/um/os-Linux/signal.c
index e71e5b4878d1..11f07f498270 100644
--- a/arch/um/os-Linux/signal.c
+++ b/arch/um/os-Linux/signal.c
@@ -29,6 +29,7 @@ void (*sig_info[NSIG])(int, struct siginfo *, struct uml_pt_regs *, void *mc) =
[SIGBUS] = relay_signal,
[SIGSEGV] = segv_handler,
[SIGIO] = sigio_handler,
+ [SIGCHLD] = sigchld_handler,
};
static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)
@@ -44,7 +45,7 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)
}
/* enable signals if sig isn't IRQ signal */
- if ((sig != SIGIO) && (sig != SIGWINCH))
+ if ((sig != SIGIO) && (sig != SIGWINCH) && (sig != SIGCHLD))
unblock_signals_trace();
(*sig_info[sig])(sig, si, &r, mc);
@@ -64,6 +65,9 @@ static void sig_handler_common(int sig, struct siginfo *si, mcontext_t *mc)
#define SIGALRM_BIT 1
#define SIGALRM_MASK (1 << SIGALRM_BIT)
+#define SIGCHLD_BIT 2
+#define SIGCHLD_MASK (1 << SIGCHLD_BIT)
+
int signals_enabled;
#if IS_ENABLED(CONFIG_UML_TIME_TRAVEL_SUPPORT)
static int signals_blocked, signals_blocked_pending;
@@ -102,6 +106,11 @@ static void sig_handler(int sig, struct siginfo *si, mcontext_t *mc)
return;
}
+ if (!enabled && (sig == SIGCHLD)) {
+ signals_pending |= SIGCHLD_MASK;
+ return;
+ }
+
block_signals_trace();
sig_handler_common(sig, si, mc);
@@ -181,6 +190,8 @@ static void (*handlers[_NSIG])(int sig, struct siginfo *si, mcontext_t *mc) = {
[SIGIO] = sig_handler,
[SIGWINCH] = sig_handler,
+ /* SIGCHLD is only actually registered in seccomp mode. */
+ [SIGCHLD] = sig_handler,
[SIGALRM] = timer_alarm_handler,
[SIGUSR1] = sigusr1_handler,
@@ -309,6 +320,12 @@ void unblock_signals(void)
if (save_pending & SIGIO_MASK)
sig_handler_common(SIGIO, NULL, NULL);
+ if (save_pending & SIGCHLD_MASK) {
+ struct uml_pt_regs regs = {};
+
+ sigchld_handler(SIGCHLD, NULL, &regs, NULL);
+ }
+
/* Do not reenter the handler */
if ((save_pending & SIGALRM_MASK) && (!(signals_active & SIGALRM_MASK)))
diff --git a/arch/um/os-Linux/skas/mem.c b/arch/um/os-Linux/skas/mem.c
index d7f1814b0e5a..8b9921ac3ef8 100644
--- a/arch/um/os-Linux/skas/mem.c
+++ b/arch/um/os-Linux/skas/mem.c
@@ -43,6 +43,16 @@ void syscall_stub_dump_error(struct mm_id *mm_idp)
print_hex_dump(UM_KERN_ERR, " syscall data: ", 0,
16, 4, sc, sizeof(*sc), 0);
+
+ if (using_seccomp) {
+ printk(UM_KERN_ERR "%s: FD map num: %d", __func__,
+ mm_idp->syscall_fd_num);
+ print_hex_dump(UM_KERN_ERR,
+ " FD map: ", 0, 16,
+ sizeof(mm_idp->syscall_fd_map[0]),
+ mm_idp->syscall_fd_map,
+ sizeof(mm_idp->syscall_fd_map), 0);
+ }
}
static inline unsigned long *check_init_stack(struct mm_id * mm_idp,
@@ -80,27 +90,32 @@ static inline long do_syscall_stub(struct mm_id *mm_idp)
int n, i;
int err, pid = mm_idp->pid;
- n = ptrace_setregs(pid, syscall_regs);
- if (n < 0) {
- printk(UM_KERN_ERR "Registers - \n");
- for (i = 0; i < MAX_REG_NR; i++)
- printk(UM_KERN_ERR "\t%d\t0x%lx\n", i, syscall_regs[i]);
- panic("%s : PTRACE_SETREGS failed, errno = %d\n",
- __func__, -n);
- }
-
/* Inform process how much we have filled in. */
proc_data->syscall_data_len = mm_idp->syscall_data_len;
- err = ptrace(PTRACE_CONT, pid, 0, 0);
- if (err)
- panic("Failed to continue stub, pid = %d, errno = %d\n", pid,
- errno);
-
- wait_stub_done(pid);
+ if (using_seccomp) {
+ proc_data->restart_wait = 1;
+ wait_stub_done_seccomp(mm_idp, 0, 1);
+ } else {
+ n = ptrace_setregs(pid, syscall_regs);
+ if (n < 0) {
+ printk(UM_KERN_ERR "Registers -\n");
+ for (i = 0; i < MAX_REG_NR; i++)
+ printk(UM_KERN_ERR "\t%d\t0x%lx\n", i, syscall_regs[i]);
+ panic("%s : PTRACE_SETREGS failed, errno = %d\n",
+ __func__, -n);
+ }
+
+ err = ptrace(PTRACE_CONT, pid, 0, 0);
+ if (err)
+ panic("Failed to continue stub, pid = %d, errno = %d\n",
+ pid, errno);
+
+ wait_stub_done(pid);
+ }
/*
- * proc_data->err will be non-zero if there was an (unexpected) error.
+ * proc_data->err will be negative if there was an (unexpected) error.
* In that case, syscall_data_len points to the last executed syscall,
* otherwise it will be zero (but we do not need to rely on that).
*/
@@ -113,6 +128,9 @@ static inline long do_syscall_stub(struct mm_id *mm_idp)
mm_idp->syscall_data_len = 0;
}
+ if (using_seccomp)
+ mm_idp->syscall_fd_num = 0;
+
return mm_idp->syscall_data_len;
}
@@ -175,6 +193,44 @@ static struct stub_syscall *syscall_stub_get_previous(struct mm_id *mm_idp,
return NULL;
}
+static int get_stub_fd(struct mm_id *mm_idp, int fd)
+{
+ int i;
+
+ /* Find an FD slot (or flush and use first) */
+ if (!using_seccomp)
+ return fd;
+
+ /* Already crashed, value does not matter */
+ if (mm_idp->syscall_data_len < 0)
+ return 0;
+
+ /* Find existing FD in map if we can allocate another syscall */
+ if (mm_idp->syscall_data_len <
+ ARRAY_SIZE(((struct stub_data *)NULL)->syscall_data)) {
+ for (i = 0; i < mm_idp->syscall_fd_num; i++) {
+ if (mm_idp->syscall_fd_map[i] == fd)
+ return i;
+ }
+
+ if (mm_idp->syscall_fd_num < STUB_MAX_FDS) {
+ i = mm_idp->syscall_fd_num;
+ mm_idp->syscall_fd_map[i] = fd;
+
+ mm_idp->syscall_fd_num++;
+
+ return i;
+ }
+ }
+
+ /* FD map full or no syscall space available, continue after flush */
+ do_syscall_stub(mm_idp);
+ mm_idp->syscall_fd_map[0] = fd;
+ mm_idp->syscall_fd_num = 1;
+
+ return 0;
+}
+
int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len, int prot,
int phys_fd, unsigned long long offset)
{
@@ -182,12 +238,21 @@ int map(struct mm_id *mm_idp, unsigned long virt, unsigned long len, int prot,
/* Compress with previous syscall if that is possible */
sc = syscall_stub_get_previous(mm_idp, STUB_SYSCALL_MMAP, virt);
- if (sc && sc->mem.prot == prot && sc->mem.fd == phys_fd &&
+ if (sc && sc->mem.prot == prot &&
sc->mem.offset == MMAP_OFFSET(offset - sc->mem.length)) {
- sc->mem.length += len;
- return 0;
+ int prev_fd = sc->mem.fd;
+
+ if (using_seccomp)
+ prev_fd = mm_idp->syscall_fd_map[sc->mem.fd];
+
+ if (phys_fd == prev_fd) {
+ sc->mem.length += len;
+ return 0;
+ }
}
+ phys_fd = get_stub_fd(mm_idp, phys_fd);
+
sc = syscall_stub_alloc(mm_idp);
sc->syscall = STUB_SYSCALL_MMAP;
sc->mem.addr = virt;
diff --git a/arch/um/os-Linux/skas/process.c b/arch/um/os-Linux/skas/process.c
index ae2aea062f06..e42ffac23e3c 100644
--- a/arch/um/os-Linux/skas/process.c
+++ b/arch/um/os-Linux/skas/process.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
+ * Copyright (C) 2021 Benjamin Berg <benjamin@sipsolutions.net>
* Copyright (C) 2015 Thomas Meyer (thomas@m3y3r.de)
* Copyright (C) 2002- 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
*/
@@ -15,6 +16,7 @@
#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/stat.h>
+#include <sys/socket.h>
#include <asm/unistd.h>
#include <as-layout.h>
#include <init.h>
@@ -25,8 +27,11 @@
#include <registers.h>
#include <skas.h>
#include <sysdep/stub.h>
+#include <sysdep/mcontext.h>
+#include <linux/futex.h>
#include <linux/threads.h>
#include <timetravel.h>
+#include <asm-generic/rwonce.h>
#include "../internal.h"
int is_skas_winch(int pid, int fd, void *data)
@@ -142,6 +147,105 @@ bad_wait:
fatal_sigsegv();
}
+void wait_stub_done_seccomp(struct mm_id *mm_idp, int running, int wait_sigsys)
+{
+ struct stub_data *data = (void *)mm_idp->stack;
+ int ret;
+
+ do {
+ const char byte = 0;
+ struct iovec iov = {
+ .iov_base = (void *)&byte,
+ .iov_len = sizeof(byte),
+ };
+ union {
+ char data[CMSG_SPACE(sizeof(mm_idp->syscall_fd_map))];
+ struct cmsghdr align;
+ } ctrl;
+ struct msghdr msgh = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ };
+
+ if (!running) {
+ if (mm_idp->syscall_fd_num) {
+ unsigned int fds_size =
+ sizeof(int) * mm_idp->syscall_fd_num;
+ struct cmsghdr *cmsg;
+
+ msgh.msg_control = ctrl.data;
+ msgh.msg_controllen = CMSG_SPACE(fds_size);
+ cmsg = CMSG_FIRSTHDR(&msgh);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(fds_size);
+ memcpy(CMSG_DATA(cmsg), mm_idp->syscall_fd_map,
+ fds_size);
+
+ CATCH_EINTR(syscall(__NR_sendmsg, mm_idp->sock,
+ &msgh, 0));
+ }
+
+ data->signal = 0;
+ data->futex = FUTEX_IN_CHILD;
+ CATCH_EINTR(syscall(__NR_futex, &data->futex,
+ FUTEX_WAKE, 1, NULL, NULL, 0));
+ }
+
+ do {
+ /*
+ * We need to check whether the child is still alive
+ * before and after the FUTEX_WAIT call. Before, in
+ * case it just died but we still updated data->futex
+ * to FUTEX_IN_CHILD. And after, in case it died while
+ * we were waiting (and SIGCHLD woke us up, see the
+ * IRQ handler in mmu.c).
+ *
+ * Either way, if PID is negative, then we have no
+ * choice but to kill the task.
+ */
+ if (__READ_ONCE(mm_idp->pid) < 0)
+ goto out_kill;
+
+ ret = syscall(__NR_futex, &data->futex,
+ FUTEX_WAIT, FUTEX_IN_CHILD,
+ NULL, NULL, 0);
+ if (ret < 0 && errno != EINTR && errno != EAGAIN) {
+ printk(UM_KERN_ERR "%s : FUTEX_WAIT failed, errno = %d\n",
+ __func__, errno);
+ goto out_kill;
+ }
+ } while (data->futex == FUTEX_IN_CHILD);
+
+ if (__READ_ONCE(mm_idp->pid) < 0)
+ goto out_kill;
+
+ running = 0;
+
+ /* We may receive a SIGALRM before SIGSYS, iterate again. */
+ } while (wait_sigsys && data->signal == SIGALRM);
+
+ if (data->mctx_offset > sizeof(data->sigstack) - sizeof(mcontext_t)) {
+ printk(UM_KERN_ERR "%s : invalid mcontext offset", __func__);
+ goto out_kill;
+ }
+
+ if (wait_sigsys && data->signal != SIGSYS) {
+ printk(UM_KERN_ERR "%s : expected SIGSYS but got %d",
+ __func__, data->signal);
+ goto out_kill;
+ }
+
+ return;
+
+out_kill:
+ printk(UM_KERN_ERR "%s : failed to wait for stub, pid = %d, errno = %d\n",
+ __func__, mm_idp->pid, errno);
+ /* This is not true inside start_userspace */
+ if (current_mm_id() == mm_idp)
+ fatal_sigsegv();
+}
+
extern unsigned long current_stub_stack(void);
static void get_skas_faultinfo(int pid, struct faultinfo *fi)
@@ -163,12 +267,6 @@ static void get_skas_faultinfo(int pid, struct faultinfo *fi)
memcpy(fi, (void *)current_stub_stack(), sizeof(*fi));
}
-static void handle_segv(int pid, struct uml_pt_regs *regs)
-{
- get_skas_faultinfo(pid, &regs->faultinfo);
- segv(regs->faultinfo, 0, 1, NULL, NULL);
-}
-
static void handle_trap(int pid, struct uml_pt_regs *regs)
{
if ((UPT_IP(regs) >= STUB_START) && (UPT_IP(regs) < STUB_END))
@@ -181,29 +279,48 @@ extern char __syscall_stub_start[];
static int stub_exe_fd;
+struct tramp_data {
+ struct stub_data *stub_data;
+ /* 0 is inherited, 1 is the kernel side */
+ int sockpair[2];
+};
+
#ifndef CLOSE_RANGE_CLOEXEC
#define CLOSE_RANGE_CLOEXEC (1U << 2)
#endif
-static int userspace_tramp(void *stack)
+static int userspace_tramp(void *data)
{
+ struct tramp_data *tramp_data = data;
char *const argv[] = { "uml-userspace", NULL };
- int pipe_fds[2];
unsigned long long offset;
struct stub_init_data init_data = {
+ .seccomp = using_seccomp,
.stub_start = STUB_START,
- .segv_handler = STUB_CODE +
- (unsigned long) stub_segv_handler -
- (unsigned long) __syscall_stub_start,
};
struct iomem_region *iomem;
int ret;
+ if (using_seccomp) {
+ init_data.signal_handler = STUB_CODE +
+ (unsigned long) stub_signal_interrupt -
+ (unsigned long) __syscall_stub_start;
+ init_data.signal_restorer = STUB_CODE +
+ (unsigned long) stub_signal_restorer -
+ (unsigned long) __syscall_stub_start;
+ } else {
+ init_data.signal_handler = STUB_CODE +
+ (unsigned long) stub_segv_handler -
+ (unsigned long) __syscall_stub_start;
+ init_data.signal_restorer = 0;
+ }
+
init_data.stub_code_fd = phys_mapping(uml_to_phys(__syscall_stub_start),
&offset);
init_data.stub_code_offset = MMAP_OFFSET(offset);
- init_data.stub_data_fd = phys_mapping(uml_to_phys(stack), &offset);
+ init_data.stub_data_fd = phys_mapping(uml_to_phys(tramp_data->stub_data),
+ &offset);
init_data.stub_data_offset = MMAP_OFFSET(offset);
/*
@@ -214,20 +331,21 @@ static int userspace_tramp(void *stack)
syscall(__NR_close_range, 0, ~0U, CLOSE_RANGE_CLOEXEC);
fcntl(init_data.stub_data_fd, F_SETFD, 0);
- for (iomem = iomem_regions; iomem; iomem = iomem->next)
- fcntl(iomem->fd, F_SETFD, 0);
- /* Create a pipe for init_data (no CLOEXEC) and dup2 to STDIN */
- if (pipe(pipe_fds))
- exit(2);
+ /* In SECCOMP mode, these FDs are passed when needed */
+ if (!using_seccomp) {
+ for (iomem = iomem_regions; iomem; iomem = iomem->next)
+ fcntl(iomem->fd, F_SETFD, 0);
+ }
- if (dup2(pipe_fds[0], 0) < 0)
+ /* dup2 signaling FD/socket to STDIN */
+ if (dup2(tramp_data->sockpair[0], 0) < 0)
exit(3);
- close(pipe_fds[0]);
+ close(tramp_data->sockpair[0]);
/* Write init_data and close write side */
- ret = write(pipe_fds[1], &init_data, sizeof(init_data));
- close(pipe_fds[1]);
+ ret = write(tramp_data->sockpair[1], &init_data, sizeof(init_data));
+ close(tramp_data->sockpair[1]);
if (ret != sizeof(init_data))
exit(4);
@@ -315,11 +433,12 @@ static int __init init_stub_exe_fd(void)
}
__initcall(init_stub_exe_fd);
+int using_seccomp;
int userspace_pid[NR_CPUS];
/**
* start_userspace() - prepare a new userspace process
- * @stub_stack: pointer to the stub stack.
+ * @mm_id: The corresponding struct mm_id
*
* Setups a new temporary stack page that is used while userspace_tramp() runs
* Clones the kernel process into a new userspace process, with FDs only.
@@ -328,11 +447,15 @@ int userspace_pid[NR_CPUS];
* when negative: an error number.
* FIXME: can PIDs become negative?!
*/
-int start_userspace(unsigned long stub_stack)
+int start_userspace(struct mm_id *mm_id)
{
+ struct stub_data *proc_data = (void *)mm_id->stack;
+ struct tramp_data tramp_data = {
+ .stub_data = proc_data,
+ };
void *stack;
unsigned long sp;
- int pid, status, n, err;
+ int status, n, err;
/* setup a temporary stack page */
stack = mmap(NULL, UM_KERN_PAGE_SIZE,
@@ -348,40 +471,55 @@ int start_userspace(unsigned long stub_stack)
/* set stack pointer to the end of the stack page, so it can grow downwards */
sp = (unsigned long)stack + UM_KERN_PAGE_SIZE;
- /* clone into new userspace process */
- pid = clone(userspace_tramp, (void *) sp,
+ /* socket pair for init data and SECCOMP FD passing (no CLOEXEC here) */
+ if (socketpair(AF_UNIX, SOCK_STREAM, 0, tramp_data.sockpair)) {
+ err = -errno;
+ printk(UM_KERN_ERR "%s : socketpair failed, errno = %d\n",
+ __func__, errno);
+ return err;
+ }
+
+ if (using_seccomp)
+ proc_data->futex = FUTEX_IN_CHILD;
+
+ mm_id->pid = clone(userspace_tramp, (void *) sp,
CLONE_VFORK | CLONE_VM | SIGCHLD,
- (void *)stub_stack);
- if (pid < 0) {
+ (void *)&tramp_data);
+ if (mm_id->pid < 0) {
err = -errno;
printk(UM_KERN_ERR "%s : clone failed, errno = %d\n",
__func__, errno);
- return err;
+ goto out_close;
}
- do {
- CATCH_EINTR(n = waitpid(pid, &status, WUNTRACED | __WALL));
- if (n < 0) {
+ if (using_seccomp) {
+ wait_stub_done_seccomp(mm_id, 1, 1);
+ } else {
+ do {
+ CATCH_EINTR(n = waitpid(mm_id->pid, &status,
+ WUNTRACED | __WALL));
+ if (n < 0) {
+ err = -errno;
+ printk(UM_KERN_ERR "%s : wait failed, errno = %d\n",
+ __func__, errno);
+ goto out_kill;
+ }
+ } while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGALRM));
+
+ if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) {
+ err = -EINVAL;
+ printk(UM_KERN_ERR "%s : expected SIGSTOP, got status = %d\n",
+ __func__, status);
+ goto out_kill;
+ }
+
+ if (ptrace(PTRACE_SETOPTIONS, mm_id->pid, NULL,
+ (void *) PTRACE_O_TRACESYSGOOD) < 0) {
err = -errno;
- printk(UM_KERN_ERR "%s : wait failed, errno = %d\n",
+ printk(UM_KERN_ERR "%s : PTRACE_SETOPTIONS failed, errno = %d\n",
__func__, errno);
goto out_kill;
}
- } while (WIFSTOPPED(status) && (WSTOPSIG(status) == SIGALRM));
-
- if (!WIFSTOPPED(status) || (WSTOPSIG(status) != SIGSTOP)) {
- err = -EINVAL;
- printk(UM_KERN_ERR "%s : expected SIGSTOP, got status = %d\n",
- __func__, status);
- goto out_kill;
- }
-
- if (ptrace(PTRACE_SETOPTIONS, pid, NULL,
- (void *) PTRACE_O_TRACESYSGOOD) < 0) {
- err = -errno;
- printk(UM_KERN_ERR "%s : PTRACE_SETOPTIONS failed, errno = %d\n",
- __func__, errno);
- goto out_kill;
}
if (munmap(stack, UM_KERN_PAGE_SIZE) < 0) {
@@ -391,10 +529,22 @@ int start_userspace(unsigned long stub_stack)
goto out_kill;
}
- return pid;
+ close(tramp_data.sockpair[0]);
+ if (using_seccomp)
+ mm_id->sock = tramp_data.sockpair[1];
+ else
+ close(tramp_data.sockpair[1]);
+
+ return 0;
+
+out_kill:
+ os_kill_ptraced_process(mm_id->pid, 1);
+out_close:
+ close(tramp_data.sockpair[0]);
+ close(tramp_data.sockpair[1]);
+
+ mm_id->pid = -1;
- out_kill:
- os_kill_ptraced_process(pid, 1);
return err;
}
@@ -404,7 +554,9 @@ extern unsigned long tt_extra_sched_jiffies;
void userspace(struct uml_pt_regs *regs)
{
int err, status, op, pid = userspace_pid[0];
- siginfo_t si;
+ siginfo_t si_ptrace;
+ siginfo_t *si;
+ int sig;
/* Handle any immediate reschedules or signals */
interrupt_end();
@@ -437,103 +589,177 @@ void userspace(struct uml_pt_regs *regs)
current_mm_sync();
- /* Flush out any pending syscalls */
- err = syscall_stub_flush(current_mm_id());
- if (err) {
- if (err == -ENOMEM)
- report_enomem();
+ if (using_seccomp) {
+ struct mm_id *mm_id = current_mm_id();
+ struct stub_data *proc_data = (void *) mm_id->stack;
+ int ret;
- printk(UM_KERN_ERR "%s - Error flushing stub syscalls: %d",
- __func__, -err);
- fatal_sigsegv();
- }
+ ret = set_stub_state(regs, proc_data, singlestepping());
+ if (ret) {
+ printk(UM_KERN_ERR "%s - failed to set regs: %d",
+ __func__, ret);
+ fatal_sigsegv();
+ }
- /*
- * This can legitimately fail if the process loads a
- * bogus value into a segment register. It will
- * segfault and PTRACE_GETREGS will read that value
- * out of the process. However, PTRACE_SETREGS will
- * fail. In this case, there is nothing to do but
- * just kill the process.
- */
- if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp)) {
- printk(UM_KERN_ERR "%s - ptrace set regs failed, errno = %d\n",
- __func__, errno);
- fatal_sigsegv();
- }
+ /* Must have been reset by the syscall caller */
+ if (proc_data->restart_wait != 0)
+ panic("Programming error: Flag to only run syscalls in child was not cleared!");
- if (put_fp_registers(pid, regs->fp)) {
- printk(UM_KERN_ERR "%s - ptrace set fp regs failed, errno = %d\n",
- __func__, errno);
- fatal_sigsegv();
- }
+ /* Mark pending syscalls for flushing */
+ proc_data->syscall_data_len = mm_id->syscall_data_len;
- if (singlestepping())
- op = PTRACE_SYSEMU_SINGLESTEP;
- else
- op = PTRACE_SYSEMU;
+ wait_stub_done_seccomp(mm_id, 0, 0);
- if (ptrace(op, pid, 0, 0)) {
- printk(UM_KERN_ERR "%s - ptrace continue failed, op = %d, errno = %d\n",
- __func__, op, errno);
- fatal_sigsegv();
- }
+ sig = proc_data->signal;
- CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL));
- if (err < 0) {
- printk(UM_KERN_ERR "%s - wait failed, errno = %d\n",
- __func__, errno);
- fatal_sigsegv();
- }
+ if (sig == SIGTRAP && proc_data->err != 0) {
+ printk(UM_KERN_ERR "%s - Error flushing stub syscalls",
+ __func__);
+ syscall_stub_dump_error(mm_id);
+ mm_id->syscall_data_len = proc_data->err;
+ fatal_sigsegv();
+ }
- regs->is_user = 1;
- if (ptrace(PTRACE_GETREGS, pid, 0, regs->gp)) {
- printk(UM_KERN_ERR "%s - PTRACE_GETREGS failed, errno = %d\n",
- __func__, errno);
- fatal_sigsegv();
- }
+ mm_id->syscall_data_len = 0;
+ mm_id->syscall_fd_num = 0;
- if (get_fp_registers(pid, regs->fp)) {
- printk(UM_KERN_ERR "%s - get_fp_registers failed, errno = %d\n",
- __func__, errno);
- fatal_sigsegv();
- }
+ ret = get_stub_state(regs, proc_data, NULL);
+ if (ret) {
+ printk(UM_KERN_ERR "%s - failed to get regs: %d",
+ __func__, ret);
+ fatal_sigsegv();
+ }
- UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */
+ if (proc_data->si_offset > sizeof(proc_data->sigstack) - sizeof(*si))
+ panic("%s - Invalid siginfo offset from child",
+ __func__);
+ si = (void *)&proc_data->sigstack[proc_data->si_offset];
+
+ regs->is_user = 1;
+
+ /* Fill in ORIG_RAX and extract fault information */
+ PT_SYSCALL_NR(regs->gp) = si->si_syscall;
+ if (sig == SIGSEGV) {
+ mcontext_t *mcontext = (void *)&proc_data->sigstack[proc_data->mctx_offset];
- if (WIFSTOPPED(status)) {
- int sig = WSTOPSIG(status);
+ GET_FAULTINFO_FROM_MC(regs->faultinfo, mcontext);
+ }
+ } else {
+ /* Flush out any pending syscalls */
+ err = syscall_stub_flush(current_mm_id());
+ if (err) {
+ if (err == -ENOMEM)
+ report_enomem();
+
+ printk(UM_KERN_ERR "%s - Error flushing stub syscalls: %d",
+ __func__, -err);
+ fatal_sigsegv();
+ }
- /* These signal handlers need the si argument.
- * The SIGIO and SIGALARM handlers which constitute the
- * majority of invocations, do not use it.
+ /*
+ * This can legitimately fail if the process loads a
+ * bogus value into a segment register. It will
+ * segfault and PTRACE_GETREGS will read that value
+ * out of the process. However, PTRACE_SETREGS will
+ * fail. In this case, there is nothing to do but
+ * just kill the process.
*/
- switch (sig) {
- case SIGSEGV:
- case SIGTRAP:
- case SIGILL:
- case SIGBUS:
- case SIGFPE:
- case SIGWINCH:
- ptrace(PTRACE_GETSIGINFO, pid, 0, (struct siginfo *)&si);
- break;
+ if (ptrace(PTRACE_SETREGS, pid, 0, regs->gp)) {
+ printk(UM_KERN_ERR "%s - ptrace set regs failed, errno = %d\n",
+ __func__, errno);
+ fatal_sigsegv();
}
- switch (sig) {
- case SIGSEGV:
- if (PTRACE_FULL_FAULTINFO) {
+ if (put_fp_registers(pid, regs->fp)) {
+ printk(UM_KERN_ERR "%s - ptrace set fp regs failed, errno = %d\n",
+ __func__, errno);
+ fatal_sigsegv();
+ }
+
+ if (singlestepping())
+ op = PTRACE_SYSEMU_SINGLESTEP;
+ else
+ op = PTRACE_SYSEMU;
+
+ if (ptrace(op, pid, 0, 0)) {
+ printk(UM_KERN_ERR "%s - ptrace continue failed, op = %d, errno = %d\n",
+ __func__, op, errno);
+ fatal_sigsegv();
+ }
+
+ CATCH_EINTR(err = waitpid(pid, &status, WUNTRACED | __WALL));
+ if (err < 0) {
+ printk(UM_KERN_ERR "%s - wait failed, errno = %d\n",
+ __func__, errno);
+ fatal_sigsegv();
+ }
+
+ regs->is_user = 1;
+ if (ptrace(PTRACE_GETREGS, pid, 0, regs->gp)) {
+ printk(UM_KERN_ERR "%s - PTRACE_GETREGS failed, errno = %d\n",
+ __func__, errno);
+ fatal_sigsegv();
+ }
+
+ if (get_fp_registers(pid, regs->fp)) {
+ printk(UM_KERN_ERR "%s - get_fp_registers failed, errno = %d\n",
+ __func__, errno);
+ fatal_sigsegv();
+ }
+
+ if (WIFSTOPPED(status)) {
+ sig = WSTOPSIG(status);
+
+ /*
+ * These signal handlers need the si argument
+ * and SIGSEGV needs the faultinfo.
+ * The SIGIO and SIGALARM handlers which constitute
+ * the majority of invocations, do not use it.
+ */
+ switch (sig) {
+ case SIGSEGV:
get_skas_faultinfo(pid,
&regs->faultinfo);
- (*sig_info[SIGSEGV])(SIGSEGV, (struct siginfo *)&si,
- regs, NULL);
+ fallthrough;
+ case SIGTRAP:
+ case SIGILL:
+ case SIGBUS:
+ case SIGFPE:
+ case SIGWINCH:
+ ptrace(PTRACE_GETSIGINFO, pid, 0,
+ (struct siginfo *)&si_ptrace);
+ si = &si_ptrace;
+ break;
+ default:
+ si = NULL;
+ break;
}
- else handle_segv(pid, regs);
+ } else {
+ sig = 0;
+ }
+ }
+
+ UPT_SYSCALL_NR(regs) = -1; /* Assume: It's not a syscall */
+
+ if (sig) {
+ switch (sig) {
+ case SIGSEGV:
+ if (using_seccomp || PTRACE_FULL_FAULTINFO)
+ (*sig_info[SIGSEGV])(SIGSEGV,
+ (struct siginfo *)si,
+ regs, NULL);
+ else
+ segv(regs->faultinfo, 0, 1, NULL, NULL);
+
+ break;
+ case SIGSYS:
+ handle_syscall(regs);
break;
case SIGTRAP + 0x80:
handle_trap(pid, regs);
break;
case SIGTRAP:
- relay_signal(SIGTRAP, (struct siginfo *)&si, regs, NULL);
+ relay_signal(SIGTRAP, (struct siginfo *)si, regs, NULL);
break;
case SIGALRM:
break;
@@ -543,7 +769,7 @@ void userspace(struct uml_pt_regs *regs)
case SIGFPE:
case SIGWINCH:
block_signals_trace();
- (*sig_info[sig])(sig, (struct siginfo *)&si, regs, NULL);
+ (*sig_info[sig])(sig, (struct siginfo *)si, regs, NULL);
unblock_signals_trace();
break;
default:
diff --git a/arch/um/os-Linux/start_up.c b/arch/um/os-Linux/start_up.c
index 93fc82c01aba..a827c2e01aa5 100644
--- a/arch/um/os-Linux/start_up.c
+++ b/arch/um/os-Linux/start_up.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
+ * Copyright (C) 2021 Benjamin Berg <benjamin@sipsolutions.net>
* Copyright (C) 2000 - 2007 Jeff Dike (jdike@{addtoit,linux.intel}.com)
*/
@@ -24,6 +25,13 @@
#include <kern_util.h>
#include <mem_user.h>
#include <ptrace_user.h>
+#include <stdbool.h>
+#include <stub-data.h>
+#include <sys/prctl.h>
+#include <linux/seccomp.h>
+#include <linux/filter.h>
+#include <sysdep/mcontext.h>
+#include <sysdep/stub.h>
#include <registers.h>
#include <skas.h>
#include "internal.h"
@@ -224,6 +232,140 @@ static void __init check_ptrace(void)
check_sysemu();
}
+extern unsigned long host_fp_size;
+extern unsigned long exec_regs[MAX_REG_NR];
+extern unsigned long *exec_fp_regs;
+
+__initdata static struct stub_data *seccomp_test_stub_data;
+
+static void __init sigsys_handler(int sig, siginfo_t *info, void *p)
+{
+ ucontext_t *uc = p;
+
+ /* Stow away the location of the mcontext in the stack */
+ seccomp_test_stub_data->mctx_offset = (unsigned long)&uc->uc_mcontext -
+ (unsigned long)&seccomp_test_stub_data->sigstack[0];
+
+ /* Prevent libc from clearing memory (mctx_offset in particular) */
+ syscall(__NR_exit, 0);
+}
+
+static int __init seccomp_helper(void *data)
+{
+ static struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD | BPF_W | BPF_ABS,
+ offsetof(struct seccomp_data, nr)),
+ BPF_JUMP(BPF_JMP | BPF_JEQ | BPF_K, __NR_clock_nanosleep, 1, 0),
+ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_ALLOW),
+ BPF_STMT(BPF_RET | BPF_K, SECCOMP_RET_TRAP),
+ };
+ static struct sock_fprog prog = {
+ .len = ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ struct sigaction sa;
+
+ /* close_range is needed for the stub */
+ if (stub_syscall3(__NR_close_range, 1, ~0U, 0))
+ exit(1);
+
+ set_sigstack(seccomp_test_stub_data->sigstack,
+ sizeof(seccomp_test_stub_data->sigstack));
+
+ sa.sa_flags = SA_ONSTACK | SA_NODEFER | SA_SIGINFO;
+ sa.sa_sigaction = (void *) sigsys_handler;
+ sa.sa_restorer = NULL;
+ if (sigaction(SIGSYS, &sa, NULL) < 0)
+ exit(2);
+
+ prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ if (syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
+ SECCOMP_FILTER_FLAG_TSYNC, &prog) != 0)
+ exit(3);
+
+ sleep(0);
+
+ /* Never reached. */
+ _exit(4);
+}
+
+static bool __init init_seccomp(void)
+{
+ int pid;
+ int status;
+ int n;
+ unsigned long sp;
+
+ /*
+ * We check that we can install a seccomp filter and then exit(0)
+ * from a trapped syscall.
+ *
+ * Note that we cannot verify that no seccomp filter already exists
+ * for a syscall that results in the process/thread to be killed.
+ */
+
+ os_info("Checking that seccomp filters can be installed...");
+
+ seccomp_test_stub_data = mmap(0, sizeof(*seccomp_test_stub_data),
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANON, 0, 0);
+
+ /* Use the syscall data area as stack, we just need something */
+ sp = (unsigned long)&seccomp_test_stub_data->syscall_data +
+ sizeof(seccomp_test_stub_data->syscall_data) -
+ sizeof(void *);
+ pid = clone(seccomp_helper, (void *)sp, CLONE_VFORK | CLONE_VM, NULL);
+
+ if (pid < 0)
+ fatal_perror("check_seccomp : clone failed");
+
+ CATCH_EINTR(n = waitpid(pid, &status, __WCLONE));
+ if (n < 0)
+ fatal_perror("check_seccomp : waitpid failed");
+
+ if (WIFEXITED(status) && WEXITSTATUS(status) == 0) {
+ struct uml_pt_regs *regs;
+ unsigned long fp_size;
+ int r;
+
+ /* Fill in the host_fp_size from the mcontext. */
+ regs = calloc(1, sizeof(struct uml_pt_regs));
+ get_stub_state(regs, seccomp_test_stub_data, &fp_size);
+ host_fp_size = fp_size;
+ free(regs);
+
+ /* Repeat with the correct size */
+ regs = calloc(1, sizeof(struct uml_pt_regs) + host_fp_size);
+ r = get_stub_state(regs, seccomp_test_stub_data, NULL);
+
+ /* Store as the default startup registers */
+ exec_fp_regs = malloc(host_fp_size);
+ memcpy(exec_regs, regs->gp, sizeof(exec_regs));
+ memcpy(exec_fp_regs, regs->fp, host_fp_size);
+
+ munmap(seccomp_test_stub_data, sizeof(*seccomp_test_stub_data));
+
+ free(regs);
+
+ if (r) {
+ os_info("failed to fetch registers: %d\n", r);
+ return false;
+ }
+
+ os_info("OK\n");
+ return true;
+ }
+
+ if (WIFEXITED(status) && WEXITSTATUS(status) == 2)
+ os_info("missing\n");
+ else
+ os_info("error\n");
+
+ munmap(seccomp_test_stub_data, sizeof(*seccomp_test_stub_data));
+ return false;
+}
+
+
static void __init check_coredump_limit(void)
{
struct rlimit lim;
@@ -278,6 +420,44 @@ void __init get_host_cpu_features(
}
}
+static int seccomp_config __initdata;
+
+static int __init uml_seccomp_config(char *line, int *add)
+{
+ *add = 0;
+
+ if (strcmp(line, "off") == 0)
+ seccomp_config = 0;
+ else if (strcmp(line, "auto") == 0)
+ seccomp_config = 1;
+ else if (strcmp(line, "on") == 0)
+ seccomp_config = 2;
+ else
+ fatal("Invalid seccomp option '%s', expected on/auto/off\n",
+ line);
+
+ return 0;
+}
+
+__uml_setup("seccomp=", uml_seccomp_config,
+"seccomp=<on/auto/off>\n"
+" Configure whether or not SECCOMP is used. With SECCOMP, userspace\n"
+" processes work collaboratively with the kernel instead of being\n"
+" traced using ptrace. All syscalls from the application are caught and\n"
+" redirected using a signal. This signal handler in turn is permitted to\n"
+" do the selected set of syscalls to communicate with the UML kernel and\n"
+" do the required memory management.\n"
+"\n"
+" This method is overall faster than the ptrace based userspace, primarily\n"
+" because it reduces the number of context switches for (minor) page faults.\n"
+"\n"
+" However, the SECCOMP filter is not (yet) restrictive enough to prevent\n"
+" userspace from reading and writing all physical memory. Userspace\n"
+" processes could also trick the stub into disabling SIGALRM which\n"
+" prevents it from being interrupted for scheduling purposes.\n"
+"\n"
+" This is insecure and should only be used with a trusted userspace\n\n"
+);
void __init os_early_checks(void)
{
@@ -286,13 +466,24 @@ void __init os_early_checks(void)
/* Print out the core dump limits early */
check_coredump_limit();
- check_ptrace();
-
/* Need to check this early because mmapping happens before the
* kernel is running.
*/
check_tmpexec();
+ if (seccomp_config) {
+ if (init_seccomp()) {
+ using_seccomp = 1;
+ return;
+ }
+
+ if (seccomp_config == 2)
+ fatal("SECCOMP userspace requested but not functional!\n");
+ }
+
+ using_seccomp = 0;
+ check_ptrace();
+
pid = start_ptraced_child();
if (init_pid_registers(pid))
fatal("Failed to initialize default registers");