diff options
-rw-r--r-- | Documentation/networking/mptcp-sysctl.rst | 23 | ||||
-rw-r--r-- | include/net/mptcp.h | 14 | ||||
-rw-r--r-- | net/mptcp/ctrl.c | 113 | ||||
-rw-r--r-- | net/mptcp/pm.c | 97 | ||||
-rw-r--r-- | net/mptcp/pm_kernel.c | 16 | ||||
-rw-r--r-- | net/mptcp/pm_netlink.c | 6 | ||||
-rw-r--r-- | net/mptcp/pm_userspace.c | 10 | ||||
-rw-r--r-- | net/mptcp/protocol.h | 17 | ||||
-rwxr-xr-x | tools/testing/selftests/net/mptcp/userspace_pm.sh | 30 |
9 files changed, 301 insertions, 25 deletions
diff --git a/Documentation/networking/mptcp-sysctl.rst b/Documentation/networking/mptcp-sysctl.rst index 03e1d3610333..5bfab01eff5a 100644 --- a/Documentation/networking/mptcp-sysctl.rst +++ b/Documentation/networking/mptcp-sysctl.rst @@ -30,6 +30,10 @@ allow_join_initial_addr_port - BOOLEAN Default: 1 +available_path_managers - STRING + Shows the available path managers choices that are registered. More + path managers may be available, but not loaded. + available_schedulers - STRING Shows the available schedulers choices that are registered. More packet schedulers may be available, but not loaded. @@ -72,6 +76,23 @@ enabled - BOOLEAN Default: 1 (enabled) +path_manager - STRING + Set the default path manager name to use for each new MPTCP + socket. In-kernel path management will control subflow + connections and address advertisements according to + per-namespace values configured over the MPTCP netlink + API. Userspace path management puts per-MPTCP-connection subflow + connection decisions and address advertisements under control of + a privileged userspace program, at the cost of more netlink + traffic to propagate all of the related events and commands. + + This is a per-namespace sysctl. + + * "kernel" - In-kernel path manager + * "userspace" - Userspace path manager + + Default: "kernel" + pm_type - INTEGER Set the default path manager type to use for each new MPTCP socket. In-kernel path management will control subflow @@ -84,6 +105,8 @@ pm_type - INTEGER This is a per-namespace sysctl. + Deprecated since v6.15, use path_manager instead. + * 0 - In-kernel path manager * 1 - Userspace path manager diff --git a/include/net/mptcp.h b/include/net/mptcp.h index 2c85ca92bb1c..bfbad695951c 100644 --- a/include/net/mptcp.h +++ b/include/net/mptcp.h @@ -14,6 +14,7 @@ struct mptcp_info; struct mptcp_sock; +struct mptcp_pm_addr_entry; struct seq_file; /* MPTCP sk_buff extension data */ @@ -121,6 +122,19 @@ struct mptcp_sched_ops { void (*release)(struct mptcp_sock *msk); } ____cacheline_aligned_in_smp; +#define MPTCP_PM_NAME_MAX 16 +#define MPTCP_PM_MAX 128 +#define MPTCP_PM_BUF_MAX (MPTCP_PM_NAME_MAX * MPTCP_PM_MAX) + +struct mptcp_pm_ops { + char name[MPTCP_PM_NAME_MAX]; + struct module *owner; + struct list_head list; + + void (*init)(struct mptcp_sock *msk); + void (*release)(struct mptcp_sock *msk); +} ____cacheline_aligned_in_smp; + #ifdef CONFIG_MPTCP void mptcp_init(void); diff --git a/net/mptcp/ctrl.c b/net/mptcp/ctrl.c index be6c0237e10b..d9290c5bb6c7 100644 --- a/net/mptcp/ctrl.c +++ b/net/mptcp/ctrl.c @@ -39,6 +39,7 @@ struct mptcp_pernet { u8 allow_join_initial_addr_port; u8 pm_type; char scheduler[MPTCP_SCHED_NAME_MAX]; + char path_manager[MPTCP_PM_NAME_MAX]; }; static struct mptcp_pernet *mptcp_get_pernet(const struct net *net) @@ -83,6 +84,11 @@ int mptcp_get_pm_type(const struct net *net) return mptcp_get_pernet(net)->pm_type; } +const char *mptcp_get_path_manager(const struct net *net) +{ + return mptcp_get_pernet(net)->path_manager; +} + const char *mptcp_get_scheduler(const struct net *net) { return mptcp_get_pernet(net)->scheduler; @@ -101,6 +107,7 @@ static void mptcp_pernet_set_defaults(struct mptcp_pernet *pernet) pernet->stale_loss_cnt = 4; pernet->pm_type = MPTCP_PM_TYPE_KERNEL; strscpy(pernet->scheduler, "default", sizeof(pernet->scheduler)); + strscpy(pernet->path_manager, "kernel", sizeof(pernet->path_manager)); } #ifdef CONFIG_SYSCTL @@ -174,6 +181,96 @@ static int proc_blackhole_detect_timeout(const struct ctl_table *table, return ret; } +static int mptcp_set_path_manager(char *path_manager, const char *name) +{ + struct mptcp_pm_ops *pm_ops; + int ret = 0; + + rcu_read_lock(); + pm_ops = mptcp_pm_find(name); + if (pm_ops) + strscpy(path_manager, name, MPTCP_PM_NAME_MAX); + else + ret = -ENOENT; + rcu_read_unlock(); + + return ret; +} + +static int proc_path_manager(const struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct mptcp_pernet *pernet = container_of(ctl->data, + struct mptcp_pernet, + path_manager); + char (*path_manager)[MPTCP_PM_NAME_MAX] = ctl->data; + char pm_name[MPTCP_PM_NAME_MAX]; + const struct ctl_table tbl = { + .data = pm_name, + .maxlen = MPTCP_PM_NAME_MAX, + }; + int ret; + + strscpy(pm_name, *path_manager, MPTCP_PM_NAME_MAX); + + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); + if (write && ret == 0) { + ret = mptcp_set_path_manager(*path_manager, pm_name); + if (ret == 0) { + u8 pm_type = __MPTCP_PM_TYPE_NR; + + if (strncmp(pm_name, "kernel", MPTCP_PM_NAME_MAX) == 0) + pm_type = MPTCP_PM_TYPE_KERNEL; + else if (strncmp(pm_name, "userspace", MPTCP_PM_NAME_MAX) == 0) + pm_type = MPTCP_PM_TYPE_USERSPACE; + pernet->pm_type = pm_type; + } + } + + return ret; +} + +static int proc_pm_type(const struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct mptcp_pernet *pernet = container_of(ctl->data, + struct mptcp_pernet, + pm_type); + int ret; + + ret = proc_dou8vec_minmax(ctl, write, buffer, lenp, ppos); + if (write && ret == 0) { + u8 pm_type = READ_ONCE(*(u8 *)ctl->data); + char *pm_name = ""; + + if (pm_type == MPTCP_PM_TYPE_KERNEL) + pm_name = "kernel"; + else if (pm_type == MPTCP_PM_TYPE_USERSPACE) + pm_name = "userspace"; + mptcp_set_path_manager(pernet->path_manager, pm_name); + } + + return ret; +} + +static int proc_available_path_managers(const struct ctl_table *ctl, + int write, void *buffer, + size_t *lenp, loff_t *ppos) +{ + struct ctl_table tbl = { .maxlen = MPTCP_PM_BUF_MAX, }; + int ret; + + tbl.data = kmalloc(tbl.maxlen, GFP_USER); + if (!tbl.data) + return -ENOMEM; + + mptcp_pm_get_available(tbl.data, MPTCP_PM_BUF_MAX); + ret = proc_dostring(&tbl, write, buffer, lenp, ppos); + kfree(tbl.data); + + return ret; +} + static struct ctl_table mptcp_sysctl_table[] = { { .procname = "enabled", @@ -218,7 +315,7 @@ static struct ctl_table mptcp_sysctl_table[] = { .procname = "pm_type", .maxlen = sizeof(u8), .mode = 0644, - .proc_handler = proc_dou8vec_minmax, + .proc_handler = proc_pm_type, .extra1 = SYSCTL_ZERO, .extra2 = &mptcp_pm_type_max }, @@ -253,6 +350,18 @@ static struct ctl_table mptcp_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dou8vec_minmax, }, + { + .procname = "path_manager", + .maxlen = MPTCP_PM_NAME_MAX, + .mode = 0644, + .proc_handler = proc_path_manager, + }, + { + .procname = "available_path_managers", + .maxlen = MPTCP_PM_BUF_MAX, + .mode = 0444, + .proc_handler = proc_available_path_managers, + }, }; static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet) @@ -278,6 +387,8 @@ static int mptcp_pernet_new_table(struct net *net, struct mptcp_pernet *pernet) table[8].data = &pernet->close_timeout; table[9].data = &pernet->blackhole_timeout; table[10].data = &pernet->syn_retrans_before_tcp_fallback; + table[11].data = &pernet->path_manager; + /* table[12] is for available_path_managers which is read-only info */ hdr = register_net_sysctl_sz(net, MPTCP_SYSCTL_PATH, table, ARRAY_SIZE(mptcp_sysctl_table)); diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c index 833839d7286e..18b19dbccbba 100644 --- a/net/mptcp/pm.c +++ b/net/mptcp/pm.c @@ -5,6 +5,8 @@ */ #define pr_fmt(fmt) "MPTCP: " fmt +#include <linux/rculist.h> +#include <linux/spinlock.h> #include "protocol.h" #include "mib.h" @@ -18,6 +20,9 @@ struct mptcp_pm_add_entry { struct mptcp_sock *sock; }; +static DEFINE_SPINLOCK(mptcp_pm_list_lock); +static LIST_HEAD(mptcp_pm_list); + /* path manager helpers */ /* if sk is ipv4 or ipv6_only allows only same-family local and remote addresses, @@ -511,13 +516,13 @@ void mptcp_pm_fully_established(struct mptcp_sock *msk, const struct sock *ssk) * be sure to serve this event only once. */ if (READ_ONCE(pm->work_pending) && - !(msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED))) + !(pm->status & BIT(MPTCP_PM_ALREADY_ESTABLISHED))) mptcp_pm_schedule_work(msk, MPTCP_PM_ESTABLISHED); - if ((msk->pm.status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)) == 0) + if ((pm->status & BIT(MPTCP_PM_ALREADY_ESTABLISHED)) == 0) announce = true; - msk->pm.status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED); + pm->status |= BIT(MPTCP_PM_ALREADY_ESTABLISHED); spin_unlock_bh(&pm->lock); if (announce) @@ -978,10 +983,7 @@ void mptcp_pm_data_reset(struct mptcp_sock *msk) u8 pm_type = mptcp_get_pm_type(sock_net((struct sock *)msk)); struct mptcp_pm_data *pm = &msk->pm; - pm->add_addr_signaled = 0; - pm->add_addr_accepted = 0; - pm->local_addr_used = 0; - pm->subflows = 0; + memset(&pm->reset, 0, sizeof(pm->reset)); pm->rm_list_tx.nr = 0; pm->rm_list_rx.nr = 0; WRITE_ONCE(pm->pm_type, pm_type); @@ -1000,16 +1002,9 @@ void mptcp_pm_data_reset(struct mptcp_sock *msk) !!mptcp_pm_get_add_addr_accept_max(msk) && subflows_allowed); WRITE_ONCE(pm->accept_subflow, subflows_allowed); - } else { - WRITE_ONCE(pm->work_pending, 0); - WRITE_ONCE(pm->accept_addr, 0); - WRITE_ONCE(pm->accept_subflow, 0); - } - WRITE_ONCE(pm->addr_signal, 0); - WRITE_ONCE(pm->remote_deny_join_id0, false); - pm->status = 0; - bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); + bitmap_fill(pm->id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); + } } void mptcp_pm_data_init(struct mptcp_sock *msk) @@ -1022,5 +1017,75 @@ void mptcp_pm_data_init(struct mptcp_sock *msk) void __init mptcp_pm_init(void) { + mptcp_pm_kernel_register(); + mptcp_pm_userspace_register(); mptcp_pm_nl_init(); } + +/* Must be called with rcu read lock held */ +struct mptcp_pm_ops *mptcp_pm_find(const char *name) +{ + struct mptcp_pm_ops *pm_ops; + + list_for_each_entry_rcu(pm_ops, &mptcp_pm_list, list) { + if (!strcmp(pm_ops->name, name)) + return pm_ops; + } + + return NULL; +} + +int mptcp_pm_validate(struct mptcp_pm_ops *pm_ops) +{ + return 0; +} + +int mptcp_pm_register(struct mptcp_pm_ops *pm_ops) +{ + int ret; + + ret = mptcp_pm_validate(pm_ops); + if (ret) + return ret; + + spin_lock(&mptcp_pm_list_lock); + if (mptcp_pm_find(pm_ops->name)) { + spin_unlock(&mptcp_pm_list_lock); + return -EEXIST; + } + list_add_tail_rcu(&pm_ops->list, &mptcp_pm_list); + spin_unlock(&mptcp_pm_list_lock); + + pr_debug("%s registered\n", pm_ops->name); + return 0; +} + +void mptcp_pm_unregister(struct mptcp_pm_ops *pm_ops) +{ + /* skip unregistering the default path manager */ + if (WARN_ON_ONCE(pm_ops == &mptcp_pm_kernel)) + return; + + spin_lock(&mptcp_pm_list_lock); + list_del_rcu(&pm_ops->list); + spin_unlock(&mptcp_pm_list_lock); +} + +/* Build string with list of available path manager values. + * Similar to tcp_get_available_congestion_control() + */ +void mptcp_pm_get_available(char *buf, size_t maxlen) +{ + struct mptcp_pm_ops *pm_ops; + size_t offs = 0; + + rcu_read_lock(); + list_for_each_entry_rcu(pm_ops, &mptcp_pm_list, list) { + offs += snprintf(buf + offs, maxlen - offs, "%s%s", + offs == 0 ? "" : " ", pm_ops->name); + + if (WARN_ON_ONCE(offs >= maxlen)) + break; + } + rcu_read_unlock(); +} diff --git a/net/mptcp/pm_kernel.c b/net/mptcp/pm_kernel.c index daf8f98a3164..d39e7c178460 100644 --- a/net/mptcp/pm_kernel.c +++ b/net/mptcp/pm_kernel.c @@ -710,11 +710,10 @@ int mptcp_pm_nl_get_local_id(struct mptcp_sock *msk, return ret; /* address not found, add to local list */ - entry = kmalloc(sizeof(*entry), GFP_ATOMIC); + entry = kmemdup(skc, sizeof(*skc), GFP_ATOMIC); if (!entry) return -ENOMEM; - *entry = *skc; entry->addr.port = 0; ret = mptcp_pm_nl_append_new_local_addr(pernet, entry, true, false); if (ret < 0) @@ -817,13 +816,12 @@ int mptcp_pm_nl_add_addr_doit(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } - entry = kzalloc(sizeof(*entry), GFP_KERNEL_ACCOUNT); + entry = kmemdup(&addr, sizeof(addr), GFP_KERNEL_ACCOUNT); if (!entry) { GENL_SET_ERR_MSG(info, "can't allocate addr"); return -ENOMEM; } - *entry = addr; if (entry->addr.port) { ret = mptcp_pm_nl_create_listen_socket(skb->sk, entry); if (ret) { @@ -1400,11 +1398,15 @@ static struct pernet_operations mptcp_pm_pernet_ops = { .size = sizeof(struct pm_nl_pernet), }; -void __init mptcp_pm_nl_init(void) +struct mptcp_pm_ops mptcp_pm_kernel = { + .name = "kernel", + .owner = THIS_MODULE, +}; + +void __init mptcp_pm_kernel_register(void) { if (register_pernet_subsys(&mptcp_pm_pernet_ops) < 0) panic("Failed to register MPTCP PM pernet subsystem.\n"); - if (genl_register_family(&mptcp_genl_family)) - panic("Failed to register MPTCP PM netlink family\n"); + mptcp_pm_register(&mptcp_pm_kernel); } diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index b2e5bbdcd5df..50aaf259959a 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -625,3 +625,9 @@ struct genl_family mptcp_genl_family __ro_after_init = { .mcgrps = mptcp_pm_mcgrps, .n_mcgrps = ARRAY_SIZE(mptcp_pm_mcgrps), }; + +void __init mptcp_pm_nl_init(void) +{ + if (genl_register_family(&mptcp_genl_family)) + panic("Failed to register MPTCP PM netlink family\n"); +} diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c index 13856df22673..2cb62f026b1f 100644 --- a/net/mptcp/pm_userspace.c +++ b/net/mptcp/pm_userspace.c @@ -682,3 +682,13 @@ int mptcp_userspace_pm_get_addr(u8 id, struct mptcp_pm_addr_entry *addr, sock_put(sk); return ret; } + +static struct mptcp_pm_ops mptcp_pm_userspace = { + .name = "userspace", + .owner = THIS_MODULE, +}; + +void __init mptcp_pm_userspace_register(void) +{ + mptcp_pm_register(&mptcp_pm_userspace); +} diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index c51b6a22d5e0..d409586b5977 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -223,6 +223,8 @@ struct mptcp_pm_data { spinlock_t lock; /*protects the whole PM data */ + struct_group(reset, + u8 addr_signal; bool server_side; bool work_pending; @@ -235,6 +237,9 @@ struct mptcp_pm_data { u8 pm_type; u8 subflows; u8 status; + + ); + DECLARE_BITMAP(id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1); struct mptcp_rm_list rm_list_tx; struct mptcp_rm_list rm_list_rx; @@ -694,6 +699,7 @@ int mptcp_allow_join_id0(const struct net *net); unsigned int mptcp_stale_loss_cnt(const struct net *net); unsigned int mptcp_close_timeout(const struct sock *sk); int mptcp_get_pm_type(const struct net *net); +const char *mptcp_get_path_manager(const struct net *net); const char *mptcp_get_scheduler(const struct net *net); void mptcp_active_disable(struct sock *sk); @@ -1045,6 +1051,15 @@ int mptcp_pm_remove_addr(struct mptcp_sock *msk, const struct mptcp_rm_list *rm_ void mptcp_pm_remove_addr_entry(struct mptcp_sock *msk, struct mptcp_pm_addr_entry *entry); +/* the default path manager, used in mptcp_pm_unregister */ +extern struct mptcp_pm_ops mptcp_pm_kernel; + +struct mptcp_pm_ops *mptcp_pm_find(const char *name); +int mptcp_pm_register(struct mptcp_pm_ops *pm_ops); +void mptcp_pm_unregister(struct mptcp_pm_ops *pm_ops); +int mptcp_pm_validate(struct mptcp_pm_ops *pm_ops); +void mptcp_pm_get_available(char *buf, size_t maxlen); + void mptcp_userspace_pm_free_local_addr_list(struct mptcp_sock *msk); void mptcp_event(enum mptcp_event_type type, const struct mptcp_sock *msk, @@ -1147,6 +1162,8 @@ static inline u8 subflow_get_local_id(const struct mptcp_subflow_context *subflo return local_id; } +void __init mptcp_pm_kernel_register(void); +void __init mptcp_pm_userspace_register(void); void __init mptcp_pm_nl_init(void); void mptcp_pm_worker(struct mptcp_sock *msk); void __mptcp_pm_kernel_worker(struct mptcp_sock *msk); diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh index 3651f73451cf..333064b0b5ac 100755 --- a/tools/testing/selftests/net/mptcp/userspace_pm.sh +++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh @@ -117,7 +117,36 @@ cleanup() trap cleanup EXIT # Create and configure network namespaces for testing +print_title "Init" mptcp_lib_ns_init ns1 ns2 + +# check path_manager and pm_type sysctl mapping +if [ -f /proc/sys/net/mptcp/path_manager ]; then + ip netns exec "$ns1" sysctl -q net.mptcp.path_manager=userspace + pm_type="$(ip netns exec "$ns1" sysctl -n net.mptcp.pm_type)" + if [ "${pm_type}" != "1" ]; then + test_fail "unexpected pm_type: ${pm_type}" + mptcp_lib_result_print_all_tap + exit ${KSFT_FAIL} + fi + + ip netns exec "$ns1" sysctl -q net.mptcp.path_manager=error 2>/dev/null + pm_type="$(ip netns exec "$ns1" sysctl -n net.mptcp.pm_type)" + if [ "${pm_type}" != "1" ]; then + test_fail "unexpected pm_type after error: ${pm_type}" + mptcp_lib_result_print_all_tap + exit ${KSFT_FAIL} + fi + + ip netns exec "$ns1" sysctl -q net.mptcp.pm_type=0 + pm_name="$(ip netns exec "$ns1" sysctl -n net.mptcp.path_manager)" + if [ "${pm_name}" != "kernel" ]; then + test_fail "unexpected path-manager: ${pm_name}" + mptcp_lib_result_print_all_tap + exit ${KSFT_FAIL} + fi +fi + for i in "$ns1" "$ns2" ;do ip netns exec "$i" sysctl -q net.mptcp.pm_type=1 done @@ -152,7 +181,6 @@ mptcp_lib_events "${ns1}" "${server_evts}" server_evts_pid sleep 0.5 mptcp_lib_subtests_last_ts_reset -print_title "Init" print_test "Created network namespaces ns1, ns2" test_pass |