diff options
Diffstat (limited to 'net')
928 files changed, 33934 insertions, 50044 deletions
diff --git a/net/6lowpan/6lowpan_i.h b/net/6lowpan/6lowpan_i.h index a67caee11929..53cf446ce2e3 100644 --- a/net/6lowpan/6lowpan_i.h +++ b/net/6lowpan/6lowpan_i.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __6LOWPAN_I_H #define __6LOWPAN_I_H diff --git a/net/6lowpan/Makefile b/net/6lowpan/Makefile index 12d131ab2324..2247b96dbc75 100644 --- a/net/6lowpan/Makefile +++ b/net/6lowpan/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_6LOWPAN) += 6lowpan.o 6lowpan-y := core.o iphc.o nhc.o ndisc.o diff --git a/net/6lowpan/nhc.h b/net/6lowpan/nhc.h index 803041400136..67951c40734b 100644 --- a/net/6lowpan/nhc.h +++ b/net/6lowpan/nhc.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __6LOWPAN_NHC_H #define __6LOWPAN_NHC_H diff --git a/net/802/Makefile b/net/802/Makefile index 37e654d6615e..19406a87bdaa 100644 --- a/net/802/Makefile +++ b/net/802/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the Linux 802.x protocol layers. # diff --git a/net/8021q/Makefile b/net/8021q/Makefile index 7bc8db08d7ef..9b703454b93e 100644 --- a/net/8021q/Makefile +++ b/net/8021q/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the Linux VLAN layer. # diff --git a/net/8021q/vlan.c b/net/8021q/vlan.c index 9649579b5b9f..8dfdd94e430f 100644 --- a/net/8021q/vlan.c +++ b/net/8021q/vlan.c @@ -138,7 +138,7 @@ int vlan_check_real_dev(struct net_device *real_dev, return 0; } -int register_vlan_dev(struct net_device *dev) +int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack) { struct vlan_dev_priv *vlan = vlan_dev_priv(dev); struct net_device *real_dev = vlan->real_dev; @@ -174,7 +174,7 @@ int register_vlan_dev(struct net_device *dev) if (err < 0) goto out_uninit_mvrp; - err = netdev_upper_dev_link(real_dev, dev); + err = netdev_upper_dev_link(real_dev, dev, extack); if (err) goto out_unregister_netdev; @@ -270,7 +270,7 @@ static int register_vlan_device(struct net_device *real_dev, u16 vlan_id) vlan->flags = VLAN_FLAG_REORDER_HDR; new_dev->rtnl_link_ops = &vlan_link_ops; - err = register_vlan_dev(new_dev); + err = register_vlan_dev(new_dev, NULL); if (err < 0) goto out_free_newdev; @@ -328,6 +328,9 @@ static void vlan_transfer_features(struct net_device *dev, vlandev->fcoe_ddp_xid = dev->fcoe_ddp_xid; #endif + vlandev->priv_flags &= ~IFF_XMIT_DST_RELEASE; + vlandev->priv_flags |= (vlan->real_dev->priv_flags & IFF_XMIT_DST_RELEASE); + netdev_update_features(vlandev); } @@ -376,6 +379,9 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, dev->name); vlan_vid_add(dev, htons(ETH_P_8021Q), 0); } + if (event == NETDEV_DOWN && + (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) + vlan_vid_del(dev, htons(ETH_P_8021Q), 0); vlan_info = rtnl_dereference(dev->vlan_info); if (!vlan_info) @@ -423,9 +429,6 @@ static int vlan_device_event(struct notifier_block *unused, unsigned long event, struct net_device *tmp; LIST_HEAD(close_list); - if (dev->features & NETIF_F_HW_VLAN_CTAG_FILTER) - vlan_vid_del(dev, htons(ETH_P_8021Q), 0); - /* Put all VLANs for this dev in the down state too. */ vlan_group_for_each_dev(grp, i, vlandev) { flgs = vlandev->flags; diff --git a/net/8021q/vlan.h b/net/8021q/vlan.h index df8bd65dd370..a8ba51030b75 100644 --- a/net/8021q/vlan.h +++ b/net/8021q/vlan.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __BEN_VLAN_802_1Q_INC__ #define __BEN_VLAN_802_1Q_INC__ @@ -107,7 +108,7 @@ void vlan_dev_get_realdev_name(const struct net_device *dev, char *result); int vlan_check_real_dev(struct net_device *real_dev, __be16 protocol, u16 vlan_id); void vlan_setup(struct net_device *dev); -int register_vlan_dev(struct net_device *dev); +int register_vlan_dev(struct net_device *dev, struct netlink_ext_ack *extack); void unregister_vlan_dev(struct net_device *dev, struct list_head *head); bool vlan_dev_inherit_address(struct net_device *dev, struct net_device *real_dev); diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index e2ed69850489..64aa9f755e1d 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/skbuff.h> #include <linux/netdevice.h> #include <linux/if_vlan.h> @@ -21,6 +22,12 @@ bool vlan_do_receive(struct sk_buff **skbp) if (unlikely(!skb)) return false; + if (unlikely(!(vlan_dev->flags & IFF_UP))) { + kfree_skb(skb); + *skbp = NULL; + return false; + } + skb->dev = vlan_dev; if (unlikely(skb->pkt_type == PACKET_OTHERHOST)) { /* Our lower layer thinks this is not local, let's make sure. diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c index 5e831de3103e..6689c0b272a7 100644 --- a/net/8021q/vlan_netlink.c +++ b/net/8021q/vlan_netlink.c @@ -143,6 +143,7 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev, vlan->vlan_proto = proto; vlan->vlan_id = nla_get_u16(data[IFLA_VLAN_ID]); vlan->real_dev = real_dev; + dev->priv_flags |= (real_dev->priv_flags & IFF_XMIT_DST_RELEASE); vlan->flags = VLAN_FLAG_REORDER_HDR; err = vlan_check_real_dev(real_dev, vlan->vlan_proto, vlan->vlan_id); @@ -160,7 +161,7 @@ static int vlan_newlink(struct net *src_net, struct net_device *dev, if (err < 0) return err; - return register_vlan_dev(dev); + return register_vlan_dev(dev, extack); } static inline size_t vlan_qos_map_size(unsigned int n) diff --git a/net/8021q/vlanproc.h b/net/8021q/vlanproc.h index 8838a2e92eb6..48cd4b4784e8 100644 --- a/net/8021q/vlanproc.h +++ b/net/8021q/vlanproc.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __BEN_VLAN_PROC_INC__ #define __BEN_VLAN_PROC_INC__ diff --git a/net/9p/Makefile b/net/9p/Makefile index 697ea7caf466..c0486cfc85d9 100644 --- a/net/9p/Makefile +++ b/net/9p/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_NET_9P) := 9pnet.o obj-$(CONFIG_NET_9P_XEN) += 9pnet_xen.o obj-$(CONFIG_NET_9P_VIRTIO) += 9pnet_virtio.o diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index ddfa86648f95..903a190319b9 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -272,6 +272,7 @@ static int p9_fd_read(struct p9_client *client, void *v, int len) { int ret; struct p9_trans_fd *ts = NULL; + loff_t pos; if (client && client->status != Disconnected) ts = client->trans; @@ -282,7 +283,8 @@ static int p9_fd_read(struct p9_client *client, void *v, int len) if (!(ts->rd->f_flags & O_NONBLOCK)) p9_debug(P9_DEBUG_ERROR, "blocking read ...\n"); - ret = kernel_read(ts->rd, ts->rd->f_pos, v, len); + pos = ts->rd->f_pos; + ret = kernel_read(ts->rd, v, len, &pos); if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN) client->status = Disconnected; return ret; @@ -420,8 +422,7 @@ error: static int p9_fd_write(struct p9_client *client, void *v, int len) { - int ret; - mm_segment_t oldfs; + ssize_t ret; struct p9_trans_fd *ts = NULL; if (client && client->status != Disconnected) @@ -433,12 +434,7 @@ static int p9_fd_write(struct p9_client *client, void *v, int len) if (!(ts->wr->f_flags & O_NONBLOCK)) p9_debug(P9_DEBUG_ERROR, "blocking write ...\n"); - oldfs = get_fs(); - set_fs(get_ds()); - /* The cast to a user pointer is valid due to the set_fs() */ - ret = vfs_write(ts->wr, (__force void __user *)v, len, &ts->wr->f_pos); - set_fs(oldfs); - + ret = kernel_write(ts->wr, v, len, &ts->wr->f_pos); if (ret <= 0 && ret != -ERESTARTSYS && ret != -EAGAIN) client->status = Disconnected; return ret; diff --git a/net/Kconfig b/net/Kconfig index 7d57ef34b79c..9dba2715919d 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -166,13 +166,6 @@ menuconfig NETFILTER if NETFILTER -config NETFILTER_DEBUG - bool "Network packet filtering debugging" - depends on NETFILTER - help - You can say Y here if you want to get additional messages useful in - debugging the netfilter code. - config NETFILTER_ADVANCED bool "Advanced netfilter configuration" depends on NETFILTER @@ -235,6 +228,7 @@ source "net/openvswitch/Kconfig" source "net/vmw_vsock/Kconfig" source "net/netlink/Kconfig" source "net/mpls/Kconfig" +source "net/nsh/Kconfig" source "net/hsr/Kconfig" source "net/switchdev/Kconfig" source "net/l3mdev/Kconfig" @@ -301,6 +295,18 @@ config BPF_JIT /proc/sys/net/core/bpf_jit_harden (optional) /proc/sys/net/core/bpf_jit_kallsyms (optional) +config BPF_STREAM_PARSER + bool "enable BPF STREAM_PARSER" + depends on BPF_SYSCALL + select STREAM_PARSER + ---help--- + Enabling this allows a stream parser to be used with + BPF_MAP_TYPE_SOCKMAP. + + BPF_MAP_TYPE_SOCKMAP provides a map type to use with network sockets. + It can be used to enforce socket policy, implement socket redirects, + etc. + config NET_FLOW_LIMIT bool depends on RPS @@ -364,7 +370,6 @@ endmenu source "net/ax25/Kconfig" source "net/can/Kconfig" -source "net/irda/Kconfig" source "net/bluetooth/Kconfig" source "net/rxrpc/Kconfig" source "net/kcm/Kconfig" diff --git a/net/Makefile b/net/Makefile index bed80fa398b7..14fede520840 100644 --- a/net/Makefile +++ b/net/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the linux networking. # @@ -31,7 +32,6 @@ obj-$(CONFIG_NETROM) += netrom/ obj-$(CONFIG_ROSE) += rose/ obj-$(CONFIG_AX25) += ax25/ obj-$(CONFIG_CAN) += can/ -obj-$(CONFIG_IRDA) += irda/ obj-$(CONFIG_BT) += bluetooth/ obj-$(CONFIG_SUNRPC) += sunrpc/ obj-$(CONFIG_AF_RXRPC) += rxrpc/ @@ -76,6 +76,7 @@ obj-$(CONFIG_NET_IFE) += ife/ obj-$(CONFIG_OPENVSWITCH) += openvswitch/ obj-$(CONFIG_VSOCKETS) += vmw_vsock/ obj-$(CONFIG_MPLS) += mpls/ +obj-$(CONFIG_NET_NSH) += nsh/ obj-$(CONFIG_HSR) += hsr/ ifneq ($(CONFIG_NET_SWITCHDEV),) obj-y += switchdev/ diff --git a/net/appletalk/dev.c b/net/appletalk/dev.c index e4158b8b926d..284c8e585533 100644 --- a/net/appletalk/dev.c +++ b/net/appletalk/dev.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Moved here from drivers/net/net_init.c, which is: * Written 1993,1994,1995 by Donald Becker. diff --git a/net/appletalk/sysctl_net_atalk.c b/net/appletalk/sysctl_net_atalk.c index ebb864361f7a..c744a853fa5f 100644 --- a/net/appletalk/sysctl_net_atalk.c +++ b/net/appletalk/sysctl_net_atalk.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * sysctl_net_atalk.c: sysctl interface to net AppleTalk subsystem. * diff --git a/net/atm/Makefile b/net/atm/Makefile index cc50bd1ff1de..bfec0f2d83b5 100644 --- a/net/atm/Makefile +++ b/net/atm/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the ATM Protocol Families. # diff --git a/net/atm/addr.c b/net/atm/addr.c index dcda35c66f15..0530b63f509a 100644 --- a/net/atm/addr.c +++ b/net/atm/addr.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* net/atm/addr.c - Local ATM address registry */ /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ diff --git a/net/atm/addr.h b/net/atm/addr.h index 6837e9e7eb13..da3f848411a0 100644 --- a/net/atm/addr.h +++ b/net/atm/addr.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* net/atm/addr.h - Local ATM address registry */ /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ diff --git a/net/atm/atm_misc.c b/net/atm/atm_misc.c index 876fbe83e2e4..a30b83c1cb3f 100644 --- a/net/atm/atm_misc.c +++ b/net/atm/atm_misc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* net/atm/atm_misc.c - Various functions for use by ATM drivers */ /* Written 1995-2000 by Werner Almesberger, EPFL ICA */ diff --git a/net/atm/atm_sysfs.c b/net/atm/atm_sysfs.c index 350bf62b2ae3..5d2fed9f5710 100644 --- a/net/atm/atm_sysfs.c +++ b/net/atm/atm_sysfs.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* ATM driver model support. */ #include <linux/kernel.h> diff --git a/net/atm/clip.c b/net/atm/clip.c index f271a7bcf5b2..d4f6029d5109 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -153,7 +153,7 @@ static int neigh_check_cb(struct neighbour *n) return 1; } -static void idle_timer_check(unsigned long dummy) +static void idle_timer_check(struct timer_list *unused) { write_lock(&arp_tbl.lock); __neigh_for_each_release(&arp_tbl, neigh_check_cb); @@ -617,7 +617,7 @@ static void atmarpd_close(struct atm_vcc *vcc) module_put(THIS_MODULE); } -static struct atmdev_ops atmarpd_dev_ops = { +static const struct atmdev_ops atmarpd_dev_ops = { .close = atmarpd_close }; @@ -887,7 +887,7 @@ static int __init atm_clip_init(void) register_netdevice_notifier(&clip_dev_notifier); register_inetaddr_notifier(&clip_inet_notifier); - setup_timer(&idle_timer, idle_timer_check, 0); + timer_setup(&idle_timer, idle_timer_check, 0); #ifdef CONFIG_PROC_FS { diff --git a/net/atm/common.h b/net/atm/common.h index 959436b87182..d9d583712a91 100644 --- a/net/atm/common.h +++ b/net/atm/common.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* net/atm/common.h - ATM sockets (common part for PVC and SVC) */ /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c index bbd3b639992e..2ff0e5e470e3 100644 --- a/net/atm/ioctl.c +++ b/net/atm/ioctl.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* ATM ioctl handling */ /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ diff --git a/net/atm/lec.c b/net/atm/lec.c index 093fe8707731..c976196da3ea 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -486,7 +486,7 @@ static void lec_atm_close(struct atm_vcc *vcc) module_put(THIS_MODULE); } -static struct atmdev_ops lecdev_ops = { +static const struct atmdev_ops lecdev_ops = { .close = lec_atm_close, .send = lec_atm_send }; @@ -1232,7 +1232,7 @@ static void lane2_associate_ind(struct net_device *dev, const u8 *mac_addr, #define LEC_ARP_REFRESH_INTERVAL (3*HZ) static void lec_arp_check_expire(struct work_struct *work); -static void lec_arp_expire_arp(unsigned long data); +static void lec_arp_expire_arp(struct timer_list *t); /* * Arp table funcs @@ -1559,8 +1559,7 @@ static struct lec_arp_table *make_entry(struct lec_priv *priv, } ether_addr_copy(to_return->mac_addr, mac_addr); INIT_HLIST_NODE(&to_return->next); - setup_timer(&to_return->timer, lec_arp_expire_arp, - (unsigned long)to_return); + timer_setup(&to_return->timer, lec_arp_expire_arp, 0); to_return->last_used = jiffies; to_return->priv = priv; skb_queue_head_init(&to_return->tx_wait); @@ -1569,11 +1568,11 @@ static struct lec_arp_table *make_entry(struct lec_priv *priv, } /* Arp sent timer expired */ -static void lec_arp_expire_arp(unsigned long data) +static void lec_arp_expire_arp(struct timer_list *t) { struct lec_arp_table *entry; - entry = (struct lec_arp_table *)data; + entry = from_timer(entry, t, timer); pr_debug("\n"); if (entry->status == ESI_ARP_PENDING) { @@ -1591,10 +1590,10 @@ static void lec_arp_expire_arp(unsigned long data) } /* Unknown/unused vcc expire, remove associated entry */ -static void lec_arp_expire_vcc(unsigned long data) +static void lec_arp_expire_vcc(struct timer_list *t) { unsigned long flags; - struct lec_arp_table *to_remove = (struct lec_arp_table *)data; + struct lec_arp_table *to_remove = from_timer(to_remove, t, timer); struct lec_priv *priv = to_remove->priv; del_timer(&to_remove->timer); @@ -1799,7 +1798,7 @@ static struct atm_vcc *lec_arp_resolve(struct lec_priv *priv, else send_to_lecd(priv, l_arp_xmt, mac_to_find, NULL, NULL); entry->timer.expires = jiffies + (1 * HZ); - entry->timer.function = lec_arp_expire_arp; + entry->timer.function = (TIMER_FUNC_TYPE)lec_arp_expire_arp; add_timer(&entry->timer); found = priv->mcast_vcc; } @@ -1999,7 +1998,7 @@ lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data, entry->old_recv_push = old_push; entry->status = ESI_UNKNOWN; entry->timer.expires = jiffies + priv->vcc_timeout_period; - entry->timer.function = lec_arp_expire_vcc; + entry->timer.function = (TIMER_FUNC_TYPE)lec_arp_expire_vcc; hlist_add_head(&entry->next, &priv->lec_no_forward); add_timer(&entry->timer); dump_arp_table(priv); @@ -2083,7 +2082,7 @@ lec_vcc_added(struct lec_priv *priv, const struct atmlec_ioc *ioc_data, entry->status = ESI_UNKNOWN; hlist_add_head(&entry->next, &priv->lec_arp_empty_ones); entry->timer.expires = jiffies + priv->vcc_timeout_period; - entry->timer.function = lec_arp_expire_vcc; + entry->timer.function = (TIMER_FUNC_TYPE)lec_arp_expire_vcc; add_timer(&entry->timer); pr_debug("After vcc was added\n"); dump_arp_table(priv); diff --git a/net/atm/lec.h b/net/atm/lec.h index 4149db1b7885..be0e2667bd8c 100644 --- a/net/atm/lec.h +++ b/net/atm/lec.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Lan Emulation client header file * diff --git a/net/atm/lec_arpc.h b/net/atm/lec_arpc.h index d923f53812a3..1205d8792d28 100644 --- a/net/atm/lec_arpc.h +++ b/net/atm/lec_arpc.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Lec arp cache * diff --git a/net/atm/mpc.c b/net/atm/mpc.c index 680a4b9095a1..e882d8b5db05 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -95,7 +95,7 @@ static netdev_tx_t mpc_send_packet(struct sk_buff *skb, static int mpoa_event_listener(struct notifier_block *mpoa_notifier, unsigned long event, void *dev); static void mpc_timer_refresh(void); -static void mpc_cache_check(unsigned long checking_time); +static void mpc_cache_check(struct timer_list *unused); static struct llc_snap_hdr llc_snap_mpoa_ctrl = { 0xaa, 0xaa, 0x03, @@ -121,7 +121,7 @@ static struct notifier_block mpoa_notifier = { struct mpoa_client *mpcs = NULL; /* FIXME */ static struct atm_mpoa_qos *qos_head = NULL; -static DEFINE_TIMER(mpc_timer, NULL, 0, 0); +static DEFINE_TIMER(mpc_timer, NULL); static struct mpoa_client *find_mpc_by_itfnum(int itf) @@ -779,7 +779,7 @@ static void mpc_push(struct atm_vcc *vcc, struct sk_buff *skb) netif_rx(new_skb); } -static struct atmdev_ops mpc_ops = { /* only send is required */ +static const struct atmdev_ops mpc_ops = { /* only send is required */ .close = mpoad_close, .send = msg_from_mpoad }; @@ -799,7 +799,6 @@ static int atm_mpoa_mpoad_attach(struct atm_vcc *vcc, int arg) int err; if (mpcs == NULL) { - init_timer(&mpc_timer); mpc_timer_refresh(); /* This lets us now how our LECs are doing */ @@ -1408,15 +1407,17 @@ static void clean_up(struct k_message *msg, struct mpoa_client *mpc, int action) msg_to_mpoad(msg, mpc); } +static unsigned long checking_time; + static void mpc_timer_refresh(void) { mpc_timer.expires = jiffies + (MPC_P2 * HZ); - mpc_timer.data = mpc_timer.expires; - mpc_timer.function = mpc_cache_check; + checking_time = mpc_timer.expires; + mpc_timer.function = (TIMER_FUNC_TYPE)mpc_cache_check; add_timer(&mpc_timer); } -static void mpc_cache_check(unsigned long checking_time) +static void mpc_cache_check(struct timer_list *unused) { struct mpoa_client *mpc = mpcs; static unsigned long previous_resolving_check_time; diff --git a/net/atm/mpc.h b/net/atm/mpc.h index cfc7b745aa91..454abd07651a 100644 --- a/net/atm/mpc.h +++ b/net/atm/mpc.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _MPC_H_ #define _MPC_H_ diff --git a/net/atm/mpoa_caches.c b/net/atm/mpoa_caches.c index 4ccaa16b1eb1..e01450bb32d6 100644 --- a/net/atm/mpoa_caches.c +++ b/net/atm/mpoa_caches.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/types.h> #include <linux/atmmpc.h> #include <linux/slab.h> diff --git a/net/atm/mpoa_caches.h b/net/atm/mpoa_caches.h index 30fe34841ced..6a266669ebf4 100644 --- a/net/atm/mpoa_caches.h +++ b/net/atm/mpoa_caches.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef MPOA_CACHES_H #define MPOA_CACHES_H diff --git a/net/atm/mpoa_proc.c b/net/atm/mpoa_proc.c index 2df34eb5d65f..8a0c17e1c203 100644 --- a/net/atm/mpoa_proc.c +++ b/net/atm/mpoa_proc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ #ifdef CONFIG_PROC_FS diff --git a/net/atm/proc.c b/net/atm/proc.c index 4caca2a90ec4..642f9272ab95 100644 --- a/net/atm/proc.c +++ b/net/atm/proc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* net/atm/proc.c - ATM /proc interface * * Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA diff --git a/net/atm/protocols.h b/net/atm/protocols.h index acdfc856222d..18d4d008bac3 100644 --- a/net/atm/protocols.h +++ b/net/atm/protocols.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* net/atm/protocols.h - ATM protocol handler entry points */ /* Written 1995-1997 by Werner Almesberger, EPFL LRC */ diff --git a/net/atm/pvc.c b/net/atm/pvc.c index 040207ec399f..e1140b3bdcaa 100644 --- a/net/atm/pvc.c +++ b/net/atm/pvc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* net/atm/pvc.c - ATM PVC sockets */ /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ diff --git a/net/atm/raw.c b/net/atm/raw.c index 821c0797553d..ee10e8d46185 100644 --- a/net/atm/raw.c +++ b/net/atm/raw.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* net/atm/raw.c - Raw AAL0 and AAL5 transports */ /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ diff --git a/net/atm/resources.c b/net/atm/resources.c index 918244757b7d..bada395ecdb1 100644 --- a/net/atm/resources.c +++ b/net/atm/resources.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* net/atm/resources.c - Statically allocated resources */ /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ diff --git a/net/atm/resources.h b/net/atm/resources.h index 521431e30507..048232e4d4c6 100644 --- a/net/atm/resources.h +++ b/net/atm/resources.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* net/atm/resources.h - ATM-related resources */ /* Written 1995-1998 by Werner Almesberger, EPFL LRC/ICA */ diff --git a/net/atm/signaling.c b/net/atm/signaling.c index 983c3a21a133..6c11cdf4dd4c 100644 --- a/net/atm/signaling.c +++ b/net/atm/signaling.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* net/atm/signaling.c - ATM signaling */ /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ @@ -217,7 +218,7 @@ static void sigd_close(struct atm_vcc *vcc) read_unlock(&vcc_sklist_lock); } -static struct atmdev_ops sigd_dev_ops = { +static const struct atmdev_ops sigd_dev_ops = { .close = sigd_close, .send = sigd_send }; diff --git a/net/atm/signaling.h b/net/atm/signaling.h index 08b2a69cc572..2df8220f7ab5 100644 --- a/net/atm/signaling.h +++ b/net/atm/signaling.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* net/atm/signaling.h - ATM signaling */ /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ diff --git a/net/atm/svc.c b/net/atm/svc.c index 5589de7086af..c458adcbc177 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* net/atm/svc.c - ATM SVC sockets */ /* Written 1995-2000 by Werner Almesberger, EPFL LRC/ICA */ diff --git a/net/ax25/Makefile b/net/ax25/Makefile index 43c46d2cafb6..2e53affc8568 100644 --- a/net/ax25/Makefile +++ b/net/ax25/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the Linux AX.25 layer. # diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index f3f9d18891de..06eac1f50c5e 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -268,9 +268,9 @@ void ax25_destroy_socket(ax25_cb *); /* * Handler for deferred kills. */ -static void ax25_destroy_timer(unsigned long data) +static void ax25_destroy_timer(struct timer_list *t) { - ax25_cb *ax25=(ax25_cb *)data; + ax25_cb *ax25 = from_timer(ax25, t, dtimer); struct sock *sk; sk=ax25->sk; @@ -326,8 +326,7 @@ void ax25_destroy_socket(ax25_cb *ax25) if (ax25->sk != NULL) { if (sk_has_allocations(ax25->sk)) { /* Defer: outstanding buffers */ - setup_timer(&ax25->dtimer, ax25_destroy_timer, - (unsigned long)ax25); + timer_setup(&ax25->dtimer, ax25_destroy_timer, 0); ax25->dtimer.expires = jiffies + 2 * HZ; add_timer(&ax25->dtimer); } else { diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c index 5fb2104b7304..e9d11313d45b 100644 --- a/net/ax25/ax25_ds_timer.c +++ b/net/ax25/ax25_ds_timer.c @@ -29,7 +29,7 @@ #include <linux/mm.h> #include <linux/interrupt.h> -static void ax25_ds_timeout(unsigned long); +static void ax25_ds_timeout(struct timer_list *); /* * Add DAMA slave timeout timer to timer list. @@ -41,8 +41,7 @@ static void ax25_ds_timeout(unsigned long); void ax25_ds_setup_timer(ax25_dev *ax25_dev) { - setup_timer(&ax25_dev->dama.slave_timer, ax25_ds_timeout, - (unsigned long)ax25_dev); + timer_setup(&ax25_dev->dama.slave_timer, ax25_ds_timeout, 0); } void ax25_ds_del_timer(ax25_dev *ax25_dev) @@ -66,9 +65,9 @@ void ax25_ds_set_timer(ax25_dev *ax25_dev) * Silently discard all (slave) connections in case our master forgot us... */ -static void ax25_ds_timeout(unsigned long arg) +static void ax25_ds_timeout(struct timer_list *t) { - ax25_dev *ax25_dev = (struct ax25_dev *) arg; + ax25_dev *ax25_dev = from_timer(ax25_dev, t, dama.slave_timer); ax25_cb *ax25; if (ax25_dev == NULL || !ax25_dev->dama.slave) diff --git a/net/ax25/ax25_timer.c b/net/ax25/ax25_timer.c index 23a6f38a80bf..c47b7ee1e4da 100644 --- a/net/ax25/ax25_timer.c +++ b/net/ax25/ax25_timer.c @@ -33,20 +33,19 @@ #include <linux/mm.h> #include <linux/interrupt.h> -static void ax25_heartbeat_expiry(unsigned long); -static void ax25_t1timer_expiry(unsigned long); -static void ax25_t2timer_expiry(unsigned long); -static void ax25_t3timer_expiry(unsigned long); -static void ax25_idletimer_expiry(unsigned long); +static void ax25_heartbeat_expiry(struct timer_list *); +static void ax25_t1timer_expiry(struct timer_list *); +static void ax25_t2timer_expiry(struct timer_list *); +static void ax25_t3timer_expiry(struct timer_list *); +static void ax25_idletimer_expiry(struct timer_list *); void ax25_setup_timers(ax25_cb *ax25) { - setup_timer(&ax25->timer, ax25_heartbeat_expiry, (unsigned long)ax25); - setup_timer(&ax25->t1timer, ax25_t1timer_expiry, (unsigned long)ax25); - setup_timer(&ax25->t2timer, ax25_t2timer_expiry, (unsigned long)ax25); - setup_timer(&ax25->t3timer, ax25_t3timer_expiry, (unsigned long)ax25); - setup_timer(&ax25->idletimer, ax25_idletimer_expiry, - (unsigned long)ax25); + timer_setup(&ax25->timer, ax25_heartbeat_expiry, 0); + timer_setup(&ax25->t1timer, ax25_t1timer_expiry, 0); + timer_setup(&ax25->t2timer, ax25_t2timer_expiry, 0); + timer_setup(&ax25->t3timer, ax25_t3timer_expiry, 0); + timer_setup(&ax25->idletimer, ax25_idletimer_expiry, 0); } void ax25_start_heartbeat(ax25_cb *ax25) @@ -120,10 +119,10 @@ unsigned long ax25_display_timer(struct timer_list *timer) EXPORT_SYMBOL(ax25_display_timer); -static void ax25_heartbeat_expiry(unsigned long param) +static void ax25_heartbeat_expiry(struct timer_list *t) { int proto = AX25_PROTO_STD_SIMPLEX; - ax25_cb *ax25 = (ax25_cb *)param; + ax25_cb *ax25 = from_timer(ax25, t, timer); if (ax25->ax25_dev) proto = ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]; @@ -145,9 +144,9 @@ static void ax25_heartbeat_expiry(unsigned long param) } } -static void ax25_t1timer_expiry(unsigned long param) +static void ax25_t1timer_expiry(struct timer_list *t) { - ax25_cb *ax25 = (ax25_cb *)param; + ax25_cb *ax25 = from_timer(ax25, t, t1timer); switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) { case AX25_PROTO_STD_SIMPLEX: @@ -164,9 +163,9 @@ static void ax25_t1timer_expiry(unsigned long param) } } -static void ax25_t2timer_expiry(unsigned long param) +static void ax25_t2timer_expiry(struct timer_list *t) { - ax25_cb *ax25 = (ax25_cb *)param; + ax25_cb *ax25 = from_timer(ax25, t, t2timer); switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) { case AX25_PROTO_STD_SIMPLEX: @@ -183,9 +182,9 @@ static void ax25_t2timer_expiry(unsigned long param) } } -static void ax25_t3timer_expiry(unsigned long param) +static void ax25_t3timer_expiry(struct timer_list *t) { - ax25_cb *ax25 = (ax25_cb *)param; + ax25_cb *ax25 = from_timer(ax25, t, t3timer); switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) { case AX25_PROTO_STD_SIMPLEX: @@ -204,9 +203,9 @@ static void ax25_t3timer_expiry(unsigned long param) } } -static void ax25_idletimer_expiry(unsigned long param) +static void ax25_idletimer_expiry(struct timer_list *t) { - ax25_cb *ax25 = (ax25_cb *)param; + ax25_cb *ax25 = from_timer(ax25, t, idletimer); switch (ax25->ax25_dev->values[AX25_VALUES_PROTOCOL]) { case AX25_PROTO_STD_SIMPLEX: diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c index a3501173e200..1b659ab652fb 100644 --- a/net/batman-adv/bat_iv_ogm.c +++ b/net/batman-adv/bat_iv_ogm.c @@ -729,11 +729,9 @@ static void batadv_iv_ogm_aggregate(struct batadv_forw_packet *forw_packet_aggr, const unsigned char *packet_buff, int packet_len, bool direct_link) { - unsigned char *skb_buff; unsigned long new_direct_link_flag; - skb_buff = skb_put_data(forw_packet_aggr->skb, packet_buff, - packet_len); + skb_put_data(forw_packet_aggr->skb, packet_buff, packet_len); forw_packet_aggr->packet_len += packet_len; forw_packet_aggr->num_packets++; @@ -918,8 +916,8 @@ static void batadv_iv_ogm_schedule(struct batadv_hard_iface *hard_iface) u16 tvlv_len = 0; unsigned long send_time; - if ((hard_iface->if_status == BATADV_IF_NOT_IN_USE) || - (hard_iface->if_status == BATADV_IF_TO_BE_REMOVED)) + if (hard_iface->if_status == BATADV_IF_NOT_IN_USE || + hard_iface->if_status == BATADV_IF_TO_BE_REMOVED) return; /* the interface gets activated here to avoid race conditions between @@ -1266,7 +1264,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, * drops as they can't send and receive at the same time. */ tq_iface_penalty = BATADV_TQ_MAX_VALUE; - if (if_outgoing && (if_incoming == if_outgoing) && + if (if_outgoing && if_incoming == if_outgoing && batadv_is_wifi_hardif(if_outgoing)) tq_iface_penalty = batadv_hop_penalty(BATADV_TQ_MAX_VALUE, bat_priv); @@ -1281,7 +1279,7 @@ static bool batadv_iv_ogm_calc_tq(struct batadv_orig_node *orig_node, batadv_ogm_packet->tq = combined_tq; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "bidirectional: orig = %-15pM neigh = %-15pM => own_bcast = %2i, real recv = %2i, local tq: %3i, asym_penalty: %3i, iface_penalty: %3i, total tq: %3i, if_incoming = %s, if_outgoing = %s\n", + "bidirectional: orig = %pM neigh = %pM => own_bcast = %2i, real recv = %2i, local tq: %3i, asym_penalty: %3i, iface_penalty: %3i, total tq: %3i, if_incoming = %s, if_outgoing = %s\n", orig_node->orig, orig_neigh_node->orig, total_count, neigh_rq_count, tq_own, tq_asym_penalty, tq_iface_penalty, batadv_ogm_packet->tq, if_incoming->net_dev->name, @@ -1371,7 +1369,7 @@ batadv_iv_ogm_update_seqnos(const struct ethhdr *ethhdr, ret = BATADV_NEIGH_DUP; } else { set_mark = 0; - if (is_dup && (ret != BATADV_NEIGH_DUP)) + if (is_dup && ret != BATADV_NEIGH_DUP) ret = BATADV_ORIG_DUP; } @@ -1517,7 +1515,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset, /* drop packet if sender is not a direct neighbor and if we * don't route towards it */ - if (!is_single_hop_neigh && (!orig_neigh_router)) { + if (!is_single_hop_neigh && !orig_neigh_router) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Drop packet: OGM via unknown neighbor!\n"); goto out_neigh; @@ -1537,7 +1535,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset, sameseq = orig_ifinfo->last_real_seqno == ntohl(ogm_packet->seqno); similar_ttl = (orig_ifinfo->last_ttl - 3) <= ogm_packet->ttl; - if (is_bidirect && ((dup_status == BATADV_NO_DUP) || + if (is_bidirect && (dup_status == BATADV_NO_DUP || (sameseq && similar_ttl))) { batadv_iv_ogm_orig_update(bat_priv, orig_node, orig_ifinfo, ethhdr, @@ -1555,8 +1553,8 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset, /* OGMs from secondary interfaces should only scheduled once * per interface where it has been received, not multiple times */ - if ((ogm_packet->ttl <= 2) && - (if_incoming != if_outgoing)) { + if (ogm_packet->ttl <= 2 && + if_incoming != if_outgoing) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Drop packet: OGM from secondary interface and wrong outgoing interface\n"); goto out_neigh; @@ -1592,7 +1590,7 @@ batadv_iv_ogm_process_per_outif(const struct sk_buff *skb, int ogm_offset, if_incoming, if_outgoing); out_neigh: - if ((orig_neigh_node) && (!is_single_hop_neigh)) + if (orig_neigh_node && !is_single_hop_neigh) batadv_orig_node_put(orig_neigh_node); out: if (router_ifinfo) @@ -2525,9 +2523,9 @@ batadv_iv_gw_get_best_gw_node(struct batadv_priv *bat_priv) tmp_gw_factor *= 100 * 100; tmp_gw_factor >>= 18; - if ((tmp_gw_factor > max_gw_factor) || - ((tmp_gw_factor == max_gw_factor) && - (tq_avg > max_tq))) { + if (tmp_gw_factor > max_gw_factor || + (tmp_gw_factor == max_gw_factor && + tq_avg > max_tq)) { if (curr_gw) batadv_gw_node_put(curr_gw); curr_gw = gw_node; diff --git a/net/batman-adv/bat_v.c b/net/batman-adv/bat_v.c index 4e2724c5b33d..341ceab8338d 100644 --- a/net/batman-adv/bat_v.c +++ b/net/batman-adv/bat_v.c @@ -19,7 +19,6 @@ #include "main.h" #include <linux/atomic.h> -#include <linux/bug.h> #include <linux/cache.h> #include <linux/errno.h> #include <linux/if_ether.h> @@ -623,11 +622,11 @@ static int batadv_v_neigh_cmp(struct batadv_neigh_node *neigh1, int ret = 0; ifinfo1 = batadv_neigh_ifinfo_get(neigh1, if_outgoing1); - if (WARN_ON(!ifinfo1)) + if (!ifinfo1) goto err_ifinfo1; ifinfo2 = batadv_neigh_ifinfo_get(neigh2, if_outgoing2); - if (WARN_ON(!ifinfo2)) + if (!ifinfo2) goto err_ifinfo2; ret = ifinfo1->bat_v.throughput - ifinfo2->bat_v.throughput; @@ -649,11 +648,11 @@ static bool batadv_v_neigh_is_sob(struct batadv_neigh_node *neigh1, bool ret = false; ifinfo1 = batadv_neigh_ifinfo_get(neigh1, if_outgoing1); - if (WARN_ON(!ifinfo1)) + if (!ifinfo1) goto err_ifinfo1; ifinfo2 = batadv_neigh_ifinfo_get(neigh2, if_outgoing2); - if (WARN_ON(!ifinfo2)) + if (!ifinfo2) goto err_ifinfo2; threshold = ifinfo1->bat_v.throughput / 4; @@ -767,7 +766,7 @@ batadv_v_gw_get_best_gw_node(struct batadv_priv *bat_priv) if (batadv_v_gw_throughput_get(gw_node, &bw) < 0) goto next; - if (curr_gw && (bw <= max_bw)) + if (curr_gw && bw <= max_bw) goto next; if (curr_gw) diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index bd1064d98e16..1de992c58b35 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -134,7 +134,7 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh) hard_iface->bat_v.flags &= ~BATADV_FULL_DUPLEX; throughput = link_settings.base.speed; - if (throughput && (throughput != SPEED_UNKNOWN)) + if (throughput && throughput != SPEED_UNKNOWN) return throughput * 10; } @@ -263,8 +263,8 @@ static void batadv_v_elp_periodic_work(struct work_struct *work) goto out; /* we are in the process of shutting this interface down */ - if ((hard_iface->if_status == BATADV_IF_NOT_IN_USE) || - (hard_iface->if_status == BATADV_IF_TO_BE_REMOVED)) + if (hard_iface->if_status == BATADV_IF_NOT_IN_USE || + hard_iface->if_status == BATADV_IF_TO_BE_REMOVED) goto out; /* the interface was enabled but may not be ready yet */ diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index 1e3dc374bfde..c251445a42a0 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -137,7 +137,7 @@ static void batadv_v_ogm_send(struct work_struct *work) struct batadv_priv *bat_priv; struct batadv_ogm2_packet *ogm_packet; struct sk_buff *skb, *skb_tmp; - unsigned char *ogm_buff, *pkt_buff; + unsigned char *ogm_buff; int ogm_buff_len; u16 tvlv_len = 0; int ret; @@ -166,7 +166,7 @@ static void batadv_v_ogm_send(struct work_struct *work) goto reschedule; skb_reserve(skb, ETH_HLEN); - pkt_buff = skb_put_data(skb, ogm_buff, ogm_buff_len); + skb_put_data(skb, ogm_buff, ogm_buff_len); ogm_packet = (struct batadv_ogm2_packet *)skb->data; ogm_packet->seqno = htonl(atomic_read(&bat_priv->bat_v.ogm_seqno)); @@ -200,7 +200,7 @@ static void batadv_v_ogm_send(struct work_struct *work) type = "unknown"; } - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "OGM2 from ourselve on %s surpressed: %s\n", + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "OGM2 from ourselves on %s suppressed: %s\n", hard_iface->net_dev->name, type); batadv_hardif_put(hard_iface); @@ -304,8 +304,8 @@ static u32 batadv_v_forward_penalty(struct batadv_priv *bat_priv, * due to the store & forward characteristics of WIFI. * Very low throughput values are the exception. */ - if ((throughput > 10) && - (if_incoming == if_outgoing) && + if (throughput > 10 && + if_incoming == if_outgoing && !(if_incoming->bat_v.flags & BATADV_FULL_DUPLEX)) return throughput / 2; @@ -455,7 +455,7 @@ static int batadv_v_ogm_metric_update(struct batadv_priv *bat_priv, /* drop packets with old seqnos, however accept the first packet after * a host has been rebooted. */ - if ((seq_diff < 0) && !protection_started) + if (seq_diff < 0 && !protection_started) goto out; neigh_node->last_seen = jiffies; @@ -568,8 +568,8 @@ static bool batadv_v_ogm_route_update(struct batadv_priv *bat_priv, router_throughput = router_ifinfo->bat_v.throughput; neigh_throughput = neigh_ifinfo->bat_v.throughput; - if ((neigh_seq_diff < BATADV_OGM_MAX_ORIGDIFF) && - (router_throughput >= neigh_throughput)) + if (neigh_seq_diff < BATADV_OGM_MAX_ORIGDIFF && + router_throughput >= neigh_throughput) goto out; } @@ -621,7 +621,7 @@ batadv_v_ogm_process_per_outif(struct batadv_priv *bat_priv, return; /* only unknown & newer OGMs contain TVLVs we are interested in */ - if ((seqno_age > 0) && (if_outgoing == BATADV_IF_DEFAULT)) + if (seqno_age > 0 && if_outgoing == BATADV_IF_DEFAULT) batadv_tvlv_containers_process(bat_priv, true, orig_node, NULL, NULL, (unsigned char *)(ogm2 + 1), @@ -683,18 +683,18 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset, ogm_throughput = ntohl(ogm_packet->throughput); batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Received OGM2 packet via NB: %pM, IF: %s [%pM] (from OG: %pM, seqno %u, troughput %u, TTL %u, V %u, tvlv_len %u)\n", + "Received OGM2 packet via NB: %pM, IF: %s [%pM] (from OG: %pM, seqno %u, throughput %u, TTL %u, V %u, tvlv_len %u)\n", ethhdr->h_source, if_incoming->net_dev->name, if_incoming->net_dev->dev_addr, ogm_packet->orig, ntohl(ogm_packet->seqno), ogm_throughput, ogm_packet->ttl, ogm_packet->version, ntohs(ogm_packet->tvlv_len)); - /* If the troughput metric is 0, immediately drop the packet. No need to - * create orig_node / neigh_node for an unusable route. + /* If the throughput metric is 0, immediately drop the packet. No need + * to create orig_node / neigh_node for an unusable route. */ if (ogm_throughput == 0) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, - "Drop packet: originator packet with troughput metric of 0\n"); + "Drop packet: originator packet with throughput metric of 0\n"); return; } @@ -762,7 +762,7 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset, type = "unknown"; } - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "OGM2 packet from %pM on %s surpressed: %s\n", + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "OGM2 packet from %pM on %s suppressed: %s\n", ogm_packet->orig, hard_iface->net_dev->name, type); diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c index 6930d6b50f99..760c0de72582 100644 --- a/net/batman-adv/distributed-arp-table.c +++ b/net/batman-adv/distributed-arp-table.c @@ -492,8 +492,8 @@ static bool batadv_is_orig_node_eligible(struct batadv_dat_candidate *res, /* this is an hash collision with the temporary selected node. Choose * the one with the lowest address */ - if ((tmp_max == max) && max_orig_node && - (batadv_compare_eth(candidate->orig, max_orig_node->orig) > 0)) + if (tmp_max == max && max_orig_node && + batadv_compare_eth(candidate->orig, max_orig_node->orig) > 0) goto out; ret = true; @@ -834,7 +834,7 @@ int batadv_dat_cache_seq_print_text(struct seq_file *seq, void *offset) last_seen_msecs = last_seen_msecs % 60000; last_seen_secs = last_seen_msecs / 1000; - seq_printf(seq, " * %15pI4 %14pM %4i %6i:%02i\n", + seq_printf(seq, " * %15pI4 %pM %4i %6i:%02i\n", &dat_entry->ip, dat_entry->mac_addr, batadv_print_vid(dat_entry->vid), last_seen_mins, last_seen_secs); diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c index de9955d5224d..10d521f0b17f 100644 --- a/net/batman-adv/gateway_client.c +++ b/net/batman-adv/gateway_client.c @@ -248,12 +248,12 @@ void batadv_gw_election(struct batadv_priv *bat_priv) } } - if ((curr_gw) && (!next_gw)) { + if (curr_gw && !next_gw) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Removing selected gateway - no gateway in range\n"); batadv_throw_uevent(bat_priv, BATADV_UEV_GW, BATADV_UEV_DEL, NULL); - } else if ((!curr_gw) && (next_gw)) { + } else if (!curr_gw && next_gw) { batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "Adding route to gateway %pM (bandwidth: %u.%u/%u.%u MBit, tq: %i)\n", next_gw->orig_node->orig, @@ -411,8 +411,8 @@ void batadv_gw_node_update(struct batadv_priv *bat_priv, goto out; } - if ((gw_node->bandwidth_down == ntohl(gateway->bandwidth_down)) && - (gw_node->bandwidth_up == ntohl(gateway->bandwidth_up))) + if (gw_node->bandwidth_down == ntohl(gateway->bandwidth_down) && + gw_node->bandwidth_up == ntohl(gateway->bandwidth_up)) goto out; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c index 33940c5c74a8..2c26039c23fc 100644 --- a/net/batman-adv/gateway_common.c +++ b/net/batman-adv/gateway_common.c @@ -56,8 +56,8 @@ bool batadv_parse_throughput(struct net_device *net_dev, char *buff, if (strncasecmp(tmp_ptr, "mbit", 4) == 0) bw_unit_type = BATADV_BW_UNIT_MBIT; - if ((strncasecmp(tmp_ptr, "kbit", 4) == 0) || - (bw_unit_type == BATADV_BW_UNIT_MBIT)) + if (strncasecmp(tmp_ptr, "kbit", 4) == 0 || + bw_unit_type == BATADV_BW_UNIT_MBIT) *tmp_ptr = '\0'; } @@ -190,7 +190,7 @@ ssize_t batadv_gw_bandwidth_set(struct net_device *net_dev, char *buff, if (!up_new) up_new = 1; - if ((down_curr == down_new) && (up_curr == up_new)) + if (down_curr == down_new && up_curr == up_new) return count; batadv_gw_reselect(bat_priv); @@ -224,16 +224,16 @@ static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, /* only fetch the tvlv value if the handler wasn't called via the * CIFNOTFND flag and if there is data to fetch */ - if ((flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND) || - (tvlv_value_len < sizeof(gateway))) { + if (flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND || + tvlv_value_len < sizeof(gateway)) { gateway.bandwidth_down = 0; gateway.bandwidth_up = 0; } else { gateway_ptr = tvlv_value; gateway.bandwidth_down = gateway_ptr->bandwidth_down; gateway.bandwidth_up = gateway_ptr->bandwidth_up; - if ((gateway.bandwidth_down == 0) || - (gateway.bandwidth_up == 0)) { + if (gateway.bandwidth_down == 0 || + gateway.bandwidth_up == 0) { gateway.bandwidth_down = 0; gateway.bandwidth_up = 0; } @@ -242,8 +242,8 @@ static void batadv_gw_tvlv_ogm_handler_v1(struct batadv_priv *bat_priv, batadv_gw_node_update(bat_priv, orig, &gateway); /* restart gateway selection */ - if ((gateway.bandwidth_down != 0) && - (atomic_read(&bat_priv->gw.mode) == BATADV_GW_MODE_CLIENT)) + if (gateway.bandwidth_down != 0 && + atomic_read(&bat_priv->gw.mode) == BATADV_GW_MODE_CLIENT) batadv_gw_check_election(bat_priv, orig); } diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index e348f76ea8c1..4e3d5340ad96 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -504,8 +504,8 @@ static void batadv_check_known_mac_addr(const struct net_device *net_dev) rcu_read_lock(); list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { - if ((hard_iface->if_status != BATADV_IF_ACTIVE) && - (hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED)) + if (hard_iface->if_status != BATADV_IF_ACTIVE && + hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED) continue; if (hard_iface->net_dev == net_dev) @@ -568,8 +568,8 @@ int batadv_hardif_min_mtu(struct net_device *soft_iface) rcu_read_lock(); list_for_each_entry_rcu(hard_iface, &batadv_hardif_list, list) { - if ((hard_iface->if_status != BATADV_IF_ACTIVE) && - (hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED)) + if (hard_iface->if_status != BATADV_IF_ACTIVE && + hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED) continue; if (hard_iface->soft_iface != soft_iface) @@ -654,8 +654,8 @@ out: static void batadv_hardif_deactivate_interface(struct batadv_hard_iface *hard_iface) { - if ((hard_iface->if_status != BATADV_IF_ACTIVE) && - (hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED)) + if (hard_iface->if_status != BATADV_IF_ACTIVE && + hard_iface->if_status != BATADV_IF_TO_BE_ACTIVATED) return; hard_iface->if_status = BATADV_IF_INACTIVE; @@ -738,7 +738,7 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, bat_priv = netdev_priv(hard_iface->soft_iface); ret = netdev_master_upper_dev_link(hard_iface->net_dev, - soft_iface, NULL, NULL); + soft_iface, NULL, NULL, NULL); if (ret) goto err_dev; diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c index 8ead292886d1..bded31121d12 100644 --- a/net/batman-adv/icmp_socket.c +++ b/net/batman-adv/icmp_socket.c @@ -132,10 +132,10 @@ static ssize_t batadv_socket_read(struct file *file, char __user *buf, size_t packet_len; int error; - if ((file->f_flags & O_NONBLOCK) && (socket_client->queue_len == 0)) + if ((file->f_flags & O_NONBLOCK) && socket_client->queue_len == 0) return -EAGAIN; - if ((!buf) || (count < sizeof(struct batadv_icmp_packet))) + if (!buf || count < sizeof(struct batadv_icmp_packet)) return -EINVAL; if (!access_ok(VERIFY_WRITE, buf, count)) diff --git a/net/batman-adv/main.c b/net/batman-adv/main.c index fb381fb26a66..4daed7ad46f2 100644 --- a/net/batman-adv/main.c +++ b/net/batman-adv/main.c @@ -73,8 +73,8 @@ * list traversals just rcu-locked */ struct list_head batadv_hardif_list; -static int (*batadv_rx_handler[256])(struct sk_buff *, - struct batadv_hard_iface *); +static int (*batadv_rx_handler[256])(struct sk_buff *skb, + struct batadv_hard_iface *recv_if); unsigned char batadv_broadcast_addr[] = {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; @@ -540,12 +540,12 @@ batadv_recv_handler_register(u8 packet_type, int (*recv_handler)(struct sk_buff *, struct batadv_hard_iface *)) { - int (*curr)(struct sk_buff *, - struct batadv_hard_iface *); + int (*curr)(struct sk_buff *skb, + struct batadv_hard_iface *recv_if); curr = batadv_rx_handler[packet_type]; - if ((curr != batadv_recv_unhandled_packet) && - (curr != batadv_recv_unhandled_unicast_packet)) + if (curr != batadv_recv_unhandled_packet && + curr != batadv_recv_unhandled_unicast_packet) return -EBUSY; batadv_rx_handler[packet_type] = recv_handler; diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 2be8f1f46529..edb2f239d04d 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -24,7 +24,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2017.2" +#define BATADV_SOURCE_VERSION "2017.4" #endif /* B.A.T.M.A.N. parameters */ diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c index d327670641ac..e553a8770a89 100644 --- a/net/batman-adv/multicast.c +++ b/net/batman-adv/multicast.c @@ -1126,7 +1126,7 @@ static void batadv_mcast_tvlv_ogm_handler(struct batadv_priv *bat_priv, bool orig_initialized; if (orig_mcast_enabled && tvlv_value && - (tvlv_value_len >= sizeof(mcast_flags))) + tvlv_value_len >= sizeof(mcast_flags)) mcast_flags = *(u8 *)tvlv_value; spin_lock_bh(&orig->mcast_handler_lock); diff --git a/net/batman-adv/originator.c b/net/batman-adv/originator.c index 8e2a4b205257..2967b86c13da 100644 --- a/net/batman-adv/originator.c +++ b/net/batman-adv/originator.c @@ -1062,9 +1062,9 @@ batadv_purge_neigh_ifinfo(struct batadv_priv *bat_priv, continue; /* don't purge if the interface is not (going) down */ - if ((if_outgoing->if_status != BATADV_IF_INACTIVE) && - (if_outgoing->if_status != BATADV_IF_NOT_IN_USE) && - (if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED)) + if (if_outgoing->if_status != BATADV_IF_INACTIVE && + if_outgoing->if_status != BATADV_IF_NOT_IN_USE && + if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED) continue; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, @@ -1106,9 +1106,9 @@ batadv_purge_orig_ifinfo(struct batadv_priv *bat_priv, continue; /* don't purge if the interface is not (going) down */ - if ((if_outgoing->if_status != BATADV_IF_INACTIVE) && - (if_outgoing->if_status != BATADV_IF_NOT_IN_USE) && - (if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED)) + if (if_outgoing->if_status != BATADV_IF_INACTIVE && + if_outgoing->if_status != BATADV_IF_NOT_IN_USE && + if_outgoing->if_status != BATADV_IF_TO_BE_REMOVED) continue; batadv_dbg(BATADV_DBG_BATMAN, bat_priv, @@ -1155,13 +1155,13 @@ batadv_purge_orig_neighbors(struct batadv_priv *bat_priv, last_seen = neigh_node->last_seen; if_incoming = neigh_node->if_incoming; - if ((batadv_has_timed_out(last_seen, BATADV_PURGE_TIMEOUT)) || - (if_incoming->if_status == BATADV_IF_INACTIVE) || - (if_incoming->if_status == BATADV_IF_NOT_IN_USE) || - (if_incoming->if_status == BATADV_IF_TO_BE_REMOVED)) { - if ((if_incoming->if_status == BATADV_IF_INACTIVE) || - (if_incoming->if_status == BATADV_IF_NOT_IN_USE) || - (if_incoming->if_status == BATADV_IF_TO_BE_REMOVED)) + if (batadv_has_timed_out(last_seen, BATADV_PURGE_TIMEOUT) || + if_incoming->if_status == BATADV_IF_INACTIVE || + if_incoming->if_status == BATADV_IF_NOT_IN_USE || + if_incoming->if_status == BATADV_IF_TO_BE_REMOVED) { + if (if_incoming->if_status == BATADV_IF_INACTIVE || + if_incoming->if_status == BATADV_IF_NOT_IN_USE || + if_incoming->if_status == BATADV_IF_TO_BE_REMOVED) batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "neighbor purge: originator %pM, neighbor: %pM, iface: %s\n", orig_node->orig, neigh_node->addr, diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index f10e3ff26f9d..40d9bf3e5bfe 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -93,14 +93,14 @@ static void _batadv_update_route(struct batadv_priv *bat_priv, batadv_orig_ifinfo_put(orig_ifinfo); /* route deleted */ - if ((curr_router) && (!neigh_node)) { + if (curr_router && !neigh_node) { batadv_dbg(BATADV_DBG_ROUTES, bat_priv, "Deleting route towards: %pM\n", orig_node->orig); batadv_tt_global_del_orig(bat_priv, orig_node, -1, "Deleted route towards originator"); /* route added */ - } else if ((!curr_router) && (neigh_node)) { + } else if (!curr_router && neigh_node) { batadv_dbg(BATADV_DBG_ROUTES, bat_priv, "Adding route towards: %pM (via %pM)\n", orig_node->orig, neigh_node->addr); @@ -381,7 +381,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, /* add record route information if not full */ if ((icmph->msg_type == BATADV_ECHO_REPLY || icmph->msg_type == BATADV_ECHO_REQUEST) && - (skb->len >= sizeof(struct batadv_icmp_packet_rr))) { + skb->len >= sizeof(struct batadv_icmp_packet_rr)) { if (skb_linearize(skb) < 0) goto free_skb; diff --git a/net/batman-adv/send.c b/net/batman-adv/send.c index d239a9d72ac3..7895323fd2a7 100644 --- a/net/batman-adv/send.c +++ b/net/batman-adv/send.c @@ -142,7 +142,7 @@ int batadv_send_unicast_skb(struct sk_buff *skb, #ifdef CONFIG_BATMAN_ADV_BATMAN_V hardif_neigh = batadv_hardif_neigh_get(neigh->if_incoming, neigh->addr); - if ((hardif_neigh) && (ret != NET_XMIT_DROP)) + if (hardif_neigh && ret != NET_XMIT_DROP) hardif_neigh->bat_v.last_unicast_tx = jiffies; if (hardif_neigh) @@ -615,8 +615,8 @@ batadv_forw_packet_list_steal(struct hlist_head *forw_list, * we delete only packets belonging to the given interface */ if (hard_iface && - (forw_packet->if_incoming != hard_iface) && - (forw_packet->if_outgoing != hard_iface)) + forw_packet->if_incoming != hard_iface && + forw_packet->if_outgoing != hard_iface) continue; hlist_del(&forw_packet->list); @@ -911,7 +911,7 @@ static void batadv_send_outstanding_bcast_packet(struct work_struct *work) type = "unknown"; } - batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "BCAST packet from orig %pM on %s surpressed: %s\n", + batadv_dbg(BATADV_DBG_BATMAN, bat_priv, "BCAST packet from orig %pM on %s suppressed: %s\n", bcast_packet->orig, hard_iface->net_dev->name, type); diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 10f7edfb176e..9f673cdfecf8 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -69,8 +69,8 @@ int batadv_skb_head_push(struct sk_buff *skb, unsigned int len) int result; /* TODO: We must check if we can release all references to non-payload - * data using skb_header_release in our skbs to allow skb_cow_header to - * work optimally. This means that those skbs are not allowed to read + * data using __skb_header_release in our skbs to allow skb_cow_header + * to work optimally. This means that those skbs are not allowed to read * or write any data which is before the current position of skb->data * after that call and thus allow other skbs with the same data buffer * to write freely in that area. @@ -160,7 +160,7 @@ static int batadv_interface_set_mac_addr(struct net_device *dev, void *p) static int batadv_interface_change_mtu(struct net_device *dev, int new_mtu) { /* check ranges */ - if ((new_mtu < 68) || (new_mtu > batadv_hardif_min_mtu(dev))) + if (new_mtu < 68 || new_mtu > batadv_hardif_min_mtu(dev)) return -EINVAL; dev->mtu = new_mtu; @@ -863,11 +863,13 @@ free_bat_counters: * batadv_softif_slave_add - Add a slave interface to a batadv_soft_interface * @dev: batadv_soft_interface used as master interface * @slave_dev: net_device which should become the slave interface + * @extack: extended ACK report struct * * Return: 0 if successful or error otherwise. */ static int batadv_softif_slave_add(struct net_device *dev, - struct net_device *slave_dev) + struct net_device *slave_dev, + struct netlink_ext_ack *extack) { struct batadv_hard_iface *hard_iface; struct net *net = dev_net(dev); diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index 0ae8b30e4eaa..aa187fd42475 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -925,8 +925,8 @@ static int batadv_store_mesh_iface_finish(struct net_device *net_dev, if (hard_iface->if_status == status_tmp) goto out; - if ((hard_iface->soft_iface) && - (strncmp(hard_iface->soft_iface->name, ifname, IFNAMSIZ) == 0)) + if (hard_iface->soft_iface && + strncmp(hard_iface->soft_iface->name, ifname, IFNAMSIZ) == 0) goto out; if (status_tmp == BATADV_IF_NOT_IN_USE) { diff --git a/net/batman-adv/tp_meter.c b/net/batman-adv/tp_meter.c index bfe8effe9238..4b90033f35a8 100644 --- a/net/batman-adv/tp_meter.c +++ b/net/batman-adv/tp_meter.c @@ -1206,7 +1206,7 @@ static int batadv_tp_send_ack(struct batadv_priv *bat_priv, const u8 *dst, /* send the ack */ r = batadv_send_skb_to_orig(skb, orig_node, NULL); - if (unlikely(r < 0) || (r == NET_XMIT_DROP)) { + if (unlikely(r < 0) || r == NET_XMIT_DROP) { ret = BATADV_TP_REASON_DST_UNREACHABLE; goto out; } diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c index ab3b654b05cc..4e2576fc0c59 100644 --- a/net/bluetooth/6lowpan.c +++ b/net/bluetooth/6lowpan.c @@ -273,9 +273,6 @@ static int iphc_decompress(struct sk_buff *skb, struct net_device *netdev, struct lowpan_peer *peer) { const u8 *saddr; - struct lowpan_btle_dev *dev; - - dev = lowpan_btle_dev(netdev); saddr = peer->lladdr; @@ -618,12 +615,8 @@ static void ifup(struct net_device *netdev) static void ifdown(struct net_device *netdev) { - int err; - rtnl_lock(); - err = dev_close(netdev); - if (err < 0) - BT_INFO("iface %s cannot be closed (%d)", netdev->name, err); + dev_close(netdev); rtnl_unlock(); } diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig index 68f951b3e85a..db82a40875e8 100644 --- a/net/bluetooth/Kconfig +++ b/net/bluetooth/Kconfig @@ -45,6 +45,11 @@ config BT_BREDR bool "Bluetooth Classic (BR/EDR) features" depends on BT default y + help + Bluetooth Classic includes support for Basic Rate (BR) + available with Bluetooth version 1.0b or later and support + for Enhanced Data Rate (EDR) available with Bluetooth + version 2.0 or later. source "net/bluetooth/rfcomm/Kconfig" @@ -58,11 +63,18 @@ config BT_HS bool "Bluetooth High Speed (HS) features" depends on BT_BREDR default y + help + Bluetooth High Speed includes support for off-loading + Bluetooth connections via 802.11 (wifi) physical layer + available with Bluetooth version 3.0 or later. config BT_LE bool "Bluetooth Low Energy (LE) features" depends on BT default y + help + Bluetooth Low Energy includes support low-energy physical + layer available with Bluetooth version 4.0 or later. config BT_6LOWPAN tristate "Bluetooth 6LoWPAN support" diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile index 5d0a113e2e40..fda41c0b4781 100644 --- a/net/bluetooth/Makefile +++ b/net/bluetooth/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the Linux Bluetooth subsystem. # diff --git a/net/bluetooth/a2mp.c b/net/bluetooth/a2mp.c index aad994edd3bb..51c2cf2d8923 100644 --- a/net/bluetooth/a2mp.c +++ b/net/bluetooth/a2mp.c @@ -573,7 +573,7 @@ static int a2mp_discphyslink_req(struct amp_mgr *mgr, struct sk_buff *skb, hcon = hci_conn_hash_lookup_ba(hdev, AMP_LINK, &mgr->l2cap_conn->hcon->dst); if (!hcon) { - BT_ERR("No phys link exist"); + bt_dev_err(hdev, "no phys link exist"); rsp.status = A2MP_STATUS_NO_PHYSICAL_LINK_EXISTS; goto clean; } diff --git a/net/bluetooth/amp.c b/net/bluetooth/amp.c index ebcab5bbadd7..78bec8df8525 100644 --- a/net/bluetooth/amp.c +++ b/net/bluetooth/amp.c @@ -187,7 +187,7 @@ int phylink_gen_key(struct hci_conn *conn, u8 *data, u8 *len, u8 *type) /* Legacy key */ if (conn->key_type < 3) { - BT_ERR("Legacy key type %d", conn->key_type); + bt_dev_err(hdev, "legacy key type %d", conn->key_type); return -EACCES; } @@ -207,7 +207,7 @@ int phylink_gen_key(struct hci_conn *conn, u8 *data, u8 *len, u8 *type) /* Derive Generic AMP Link Key (gamp) */ err = hmac_sha256(keybuf, HCI_AMP_LINK_KEY_SIZE, "gamp", 4, gamp_key); if (err) { - BT_ERR("Could not derive Generic AMP Key: err %d", err); + bt_dev_err(hdev, "could not derive Generic AMP Key: err %d", err); return err; } diff --git a/net/bluetooth/ecdh_helper.c b/net/bluetooth/ecdh_helper.c index c7b1a9aee579..2155ce802877 100644 --- a/net/bluetooth/ecdh_helper.c +++ b/net/bluetooth/ecdh_helper.c @@ -23,7 +23,6 @@ #include "ecdh_helper.h" #include <linux/scatterlist.h> -#include <crypto/kpp.h> #include <crypto/ecdh.h> struct ecdh_completion { @@ -50,55 +49,35 @@ static inline void swap_digits(u64 *in, u64 *out, unsigned int ndigits) out[i] = __swab64(in[ndigits - 1 - i]); } -bool compute_ecdh_secret(const u8 public_key[64], const u8 private_key[32], - u8 secret[32]) +/* compute_ecdh_secret() - function assumes that the private key was + * already set. + * @tfm: KPP tfm handle allocated with crypto_alloc_kpp(). + * @public_key: pair's ecc public key. + * secret: memory where the ecdh computed shared secret will be saved. + * + * Return: zero on success; error code in case of error. + */ +int compute_ecdh_secret(struct crypto_kpp *tfm, const u8 public_key[64], + u8 secret[32]) { - struct crypto_kpp *tfm; struct kpp_request *req; - struct ecdh p; + u8 *tmp; struct ecdh_completion result; struct scatterlist src, dst; - u8 *tmp, *buf; - unsigned int buf_len; - int err = -ENOMEM; + int err; tmp = kmalloc(64, GFP_KERNEL); if (!tmp) - return false; + return -ENOMEM; - tfm = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0); - if (IS_ERR(tfm)) { - pr_err("alg: kpp: Failed to load tfm for kpp: %ld\n", - PTR_ERR(tfm)); + req = kpp_request_alloc(tfm, GFP_KERNEL); + if (!req) { + err = -ENOMEM; goto free_tmp; } - req = kpp_request_alloc(tfm, GFP_KERNEL); - if (!req) - goto free_kpp; - init_completion(&result.completion); - /* Security Manager Protocol holds digits in litte-endian order - * while ECC API expect big-endian data - */ - swap_digits((u64 *)private_key, (u64 *)tmp, 4); - p.key = (char *)tmp; - p.key_size = 32; - /* Set curve_id */ - p.curve_id = ECC_CURVE_NIST_P256; - buf_len = crypto_ecdh_key_len(&p); - buf = kmalloc(buf_len, GFP_KERNEL); - if (!buf) - goto free_req; - - crypto_ecdh_encode_key(buf, buf_len, &p); - - /* Set A private Key */ - err = crypto_kpp_set_secret(tfm, (void *)buf, buf_len); - if (err) - goto free_all; - swap_digits((u64 *)public_key, (u64 *)tmp, 4); /* x */ swap_digits((u64 *)&public_key[32], (u64 *)&tmp[32], 4); /* y */ @@ -123,104 +102,129 @@ bool compute_ecdh_secret(const u8 public_key[64], const u8 private_key[32], memcpy(secret, tmp, 32); free_all: - kzfree(buf); -free_req: kpp_request_free(req); -free_kpp: - crypto_free_kpp(tfm); free_tmp: - kfree(tmp); - return (err == 0); + kzfree(tmp); + return err; } -bool generate_ecdh_keys(u8 public_key[64], u8 private_key[32]) +/* set_ecdh_privkey() - set or generate ecc private key. + * + * Function generates an ecc private key in the crypto subsystem when receiving + * a NULL private key or sets the received key when not NULL. + * + * @tfm: KPP tfm handle allocated with crypto_alloc_kpp(). + * @private_key: user's ecc private key. When not NULL, the key is expected + * in little endian format. + * + * Return: zero on success; error code in case of error. + */ +int set_ecdh_privkey(struct crypto_kpp *tfm, const u8 private_key[32]) +{ + u8 *buf, *tmp = NULL; + unsigned int buf_len; + int err; + struct ecdh p = {0}; + + p.curve_id = ECC_CURVE_NIST_P256; + + if (private_key) { + tmp = kmalloc(32, GFP_KERNEL); + if (!tmp) + return -ENOMEM; + swap_digits((u64 *)private_key, (u64 *)tmp, 4); + p.key = tmp; + p.key_size = 32; + } + + buf_len = crypto_ecdh_key_len(&p); + buf = kmalloc(buf_len, GFP_KERNEL); + if (!buf) { + err = -ENOMEM; + goto free_tmp; + } + + err = crypto_ecdh_encode_key(buf, buf_len, &p); + if (err) + goto free_all; + + err = crypto_kpp_set_secret(tfm, buf, buf_len); + /* fall through */ +free_all: + kzfree(buf); +free_tmp: + kzfree(tmp); + return err; +} + +/* generate_ecdh_public_key() - function assumes that the private key was + * already set. + * + * @tfm: KPP tfm handle allocated with crypto_alloc_kpp(). + * @public_key: memory where the computed ecc public key will be saved. + * + * Return: zero on success; error code in case of error. + */ +int generate_ecdh_public_key(struct crypto_kpp *tfm, u8 public_key[64]) { - struct crypto_kpp *tfm; struct kpp_request *req; - struct ecdh p; + u8 *tmp; struct ecdh_completion result; struct scatterlist dst; - u8 *tmp, *buf; - unsigned int buf_len; - int err = -ENOMEM; - const unsigned short max_tries = 16; - unsigned short tries = 0; + int err; tmp = kmalloc(64, GFP_KERNEL); if (!tmp) - return false; + return -ENOMEM; - tfm = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0); - if (IS_ERR(tfm)) { - pr_err("alg: kpp: Failed to load tfm for kpp: %ld\n", - PTR_ERR(tfm)); + req = kpp_request_alloc(tfm, GFP_KERNEL); + if (!req) { + err = -ENOMEM; goto free_tmp; } - req = kpp_request_alloc(tfm, GFP_KERNEL); - if (!req) - goto free_kpp; - init_completion(&result.completion); + sg_init_one(&dst, tmp, 64); + kpp_request_set_input(req, NULL, 0); + kpp_request_set_output(req, &dst, 64); + kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, + ecdh_complete, &result); - /* Set curve_id */ - p.curve_id = ECC_CURVE_NIST_P256; - p.key_size = 32; - buf_len = crypto_ecdh_key_len(&p); - buf = kmalloc(buf_len, GFP_KERNEL); - if (!buf) - goto free_req; - - do { - if (tries++ >= max_tries) - goto free_all; - - /* Set private Key */ - p.key = (char *)private_key; - crypto_ecdh_encode_key(buf, buf_len, &p); - err = crypto_kpp_set_secret(tfm, buf, buf_len); - if (err) - goto free_all; - - sg_init_one(&dst, tmp, 64); - kpp_request_set_input(req, NULL, 0); - kpp_request_set_output(req, &dst, 64); - kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG, - ecdh_complete, &result); - - err = crypto_kpp_generate_public_key(req); - - if (err == -EINPROGRESS) { - wait_for_completion(&result.completion); - err = result.err; - } - - /* Private key is not valid. Regenerate */ - if (err == -EINVAL) - continue; - - if (err < 0) - goto free_all; - else - break; - - } while (true); - - /* Keys are handed back in little endian as expected by Security - * Manager Protocol + err = crypto_kpp_generate_public_key(req); + if (err == -EINPROGRESS) { + wait_for_completion(&result.completion); + err = result.err; + } + if (err < 0) + goto free_all; + + /* The public key is handed back in little endian as expected by + * the Security Manager Protocol. */ swap_digits((u64 *)tmp, (u64 *)public_key, 4); /* x */ swap_digits((u64 *)&tmp[32], (u64 *)&public_key[32], 4); /* y */ - swap_digits((u64 *)private_key, (u64 *)tmp, 4); - memcpy(private_key, tmp, 32); free_all: - kzfree(buf); -free_req: kpp_request_free(req); -free_kpp: - crypto_free_kpp(tfm); free_tmp: kfree(tmp); - return (err == 0); + return err; +} + +/* generate_ecdh_keys() - generate ecc key pair. + * + * @tfm: KPP tfm handle allocated with crypto_alloc_kpp(). + * @public_key: memory where the computed ecc public key will be saved. + * + * Return: zero on success; error code in case of error. + */ +int generate_ecdh_keys(struct crypto_kpp *tfm, u8 public_key[64]) +{ + int err; + + err = set_ecdh_privkey(tfm, NULL); + if (err) + return err; + + return generate_ecdh_public_key(tfm, public_key); } diff --git a/net/bluetooth/ecdh_helper.h b/net/bluetooth/ecdh_helper.h index 7a423faf76e5..a6f8d03d4aaf 100644 --- a/net/bluetooth/ecdh_helper.h +++ b/net/bluetooth/ecdh_helper.h @@ -20,8 +20,11 @@ * COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS * SOFTWARE IS DISCLAIMED. */ +#include <crypto/kpp.h> #include <linux/types.h> -bool compute_ecdh_secret(const u8 pub_a[64], const u8 priv_b[32], - u8 secret[32]); -bool generate_ecdh_keys(u8 public_key[64], u8 private_key[32]); +int compute_ecdh_secret(struct crypto_kpp *tfm, const u8 pair_public_key[64], + u8 secret[32]); +int set_ecdh_privkey(struct crypto_kpp *tfm, const u8 *private_key); +int generate_ecdh_public_key(struct crypto_kpp *tfm, u8 public_key[64]); +int generate_ecdh_keys(struct crypto_kpp *tfm, u8 public_key[64]); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index dc59eae54717..a9682534c377 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -729,8 +729,8 @@ static void create_le_conn_complete(struct hci_dev *hdev, u8 status, u16 opcode) goto done; } - BT_ERR("HCI request failed to create LE connection: status 0x%2.2x", - status); + bt_dev_err(hdev, "request failed to create LE connection: " + "status 0x%2.2x", status); if (!conn) goto done; @@ -907,7 +907,7 @@ struct hci_conn *hci_connect_le(struct hci_dev *hdev, bdaddr_t *dst, */ if (hci_dev_test_flag(hdev, HCI_LE_SCAN) && hdev->le_scan_type == LE_SCAN_ACTIVE) { - skb_queue_purge(&req.cmd_q); + hci_req_purge(&req); hci_conn_del(conn); return ERR_PTR(-EBUSY); } diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 6bc679cd3481..40d260f2bea5 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -267,7 +267,7 @@ static int hci_init1_req(struct hci_request *req, unsigned long opt) amp_init1(req); break; default: - BT_ERR("Unknown device type %d", hdev->dev_type); + bt_dev_err(hdev, "Unknown device type %d", hdev->dev_type); break; } @@ -2150,8 +2150,7 @@ static void hci_error_reset(struct work_struct *work) if (hdev->hw_error) hdev->hw_error(hdev, hdev->hw_error_code); else - BT_ERR("%s hardware error 0x%2.2x", hdev->name, - hdev->hw_error_code); + bt_dev_err(hdev, "hardware error 0x%2.2x", hdev->hw_error_code); if (hci_dev_do_close(hdev)) return; @@ -2524,9 +2523,9 @@ static void hci_cmd_timeout(struct work_struct *work) struct hci_command_hdr *sent = (void *) hdev->sent_cmd->data; u16 opcode = __le16_to_cpu(sent->opcode); - BT_ERR("%s command 0x%4.4x tx timeout", hdev->name, opcode); + bt_dev_err(hdev, "command 0x%4.4x tx timeout", opcode); } else { - BT_ERR("%s command tx timeout", hdev->name); + bt_dev_err(hdev, "command tx timeout"); } atomic_set(&hdev->cmd_cnt, 1); @@ -2858,7 +2857,7 @@ struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev, params = kzalloc(sizeof(*params), GFP_KERNEL); if (!params) { - BT_ERR("Out of memory"); + bt_dev_err(hdev, "out of memory"); return NULL; } @@ -3393,7 +3392,7 @@ static void hci_send_frame(struct hci_dev *hdev, struct sk_buff *skb) err = hdev->send(hdev, skb); if (err < 0) { - BT_ERR("%s sending frame failed (%d)", hdev->name, err); + bt_dev_err(hdev, "sending frame failed (%d)", err); kfree_skb(skb); } } @@ -3408,7 +3407,7 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, skb = hci_prepare_cmd(hdev, opcode, plen, param); if (!skb) { - BT_ERR("%s no memory for command", hdev->name); + bt_dev_err(hdev, "no memory for command"); return -ENOMEM; } @@ -3493,7 +3492,7 @@ static void hci_queue_acl(struct hci_chan *chan, struct sk_buff_head *queue, hci_add_acl_hdr(skb, chan->handle, flags); break; default: - BT_ERR("%s unknown dev_type %d", hdev->name, hdev->dev_type); + bt_dev_err(hdev, "unknown dev_type %d", hdev->dev_type); return; } @@ -3618,7 +3617,7 @@ static struct hci_conn *hci_low_sent(struct hci_dev *hdev, __u8 type, break; default: cnt = 0; - BT_ERR("Unknown link type"); + bt_dev_err(hdev, "unknown link type %d", conn->type); } q = cnt / num; @@ -3635,15 +3634,15 @@ static void hci_link_tx_to(struct hci_dev *hdev, __u8 type) struct hci_conn_hash *h = &hdev->conn_hash; struct hci_conn *c; - BT_ERR("%s link tx timeout", hdev->name); + bt_dev_err(hdev, "link tx timeout"); rcu_read_lock(); /* Kill stalled connections */ list_for_each_entry_rcu(c, &h->list, list) { if (c->type == type && c->sent) { - BT_ERR("%s killing stalled connection %pMR", - hdev->name, &c->dst); + bt_dev_err(hdev, "killing stalled connection %pMR", + &c->dst); hci_disconnect(c, HCI_ERROR_REMOTE_USER_TERM); } } @@ -3724,7 +3723,7 @@ static struct hci_chan *hci_chan_sent(struct hci_dev *hdev, __u8 type, break; default: cnt = 0; - BT_ERR("Unknown link type"); + bt_dev_err(hdev, "unknown link type %d", chan->conn->type); } q = cnt / num; @@ -4066,8 +4065,8 @@ static void hci_acldata_packet(struct hci_dev *hdev, struct sk_buff *skb) l2cap_recv_acldata(conn, skb, flags); return; } else { - BT_ERR("%s ACL packet for unknown connection handle %d", - hdev->name, handle); + bt_dev_err(hdev, "ACL packet for unknown connection handle %d", + handle); } kfree_skb(skb); @@ -4097,8 +4096,8 @@ static void hci_scodata_packet(struct hci_dev *hdev, struct sk_buff *skb) sco_recv_scodata(conn, skb); return; } else { - BT_ERR("%s SCO packet for unknown connection handle %d", - hdev->name, handle); + bt_dev_err(hdev, "SCO packet for unknown connection handle %d", + handle); } kfree_skb(skb); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 0b4dba08a14e..cd3bbb766c24 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1188,7 +1188,8 @@ static void hci_cc_le_set_scan_enable(struct hci_dev *hdev, break; default: - BT_ERR("Used reserved LE_Scan_Enable param %d", cp->enable); + bt_dev_err(hdev, "use of reserved LE_Scan_Enable param %d", + cp->enable); break; } @@ -1485,7 +1486,7 @@ static void hci_cs_create_conn(struct hci_dev *hdev, __u8 status) conn = hci_conn_add(hdev, ACL_LINK, &cp->bdaddr, HCI_ROLE_MASTER); if (!conn) - BT_ERR("No memory for new connection"); + bt_dev_err(hdev, "no memory for new connection"); } } @@ -2269,7 +2270,7 @@ static void hci_conn_request_evt(struct hci_dev *hdev, struct sk_buff *skb) conn = hci_conn_add(hdev, ev->link_type, &ev->bdaddr, HCI_ROLE_SLAVE); if (!conn) { - BT_ERR("No memory for new connection"); + bt_dev_err(hdev, "no memory for new connection"); hci_dev_unlock(hdev); return; } @@ -2431,7 +2432,7 @@ static void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) if (!hci_conn_ssp_enabled(conn) && test_bit(HCI_CONN_REAUTH_PEND, &conn->flags)) { - BT_INFO("re-auth of legacy device is not possible."); + bt_dev_info(hdev, "re-auth of legacy device is not possible."); } else { set_bit(HCI_CONN_AUTH, &conn->flags); conn->sec_level = conn->pending_sec_level; @@ -2535,8 +2536,7 @@ static void read_enc_key_size_complete(struct hci_dev *hdev, u8 status, BT_DBG("%s status 0x%02x", hdev->name, status); if (!skb || skb->len < sizeof(*rp)) { - BT_ERR("%s invalid HCI Read Encryption Key Size response", - hdev->name); + bt_dev_err(hdev, "invalid read key size response"); return; } @@ -2554,8 +2554,8 @@ static void read_enc_key_size_complete(struct hci_dev *hdev, u8 status, * supported. */ if (rp->status) { - BT_ERR("%s failed to read key size for handle %u", hdev->name, - handle); + bt_dev_err(hdev, "failed to read key size for handle %u", + handle); conn->enc_key_size = HCI_LINK_KEY_SIZE; } else { conn->enc_key_size = rp->key_size; @@ -2664,7 +2664,7 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_req_add(&req, HCI_OP_READ_ENC_KEY_SIZE, sizeof(cp), &cp); if (hci_req_run_skb(&req, read_enc_key_size_complete)) { - BT_ERR("Sending HCI Read Encryption Key Size failed"); + bt_dev_err(hdev, "sending read key size failed"); conn->enc_key_size = HCI_LINK_KEY_SIZE; goto notify; } @@ -3197,7 +3197,7 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *skb) int i; if (hdev->flow_ctl_mode != HCI_FLOW_CTL_MODE_PACKET_BASED) { - BT_ERR("Wrong event for mode %d", hdev->flow_ctl_mode); + bt_dev_err(hdev, "wrong event for mode %d", hdev->flow_ctl_mode); return; } @@ -3249,7 +3249,8 @@ static void hci_num_comp_pkts_evt(struct hci_dev *hdev, struct sk_buff *skb) break; default: - BT_ERR("Unknown type %d conn %p", conn->type, conn); + bt_dev_err(hdev, "unknown type %d conn %p", + conn->type, conn); break; } } @@ -3271,7 +3272,7 @@ static struct hci_conn *__hci_conn_lookup_handle(struct hci_dev *hdev, return chan->conn; break; default: - BT_ERR("%s unknown dev_type %d", hdev->name, hdev->dev_type); + bt_dev_err(hdev, "unknown dev_type %d", hdev->dev_type); break; } @@ -3284,7 +3285,7 @@ static void hci_num_comp_blocks_evt(struct hci_dev *hdev, struct sk_buff *skb) int i; if (hdev->flow_ctl_mode != HCI_FLOW_CTL_MODE_BLOCK_BASED) { - BT_ERR("Wrong event for mode %d", hdev->flow_ctl_mode); + bt_dev_err(hdev, "wrong event for mode %d", hdev->flow_ctl_mode); return; } @@ -3320,7 +3321,8 @@ static void hci_num_comp_blocks_evt(struct hci_dev *hdev, struct sk_buff *skb) break; default: - BT_ERR("Unknown type %d conn %p", conn->type, conn); + bt_dev_err(hdev, "unknown type %d conn %p", + conn->type, conn); break; } } @@ -4479,7 +4481,7 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) if (!conn) { conn = hci_conn_add(hdev, LE_LINK, &ev->bdaddr, ev->role); if (!conn) { - BT_ERR("No memory for new connection"); + bt_dev_err(hdev, "no memory for new connection"); goto unlock; } @@ -4749,8 +4751,8 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, case LE_ADV_SCAN_RSP: break; default: - BT_ERR_RATELIMITED("Unknown advertising packet type: 0x%02x", - type); + bt_dev_err_ratelimited(hdev, "unknown advertising packet " + "type: 0x%02x", type); return; } @@ -4769,8 +4771,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, /* Adjust for actual length */ if (len != real_len) { - BT_ERR_RATELIMITED("%s advertising data length corrected", - hdev->name); + bt_dev_err_ratelimited(hdev, "advertising data len corrected"); len = real_len; } @@ -5192,7 +5193,7 @@ static bool hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode, return false; if (skb->len < sizeof(*hdr)) { - BT_ERR("Too short HCI event"); + bt_dev_err(hdev, "too short HCI event"); return false; } @@ -5206,12 +5207,13 @@ static bool hci_get_cmd_complete(struct hci_dev *hdev, u16 opcode, } if (hdr->evt != HCI_EV_CMD_COMPLETE) { - BT_DBG("Last event is not cmd complete (0x%2.2x)", hdr->evt); + bt_dev_err(hdev, "last event is not cmd complete (0x%2.2x)", + hdr->evt); return false; } if (skb->len < sizeof(*ev)) { - BT_ERR("Too short cmd_complete event"); + bt_dev_err(hdev, "too short cmd_complete event"); return false; } diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index b73ac149de34..abc0f3224dd1 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -41,6 +41,11 @@ void hci_req_init(struct hci_request *req, struct hci_dev *hdev) req->err = 0; } +void hci_req_purge(struct hci_request *req) +{ + skb_queue_purge(&req->cmd_q); +} + static int req_run(struct hci_request *req, hci_req_complete_t complete, hci_req_complete_skb_t complete_skb) { @@ -331,8 +336,8 @@ void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, skb = hci_prepare_cmd(hdev, opcode, plen, param); if (!skb) { - BT_ERR("%s no memory for command (opcode 0x%4.4x)", - hdev->name, opcode); + bt_dev_err(hdev, "no memory for command (opcode 0x%4.4x)", + opcode); req->err = -ENOMEM; return; } @@ -1421,7 +1426,7 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy, err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa); if (err < 0) { - BT_ERR("%s failed to generate new RPA", hdev->name); + bt_dev_err(hdev, "failed to generate new RPA"); return err; } @@ -1783,7 +1788,7 @@ int hci_abort_conn(struct hci_conn *conn, u8 reason) err = hci_req_run(&req, abort_conn_complete); if (err && err != -ENODATA) { - BT_ERR("Failed to run HCI request: err %d", err); + bt_dev_err(conn->hdev, "failed to run HCI request: err %d", err); return err; } @@ -1867,7 +1872,8 @@ static void le_scan_disable_work(struct work_struct *work) hci_req_sync(hdev, le_scan_disable, 0, HCI_CMD_TIMEOUT, &status); if (status) { - BT_ERR("Failed to disable LE scan: status 0x%02x", status); + bt_dev_err(hdev, "failed to disable LE scan: status 0x%02x", + status); return; } @@ -1898,7 +1904,7 @@ static void le_scan_disable_work(struct work_struct *work) hci_req_sync(hdev, bredr_inquiry, DISCOV_INTERLEAVED_INQUIRY_LEN, HCI_CMD_TIMEOUT, &status); if (status) { - BT_ERR("Inquiry failed: status 0x%02x", status); + bt_dev_err(hdev, "inquiry failed: status 0x%02x", status); goto discov_stopped; } @@ -1940,7 +1946,8 @@ static void le_scan_restart_work(struct work_struct *work) hci_req_sync(hdev, le_scan_restart, 0, HCI_CMD_TIMEOUT, &status); if (status) { - BT_ERR("Failed to restart LE scan: status %d", status); + bt_dev_err(hdev, "failed to restart LE scan: status %d", + status); return; } diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index dde77bd59f91..702beb140d9f 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -36,6 +36,7 @@ struct hci_request { }; void hci_req_init(struct hci_request *req, struct hci_dev *hdev); +void hci_req_purge(struct hci_request *req); int hci_req_run(struct hci_request *req, hci_req_complete_t complete); int hci_req_run_skb(struct hci_request *req, hci_req_complete_skb_t complete); void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 65d734c165bd..923e9a271872 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -251,15 +251,13 @@ void hci_send_to_sock(struct hci_dev *hdev, struct sk_buff *skb) } /* Send frame to sockets with specific channel */ -void hci_send_to_channel(unsigned short channel, struct sk_buff *skb, - int flag, struct sock *skip_sk) +static void __hci_send_to_channel(unsigned short channel, struct sk_buff *skb, + int flag, struct sock *skip_sk) { struct sock *sk; BT_DBG("channel %u len %d", channel, skb->len); - read_lock(&hci_sk_list.lock); - sk_for_each(sk, &hci_sk_list.head) { struct sk_buff *nskb; @@ -285,6 +283,13 @@ void hci_send_to_channel(unsigned short channel, struct sk_buff *skb, kfree_skb(nskb); } +} + +void hci_send_to_channel(unsigned short channel, struct sk_buff *skb, + int flag, struct sock *skip_sk) +{ + read_lock(&hci_sk_list.lock); + __hci_send_to_channel(channel, skb, flag, skip_sk); read_unlock(&hci_sk_list.lock); } @@ -388,8 +393,8 @@ void hci_send_monitor_ctrl_event(struct hci_dev *hdev, u16 event, hdr->index = index; hdr->len = cpu_to_le16(skb->len - HCI_MON_HDR_SIZE); - hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, - HCI_SOCK_TRUSTED, NULL); + __hci_send_to_channel(HCI_CHANNEL_MONITOR, skb, + HCI_SOCK_TRUSTED, NULL); kfree_skb(skb); } diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index ca7a35ebaefb..9874844a95a9 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* Bluetooth HCI driver model support. */ #include <linux/module.h> @@ -13,7 +14,7 @@ static void bt_link_release(struct device *dev) kfree(conn); } -static struct device_type bt_link = { +static const struct device_type bt_link = { .name = "link", .release = bt_link_release, }; @@ -50,7 +51,7 @@ void hci_conn_add_sysfs(struct hci_conn *conn) dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle); if (device_add(&conn->dev) < 0) { - BT_ERR("Failed to register connection device"); + bt_dev_err(hdev, "failed to register connection device"); return; } @@ -86,7 +87,7 @@ static void bt_host_release(struct device *dev) module_put(THIS_MODULE); } -static struct device_type bt_host = { +static const struct device_type bt_host = { .name = "host", .release = bt_host_release, }; diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index 002743ea509c..8112893037bd 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -734,7 +734,7 @@ static void hidp_stop(struct hid_device *hid) hid->claimed = 0; } -static struct hid_ll_driver hidp_hid_driver = { +struct hid_ll_driver hidp_hid_driver = { .parse = hidp_parse, .start = hidp_start, .stop = hidp_stop, @@ -743,6 +743,7 @@ static struct hid_ll_driver hidp_hid_driver = { .raw_request = hidp_raw_request, .output_report = hidp_output_report, }; +EXPORT_SYMBOL_GPL(hidp_hid_driver); /* This function sets up the hid device. It does not add it to the HID system. That is done in hidp_add_connection(). */ diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 303c779bfe38..43ba91c440bc 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -58,7 +58,7 @@ static struct sk_buff *l2cap_build_cmd(struct l2cap_conn *conn, u8 code, u8 ident, u16 dlen, void *data); static void l2cap_send_cmd(struct l2cap_conn *conn, u8 ident, u8 code, u16 len, void *data); -static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data); +static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data, size_t data_size); static void l2cap_send_disconn_req(struct l2cap_chan *chan, int err); static void l2cap_tx(struct l2cap_chan *chan, struct l2cap_ctrl *control, @@ -1473,7 +1473,7 @@ static void l2cap_conn_start(struct l2cap_conn *conn) set_bit(CONF_REQ_SENT, &chan->conf_state); l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, - l2cap_build_conf_req(chan, buf), buf); + l2cap_build_conf_req(chan, buf, sizeof(buf)), buf); chan->num_conf_req++; } @@ -2987,12 +2987,15 @@ static inline int l2cap_get_conf_opt(void **ptr, int *type, int *olen, return len; } -static void l2cap_add_conf_opt(void **ptr, u8 type, u8 len, unsigned long val) +static void l2cap_add_conf_opt(void **ptr, u8 type, u8 len, unsigned long val, size_t size) { struct l2cap_conf_opt *opt = *ptr; BT_DBG("type 0x%2.2x len %u val 0x%lx", type, len, val); + if (size < L2CAP_CONF_OPT_SIZE + len) + return; + opt->type = type; opt->len = len; @@ -3017,7 +3020,7 @@ static void l2cap_add_conf_opt(void **ptr, u8 type, u8 len, unsigned long val) *ptr += L2CAP_CONF_OPT_SIZE + len; } -static void l2cap_add_opt_efs(void **ptr, struct l2cap_chan *chan) +static void l2cap_add_opt_efs(void **ptr, struct l2cap_chan *chan, size_t size) { struct l2cap_conf_efs efs; @@ -3045,7 +3048,7 @@ static void l2cap_add_opt_efs(void **ptr, struct l2cap_chan *chan) } l2cap_add_conf_opt(ptr, L2CAP_CONF_EFS, sizeof(efs), - (unsigned long) &efs); + (unsigned long) &efs, size); } static void l2cap_ack_timeout(struct work_struct *work) @@ -3191,11 +3194,12 @@ static inline void l2cap_txwin_setup(struct l2cap_chan *chan) chan->ack_win = chan->tx_win; } -static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data) +static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data, size_t data_size) { struct l2cap_conf_req *req = data; struct l2cap_conf_rfc rfc = { .mode = chan->mode }; void *ptr = req->data; + void *endptr = data + data_size; u16 size; BT_DBG("chan %p", chan); @@ -3220,7 +3224,7 @@ static int l2cap_build_conf_req(struct l2cap_chan *chan, void *data) done: if (chan->imtu != L2CAP_DEFAULT_MTU) - l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->imtu); + l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->imtu, endptr - ptr); switch (chan->mode) { case L2CAP_MODE_BASIC: @@ -3239,7 +3243,7 @@ done: rfc.max_pdu_size = 0; l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), - (unsigned long) &rfc); + (unsigned long) &rfc, endptr - ptr); break; case L2CAP_MODE_ERTM: @@ -3259,21 +3263,21 @@ done: L2CAP_DEFAULT_TX_WINDOW); l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), - (unsigned long) &rfc); + (unsigned long) &rfc, endptr - ptr); if (test_bit(FLAG_EFS_ENABLE, &chan->flags)) - l2cap_add_opt_efs(&ptr, chan); + l2cap_add_opt_efs(&ptr, chan, endptr - ptr); if (test_bit(FLAG_EXT_CTRL, &chan->flags)) l2cap_add_conf_opt(&ptr, L2CAP_CONF_EWS, 2, - chan->tx_win); + chan->tx_win, endptr - ptr); if (chan->conn->feat_mask & L2CAP_FEAT_FCS) if (chan->fcs == L2CAP_FCS_NONE || test_bit(CONF_RECV_NO_FCS, &chan->conf_state)) { chan->fcs = L2CAP_FCS_NONE; l2cap_add_conf_opt(&ptr, L2CAP_CONF_FCS, 1, - chan->fcs); + chan->fcs, endptr - ptr); } break; @@ -3291,17 +3295,17 @@ done: rfc.max_pdu_size = cpu_to_le16(size); l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), - (unsigned long) &rfc); + (unsigned long) &rfc, endptr - ptr); if (test_bit(FLAG_EFS_ENABLE, &chan->flags)) - l2cap_add_opt_efs(&ptr, chan); + l2cap_add_opt_efs(&ptr, chan, endptr - ptr); if (chan->conn->feat_mask & L2CAP_FEAT_FCS) if (chan->fcs == L2CAP_FCS_NONE || test_bit(CONF_RECV_NO_FCS, &chan->conf_state)) { chan->fcs = L2CAP_FCS_NONE; l2cap_add_conf_opt(&ptr, L2CAP_CONF_FCS, 1, - chan->fcs); + chan->fcs, endptr - ptr); } break; } @@ -3312,10 +3316,11 @@ done: return ptr - data; } -static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data) +static int l2cap_parse_conf_req(struct l2cap_chan *chan, void *data, size_t data_size) { struct l2cap_conf_rsp *rsp = data; void *ptr = rsp->data; + void *endptr = data + data_size; void *req = chan->conf_req; int len = chan->conf_len; int type, hint, olen; @@ -3417,7 +3422,7 @@ done: return -ECONNREFUSED; l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), - (unsigned long) &rfc); + (unsigned long) &rfc, endptr - ptr); } if (result == L2CAP_CONF_SUCCESS) { @@ -3430,7 +3435,7 @@ done: chan->omtu = mtu; set_bit(CONF_MTU_DONE, &chan->conf_state); } - l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->omtu); + l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->omtu, endptr - ptr); if (remote_efs) { if (chan->local_stype != L2CAP_SERV_NOTRAFIC && @@ -3444,7 +3449,7 @@ done: l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs), - (unsigned long) &efs); + (unsigned long) &efs, endptr - ptr); } else { /* Send PENDING Conf Rsp */ result = L2CAP_CONF_PENDING; @@ -3477,7 +3482,7 @@ done: set_bit(CONF_MODE_DONE, &chan->conf_state); l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, - sizeof(rfc), (unsigned long) &rfc); + sizeof(rfc), (unsigned long) &rfc, endptr - ptr); if (test_bit(FLAG_EFS_ENABLE, &chan->flags)) { chan->remote_id = efs.id; @@ -3491,7 +3496,7 @@ done: le32_to_cpu(efs.sdu_itime); l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs), - (unsigned long) &efs); + (unsigned long) &efs, endptr - ptr); } break; @@ -3505,7 +3510,7 @@ done: set_bit(CONF_MODE_DONE, &chan->conf_state); l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, sizeof(rfc), - (unsigned long) &rfc); + (unsigned long) &rfc, endptr - ptr); break; @@ -3527,10 +3532,11 @@ done: } static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len, - void *data, u16 *result) + void *data, size_t size, u16 *result) { struct l2cap_conf_req *req = data; void *ptr = req->data; + void *endptr = data + size; int type, olen; unsigned long val; struct l2cap_conf_rfc rfc = { .mode = L2CAP_MODE_BASIC }; @@ -3548,13 +3554,13 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len, chan->imtu = L2CAP_DEFAULT_MIN_MTU; } else chan->imtu = val; - l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->imtu); + l2cap_add_conf_opt(&ptr, L2CAP_CONF_MTU, 2, chan->imtu, endptr - ptr); break; case L2CAP_CONF_FLUSH_TO: chan->flush_to = val; l2cap_add_conf_opt(&ptr, L2CAP_CONF_FLUSH_TO, - 2, chan->flush_to); + 2, chan->flush_to, endptr - ptr); break; case L2CAP_CONF_RFC: @@ -3568,13 +3574,13 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len, chan->fcs = 0; l2cap_add_conf_opt(&ptr, L2CAP_CONF_RFC, - sizeof(rfc), (unsigned long) &rfc); + sizeof(rfc), (unsigned long) &rfc, endptr - ptr); break; case L2CAP_CONF_EWS: chan->ack_win = min_t(u16, val, chan->ack_win); l2cap_add_conf_opt(&ptr, L2CAP_CONF_EWS, 2, - chan->tx_win); + chan->tx_win, endptr - ptr); break; case L2CAP_CONF_EFS: @@ -3587,7 +3593,7 @@ static int l2cap_parse_conf_rsp(struct l2cap_chan *chan, void *rsp, int len, return -ECONNREFUSED; l2cap_add_conf_opt(&ptr, L2CAP_CONF_EFS, sizeof(efs), - (unsigned long) &efs); + (unsigned long) &efs, endptr - ptr); break; case L2CAP_CONF_FCS: @@ -3692,7 +3698,7 @@ void __l2cap_connect_rsp_defer(struct l2cap_chan *chan) return; l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, - l2cap_build_conf_req(chan, buf), buf); + l2cap_build_conf_req(chan, buf, sizeof(buf)), buf); chan->num_conf_req++; } @@ -3900,7 +3906,7 @@ sendresp: u8 buf[128]; set_bit(CONF_REQ_SENT, &chan->conf_state); l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, - l2cap_build_conf_req(chan, buf), buf); + l2cap_build_conf_req(chan, buf, sizeof(buf)), buf); chan->num_conf_req++; } @@ -3978,7 +3984,7 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn, break; l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, - l2cap_build_conf_req(chan, req), req); + l2cap_build_conf_req(chan, req, sizeof(req)), req); chan->num_conf_req++; break; @@ -4090,7 +4096,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, } /* Complete config. */ - len = l2cap_parse_conf_req(chan, rsp); + len = l2cap_parse_conf_req(chan, rsp, sizeof(rsp)); if (len < 0) { l2cap_send_disconn_req(chan, ECONNRESET); goto unlock; @@ -4124,7 +4130,7 @@ static inline int l2cap_config_req(struct l2cap_conn *conn, if (!test_and_set_bit(CONF_REQ_SENT, &chan->conf_state)) { u8 buf[64]; l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, - l2cap_build_conf_req(chan, buf), buf); + l2cap_build_conf_req(chan, buf, sizeof(buf)), buf); chan->num_conf_req++; } @@ -4184,7 +4190,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, char buf[64]; len = l2cap_parse_conf_rsp(chan, rsp->data, len, - buf, &result); + buf, sizeof(buf), &result); if (len < 0) { l2cap_send_disconn_req(chan, ECONNRESET); goto done; @@ -4214,7 +4220,7 @@ static inline int l2cap_config_rsp(struct l2cap_conn *conn, /* throw out any old stored conf requests */ result = L2CAP_CONF_SUCCESS; len = l2cap_parse_conf_rsp(chan, rsp->data, len, - req, &result); + req, sizeof(req), &result); if (len < 0) { l2cap_send_disconn_req(chan, ECONNRESET); goto done; @@ -4791,7 +4797,7 @@ static void l2cap_do_create(struct l2cap_chan *chan, int result, set_bit(CONF_REQ_SENT, &chan->conf_state); l2cap_send_cmd(chan->conn, l2cap_get_ident(chan->conn), L2CAP_CONF_REQ, - l2cap_build_conf_req(chan, buf), buf); + l2cap_build_conf_req(chan, buf, sizeof(buf)), buf); chan->num_conf_req++; } } @@ -7465,7 +7471,7 @@ static void l2cap_security_cfm(struct hci_conn *hcon, u8 status, u8 encrypt) set_bit(CONF_REQ_SENT, &chan->conf_state); l2cap_send_cmd(conn, l2cap_get_ident(conn), L2CAP_CONF_REQ, - l2cap_build_conf_req(chan, buf), + l2cap_build_conf_req(chan, buf, sizeof(buf)), buf); chan->num_conf_req++; } diff --git a/net/bluetooth/lib.c b/net/bluetooth/lib.c index aa4cf64e32a6..63e65d9b4b24 100644 --- a/net/bluetooth/lib.c +++ b/net/bluetooth/lib.c @@ -30,10 +30,10 @@ #include <net/bluetooth/bluetooth.h> -void baswap(bdaddr_t *dst, bdaddr_t *src) +void baswap(bdaddr_t *dst, const bdaddr_t *src) { - unsigned char *d = (unsigned char *) dst; - unsigned char *s = (unsigned char *) src; + const unsigned char *s = (const unsigned char *)src; + unsigned char *d = (unsigned char *)dst; unsigned int i; for (i = 0; i < 6; i++) diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 1fba2a03f8ae..6e9fc86d8daf 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2159,8 +2159,8 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data, key_count = __le16_to_cpu(cp->key_count); if (key_count > max_key_count) { - BT_ERR("load_link_keys: too big key_count value %u", - key_count); + bt_dev_err(hdev, "load_link_keys: too big key_count value %u", + key_count); return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, MGMT_STATUS_INVALID_PARAMS); } @@ -2168,8 +2168,8 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data, expected_len = sizeof(*cp) + key_count * sizeof(struct mgmt_link_key_info); if (expected_len != len) { - BT_ERR("load_link_keys: expected %u bytes, got %u bytes", - expected_len, len); + bt_dev_err(hdev, "load_link_keys: expected %u bytes, got %u bytes", + expected_len, len); return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS, MGMT_STATUS_INVALID_PARAMS); } @@ -2561,7 +2561,7 @@ static int pin_code_reply(struct sock *sk, struct hci_dev *hdev, void *data, memcpy(&ncp.addr, &cp->addr, sizeof(ncp.addr)); - BT_ERR("PIN code is not 16 bytes long"); + bt_dev_err(hdev, "PIN code is not 16 bytes long"); err = send_pin_code_neg_reply(sk, hdev, &ncp); if (err >= 0) @@ -3391,7 +3391,8 @@ static int add_remote_oob_data(struct sock *sk, struct hci_dev *hdev, MGMT_OP_ADD_REMOTE_OOB_DATA, status, &cp->addr, sizeof(cp->addr)); } else { - BT_ERR("add_remote_oob_data: invalid length of %u bytes", len); + bt_dev_err(hdev, "add_remote_oob_data: invalid len of %u bytes", + len); err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_REMOTE_OOB_DATA, MGMT_STATUS_INVALID_PARAMS); } @@ -3604,8 +3605,8 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev, uuid_count = __le16_to_cpu(cp->uuid_count); if (uuid_count > max_uuid_count) { - BT_ERR("service_discovery: too big uuid_count value %u", - uuid_count); + bt_dev_err(hdev, "service_discovery: too big uuid_count value %u", + uuid_count); err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_START_SERVICE_DISCOVERY, MGMT_STATUS_INVALID_PARAMS, &cp->type, @@ -3615,8 +3616,8 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev, expected_len = sizeof(*cp) + uuid_count * 16; if (expected_len != len) { - BT_ERR("service_discovery: expected %u bytes, got %u bytes", - expected_len, len); + bt_dev_err(hdev, "service_discovery: expected %u bytes, got %u bytes", + expected_len, len); err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_START_SERVICE_DISCOVERY, MGMT_STATUS_INVALID_PARAMS, &cp->type, @@ -3943,7 +3944,7 @@ static void set_advertising_complete(struct hci_dev *hdev, u8 status, err = hci_req_run(&req, enable_advertising_instance); if (err) - BT_ERR("Failed to re-configure advertising"); + bt_dev_err(hdev, "failed to re-configure advertising"); unlock: hci_dev_unlock(hdev); @@ -4664,15 +4665,16 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data, irk_count = __le16_to_cpu(cp->irk_count); if (irk_count > max_irk_count) { - BT_ERR("load_irks: too big irk_count value %u", irk_count); + bt_dev_err(hdev, "load_irks: too big irk_count value %u", + irk_count); return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_IRKS, MGMT_STATUS_INVALID_PARAMS); } expected_len = sizeof(*cp) + irk_count * sizeof(struct mgmt_irk_info); if (expected_len != len) { - BT_ERR("load_irks: expected %u bytes, got %u bytes", - expected_len, len); + bt_dev_err(hdev, "load_irks: expected %u bytes, got %u bytes", + expected_len, len); return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_IRKS, MGMT_STATUS_INVALID_PARAMS); } @@ -4745,7 +4747,8 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, key_count = __le16_to_cpu(cp->key_count); if (key_count > max_key_count) { - BT_ERR("load_ltks: too big key_count value %u", key_count); + bt_dev_err(hdev, "load_ltks: too big key_count value %u", + key_count); return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, MGMT_STATUS_INVALID_PARAMS); } @@ -4753,8 +4756,8 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev, expected_len = sizeof(*cp) + key_count * sizeof(struct mgmt_ltk_info); if (expected_len != len) { - BT_ERR("load_keys: expected %u bytes, got %u bytes", - expected_len, len); + bt_dev_err(hdev, "load_keys: expected %u bytes, got %u bytes", + expected_len, len); return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS, MGMT_STATUS_INVALID_PARAMS); } @@ -4873,14 +4876,15 @@ static void conn_info_refresh_complete(struct hci_dev *hdev, u8 hci_status, } if (!cp) { - BT_ERR("invalid sent_cmd in conn_info response"); + bt_dev_err(hdev, "invalid sent_cmd in conn_info response"); goto unlock; } handle = __le16_to_cpu(cp->handle); conn = hci_conn_hash_lookup_handle(hdev, handle); if (!conn) { - BT_ERR("unknown handle (%d) in conn_info response", handle); + bt_dev_err(hdev, "unknown handle (%d) in conn_info response", + handle); goto unlock; } @@ -5477,8 +5481,8 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data, param_count = __le16_to_cpu(cp->param_count); if (param_count > max_param_count) { - BT_ERR("load_conn_param: too big param_count value %u", - param_count); + bt_dev_err(hdev, "load_conn_param: too big param_count value %u", + param_count); return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM, MGMT_STATUS_INVALID_PARAMS); } @@ -5486,8 +5490,8 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data, expected_len = sizeof(*cp) + param_count * sizeof(struct mgmt_conn_param); if (expected_len != len) { - BT_ERR("load_conn_param: expected %u bytes, got %u bytes", - expected_len, len); + bt_dev_err(hdev, "load_conn_param: expected %u bytes, got %u bytes", + expected_len, len); return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_CONN_PARAM, MGMT_STATUS_INVALID_PARAMS); } @@ -5512,7 +5516,7 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data, } else if (param->addr.type == BDADDR_LE_RANDOM) { addr_type = ADDR_LE_DEV_RANDOM; } else { - BT_ERR("Ignoring invalid connection parameters"); + bt_dev_err(hdev, "ignoring invalid connection parameters"); continue; } @@ -5525,14 +5529,14 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data, min, max, latency, timeout); if (hci_check_conn_params(min, max, latency, timeout) < 0) { - BT_ERR("Ignoring invalid connection parameters"); + bt_dev_err(hdev, "ignoring invalid connection parameters"); continue; } hci_param = hci_conn_params_add(hdev, ¶m->addr.bdaddr, addr_type); if (!hci_param) { - BT_ERR("Failed to add connection parameters"); + bt_dev_err(hdev, "failed to add connection parameters"); continue; } @@ -6383,6 +6387,7 @@ static int remove_advertising(struct sock *sk, struct hci_dev *hdev, if (skb_queue_empty(&req.cmd_q) || !hdev_is_powered(hdev) || hci_dev_test_flag(hdev, HCI_ADVERTISING)) { + hci_req_purge(&req); rp.instance = cp->instance; err = mgmt_cmd_complete(sk, hdev->id, MGMT_OP_REMOVE_ADVERTISING, diff --git a/net/bluetooth/selftest.c b/net/bluetooth/selftest.c index ee92c925ecc5..03e3c89c3046 100644 --- a/net/bluetooth/selftest.c +++ b/net/bluetooth/selftest.c @@ -138,12 +138,12 @@ static const u8 dhkey_3[32] __initconst = { 0x7c, 0x1c, 0xf9, 0x49, 0xe6, 0xd7, 0xaa, 0x70, }; -static int __init test_ecdh_sample(const u8 priv_a[32], const u8 priv_b[32], - const u8 pub_a[64], const u8 pub_b[64], - const u8 dhkey[32]) +static int __init test_ecdh_sample(struct crypto_kpp *tfm, const u8 priv_a[32], + const u8 priv_b[32], const u8 pub_a[64], + const u8 pub_b[64], const u8 dhkey[32]) { u8 *tmp, *dhkey_a, *dhkey_b; - int ret = 0; + int ret; tmp = kmalloc(64, GFP_KERNEL); if (!tmp) @@ -152,19 +152,32 @@ static int __init test_ecdh_sample(const u8 priv_a[32], const u8 priv_b[32], dhkey_a = &tmp[0]; dhkey_b = &tmp[32]; - compute_ecdh_secret(pub_b, priv_a, dhkey_a); - compute_ecdh_secret(pub_a, priv_b, dhkey_b); + ret = set_ecdh_privkey(tfm, priv_a); + if (ret) + goto out; + + ret = compute_ecdh_secret(tfm, pub_b, dhkey_a); + if (ret) + goto out; if (memcmp(dhkey_a, dhkey, 32)) { ret = -EINVAL; goto out; } + ret = set_ecdh_privkey(tfm, priv_b); + if (ret) + goto out; + + ret = compute_ecdh_secret(tfm, pub_a, dhkey_b); + if (ret) + goto out; + if (memcmp(dhkey_b, dhkey, 32)) ret = -EINVAL; - + /* fall through*/ out: - kfree(dhkey_a); + kfree(tmp); return ret; } @@ -185,30 +198,43 @@ static const struct file_operations test_ecdh_fops = { static int __init test_ecdh(void) { + struct crypto_kpp *tfm; ktime_t calltime, delta, rettime; - unsigned long long duration; + unsigned long long duration = 0; int err; calltime = ktime_get(); - err = test_ecdh_sample(priv_a_1, priv_b_1, pub_a_1, pub_b_1, dhkey_1); + tfm = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0); + if (IS_ERR(tfm)) { + BT_ERR("Unable to create ECDH crypto context"); + err = PTR_ERR(tfm); + goto done; + } + + err = test_ecdh_sample(tfm, priv_a_1, priv_b_1, pub_a_1, pub_b_1, + dhkey_1); if (err) { BT_ERR("ECDH sample 1 failed"); goto done; } - err = test_ecdh_sample(priv_a_2, priv_b_2, pub_a_2, pub_b_2, dhkey_2); + err = test_ecdh_sample(tfm, priv_a_2, priv_b_2, pub_a_2, pub_b_2, + dhkey_2); if (err) { BT_ERR("ECDH sample 2 failed"); goto done; } - err = test_ecdh_sample(priv_a_3, priv_a_3, pub_a_3, pub_a_3, dhkey_3); + err = test_ecdh_sample(tfm, priv_a_3, priv_a_3, pub_a_3, pub_a_3, + dhkey_3); if (err) { BT_ERR("ECDH sample 3 failed"); goto done; } + crypto_free_kpp(tfm); + rettime = ktime_get(); delta = ktime_sub(rettime, calltime); duration = (unsigned long long) ktime_to_ns(delta) >> 10; diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index a0ef89772c36..01117ae84f1d 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -26,6 +26,7 @@ #include <crypto/algapi.h> #include <crypto/b128ops.h> #include <crypto/hash.h> +#include <crypto/kpp.h> #include <net/bluetooth/bluetooth.h> #include <net/bluetooth/hci_core.h> @@ -83,7 +84,6 @@ enum { struct smp_dev { /* Secure Connections OOB data */ u8 local_pk[64]; - u8 local_sk[32]; u8 local_rand[16]; bool debug_key; @@ -92,6 +92,7 @@ struct smp_dev { struct crypto_cipher *tfm_aes; struct crypto_shash *tfm_cmac; + struct crypto_kpp *tfm_ecdh; }; struct smp_chan { @@ -124,13 +125,13 @@ struct smp_chan { /* Secure Connections variables */ u8 local_pk[64]; - u8 local_sk[32]; u8 remote_pk[64]; u8 dhkey[32]; u8 mackey[16]; struct crypto_cipher *tfm_aes; struct crypto_shash *tfm_cmac; + struct crypto_kpp *tfm_ecdh; }; /* These debug key values are defined in the SMP section of the core @@ -565,22 +566,22 @@ int smp_generate_oob(struct hci_dev *hdev, u8 hash[16], u8 rand[16]) if (hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS)) { BT_DBG("Using debug keys"); + err = set_ecdh_privkey(smp->tfm_ecdh, debug_sk); + if (err) + return err; memcpy(smp->local_pk, debug_pk, 64); - memcpy(smp->local_sk, debug_sk, 32); smp->debug_key = true; } else { while (true) { - /* Seed private key with random number */ - get_random_bytes(smp->local_sk, 32); - - /* Generate local key pair for Secure Connections */ - if (!generate_ecdh_keys(smp->local_pk, smp->local_sk)) - return -EIO; + /* Generate key pair for Secure Connections */ + err = generate_ecdh_keys(smp->tfm_ecdh, smp->local_pk); + if (err) + return err; /* This is unlikely, but we need to check that * we didn't accidentially generate a debug key. */ - if (crypto_memneq(smp->local_sk, debug_sk, 32)) + if (crypto_memneq(smp->local_pk, debug_pk, 64)) break; } smp->debug_key = false; @@ -588,7 +589,6 @@ int smp_generate_oob(struct hci_dev *hdev, u8 hash[16], u8 rand[16]) SMP_DBG("OOB Public Key X: %32phN", smp->local_pk); SMP_DBG("OOB Public Key Y: %32phN", smp->local_pk + 32); - SMP_DBG("OOB Private Key: %32phN", smp->local_sk); get_random_bytes(smp->local_rand, 16); @@ -771,6 +771,7 @@ static void smp_chan_destroy(struct l2cap_conn *conn) crypto_free_cipher(smp->tfm_aes); crypto_free_shash(smp->tfm_cmac); + crypto_free_kpp(smp->tfm_ecdh); /* Ensure that we don't leave any debug key around if debug key * support hasn't been explicitly enabled. @@ -995,7 +996,8 @@ static u8 smp_random(struct smp_chan *smp) return SMP_UNSPECIFIED; if (crypto_memneq(smp->pcnf, confirm, sizeof(smp->pcnf))) { - BT_ERR("Pairing failed (confirmation values mismatch)"); + bt_dev_err(hcon->hdev, "pairing failed " + "(confirmation values mismatch)"); return SMP_CONFIRM_FAILED; } @@ -1209,7 +1211,7 @@ static void sc_generate_ltk(struct smp_chan *smp) key = hci_find_link_key(hdev, &hcon->dst); if (!key) { - BT_ERR("%s No Link Key found to generate LTK", hdev->name); + bt_dev_err(hdev, "no Link Key found to generate LTK"); return; } @@ -1391,16 +1393,19 @@ static struct smp_chan *smp_chan_create(struct l2cap_conn *conn) smp->tfm_aes = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); if (IS_ERR(smp->tfm_aes)) { BT_ERR("Unable to create AES crypto context"); - kzfree(smp); - return NULL; + goto zfree_smp; } smp->tfm_cmac = crypto_alloc_shash("cmac(aes)", 0, 0); if (IS_ERR(smp->tfm_cmac)) { BT_ERR("Unable to create CMAC crypto context"); - crypto_free_cipher(smp->tfm_aes); - kzfree(smp); - return NULL; + goto free_cipher; + } + + smp->tfm_ecdh = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0); + if (IS_ERR(smp->tfm_ecdh)) { + BT_ERR("Unable to create ECDH crypto context"); + goto free_shash; } smp->conn = conn; @@ -1413,6 +1418,14 @@ static struct smp_chan *smp_chan_create(struct l2cap_conn *conn) hci_conn_hold(conn->hcon); return smp; + +free_shash: + crypto_free_shash(smp->tfm_cmac); +free_cipher: + crypto_free_cipher(smp->tfm_aes); +zfree_smp: + kzfree(smp); + return NULL; } static int sc_mackey_and_ltk(struct smp_chan *smp, u8 mackey[16], u8 ltk[16]) @@ -1883,7 +1896,6 @@ static u8 sc_send_public_key(struct smp_chan *smp) smp_dev = chan->data; memcpy(smp->local_pk, smp_dev->local_pk, 64); - memcpy(smp->local_sk, smp_dev->local_sk, 32); memcpy(smp->lr, smp_dev->local_rand, 16); if (smp_dev->debug_key) @@ -1894,22 +1906,20 @@ static u8 sc_send_public_key(struct smp_chan *smp) if (hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS)) { BT_DBG("Using debug keys"); + if (set_ecdh_privkey(smp->tfm_ecdh, debug_sk)) + return SMP_UNSPECIFIED; memcpy(smp->local_pk, debug_pk, 64); - memcpy(smp->local_sk, debug_sk, 32); set_bit(SMP_FLAG_DEBUG_KEY, &smp->flags); } else { while (true) { - /* Seed private key with random number */ - get_random_bytes(smp->local_sk, 32); - - /* Generate local key pair for Secure Connections */ - if (!generate_ecdh_keys(smp->local_pk, smp->local_sk)) + /* Generate key pair for Secure Connections */ + if (generate_ecdh_keys(smp->tfm_ecdh, smp->local_pk)) return SMP_UNSPECIFIED; /* This is unlikely, but we need to check that * we didn't accidentially generate a debug key. */ - if (crypto_memneq(smp->local_sk, debug_sk, 32)) + if (crypto_memneq(smp->local_pk, debug_pk, 64)) break; } } @@ -1917,7 +1927,6 @@ static u8 sc_send_public_key(struct smp_chan *smp) done: SMP_DBG("Local Public Key X: %32phN", smp->local_pk); SMP_DBG("Local Public Key Y: %32phN", smp->local_pk + 32); - SMP_DBG("Local Private Key: %32phN", smp->local_sk); smp_send_cmd(smp->conn, SMP_CMD_PUBLIC_KEY, 64, smp->local_pk); @@ -2059,11 +2068,11 @@ static int fixup_sc_false_positive(struct smp_chan *smp) return SMP_UNSPECIFIED; if (hci_dev_test_flag(hdev, HCI_SC_ONLY)) { - BT_ERR("Refusing SMP SC -> legacy fallback in SC-only mode"); + bt_dev_err(hdev, "refusing legacy fallback in SC-only mode"); return SMP_UNSPECIFIED; } - BT_ERR("Trying to fall back to legacy SMP"); + bt_dev_err(hdev, "trying to fall back to legacy SMP"); req = (void *) &smp->preq[1]; rsp = (void *) &smp->prsp[1]; @@ -2074,7 +2083,7 @@ static int fixup_sc_false_positive(struct smp_chan *smp) auth = req->auth_req & AUTH_REQ_MASK(hdev); if (tk_request(conn, 0, auth, rsp->io_capability, req->io_capability)) { - BT_ERR("Failed to fall back to legacy SMP"); + bt_dev_err(hdev, "failed to fall back to legacy SMP"); return SMP_UNSPECIFIED; } @@ -2347,7 +2356,7 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level) chan = conn->smp; if (!chan) { - BT_ERR("SMP security requested but not available"); + bt_dev_err(hcon->hdev, "security requested but not available"); return 1; } @@ -2540,7 +2549,7 @@ static int smp_cmd_ident_addr_info(struct l2cap_conn *conn, */ if (!bacmp(&info->bdaddr, BDADDR_ANY) || !hci_is_identity_address(&info->bdaddr, info->addr_type)) { - BT_ERR("Ignoring IRK with no identity address"); + bt_dev_err(hcon->hdev, "ignoring IRK with no identity address"); goto distribute; } @@ -2645,6 +2654,7 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) struct l2cap_chan *chan = conn->smp; struct smp_chan *smp = chan->data; struct hci_dev *hdev = hcon->hdev; + struct crypto_kpp *tfm_ecdh; struct smp_cmd_pairing_confirm cfm; int err; @@ -2677,7 +2687,18 @@ static int smp_cmd_public_key(struct l2cap_conn *conn, struct sk_buff *skb) SMP_DBG("Remote Public Key X: %32phN", smp->remote_pk); SMP_DBG("Remote Public Key Y: %32phN", smp->remote_pk + 32); - if (!compute_ecdh_secret(smp->remote_pk, smp->local_sk, smp->dhkey)) + /* Compute the shared secret on the same crypto tfm on which the private + * key was set/generated. + */ + if (test_bit(SMP_FLAG_LOCAL_OOB, &smp->flags)) { + struct smp_dev *smp_dev = chan->data; + + tfm_ecdh = smp_dev->tfm_ecdh; + } else { + tfm_ecdh = smp->tfm_ecdh; + } + + if (compute_ecdh_secret(tfm_ecdh, smp->remote_pk, smp->dhkey)) return SMP_UNSPECIFIED; SMP_DBG("DHKey %32phN", smp->dhkey); @@ -2933,8 +2954,8 @@ done: return err; drop: - BT_ERR("%s unexpected SMP command 0x%02x from %pMR", hcon->hdev->name, - code, &hcon->dst); + bt_dev_err(hcon->hdev, "unexpected SMP command 0x%02x from %pMR", + code, &hcon->dst); kfree_skb(skb); return 0; } @@ -3001,8 +3022,7 @@ static void bredr_pairing(struct l2cap_chan *chan) smp = smp_chan_create(conn); if (!smp) { - BT_ERR("%s unable to create SMP context for BR/EDR", - hdev->name); + bt_dev_err(hdev, "unable to create SMP context for BR/EDR"); return; } @@ -3169,6 +3189,7 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid) struct smp_dev *smp; struct crypto_cipher *tfm_aes; struct crypto_shash *tfm_cmac; + struct crypto_kpp *tfm_ecdh; if (cid == L2CAP_CID_SMP_BREDR) { smp = NULL; @@ -3194,8 +3215,18 @@ static struct l2cap_chan *smp_add_cid(struct hci_dev *hdev, u16 cid) return ERR_CAST(tfm_cmac); } + tfm_ecdh = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0); + if (IS_ERR(tfm_ecdh)) { + BT_ERR("Unable to create ECDH crypto context"); + crypto_free_shash(tfm_cmac); + crypto_free_cipher(tfm_aes); + kzfree(smp); + return ERR_CAST(tfm_ecdh); + } + smp->tfm_aes = tfm_aes; smp->tfm_cmac = tfm_cmac; + smp->tfm_ecdh = tfm_ecdh; smp->min_key_size = SMP_MIN_ENC_KEY_SIZE; smp->max_key_size = SMP_MAX_ENC_KEY_SIZE; @@ -3205,6 +3236,7 @@ create_chan: if (smp) { crypto_free_cipher(smp->tfm_aes); crypto_free_shash(smp->tfm_cmac); + crypto_free_kpp(smp->tfm_ecdh); kzfree(smp); } return ERR_PTR(-ENOMEM); @@ -3252,6 +3284,7 @@ static void smp_del_chan(struct l2cap_chan *chan) chan->data = NULL; crypto_free_cipher(smp->tfm_aes); crypto_free_shash(smp->tfm_cmac); + crypto_free_kpp(smp->tfm_ecdh); kzfree(smp); } @@ -3490,25 +3523,18 @@ void smp_unregister(struct hci_dev *hdev) #if IS_ENABLED(CONFIG_BT_SELFTEST_SMP) -static inline void swap_digits(u64 *in, u64 *out, unsigned int ndigits) +static int __init test_debug_key(struct crypto_kpp *tfm_ecdh) { - int i; - - for (i = 0; i < ndigits; i++) - out[i] = __swab64(in[ndigits - 1 - i]); -} - -static int __init test_debug_key(void) -{ - u8 pk[64], sk[32]; - - swap_digits((u64 *)debug_sk, (u64 *)sk, 4); + u8 pk[64]; + int err; - if (!generate_ecdh_keys(pk, sk)) - return -EINVAL; + err = set_ecdh_privkey(tfm_ecdh, debug_sk); + if (err) + return err; - if (crypto_memneq(sk, debug_sk, 32)) - return -EINVAL; + err = generate_ecdh_public_key(tfm_ecdh, pk); + if (err) + return err; if (crypto_memneq(pk, debug_pk, 64)) return -EINVAL; @@ -3763,7 +3789,8 @@ static const struct file_operations test_smp_fops = { }; static int __init run_selftests(struct crypto_cipher *tfm_aes, - struct crypto_shash *tfm_cmac) + struct crypto_shash *tfm_cmac, + struct crypto_kpp *tfm_ecdh) { ktime_t calltime, delta, rettime; unsigned long long duration; @@ -3771,7 +3798,7 @@ static int __init run_selftests(struct crypto_cipher *tfm_aes, calltime = ktime_get(); - err = test_debug_key(); + err = test_debug_key(tfm_ecdh); if (err) { BT_ERR("debug_key test failed"); goto done; @@ -3848,6 +3875,7 @@ int __init bt_selftest_smp(void) { struct crypto_cipher *tfm_aes; struct crypto_shash *tfm_cmac; + struct crypto_kpp *tfm_ecdh; int err; tfm_aes = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); @@ -3863,10 +3891,19 @@ int __init bt_selftest_smp(void) return PTR_ERR(tfm_cmac); } - err = run_selftests(tfm_aes, tfm_cmac); + tfm_ecdh = crypto_alloc_kpp("ecdh", CRYPTO_ALG_INTERNAL, 0); + if (IS_ERR(tfm_ecdh)) { + BT_ERR("Unable to create ECDH crypto context"); + crypto_free_shash(tfm_cmac); + crypto_free_cipher(tfm_aes); + return PTR_ERR(tfm_ecdh); + } + + err = run_selftests(tfm_aes, tfm_cmac, tfm_ecdh); crypto_free_shash(tfm_cmac); crypto_free_cipher(tfm_aes); + crypto_free_kpp(tfm_ecdh); return err; } diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 6be41a44d688..a86e6687026e 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -133,7 +133,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, if (is_l2) __skb_push(skb, ETH_HLEN); if (is_direct_pkt_access) - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); retval = bpf_test_run(prog, skb, repeat, &duration); if (!is_l2) __skb_push(skb, ETH_HLEN); @@ -162,6 +162,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, xdp.data_hard_start = data; xdp.data = data + XDP_PACKET_HEADROOM + NET_IP_ALIGN; + xdp.data_meta = xdp.data; xdp.data_end = xdp.data + size; retval = bpf_test_run(prog, &xdp, repeat, &duration); diff --git a/net/bridge/Makefile b/net/bridge/Makefile index 40b1ede527ca..ac9ef337f0fa 100644 --- a/net/bridge/Makefile +++ b/net/bridge/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the IEEE 802.1d ethernet bridging layer. # @@ -7,7 +8,7 @@ obj-$(CONFIG_BRIDGE) += bridge.o bridge-y := br.o br_device.o br_fdb.o br_forward.o br_if.o br_input.o \ br_ioctl.o br_stp.o br_stp_bpdu.o \ br_stp_if.o br_stp_timer.o br_netlink.o \ - br_netlink_tunnel.o + br_netlink_tunnel.o br_arp_nd_proxy.o bridge-$(CONFIG_SYSFS) += br_sysfs_if.o br_sysfs_br.o diff --git a/net/bridge/br.c b/net/bridge/br.c index 1407d1ba7577..6bf06e756df2 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -112,7 +112,7 @@ static int br_device_event(struct notifier_block *unused, unsigned long event, v /* Events that may cause spanning tree to refresh */ if (event == NETDEV_CHANGEADDR || event == NETDEV_UP || event == NETDEV_CHANGE || event == NETDEV_DOWN) - br_ifinfo_notify(RTM_NEWLINK, p); + br_ifinfo_notify(RTM_NEWLINK, NULL, p); return NOTIFY_DONE; } diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c new file mode 100644 index 000000000000..2cf7716254be --- /dev/null +++ b/net/bridge/br_arp_nd_proxy.c @@ -0,0 +1,469 @@ +/* + * Handle bridge arp/nd proxy/suppress + * + * Copyright (C) 2017 Cumulus Networks + * Copyright (c) 2017 Roopa Prabhu <roopa@cumulusnetworks.com> + * + * Authors: + * Roopa Prabhu <roopa@cumulusnetworks.com> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/kernel.h> +#include <linux/netdevice.h> +#include <linux/etherdevice.h> +#include <linux/neighbour.h> +#include <net/arp.h> +#include <linux/if_vlan.h> +#include <linux/inetdevice.h> +#include <net/addrconf.h> +#if IS_ENABLED(CONFIG_IPV6) +#include <net/ip6_checksum.h> +#endif + +#include "br_private.h" + +void br_recalculate_neigh_suppress_enabled(struct net_bridge *br) +{ + struct net_bridge_port *p; + bool neigh_suppress = false; + + list_for_each_entry(p, &br->port_list, list) { + if (p->flags & BR_NEIGH_SUPPRESS) { + neigh_suppress = true; + break; + } + } + + br->neigh_suppress_enabled = neigh_suppress; +} + +#if IS_ENABLED(CONFIG_INET) +static void br_arp_send(struct net_bridge *br, struct net_bridge_port *p, + struct net_device *dev, __be32 dest_ip, __be32 src_ip, + const unsigned char *dest_hw, + const unsigned char *src_hw, + const unsigned char *target_hw, + __be16 vlan_proto, u16 vlan_tci) +{ + struct net_bridge_vlan_group *vg; + struct sk_buff *skb; + u16 pvid; + + netdev_dbg(dev, "arp send dev %s dst %pI4 dst_hw %pM src %pI4 src_hw %pM\n", + dev->name, &dest_ip, dest_hw, &src_ip, src_hw); + + if (!vlan_tci) { + arp_send(ARPOP_REPLY, ETH_P_ARP, dest_ip, dev, src_ip, + dest_hw, src_hw, target_hw); + return; + } + + skb = arp_create(ARPOP_REPLY, ETH_P_ARP, dest_ip, dev, src_ip, + dest_hw, src_hw, target_hw); + if (!skb) + return; + + if (p) + vg = nbp_vlan_group_rcu(p); + else + vg = br_vlan_group_rcu(br); + pvid = br_get_pvid(vg); + if (pvid == (vlan_tci & VLAN_VID_MASK)) + vlan_tci = 0; + + if (vlan_tci) + __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci); + + if (p) { + arp_xmit(skb); + } else { + skb_reset_mac_header(skb); + __skb_pull(skb, skb_network_offset(skb)); + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->pkt_type = PACKET_HOST; + + netif_rx_ni(skb); + } +} + +static int br_chk_addr_ip(struct net_device *dev, void *data) +{ + __be32 ip = *(__be32 *)data; + struct in_device *in_dev; + __be32 addr = 0; + + in_dev = __in_dev_get_rcu(dev); + if (in_dev) + addr = inet_confirm_addr(dev_net(dev), in_dev, 0, ip, + RT_SCOPE_HOST); + + if (addr == ip) + return 1; + + return 0; +} + +static bool br_is_local_ip(struct net_device *dev, __be32 ip) +{ + if (br_chk_addr_ip(dev, &ip)) + return true; + + /* check if ip is configured on upper dev */ + if (netdev_walk_all_upper_dev_rcu(dev, br_chk_addr_ip, &ip)) + return true; + + return false; +} + +void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br, + u16 vid, struct net_bridge_port *p) +{ + struct net_device *dev = br->dev; + struct net_device *vlandev = dev; + struct neighbour *n; + struct arphdr *parp; + u8 *arpptr, *sha; + __be32 sip, tip; + + BR_INPUT_SKB_CB(skb)->proxyarp_replied = false; + + if ((dev->flags & IFF_NOARP) || + !pskb_may_pull(skb, arp_hdr_len(dev))) + return; + + parp = arp_hdr(skb); + + if (parp->ar_pro != htons(ETH_P_IP) || + parp->ar_hln != dev->addr_len || + parp->ar_pln != 4) + return; + + arpptr = (u8 *)parp + sizeof(struct arphdr); + sha = arpptr; + arpptr += dev->addr_len; /* sha */ + memcpy(&sip, arpptr, sizeof(sip)); + arpptr += sizeof(sip); + arpptr += dev->addr_len; /* tha */ + memcpy(&tip, arpptr, sizeof(tip)); + + if (ipv4_is_loopback(tip) || + ipv4_is_multicast(tip)) + return; + + if (br->neigh_suppress_enabled) { + if (p && (p->flags & BR_NEIGH_SUPPRESS)) + return; + if (ipv4_is_zeronet(sip) || sip == tip) { + /* prevent flooding to neigh suppress ports */ + BR_INPUT_SKB_CB(skb)->proxyarp_replied = true; + return; + } + } + + if (parp->ar_op != htons(ARPOP_REQUEST)) + return; + + if (vid != 0) { + vlandev = __vlan_find_dev_deep_rcu(br->dev, skb->vlan_proto, + vid); + if (!vlandev) + return; + } + + if (br->neigh_suppress_enabled && br_is_local_ip(vlandev, tip)) { + /* its our local ip, so don't proxy reply + * and don't forward to neigh suppress ports + */ + BR_INPUT_SKB_CB(skb)->proxyarp_replied = true; + return; + } + + n = neigh_lookup(&arp_tbl, &tip, vlandev); + if (n) { + struct net_bridge_fdb_entry *f; + + if (!(n->nud_state & NUD_VALID)) { + neigh_release(n); + return; + } + + f = br_fdb_find_rcu(br, n->ha, vid); + if (f) { + bool replied = false; + + if ((p && (p->flags & BR_PROXYARP)) || + (f->dst && (f->dst->flags & (BR_PROXYARP_WIFI | + BR_NEIGH_SUPPRESS)))) { + if (!vid) + br_arp_send(br, p, skb->dev, sip, tip, + sha, n->ha, sha, 0, 0); + else + br_arp_send(br, p, skb->dev, sip, tip, + sha, n->ha, sha, + skb->vlan_proto, + skb_vlan_tag_get(skb)); + replied = true; + } + + /* If we have replied or as long as we know the + * mac, indicate to arp replied + */ + if (replied || br->neigh_suppress_enabled) + BR_INPUT_SKB_CB(skb)->proxyarp_replied = true; + } + + neigh_release(n); + } +} +#endif + +#if IS_ENABLED(CONFIG_IPV6) +struct nd_msg *br_is_nd_neigh_msg(struct sk_buff *skb, struct nd_msg *msg) +{ + struct nd_msg *m; + + m = skb_header_pointer(skb, skb_network_offset(skb) + + sizeof(struct ipv6hdr), sizeof(*msg), msg); + if (!m) + return NULL; + + if (m->icmph.icmp6_code != 0 || + (m->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION && + m->icmph.icmp6_type != NDISC_NEIGHBOUR_ADVERTISEMENT)) + return NULL; + + return m; +} + +static void br_nd_send(struct net_bridge *br, struct net_bridge_port *p, + struct sk_buff *request, struct neighbour *n, + __be16 vlan_proto, u16 vlan_tci, struct nd_msg *ns) +{ + struct net_device *dev = request->dev; + struct net_bridge_vlan_group *vg; + struct sk_buff *reply; + struct nd_msg *na; + struct ipv6hdr *pip6; + int na_olen = 8; /* opt hdr + ETH_ALEN for target */ + int ns_olen; + int i, len; + u8 *daddr; + u16 pvid; + + if (!dev) + return; + + len = LL_RESERVED_SPACE(dev) + sizeof(struct ipv6hdr) + + sizeof(*na) + na_olen + dev->needed_tailroom; + + reply = alloc_skb(len, GFP_ATOMIC); + if (!reply) + return; + + reply->protocol = htons(ETH_P_IPV6); + reply->dev = dev; + skb_reserve(reply, LL_RESERVED_SPACE(dev)); + skb_push(reply, sizeof(struct ethhdr)); + skb_set_mac_header(reply, 0); + + daddr = eth_hdr(request)->h_source; + + /* Do we need option processing ? */ + ns_olen = request->len - (skb_network_offset(request) + + sizeof(struct ipv6hdr)) - sizeof(*ns); + for (i = 0; i < ns_olen - 1; i += (ns->opt[i + 1] << 3)) { + if (ns->opt[i] == ND_OPT_SOURCE_LL_ADDR) { + daddr = ns->opt + i + sizeof(struct nd_opt_hdr); + break; + } + } + + /* Ethernet header */ + ether_addr_copy(eth_hdr(reply)->h_dest, daddr); + ether_addr_copy(eth_hdr(reply)->h_source, n->ha); + eth_hdr(reply)->h_proto = htons(ETH_P_IPV6); + reply->protocol = htons(ETH_P_IPV6); + + skb_pull(reply, sizeof(struct ethhdr)); + skb_set_network_header(reply, 0); + skb_put(reply, sizeof(struct ipv6hdr)); + + /* IPv6 header */ + pip6 = ipv6_hdr(reply); + memset(pip6, 0, sizeof(struct ipv6hdr)); + pip6->version = 6; + pip6->priority = ipv6_hdr(request)->priority; + pip6->nexthdr = IPPROTO_ICMPV6; + pip6->hop_limit = 255; + pip6->daddr = ipv6_hdr(request)->saddr; + pip6->saddr = *(struct in6_addr *)n->primary_key; + + skb_pull(reply, sizeof(struct ipv6hdr)); + skb_set_transport_header(reply, 0); + + na = (struct nd_msg *)skb_put(reply, sizeof(*na) + na_olen); + + /* Neighbor Advertisement */ + memset(na, 0, sizeof(*na) + na_olen); + na->icmph.icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT; + na->icmph.icmp6_router = 0; /* XXX: should be 1 ? */ + na->icmph.icmp6_override = 1; + na->icmph.icmp6_solicited = 1; + na->target = ns->target; + ether_addr_copy(&na->opt[2], n->ha); + na->opt[0] = ND_OPT_TARGET_LL_ADDR; + na->opt[1] = na_olen >> 3; + + na->icmph.icmp6_cksum = csum_ipv6_magic(&pip6->saddr, + &pip6->daddr, + sizeof(*na) + na_olen, + IPPROTO_ICMPV6, + csum_partial(na, sizeof(*na) + na_olen, 0)); + + pip6->payload_len = htons(sizeof(*na) + na_olen); + + skb_push(reply, sizeof(struct ipv6hdr)); + skb_push(reply, sizeof(struct ethhdr)); + + reply->ip_summed = CHECKSUM_UNNECESSARY; + + if (p) + vg = nbp_vlan_group_rcu(p); + else + vg = br_vlan_group_rcu(br); + pvid = br_get_pvid(vg); + if (pvid == (vlan_tci & VLAN_VID_MASK)) + vlan_tci = 0; + + if (vlan_tci) + __vlan_hwaccel_put_tag(reply, vlan_proto, vlan_tci); + + netdev_dbg(dev, "nd send dev %s dst %pI6 dst_hw %pM src %pI6 src_hw %pM\n", + dev->name, &pip6->daddr, daddr, &pip6->saddr, n->ha); + + if (p) { + dev_queue_xmit(reply); + } else { + skb_reset_mac_header(reply); + __skb_pull(reply, skb_network_offset(reply)); + reply->ip_summed = CHECKSUM_UNNECESSARY; + reply->pkt_type = PACKET_HOST; + + netif_rx_ni(reply); + } +} + +static int br_chk_addr_ip6(struct net_device *dev, void *data) +{ + struct in6_addr *addr = (struct in6_addr *)data; + + if (ipv6_chk_addr(dev_net(dev), addr, dev, 0)) + return 1; + + return 0; +} + +static bool br_is_local_ip6(struct net_device *dev, struct in6_addr *addr) + +{ + if (br_chk_addr_ip6(dev, addr)) + return true; + + /* check if ip is configured on upper dev */ + if (netdev_walk_all_upper_dev_rcu(dev, br_chk_addr_ip6, addr)) + return true; + + return false; +} + +void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br, + u16 vid, struct net_bridge_port *p, struct nd_msg *msg) +{ + struct net_device *dev = br->dev; + struct net_device *vlandev = NULL; + struct in6_addr *saddr, *daddr; + struct ipv6hdr *iphdr; + struct neighbour *n; + + BR_INPUT_SKB_CB(skb)->proxyarp_replied = false; + + if (p && (p->flags & BR_NEIGH_SUPPRESS)) + return; + + if (msg->icmph.icmp6_type == NDISC_NEIGHBOUR_ADVERTISEMENT && + !msg->icmph.icmp6_solicited) { + /* prevent flooding to neigh suppress ports */ + BR_INPUT_SKB_CB(skb)->proxyarp_replied = true; + return; + } + + if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION) + return; + + iphdr = ipv6_hdr(skb); + saddr = &iphdr->saddr; + daddr = &iphdr->daddr; + + if (ipv6_addr_any(saddr) || !ipv6_addr_cmp(saddr, daddr)) { + /* prevent flooding to neigh suppress ports */ + BR_INPUT_SKB_CB(skb)->proxyarp_replied = true; + return; + } + + if (vid != 0) { + /* build neigh table lookup on the vlan device */ + vlandev = __vlan_find_dev_deep_rcu(br->dev, skb->vlan_proto, + vid); + if (!vlandev) + return; + } else { + vlandev = dev; + } + + if (br_is_local_ip6(vlandev, &msg->target)) { + /* its our own ip, so don't proxy reply + * and don't forward to arp suppress ports + */ + BR_INPUT_SKB_CB(skb)->proxyarp_replied = true; + return; + } + + n = neigh_lookup(ipv6_stub->nd_tbl, &msg->target, vlandev); + if (n) { + struct net_bridge_fdb_entry *f; + + if (!(n->nud_state & NUD_VALID)) { + neigh_release(n); + return; + } + + f = br_fdb_find_rcu(br, n->ha, vid); + if (f) { + bool replied = false; + + if (f->dst && (f->dst->flags & BR_NEIGH_SUPPRESS)) { + if (vid != 0) + br_nd_send(br, p, skb, n, + skb->vlan_proto, + skb_vlan_tag_get(skb), msg); + else + br_nd_send(br, p, skb, n, 0, 0, msg); + replied = true; + } + + /* If we have replied or as long as we know the + * mac, indicate to NEIGH_SUPPRESS ports that we + * have replied + */ + if (replied || br->neigh_suppress_enabled) + BR_INPUT_SKB_CB(skb)->proxyarp_replied = true; + } + neigh_release(n); + } +} +#endif diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 861ae2a165f4..af5b8c87f590 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -39,6 +39,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) struct pcpu_sw_netstats *brstats = this_cpu_ptr(br->stats); const struct nf_br_ops *nf_ops; const unsigned char *dest; + struct ethhdr *eth; u16 vid = 0; rcu_read_lock(); @@ -53,14 +54,34 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) brstats->tx_bytes += skb->len; u64_stats_update_end(&brstats->syncp); + br_switchdev_frame_unmark(skb); BR_INPUT_SKB_CB(skb)->brdev = dev; skb_reset_mac_header(skb); + eth = eth_hdr(skb); skb_pull(skb, ETH_HLEN); if (!br_allowed_ingress(br, br_vlan_group_rcu(br), skb, &vid)) goto out; + if (IS_ENABLED(CONFIG_INET) && + (eth->h_proto == htons(ETH_P_ARP) || + eth->h_proto == htons(ETH_P_RARP)) && + br->neigh_suppress_enabled) { + br_do_proxy_suppress_arp(skb, br, vid, NULL); + } else if (IS_ENABLED(CONFIG_IPV6) && + skb->protocol == htons(ETH_P_IPV6) && + br->neigh_suppress_enabled && + pskb_may_pull(skb, sizeof(struct ipv6hdr) + + sizeof(struct nd_msg)) && + ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) { + struct nd_msg *msg, _msg; + + msg = br_is_nd_neigh_msg(skb, &_msg); + if (msg) + br_do_suppress_nd(skb, br, vid, NULL, msg); + } + dest = eth_hdr(skb)->h_dest; if (is_broadcast_ether_addr(dest)) { br_flood(br, skb, BR_PKT_BROADCAST, false, true); @@ -319,12 +340,13 @@ void br_netpoll_disable(struct net_bridge_port *p) #endif -static int br_add_slave(struct net_device *dev, struct net_device *slave_dev) +static int br_add_slave(struct net_device *dev, struct net_device *slave_dev, + struct netlink_ext_ack *extack) { struct net_bridge *br = netdev_priv(dev); - return br_add_if(br, slave_dev); + return br_add_if(br, slave_dev, extack); } static int br_del_slave(struct net_device *dev, struct net_device *slave_dev) @@ -399,7 +421,7 @@ void br_dev_setup(struct net_device *dev) br->bridge_id.prio[0] = 0x80; br->bridge_id.prio[1] = 0x00; - ether_addr_copy(br->group_addr, eth_reserved_addr_base); + ether_addr_copy(br->group_addr, eth_stp_addr); br->stp_enabled = BR_NO_STP; br->group_fwd_mask = BR_GROUPFWD_DEFAULT; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index a5e4a736a984..4ea5c8bbe286 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -25,6 +25,7 @@ #include <asm/unaligned.h> #include <linux/if_vlan.h> #include <net/switchdev.h> +#include <trace/events/bridge.h> #include "br_private.h" static struct kmem_cache *br_fdb_cache __read_mostly; @@ -169,29 +170,13 @@ static void fdb_del_hw_addr(struct net_bridge *br, const unsigned char *addr) } } -static void fdb_del_external_learn(struct net_bridge_fdb_entry *f) -{ - struct switchdev_obj_port_fdb fdb = { - .obj = { - .orig_dev = f->dst->dev, - .id = SWITCHDEV_OBJ_ID_PORT_FDB, - .flags = SWITCHDEV_F_DEFER, - }, - .vid = f->vlan_id, - }; - - ether_addr_copy(fdb.addr, f->addr.addr); - switchdev_port_obj_del(f->dst->dev, &fdb.obj); -} - static void fdb_delete(struct net_bridge *br, struct net_bridge_fdb_entry *f) { + trace_fdb_delete(br, f); + if (f->is_static) fdb_del_hw_addr(br, f->addr.addr); - if (f->added_by_external_learn) - fdb_del_external_learn(f); - hlist_del_init_rcu(&f->hlist); fdb_notify(br, f, RTM_DELNEIGH); call_rcu(&f->rcu, fdb_rcu_free); @@ -598,8 +583,10 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, fdb->updated = now; if (unlikely(added_by_user)) fdb->added_by_user = 1; - if (unlikely(fdb_modified)) + if (unlikely(fdb_modified)) { + trace_br_fdb_update(br, source, addr, vid, added_by_user); fdb_notify(br, fdb, RTM_NEWNEIGH); + } } } else { spin_lock(&br->hash_lock); @@ -608,6 +595,7 @@ void br_fdb_update(struct net_bridge *br, struct net_bridge_port *source, if (fdb) { if (unlikely(added_by_user)) fdb->added_by_user = 1; + trace_br_fdb_update(br, source, addr, vid, added_by_user); fdb_notify(br, fdb, RTM_NEWNEIGH); } } @@ -888,6 +876,8 @@ int br_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], struct net_bridge *br = NULL; int err = 0; + trace_br_fdb_add(ndm, dev, addr, vid, nlh_flags); + if (!(ndm->ndm_state & (NUD_PERMANENT|NUD_NOARP|NUD_REACHABLE))) { pr_info("bridge: RTM_NEWNEIGH with invalid state %#x\n", ndm->ndm_state); return -EINVAL; @@ -1084,6 +1074,8 @@ int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, bool modified = false; int err = 0; + trace_br_fdb_external_learn_add(br, p, addr, vid); + spin_lock_bh(&br->hash_lock); head = &br->hash[br_mac_hash(addr, vid)]; diff --git a/net/bridge/br_forward.c b/net/bridge/br_forward.c index 48fb17417fac..b4eed113d2ec 100644 --- a/net/bridge/br_forward.c +++ b/net/bridge/br_forward.c @@ -204,7 +204,7 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb, /* Do not flood to ports that enable proxy ARP */ if (p->flags & BR_PROXYARP) continue; - if ((p->flags & BR_PROXYARP_WIFI) && + if ((p->flags & (BR_PROXYARP_WIFI | BR_NEIGH_SUPPRESS)) && BR_INPUT_SKB_CB(skb)->proxyarp_replied) continue; diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index f3aef22931ab..9ba4ed65c52b 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -271,7 +271,7 @@ static void del_nbp(struct net_bridge_port *p) br_stp_disable_port(p); spin_unlock_bh(&br->lock); - br_ifinfo_notify(RTM_DELLINK, p); + br_ifinfo_notify(RTM_DELLINK, NULL, p); list_del_rcu(&p->list); if (netdev_get_fwd_headroom(dev) == br->dev->needed_headroom) @@ -310,6 +310,8 @@ void br_dev_delete(struct net_device *dev, struct list_head *head) del_nbp(p); } + br_recalculate_neigh_suppress_enabled(br); + br_fdb_delete_by_port(br, NULL, 0, 1); cancel_delayed_work_sync(&br->gc_work); @@ -480,7 +482,8 @@ netdev_features_t br_features_recompute(struct net_bridge *br, } /* called with RTNL */ -int br_add_if(struct net_bridge *br, struct net_device *dev) +int br_add_if(struct net_bridge *br, struct net_device *dev, + struct netlink_ext_ack *extack) { struct net_bridge_port *p; int err = 0; @@ -500,16 +503,22 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) return -EINVAL; /* No bridging of bridges */ - if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit) + if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit) { + NL_SET_ERR_MSG(extack, + "Can not enslave a bridge to a bridge"); return -ELOOP; + } /* Device is already being bridged */ if (br_port_exists(dev)) return -EBUSY; /* No bridging devices that dislike that (e.g. wireless) */ - if (dev->priv_flags & IFF_DONT_BRIDGE) + if (dev->priv_flags & IFF_DONT_BRIDGE) { + NL_SET_ERR_MSG(extack, + "Device does not allow enslaving to a bridge"); return -EOPNOTSUPP; + } p = new_nbp(br, dev); if (IS_ERR(p)) @@ -540,7 +549,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) dev->priv_flags |= IFF_BRIDGE_PORT; - err = netdev_master_upper_dev_link(dev, br->dev, NULL, NULL); + err = netdev_master_upper_dev_link(dev, br->dev, NULL, NULL, extack); if (err) goto err5; @@ -580,7 +589,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev) br_stp_enable_port(p); spin_unlock_bh(&br->lock); - br_ifinfo_notify(RTM_NEWLINK, p); + br_ifinfo_notify(RTM_NEWLINK, NULL, p); if (changed_addr) call_netdevice_notifiers(NETDEV_CHANGEADDR, br->dev); @@ -653,4 +662,7 @@ void br_port_flags_change(struct net_bridge_port *p, unsigned long mask) if (mask & BR_AUTO_MASK) nbp_update_port_count(br); + + if (mask & BR_NEIGH_SUPPRESS) + br_recalculate_neigh_suppress_enabled(br); } diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 7637f58c1226..7f98a7d25866 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -71,62 +71,6 @@ static int br_pass_frame_up(struct sk_buff *skb) br_netif_receive_skb); } -static void br_do_proxy_arp(struct sk_buff *skb, struct net_bridge *br, - u16 vid, struct net_bridge_port *p) -{ - struct net_device *dev = br->dev; - struct neighbour *n; - struct arphdr *parp; - u8 *arpptr, *sha; - __be32 sip, tip; - - BR_INPUT_SKB_CB(skb)->proxyarp_replied = false; - - if ((dev->flags & IFF_NOARP) || - !pskb_may_pull(skb, arp_hdr_len(dev))) - return; - - parp = arp_hdr(skb); - - if (parp->ar_pro != htons(ETH_P_IP) || - parp->ar_op != htons(ARPOP_REQUEST) || - parp->ar_hln != dev->addr_len || - parp->ar_pln != 4) - return; - - arpptr = (u8 *)parp + sizeof(struct arphdr); - sha = arpptr; - arpptr += dev->addr_len; /* sha */ - memcpy(&sip, arpptr, sizeof(sip)); - arpptr += sizeof(sip); - arpptr += dev->addr_len; /* tha */ - memcpy(&tip, arpptr, sizeof(tip)); - - if (ipv4_is_loopback(tip) || - ipv4_is_multicast(tip)) - return; - - n = neigh_lookup(&arp_tbl, &tip, dev); - if (n) { - struct net_bridge_fdb_entry *f; - - if (!(n->nud_state & NUD_VALID)) { - neigh_release(n); - return; - } - - f = br_fdb_find_rcu(br, n->ha, vid); - if (f && ((p->flags & BR_PROXYARP) || - (f->dst && (f->dst->flags & BR_PROXYARP_WIFI)))) { - arp_send(ARPOP_REPLY, ETH_P_ARP, sip, skb->dev, tip, - sha, n->ha, sha); - BR_INPUT_SKB_CB(skb)->proxyarp_replied = true; - } - - neigh_release(n); - } -} - /* note: already called with rcu_read_lock */ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { @@ -171,15 +115,29 @@ int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb BR_INPUT_SKB_CB(skb)->brdev = br->dev; - if (IS_ENABLED(CONFIG_INET) && skb->protocol == htons(ETH_P_ARP)) - br_do_proxy_arp(skb, br, vid, p); + if (IS_ENABLED(CONFIG_INET) && + (skb->protocol == htons(ETH_P_ARP) || + skb->protocol == htons(ETH_P_RARP))) { + br_do_proxy_suppress_arp(skb, br, vid, p); + } else if (IS_ENABLED(CONFIG_IPV6) && + skb->protocol == htons(ETH_P_IPV6) && + br->neigh_suppress_enabled && + pskb_may_pull(skb, sizeof(struct ipv6hdr) + + sizeof(struct nd_msg)) && + ipv6_hdr(skb)->nexthdr == IPPROTO_ICMPV6) { + struct nd_msg *msg, _msg; + + msg = br_is_nd_neigh_msg(skb, &_msg); + if (msg) + br_do_suppress_nd(skb, br, vid, p, msg); + } switch (pkt_type) { case BR_PKT_MULTICAST: mdst = br_mdb_get(br, skb, vid); if ((mdst || BR_INPUT_SKB_CB_MROUTERS_ONLY(skb)) && br_multicast_querier_exists(br, eth_hdr(skb))) { - if ((mdst && mdst->mglist) || + if ((mdst && mdst->host_joined) || br_multicast_is_router(br)) { local_rcv = true; br->dev->stats.multicast++; @@ -289,6 +247,7 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) * * Others reserved for future standardization */ + fwd_mask |= p->group_fwd_mask; switch (dest[5]) { case 0x00: /* Bridge Group Address */ /* If STP is turned off, diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c index 7970f8540cbb..73b957fd639d 100644 --- a/net/bridge/br_ioctl.c +++ b/net/bridge/br_ioctl.c @@ -98,7 +98,7 @@ static int add_del_if(struct net_bridge *br, int ifindex, int isadd) return -EINVAL; if (isadd) - ret = br_add_if(br, dev); + ret = br_add_if(br, dev, NULL); else ret = br_del_if(br, dev); @@ -293,7 +293,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) if (!ret) { if (p) - br_ifinfo_notify(RTM_NEWLINK, p); + br_ifinfo_notify(RTM_NEWLINK, NULL, p); else netdev_state_change(br->dev); } diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index a0b11e7d67d9..b0f4c734900b 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/err.h> #include <linux/igmp.h> #include <linux/kernel.h> @@ -291,6 +292,46 @@ err: kfree(priv); } +static void br_mdb_switchdev_host_port(struct net_device *dev, + struct net_device *lower_dev, + struct br_mdb_entry *entry, int type) +{ + struct switchdev_obj_port_mdb mdb = { + .obj = { + .id = SWITCHDEV_OBJ_ID_HOST_MDB, + .flags = SWITCHDEV_F_DEFER, + }, + .vid = entry->vid, + }; + + if (entry->addr.proto == htons(ETH_P_IP)) + ip_eth_mc_map(entry->addr.u.ip4, mdb.addr); +#if IS_ENABLED(CONFIG_IPV6) + else + ipv6_eth_mc_map(&entry->addr.u.ip6, mdb.addr); +#endif + + mdb.obj.orig_dev = dev; + switch (type) { + case RTM_NEWMDB: + switchdev_port_obj_add(lower_dev, &mdb.obj); + break; + case RTM_DELMDB: + switchdev_port_obj_del(lower_dev, &mdb.obj); + break; + } +} + +static void br_mdb_switchdev_host(struct net_device *dev, + struct br_mdb_entry *entry, int type) +{ + struct net_device *lower_dev; + struct list_head *iter; + + netdev_for_each_lower_dev(dev, lower_dev, iter) + br_mdb_switchdev_host_port(dev, lower_dev, entry, type); +} + static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p, struct br_mdb_entry *entry, int type) { @@ -316,7 +357,7 @@ static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p, #endif mdb.obj.orig_dev = port_dev; - if (port_dev && type == RTM_NEWMDB) { + if (p && port_dev && type == RTM_NEWMDB) { complete_info = kmalloc(sizeof(*complete_info), GFP_ATOMIC); if (complete_info) { complete_info->port = p; @@ -326,10 +367,13 @@ static void __br_mdb_notify(struct net_device *dev, struct net_bridge_port *p, if (switchdev_port_obj_add(port_dev, &mdb.obj)) kfree(complete_info); } - } else if (port_dev && type == RTM_DELMDB) { + } else if (p && port_dev && type == RTM_DELMDB) { switchdev_port_obj_del(port_dev, &mdb.obj); } + if (!p) + br_mdb_switchdev_host(dev, entry, type); + skb = nlmsg_new(rtnl_mdb_nlmsg_size(), GFP_ATOMIC); if (!skb) goto errout; @@ -352,7 +396,10 @@ void br_mdb_notify(struct net_device *dev, struct net_bridge_port *port, struct br_mdb_entry entry; memset(&entry, 0, sizeof(entry)); - entry.ifindex = port->dev->ifindex; + if (port) + entry.ifindex = port->dev->ifindex; + else + entry.ifindex = dev->ifindex; entry.addr.proto = group->proto; entry.addr.u.ip4 = group->u.ip4; #if IS_ENABLED(CONFIG_IPV6) @@ -654,7 +701,7 @@ static int __br_mdb_del(struct net_bridge *br, struct br_mdb_entry *entry) call_rcu_bh(&p->rcu, br_multicast_free_pg); err = 0; - if (!mp->ports && !mp->mglist && + if (!mp->ports && !mp->host_joined && netif_running(br->dev)) mod_timer(&mp->timer, jiffies); break; @@ -713,9 +760,9 @@ static int br_mdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, void br_mdb_init(void) { - rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, NULL); - rtnl_register(PF_BRIDGE, RTM_NEWMDB, br_mdb_add, NULL, NULL); - rtnl_register(PF_BRIDGE, RTM_DELMDB, br_mdb_del, NULL, NULL); + rtnl_register(PF_BRIDGE, RTM_GETMDB, NULL, br_mdb_dump, 0); + rtnl_register(PF_BRIDGE, RTM_NEWMDB, br_mdb_add, NULL, 0); + rtnl_register(PF_BRIDGE, RTM_DELMDB, br_mdb_del, NULL, 0); } void br_mdb_uninit(void) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 8dc5c8d69bcd..cb4729539b82 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -239,9 +239,9 @@ static void br_multicast_free_group(struct rcu_head *head) kfree(mp); } -static void br_multicast_group_expired(unsigned long data) +static void br_multicast_group_expired(struct timer_list *t) { - struct net_bridge_mdb_entry *mp = (void *)data; + struct net_bridge_mdb_entry *mp = from_timer(mp, t, timer); struct net_bridge *br = mp->br; struct net_bridge_mdb_htable *mdb; @@ -249,7 +249,8 @@ static void br_multicast_group_expired(unsigned long data) if (!netif_running(br->dev) || timer_pending(&mp->timer)) goto out; - mp->mglist = false; + mp->host_joined = false; + br_mdb_notify(br->dev, NULL, &mp->addr, RTM_DELMDB, 0); if (mp->ports) goto out; @@ -292,7 +293,7 @@ static void br_multicast_del_pg(struct net_bridge *br, p->flags); call_rcu_bh(&p->rcu, br_multicast_free_pg); - if (!mp->ports && !mp->mglist && + if (!mp->ports && !mp->host_joined && netif_running(br->dev)) mod_timer(&mp->timer, jiffies); @@ -302,9 +303,9 @@ static void br_multicast_del_pg(struct net_bridge *br, WARN_ON(1); } -static void br_multicast_port_group_expired(unsigned long data) +static void br_multicast_port_group_expired(struct timer_list *t) { - struct net_bridge_port_group *pg = (void *)data; + struct net_bridge_port_group *pg = from_timer(pg, t, timer); struct net_bridge *br = pg->port->br; spin_lock(&br->multicast_lock); @@ -701,8 +702,7 @@ rehash: mp->br = br; mp->addr = *group; - setup_timer(&mp->timer, br_multicast_group_expired, - (unsigned long)mp); + timer_setup(&mp->timer, br_multicast_group_expired, 0); hlist_add_head_rcu(&mp->hlist[mdb->ver], &mdb->mhash[hash]); mdb->size++; @@ -729,8 +729,7 @@ struct net_bridge_port_group *br_multicast_new_port_group( p->flags = flags; rcu_assign_pointer(p->next, next); hlist_add_head(&p->mglist, &port->mglist); - setup_timer(&p->timer, br_multicast_port_group_expired, - (unsigned long)p); + timer_setup(&p->timer, br_multicast_port_group_expired, 0); if (src) memcpy(p->eth_addr, src, ETH_ALEN); @@ -775,7 +774,10 @@ static int br_multicast_add_group(struct net_bridge *br, goto err; if (!port) { - mp->mglist = true; + if (!mp->host_joined) { + mp->host_joined = true; + br_mdb_notify(br->dev, NULL, &mp->addr, RTM_NEWMDB, 0); + } mod_timer(&mp->timer, now + br->multicast_membership_interval); goto out; } @@ -843,9 +845,10 @@ static int br_ip6_multicast_add_group(struct net_bridge *br, } #endif -static void br_multicast_router_expired(unsigned long data) +static void br_multicast_router_expired(struct timer_list *t) { - struct net_bridge_port *port = (void *)data; + struct net_bridge_port *port = + from_timer(port, t, multicast_router_timer); struct net_bridge *br = port->br; spin_lock(&br->multicast_lock); @@ -859,8 +862,32 @@ out: spin_unlock(&br->multicast_lock); } -static void br_multicast_local_router_expired(unsigned long data) +static void br_mc_router_state_change(struct net_bridge *p, + bool is_mc_router) +{ + struct switchdev_attr attr = { + .orig_dev = p->dev, + .id = SWITCHDEV_ATTR_ID_BRIDGE_MROUTER, + .flags = SWITCHDEV_F_DEFER, + .u.mrouter = is_mc_router, + }; + + switchdev_port_attr_set(p->dev, &attr); +} + +static void br_multicast_local_router_expired(struct timer_list *t) { + struct net_bridge *br = from_timer(br, t, multicast_router_timer); + + spin_lock(&br->multicast_lock); + if (br->multicast_router == MDB_RTR_TYPE_DISABLED || + br->multicast_router == MDB_RTR_TYPE_PERM || + timer_pending(&br->multicast_router_timer)) + goto out; + + br_mc_router_state_change(br, false); +out: + spin_unlock(&br->multicast_lock); } static void br_multicast_querier_expired(struct net_bridge *br, @@ -876,17 +903,17 @@ out: spin_unlock(&br->multicast_lock); } -static void br_ip4_multicast_querier_expired(unsigned long data) +static void br_ip4_multicast_querier_expired(struct timer_list *t) { - struct net_bridge *br = (void *)data; + struct net_bridge *br = from_timer(br, t, ip4_other_query.timer); br_multicast_querier_expired(br, &br->ip4_own_query); } #if IS_ENABLED(CONFIG_IPV6) -static void br_ip6_multicast_querier_expired(unsigned long data) +static void br_ip6_multicast_querier_expired(struct timer_list *t) { - struct net_bridge *br = (void *)data; + struct net_bridge *br = from_timer(br, t, ip6_other_query.timer); br_multicast_querier_expired(br, &br->ip6_own_query); } @@ -987,17 +1014,17 @@ out: spin_unlock(&br->multicast_lock); } -static void br_ip4_multicast_port_query_expired(unsigned long data) +static void br_ip4_multicast_port_query_expired(struct timer_list *t) { - struct net_bridge_port *port = (void *)data; + struct net_bridge_port *port = from_timer(port, t, ip4_own_query.timer); br_multicast_port_query_expired(port, &port->ip4_own_query); } #if IS_ENABLED(CONFIG_IPV6) -static void br_ip6_multicast_port_query_expired(unsigned long data) +static void br_ip6_multicast_port_query_expired(struct timer_list *t) { - struct net_bridge_port *port = (void *)data; + struct net_bridge_port *port = from_timer(port, t, ip6_own_query.timer); br_multicast_port_query_expired(port, &port->ip6_own_query); } @@ -1019,13 +1046,13 @@ int br_multicast_add_port(struct net_bridge_port *port) { port->multicast_router = MDB_RTR_TYPE_TEMP_QUERY; - setup_timer(&port->multicast_router_timer, br_multicast_router_expired, - (unsigned long)port); - setup_timer(&port->ip4_own_query.timer, - br_ip4_multicast_port_query_expired, (unsigned long)port); + timer_setup(&port->multicast_router_timer, + br_multicast_router_expired, 0); + timer_setup(&port->ip4_own_query.timer, + br_ip4_multicast_port_query_expired, 0); #if IS_ENABLED(CONFIG_IPV6) - setup_timer(&port->ip6_own_query.timer, - br_ip6_multicast_port_query_expired, (unsigned long)port); + timer_setup(&port->ip6_own_query.timer, + br_ip6_multicast_port_query_expired, 0); #endif br_mc_disabled_update(port->dev, port->br->multicast_disabled); @@ -1364,9 +1391,12 @@ static void br_multicast_mark_router(struct net_bridge *br, unsigned long now = jiffies; if (!port) { - if (br->multicast_router == MDB_RTR_TYPE_TEMP_QUERY) + if (br->multicast_router == MDB_RTR_TYPE_TEMP_QUERY) { + if (!timer_pending(&br->multicast_router_timer)) + br_mc_router_state_change(br, true); mod_timer(&br->multicast_router_timer, now + br->multicast_querier_interval); + } return; } @@ -1451,7 +1481,7 @@ static int br_ip4_multicast_query(struct net_bridge *br, max_delay *= br->multicast_last_member_count; - if (mp->mglist && + if (mp->host_joined && (timer_pending(&mp->timer) ? time_after(mp->timer.expires, now + max_delay) : try_to_del_timer_sync(&mp->timer) >= 0)) @@ -1535,7 +1565,7 @@ static int br_ip6_multicast_query(struct net_bridge *br, goto out; max_delay *= br->multicast_last_member_count; - if (mp->mglist && + if (mp->host_joined && (timer_pending(&mp->timer) ? time_after(mp->timer.expires, now + max_delay) : try_to_del_timer_sync(&mp->timer) >= 0)) @@ -1596,7 +1626,7 @@ br_multicast_leave_group(struct net_bridge *br, br_mdb_notify(br->dev, port, group, RTM_DELMDB, p->flags); - if (!mp->ports && !mp->mglist && + if (!mp->ports && !mp->host_joined && netif_running(br->dev)) mod_timer(&mp->timer, jiffies); } @@ -1636,7 +1666,7 @@ br_multicast_leave_group(struct net_bridge *br, br->multicast_last_member_interval; if (!port) { - if (mp->mglist && + if (mp->host_joined && (timer_pending(&mp->timer) ? time_after(mp->timer.expires, time) : try_to_del_timer_sync(&mp->timer) >= 0)) { @@ -1906,17 +1936,17 @@ static void br_multicast_query_expired(struct net_bridge *br, spin_unlock(&br->multicast_lock); } -static void br_ip4_multicast_query_expired(unsigned long data) +static void br_ip4_multicast_query_expired(struct timer_list *t) { - struct net_bridge *br = (void *)data; + struct net_bridge *br = from_timer(br, t, ip4_own_query.timer); br_multicast_query_expired(br, &br->ip4_own_query, &br->ip4_querier); } #if IS_ENABLED(CONFIG_IPV6) -static void br_ip6_multicast_query_expired(unsigned long data) +static void br_ip6_multicast_query_expired(struct timer_list *t) { - struct net_bridge *br = (void *)data; + struct net_bridge *br = from_timer(br, t, ip6_own_query.timer); br_multicast_query_expired(br, &br->ip6_own_query, &br->ip6_querier); } @@ -1951,17 +1981,17 @@ void br_multicast_init(struct net_bridge *br) br->has_ipv6_addr = 1; spin_lock_init(&br->multicast_lock); - setup_timer(&br->multicast_router_timer, + timer_setup(&br->multicast_router_timer, br_multicast_local_router_expired, 0); - setup_timer(&br->ip4_other_query.timer, - br_ip4_multicast_querier_expired, (unsigned long)br); - setup_timer(&br->ip4_own_query.timer, br_ip4_multicast_query_expired, - (unsigned long)br); + timer_setup(&br->ip4_other_query.timer, + br_ip4_multicast_querier_expired, 0); + timer_setup(&br->ip4_own_query.timer, + br_ip4_multicast_query_expired, 0); #if IS_ENABLED(CONFIG_IPV6) - setup_timer(&br->ip6_other_query.timer, - br_ip6_multicast_querier_expired, (unsigned long)br); - setup_timer(&br->ip6_own_query.timer, br_ip6_multicast_query_expired, - (unsigned long)br); + timer_setup(&br->ip6_other_query.timer, + br_ip6_multicast_querier_expired, 0); + timer_setup(&br->ip6_own_query.timer, + br_ip6_multicast_query_expired, 0); #endif } @@ -2042,9 +2072,14 @@ int br_multicast_set_router(struct net_bridge *br, unsigned long val) switch (val) { case MDB_RTR_TYPE_DISABLED: case MDB_RTR_TYPE_PERM: + br_mc_router_state_change(br, val == MDB_RTR_TYPE_PERM); del_timer(&br->multicast_router_timer); - /* fall through */ + br->multicast_router = val; + err = 0; + break; case MDB_RTR_TYPE_TEMP_QUERY: + if (br->multicast_router != MDB_RTR_TYPE_TEMP_QUERY) + br_mc_router_state_change(br, false); br->multicast_router = val; err = 0; break; @@ -2184,6 +2219,18 @@ bool br_multicast_enabled(const struct net_device *dev) } EXPORT_SYMBOL_GPL(br_multicast_enabled); +bool br_multicast_router(const struct net_device *dev) +{ + struct net_bridge *br = netdev_priv(dev); + bool is_router; + + spin_lock_bh(&br->multicast_lock); + is_router = br_multicast_is_router(br); + spin_unlock_bh(&br->multicast_lock); + return is_router; +} +EXPORT_SYMBOL_GPL(br_multicast_router); + int br_multicast_set_querier(struct net_bridge *br, unsigned long val) { unsigned long max_delay; diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 2261e5194c82..c2eea1b8737a 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -887,7 +887,7 @@ EXPORT_SYMBOL_GPL(br_netfilter_enable); /* For br_nf_post_routing, we need (prio = NF_BR_PRI_LAST), because * br_dev_queue_push_xmit is called afterwards */ -static struct nf_hook_ops br_nf_ops[] __read_mostly = { +static const struct nf_hook_ops br_nf_ops[] = { { .hook = br_nf_pre_routing, .pf = NFPROTO_BRIDGE, @@ -985,22 +985,25 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net, int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { - struct nf_hook_entry *elem; + const struct nf_hook_entries *e; struct nf_hook_state state; + struct nf_hook_ops **ops; + unsigned int i; int ret; - for (elem = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]); - elem && nf_hook_entry_priority(elem) <= NF_BR_PRI_BRNF; - elem = rcu_dereference(elem->next)) - ; - - if (!elem) + e = rcu_dereference(net->nf.hooks[NFPROTO_BRIDGE][hook]); + if (!e) return okfn(net, sk, skb); + ops = nf_hook_entries_get_hook_ops(e); + for (i = 0; i < e->num_hook_entries && + ops[i]->priority <= NF_BR_PRI_BRNF; i++) + ; + nf_hook_state_init(&state, hook, NFPROTO_BRIDGE, indev, outdev, sk, net, okfn); - ret = nf_hook_slow(skb, &state, elem); + ret = nf_hook_slow(skb, &state, e, i); if (ret == 1) ret = okfn(net, sk, skb); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 3bc890716c89..d0ef0a8e8831 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -138,6 +138,7 @@ static inline size_t br_port_info_size(void) + nla_total_size(1) /* IFLA_BRPORT_PROXYARP */ + nla_total_size(1) /* IFLA_BRPORT_PROXYARP_WIFI */ + nla_total_size(1) /* IFLA_BRPORT_VLAN_TUNNEL */ + + nla_total_size(1) /* IFLA_BRPORT_NEIGH_SUPPRESS */ + nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_ROOT_ID */ + nla_total_size(sizeof(struct ifla_bridge_id)) /* IFLA_BRPORT_BRIDGE_ID */ + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_DESIGNATED_PORT */ @@ -152,6 +153,7 @@ static inline size_t br_port_info_size(void) #ifdef CONFIG_BRIDGE_IGMP_SNOOPING + nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MULTICAST_ROUTER */ #endif + + nla_total_size(sizeof(u16)) /* IFLA_BRPORT_GROUP_FWD_MASK */ + 0; } @@ -208,7 +210,10 @@ static int br_port_fill_attrs(struct sk_buff *skb, p->topology_change_ack) || nla_put_u8(skb, IFLA_BRPORT_CONFIG_PENDING, p->config_pending) || nla_put_u8(skb, IFLA_BRPORT_VLAN_TUNNEL, !!(p->flags & - BR_VLAN_TUNNEL))) + BR_VLAN_TUNNEL)) || + nla_put_u16(skb, IFLA_BRPORT_GROUP_FWD_MASK, p->group_fwd_mask) || + nla_put_u8(skb, IFLA_BRPORT_NEIGH_SUPPRESS, + !!(p->flags & BR_NEIGH_SUPPRESS))) return -EMSGSIZE; timerval = br_timer_value(&p->message_age_timer); @@ -356,14 +361,14 @@ nla_put_failure: * Contains port and master info as well as carrier and bridge state. */ static int br_fill_ifinfo(struct sk_buff *skb, - struct net_bridge_port *port, + const struct net_bridge_port *port, u32 pid, u32 seq, int event, unsigned int flags, u32 filter_mask, const struct net_device *dev) { + u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; struct net_bridge *br; struct ifinfomsg *hdr; struct nlmsghdr *nlh; - u8 operstate = netif_running(dev) ? dev->operstate : IF_OPER_DOWN; if (port) br = port->br; @@ -449,28 +454,36 @@ nla_put_failure: return -EMSGSIZE; } -/* - * Notify listeners of a change in port information - */ -void br_ifinfo_notify(int event, struct net_bridge_port *port) +/* Notify listeners of a change in bridge or port information */ +void br_ifinfo_notify(int event, const struct net_bridge *br, + const struct net_bridge_port *port) { - struct net *net; + u32 filter = RTEXT_FILTER_BRVLAN_COMPRESSED; + struct net_device *dev; struct sk_buff *skb; int err = -ENOBUFS; - u32 filter = RTEXT_FILTER_BRVLAN_COMPRESSED; + struct net *net; + u16 port_no = 0; - if (!port) + if (WARN_ON(!port && !br)) return; - net = dev_net(port->dev); - br_debug(port->br, "port %u(%s) event %d\n", - (unsigned int)port->port_no, port->dev->name, event); + if (port) { + dev = port->dev; + br = port->br; + port_no = port->port_no; + } else { + dev = br->dev; + } - skb = nlmsg_new(br_nlmsg_size(port->dev, filter), GFP_ATOMIC); + net = dev_net(dev); + br_debug(br, "port %u(%s) event %d\n", port_no, dev->name, event); + + skb = nlmsg_new(br_nlmsg_size(dev, filter), GFP_ATOMIC); if (skb == NULL) goto errout; - err = br_fill_ifinfo(skb, port, 0, 0, event, 0, filter, port->dev); + err = br_fill_ifinfo(skb, port, 0, 0, event, 0, filter, dev); if (err < 0) { /* -EMSGSIZE implies BUG in br_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); @@ -483,7 +496,6 @@ errout: rtnl_set_sk_err(net, RTNLGRP_LINK, err); } - /* * Dump information about all ports, in response to GETLINK */ @@ -501,8 +513,9 @@ int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, } static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p, - int cmd, struct bridge_vlan_info *vinfo) + int cmd, struct bridge_vlan_info *vinfo, bool *changed) { + bool curr_change; int err = 0; switch (cmd) { @@ -511,22 +524,27 @@ static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p, /* if the MASTER flag is set this will act on the global * per-VLAN entry as well */ - err = nbp_vlan_add(p, vinfo->vid, vinfo->flags); - if (err) - break; + err = nbp_vlan_add(p, vinfo->vid, vinfo->flags, + &curr_change); } else { vinfo->flags |= BRIDGE_VLAN_INFO_BRENTRY; - err = br_vlan_add(br, vinfo->vid, vinfo->flags); + err = br_vlan_add(br, vinfo->vid, vinfo->flags, + &curr_change); } + if (curr_change) + *changed = true; break; case RTM_DELLINK: if (p) { - nbp_vlan_delete(p, vinfo->vid); - if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) - br_vlan_delete(p->br, vinfo->vid); - } else { - br_vlan_delete(br, vinfo->vid); + if (!nbp_vlan_delete(p, vinfo->vid)) + *changed = true; + + if ((vinfo->flags & BRIDGE_VLAN_INFO_MASTER) && + !br_vlan_delete(p->br, vinfo->vid)) + *changed = true; + } else if (!br_vlan_delete(br, vinfo->vid)) { + *changed = true; } break; } @@ -537,7 +555,8 @@ static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p, static int br_process_vlan_info(struct net_bridge *br, struct net_bridge_port *p, int cmd, struct bridge_vlan_info *vinfo_curr, - struct bridge_vlan_info **vinfo_last) + struct bridge_vlan_info **vinfo_last, + bool *changed) { if (!vinfo_curr->vid || vinfo_curr->vid >= VLAN_VID_MASK) return -EINVAL; @@ -567,22 +586,22 @@ static int br_process_vlan_info(struct net_bridge *br, sizeof(struct bridge_vlan_info)); for (v = (*vinfo_last)->vid; v <= vinfo_curr->vid; v++) { tmp_vinfo.vid = v; - err = br_vlan_info(br, p, cmd, &tmp_vinfo); + err = br_vlan_info(br, p, cmd, &tmp_vinfo, changed); if (err) break; } *vinfo_last = NULL; - return 0; + return err; } - return br_vlan_info(br, p, cmd, vinfo_curr); + return br_vlan_info(br, p, cmd, vinfo_curr, changed); } static int br_afspec(struct net_bridge *br, struct net_bridge_port *p, struct nlattr *af_spec, - int cmd) + int cmd, bool *changed) { struct bridge_vlan_info *vinfo_curr = NULL; struct bridge_vlan_info *vinfo_last = NULL; @@ -602,7 +621,8 @@ static int br_afspec(struct net_bridge *br, return err; err = br_process_vlan_tunnel_info(br, p, cmd, &tinfo_curr, - &tinfo_last); + &tinfo_last, + changed); if (err) return err; break; @@ -611,7 +631,7 @@ static int br_afspec(struct net_bridge *br, return -EINVAL; vinfo_curr = nla_data(attr); err = br_process_vlan_info(br, p, cmd, vinfo_curr, - &vinfo_last); + &vinfo_last, changed); if (err) return err; break; @@ -637,6 +657,9 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = { [IFLA_BRPORT_MCAST_TO_UCAST] = { .type = NLA_U8 }, [IFLA_BRPORT_MCAST_FLOOD] = { .type = NLA_U8 }, [IFLA_BRPORT_BCAST_FLOOD] = { .type = NLA_U8 }, + [IFLA_BRPORT_VLAN_TUNNEL] = { .type = NLA_U8 }, + [IFLA_BRPORT_GROUP_FWD_MASK] = { .type = NLA_U16 }, + [IFLA_BRPORT_NEIGH_SUPPRESS] = { .type = NLA_U8 }, }; /* Change the state of the port and notify spanning tree */ @@ -773,6 +796,20 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) return err; } #endif + + if (tb[IFLA_BRPORT_GROUP_FWD_MASK]) { + u16 fwd_mask = nla_get_u16(tb[IFLA_BRPORT_GROUP_FWD_MASK]); + + if (fwd_mask & BR_GROUPFWD_MACPAUSE) + return -EINVAL; + p->group_fwd_mask = fwd_mask; + } + + err = br_set_port_flag(p, tb, IFLA_BRPORT_NEIGH_SUPPRESS, + BR_NEIGH_SUPPRESS); + if (err) + return err; + br_port_flags_change(p, old_flags ^ p->flags); return 0; } @@ -780,10 +817,12 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) /* Change state and parameters on port. */ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) { + struct net_bridge *br = (struct net_bridge *)netdev_priv(dev); + struct nlattr *tb[IFLA_BRPORT_MAX + 1]; + struct net_bridge_port *p; struct nlattr *protinfo; struct nlattr *afspec; - struct net_bridge_port *p; - struct nlattr *tb[IFLA_BRPORT_MAX + 1]; + bool changed = false; int err = 0; protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_PROTINFO); @@ -819,15 +858,14 @@ int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) } if (err) goto out; + changed = true; } - if (afspec) { - err = br_afspec((struct net_bridge *)netdev_priv(dev), p, - afspec, RTM_SETLINK); - } + if (afspec) + err = br_afspec(br, p, afspec, RTM_SETLINK, &changed); - if (err == 0) - br_ifinfo_notify(RTM_NEWLINK, p); + if (changed) + br_ifinfo_notify(RTM_NEWLINK, br, p); out: return err; } @@ -835,8 +873,10 @@ out: /* Delete port information */ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) { - struct nlattr *afspec; + struct net_bridge *br = (struct net_bridge *)netdev_priv(dev); struct net_bridge_port *p; + struct nlattr *afspec; + bool changed = false; int err = 0; afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); @@ -848,13 +888,12 @@ int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) if (!p && !(dev->priv_flags & IFF_EBRIDGE)) return -EINVAL; - err = br_afspec((struct net_bridge *)netdev_priv(dev), p, - afspec, RTM_DELLINK); - if (err == 0) + err = br_afspec(br, p, afspec, RTM_DELLINK, &changed); + if (changed) /* Send RTM_NEWLINK because userspace * expects RTM_NEWLINK for vlan dels */ - br_ifinfo_notify(RTM_NEWLINK, p); + br_ifinfo_notify(RTM_NEWLINK, br, p); return err; } diff --git a/net/bridge/br_netlink_tunnel.c b/net/bridge/br_netlink_tunnel.c index 3712c7f0e00c..da8cb99fd259 100644 --- a/net/bridge/br_netlink_tunnel.c +++ b/net/bridge/br_netlink_tunnel.c @@ -198,7 +198,7 @@ static const struct nla_policy vlan_tunnel_policy[IFLA_BRIDGE_VLAN_TUNNEL_MAX + }; static int br_vlan_tunnel_info(struct net_bridge_port *p, int cmd, - u16 vid, u32 tun_id) + u16 vid, u32 tun_id, bool *changed) { int err = 0; @@ -208,9 +208,12 @@ static int br_vlan_tunnel_info(struct net_bridge_port *p, int cmd, switch (cmd) { case RTM_SETLINK: err = nbp_vlan_tunnel_info_add(p, vid, tun_id); + if (!err) + *changed = true; break; case RTM_DELLINK: - nbp_vlan_tunnel_info_delete(p, vid); + if (!nbp_vlan_tunnel_info_delete(p, vid)) + *changed = true; break; } @@ -254,7 +257,8 @@ int br_parse_vlan_tunnel_info(struct nlattr *attr, int br_process_vlan_tunnel_info(struct net_bridge *br, struct net_bridge_port *p, int cmd, struct vtunnel_info *tinfo_curr, - struct vtunnel_info *tinfo_last) + struct vtunnel_info *tinfo_last, + bool *changed) { int err; @@ -272,7 +276,7 @@ int br_process_vlan_tunnel_info(struct net_bridge *br, return -EINVAL; t = tinfo_last->tunid; for (v = tinfo_last->vid; v <= tinfo_curr->vid; v++) { - err = br_vlan_tunnel_info(p, cmd, v, t); + err = br_vlan_tunnel_info(p, cmd, v, t, changed); if (err) return err; t++; @@ -283,7 +287,7 @@ int br_process_vlan_tunnel_info(struct net_bridge *br, if (tinfo_last->flags) return -EINVAL; err = br_vlan_tunnel_info(p, cmd, tinfo_curr->vid, - tinfo_curr->tunid); + tinfo_curr->tunid, changed); if (err) return err; memset(tinfo_last, 0, sizeof(struct vtunnel_info)); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index fd9ee73e0a6d..1312b8d20ec3 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -36,7 +36,14 @@ /* Control of forwarding link local multicast */ #define BR_GROUPFWD_DEFAULT 0 /* Don't allow forwarding of control protocols like STP, MAC PAUSE and LACP */ -#define BR_GROUPFWD_RESTRICTED 0x0007u +enum { + BR_GROUPFWD_STP = BIT(0), + BR_GROUPFWD_MACPAUSE = BIT(1), + BR_GROUPFWD_LACP = BIT(2), +}; + +#define BR_GROUPFWD_RESTRICTED (BR_GROUPFWD_STP | BR_GROUPFWD_MACPAUSE | \ + BR_GROUPFWD_LACP) /* The Nearest Customer Bridge Group Address, 01-80-C2-00-00-[00,0B,0C,0D,0F] */ #define BR_GROUPFWD_8021AD 0xB801u @@ -202,7 +209,7 @@ struct net_bridge_mdb_entry struct rcu_head rcu; struct timer_list timer; struct br_ip addr; - bool mglist; + bool host_joined; }; struct net_bridge_mdb_htable @@ -268,6 +275,7 @@ struct net_bridge_port { #ifdef CONFIG_NET_SWITCHDEV int offload_fwd_mark; #endif + u16 group_fwd_mask; }; #define br_auto_port(p) ((p)->flags & BR_AUTO_MASK) @@ -396,6 +404,7 @@ struct net_bridge { #ifdef CONFIG_NET_SWITCHDEV int offload_fwd_mark; #endif + bool neigh_suppress_enabled; }; struct br_input_skb_cb { @@ -558,7 +567,8 @@ void br_flood(struct net_bridge *br, struct sk_buff *skb, void br_port_carrier_check(struct net_bridge_port *p); int br_add_bridge(struct net *net, const char *name); int br_del_bridge(struct net *net, const char *name); -int br_add_if(struct net_bridge *br, struct net_device *dev); +int br_add_if(struct net_bridge *br, struct net_device *dev, + struct netlink_ext_ack *extack); int br_del_if(struct net_bridge *br, struct net_device *dev); int br_min_mtu(const struct net_bridge *br); netdev_features_t br_features_recompute(struct net_bridge *br, @@ -793,7 +803,8 @@ struct sk_buff *br_handle_vlan(struct net_bridge *br, const struct net_bridge_port *port, struct net_bridge_vlan_group *vg, struct sk_buff *skb); -int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags); +int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags, + bool *changed); int br_vlan_delete(struct net_bridge *br, u16 vid); void br_vlan_flush(struct net_bridge *br); struct net_bridge_vlan *br_vlan_find(struct net_bridge_vlan_group *vg, u16 vid); @@ -806,7 +817,8 @@ int br_vlan_set_stats(struct net_bridge *br, unsigned long val); int br_vlan_init(struct net_bridge *br); int br_vlan_set_default_pvid(struct net_bridge *br, unsigned long val); int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid); -int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags); +int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags, + bool *changed); int nbp_vlan_delete(struct net_bridge_port *port, u16 vid); void nbp_vlan_flush(struct net_bridge_port *port); int nbp_vlan_init(struct net_bridge_port *port); @@ -893,8 +905,10 @@ static inline struct sk_buff *br_handle_vlan(struct net_bridge *br, return skb; } -static inline int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) +static inline int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags, + bool *changed) { + *changed = false; return -EOPNOTSUPP; } @@ -916,8 +930,10 @@ static inline int br_vlan_init(struct net_bridge *br) return 0; } -static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) +static inline int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags, + bool *changed) { + *changed = false; return -EOPNOTSUPP; } @@ -1055,7 +1071,8 @@ extern int (*br_fdb_test_addr_hook)(struct net_device *dev, unsigned char *addr) extern struct rtnl_link_ops br_link_ops; int br_netlink_init(void); void br_netlink_fini(void); -void br_ifinfo_notify(int event, struct net_bridge_port *port); +void br_ifinfo_notify(int event, const struct net_bridge *br, + const struct net_bridge_port *port); int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags); int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags); int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, @@ -1091,6 +1108,11 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p, unsigned long mask); void br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type); + +static inline void br_switchdev_frame_unmark(struct sk_buff *skb) +{ + skb->offload_fwd_mark = 0; +} #else static inline int nbp_switchdev_mark_set(struct net_bridge_port *p) { @@ -1119,6 +1141,17 @@ static inline void br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type) { } + +static inline void br_switchdev_frame_unmark(struct sk_buff *skb) +{ +} #endif /* CONFIG_NET_SWITCHDEV */ +/* br_arp_nd_proxy.c */ +void br_recalculate_neigh_suppress_enabled(struct net_bridge *br); +void br_do_proxy_suppress_arp(struct sk_buff *skb, struct net_bridge *br, + u16 vid, struct net_bridge_port *p); +void br_do_suppress_nd(struct sk_buff *skb, struct net_bridge *br, + u16 vid, struct net_bridge_port *p, struct nd_msg *msg); +struct nd_msg *br_is_nd_neigh_msg(struct sk_buff *skb, struct nd_msg *m); #endif diff --git a/net/bridge/br_private_tunnel.h b/net/bridge/br_private_tunnel.h index 4a447a378ab3..a259471bfd78 100644 --- a/net/bridge/br_private_tunnel.h +++ b/net/bridge/br_private_tunnel.h @@ -26,7 +26,8 @@ int br_process_vlan_tunnel_info(struct net_bridge *br, struct net_bridge_port *p, int cmd, struct vtunnel_info *tinfo_curr, - struct vtunnel_info *tinfo_last); + struct vtunnel_info *tinfo_last, + bool *changed); int br_get_vlan_tunnel_info_size(struct net_bridge_vlan_group *vg); int br_fill_vlan_tunnel_info(struct sk_buff *skb, struct net_bridge_vlan_group *vg); diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c index 8f56c2d1f1a7..b6941961a876 100644 --- a/net/bridge/br_stp.c +++ b/net/bridge/br_stp.c @@ -123,7 +123,7 @@ static void br_root_port_block(const struct net_bridge *br, (unsigned int) p->port_no, p->dev->name); br_set_state(p, BR_STATE_LISTENING); - br_ifinfo_notify(RTM_NEWLINK, p); + br_ifinfo_notify(RTM_NEWLINK, NULL, p); if (br->forward_delay > 0) mod_timer(&p->forward_delay_timer, jiffies + br->forward_delay); @@ -403,7 +403,7 @@ static void br_make_blocking(struct net_bridge_port *p) br_topology_change_detection(p->br); br_set_state(p, BR_STATE_BLOCKING); - br_ifinfo_notify(RTM_NEWLINK, p); + br_ifinfo_notify(RTM_NEWLINK, NULL, p); del_timer(&p->forward_delay_timer); } @@ -426,7 +426,7 @@ static void br_make_forwarding(struct net_bridge_port *p) else br_set_state(p, BR_STATE_LEARNING); - br_ifinfo_notify(RTM_NEWLINK, p); + br_ifinfo_notify(RTM_NEWLINK, NULL, p); if (br->forward_delay != 0) mod_timer(&p->forward_delay_timer, jiffies + br->forward_delay); diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c index 89110319ef0f..808e2b914015 100644 --- a/net/bridge/br_stp_if.c +++ b/net/bridge/br_stp_if.c @@ -96,7 +96,7 @@ void br_stp_enable_port(struct net_bridge_port *p) { br_init_port(p); br_port_state_selection(p->br); - br_ifinfo_notify(RTM_NEWLINK, p); + br_ifinfo_notify(RTM_NEWLINK, NULL, p); } /* called under bridge lock */ @@ -111,7 +111,7 @@ void br_stp_disable_port(struct net_bridge_port *p) p->topology_change_ack = 0; p->config_pending = 0; - br_ifinfo_notify(RTM_NEWLINK, p); + br_ifinfo_notify(RTM_NEWLINK, NULL, p); del_timer(&p->message_age_timer); del_timer(&p->forward_delay_timer); diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c index 60b6fe277a8b..e7739de5f0e1 100644 --- a/net/bridge/br_stp_timer.c +++ b/net/bridge/br_stp_timer.c @@ -31,9 +31,9 @@ static int br_is_designated_for_some_port(const struct net_bridge *br) return 0; } -static void br_hello_timer_expired(unsigned long arg) +static void br_hello_timer_expired(struct timer_list *t) { - struct net_bridge *br = (struct net_bridge *)arg; + struct net_bridge *br = from_timer(br, t, hello_timer); br_debug(br, "hello timer expired\n"); spin_lock(&br->lock); @@ -47,9 +47,9 @@ static void br_hello_timer_expired(unsigned long arg) spin_unlock(&br->lock); } -static void br_message_age_timer_expired(unsigned long arg) +static void br_message_age_timer_expired(struct timer_list *t) { - struct net_bridge_port *p = (struct net_bridge_port *) arg; + struct net_bridge_port *p = from_timer(p, t, message_age_timer); struct net_bridge *br = p->br; const bridge_id *id = &p->designated_bridge; int was_root; @@ -80,9 +80,9 @@ static void br_message_age_timer_expired(unsigned long arg) spin_unlock(&br->lock); } -static void br_forward_delay_timer_expired(unsigned long arg) +static void br_forward_delay_timer_expired(struct timer_list *t) { - struct net_bridge_port *p = (struct net_bridge_port *) arg; + struct net_bridge_port *p = from_timer(p, t, forward_delay_timer); struct net_bridge *br = p->br; br_debug(br, "port %u(%s) forward delay timer\n", @@ -99,14 +99,14 @@ static void br_forward_delay_timer_expired(unsigned long arg) netif_carrier_on(br->dev); } rcu_read_lock(); - br_ifinfo_notify(RTM_NEWLINK, p); + br_ifinfo_notify(RTM_NEWLINK, NULL, p); rcu_read_unlock(); spin_unlock(&br->lock); } -static void br_tcn_timer_expired(unsigned long arg) +static void br_tcn_timer_expired(struct timer_list *t) { - struct net_bridge *br = (struct net_bridge *) arg; + struct net_bridge *br = from_timer(br, t, tcn_timer); br_debug(br, "tcn timer expired\n"); spin_lock(&br->lock); @@ -118,9 +118,9 @@ static void br_tcn_timer_expired(unsigned long arg) spin_unlock(&br->lock); } -static void br_topology_change_timer_expired(unsigned long arg) +static void br_topology_change_timer_expired(struct timer_list *t) { - struct net_bridge *br = (struct net_bridge *) arg; + struct net_bridge *br = from_timer(br, t, topology_change_timer); br_debug(br, "topo change timer expired\n"); spin_lock(&br->lock); @@ -129,9 +129,9 @@ static void br_topology_change_timer_expired(unsigned long arg) spin_unlock(&br->lock); } -static void br_hold_timer_expired(unsigned long arg) +static void br_hold_timer_expired(struct timer_list *t) { - struct net_bridge_port *p = (struct net_bridge_port *) arg; + struct net_bridge_port *p = from_timer(p, t, hold_timer); br_debug(p->br, "port %u(%s) hold timer expired\n", (unsigned int) p->port_no, p->dev->name); @@ -144,27 +144,17 @@ static void br_hold_timer_expired(unsigned long arg) void br_stp_timer_init(struct net_bridge *br) { - setup_timer(&br->hello_timer, br_hello_timer_expired, - (unsigned long) br); - - setup_timer(&br->tcn_timer, br_tcn_timer_expired, - (unsigned long) br); - - setup_timer(&br->topology_change_timer, - br_topology_change_timer_expired, - (unsigned long) br); + timer_setup(&br->hello_timer, br_hello_timer_expired, 0); + timer_setup(&br->tcn_timer, br_tcn_timer_expired, 0); + timer_setup(&br->topology_change_timer, + br_topology_change_timer_expired, 0); } void br_stp_port_timer_init(struct net_bridge_port *p) { - setup_timer(&p->message_age_timer, br_message_age_timer_expired, - (unsigned long) p); - - setup_timer(&p->forward_delay_timer, br_forward_delay_timer_expired, - (unsigned long) p); - - setup_timer(&p->hold_timer, br_hold_timer_expired, - (unsigned long) p); + timer_setup(&p->message_age_timer, br_message_age_timer_expired, 0); + timer_setup(&p->forward_delay_timer, br_forward_delay_timer_expired, 0); + timer_setup(&p->hold_timer, br_hold_timer_expired, 0); } /* Report ticks left (in USER_HZ) used for API */ diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 181a44d0f1da..9700e0f3307b 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/list.h> #include <linux/netdevice.h> @@ -115,7 +116,7 @@ br_switchdev_fdb_call_notifiers(bool adding, const unsigned char *mac, void br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type) { - if (!fdb->added_by_user) + if (!fdb->added_by_user || !fdb->dst) return; switch (type) { diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 5d5d413a6cf8..0254c35b2bf0 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -165,6 +165,23 @@ static int store_flush(struct net_bridge_port *p, unsigned long v) } static BRPORT_ATTR(flush, S_IWUSR, NULL, store_flush); +static ssize_t show_group_fwd_mask(struct net_bridge_port *p, char *buf) +{ + return sprintf(buf, "%#x\n", p->group_fwd_mask); +} + +static int store_group_fwd_mask(struct net_bridge_port *p, + unsigned long v) +{ + if (v & BR_GROUPFWD_MACPAUSE) + return -EINVAL; + p->group_fwd_mask = v; + + return 0; +} +static BRPORT_ATTR(group_fwd_mask, S_IRUGO | S_IWUSR, show_group_fwd_mask, + store_group_fwd_mask); + BRPORT_ATTR_FLAG(hairpin_mode, BR_HAIRPIN_MODE); BRPORT_ATTR_FLAG(bpdu_guard, BR_BPDU_GUARD); BRPORT_ATTR_FLAG(root_block, BR_ROOT_BLOCK); @@ -174,6 +191,7 @@ BRPORT_ATTR_FLAG(proxyarp, BR_PROXYARP); BRPORT_ATTR_FLAG(proxyarp_wifi, BR_PROXYARP_WIFI); BRPORT_ATTR_FLAG(multicast_flood, BR_MCAST_FLOOD); BRPORT_ATTR_FLAG(broadcast_flood, BR_BCAST_FLOOD); +BRPORT_ATTR_FLAG(neigh_suppress, BR_NEIGH_SUPPRESS); #ifdef CONFIG_BRIDGE_IGMP_SNOOPING static ssize_t show_multicast_router(struct net_bridge_port *p, char *buf) @@ -223,6 +241,8 @@ static const struct brport_attribute *brport_attrs[] = { &brport_attr_proxyarp_wifi, &brport_attr_multicast_flood, &brport_attr_broadcast_flood, + &brport_attr_group_fwd_mask, + &brport_attr_neigh_suppress, NULL }; @@ -260,7 +280,7 @@ static ssize_t brport_store(struct kobject *kobj, ret = brport_attr->store(p, val); spin_unlock_bh(&p->br->lock); if (!ret) { - br_ifinfo_notify(RTM_NEWLINK, p); + br_ifinfo_notify(RTM_NEWLINK, NULL, p); ret = count; } } diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 233a30040c91..51935270c651 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -32,27 +32,34 @@ static struct net_bridge_vlan *br_vlan_lookup(struct rhashtable *tbl, u16 vid) return rhashtable_lookup_fast(tbl, &vid, br_vlan_rht_params); } -static void __vlan_add_pvid(struct net_bridge_vlan_group *vg, u16 vid) +static bool __vlan_add_pvid(struct net_bridge_vlan_group *vg, u16 vid) { if (vg->pvid == vid) - return; + return false; smp_wmb(); vg->pvid = vid; + + return true; } -static void __vlan_delete_pvid(struct net_bridge_vlan_group *vg, u16 vid) +static bool __vlan_delete_pvid(struct net_bridge_vlan_group *vg, u16 vid) { if (vg->pvid != vid) - return; + return false; smp_wmb(); vg->pvid = 0; + + return true; } -static void __vlan_add_flags(struct net_bridge_vlan *v, u16 flags) +/* return true if anything changed, false otherwise */ +static bool __vlan_add_flags(struct net_bridge_vlan *v, u16 flags) { struct net_bridge_vlan_group *vg; + u16 old_flags = v->flags; + bool ret; if (br_vlan_is_master(v)) vg = br_vlan_group(v->br); @@ -60,14 +67,16 @@ static void __vlan_add_flags(struct net_bridge_vlan *v, u16 flags) vg = nbp_vlan_group(v->port); if (flags & BRIDGE_VLAN_INFO_PVID) - __vlan_add_pvid(vg, v->vid); + ret = __vlan_add_pvid(vg, v->vid); else - __vlan_delete_pvid(vg, v->vid); + ret = __vlan_delete_pvid(vg, v->vid); if (flags & BRIDGE_VLAN_INFO_UNTAGGED) v->flags |= BRIDGE_VLAN_INFO_UNTAGGED; else v->flags &= ~BRIDGE_VLAN_INFO_UNTAGGED; + + return ret || !!(old_flags ^ v->flags); } static int __vlan_vid_add(struct net_device *dev, struct net_bridge *br, @@ -151,8 +160,10 @@ static struct net_bridge_vlan *br_vlan_get_master(struct net_bridge *br, u16 vid vg = br_vlan_group(br); masterv = br_vlan_find(vg, vid); if (!masterv) { + bool changed; + /* missing global ctx, create it now */ - if (br_vlan_add(br, vid, 0)) + if (br_vlan_add(br, vid, 0, &changed)) return NULL; masterv = br_vlan_find(vg, vid); if (WARN_ON(!masterv)) @@ -232,8 +243,11 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags) /* need to work on the master vlan too */ if (flags & BRIDGE_VLAN_INFO_MASTER) { - err = br_vlan_add(br, v->vid, flags | - BRIDGE_VLAN_INFO_BRENTRY); + bool changed; + + err = br_vlan_add(br, v->vid, + flags | BRIDGE_VLAN_INFO_BRENTRY, + &changed); if (err) goto out_filt; } @@ -550,8 +564,9 @@ bool br_should_learn(struct net_bridge_port *p, struct sk_buff *skb, u16 *vid) /* Must be protected by RTNL. * Must be called with vid in range from 1 to 4094 inclusive. + * changed must be true only if the vlan was created or updated */ -int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) +int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags, bool *changed) { struct net_bridge_vlan_group *vg; struct net_bridge_vlan *vlan; @@ -559,6 +574,7 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) ASSERT_RTNL(); + *changed = false; vg = br_vlan_group(br); vlan = br_vlan_find(vg, vid); if (vlan) { @@ -576,8 +592,11 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) refcount_inc(&vlan->refcnt); vlan->flags |= BRIDGE_VLAN_INFO_BRENTRY; vg->num_vlans++; + *changed = true; } - __vlan_add_flags(vlan, flags); + if (__vlan_add_flags(vlan, flags)) + *changed = true; + return 0; } @@ -600,6 +619,8 @@ int br_vlan_add(struct net_bridge *br, u16 vid, u16 flags) if (ret) { free_percpu(vlan->stats); kfree(vlan); + } else { + *changed = true; } return ret; @@ -824,9 +845,10 @@ int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid) const struct net_bridge_vlan *pvent; struct net_bridge_vlan_group *vg; struct net_bridge_port *p; + unsigned long *changed; + bool vlchange; u16 old_pvid; int err = 0; - unsigned long *changed; if (!pvid) { br_vlan_disable_default_pvid(br); @@ -850,7 +872,8 @@ int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid) err = br_vlan_add(br, pvid, BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED | - BRIDGE_VLAN_INFO_BRENTRY); + BRIDGE_VLAN_INFO_BRENTRY, + &vlchange); if (err) goto out; br_vlan_delete(br, old_pvid); @@ -869,7 +892,8 @@ int __br_vlan_set_default_pvid(struct net_bridge *br, u16 pvid) err = nbp_vlan_add(p, pvid, BRIDGE_VLAN_INFO_PVID | - BRIDGE_VLAN_INFO_UNTAGGED); + BRIDGE_VLAN_INFO_UNTAGGED, + &vlchange); if (err) goto err_port; nbp_vlan_delete(p, old_pvid); @@ -890,7 +914,8 @@ err_port: if (old_pvid) nbp_vlan_add(p, old_pvid, BRIDGE_VLAN_INFO_PVID | - BRIDGE_VLAN_INFO_UNTAGGED); + BRIDGE_VLAN_INFO_UNTAGGED, + &vlchange); nbp_vlan_delete(p, pvid); } @@ -899,7 +924,8 @@ err_port: br_vlan_add(br, old_pvid, BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED | - BRIDGE_VLAN_INFO_BRENTRY); + BRIDGE_VLAN_INFO_BRENTRY, + &vlchange); br_vlan_delete(br, pvid); } goto out; @@ -931,6 +957,7 @@ int br_vlan_init(struct net_bridge *br) { struct net_bridge_vlan_group *vg; int ret = -ENOMEM; + bool changed; vg = kzalloc(sizeof(*vg), GFP_KERNEL); if (!vg) @@ -947,7 +974,7 @@ int br_vlan_init(struct net_bridge *br) rcu_assign_pointer(br->vlgrp, vg); ret = br_vlan_add(br, 1, BRIDGE_VLAN_INFO_PVID | BRIDGE_VLAN_INFO_UNTAGGED | - BRIDGE_VLAN_INFO_BRENTRY); + BRIDGE_VLAN_INFO_BRENTRY, &changed); if (ret) goto err_vlan_add; @@ -992,9 +1019,12 @@ int nbp_vlan_init(struct net_bridge_port *p) INIT_LIST_HEAD(&vg->vlan_list); rcu_assign_pointer(p->vlgrp, vg); if (p->br->default_pvid) { + bool changed; + ret = nbp_vlan_add(p, p->br->default_pvid, BRIDGE_VLAN_INFO_PVID | - BRIDGE_VLAN_INFO_UNTAGGED); + BRIDGE_VLAN_INFO_UNTAGGED, + &changed); if (ret) goto err_vlan_add; } @@ -1016,8 +1046,10 @@ err_vlan_enabled: /* Must be protected by RTNL. * Must be called with vid in range from 1 to 4094 inclusive. + * changed must be true only if the vlan was created or updated */ -int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) +int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags, + bool *changed) { struct switchdev_obj_port_vlan v = { .obj.orig_dev = port->dev, @@ -1031,13 +1063,15 @@ int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) ASSERT_RTNL(); + *changed = false; vlan = br_vlan_find(nbp_vlan_group(port), vid); if (vlan) { /* Pass the flags to the hardware bridge */ ret = switchdev_port_obj_add(port->dev, &v.obj); if (ret && ret != -EOPNOTSUPP) return ret; - __vlan_add_flags(vlan, flags); + *changed = __vlan_add_flags(vlan, flags); + return 0; } @@ -1050,6 +1084,8 @@ int nbp_vlan_add(struct net_bridge_port *port, u16 vid, u16 flags) ret = __vlan_add(vlan, flags); if (ret) kfree(vlan); + else + *changed = true; return ret; } diff --git a/net/bridge/netfilter/Makefile b/net/bridge/netfilter/Makefile index be4d0cea78ce..2f28e16de6c7 100644 --- a/net/bridge/netfilter/Makefile +++ b/net/bridge/netfilter/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the netfilter modules for Link Layer filtering on a bridge. # diff --git a/net/bridge/netfilter/ebt_ip.c b/net/bridge/netfilter/ebt_ip.c index d06968bdf5ec..2b46c50abce0 100644 --- a/net/bridge/netfilter/ebt_ip.c +++ b/net/bridge/netfilter/ebt_ip.c @@ -64,14 +64,14 @@ ebt_ip_mt(const struct sk_buff *skb, struct xt_action_param *par) if (NF_INVF(info, EBT_IP_DPORT, dst < info->dport[0] || dst > info->dport[1])) - return false; + return false; } if (info->bitmask & EBT_IP_SPORT) { u32 src = ntohs(pptr->src); if (NF_INVF(info, EBT_IP_SPORT, src < info->sport[0] || src > info->sport[1])) - return false; + return false; } } return true; diff --git a/net/bridge/netfilter/ebt_ip6.c b/net/bridge/netfilter/ebt_ip6.c index 4617491be41e..2a5a52a53ec4 100644 --- a/net/bridge/netfilter/ebt_ip6.c +++ b/net/bridge/netfilter/ebt_ip6.c @@ -89,7 +89,7 @@ ebt_ip6_mt(const struct sk_buff *skb, struct xt_action_param *par) if (NF_INVF(info, EBT_IP6_SPORT, src < info->sport[0] || src > info->sport[1])) - return false; + return false; } if ((info->bitmask & EBT_IP6_ICMP6) && NF_INVF(info, EBT_IP6_ICMP6, diff --git a/net/bridge/netfilter/ebtable_broute.c b/net/bridge/netfilter/ebtable_broute.c index 2585b100ebbb..276b60262981 100644 --- a/net/bridge/netfilter/ebtable_broute.c +++ b/net/bridge/netfilter/ebtable_broute.c @@ -65,8 +65,8 @@ static int ebt_broute(struct sk_buff *skb) static int __net_init broute_net_init(struct net *net) { - net->xt.broute_table = ebt_register_table(net, &broute_table, NULL); - return PTR_ERR_OR_ZERO(net->xt.broute_table); + return ebt_register_table(net, &broute_table, NULL, + &net->xt.broute_table); } static void __net_exit broute_net_exit(struct net *net) diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index f22ef7c21913..c41da5fac84f 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -70,7 +70,7 @@ ebt_out_hook(void *priv, struct sk_buff *skb, return ebt_do_table(skb, state, state->net->xt.frame_filter); } -static struct nf_hook_ops ebt_ops_filter[] __read_mostly = { +static const struct nf_hook_ops ebt_ops_filter[] = { { .hook = ebt_in_hook, .pf = NFPROTO_BRIDGE, @@ -93,8 +93,8 @@ static struct nf_hook_ops ebt_ops_filter[] __read_mostly = { static int __net_init frame_filter_net_init(struct net *net) { - net->xt.frame_filter = ebt_register_table(net, &frame_filter, ebt_ops_filter); - return PTR_ERR_OR_ZERO(net->xt.frame_filter); + return ebt_register_table(net, &frame_filter, ebt_ops_filter, + &net->xt.frame_filter); } static void __net_exit frame_filter_net_exit(struct net *net) diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 2f7a4f314406..08df7406ecb3 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -48,7 +48,7 @@ static int check(const struct ebt_table_info *info, unsigned int valid_hooks) return 0; } -static struct ebt_table frame_nat = { +static const struct ebt_table frame_nat = { .name = "nat", .table = &initial_table, .valid_hooks = NAT_VALID_HOOKS, @@ -70,7 +70,7 @@ ebt_nat_out(void *priv, struct sk_buff *skb, return ebt_do_table(skb, state, state->net->xt.frame_nat); } -static struct nf_hook_ops ebt_ops_nat[] __read_mostly = { +static const struct nf_hook_ops ebt_ops_nat[] = { { .hook = ebt_nat_out, .pf = NFPROTO_BRIDGE, @@ -93,8 +93,8 @@ static struct nf_hook_ops ebt_ops_nat[] __read_mostly = { static int __net_init frame_nat_net_init(struct net *net) { - net->xt.frame_nat = ebt_register_table(net, &frame_nat, ebt_ops_nat); - return PTR_ERR_OR_ZERO(net->xt.frame_nat); + return ebt_register_table(net, &frame_nat, ebt_ops_nat, + &net->xt.frame_nat); } static void __net_exit frame_nat_net_exit(struct net *net) diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 9c6e619f452b..37817d25b63d 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -252,13 +252,11 @@ unsigned int ebt_do_table(struct sk_buff *skb, } if (verdict == EBT_RETURN) { letsreturn: -#ifdef CONFIG_NETFILTER_DEBUG - if (sp == 0) { - BUGPRINT("RETURN on base chain"); + if (WARN(sp == 0, "RETURN on base chain")) { /* act like this is EBT_CONTINUE */ goto letscontinue; } -#endif + sp--; /* put all the local variables right */ i = cs[sp].n; @@ -271,26 +269,24 @@ letsreturn: } if (verdict == EBT_CONTINUE) goto letscontinue; -#ifdef CONFIG_NETFILTER_DEBUG - if (verdict < 0) { - BUGPRINT("bogus standard verdict\n"); + + if (WARN(verdict < 0, "bogus standard verdict\n")) { read_unlock_bh(&table->lock); return NF_DROP; } -#endif + /* jump to a udc */ cs[sp].n = i + 1; cs[sp].chaininfo = chaininfo; cs[sp].e = ebt_next_entry(point); i = 0; chaininfo = (struct ebt_entries *) (base + verdict); -#ifdef CONFIG_NETFILTER_DEBUG - if (chaininfo->distinguisher) { - BUGPRINT("jump to non-chain\n"); + + if (WARN(chaininfo->distinguisher, "jump to non-chain\n")) { read_unlock_bh(&table->lock); return NF_DROP; } -#endif + nentries = chaininfo->nentries; point = (struct ebt_entry *)chaininfo->data; counter_base = cb_base + chaininfo->counter_offset; @@ -1069,15 +1065,10 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl, #ifdef CONFIG_AUDIT if (audit_enabled) { - struct audit_buffer *ab; - - ab = audit_log_start(current->audit_context, GFP_KERNEL, - AUDIT_NETFILTER_CFG); - if (ab) { - audit_log_format(ab, "table=%s family=%u entries=%u", - repl->name, AF_BRIDGE, repl->nentries); - audit_log_end(ab); - } + audit_log(current->audit_context, GFP_KERNEL, + AUDIT_NETFILTER_CFG, + "table=%s family=%u entries=%u", + repl->name, AF_BRIDGE, repl->nentries); } #endif return ret; @@ -1178,9 +1169,8 @@ static void __ebt_unregister_table(struct net *net, struct ebt_table *table) kfree(table); } -struct ebt_table * -ebt_register_table(struct net *net, const struct ebt_table *input_table, - const struct nf_hook_ops *ops) +int ebt_register_table(struct net *net, const struct ebt_table *input_table, + const struct nf_hook_ops *ops, struct ebt_table **res) { struct ebt_table_info *newinfo; struct ebt_table *t, *table; @@ -1192,7 +1182,7 @@ ebt_register_table(struct net *net, const struct ebt_table *input_table, repl->entries == NULL || repl->entries_size == 0 || repl->counters != NULL || input_table->private != NULL) { BUGPRINT("Bad table data for ebt_register_table!!!\n"); - return ERR_PTR(-EINVAL); + return -EINVAL; } /* Don't add one table to multiple lists. */ @@ -1261,16 +1251,18 @@ ebt_register_table(struct net *net, const struct ebt_table *input_table, list_add(&table->list, &net->xt.tables[NFPROTO_BRIDGE]); mutex_unlock(&ebt_mutex); + WRITE_ONCE(*res, table); + if (!ops) - return table; + return 0; ret = nf_register_net_hooks(net, ops, hweight32(table->valid_hooks)); if (ret) { __ebt_unregister_table(net, table); - return ERR_PTR(ret); + *res = NULL; } - return table; + return ret; free_unlock: mutex_unlock(&ebt_mutex); free_chainstack: @@ -1285,7 +1277,7 @@ free_newinfo: free_table: kfree(table); out: - return ERR_PTR(ret); + return ret; } void ebt_unregister_table(struct net *net, struct ebt_table *table, @@ -2120,9 +2112,8 @@ static int size_entry_mwt(struct ebt_entry *entry, const unsigned char *base, for (i = 0, j = 1 ; j < 4 ; j++, i++) { struct compat_ebt_entry_mwt *match32; unsigned int size; - char *buf = buf_start; + char *buf = buf_start + offsets[i]; - buf = buf_start + offsets[i]; if (offsets[i] > offsets[j]) return -EINVAL; diff --git a/net/caif/Makefile b/net/caif/Makefile index cc2b51154d03..4f6c0517cdfb 100644 --- a/net/caif/Makefile +++ b/net/caif/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 ccflags-$(CONFIG_CAIF_DEBUG) := -DDEBUG caif-y := caif_dev.o \ diff --git a/net/can/Makefile b/net/can/Makefile index 10936754e3f2..1242bbbfe57f 100644 --- a/net/can/Makefile +++ b/net/can/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the Linux Controller Area Network core. # diff --git a/net/can/af_can.c b/net/can/af_can.c index 88edac0f3e36..003b2d6d655f 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -78,7 +78,7 @@ MODULE_PARM_DESC(stats_timer, "enable timer for statistics (default:on)"); static struct kmem_cache *rcv_cache __read_mostly; /* table of registered CAN protocols */ -static const struct can_proto *proto_tab[CAN_NPROTO] __read_mostly; +static const struct can_proto __rcu *proto_tab[CAN_NPROTO] __read_mostly; static DEFINE_MUTEX(proto_tab_lock); static atomic_t skbcounter = ATOMIC_INIT(0); @@ -788,7 +788,7 @@ int can_proto_register(const struct can_proto *cp) mutex_lock(&proto_tab_lock); - if (proto_tab[proto]) { + if (rcu_access_pointer(proto_tab[proto])) { pr_err("can: protocol %d already registered\n", proto); err = -EBUSY; } else @@ -812,7 +812,7 @@ void can_proto_unregister(const struct can_proto *cp) int proto = cp->protocol; mutex_lock(&proto_tab_lock); - BUG_ON(proto_tab[proto] != cp); + BUG_ON(rcu_access_pointer(proto_tab[proto]) != cp); RCU_INIT_POINTER(proto_tab[proto], NULL); mutex_unlock(&proto_tab_lock); @@ -875,15 +875,20 @@ static int can_pernet_init(struct net *net) spin_lock_init(&net->can.can_rcvlists_lock); net->can.can_rx_alldev_list = kzalloc(sizeof(struct dev_rcv_lists), GFP_KERNEL); - + if (!net->can.can_rx_alldev_list) + goto out; net->can.can_stats = kzalloc(sizeof(struct s_stats), GFP_KERNEL); + if (!net->can.can_stats) + goto out_free_alldev_list; net->can.can_pstats = kzalloc(sizeof(struct s_pstats), GFP_KERNEL); + if (!net->can.can_pstats) + goto out_free_can_stats; if (IS_ENABLED(CONFIG_PROC_FS)) { /* the statistics are updated every second (timer triggered) */ if (stats_timer) { - setup_timer(&net->can.can_stattimer, can_stat_update, - (unsigned long)net); + timer_setup(&net->can.can_stattimer, can_stat_update, + 0); mod_timer(&net->can.can_stattimer, round_jiffies(jiffies + HZ)); } @@ -892,6 +897,13 @@ static int can_pernet_init(struct net *net) } return 0; + + out_free_can_stats: + kfree(net->can.can_stats); + out_free_alldev_list: + kfree(net->can.can_rx_alldev_list); + out: + return -ENOMEM; } static void can_pernet_exit(struct net *net) diff --git a/net/can/af_can.h b/net/can/af_can.h index d0ef45bb2a72..eca6463c6213 100644 --- a/net/can/af_can.h +++ b/net/can/af_can.h @@ -113,6 +113,6 @@ struct s_pstats { /* function prototypes for the CAN networklayer procfs (proc.c) */ void can_init_proc(struct net *net); void can_remove_proc(struct net *net); -void can_stat_update(unsigned long data); +void can_stat_update(struct timer_list *t); #endif /* AF_CAN_H */ diff --git a/net/can/bcm.c b/net/can/bcm.c index 47a8748d953a..13690334efa3 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -1493,13 +1493,14 @@ static int bcm_init(struct sock *sk) static int bcm_release(struct socket *sock) { struct sock *sk = sock->sk; - struct net *net = sock_net(sk); + struct net *net; struct bcm_sock *bo; struct bcm_op *op, *next; - if (sk == NULL) + if (!sk) return 0; + net = sock_net(sk); bo = bcm_sk(sk); /* remove bcm_ops, timer, rx_unregister(), etc. */ diff --git a/net/can/gw.c b/net/can/gw.c index 29748d844c3f..73a02af4b5d7 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -1031,15 +1031,15 @@ static __init int cgw_module_init(void) notifier.notifier_call = cgw_notifier; register_netdevice_notifier(¬ifier); - if (__rtnl_register(PF_CAN, RTM_GETROUTE, NULL, cgw_dump_jobs, NULL)) { + if (__rtnl_register(PF_CAN, RTM_GETROUTE, NULL, cgw_dump_jobs, 0)) { unregister_netdevice_notifier(¬ifier); kmem_cache_destroy(cgw_cache); return -ENOBUFS; } /* Only the first call to __rtnl_register can fail */ - __rtnl_register(PF_CAN, RTM_NEWROUTE, cgw_create_job, NULL, NULL); - __rtnl_register(PF_CAN, RTM_DELROUTE, cgw_remove_job, NULL, NULL); + __rtnl_register(PF_CAN, RTM_NEWROUTE, cgw_create_job, NULL, 0); + __rtnl_register(PF_CAN, RTM_DELROUTE, cgw_remove_job, NULL, 0); return 0; } diff --git a/net/can/proc.c b/net/can/proc.c index 83045f00c63c..d979b3dc49a6 100644 --- a/net/can/proc.c +++ b/net/can/proc.c @@ -115,9 +115,9 @@ static unsigned long calc_rate(unsigned long oldjif, unsigned long newjif, return rate; } -void can_stat_update(unsigned long data) +void can_stat_update(struct timer_list *t) { - struct net *net = (struct net *)data; + struct net *net = from_timer(net, t, can.can_stattimer); struct s_stats *can_stats = net->can.can_stats; unsigned long j = jiffies; /* snapshot */ @@ -221,7 +221,7 @@ static int can_stats_proc_show(struct seq_file *m, void *v) seq_putc(m, '\n'); - if (net->can.can_stattimer.function == can_stat_update) { + if (net->can.can_stattimer.function == (TIMER_FUNC_TYPE)can_stat_update) { seq_printf(m, " %8ld %% total match ratio (RXMR)\n", can_stats->total_rx_match_ratio); @@ -291,7 +291,7 @@ static int can_reset_stats_proc_show(struct seq_file *m, void *v) user_reset = 1; - if (net->can.can_stattimer.function == can_stat_update) { + if (net->can.can_stattimer.function == (TIMER_FUNC_TYPE)can_stat_update) { seq_printf(m, "Scheduled statistic reset #%ld.\n", can_pstats->stats_reset + 1); } else { diff --git a/net/ceph/Makefile b/net/ceph/Makefile index 6a5180903e7b..b4bded4b5396 100644 --- a/net/ceph/Makefile +++ b/net/ceph/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for CEPH filesystem. # diff --git a/net/ceph/armor.c b/net/ceph/armor.c index 1fc1ee11dfa2..0db8065928df 100644 --- a/net/ceph/armor.c +++ b/net/ceph/armor.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/errno.h> diff --git a/net/ceph/auth.c b/net/ceph/auth.c index 48bb8d95195b..dbde2b3c3c15 100644 --- a/net/ceph/auth.c +++ b/net/ceph/auth.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/ceph/ceph_debug.h> #include <linux/module.h> diff --git a/net/ceph/auth_none.c b/net/ceph/auth_none.c index df45e467c81f..41d2a0c72236 100644 --- a/net/ceph/auth_none.c +++ b/net/ceph/auth_none.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/ceph/ceph_debug.h> diff --git a/net/ceph/auth_none.h b/net/ceph/auth_none.h index 62021535ae4a..860ed9875791 100644 --- a/net/ceph/auth_none.h +++ b/net/ceph/auth_none.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _FS_CEPH_AUTH_NONE_H #define _FS_CEPH_AUTH_NONE_H diff --git a/net/ceph/auth_x.c b/net/ceph/auth_x.c index 8757fb87dab8..2f4a1baf5f52 100644 --- a/net/ceph/auth_x.c +++ b/net/ceph/auth_x.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/ceph/ceph_debug.h> diff --git a/net/ceph/auth_x.h b/net/ceph/auth_x.h index 48e9ad41bd2a..454cb54568af 100644 --- a/net/ceph/auth_x.h +++ b/net/ceph/auth_x.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _FS_CEPH_AUTH_X_H #define _FS_CEPH_AUTH_X_H diff --git a/net/ceph/auth_x_protocol.h b/net/ceph/auth_x_protocol.h index 671d30576c4f..32c13d763b9a 100644 --- a/net/ceph/auth_x_protocol.h +++ b/net/ceph/auth_x_protocol.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __FS_CEPH_AUTH_X_PROTOCOL #define __FS_CEPH_AUTH_X_PROTOCOL diff --git a/net/ceph/buffer.c b/net/ceph/buffer.c index add5f921a0ff..5622763ad402 100644 --- a/net/ceph/buffer.c +++ b/net/ceph/buffer.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/ceph/ceph_debug.h> diff --git a/net/ceph/ceph_fs.c b/net/ceph/ceph_fs.c index dcbe67ff3e2b..756a2dc10d27 100644 --- a/net/ceph/ceph_fs.c +++ b/net/ceph/ceph_fs.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Some non-inline ceph helpers */ diff --git a/net/ceph/ceph_strings.c b/net/ceph/ceph_strings.c index 19b7d8aa915c..10e01494993c 100644 --- a/net/ceph/ceph_strings.c +++ b/net/ceph/ceph_strings.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Ceph string constants */ diff --git a/net/ceph/cls_lock_client.c b/net/ceph/cls_lock_client.c index 08ada893f01e..8d2032b2f225 100644 --- a/net/ceph/cls_lock_client.c +++ b/net/ceph/cls_lock_client.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/ceph/ceph_debug.h> #include <linux/types.h> diff --git a/net/ceph/crush/crush.c b/net/ceph/crush/crush.c index 4b428f46a8ca..3d70244bc1b6 100644 --- a/net/ceph/crush/crush.c +++ b/net/ceph/crush/crush.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #ifdef __KERNEL__ # include <linux/slab.h> # include <linux/crush/crush.h> diff --git a/net/ceph/crush/hash.c b/net/ceph/crush/hash.c index ed123af49eba..e5cc603cdb17 100644 --- a/net/ceph/crush/hash.c +++ b/net/ceph/crush/hash.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #ifdef __KERNEL__ # include <linux/crush/hash.h> #else diff --git a/net/ceph/crypto.c b/net/ceph/crypto.c index 46008d5ac504..489610ac1cdd 100644 --- a/net/ceph/crypto.c +++ b/net/ceph/crypto.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/ceph/ceph_debug.h> diff --git a/net/ceph/crypto.h b/net/ceph/crypto.h index 58d83aa7740f..bb45c7d43739 100644 --- a/net/ceph/crypto.h +++ b/net/ceph/crypto.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _FS_CEPH_CRYPTO_H #define _FS_CEPH_CRYPTO_H diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c index fa5233e0d01c..1eef6806aa1a 100644 --- a/net/ceph/debugfs.c +++ b/net/ceph/debugfs.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/ceph/ceph_debug.h> #include <linux/device.h> diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index a67298c7e0cd..ad93342c90d7 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/ceph/ceph_debug.h> #include <linux/crc32c.h> diff --git a/net/ceph/mon_client.c b/net/ceph/mon_client.c index 875675765531..9ae1bab8c05d 100644 --- a/net/ceph/mon_client.c +++ b/net/ceph/mon_client.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/ceph/ceph_debug.h> #include <linux/module.h> @@ -676,7 +677,8 @@ bad: /* * Do a synchronous statfs(). */ -int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf) +int ceph_monc_do_statfs(struct ceph_mon_client *monc, u64 data_pool, + struct ceph_statfs *buf) { struct ceph_mon_generic_request *req; struct ceph_mon_statfs *h; @@ -696,6 +698,7 @@ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf) goto out; req->u.st = buf; + req->request->hdr.version = cpu_to_le16(2); mutex_lock(&monc->mutex); register_generic_request(req); @@ -705,6 +708,8 @@ int ceph_monc_do_statfs(struct ceph_mon_client *monc, struct ceph_statfs *buf) h->monhdr.session_mon = cpu_to_le16(-1); h->monhdr.session_mon_tid = 0; h->fsid = monc->monmap->fsid; + h->contains_data_pool = (data_pool != CEPH_NOPOOL); + h->data_pool = cpu_to_le64(data_pool); send_generic_request(monc, req); mutex_unlock(&monc->mutex); diff --git a/net/ceph/msgpool.c b/net/ceph/msgpool.c index aaed59a47b1d..72571535883f 100644 --- a/net/ceph/msgpool.c +++ b/net/ceph/msgpool.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/ceph/ceph_debug.h> #include <linux/err.h> diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index dcfbdd74dfd1..2814dba5902d 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/ceph/ceph_debug.h> @@ -863,8 +864,6 @@ static u32 osd_req_encode_op(struct ceph_osd_op *dst, dst->cls.method_len = src->cls.method_len; dst->cls.indata_len = cpu_to_le32(src->cls.indata_len); break; - case CEPH_OSD_OP_STARTSYNC: - break; case CEPH_OSD_OP_WATCH: dst->watch.cookie = cpu_to_le64(src->watch.cookie); dst->watch.ver = cpu_to_le64(0); @@ -916,9 +915,6 @@ static u32 osd_req_encode_op(struct ceph_osd_op *dst, * if the file was recently truncated, we include information about its * old and new size so that the object can be updated appropriately. (we * avoid synchronously deleting truncated objects because it's slow.) - * - * if @do_sync, include a 'startsync' command so that the osd will flush - * data quickly. */ struct ceph_osd_request *ceph_osdc_new_request(struct ceph_osd_client *osdc, struct ceph_file_layout *layout, diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index f358d0bfa76b..0da27c66349a 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/ceph/ceph_debug.h> @@ -2445,19 +2446,34 @@ static void apply_upmap(struct ceph_osdmap *osdmap, pg = lookup_pg_mapping(&osdmap->pg_upmap_items, pgid); if (pg) { - for (i = 0; i < raw->size; i++) { - for (j = 0; j < pg->pg_upmap_items.len; j++) { - int from = pg->pg_upmap_items.from_to[j][0]; - int to = pg->pg_upmap_items.from_to[j][1]; - - if (from == raw->osds[i]) { - if (!(to != CRUSH_ITEM_NONE && - to < osdmap->max_osd && - osdmap->osd_weight[to] == 0)) - raw->osds[i] = to; + /* + * Note: this approach does not allow a bidirectional swap, + * e.g., [[1,2],[2,1]] applied to [0,1,2] -> [0,2,1]. + */ + for (i = 0; i < pg->pg_upmap_items.len; i++) { + int from = pg->pg_upmap_items.from_to[i][0]; + int to = pg->pg_upmap_items.from_to[i][1]; + int pos = -1; + bool exists = false; + + /* make sure replacement doesn't already appear */ + for (j = 0; j < raw->size; j++) { + int osd = raw->osds[j]; + + if (osd == to) { + exists = true; break; } + /* ignore mapping if target is marked out */ + if (osd == from && pos < 0 && + !(to != CRUSH_ITEM_NONE && + to < osdmap->max_osd && + osdmap->osd_weight[to] == 0)) { + pos = j; + } } + if (!exists && pos >= 0) + raw->osds[pos] = to; } } } diff --git a/net/ceph/pagelist.c b/net/ceph/pagelist.c index ce09f73be759..2ea0564771d2 100644 --- a/net/ceph/pagelist.c +++ b/net/ceph/pagelist.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/module.h> #include <linux/gfp.h> #include <linux/slab.h> diff --git a/net/ceph/pagevec.c b/net/ceph/pagevec.c index 1a7c9a79a53c..a3d0adc828e6 100644 --- a/net/ceph/pagevec.c +++ b/net/ceph/pagevec.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/ceph/ceph_debug.h> #include <linux/module.h> @@ -24,9 +25,9 @@ struct page **ceph_get_direct_page_vector(const void __user *data, return ERR_PTR(-ENOMEM); while (got < num_pages) { - rc = get_user_pages_unlocked( + rc = get_user_pages_fast( (unsigned long)data + ((unsigned long)got * PAGE_SIZE), - num_pages - got, pages + got, write_page ? FOLL_WRITE : 0); + num_pages - got, write_page, pages + got); if (rc < 0) break; BUG_ON(rc == 0); diff --git a/net/ceph/string_table.c b/net/ceph/string_table.c index 22fb96efcf34..3191d9d160a2 100644 --- a/net/ceph/string_table.c +++ b/net/ceph/string_table.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/slab.h> #include <linux/gfp.h> #include <linux/string.h> diff --git a/net/compat.c b/net/compat.c index 6ded6c821d7a..22381719718c 100644 --- a/net/compat.c +++ b/net/compat.c @@ -185,6 +185,13 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk, ucmsg = cmsg_compat_nxthdr(kmsg, ucmsg, ucmlen); } + /* + * check the length of messages copied in is the same as the + * what we get from the first loop + */ + if ((char *)kcmsg - (char *)kcmsg_base != kcmlen) + goto Einval; + /* Ok, looks like we made it. Hook it up and return success. */ kmsg->msg_control = kcmsg_base; kmsg->msg_controllen = kcmlen; diff --git a/net/core/Makefile b/net/core/Makefile index 79f9479e9658..1fd0a9c88b1b 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the Linux networking core. # @@ -9,9 +10,9 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o obj-y += dev.o ethtool.o dev_addr_lists.o dst.o netevent.o \ neighbour.o rtnetlink.o utils.o link_watch.o filter.o \ - sock_diag.o dev_ioctl.o tso.o sock_reuseport.o + sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \ + fib_notifier.o -obj-$(CONFIG_XFRM) += flow.o obj-y += net-sysfs.o obj-$(CONFIG_PROC_FS) += net-procfs.o obj-$(CONFIG_NET_PKTGEN) += pktgen.o diff --git a/net/core/datagram.c b/net/core/datagram.c index ee5647bd91b3..522873ed120b 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * SUCS NET3: * @@ -169,20 +170,26 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk, int *peeked, int *off, int *err, struct sk_buff **last) { + bool peek_at_off = false; struct sk_buff *skb; - int _off = *off; + int _off = 0; + + if (unlikely(flags & MSG_PEEK && *off >= 0)) { + peek_at_off = true; + _off = *off; + } *last = queue->prev; skb_queue_walk(queue, skb) { if (flags & MSG_PEEK) { - if (_off >= skb->len && (skb->len || _off || - skb->peeked)) { + if (peek_at_off && _off >= skb->len && + (_off || skb->peeked)) { _off -= skb->len; continue; } if (!skb->len) { skb = skb_set_peeked(skb); - if (unlikely(IS_ERR(skb))) { + if (IS_ERR(skb)) { *err = PTR_ERR(skb); return NULL; } @@ -356,7 +363,7 @@ int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue, if (flags & MSG_PEEK) { err = -ENOENT; spin_lock_bh(&sk_queue->lock); - if (skb == skb_peek(sk_queue)) { + if (skb->next) { __skb_unlink(skb, sk_queue); refcount_dec(&skb->users); if (destructor) @@ -573,27 +580,12 @@ fault: } EXPORT_SYMBOL(skb_copy_datagram_from_iter); -/** - * zerocopy_sg_from_iter - Build a zerocopy datagram from an iov_iter - * @skb: buffer to copy - * @from: the source to copy from - * - * The function will first copy up to headlen, and then pin the userspace - * pages and build frags through them. - * - * Returns 0, -EFAULT or -EMSGSIZE. - */ -int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from) +int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb, + struct iov_iter *from, size_t length) { - int len = iov_iter_count(from); - int copy = min_t(int, skb_headlen(skb), len); - int frag = 0; + int frag = skb_shinfo(skb)->nr_frags; - /* copy up to skb headlen */ - if (skb_copy_datagram_from_iter(skb, 0, from, copy)) - return -EFAULT; - - while (iov_iter_count(from)) { + while (length && iov_iter_count(from)) { struct page *pages[MAX_SKB_FRAGS]; size_t start; ssize_t copied; @@ -603,18 +595,24 @@ int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from) if (frag == MAX_SKB_FRAGS) return -EMSGSIZE; - copied = iov_iter_get_pages(from, pages, ~0U, + copied = iov_iter_get_pages(from, pages, length, MAX_SKB_FRAGS - frag, &start); if (copied < 0) return -EFAULT; iov_iter_advance(from, copied); + length -= copied; truesize = PAGE_ALIGN(copied + start); skb->data_len += copied; skb->len += copied; skb->truesize += truesize; - refcount_add(truesize, &skb->sk->sk_wmem_alloc); + if (sk && sk->sk_type == SOCK_STREAM) { + sk->sk_wmem_queued += truesize; + sk_mem_charge(sk, truesize); + } else { + refcount_add(truesize, &skb->sk->sk_wmem_alloc); + } while (copied) { int size = min_t(int, copied, PAGE_SIZE - start); skb_fill_page_desc(skb, frag++, pages[n], start, size); @@ -625,6 +623,28 @@ int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from) } return 0; } +EXPORT_SYMBOL(__zerocopy_sg_from_iter); + +/** + * zerocopy_sg_from_iter - Build a zerocopy datagram from an iov_iter + * @skb: buffer to copy + * @from: the source to copy from + * + * The function will first copy up to headlen, and then pin the userspace + * pages and build frags through them. + * + * Returns 0, -EFAULT or -EMSGSIZE. + */ +int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *from) +{ + int copy = min_t(int, skb_headlen(skb), iov_iter_count(from)); + + /* copy up to skb headlen */ + if (skb_copy_datagram_from_iter(skb, 0, from, copy)) + return -EFAULT; + + return __zerocopy_sg_from_iter(NULL, skb, from, ~0U); +} EXPORT_SYMBOL(zerocopy_sg_from_iter); static int skb_copy_and_csum_datagram(const struct sk_buff *skb, int offset, diff --git a/net/core/dev.c b/net/core/dev.c index ce15a06d5558..8ee29f4f5fa9 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -144,6 +144,8 @@ #include <linux/netfilter_ingress.h> #include <linux/crash_dump.h> #include <linux/sctp.h> +#include <net/udp_tunnel.h> +#include <linux/net_namespace.h> #include "net-sysfs.h" @@ -161,7 +163,6 @@ static struct list_head offload_base __read_mostly; static int netif_rx_internal(struct sk_buff *skb); static int call_netdevice_notifiers_info(unsigned long val, - struct net_device *dev, struct netdev_notifier_info *info); static struct napi_struct *napi_by_id(unsigned int napi_id); @@ -187,6 +188,8 @@ static struct napi_struct *napi_by_id(unsigned int napi_id); DEFINE_RWLOCK(dev_base_lock); EXPORT_SYMBOL(dev_base_lock); +static DEFINE_MUTEX(ifalias_mutex); + /* protects napi_hash addition/deletion and napi_gen_id */ static DEFINE_SPINLOCK(napi_hash_lock); @@ -1061,7 +1064,10 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf) unsigned long *inuse; struct net_device *d; - p = strnchr(name, IFNAMSIZ-1, '%'); + if (!dev_valid_name(name)) + return -EINVAL; + + p = strchr(name, '%'); if (p) { /* * Verify the string as this thing may have come from @@ -1092,8 +1098,7 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf) free_page((unsigned long) inuse); } - if (buf != name) - snprintf(buf, IFNAMSIZ, name, i); + snprintf(buf, IFNAMSIZ, name, i); if (!__dev_get_by_name(net, buf)) return i; @@ -1101,7 +1106,21 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf) * when the name is long and there isn't enough space left * for the digits, or if all bits are used. */ - return -ENFILE; + return p ? -ENFILE : -EEXIST; +} + +static int dev_alloc_name_ns(struct net *net, + struct net_device *dev, + const char *name) +{ + char buf[IFNAMSIZ]; + int ret; + + BUG_ON(!net); + ret = __dev_alloc_name(net, name, buf); + if (ret >= 0) + strlcpy(dev->name, buf, IFNAMSIZ); + return ret; } /** @@ -1120,50 +1139,16 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf) int dev_alloc_name(struct net_device *dev, const char *name) { - char buf[IFNAMSIZ]; - struct net *net; - int ret; - - BUG_ON(!dev_net(dev)); - net = dev_net(dev); - ret = __dev_alloc_name(net, name, buf); - if (ret >= 0) - strlcpy(dev->name, buf, IFNAMSIZ); - return ret; + return dev_alloc_name_ns(dev_net(dev), dev, name); } EXPORT_SYMBOL(dev_alloc_name); -static int dev_alloc_name_ns(struct net *net, - struct net_device *dev, - const char *name) -{ - char buf[IFNAMSIZ]; - int ret; - - ret = __dev_alloc_name(net, name, buf); - if (ret >= 0) - strlcpy(dev->name, buf, IFNAMSIZ); - return ret; -} - -static int dev_get_valid_name(struct net *net, - struct net_device *dev, - const char *name) +int dev_get_valid_name(struct net *net, struct net_device *dev, + const char *name) { - BUG_ON(!net); - - if (!dev_valid_name(name)) - return -EINVAL; - - if (strchr(name, '%')) - return dev_alloc_name_ns(net, dev, name); - else if (__dev_get_by_name(net, name)) - return -EEXIST; - else if (dev->name != name) - strlcpy(dev->name, name, IFNAMSIZ); - - return 0; + return dev_alloc_name_ns(net, dev, name); } +EXPORT_SYMBOL(dev_get_valid_name); /** * dev_change_name - change name of a device @@ -1264,29 +1249,53 @@ rollback: */ int dev_set_alias(struct net_device *dev, const char *alias, size_t len) { - char *new_ifalias; - - ASSERT_RTNL(); + struct dev_ifalias *new_alias = NULL; if (len >= IFALIASZ) return -EINVAL; - if (!len) { - kfree(dev->ifalias); - dev->ifalias = NULL; - return 0; + if (len) { + new_alias = kmalloc(sizeof(*new_alias) + len + 1, GFP_KERNEL); + if (!new_alias) + return -ENOMEM; + + memcpy(new_alias->ifalias, alias, len); + new_alias->ifalias[len] = 0; } - new_ifalias = krealloc(dev->ifalias, len + 1, GFP_KERNEL); - if (!new_ifalias) - return -ENOMEM; - dev->ifalias = new_ifalias; - memcpy(dev->ifalias, alias, len); - dev->ifalias[len] = 0; + mutex_lock(&ifalias_mutex); + rcu_swap_protected(dev->ifalias, new_alias, + mutex_is_locked(&ifalias_mutex)); + mutex_unlock(&ifalias_mutex); + + if (new_alias) + kfree_rcu(new_alias, rcuhead); return len; } +/** + * dev_get_alias - get ifalias of a device + * @dev: device + * @name: buffer to store name of ifalias + * @len: size of buffer + * + * get ifalias for a device. Caller must make sure dev cannot go + * away, e.g. rcu read lock or own a reference count to device. + */ +int dev_get_alias(const struct net_device *dev, char *name, size_t len) +{ + const struct dev_ifalias *alias; + int ret = 0; + + rcu_read_lock(); + alias = rcu_dereference(dev->ifalias); + if (alias) + ret = snprintf(name, len, "%s", alias->ifalias); + rcu_read_unlock(); + + return ret; +} /** * netdev_features_change - device changes features @@ -1311,10 +1320,11 @@ EXPORT_SYMBOL(netdev_features_change); void netdev_state_change(struct net_device *dev) { if (dev->flags & IFF_UP) { - struct netdev_notifier_change_info change_info; + struct netdev_notifier_change_info change_info = { + .info.dev = dev, + }; - change_info.flags_changed = 0; - call_netdevice_notifiers_info(NETDEV_CHANGE, dev, + call_netdevice_notifiers_info(NETDEV_CHANGE, &change_info.info); rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL); } @@ -1413,7 +1423,7 @@ int dev_open(struct net_device *dev) } EXPORT_SYMBOL(dev_open); -static int __dev_close_many(struct list_head *head) +static void __dev_close_many(struct list_head *head) { struct net_device *dev; @@ -1455,23 +1465,18 @@ static int __dev_close_many(struct list_head *head) dev->flags &= ~IFF_UP; netpoll_poll_enable(dev); } - - return 0; } -static int __dev_close(struct net_device *dev) +static void __dev_close(struct net_device *dev) { - int retval; LIST_HEAD(single); list_add(&dev->close_list, &single); - retval = __dev_close_many(&single); + __dev_close_many(&single); list_del(&single); - - return retval; } -int dev_close_many(struct list_head *head, bool unlink) +void dev_close_many(struct list_head *head, bool unlink) { struct net_device *dev, *tmp; @@ -1488,8 +1493,6 @@ int dev_close_many(struct list_head *head, bool unlink) if (unlink) list_del_init(&dev->close_list); } - - return 0; } EXPORT_SYMBOL(dev_close_many); @@ -1502,7 +1505,7 @@ EXPORT_SYMBOL(dev_close_many); * is then deactivated and finally a %NETDEV_DOWN is sent to the notifier * chain. */ -int dev_close(struct net_device *dev) +void dev_close(struct net_device *dev) { if (dev->flags & IFF_UP) { LIST_HEAD(single); @@ -1511,7 +1514,6 @@ int dev_close(struct net_device *dev) dev_close_many(&single, true); list_del(&single); } - return 0; } EXPORT_SYMBOL(dev_close); @@ -1543,9 +1545,10 @@ EXPORT_SYMBOL(dev_disable_lro); static int call_netdevice_notifier(struct notifier_block *nb, unsigned long val, struct net_device *dev) { - struct netdev_notifier_info info; + struct netdev_notifier_info info = { + .dev = dev, + }; - netdev_notifier_info_init(&info, dev); return nb->notifier_call(nb, val, &info); } @@ -1670,11 +1673,9 @@ EXPORT_SYMBOL(unregister_netdevice_notifier); */ static int call_netdevice_notifiers_info(unsigned long val, - struct net_device *dev, struct netdev_notifier_info *info) { ASSERT_RTNL(); - netdev_notifier_info_init(info, dev); return raw_notifier_call_chain(&netdev_chain, val, info); } @@ -1689,9 +1690,11 @@ static int call_netdevice_notifiers_info(unsigned long val, int call_netdevice_notifiers(unsigned long val, struct net_device *dev) { - struct netdev_notifier_info info; + struct netdev_notifier_info info = { + .dev = dev, + }; - return call_netdevice_notifiers_info(val, dev, &info); + return call_netdevice_notifiers_info(val, &info); } EXPORT_SYMBOL(call_netdevice_notifiers); @@ -1860,7 +1863,7 @@ static inline int deliver_skb(struct sk_buff *skb, struct packet_type *pt_prev, struct net_device *orig_dev) { - if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC))) + if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC))) return -ENOMEM; refcount_inc(&skb->users); return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); @@ -1955,8 +1958,12 @@ again: goto again; } out_unlock: - if (pt_prev) - pt_prev->func(skb2, skb->dev, pt_prev, skb->dev); + if (pt_prev) { + if (!skb_orphan_frags_rx(skb2, GFP_ATOMIC)) + pt_prev->func(skb2, skb->dev, pt_prev, skb->dev); + else + kfree_skb(skb2); + } rcu_read_unlock(); } EXPORT_SYMBOL_GPL(dev_queue_xmit_nit); @@ -2015,6 +2022,7 @@ int netdev_txq_to_tc(struct net_device *dev, unsigned int txq) return 0; } +EXPORT_SYMBOL(netdev_txq_to_tc); #ifdef CONFIG_XPS static DEFINE_MUTEX(xps_map_mutex); @@ -2738,8 +2746,7 @@ EXPORT_SYMBOL(skb_mac_gso_segment); static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path) { if (tx_path) - return skb->ip_summed != CHECKSUM_PARTIAL && - skb->ip_summed != CHECKSUM_UNNECESSARY; + return skb->ip_summed != CHECKSUM_PARTIAL; return skb->ip_summed == CHECKSUM_NONE; } @@ -3249,22 +3256,22 @@ EXPORT_SYMBOL(dev_loopback_xmit); static struct sk_buff * sch_handle_egress(struct sk_buff *skb, int *ret, struct net_device *dev) { - struct tcf_proto *cl = rcu_dereference_bh(dev->egress_cl_list); + struct mini_Qdisc *miniq = rcu_dereference_bh(dev->miniq_egress); struct tcf_result cl_res; - if (!cl) + if (!miniq) return skb; /* qdisc_skb_cb(skb)->pkt_len was already set by the caller. */ - qdisc_bstats_cpu_update(cl->q, skb); + mini_qdisc_bstats_cpu_update(miniq, skb); - switch (tcf_classify(skb, cl, &cl_res, false)) { + switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) { case TC_ACT_OK: case TC_ACT_RECLASSIFY: skb->tc_index = TC_H_MIN(cl_res.classid); break; case TC_ACT_SHOT: - qdisc_qstats_cpu_drop(cl->q); + mini_qdisc_qstats_cpu_drop(miniq); *ret = NET_XMIT_DROP; kfree_skb(skb); return NULL; @@ -3729,7 +3736,7 @@ bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, flow_table = rcu_dereference(rxqueue->rps_flow_table); if (flow_table && flow_id <= flow_table->mask) { rflow = &flow_table->flows[flow_id]; - cpu = ACCESS_ONCE(rflow->cpu); + cpu = READ_ONCE(rflow->cpu); if (rflow->filter == filter_id && cpu < nr_cpu_ids && ((int)(per_cpu(softnet_data, cpu).input_queue_head - rflow->last_qtail) < @@ -3865,6 +3872,142 @@ drop: return NET_RX_DROP; } +static u32 netif_receive_generic_xdp(struct sk_buff *skb, + struct bpf_prog *xdp_prog) +{ + u32 metalen, act = XDP_DROP; + struct xdp_buff xdp; + void *orig_data; + int hlen, off; + u32 mac_len; + + /* Reinjected packets coming from act_mirred or similar should + * not get XDP generic processing. + */ + if (skb_cloned(skb)) + return XDP_PASS; + + /* XDP packets must be linear and must have sufficient headroom + * of XDP_PACKET_HEADROOM bytes. This is the guarantee that also + * native XDP provides, thus we need to do it here as well. + */ + if (skb_is_nonlinear(skb) || + skb_headroom(skb) < XDP_PACKET_HEADROOM) { + int hroom = XDP_PACKET_HEADROOM - skb_headroom(skb); + int troom = skb->tail + skb->data_len - skb->end; + + /* In case we have to go down the path and also linearize, + * then lets do the pskb_expand_head() work just once here. + */ + if (pskb_expand_head(skb, + hroom > 0 ? ALIGN(hroom, NET_SKB_PAD) : 0, + troom > 0 ? troom + 128 : 0, GFP_ATOMIC)) + goto do_drop; + if (troom > 0 && __skb_linearize(skb)) + goto do_drop; + } + + /* The XDP program wants to see the packet starting at the MAC + * header. + */ + mac_len = skb->data - skb_mac_header(skb); + hlen = skb_headlen(skb) + mac_len; + xdp.data = skb->data - mac_len; + xdp.data_meta = xdp.data; + xdp.data_end = xdp.data + hlen; + xdp.data_hard_start = skb->data - skb_headroom(skb); + orig_data = xdp.data; + + act = bpf_prog_run_xdp(xdp_prog, &xdp); + + off = xdp.data - orig_data; + if (off > 0) + __skb_pull(skb, off); + else if (off < 0) + __skb_push(skb, -off); + skb->mac_header += off; + + switch (act) { + case XDP_REDIRECT: + case XDP_TX: + __skb_push(skb, mac_len); + break; + case XDP_PASS: + metalen = xdp.data - xdp.data_meta; + if (metalen) + skb_metadata_set(skb, metalen); + break; + default: + bpf_warn_invalid_xdp_action(act); + /* fall through */ + case XDP_ABORTED: + trace_xdp_exception(skb->dev, xdp_prog, act); + /* fall through */ + case XDP_DROP: + do_drop: + kfree_skb(skb); + break; + } + + return act; +} + +/* When doing generic XDP we have to bypass the qdisc layer and the + * network taps in order to match in-driver-XDP behavior. + */ +void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog) +{ + struct net_device *dev = skb->dev; + struct netdev_queue *txq; + bool free_skb = true; + int cpu, rc; + + txq = netdev_pick_tx(dev, skb, NULL); + cpu = smp_processor_id(); + HARD_TX_LOCK(dev, txq, cpu); + if (!netif_xmit_stopped(txq)) { + rc = netdev_start_xmit(skb, dev, txq, 0); + if (dev_xmit_complete(rc)) + free_skb = false; + } + HARD_TX_UNLOCK(dev, txq); + if (free_skb) { + trace_xdp_exception(dev, xdp_prog, XDP_TX); + kfree_skb(skb); + } +} +EXPORT_SYMBOL_GPL(generic_xdp_tx); + +static struct static_key generic_xdp_needed __read_mostly; + +int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb) +{ + if (xdp_prog) { + u32 act = netif_receive_generic_xdp(skb, xdp_prog); + int err; + + if (act != XDP_PASS) { + switch (act) { + case XDP_REDIRECT: + err = xdp_do_generic_redirect(skb->dev, skb, + xdp_prog); + if (err) + goto out_redir; + /* fallthru to submit skb */ + case XDP_TX: + generic_xdp_tx(skb, xdp_prog); + break; + } + return XDP_DROP; + } + } + return XDP_PASS; +out_redir: + kfree_skb(skb); + return XDP_DROP; +} +EXPORT_SYMBOL_GPL(do_xdp_generic); + static int netif_rx_internal(struct sk_buff *skb) { int ret; @@ -3872,6 +4015,24 @@ static int netif_rx_internal(struct sk_buff *skb) net_timestamp_check(netdev_tstamp_prequeue, skb); trace_netif_rx(skb); + + if (static_key_false(&generic_xdp_needed)) { + int ret; + + preempt_disable(); + rcu_read_lock(); + ret = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb); + rcu_read_unlock(); + preempt_enable(); + + /* Consider XDP consuming the packet a success from + * the netdev point of view we do not want to count + * this as an error. + */ + if (ret != XDP_PASS) + return NET_RX_SUCCESS; + } + #ifdef CONFIG_RPS if (static_key_false(&rps_needed)) { struct rps_dev_flow voidflow, *rflow = &voidflow; @@ -4010,7 +4171,7 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, struct net_device *orig_dev) { #ifdef CONFIG_NET_CLS_ACT - struct tcf_proto *cl = rcu_dereference_bh(skb->dev->ingress_cl_list); + struct mini_Qdisc *miniq = rcu_dereference_bh(skb->dev->miniq_ingress); struct tcf_result cl_res; /* If there's at least one ingress present somewhere (so @@ -4018,8 +4179,9 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, * that are not configured with an ingress qdisc will bail * out here. */ - if (!cl) + if (!miniq) return skb; + if (*pt_prev) { *ret = deliver_skb(skb, *pt_prev, orig_dev); *pt_prev = NULL; @@ -4027,15 +4189,15 @@ sch_handle_ingress(struct sk_buff *skb, struct packet_type **pt_prev, int *ret, qdisc_skb_cb(skb)->pkt_len = skb->len; skb->tc_at_ingress = 1; - qdisc_bstats_cpu_update(cl->q, skb); + mini_qdisc_bstats_cpu_update(miniq, skb); - switch (tcf_classify(skb, cl, &cl_res, false)) { + switch (tcf_classify(skb, miniq->filter_list, &cl_res, false)) { case TC_ACT_OK: case TC_ACT_RECLASSIFY: skb->tc_index = TC_H_MIN(cl_res.classid); break; case TC_ACT_SHOT: - qdisc_qstats_cpu_drop(cl->q); + mini_qdisc_qstats_cpu_drop(miniq); kfree_skb(skb); return NULL; case TC_ACT_STOLEN: @@ -4292,7 +4454,7 @@ skip_classify: } if (pt_prev) { - if (unlikely(skb_orphan_frags(skb, GFP_ATOMIC))) + if (unlikely(skb_orphan_frags_rx(skb, GFP_ATOMIC))) goto drop; else ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev); @@ -4313,6 +4475,33 @@ out: return ret; } +/** + * netif_receive_skb_core - special purpose version of netif_receive_skb + * @skb: buffer to process + * + * More direct receive version of netif_receive_skb(). It should + * only be used by callers that have a need to skip RPS and Generic XDP. + * Caller must also take care of handling if (page_is_)pfmemalloc. + * + * This function may only be called from softirq context and interrupts + * should be enabled. + * + * Return values (usually ignored): + * NET_RX_SUCCESS: no congestion + * NET_RX_DROP: packet was dropped + */ +int netif_receive_skb_core(struct sk_buff *skb) +{ + int ret; + + rcu_read_lock(); + ret = __netif_receive_skb_core(skb, false); + rcu_read_unlock(); + + return ret; +} +EXPORT_SYMBOL(netif_receive_skb_core); + static int __netif_receive_skb(struct sk_buff *skb) { int ret; @@ -4338,9 +4527,7 @@ static int __netif_receive_skb(struct sk_buff *skb) return ret; } -static struct static_key generic_xdp_needed __read_mostly; - -static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp) +static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp) { struct bpf_prog *old = rtnl_dereference(dev->xdp_prog); struct bpf_prog *new = xdp->prog; @@ -4373,89 +4560,6 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_xdp *xdp) return ret; } -static u32 netif_receive_generic_xdp(struct sk_buff *skb, - struct bpf_prog *xdp_prog) -{ - struct xdp_buff xdp; - u32 act = XDP_DROP; - void *orig_data; - int hlen, off; - u32 mac_len; - - /* Reinjected packets coming from act_mirred or similar should - * not get XDP generic processing. - */ - if (skb_cloned(skb)) - return XDP_PASS; - - if (skb_linearize(skb)) - goto do_drop; - - /* The XDP program wants to see the packet starting at the MAC - * header. - */ - mac_len = skb->data - skb_mac_header(skb); - hlen = skb_headlen(skb) + mac_len; - xdp.data = skb->data - mac_len; - xdp.data_end = xdp.data + hlen; - xdp.data_hard_start = skb->data - skb_headroom(skb); - orig_data = xdp.data; - - act = bpf_prog_run_xdp(xdp_prog, &xdp); - - off = xdp.data - orig_data; - if (off > 0) - __skb_pull(skb, off); - else if (off < 0) - __skb_push(skb, -off); - - switch (act) { - case XDP_TX: - __skb_push(skb, mac_len); - /* fall through */ - case XDP_PASS: - break; - - default: - bpf_warn_invalid_xdp_action(act); - /* fall through */ - case XDP_ABORTED: - trace_xdp_exception(skb->dev, xdp_prog, act); - /* fall through */ - case XDP_DROP: - do_drop: - kfree_skb(skb); - break; - } - - return act; -} - -/* When doing generic XDP we have to bypass the qdisc layer and the - * network taps in order to match in-driver-XDP behavior. - */ -static void generic_xdp_tx(struct sk_buff *skb, struct bpf_prog *xdp_prog) -{ - struct net_device *dev = skb->dev; - struct netdev_queue *txq; - bool free_skb = true; - int cpu, rc; - - txq = netdev_pick_tx(dev, skb, NULL); - cpu = smp_processor_id(); - HARD_TX_LOCK(dev, txq, cpu); - if (!netif_xmit_stopped(txq)) { - rc = netdev_start_xmit(skb, dev, txq, 0); - if (dev_xmit_complete(rc)) - free_skb = false; - } - HARD_TX_UNLOCK(dev, txq); - if (free_skb) { - trace_xdp_exception(dev, xdp_prog, XDP_TX); - kfree_skb(skb); - } -} - static int netif_receive_skb_internal(struct sk_buff *skb) { int ret; @@ -4465,23 +4569,20 @@ static int netif_receive_skb_internal(struct sk_buff *skb) if (skb_defer_rx_timestamp(skb)) return NET_RX_SUCCESS; - rcu_read_lock(); - if (static_key_false(&generic_xdp_needed)) { - struct bpf_prog *xdp_prog = rcu_dereference(skb->dev->xdp_prog); + int ret; - if (xdp_prog) { - u32 act = netif_receive_generic_xdp(skb, xdp_prog); + preempt_disable(); + rcu_read_lock(); + ret = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb); + rcu_read_unlock(); + preempt_enable(); - if (act != XDP_PASS) { - rcu_read_unlock(); - if (act == XDP_TX) - generic_xdp_tx(skb, xdp_prog); - return NET_RX_DROP; - } - } + if (ret != XDP_PASS) + return NET_RX_DROP; } + rcu_read_lock(); #ifdef CONFIG_RPS if (static_key_false(&rps_needed)) { struct rps_dev_flow voidflow, *rflow = &voidflow; @@ -4653,6 +4754,7 @@ static void gro_list_prepare(struct napi_struct *napi, struct sk_buff *skb) diffs = (unsigned long)p->dev ^ (unsigned long)skb->dev; diffs |= p->vlan_tci ^ skb->vlan_tci; diffs |= skb_metadata_dst_cmp(p, skb); + diffs |= skb_metadata_differs(p, skb); if (maclen == ETH_HLEN) diffs |= compare_ether_header(skb_mac_header(p), skb_mac_header(skb)); @@ -5289,6 +5391,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock) * Ideally, a new ndo_busy_poll_stop() could avoid another round. */ rc = napi->poll(napi, BUSY_POLL_BUDGET); + trace_napi_poll(napi, rc, BUSY_POLL_BUDGET); netpoll_poll_unlock(have_poll_lock); if (rc == BUSY_POLL_BUDGET) __napi_schedule(napi); @@ -5667,12 +5770,13 @@ EXPORT_SYMBOL(netdev_has_upper_dev_all_rcu); * Find out if a device is linked to an upper device and return true in case * it is. The caller must hold the RTNL lock. */ -static bool netdev_has_any_upper_dev(struct net_device *dev) +bool netdev_has_any_upper_dev(struct net_device *dev) { ASSERT_RTNL(); return !list_empty(&dev->adj_list.upper); } +EXPORT_SYMBOL(netdev_has_any_upper_dev); /** * netdev_master_upper_dev_get - Get master upper device @@ -6184,9 +6288,19 @@ static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, static int __netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, bool master, - void *upper_priv, void *upper_info) -{ - struct netdev_notifier_changeupper_info changeupper_info; + void *upper_priv, void *upper_info, + struct netlink_ext_ack *extack) +{ + struct netdev_notifier_changeupper_info changeupper_info = { + .info = { + .dev = dev, + .extack = extack, + }, + .upper_dev = upper_dev, + .master = master, + .linking = true, + .upper_info = upper_info, + }; int ret = 0; ASSERT_RTNL(); @@ -6204,12 +6318,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, if (master && netdev_master_upper_dev_get(dev)) return -EBUSY; - changeupper_info.upper_dev = upper_dev; - changeupper_info.master = master; - changeupper_info.linking = true; - changeupper_info.upper_info = upper_info; - - ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev, + ret = call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, &changeupper_info.info); ret = notifier_to_errno(ret); if (ret) @@ -6220,7 +6329,7 @@ static int __netdev_upper_dev_link(struct net_device *dev, if (ret) return ret; - ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, + ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, &changeupper_info.info); ret = notifier_to_errno(ret); if (ret) @@ -6245,9 +6354,11 @@ rollback: * returns zero. */ int netdev_upper_dev_link(struct net_device *dev, - struct net_device *upper_dev) + struct net_device *upper_dev, + struct netlink_ext_ack *extack) { - return __netdev_upper_dev_link(dev, upper_dev, false, NULL, NULL); + return __netdev_upper_dev_link(dev, upper_dev, false, + NULL, NULL, extack); } EXPORT_SYMBOL(netdev_upper_dev_link); @@ -6266,10 +6377,11 @@ EXPORT_SYMBOL(netdev_upper_dev_link); */ int netdev_master_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, - void *upper_priv, void *upper_info) + void *upper_priv, void *upper_info, + struct netlink_ext_ack *extack) { return __netdev_upper_dev_link(dev, upper_dev, true, - upper_priv, upper_info); + upper_priv, upper_info, extack); } EXPORT_SYMBOL(netdev_master_upper_dev_link); @@ -6284,20 +6396,24 @@ EXPORT_SYMBOL(netdev_master_upper_dev_link); void netdev_upper_dev_unlink(struct net_device *dev, struct net_device *upper_dev) { - struct netdev_notifier_changeupper_info changeupper_info; + struct netdev_notifier_changeupper_info changeupper_info = { + .info = { + .dev = dev, + }, + .upper_dev = upper_dev, + .linking = false, + }; ASSERT_RTNL(); - changeupper_info.upper_dev = upper_dev; changeupper_info.master = netdev_master_upper_dev_get(dev) == upper_dev; - changeupper_info.linking = false; - call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, dev, + call_netdevice_notifiers_info(NETDEV_PRECHANGEUPPER, &changeupper_info.info); __netdev_adjacent_dev_unlink_neighbour(dev, upper_dev); - call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev, + call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, &changeupper_info.info); } EXPORT_SYMBOL(netdev_upper_dev_unlink); @@ -6313,11 +6429,13 @@ EXPORT_SYMBOL(netdev_upper_dev_unlink); void netdev_bonding_info_change(struct net_device *dev, struct netdev_bonding_info *bonding_info) { - struct netdev_notifier_bonding_info info; + struct netdev_notifier_bonding_info info = { + .info.dev = dev, + }; memcpy(&info.bonding_info, bonding_info, sizeof(struct netdev_bonding_info)); - call_netdevice_notifiers_info(NETDEV_BONDING_INFO, dev, + call_netdevice_notifiers_info(NETDEV_BONDING_INFO, &info.info); } EXPORT_SYMBOL(netdev_bonding_info_change); @@ -6443,11 +6561,13 @@ EXPORT_SYMBOL(dev_get_nest_level); void netdev_lower_state_changed(struct net_device *lower_dev, void *lower_state_info) { - struct netdev_notifier_changelowerstate_info changelowerstate_info; + struct netdev_notifier_changelowerstate_info changelowerstate_info = { + .info.dev = lower_dev, + }; ASSERT_RTNL(); changelowerstate_info.lower_state_info = lower_state_info; - call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE, lower_dev, + call_netdevice_notifiers_info(NETDEV_CHANGELOWERSTATE, &changelowerstate_info.info); } EXPORT_SYMBOL(netdev_lower_state_changed); @@ -6689,8 +6809,12 @@ int __dev_change_flags(struct net_device *dev, unsigned int flags) */ ret = 0; - if ((old_flags ^ flags) & IFF_UP) - ret = ((old_flags & IFF_UP) ? __dev_close : __dev_open)(dev); + if ((old_flags ^ flags) & IFF_UP) { + if (old_flags & IFF_UP) + __dev_close(dev); + else + ret = __dev_open(dev); + } if ((flags ^ dev->gflags) & IFF_PROMISC) { int inc = (flags & IFF_PROMISC) ? 1 : -1; @@ -6734,11 +6858,14 @@ void __dev_notify_flags(struct net_device *dev, unsigned int old_flags, if (dev->flags & IFF_UP && (changes & ~(IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) { - struct netdev_notifier_change_info change_info; + struct netdev_notifier_change_info change_info = { + .info = { + .dev = dev, + }, + .flags_changed = changes, + }; - change_info.flags_changed = changes; - call_netdevice_notifiers_info(NETDEV_CHANGE, dev, - &change_info.info); + call_netdevice_notifiers_info(NETDEV_CHANGE, &change_info.info); } } @@ -6945,26 +7072,26 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down) } EXPORT_SYMBOL(dev_change_proto_down); -u8 __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op, u32 *prog_id) +u8 __dev_xdp_attached(struct net_device *dev, bpf_op_t bpf_op, u32 *prog_id) { - struct netdev_xdp xdp; + struct netdev_bpf xdp; memset(&xdp, 0, sizeof(xdp)); xdp.command = XDP_QUERY_PROG; /* Query must always succeed. */ - WARN_ON(xdp_op(dev, &xdp) < 0); + WARN_ON(bpf_op(dev, &xdp) < 0); if (prog_id) *prog_id = xdp.prog_id; return xdp.prog_attached; } -static int dev_xdp_install(struct net_device *dev, xdp_op_t xdp_op, +static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op, struct netlink_ext_ack *extack, u32 flags, struct bpf_prog *prog) { - struct netdev_xdp xdp; + struct netdev_bpf xdp; memset(&xdp, 0, sizeof(xdp)); if (flags & XDP_FLAGS_HW_MODE) @@ -6975,7 +7102,7 @@ static int dev_xdp_install(struct net_device *dev, xdp_op_t xdp_op, xdp.flags = flags; xdp.prog = prog; - return xdp_op(dev, &xdp); + return bpf_op(dev, &xdp); } /** @@ -6992,32 +7119,36 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, { const struct net_device_ops *ops = dev->netdev_ops; struct bpf_prog *prog = NULL; - xdp_op_t xdp_op, xdp_chk; + bpf_op_t bpf_op, bpf_chk; int err; ASSERT_RTNL(); - xdp_op = xdp_chk = ops->ndo_xdp; - if (!xdp_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE))) + bpf_op = bpf_chk = ops->ndo_bpf; + if (!bpf_op && (flags & (XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE))) return -EOPNOTSUPP; - if (!xdp_op || (flags & XDP_FLAGS_SKB_MODE)) - xdp_op = generic_xdp_install; - if (xdp_op == xdp_chk) - xdp_chk = generic_xdp_install; + if (!bpf_op || (flags & XDP_FLAGS_SKB_MODE)) + bpf_op = generic_xdp_install; + if (bpf_op == bpf_chk) + bpf_chk = generic_xdp_install; if (fd >= 0) { - if (xdp_chk && __dev_xdp_attached(dev, xdp_chk, NULL)) + if (bpf_chk && __dev_xdp_attached(dev, bpf_chk, NULL)) return -EEXIST; if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && - __dev_xdp_attached(dev, xdp_op, NULL)) + __dev_xdp_attached(dev, bpf_op, NULL)) return -EBUSY; - prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP); + if (bpf_op == ops->ndo_bpf) + prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP, + dev); + else + prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP); if (IS_ERR(prog)) return PTR_ERR(prog); } - err = dev_xdp_install(dev, xdp_op, extack, flags, prog); + err = dev_xdp_install(dev, bpf_op, extack, flags, prog); if (err < 0 && prog) bpf_prog_put(prog); @@ -7109,7 +7240,7 @@ static void rollback_registered_many(struct list_head *head) if (!dev->rtnl_link_ops || dev->rtnl_link_state == RTNL_LINK_INITIALIZED) skb = rtmsg_ifinfo_build_skb(RTM_DELLINK, dev, ~0U, 0, - GFP_KERNEL); + GFP_KERNEL, NULL); /* * Flush the unicast and multicast chains @@ -7235,24 +7366,6 @@ static netdev_features_t netdev_fix_features(struct net_device *dev, features &= ~NETIF_F_GSO; } - /* UFO needs SG and checksumming */ - if (features & NETIF_F_UFO) { - /* maybe split UFO into V4 and V6? */ - if (!(features & NETIF_F_HW_CSUM) && - ((features & (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM)) != - (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM))) { - netdev_dbg(dev, - "Dropping NETIF_F_UFO since no checksum offload features.\n"); - features &= ~NETIF_F_UFO; - } - - if (!(features & NETIF_F_SG)) { - netdev_dbg(dev, - "Dropping NETIF_F_UFO since no NETIF_F_SG feature.\n"); - features &= ~NETIF_F_UFO; - } - } - /* GSO partial features require GSO partial be set */ if ((features & dev->gso_partial_features) && !(features & NETIF_F_GSO_PARTIAL)) { @@ -7313,8 +7426,27 @@ sync_lower: netdev_for_each_lower_dev(dev, lower, iter) netdev_sync_lower_features(dev, lower, features); - if (!err) + if (!err) { + netdev_features_t diff = features ^ dev->features; + + if (diff & NETIF_F_RX_UDP_TUNNEL_PORT) { + /* udp_tunnel_{get,drop}_rx_info both need + * NETIF_F_RX_UDP_TUNNEL_PORT enabled on the + * device, or they won't do anything. + * Thus we need to update dev->features + * *before* calling udp_tunnel_get_rx_info, + * but *after* calling udp_tunnel_drop_rx_info. + */ + if (features & NETIF_F_RX_UDP_TUNNEL_PORT) { + dev->features = features; + udp_tunnel_get_rx_info(dev); + } else { + udp_tunnel_drop_rx_info(dev); + } + } + dev->features = features; + } return err < 0 ? 0 : 1; } @@ -7516,6 +7648,12 @@ int register_netdevice(struct net_device *dev) */ dev->hw_features |= NETIF_F_SOFT_FEATURES; dev->features |= NETIF_F_SOFT_FEATURES; + + if (dev->netdev_ops->ndo_udp_tunnel_add) { + dev->features |= NETIF_F_RX_UDP_TUNNEL_PORT; + dev->hw_features |= NETIF_F_RX_UDP_TUNNEL_PORT; + } + dev->wanted_features = dev->features & dev->hw_features; if (!(dev->flags & IFF_LOOPBACK)) @@ -7939,7 +8077,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, unsigned int txqs, unsigned int rxqs) { struct net_device *dev; - size_t alloc_size; + unsigned int alloc_size; struct net_device *p; BUG_ON(strlen(name) >= sizeof(dev->name)); @@ -8189,7 +8327,7 @@ EXPORT_SYMBOL(unregister_netdev); int dev_change_net_namespace(struct net_device *dev, struct net *net, const char *pat) { - int err; + int err, new_nsid; ASSERT_RTNL(); @@ -8245,7 +8383,11 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char call_netdevice_notifiers(NETDEV_UNREGISTER, dev); rcu_barrier(); call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); - rtmsg_ifinfo(RTM_DELLINK, dev, ~0U, GFP_KERNEL); + if (dev->rtnl_link_ops && dev->rtnl_link_ops->get_link_net) + new_nsid = peernet2id_alloc(dev_net(dev), net); + else + new_nsid = peernet2id(dev_net(dev), net); + rtmsg_ifinfo_newnet(RTM_DELLINK, dev, ~0U, GFP_KERNEL, &new_nsid); /* * Flush the unicast and multicast chains @@ -8507,6 +8649,8 @@ static void __net_exit netdev_exit(struct net *net) { kfree(net->dev_name_head); kfree(net->dev_index_head); + if (net != &init_net) + WARN_ON_ONCE(!list_empty(&net->dev_base_head)); } static struct pernet_operations __net_initdata netdev_net_ops = { diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 709a4e6fb447..7e690d0ccd05 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/kmod.h> #include <linux/netdevice.h> #include <linux/etherdevice.h> @@ -303,7 +304,18 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) case SIOCSIFTXQLEN: if (ifr->ifr_qlen < 0) return -EINVAL; - dev->tx_queue_len = ifr->ifr_qlen; + if (dev->tx_queue_len ^ ifr->ifr_qlen) { + unsigned int orig_len = dev->tx_queue_len; + + dev->tx_queue_len = ifr->ifr_qlen; + err = call_netdevice_notifiers( + NETDEV_CHANGE_TX_QUEUE_LEN, dev); + err = notifier_to_errno(err); + if (err) { + dev->tx_queue_len = orig_len; + return err; + } + } return 0; case SIOCSIFNAME: diff --git a/net/core/devlink.c b/net/core/devlink.c index a0adfc31a3fe..7d430c1d9c3e 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -29,6 +29,57 @@ #define CREATE_TRACE_POINTS #include <trace/events/devlink.h> +static struct devlink_dpipe_field devlink_dpipe_fields_ethernet[] = { + { + .name = "destination mac", + .id = DEVLINK_DPIPE_FIELD_ETHERNET_DST_MAC, + .bitwidth = 48, + }, +}; + +struct devlink_dpipe_header devlink_dpipe_header_ethernet = { + .name = "ethernet", + .id = DEVLINK_DPIPE_HEADER_ETHERNET, + .fields = devlink_dpipe_fields_ethernet, + .fields_count = ARRAY_SIZE(devlink_dpipe_fields_ethernet), + .global = true, +}; +EXPORT_SYMBOL(devlink_dpipe_header_ethernet); + +static struct devlink_dpipe_field devlink_dpipe_fields_ipv4[] = { + { + .name = "destination ip", + .id = DEVLINK_DPIPE_FIELD_IPV4_DST_IP, + .bitwidth = 32, + }, +}; + +struct devlink_dpipe_header devlink_dpipe_header_ipv4 = { + .name = "ipv4", + .id = DEVLINK_DPIPE_HEADER_IPV4, + .fields = devlink_dpipe_fields_ipv4, + .fields_count = ARRAY_SIZE(devlink_dpipe_fields_ipv4), + .global = true, +}; +EXPORT_SYMBOL(devlink_dpipe_header_ipv4); + +static struct devlink_dpipe_field devlink_dpipe_fields_ipv6[] = { + { + .name = "destination ip", + .id = DEVLINK_DPIPE_FIELD_IPV6_DST_IP, + .bitwidth = 128, + }, +}; + +struct devlink_dpipe_header devlink_dpipe_header_ipv6 = { + .name = "ipv6", + .id = DEVLINK_DPIPE_HEADER_IPV6, + .fields = devlink_dpipe_fields_ipv6, + .fields_count = ARRAY_SIZE(devlink_dpipe_fields_ipv6), + .global = true, +}; +EXPORT_SYMBOL(devlink_dpipe_header_ipv6); + EXPORT_TRACEPOINT_SYMBOL_GPL(devlink_hwmsg); static LIST_HEAD(devlink_list); @@ -1613,13 +1664,15 @@ static int devlink_dpipe_table_put(struct sk_buff *skb, struct devlink_dpipe_table *table) { struct nlattr *table_attr; + u64 table_size; + table_size = table->table_ops->size_get(table->priv); table_attr = nla_nest_start(skb, DEVLINK_ATTR_DPIPE_TABLE); if (!table_attr) return -EMSGSIZE; if (nla_put_string(skb, DEVLINK_ATTR_DPIPE_TABLE_NAME, table->name) || - nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_SIZE, table->size, + nla_put_u64_64bit(skb, DEVLINK_ATTR_DPIPE_TABLE_SIZE, table_size, DEVLINK_ATTR_PAD)) goto nla_put_failure; if (nla_put_u8(skb, DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED, @@ -1960,6 +2013,28 @@ int devlink_dpipe_entry_ctx_close(struct devlink_dpipe_dump_ctx *dump_ctx) } EXPORT_SYMBOL_GPL(devlink_dpipe_entry_ctx_close); +void devlink_dpipe_entry_clear(struct devlink_dpipe_entry *entry) + +{ + unsigned int value_count, value_index; + struct devlink_dpipe_value *value; + + value = entry->action_values; + value_count = entry->action_values_count; + for (value_index = 0; value_index < value_count; value_index++) { + kfree(value[value_index].value); + kfree(value[value_index].mask); + } + + value = entry->match_values; + value_count = entry->match_values_count; + for (value_index = 0; value_index < value_count; value_index++) { + kfree(value[value_index].value); + kfree(value[value_index].mask); + } +} +EXPORT_SYMBOL(devlink_dpipe_entry_clear); + static int devlink_dpipe_entries_fill(struct genl_info *info, enum devlink_command cmd, int flags, struct devlink_dpipe_table *table) @@ -2684,20 +2759,21 @@ EXPORT_SYMBOL_GPL(devlink_dpipe_table_counter_enabled); * @table_name: table name * @table_ops: table ops * @priv: priv - * @size: size * @counter_control_extern: external control for counters */ int devlink_dpipe_table_register(struct devlink *devlink, const char *table_name, struct devlink_dpipe_table_ops *table_ops, - void *priv, u64 size, - bool counter_control_extern) + void *priv, bool counter_control_extern) { struct devlink_dpipe_table *table; if (devlink_dpipe_table_find(&devlink->dpipe_table_list, table_name)) return -EEXIST; + if (WARN_ON(!table_ops->size_get)) + return -EINVAL; + table = kzalloc(sizeof(*table), GFP_KERNEL); if (!table) return -ENOMEM; @@ -2705,7 +2781,6 @@ int devlink_dpipe_table_register(struct devlink *devlink, table->name = table_name; table->table_ops = table_ops; table->priv = priv; - table->size = size; table->counter_control_extern = counter_control_extern; mutex_lock(&devlink_mutex); diff --git a/net/core/dst.c b/net/core/dst.c index 00aa972ad1a1..662a2d4a3d19 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -55,7 +55,7 @@ const struct dst_metrics dst_default_metrics = { * We really want to avoid false sharing on this variable, and catch * any writes on it. */ - .refcnt = ATOMIC_INIT(1), + .refcnt = REFCOUNT_INIT(1), }; void dst_init(struct dst_entry *dst, struct dst_ops *ops, @@ -213,7 +213,7 @@ u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old) struct dst_metrics *old_p = (struct dst_metrics *)__DST_METRICS_PTR(old); unsigned long prev, new; - atomic_set(&p->refcnt, 1); + refcount_set(&p->refcnt, 1); memcpy(p->metrics, old_p->metrics, sizeof(p->metrics)); new = (unsigned long) p; @@ -225,7 +225,7 @@ u32 *dst_cow_metrics_generic(struct dst_entry *dst, unsigned long old) if (prev & DST_METRICS_READ_ONLY) p = NULL; } else if (prev & DST_METRICS_REFCOUNTED) { - if (atomic_dec_and_test(&old_p->refcnt)) + if (refcount_dec_and_test(&old_p->refcnt)) kfree(old_p); } } @@ -299,7 +299,8 @@ EXPORT_SYMBOL_GPL(metadata_dst_alloc); void metadata_dst_free(struct metadata_dst *md_dst) { #ifdef CONFIG_DST_CACHE - dst_cache_destroy(&md_dst->u.tun_info.dst_cache); + if (md_dst->type == METADATA_IP_TUNNEL) + dst_cache_destroy(&md_dst->u.tun_info.dst_cache); #endif kfree(md_dst); } @@ -321,3 +322,19 @@ metadata_dst_alloc_percpu(u8 optslen, enum metadata_type type, gfp_t flags) return md_dst; } EXPORT_SYMBOL_GPL(metadata_dst_alloc_percpu); + +void metadata_dst_free_percpu(struct metadata_dst __percpu *md_dst) +{ +#ifdef CONFIG_DST_CACHE + int cpu; + + for_each_possible_cpu(cpu) { + struct metadata_dst *one_md_dst = per_cpu_ptr(md_dst, cpu); + + if (one_md_dst->type == METADATA_IP_TUNNEL) + dst_cache_destroy(&one_md_dst->u.tun_info.dst_cache); + } +#endif + free_percpu(md_dst); +} +EXPORT_SYMBOL_GPL(metadata_dst_free_percpu); diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 674b6c9cec18..f8fcf450a36e 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -76,7 +76,6 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_LRO_BIT] = "rx-lro", [NETIF_F_TSO_BIT] = "tx-tcp-segmentation", - [NETIF_F_UFO_BIT] = "tx-udp-fragmentation", [NETIF_F_GSO_ROBUST_BIT] = "tx-gso-robust", [NETIF_F_TSO_ECN_BIT] = "tx-tcp-ecn-segmentation", [NETIF_F_TSO_MANGLEID_BIT] = "tx-tcp-mangleid-segmentation", @@ -106,6 +105,7 @@ static const char netdev_features_strings[NETDEV_FEATURE_COUNT][ETH_GSTRING_LEN] [NETIF_F_HW_TC_BIT] = "hw-tc-offload", [NETIF_F_HW_ESP_BIT] = "esp-hw-offload", [NETIF_F_HW_ESP_TX_CSUM_BIT] = "esp-tx-csum-hw-offload", + [NETIF_F_RX_UDP_TUNNEL_PORT_BIT] = "rx-udp_tunnel-port-offload", }; static const char @@ -299,9 +299,6 @@ static netdev_features_t ethtool_get_feature_mask(u32 eth_cmd) case ETHTOOL_GTSO: case ETHTOOL_STSO: return NETIF_F_ALL_TSO; - case ETHTOOL_GUFO: - case ETHTOOL_SUFO: - return NETIF_F_UFO; case ETHTOOL_GGSO: case ETHTOOL_SGSO: return NETIF_F_GSO; @@ -406,6 +403,22 @@ static int __ethtool_set_flags(struct net_device *dev, u32 data) return 0; } +/* Given two link masks, AND them together and save the result in dst. */ +void ethtool_intersect_link_masks(struct ethtool_link_ksettings *dst, + struct ethtool_link_ksettings *src) +{ + unsigned int size = BITS_TO_LONGS(__ETHTOOL_LINK_MODE_MASK_NBITS); + unsigned int idx = 0; + + for (; idx < size; idx++) { + dst->link_modes.supported[idx] &= + src->link_modes.supported[idx]; + dst->link_modes.advertising[idx] &= + src->link_modes.advertising[idx]; + } +} +EXPORT_SYMBOL(ethtool_intersect_link_masks); + void ethtool_convert_legacy_u32_to_link_mode(unsigned long *dst, u32 legacy_u32) { @@ -439,7 +452,7 @@ bool ethtool_convert_link_mode_to_legacy_u32(u32 *legacy_u32, EXPORT_SYMBOL(ethtool_convert_link_mode_to_legacy_u32); /* return false if legacy contained non-0 deprecated fields - * transceiver/maxtxpkt/maxrxpkt. rest of ksettings always updated + * maxtxpkt/maxrxpkt. rest of ksettings always updated */ static bool convert_legacy_settings_to_link_ksettings( @@ -454,8 +467,7 @@ convert_legacy_settings_to_link_ksettings( * deprecated legacy fields, and they should not use * %ETHTOOL_GLINKSETTINGS/%ETHTOOL_SLINKSETTINGS */ - if (legacy_settings->transceiver || - legacy_settings->maxtxpkt || + if (legacy_settings->maxtxpkt || legacy_settings->maxrxpkt) retval = false; @@ -528,6 +540,8 @@ convert_link_ksettings_to_legacy_settings( = link_ksettings->base.eth_tp_mdix; legacy_settings->eth_tp_mdix_ctrl = link_ksettings->base.eth_tp_mdix_ctrl; + legacy_settings->transceiver + = link_ksettings->base.transceiver; return retval; } @@ -2515,6 +2529,33 @@ static int set_phy_tunable(struct net_device *dev, void __user *useraddr) return ret; } +static int ethtool_get_fecparam(struct net_device *dev, void __user *useraddr) +{ + struct ethtool_fecparam fecparam = { ETHTOOL_GFECPARAM }; + + if (!dev->ethtool_ops->get_fecparam) + return -EOPNOTSUPP; + + dev->ethtool_ops->get_fecparam(dev, &fecparam); + + if (copy_to_user(useraddr, &fecparam, sizeof(fecparam))) + return -EFAULT; + return 0; +} + +static int ethtool_set_fecparam(struct net_device *dev, void __user *useraddr) +{ + struct ethtool_fecparam fecparam; + + if (!dev->ethtool_ops->set_fecparam) + return -EOPNOTSUPP; + + if (copy_from_user(&fecparam, useraddr, sizeof(fecparam))) + return -EFAULT; + + return dev->ethtool_ops->set_fecparam(dev, &fecparam); +} + /* The main entry point in this file. Called from net/core/dev_ioctl.c */ int dev_ethtool(struct net *net, struct ifreq *ifr) @@ -2555,7 +2596,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GPHYSTATS: case ETHTOOL_GTSO: case ETHTOOL_GPERMADDR: - case ETHTOOL_GUFO: case ETHTOOL_GGSO: case ETHTOOL_GGRO: case ETHTOOL_GFLAGS: @@ -2574,6 +2614,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GTUNABLE: case ETHTOOL_PHY_GTUNABLE: case ETHTOOL_GLINKSETTINGS: + case ETHTOOL_GFECPARAM: break; default: if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) @@ -2723,7 +2764,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GRXCSUM: case ETHTOOL_GSG: case ETHTOOL_GTSO: - case ETHTOOL_GUFO: case ETHTOOL_GGSO: case ETHTOOL_GGRO: rc = ethtool_get_one_feature(dev, useraddr, ethcmd); @@ -2732,7 +2772,6 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_SRXCSUM: case ETHTOOL_SSG: case ETHTOOL_STSO: - case ETHTOOL_SUFO: case ETHTOOL_SGSO: case ETHTOOL_SGRO: rc = ethtool_set_one_feature(dev, useraddr, ethcmd); @@ -2785,6 +2824,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_PHY_STUNABLE: rc = set_phy_tunable(dev, useraddr); break; + case ETHTOOL_GFECPARAM: + rc = ethtool_get_fecparam(dev, useraddr); + break; + case ETHTOOL_SFECPARAM: + rc = ethtool_set_fecparam(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } diff --git a/net/core/fib_notifier.c b/net/core/fib_notifier.c new file mode 100644 index 000000000000..0c048bdeb016 --- /dev/null +++ b/net/core/fib_notifier.c @@ -0,0 +1,181 @@ +#include <linux/rtnetlink.h> +#include <linux/notifier.h> +#include <linux/rcupdate.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <net/net_namespace.h> +#include <net/fib_notifier.h> + +static ATOMIC_NOTIFIER_HEAD(fib_chain); + +int call_fib_notifier(struct notifier_block *nb, struct net *net, + enum fib_event_type event_type, + struct fib_notifier_info *info) +{ + info->net = net; + return nb->notifier_call(nb, event_type, info); +} +EXPORT_SYMBOL(call_fib_notifier); + +int call_fib_notifiers(struct net *net, enum fib_event_type event_type, + struct fib_notifier_info *info) +{ + info->net = net; + return atomic_notifier_call_chain(&fib_chain, event_type, info); +} +EXPORT_SYMBOL(call_fib_notifiers); + +static unsigned int fib_seq_sum(void) +{ + struct fib_notifier_ops *ops; + unsigned int fib_seq = 0; + struct net *net; + + rtnl_lock(); + for_each_net(net) { + rcu_read_lock(); + list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) { + if (!try_module_get(ops->owner)) + continue; + fib_seq += ops->fib_seq_read(net); + module_put(ops->owner); + } + rcu_read_unlock(); + } + rtnl_unlock(); + + return fib_seq; +} + +static int fib_net_dump(struct net *net, struct notifier_block *nb) +{ + struct fib_notifier_ops *ops; + + list_for_each_entry_rcu(ops, &net->fib_notifier_ops, list) { + int err; + + if (!try_module_get(ops->owner)) + continue; + err = ops->fib_dump(net, nb); + module_put(ops->owner); + if (err) + return err; + } + + return 0; +} + +static bool fib_dump_is_consistent(struct notifier_block *nb, + void (*cb)(struct notifier_block *nb), + unsigned int fib_seq) +{ + atomic_notifier_chain_register(&fib_chain, nb); + if (fib_seq == fib_seq_sum()) + return true; + atomic_notifier_chain_unregister(&fib_chain, nb); + if (cb) + cb(nb); + return false; +} + +#define FIB_DUMP_MAX_RETRIES 5 +int register_fib_notifier(struct notifier_block *nb, + void (*cb)(struct notifier_block *nb)) +{ + int retries = 0; + int err; + + do { + unsigned int fib_seq = fib_seq_sum(); + struct net *net; + + rcu_read_lock(); + for_each_net_rcu(net) { + err = fib_net_dump(net, nb); + if (err) + goto err_fib_net_dump; + } + rcu_read_unlock(); + + if (fib_dump_is_consistent(nb, cb, fib_seq)) + return 0; + } while (++retries < FIB_DUMP_MAX_RETRIES); + + return -EBUSY; + +err_fib_net_dump: + rcu_read_unlock(); + return err; +} +EXPORT_SYMBOL(register_fib_notifier); + +int unregister_fib_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&fib_chain, nb); +} +EXPORT_SYMBOL(unregister_fib_notifier); + +static int __fib_notifier_ops_register(struct fib_notifier_ops *ops, + struct net *net) +{ + struct fib_notifier_ops *o; + + list_for_each_entry(o, &net->fib_notifier_ops, list) + if (ops->family == o->family) + return -EEXIST; + list_add_tail_rcu(&ops->list, &net->fib_notifier_ops); + return 0; +} + +struct fib_notifier_ops * +fib_notifier_ops_register(const struct fib_notifier_ops *tmpl, struct net *net) +{ + struct fib_notifier_ops *ops; + int err; + + ops = kmemdup(tmpl, sizeof(*ops), GFP_KERNEL); + if (!ops) + return ERR_PTR(-ENOMEM); + + err = __fib_notifier_ops_register(ops, net); + if (err) + goto err_register; + + return ops; + +err_register: + kfree(ops); + return ERR_PTR(err); +} +EXPORT_SYMBOL(fib_notifier_ops_register); + +void fib_notifier_ops_unregister(struct fib_notifier_ops *ops) +{ + list_del_rcu(&ops->list); + kfree_rcu(ops, rcu); +} +EXPORT_SYMBOL(fib_notifier_ops_unregister); + +static int __net_init fib_notifier_net_init(struct net *net) +{ + INIT_LIST_HEAD(&net->fib_notifier_ops); + return 0; +} + +static void __net_exit fib_notifier_net_exit(struct net *net) +{ + WARN_ON_ONCE(!list_empty(&net->fib_notifier_ops)); +} + +static struct pernet_operations fib_notifier_net_ops = { + .init = fib_notifier_net_init, + .exit = fib_notifier_net_exit, +}; + +static int __init fib_notifier_init(void) +{ + return register_pernet_subsys(&fib_notifier_net_ops); +} + +subsys_initcall(fib_notifier_init); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index fdcb1bcd2afa..98e1066c3d55 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -299,6 +299,69 @@ out: } EXPORT_SYMBOL_GPL(fib_rules_lookup); +static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net, + enum fib_event_type event_type, + struct fib_rule *rule, int family) +{ + struct fib_rule_notifier_info info = { + .info.family = family, + .rule = rule, + }; + + return call_fib_notifier(nb, net, event_type, &info.info); +} + +static int call_fib_rule_notifiers(struct net *net, + enum fib_event_type event_type, + struct fib_rule *rule, + struct fib_rules_ops *ops, + struct netlink_ext_ack *extack) +{ + struct fib_rule_notifier_info info = { + .info.family = ops->family, + .info.extack = extack, + .rule = rule, + }; + + ops->fib_rules_seq++; + return call_fib_notifiers(net, event_type, &info.info); +} + +/* Called with rcu_read_lock() */ +int fib_rules_dump(struct net *net, struct notifier_block *nb, int family) +{ + struct fib_rules_ops *ops; + struct fib_rule *rule; + + ops = lookup_rules_ops(net, family); + if (!ops) + return -EAFNOSUPPORT; + list_for_each_entry_rcu(rule, &ops->rules_list, list) + call_fib_rule_notifier(nb, net, FIB_EVENT_RULE_ADD, rule, + family); + rules_ops_put(ops); + + return 0; +} +EXPORT_SYMBOL_GPL(fib_rules_dump); + +unsigned int fib_rules_seq_read(struct net *net, int family) +{ + unsigned int fib_rules_seq; + struct fib_rules_ops *ops; + + ASSERT_RTNL(); + + ops = lookup_rules_ops(net, family); + if (!ops) + return 0; + fib_rules_seq = ops->fib_rules_seq; + rules_ops_put(ops); + + return fib_rules_seq; +} +EXPORT_SYMBOL_GPL(fib_rules_seq_read); + static int validate_rulemsg(struct fib_rule_hdr *frh, struct nlattr **tb, struct fib_rules_ops *ops) { @@ -548,6 +611,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, if (rule->tun_id) ip_tunnel_need_metadata(); + call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule, ops, extack); notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid); flush_route_cache(ops); rules_ops_put(ops); @@ -687,6 +751,8 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, } } + call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule, ops, + NULL); notify_rule_change(RTM_DELRULE, rule, ops, nlh, NETLINK_CB(skb).portid); fib_rule_put(rule); @@ -956,16 +1022,22 @@ static int __net_init fib_rules_net_init(struct net *net) return 0; } +static void __net_exit fib_rules_net_exit(struct net *net) +{ + WARN_ON_ONCE(!list_empty(&net->rules_ops)); +} + static struct pernet_operations fib_rules_net_ops = { .init = fib_rules_net_init, + .exit = fib_rules_net_exit, }; static int __init fib_rules_init(void) { int err; - rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL, NULL); - rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL, NULL); - rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule, NULL); + rtnl_register(PF_UNSPEC, RTM_NEWRULE, fib_nl_newrule, NULL, 0); + rtnl_register(PF_UNSPEC, RTM_DELRULE, fib_nl_delrule, NULL, 0); + rtnl_register(PF_UNSPEC, RTM_GETRULE, NULL, fib_nl_dumprule, 0); err = register_pernet_subsys(&fib_rules_net_ops); if (err < 0) diff --git a/net/core/filter.c b/net/core/filter.c index 6280a602604c..1afa17935954 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -43,6 +43,7 @@ #include <linux/timer.h> #include <linux/uaccess.h> #include <asm/unaligned.h> +#include <asm/cmpxchg.h> #include <linux/filter.h> #include <linux/ratelimit.h> #include <linux/seccomp.h> @@ -55,6 +56,7 @@ #include <net/sock_reuseport.h> #include <net/busy_poll.h> #include <net/tcp.h> +#include <linux/bpf_trace.h> /** * sk_filter_trim_cap - run a packet through a socket filter @@ -513,14 +515,27 @@ do_pass: break; } - /* Convert JEQ into JNE when 'jump_true' is next insn. */ - if (fp->jt == 0 && BPF_OP(fp->code) == BPF_JEQ) { - insn->code = BPF_JMP | BPF_JNE | bpf_src; + /* Convert some jumps when 'jump_true' is next insn. */ + if (fp->jt == 0) { + switch (BPF_OP(fp->code)) { + case BPF_JEQ: + insn->code = BPF_JMP | BPF_JNE | bpf_src; + break; + case BPF_JGT: + insn->code = BPF_JMP | BPF_JLE | bpf_src; + break; + case BPF_JGE: + insn->code = BPF_JMP | BPF_JLT | bpf_src; + break; + default: + goto jmp_rest; + } + target = i + fp->jf + 1; BPF_EMIT_JMP; break; } - +jmp_rest: /* Other jumps are mapped into two insns: Jxx and JA. */ target = i + fp->jt + 1; insn->code = BPF_JMP | BPF_OP(fp->code) | bpf_src; @@ -975,10 +990,14 @@ static bool __sk_filter_charge(struct sock *sk, struct sk_filter *fp) bool sk_filter_charge(struct sock *sk, struct sk_filter *fp) { - bool ret = __sk_filter_charge(sk, fp); - if (ret) - refcount_inc(&fp->refcnt); - return ret; + if (!refcount_inc_not_zero(&fp->refcnt)) + return false; + + if (!__sk_filter_charge(sk, fp)) { + sk_filter_release(fp); + return false; + } + return true; } static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp) @@ -1388,7 +1407,7 @@ static inline int bpf_try_make_writable(struct sk_buff *skb, { int err = __bpf_try_make_writable(skb, write_len); - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); return err; } @@ -1778,6 +1797,9 @@ static const struct bpf_func_proto bpf_clone_redirect_proto = { struct redirect_info { u32 ifindex; u32 flags; + struct bpf_map *map; + struct bpf_map *map_to_flush; + unsigned long map_owner; }; static DEFINE_PER_CPU(struct redirect_info, redirect_info); @@ -1818,6 +1840,47 @@ static const struct bpf_func_proto bpf_redirect_proto = { .arg2_type = ARG_ANYTHING, }; +BPF_CALL_4(bpf_sk_redirect_map, struct sk_buff *, skb, + struct bpf_map *, map, u32, key, u64, flags) +{ + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); + + /* If user passes invalid input drop the packet. */ + if (unlikely(flags)) + return SK_DROP; + + tcb->bpf.key = key; + tcb->bpf.flags = flags; + tcb->bpf.map = map; + + return SK_PASS; +} + +struct sock *do_sk_redirect_map(struct sk_buff *skb) +{ + struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); + struct sock *sk = NULL; + + if (tcb->bpf.map) { + sk = __sock_map_lookup_elem(tcb->bpf.map, tcb->bpf.key); + + tcb->bpf.key = 0; + tcb->bpf.map = NULL; + } + + return sk; +} + +static const struct bpf_func_proto bpf_sk_redirect_map_proto = { + .func = bpf_sk_redirect_map, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_ANYTHING, +}; + BPF_CALL_1(bpf_get_cgroup_classid, const struct sk_buff *, skb) { return task_get_classid(skb); @@ -1906,7 +1969,7 @@ BPF_CALL_3(bpf_skb_vlan_push, struct sk_buff *, skb, __be16, vlan_proto, ret = skb_vlan_push(skb, vlan_proto, vlan_tci); bpf_pull_mac_rcsum(skb); - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); return ret; } @@ -1928,7 +1991,7 @@ BPF_CALL_1(bpf_skb_vlan_pop, struct sk_buff *, skb) ret = skb_vlan_pop(skb); bpf_pull_mac_rcsum(skb); - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); return ret; } @@ -2024,8 +2087,8 @@ static int bpf_skb_proto_4_to_6(struct sk_buff *skb) return ret; if (skb_is_gso(skb)) { - /* SKB_GSO_UDP stays as is. SKB_GSO_TCPV4 needs to - * be changed into SKB_GSO_TCPV6. + /* SKB_GSO_TCPV4 needs to be changed into + * SKB_GSO_TCPV6. */ if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4) { skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV4; @@ -2060,8 +2123,8 @@ static int bpf_skb_proto_6_to_4(struct sk_buff *skb) return ret; if (skb_is_gso(skb)) { - /* SKB_GSO_UDP stays as is. SKB_GSO_TCPV6 needs to - * be changed into SKB_GSO_TCPV4. + /* SKB_GSO_TCPV6 needs to be changed into + * SKB_GSO_TCPV4. */ if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6) { skb_shinfo(skb)->gso_type &= ~SKB_GSO_TCPV6; @@ -2122,7 +2185,7 @@ BPF_CALL_3(bpf_skb_change_proto, struct sk_buff *, skb, __be16, proto, * need to be verified first. */ ret = bpf_skb_proto_xlat(skb, proto); - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); return ret; } @@ -2247,7 +2310,7 @@ static int bpf_skb_adjust_net(struct sk_buff *skb, s32 len_diff) ret = shrink ? bpf_skb_net_shrink(skb, len_diff_abs) : bpf_skb_net_grow(skb, len_diff_abs); - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); return ret; } @@ -2338,7 +2401,7 @@ BPF_CALL_3(bpf_skb_change_tail, struct sk_buff *, skb, u32, new_len, skb_gso_reset(skb); } - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); return ret; } @@ -2378,7 +2441,7 @@ BPF_CALL_3(bpf_skb_change_head, struct sk_buff *, skb, u32, head_room, skb_reset_mac_header(skb); } - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); return 0; } @@ -2391,14 +2454,26 @@ static const struct bpf_func_proto bpf_skb_change_head_proto = { .arg3_type = ARG_ANYTHING, }; +static unsigned long xdp_get_metalen(const struct xdp_buff *xdp) +{ + return xdp_data_meta_unsupported(xdp) ? 0 : + xdp->data - xdp->data_meta; +} + BPF_CALL_2(bpf_xdp_adjust_head, struct xdp_buff *, xdp, int, offset) { + unsigned long metalen = xdp_get_metalen(xdp); + void *data_start = xdp->data_hard_start + metalen; void *data = xdp->data + offset; - if (unlikely(data < xdp->data_hard_start || + if (unlikely(data < data_start || data > xdp->data_end - ETH_HLEN)) return -EINVAL; + if (metalen) + memmove(xdp->data_meta + offset, + xdp->data_meta, metalen); + xdp->data_meta += offset; xdp->data = data; return 0; @@ -2412,6 +2487,326 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = { .arg2_type = ARG_ANYTHING, }; +BPF_CALL_2(bpf_xdp_adjust_meta, struct xdp_buff *, xdp, int, offset) +{ + void *meta = xdp->data_meta + offset; + unsigned long metalen = xdp->data - meta; + + if (xdp_data_meta_unsupported(xdp)) + return -ENOTSUPP; + if (unlikely(meta < xdp->data_hard_start || + meta > xdp->data)) + return -EINVAL; + if (unlikely((metalen & (sizeof(__u32) - 1)) || + (metalen > 32))) + return -EACCES; + + xdp->data_meta = meta; + + return 0; +} + +static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = { + .func = bpf_xdp_adjust_meta, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, +}; + +static int __bpf_tx_xdp(struct net_device *dev, + struct bpf_map *map, + struct xdp_buff *xdp, + u32 index) +{ + int err; + + if (!dev->netdev_ops->ndo_xdp_xmit) { + return -EOPNOTSUPP; + } + + err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp); + if (err) + return err; + dev->netdev_ops->ndo_xdp_flush(dev); + return 0; +} + +static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd, + struct bpf_map *map, + struct xdp_buff *xdp, + u32 index) +{ + int err; + + if (map->map_type == BPF_MAP_TYPE_DEVMAP) { + struct net_device *dev = fwd; + + if (!dev->netdev_ops->ndo_xdp_xmit) + return -EOPNOTSUPP; + + err = dev->netdev_ops->ndo_xdp_xmit(dev, xdp); + if (err) + return err; + __dev_map_insert_ctx(map, index); + + } else if (map->map_type == BPF_MAP_TYPE_CPUMAP) { + struct bpf_cpu_map_entry *rcpu = fwd; + + err = cpu_map_enqueue(rcpu, xdp, dev_rx); + if (err) + return err; + __cpu_map_insert_ctx(map, index); + } + return 0; +} + +void xdp_do_flush_map(void) +{ + struct redirect_info *ri = this_cpu_ptr(&redirect_info); + struct bpf_map *map = ri->map_to_flush; + + ri->map_to_flush = NULL; + if (map) { + switch (map->map_type) { + case BPF_MAP_TYPE_DEVMAP: + __dev_map_flush(map); + break; + case BPF_MAP_TYPE_CPUMAP: + __cpu_map_flush(map); + break; + default: + break; + } + } +} +EXPORT_SYMBOL_GPL(xdp_do_flush_map); + +static void *__xdp_map_lookup_elem(struct bpf_map *map, u32 index) +{ + switch (map->map_type) { + case BPF_MAP_TYPE_DEVMAP: + return __dev_map_lookup_elem(map, index); + case BPF_MAP_TYPE_CPUMAP: + return __cpu_map_lookup_elem(map, index); + default: + return NULL; + } +} + +static inline bool xdp_map_invalid(const struct bpf_prog *xdp_prog, + unsigned long aux) +{ + return (unsigned long)xdp_prog->aux != aux; +} + +static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp, + struct bpf_prog *xdp_prog) +{ + struct redirect_info *ri = this_cpu_ptr(&redirect_info); + unsigned long map_owner = ri->map_owner; + struct bpf_map *map = ri->map; + u32 index = ri->ifindex; + void *fwd = NULL; + int err; + + ri->ifindex = 0; + ri->map = NULL; + ri->map_owner = 0; + + if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) { + err = -EFAULT; + map = NULL; + goto err; + } + + fwd = __xdp_map_lookup_elem(map, index); + if (!fwd) { + err = -EINVAL; + goto err; + } + if (ri->map_to_flush && ri->map_to_flush != map) + xdp_do_flush_map(); + + err = __bpf_tx_xdp_map(dev, fwd, map, xdp, index); + if (unlikely(err)) + goto err; + + ri->map_to_flush = map; + _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index); + return 0; +err: + _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err); + return err; +} + +int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp, + struct bpf_prog *xdp_prog) +{ + struct redirect_info *ri = this_cpu_ptr(&redirect_info); + struct net_device *fwd; + u32 index = ri->ifindex; + int err; + + if (ri->map) + return xdp_do_redirect_map(dev, xdp, xdp_prog); + + fwd = dev_get_by_index_rcu(dev_net(dev), index); + ri->ifindex = 0; + if (unlikely(!fwd)) { + err = -EINVAL; + goto err; + } + + err = __bpf_tx_xdp(fwd, NULL, xdp, 0); + if (unlikely(err)) + goto err; + + _trace_xdp_redirect(dev, xdp_prog, index); + return 0; +err: + _trace_xdp_redirect_err(dev, xdp_prog, index, err); + return err; +} +EXPORT_SYMBOL_GPL(xdp_do_redirect); + +static int __xdp_generic_ok_fwd_dev(struct sk_buff *skb, struct net_device *fwd) +{ + unsigned int len; + + if (unlikely(!(fwd->flags & IFF_UP))) + return -ENETDOWN; + + len = fwd->mtu + fwd->hard_header_len + VLAN_HLEN; + if (skb->len > len) + return -EMSGSIZE; + + return 0; +} + +int xdp_do_generic_redirect_map(struct net_device *dev, struct sk_buff *skb, + struct bpf_prog *xdp_prog) +{ + struct redirect_info *ri = this_cpu_ptr(&redirect_info); + unsigned long map_owner = ri->map_owner; + struct bpf_map *map = ri->map; + struct net_device *fwd = NULL; + u32 index = ri->ifindex; + int err = 0; + + ri->ifindex = 0; + ri->map = NULL; + ri->map_owner = 0; + + if (unlikely(xdp_map_invalid(xdp_prog, map_owner))) { + err = -EFAULT; + map = NULL; + goto err; + } + fwd = __xdp_map_lookup_elem(map, index); + if (unlikely(!fwd)) { + err = -EINVAL; + goto err; + } + + if (map->map_type == BPF_MAP_TYPE_DEVMAP) { + if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd)))) + goto err; + skb->dev = fwd; + } else { + /* TODO: Handle BPF_MAP_TYPE_CPUMAP */ + err = -EBADRQC; + goto err; + } + + _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index); + return 0; +err: + _trace_xdp_redirect_map_err(dev, xdp_prog, fwd, map, index, err); + return err; +} + +int xdp_do_generic_redirect(struct net_device *dev, struct sk_buff *skb, + struct bpf_prog *xdp_prog) +{ + struct redirect_info *ri = this_cpu_ptr(&redirect_info); + u32 index = ri->ifindex; + struct net_device *fwd; + int err = 0; + + if (ri->map) + return xdp_do_generic_redirect_map(dev, skb, xdp_prog); + + ri->ifindex = 0; + fwd = dev_get_by_index_rcu(dev_net(dev), index); + if (unlikely(!fwd)) { + err = -EINVAL; + goto err; + } + + if (unlikely((err = __xdp_generic_ok_fwd_dev(skb, fwd)))) + goto err; + + skb->dev = fwd; + _trace_xdp_redirect(dev, xdp_prog, index); + return 0; +err: + _trace_xdp_redirect_err(dev, xdp_prog, index, err); + return err; +} +EXPORT_SYMBOL_GPL(xdp_do_generic_redirect); + +BPF_CALL_2(bpf_xdp_redirect, u32, ifindex, u64, flags) +{ + struct redirect_info *ri = this_cpu_ptr(&redirect_info); + + if (unlikely(flags)) + return XDP_ABORTED; + + ri->ifindex = ifindex; + ri->flags = flags; + ri->map = NULL; + ri->map_owner = 0; + + return XDP_REDIRECT; +} + +static const struct bpf_func_proto bpf_xdp_redirect_proto = { + .func = bpf_xdp_redirect, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_ANYTHING, + .arg2_type = ARG_ANYTHING, +}; + +BPF_CALL_4(bpf_xdp_redirect_map, struct bpf_map *, map, u32, ifindex, u64, flags, + unsigned long, map_owner) +{ + struct redirect_info *ri = this_cpu_ptr(&redirect_info); + + if (unlikely(flags)) + return XDP_ABORTED; + + ri->ifindex = ifindex; + ri->flags = flags; + ri->map = map; + ri->map_owner = map_owner; + + return XDP_REDIRECT; +} + +/* Note, arg4 is hidden from users and populated by the verifier + * with the right pointer. + */ +static const struct bpf_func_proto bpf_xdp_redirect_map_proto = { + .func = bpf_xdp_redirect_map, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_CONST_MAP_PTR, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, +}; + bool bpf_helper_changes_pkt_data(void *func) { if (func == bpf_skb_vlan_push || @@ -2425,7 +2820,8 @@ bool bpf_helper_changes_pkt_data(void *func) func == bpf_clone_redirect || func == bpf_l3_csum_replace || func == bpf_l4_csum_replace || - func == bpf_xdp_adjust_head) + func == bpf_xdp_adjust_head || + func == bpf_xdp_adjust_meta) return true; return false; @@ -2676,14 +3072,15 @@ static const struct bpf_func_proto * bpf_get_skb_set_tunnel_proto(enum bpf_func_id which) { if (!md_dst) { - /* Race is not possible, since it's called from verifier - * that is holding verifier mutex. - */ - md_dst = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX, - METADATA_IP_TUNNEL, - GFP_KERNEL); - if (!md_dst) + struct metadata_dst __percpu *tmp; + + tmp = metadata_dst_alloc_percpu(IP_TUNNEL_OPTS_MAX, + METADATA_IP_TUNNEL, + GFP_KERNEL); + if (!tmp) return NULL; + if (cmpxchg(&md_dst, NULL, tmp)) + metadata_dst_free_percpu(tmp); } switch (which) { @@ -2836,15 +3233,12 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, sk->sk_prot->setsockopt == tcp_setsockopt) { if (optname == TCP_CONGESTION) { char name[TCP_CA_NAME_MAX]; + bool reinit = bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN; strncpy(name, optval, min_t(long, optlen, TCP_CA_NAME_MAX-1)); name[TCP_CA_NAME_MAX-1] = 0; - ret = tcp_set_congestion_control(sk, name, false); - if (!ret && bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN) - /* replacing an existing ca */ - tcp_reinit_congestion_control(sk, - inet_csk(sk)->icsk_ca_ops); + ret = tcp_set_congestion_control(sk, name, false, reinit); } else { struct tcp_sock *tp = tcp_sk(sk); @@ -2872,7 +3266,6 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, ret = -EINVAL; } } - ret = -EINVAL; #endif } else { ret = -EINVAL; @@ -2882,7 +3275,7 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, static const struct bpf_func_proto bpf_setsockopt_proto = { .func = bpf_setsockopt, - .gpl_only = true, + .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, .arg2_type = ARG_ANYTHING, @@ -2891,6 +3284,47 @@ static const struct bpf_func_proto bpf_setsockopt_proto = { .arg5_type = ARG_CONST_SIZE, }; +BPF_CALL_5(bpf_getsockopt, struct bpf_sock_ops_kern *, bpf_sock, + int, level, int, optname, char *, optval, int, optlen) +{ + struct sock *sk = bpf_sock->sk; + + if (!sk_fullsock(sk)) + goto err_clear; + +#ifdef CONFIG_INET + if (level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) { + if (optname == TCP_CONGESTION) { + struct inet_connection_sock *icsk = inet_csk(sk); + + if (!icsk->icsk_ca_ops || optlen <= 1) + goto err_clear; + strncpy(optval, icsk->icsk_ca_ops->name, optlen); + optval[optlen - 1] = 0; + } else { + goto err_clear; + } + } else { + goto err_clear; + } + return 0; +#endif +err_clear: + memset(optval, 0, optlen); + return -EINVAL; +} + +static const struct bpf_func_proto bpf_getsockopt_proto = { + .func = bpf_getsockopt, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_UNINIT_MEM, + .arg5_type = ARG_CONST_SIZE, +}; + static const struct bpf_func_proto * bpf_base_func_proto(enum bpf_func_id func_id) { @@ -2920,6 +3354,20 @@ bpf_base_func_proto(enum bpf_func_id func_id) } static const struct bpf_func_proto * +sock_filter_func_proto(enum bpf_func_id func_id) +{ + switch (func_id) { + /* inet and inet6 sockets are created in a process + * context so there is always a valid uid/gid + */ + case BPF_FUNC_get_current_uid_gid: + return &bpf_get_current_uid_gid_proto; + default: + return bpf_base_func_proto(func_id); + } +} + +static const struct bpf_func_proto * sk_filter_func_proto(enum bpf_func_id func_id) { switch (func_id) { @@ -3011,6 +3459,12 @@ xdp_func_proto(enum bpf_func_id func_id) return &bpf_get_smp_processor_id_proto; case BPF_FUNC_xdp_adjust_head: return &bpf_xdp_adjust_head_proto; + case BPF_FUNC_xdp_adjust_meta: + return &bpf_xdp_adjust_meta_proto; + case BPF_FUNC_redirect: + return &bpf_xdp_redirect_proto; + case BPF_FUNC_redirect_map: + return &bpf_xdp_redirect_map_proto; default: return bpf_base_func_proto(func_id); } @@ -3049,6 +3503,34 @@ static const struct bpf_func_proto * switch (func_id) { case BPF_FUNC_setsockopt: return &bpf_setsockopt_proto; + case BPF_FUNC_getsockopt: + return &bpf_getsockopt_proto; + case BPF_FUNC_sock_map_update: + return &bpf_sock_map_update_proto; + default: + return bpf_base_func_proto(func_id); + } +} + +static const struct bpf_func_proto *sk_skb_func_proto(enum bpf_func_id func_id) +{ + switch (func_id) { + case BPF_FUNC_skb_store_bytes: + return &bpf_skb_store_bytes_proto; + case BPF_FUNC_skb_load_bytes: + return &bpf_skb_load_bytes_proto; + case BPF_FUNC_skb_pull_data: + return &bpf_skb_pull_data_proto; + case BPF_FUNC_skb_change_tail: + return &bpf_skb_change_tail_proto; + case BPF_FUNC_skb_change_head: + return &bpf_skb_change_head_proto; + case BPF_FUNC_get_socket_cookie: + return &bpf_get_socket_cookie_proto; + case BPF_FUNC_get_socket_uid: + return &bpf_get_socket_uid_proto; + case BPF_FUNC_sk_redirect_map: + return &bpf_sk_redirect_map_proto; default: return bpf_base_func_proto(func_id); } @@ -3106,7 +3588,12 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type if (off + size > offsetofend(struct __sk_buff, cb[4])) return false; break; + case bpf_ctx_range_till(struct __sk_buff, remote_ip6[0], remote_ip6[3]): + case bpf_ctx_range_till(struct __sk_buff, local_ip6[0], local_ip6[3]): + case bpf_ctx_range_till(struct __sk_buff, remote_ip4, remote_ip4): + case bpf_ctx_range_till(struct __sk_buff, local_ip4, local_ip4): case bpf_ctx_range(struct __sk_buff, data): + case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, data_end): if (size != size_default) return false; @@ -3133,7 +3620,9 @@ static bool sk_filter_is_valid_access(int off, int size, switch (off) { case bpf_ctx_range(struct __sk_buff, tc_classid): case bpf_ctx_range(struct __sk_buff, data): + case bpf_ctx_range(struct __sk_buff, data_meta): case bpf_ctx_range(struct __sk_buff, data_end): + case bpf_ctx_range_till(struct __sk_buff, family, local_port): return false; } @@ -3155,6 +3644,8 @@ static bool lwt_is_valid_access(int off, int size, { switch (off) { case bpf_ctx_range(struct __sk_buff, tc_classid): + case bpf_ctx_range_till(struct __sk_buff, family, local_port): + case bpf_ctx_range(struct __sk_buff, data_meta): return false; } @@ -3188,6 +3679,8 @@ static bool sock_filter_is_valid_access(int off, int size, if (type == BPF_WRITE) { switch (off) { case offsetof(struct bpf_sock, bound_dev_if): + case offsetof(struct bpf_sock, mark): + case offsetof(struct bpf_sock, priority): break; default: return false; @@ -3205,8 +3698,8 @@ static bool sock_filter_is_valid_access(int off, int size, return true; } -static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write, - const struct bpf_prog *prog) +static int bpf_unclone_prologue(struct bpf_insn *insn_buf, bool direct_write, + const struct bpf_prog *prog, int drop_verdict) { struct bpf_insn *insn = insn_buf; @@ -3233,7 +3726,7 @@ static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write, * return TC_ACT_SHOT; */ *insn++ = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2); - *insn++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, TC_ACT_SHOT); + *insn++ = BPF_ALU32_IMM(BPF_MOV, BPF_REG_0, drop_verdict); *insn++ = BPF_EXIT_INSN(); /* restore: */ @@ -3244,6 +3737,12 @@ static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write, return insn - insn_buf; } +static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write, + const struct bpf_prog *prog) +{ + return bpf_unclone_prologue(insn_buf, direct_write, prog, TC_ACT_SHOT); +} + static bool tc_cls_act_is_valid_access(int off, int size, enum bpf_access_type type, struct bpf_insn_access_aux *info) @@ -3265,9 +3764,14 @@ static bool tc_cls_act_is_valid_access(int off, int size, case bpf_ctx_range(struct __sk_buff, data): info->reg_type = PTR_TO_PACKET; break; + case bpf_ctx_range(struct __sk_buff, data_meta): + info->reg_type = PTR_TO_PACKET_META; + break; case bpf_ctx_range(struct __sk_buff, data_end): info->reg_type = PTR_TO_PACKET_END; break; + case bpf_ctx_range_till(struct __sk_buff, family, local_port): + return false; } return bpf_skb_is_valid_access(off, size, type, info); @@ -3296,6 +3800,9 @@ static bool xdp_is_valid_access(int off, int size, case offsetof(struct xdp_md, data): info->reg_type = PTR_TO_PACKET; break; + case offsetof(struct xdp_md, data_meta): + info->reg_type = PTR_TO_PACKET_META; + break; case offsetof(struct xdp_md, data_end): info->reg_type = PTR_TO_PACKET_END; break; @@ -3306,7 +3813,11 @@ static bool xdp_is_valid_access(int off, int size, void bpf_warn_invalid_xdp_action(u32 act) { - WARN_ONCE(1, "Illegal XDP return value %u, expect packet loss\n", act); + const u32 act_max = XDP_REDIRECT; + + WARN_ONCE(1, "%s XDP return value %u, expect packet loss!\n", + act > act_max ? "Illegal" : "Driver unsupported", + act); } EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); @@ -3340,6 +3851,46 @@ static bool sock_ops_is_valid_access(int off, int size, return __is_valid_sock_ops_access(off, size); } +static int sk_skb_prologue(struct bpf_insn *insn_buf, bool direct_write, + const struct bpf_prog *prog) +{ + return bpf_unclone_prologue(insn_buf, direct_write, prog, SK_DROP); +} + +static bool sk_skb_is_valid_access(int off, int size, + enum bpf_access_type type, + struct bpf_insn_access_aux *info) +{ + switch (off) { + case bpf_ctx_range(struct __sk_buff, tc_classid): + case bpf_ctx_range(struct __sk_buff, data_meta): + return false; + } + + if (type == BPF_WRITE) { + switch (off) { + case bpf_ctx_range(struct __sk_buff, tc_index): + case bpf_ctx_range(struct __sk_buff, priority): + break; + default: + return false; + } + } + + switch (off) { + case bpf_ctx_range(struct __sk_buff, mark): + return false; + case bpf_ctx_range(struct __sk_buff, data): + info->reg_type = PTR_TO_PACKET; + break; + case bpf_ctx_range(struct __sk_buff, data_end): + info->reg_type = PTR_TO_PACKET_END; + break; + } + + return bpf_skb_is_valid_access(off, size, type, info); +} + static u32 bpf_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, @@ -3485,6 +4036,15 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, offsetof(struct sk_buff, data)); break; + case offsetof(struct __sk_buff, data_meta): + off = si->off; + off -= offsetof(struct __sk_buff, data_meta); + off += offsetof(struct sk_buff, cb); + off += offsetof(struct bpf_skb_data_end, data_meta); + *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, + si->src_reg, off); + break; + case offsetof(struct __sk_buff, data_end): off = si->off; off -= offsetof(struct __sk_buff, data_end); @@ -3525,6 +4085,106 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, *insn++ = BPF_MOV64_IMM(si->dst_reg, 0); #endif break; + case offsetof(struct __sk_buff, family): + BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_family) != 2); + + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), + si->dst_reg, si->src_reg, + offsetof(struct sk_buff, sk)); + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, + bpf_target_off(struct sock_common, + skc_family, + 2, target_size)); + break; + case offsetof(struct __sk_buff, remote_ip4): + BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_daddr) != 4); + + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), + si->dst_reg, si->src_reg, + offsetof(struct sk_buff, sk)); + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, + bpf_target_off(struct sock_common, + skc_daddr, + 4, target_size)); + break; + case offsetof(struct __sk_buff, local_ip4): + BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, + skc_rcv_saddr) != 4); + + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), + si->dst_reg, si->src_reg, + offsetof(struct sk_buff, sk)); + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, + bpf_target_off(struct sock_common, + skc_rcv_saddr, + 4, target_size)); + break; + case offsetof(struct __sk_buff, remote_ip6[0]) ... + offsetof(struct __sk_buff, remote_ip6[3]): +#if IS_ENABLED(CONFIG_IPV6) + BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, + skc_v6_daddr.s6_addr32[0]) != 4); + + off = si->off; + off -= offsetof(struct __sk_buff, remote_ip6[0]); + + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), + si->dst_reg, si->src_reg, + offsetof(struct sk_buff, sk)); + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, + offsetof(struct sock_common, + skc_v6_daddr.s6_addr32[0]) + + off); +#else + *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); +#endif + break; + case offsetof(struct __sk_buff, local_ip6[0]) ... + offsetof(struct __sk_buff, local_ip6[3]): +#if IS_ENABLED(CONFIG_IPV6) + BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, + skc_v6_rcv_saddr.s6_addr32[0]) != 4); + + off = si->off; + off -= offsetof(struct __sk_buff, local_ip6[0]); + + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), + si->dst_reg, si->src_reg, + offsetof(struct sk_buff, sk)); + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg, + offsetof(struct sock_common, + skc_v6_rcv_saddr.s6_addr32[0]) + + off); +#else + *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); +#endif + break; + + case offsetof(struct __sk_buff, remote_port): + BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_dport) != 2); + + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), + si->dst_reg, si->src_reg, + offsetof(struct sk_buff, sk)); + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, + bpf_target_off(struct sock_common, + skc_dport, + 2, target_size)); +#ifndef __BIG_ENDIAN_BITFIELD + *insn++ = BPF_ALU32_IMM(BPF_LSH, si->dst_reg, 16); +#endif + break; + + case offsetof(struct __sk_buff, local_port): + BUILD_BUG_ON(FIELD_SIZEOF(struct sock_common, skc_num) != 2); + + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_buff, sk), + si->dst_reg, si->src_reg, + offsetof(struct sk_buff, sk)); + *insn++ = BPF_LDX_MEM(BPF_H, si->dst_reg, si->dst_reg, + bpf_target_off(struct sock_common, + skc_num, 2, target_size)); + break; } return insn - insn_buf; @@ -3549,6 +4209,28 @@ static u32 sock_filter_convert_ctx_access(enum bpf_access_type type, offsetof(struct sock, sk_bound_dev_if)); break; + case offsetof(struct bpf_sock, mark): + BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_mark) != 4); + + if (type == BPF_WRITE) + *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, + offsetof(struct sock, sk_mark)); + else + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, + offsetof(struct sock, sk_mark)); + break; + + case offsetof(struct bpf_sock, priority): + BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_priority) != 4); + + if (type == BPF_WRITE) + *insn++ = BPF_STX_MEM(BPF_W, si->dst_reg, si->src_reg, + offsetof(struct sock, sk_priority)); + else + *insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->src_reg, + offsetof(struct sock, sk_priority)); + break; + case offsetof(struct bpf_sock, family): BUILD_BUG_ON(FIELD_SIZEOF(struct sock, sk_family) != 2); @@ -3611,6 +4293,11 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type, si->dst_reg, si->src_reg, offsetof(struct xdp_buff, data)); break; + case offsetof(struct xdp_md, data_meta): + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_meta), + si->dst_reg, si->src_reg, + offsetof(struct xdp_buff, data_meta)); + break; case offsetof(struct xdp_md, data_end): *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, data_end), si->dst_reg, si->src_reg, @@ -3754,61 +4441,120 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, return insn - insn_buf; } -const struct bpf_verifier_ops sk_filter_prog_ops = { +static u32 sk_skb_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, u32 *target_size) +{ + struct bpf_insn *insn = insn_buf; + int off; + + switch (si->off) { + case offsetof(struct __sk_buff, data_end): + off = si->off; + off -= offsetof(struct __sk_buff, data_end); + off += offsetof(struct sk_buff, cb); + off += offsetof(struct tcp_skb_cb, bpf.data_end); + *insn++ = BPF_LDX_MEM(BPF_SIZEOF(void *), si->dst_reg, + si->src_reg, off); + break; + default: + return bpf_convert_ctx_access(type, si, insn_buf, prog, + target_size); + } + + return insn - insn_buf; +} + +const struct bpf_verifier_ops sk_filter_verifier_ops = { .get_func_proto = sk_filter_func_proto, .is_valid_access = sk_filter_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, }; -const struct bpf_verifier_ops tc_cls_act_prog_ops = { +const struct bpf_prog_ops sk_filter_prog_ops = { +}; + +const struct bpf_verifier_ops tc_cls_act_verifier_ops = { .get_func_proto = tc_cls_act_func_proto, .is_valid_access = tc_cls_act_is_valid_access, .convert_ctx_access = tc_cls_act_convert_ctx_access, .gen_prologue = tc_cls_act_prologue, +}; + +const struct bpf_prog_ops tc_cls_act_prog_ops = { .test_run = bpf_prog_test_run_skb, }; -const struct bpf_verifier_ops xdp_prog_ops = { +const struct bpf_verifier_ops xdp_verifier_ops = { .get_func_proto = xdp_func_proto, .is_valid_access = xdp_is_valid_access, .convert_ctx_access = xdp_convert_ctx_access, +}; + +const struct bpf_prog_ops xdp_prog_ops = { .test_run = bpf_prog_test_run_xdp, }; -const struct bpf_verifier_ops cg_skb_prog_ops = { +const struct bpf_verifier_ops cg_skb_verifier_ops = { .get_func_proto = sk_filter_func_proto, .is_valid_access = sk_filter_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, +}; + +const struct bpf_prog_ops cg_skb_prog_ops = { .test_run = bpf_prog_test_run_skb, }; -const struct bpf_verifier_ops lwt_inout_prog_ops = { +const struct bpf_verifier_ops lwt_inout_verifier_ops = { .get_func_proto = lwt_inout_func_proto, .is_valid_access = lwt_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, +}; + +const struct bpf_prog_ops lwt_inout_prog_ops = { .test_run = bpf_prog_test_run_skb, }; -const struct bpf_verifier_ops lwt_xmit_prog_ops = { +const struct bpf_verifier_ops lwt_xmit_verifier_ops = { .get_func_proto = lwt_xmit_func_proto, .is_valid_access = lwt_is_valid_access, .convert_ctx_access = bpf_convert_ctx_access, .gen_prologue = tc_cls_act_prologue, +}; + +const struct bpf_prog_ops lwt_xmit_prog_ops = { .test_run = bpf_prog_test_run_skb, }; -const struct bpf_verifier_ops cg_sock_prog_ops = { - .get_func_proto = bpf_base_func_proto, +const struct bpf_verifier_ops cg_sock_verifier_ops = { + .get_func_proto = sock_filter_func_proto, .is_valid_access = sock_filter_is_valid_access, .convert_ctx_access = sock_filter_convert_ctx_access, }; -const struct bpf_verifier_ops sock_ops_prog_ops = { +const struct bpf_prog_ops cg_sock_prog_ops = { +}; + +const struct bpf_verifier_ops sock_ops_verifier_ops = { .get_func_proto = sock_ops_func_proto, .is_valid_access = sock_ops_is_valid_access, .convert_ctx_access = sock_ops_convert_ctx_access, }; +const struct bpf_prog_ops sock_ops_prog_ops = { +}; + +const struct bpf_verifier_ops sk_skb_verifier_ops = { + .get_func_proto = sk_skb_func_proto, + .is_valid_access = sk_skb_is_valid_access, + .convert_ctx_access = sk_skb_convert_ctx_access, + .gen_prologue = sk_skb_prologue, +}; + +const struct bpf_prog_ops sk_skb_prog_ops = { +}; + int sk_detach_filter(struct sock *sk) { int ret = -ENOENT; diff --git a/net/core/flow.c b/net/core/flow.c deleted file mode 100644 index f7f5d1932a27..000000000000 --- a/net/core/flow.c +++ /dev/null @@ -1,516 +0,0 @@ -/* flow.c: Generic flow cache. - * - * Copyright (C) 2003 Alexey N. Kuznetsov (kuznet@ms2.inr.ac.ru) - * Copyright (C) 2003 David S. Miller (davem@redhat.com) - */ - -#include <linux/kernel.h> -#include <linux/module.h> -#include <linux/list.h> -#include <linux/jhash.h> -#include <linux/interrupt.h> -#include <linux/mm.h> -#include <linux/random.h> -#include <linux/init.h> -#include <linux/slab.h> -#include <linux/smp.h> -#include <linux/completion.h> -#include <linux/percpu.h> -#include <linux/bitops.h> -#include <linux/notifier.h> -#include <linux/cpu.h> -#include <linux/cpumask.h> -#include <linux/mutex.h> -#include <net/flow.h> -#include <linux/atomic.h> -#include <linux/security.h> -#include <net/net_namespace.h> - -struct flow_cache_entry { - union { - struct hlist_node hlist; - struct list_head gc_list; - } u; - struct net *net; - u16 family; - u8 dir; - u32 genid; - struct flowi key; - struct flow_cache_object *object; -}; - -struct flow_flush_info { - struct flow_cache *cache; - atomic_t cpuleft; - struct completion completion; -}; - -static struct kmem_cache *flow_cachep __read_mostly; - -#define flow_cache_hash_size(cache) (1U << (cache)->hash_shift) -#define FLOW_HASH_RND_PERIOD (10 * 60 * HZ) - -static void flow_cache_new_hashrnd(unsigned long arg) -{ - struct flow_cache *fc = (void *) arg; - int i; - - for_each_possible_cpu(i) - per_cpu_ptr(fc->percpu, i)->hash_rnd_recalc = 1; - - fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; - add_timer(&fc->rnd_timer); -} - -static int flow_entry_valid(struct flow_cache_entry *fle, - struct netns_xfrm *xfrm) -{ - if (atomic_read(&xfrm->flow_cache_genid) != fle->genid) - return 0; - if (fle->object && !fle->object->ops->check(fle->object)) - return 0; - return 1; -} - -static void flow_entry_kill(struct flow_cache_entry *fle, - struct netns_xfrm *xfrm) -{ - if (fle->object) - fle->object->ops->delete(fle->object); - kmem_cache_free(flow_cachep, fle); -} - -static void flow_cache_gc_task(struct work_struct *work) -{ - struct list_head gc_list; - struct flow_cache_entry *fce, *n; - struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm, - flow_cache_gc_work); - - INIT_LIST_HEAD(&gc_list); - spin_lock_bh(&xfrm->flow_cache_gc_lock); - list_splice_tail_init(&xfrm->flow_cache_gc_list, &gc_list); - spin_unlock_bh(&xfrm->flow_cache_gc_lock); - - list_for_each_entry_safe(fce, n, &gc_list, u.gc_list) { - flow_entry_kill(fce, xfrm); - atomic_dec(&xfrm->flow_cache_gc_count); - } -} - -static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp, - unsigned int deleted, - struct list_head *gc_list, - struct netns_xfrm *xfrm) -{ - if (deleted) { - atomic_add(deleted, &xfrm->flow_cache_gc_count); - fcp->hash_count -= deleted; - spin_lock_bh(&xfrm->flow_cache_gc_lock); - list_splice_tail(gc_list, &xfrm->flow_cache_gc_list); - spin_unlock_bh(&xfrm->flow_cache_gc_lock); - schedule_work(&xfrm->flow_cache_gc_work); - } -} - -static void __flow_cache_shrink(struct flow_cache *fc, - struct flow_cache_percpu *fcp, - unsigned int shrink_to) -{ - struct flow_cache_entry *fle; - struct hlist_node *tmp; - LIST_HEAD(gc_list); - unsigned int deleted = 0; - struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm, - flow_cache_global); - unsigned int i; - - for (i = 0; i < flow_cache_hash_size(fc); i++) { - unsigned int saved = 0; - - hlist_for_each_entry_safe(fle, tmp, - &fcp->hash_table[i], u.hlist) { - if (saved < shrink_to && - flow_entry_valid(fle, xfrm)) { - saved++; - } else { - deleted++; - hlist_del(&fle->u.hlist); - list_add_tail(&fle->u.gc_list, &gc_list); - } - } - } - - flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm); -} - -static void flow_cache_shrink(struct flow_cache *fc, - struct flow_cache_percpu *fcp) -{ - unsigned int shrink_to = fc->low_watermark / flow_cache_hash_size(fc); - - __flow_cache_shrink(fc, fcp, shrink_to); -} - -static void flow_new_hash_rnd(struct flow_cache *fc, - struct flow_cache_percpu *fcp) -{ - get_random_bytes(&fcp->hash_rnd, sizeof(u32)); - fcp->hash_rnd_recalc = 0; - __flow_cache_shrink(fc, fcp, 0); -} - -static u32 flow_hash_code(struct flow_cache *fc, - struct flow_cache_percpu *fcp, - const struct flowi *key, - unsigned int keysize) -{ - const u32 *k = (const u32 *) key; - const u32 length = keysize * sizeof(flow_compare_t) / sizeof(u32); - - return jhash2(k, length, fcp->hash_rnd) - & (flow_cache_hash_size(fc) - 1); -} - -/* I hear what you're saying, use memcmp. But memcmp cannot make - * important assumptions that we can here, such as alignment. - */ -static int flow_key_compare(const struct flowi *key1, const struct flowi *key2, - unsigned int keysize) -{ - const flow_compare_t *k1, *k1_lim, *k2; - - k1 = (const flow_compare_t *) key1; - k1_lim = k1 + keysize; - - k2 = (const flow_compare_t *) key2; - - do { - if (*k1++ != *k2++) - return 1; - } while (k1 < k1_lim); - - return 0; -} - -struct flow_cache_object * -flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir, - flow_resolve_t resolver, void *ctx) -{ - struct flow_cache *fc = &net->xfrm.flow_cache_global; - struct flow_cache_percpu *fcp; - struct flow_cache_entry *fle, *tfle; - struct flow_cache_object *flo; - unsigned int keysize; - unsigned int hash; - - local_bh_disable(); - fcp = this_cpu_ptr(fc->percpu); - - fle = NULL; - flo = NULL; - - keysize = flow_key_size(family); - if (!keysize) - goto nocache; - - /* Packet really early in init? Making flow_cache_init a - * pre-smp initcall would solve this. --RR */ - if (!fcp->hash_table) - goto nocache; - - if (fcp->hash_rnd_recalc) - flow_new_hash_rnd(fc, fcp); - - hash = flow_hash_code(fc, fcp, key, keysize); - hlist_for_each_entry(tfle, &fcp->hash_table[hash], u.hlist) { - if (tfle->net == net && - tfle->family == family && - tfle->dir == dir && - flow_key_compare(key, &tfle->key, keysize) == 0) { - fle = tfle; - break; - } - } - - if (unlikely(!fle)) { - if (fcp->hash_count > fc->high_watermark) - flow_cache_shrink(fc, fcp); - - if (atomic_read(&net->xfrm.flow_cache_gc_count) > - 2 * num_online_cpus() * fc->high_watermark) { - flo = ERR_PTR(-ENOBUFS); - goto ret_object; - } - - fle = kmem_cache_alloc(flow_cachep, GFP_ATOMIC); - if (fle) { - fle->net = net; - fle->family = family; - fle->dir = dir; - memcpy(&fle->key, key, keysize * sizeof(flow_compare_t)); - fle->object = NULL; - hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]); - fcp->hash_count++; - } - } else if (likely(fle->genid == atomic_read(&net->xfrm.flow_cache_genid))) { - flo = fle->object; - if (!flo) - goto ret_object; - flo = flo->ops->get(flo); - if (flo) - goto ret_object; - } else if (fle->object) { - flo = fle->object; - flo->ops->delete(flo); - fle->object = NULL; - } - -nocache: - flo = NULL; - if (fle) { - flo = fle->object; - fle->object = NULL; - } - flo = resolver(net, key, family, dir, flo, ctx); - if (fle) { - fle->genid = atomic_read(&net->xfrm.flow_cache_genid); - if (!IS_ERR(flo)) - fle->object = flo; - else - fle->genid--; - } else { - if (!IS_ERR_OR_NULL(flo)) - flo->ops->delete(flo); - } -ret_object: - local_bh_enable(); - return flo; -} -EXPORT_SYMBOL(flow_cache_lookup); - -static void flow_cache_flush_tasklet(unsigned long data) -{ - struct flow_flush_info *info = (void *)data; - struct flow_cache *fc = info->cache; - struct flow_cache_percpu *fcp; - struct flow_cache_entry *fle; - struct hlist_node *tmp; - LIST_HEAD(gc_list); - unsigned int deleted = 0; - struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm, - flow_cache_global); - unsigned int i; - - fcp = this_cpu_ptr(fc->percpu); - for (i = 0; i < flow_cache_hash_size(fc); i++) { - hlist_for_each_entry_safe(fle, tmp, - &fcp->hash_table[i], u.hlist) { - if (flow_entry_valid(fle, xfrm)) - continue; - - deleted++; - hlist_del(&fle->u.hlist); - list_add_tail(&fle->u.gc_list, &gc_list); - } - } - - flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm); - - if (atomic_dec_and_test(&info->cpuleft)) - complete(&info->completion); -} - -/* - * Return whether a cpu needs flushing. Conservatively, we assume - * the presence of any entries means the core may require flushing, - * since the flow_cache_ops.check() function may assume it's running - * on the same core as the per-cpu cache component. - */ -static int flow_cache_percpu_empty(struct flow_cache *fc, int cpu) -{ - struct flow_cache_percpu *fcp; - unsigned int i; - - fcp = per_cpu_ptr(fc->percpu, cpu); - for (i = 0; i < flow_cache_hash_size(fc); i++) - if (!hlist_empty(&fcp->hash_table[i])) - return 0; - return 1; -} - -static void flow_cache_flush_per_cpu(void *data) -{ - struct flow_flush_info *info = data; - struct tasklet_struct *tasklet; - - tasklet = &this_cpu_ptr(info->cache->percpu)->flush_tasklet; - tasklet->data = (unsigned long)info; - tasklet_schedule(tasklet); -} - -void flow_cache_flush(struct net *net) -{ - struct flow_flush_info info; - cpumask_var_t mask; - int i, self; - - /* Track which cpus need flushing to avoid disturbing all cores. */ - if (!alloc_cpumask_var(&mask, GFP_KERNEL)) - return; - cpumask_clear(mask); - - /* Don't want cpus going down or up during this. */ - get_online_cpus(); - mutex_lock(&net->xfrm.flow_flush_sem); - info.cache = &net->xfrm.flow_cache_global; - for_each_online_cpu(i) - if (!flow_cache_percpu_empty(info.cache, i)) - cpumask_set_cpu(i, mask); - atomic_set(&info.cpuleft, cpumask_weight(mask)); - if (atomic_read(&info.cpuleft) == 0) - goto done; - - init_completion(&info.completion); - - local_bh_disable(); - self = cpumask_test_and_clear_cpu(smp_processor_id(), mask); - on_each_cpu_mask(mask, flow_cache_flush_per_cpu, &info, 0); - if (self) - flow_cache_flush_tasklet((unsigned long)&info); - local_bh_enable(); - - wait_for_completion(&info.completion); - -done: - mutex_unlock(&net->xfrm.flow_flush_sem); - put_online_cpus(); - free_cpumask_var(mask); -} - -static void flow_cache_flush_task(struct work_struct *work) -{ - struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm, - flow_cache_flush_work); - struct net *net = container_of(xfrm, struct net, xfrm); - - flow_cache_flush(net); -} - -void flow_cache_flush_deferred(struct net *net) -{ - schedule_work(&net->xfrm.flow_cache_flush_work); -} - -static int flow_cache_cpu_prepare(struct flow_cache *fc, int cpu) -{ - struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu); - unsigned int sz = sizeof(struct hlist_head) * flow_cache_hash_size(fc); - - if (!fcp->hash_table) { - fcp->hash_table = kzalloc_node(sz, GFP_KERNEL, cpu_to_node(cpu)); - if (!fcp->hash_table) { - pr_err("NET: failed to allocate flow cache sz %u\n", sz); - return -ENOMEM; - } - fcp->hash_rnd_recalc = 1; - fcp->hash_count = 0; - tasklet_init(&fcp->flush_tasklet, flow_cache_flush_tasklet, 0); - } - return 0; -} - -static int flow_cache_cpu_up_prep(unsigned int cpu, struct hlist_node *node) -{ - struct flow_cache *fc = hlist_entry_safe(node, struct flow_cache, node); - - return flow_cache_cpu_prepare(fc, cpu); -} - -static int flow_cache_cpu_dead(unsigned int cpu, struct hlist_node *node) -{ - struct flow_cache *fc = hlist_entry_safe(node, struct flow_cache, node); - struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu); - - __flow_cache_shrink(fc, fcp, 0); - return 0; -} - -int flow_cache_init(struct net *net) -{ - int i; - struct flow_cache *fc = &net->xfrm.flow_cache_global; - - if (!flow_cachep) - flow_cachep = kmem_cache_create("flow_cache", - sizeof(struct flow_cache_entry), - 0, SLAB_PANIC, NULL); - spin_lock_init(&net->xfrm.flow_cache_gc_lock); - INIT_LIST_HEAD(&net->xfrm.flow_cache_gc_list); - INIT_WORK(&net->xfrm.flow_cache_gc_work, flow_cache_gc_task); - INIT_WORK(&net->xfrm.flow_cache_flush_work, flow_cache_flush_task); - mutex_init(&net->xfrm.flow_flush_sem); - atomic_set(&net->xfrm.flow_cache_gc_count, 0); - - fc->hash_shift = 10; - fc->low_watermark = 2 * flow_cache_hash_size(fc); - fc->high_watermark = 4 * flow_cache_hash_size(fc); - - fc->percpu = alloc_percpu(struct flow_cache_percpu); - if (!fc->percpu) - return -ENOMEM; - - if (cpuhp_state_add_instance(CPUHP_NET_FLOW_PREPARE, &fc->node)) - goto err; - - setup_timer(&fc->rnd_timer, flow_cache_new_hashrnd, - (unsigned long) fc); - fc->rnd_timer.expires = jiffies + FLOW_HASH_RND_PERIOD; - add_timer(&fc->rnd_timer); - - return 0; - -err: - for_each_possible_cpu(i) { - struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i); - kfree(fcp->hash_table); - fcp->hash_table = NULL; - } - - free_percpu(fc->percpu); - fc->percpu = NULL; - - return -ENOMEM; -} -EXPORT_SYMBOL(flow_cache_init); - -void flow_cache_fini(struct net *net) -{ - int i; - struct flow_cache *fc = &net->xfrm.flow_cache_global; - - del_timer_sync(&fc->rnd_timer); - - cpuhp_state_remove_instance_nocalls(CPUHP_NET_FLOW_PREPARE, &fc->node); - - for_each_possible_cpu(i) { - struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i); - kfree(fcp->hash_table); - fcp->hash_table = NULL; - } - - free_percpu(fc->percpu); - fc->percpu = NULL; -} -EXPORT_SYMBOL(flow_cache_fini); - -void __init flow_cache_hp_init(void) -{ - int ret; - - ret = cpuhp_setup_state_multi(CPUHP_NET_FLOW_PREPARE, - "net/flow:prepare", - flow_cache_cpu_up_prep, - flow_cache_cpu_dead); - WARN_ON(ret < 0); -} diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index fc5fc4594c90..15ce30063765 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -4,10 +4,13 @@ #include <linux/ip.h> #include <linux/ipv6.h> #include <linux/if_vlan.h> +#include <net/dsa.h> +#include <net/dst_metadata.h> #include <net/ip.h> #include <net/ipv6.h> #include <net/gre.h> #include <net/pptp.h> +#include <net/tipc.h> #include <linux/igmp.h> #include <linux/icmp.h> #include <linux/sctp.h> @@ -114,11 +117,101 @@ __be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, } EXPORT_SYMBOL(__skb_flow_get_ports); -enum flow_dissect_ret { - FLOW_DISSECT_RET_OUT_GOOD, - FLOW_DISSECT_RET_OUT_BAD, - FLOW_DISSECT_RET_OUT_PROTO_AGAIN, -}; +static void +skb_flow_dissect_set_enc_addr_type(enum flow_dissector_key_id type, + struct flow_dissector *flow_dissector, + void *target_container) +{ + struct flow_dissector_key_control *ctrl; + + if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_CONTROL)) + return; + + ctrl = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_CONTROL, + target_container); + ctrl->addr_type = type; +} + +static void +__skb_flow_dissect_tunnel_info(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container) +{ + struct ip_tunnel_info *info; + struct ip_tunnel_key *key; + + /* A quick check to see if there might be something to do. */ + if (!dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_KEYID) && + !dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) && + !dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) && + !dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_CONTROL) && + !dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_PORTS)) + return; + + info = skb_tunnel_info(skb); + if (!info) + return; + + key = &info->key; + + switch (ip_tunnel_info_af(info)) { + case AF_INET: + skb_flow_dissect_set_enc_addr_type(FLOW_DISSECTOR_KEY_IPV4_ADDRS, + flow_dissector, + target_container); + if (dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS)) { + struct flow_dissector_key_ipv4_addrs *ipv4; + + ipv4 = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS, + target_container); + ipv4->src = key->u.ipv4.src; + ipv4->dst = key->u.ipv4.dst; + } + break; + case AF_INET6: + skb_flow_dissect_set_enc_addr_type(FLOW_DISSECTOR_KEY_IPV6_ADDRS, + flow_dissector, + target_container); + if (dissector_uses_key(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS)) { + struct flow_dissector_key_ipv6_addrs *ipv6; + + ipv6 = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS, + target_container); + ipv6->src = key->u.ipv6.src; + ipv6->dst = key->u.ipv6.dst; + } + break; + } + + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_dissector_key_keyid *keyid; + + keyid = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_KEYID, + target_container); + keyid->keyid = tunnel_id_to_key32(key->tun_id); + } + + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ENC_PORTS)) { + struct flow_dissector_key_ports *tp; + + tp = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_ENC_PORTS, + target_container); + tp->src = key->tp_src; + tp->dst = key->tp_dst; + } +} static enum flow_dissect_ret __skb_flow_dissect_mpls(const struct sk_buff *skb, @@ -340,7 +433,7 @@ __skb_flow_dissect_gre(const struct sk_buff *skb, if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) return FLOW_DISSECT_RET_OUT_GOOD; - return FLOW_DISSECT_RET_OUT_PROTO_AGAIN; + return FLOW_DISSECT_RET_PROTO_AGAIN; } static void @@ -401,6 +494,18 @@ __skb_flow_dissect_ipv6(const struct sk_buff *skb, key_ip->ttl = iph->hop_limit; } +/* Maximum number of protocol headers that can be parsed in + * __skb_flow_dissect + */ +#define MAX_FLOW_DISSECT_HDRS 15 + +static bool skb_flow_dissect_allowed(int *num_hdrs) +{ + ++*num_hdrs; + + return (*num_hdrs <= MAX_FLOW_DISSECT_HDRS); +} + /** * __skb_flow_dissect - extract the flow_keys struct and return it * @skb: sk_buff to extract the flow from, can be NULL if the rest are specified @@ -430,7 +535,9 @@ bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_dissector_key_icmp *key_icmp; struct flow_dissector_key_tags *key_tags; struct flow_dissector_key_vlan *key_vlan; + enum flow_dissect_ret fdret; bool skip_vlan = false; + int num_hdrs = 0; u8 ip_proto = 0; bool ret; @@ -440,6 +547,19 @@ bool __skb_flow_dissect(const struct sk_buff *skb, skb->vlan_proto : skb->protocol; nhoff = skb_network_offset(skb); hlen = skb_headlen(skb); +#if IS_ENABLED(CONFIG_NET_DSA) + if (unlikely(skb->dev && netdev_uses_dsa(skb->dev))) { + const struct dsa_device_ops *ops; + int offset; + + ops = skb->dev->dsa_ptr->tag_ops; + if (ops->flow_dissect && + !ops->flow_dissect(skb, &proto, &offset)) { + hlen -= offset; + nhoff += offset; + } + } +#endif } /* It is ensured by skb_flow_dissector_init() that control key will @@ -456,6 +576,9 @@ bool __skb_flow_dissect(const struct sk_buff *skb, FLOW_DISSECTOR_KEY_BASIC, target_container); + __skb_flow_dissect_tunnel_info(skb, flow_dissector, + target_container); + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { struct ethhdr *eth = eth_hdr(skb); @@ -468,14 +591,19 @@ bool __skb_flow_dissect(const struct sk_buff *skb, } proto_again: + fdret = FLOW_DISSECT_RET_CONTINUE; + switch (proto) { case htons(ETH_P_IP): { const struct iphdr *iph; struct iphdr _iph; -ip: + iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph); - if (!iph || iph->ihl < 5) - goto out_bad; + if (!iph || iph->ihl < 5) { + fdret = FLOW_DISSECT_RET_OUT_BAD; + break; + } + nhoff += iph->ihl * 4; ip_proto = iph->protocol; @@ -495,19 +623,25 @@ ip: key_control->flags |= FLOW_DIS_IS_FRAGMENT; if (iph->frag_off & htons(IP_OFFSET)) { - goto out_good; + fdret = FLOW_DISSECT_RET_OUT_GOOD; + break; } else { key_control->flags |= FLOW_DIS_FIRST_FRAG; - if (!(flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG)) - goto out_good; + if (!(flags & + FLOW_DISSECTOR_F_PARSE_1ST_FRAG)) { + fdret = FLOW_DISSECT_RET_OUT_GOOD; + break; + } } } __skb_flow_dissect_ipv4(skb, flow_dissector, target_container, data, iph); - if (flags & FLOW_DISSECTOR_F_STOP_AT_L3) - goto out_good; + if (flags & FLOW_DISSECTOR_F_STOP_AT_L3) { + fdret = FLOW_DISSECT_RET_OUT_GOOD; + break; + } break; } @@ -515,10 +649,11 @@ ip: const struct ipv6hdr *iph; struct ipv6hdr _iph; -ipv6: iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph); - if (!iph) - goto out_bad; + if (!iph) { + fdret = FLOW_DISSECT_RET_OUT_BAD; + break; + } ip_proto = iph->nexthdr; nhoff += sizeof(struct ipv6hdr); @@ -547,15 +682,17 @@ ipv6: target_container); key_tags->flow_label = ntohl(flow_label); } - if (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL) - goto out_good; + if (flags & FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL) { + fdret = FLOW_DISSECT_RET_OUT_GOOD; + break; + } } __skb_flow_dissect_ipv6(skb, flow_dissector, target_container, data, iph); if (flags & FLOW_DISSECTOR_F_STOP_AT_L3) - goto out_good; + fdret = FLOW_DISSECT_RET_OUT_GOOD; break; } @@ -571,12 +708,17 @@ ipv6: if (!vlan_tag_present || eth_type_vlan(skb->protocol)) { vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan), data, hlen, &_vlan); - if (!vlan) - goto out_bad; + if (!vlan) { + fdret = FLOW_DISSECT_RET_OUT_BAD; + break; + } + proto = vlan->h_vlan_encapsulated_proto; nhoff += sizeof(*vlan); - if (skip_vlan) - goto proto_again; + if (skip_vlan) { + fdret = FLOW_DISSECT_RET_PROTO_AGAIN; + break; + } } skip_vlan = true; @@ -599,7 +741,8 @@ ipv6: } } - goto proto_again; + fdret = FLOW_DISSECT_RET_PROTO_AGAIN; + break; } case htons(ETH_P_PPP_SES): { struct { @@ -607,86 +750,104 @@ ipv6: __be16 proto; } *hdr, _hdr; hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); - if (!hdr) - goto out_bad; + if (!hdr) { + fdret = FLOW_DISSECT_RET_OUT_BAD; + break; + } + proto = hdr->proto; nhoff += PPPOE_SES_HLEN; switch (proto) { case htons(PPP_IP): - goto ip; + proto = htons(ETH_P_IP); + fdret = FLOW_DISSECT_RET_PROTO_AGAIN; + break; case htons(PPP_IPV6): - goto ipv6; + proto = htons(ETH_P_IPV6); + fdret = FLOW_DISSECT_RET_PROTO_AGAIN; + break; default: - goto out_bad; + fdret = FLOW_DISSECT_RET_OUT_BAD; + break; } + break; } case htons(ETH_P_TIPC): { - struct { - __be32 pre[3]; - __be32 srcnode; - } *hdr, _hdr; - hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); - if (!hdr) - goto out_bad; + struct tipc_basic_hdr *hdr, _hdr; + + hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), + data, hlen, &_hdr); + if (!hdr) { + fdret = FLOW_DISSECT_RET_OUT_BAD; + break; + } if (dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_TIPC_ADDRS)) { + FLOW_DISSECTOR_KEY_TIPC)) { key_addrs = skb_flow_dissector_target(flow_dissector, - FLOW_DISSECTOR_KEY_TIPC_ADDRS, + FLOW_DISSECTOR_KEY_TIPC, target_container); - key_addrs->tipcaddrs.srcnode = hdr->srcnode; - key_control->addr_type = FLOW_DISSECTOR_KEY_TIPC_ADDRS; + key_addrs->tipckey.key = tipc_hdr_rps_key(hdr); + key_control->addr_type = FLOW_DISSECTOR_KEY_TIPC; } - goto out_good; + fdret = FLOW_DISSECT_RET_OUT_GOOD; + break; } case htons(ETH_P_MPLS_UC): case htons(ETH_P_MPLS_MC): -mpls: - switch (__skb_flow_dissect_mpls(skb, flow_dissector, + fdret = __skb_flow_dissect_mpls(skb, flow_dissector, target_container, data, - nhoff, hlen)) { - case FLOW_DISSECT_RET_OUT_GOOD: - goto out_good; - case FLOW_DISSECT_RET_OUT_BAD: - default: - goto out_bad; - } + nhoff, hlen); + break; case htons(ETH_P_FCOE): - if ((hlen - nhoff) < FCOE_HEADER_LEN) - goto out_bad; + if ((hlen - nhoff) < FCOE_HEADER_LEN) { + fdret = FLOW_DISSECT_RET_OUT_BAD; + break; + } nhoff += FCOE_HEADER_LEN; - goto out_good; + fdret = FLOW_DISSECT_RET_OUT_GOOD; + break; case htons(ETH_P_ARP): case htons(ETH_P_RARP): - switch (__skb_flow_dissect_arp(skb, flow_dissector, + fdret = __skb_flow_dissect_arp(skb, flow_dissector, target_container, data, - nhoff, hlen)) { - case FLOW_DISSECT_RET_OUT_GOOD: - goto out_good; - case FLOW_DISSECT_RET_OUT_BAD: - default: - goto out_bad; - } + nhoff, hlen); + break; + + default: + fdret = FLOW_DISSECT_RET_OUT_BAD; + break; + } + + /* Process result of proto processing */ + switch (fdret) { + case FLOW_DISSECT_RET_OUT_GOOD: + goto out_good; + case FLOW_DISSECT_RET_PROTO_AGAIN: + if (skb_flow_dissect_allowed(&num_hdrs)) + goto proto_again; + goto out_good; + case FLOW_DISSECT_RET_CONTINUE: + case FLOW_DISSECT_RET_IPPROTO_AGAIN: + break; + case FLOW_DISSECT_RET_OUT_BAD: default: goto out_bad; } ip_proto_again: + fdret = FLOW_DISSECT_RET_CONTINUE; + switch (ip_proto) { case IPPROTO_GRE: - switch (__skb_flow_dissect_gre(skb, key_control, flow_dissector, + fdret = __skb_flow_dissect_gre(skb, key_control, flow_dissector, target_container, data, - &proto, &nhoff, &hlen, flags)) { - case FLOW_DISSECT_RET_OUT_GOOD: - goto out_good; - case FLOW_DISSECT_RET_OUT_BAD: - goto out_bad; - case FLOW_DISSECT_RET_OUT_PROTO_AGAIN: - goto proto_again; - } + &proto, &nhoff, &hlen, flags); + break; + case NEXTHDR_HOP: case NEXTHDR_ROUTING: case NEXTHDR_DEST: { @@ -697,13 +858,16 @@ ip_proto_again: opthdr = __skb_header_pointer(skb, nhoff, sizeof(_opthdr), data, hlen, &_opthdr); - if (!opthdr) - goto out_bad; + if (!opthdr) { + fdret = FLOW_DISSECT_RET_OUT_BAD; + break; + } ip_proto = opthdr[0]; nhoff += (opthdr[1] + 1) << 3; - goto ip_proto_again; + fdret = FLOW_DISSECT_RET_IPPROTO_AGAIN; + break; } case NEXTHDR_FRAGMENT: { struct frag_hdr _fh, *fh; @@ -714,8 +878,10 @@ ip_proto_again: fh = __skb_header_pointer(skb, nhoff, sizeof(_fh), data, hlen, &_fh); - if (!fh) - goto out_bad; + if (!fh) { + fdret = FLOW_DISSECT_RET_OUT_BAD; + break; + } key_control->flags |= FLOW_DIS_IS_FRAGMENT; @@ -724,34 +890,50 @@ ip_proto_again: if (!(fh->frag_off & htons(IP6_OFFSET))) { key_control->flags |= FLOW_DIS_FIRST_FRAG; - if (flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG) - goto ip_proto_again; + if (flags & FLOW_DISSECTOR_F_PARSE_1ST_FRAG) { + fdret = FLOW_DISSECT_RET_IPPROTO_AGAIN; + break; + } } - goto out_good; + + fdret = FLOW_DISSECT_RET_OUT_GOOD; + break; } case IPPROTO_IPIP: proto = htons(ETH_P_IP); key_control->flags |= FLOW_DIS_ENCAPSULATION; - if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) - goto out_good; + if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) { + fdret = FLOW_DISSECT_RET_OUT_GOOD; + break; + } + + fdret = FLOW_DISSECT_RET_PROTO_AGAIN; + break; - goto ip; case IPPROTO_IPV6: proto = htons(ETH_P_IPV6); key_control->flags |= FLOW_DIS_ENCAPSULATION; - if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) - goto out_good; + if (flags & FLOW_DISSECTOR_F_STOP_AT_ENCAP) { + fdret = FLOW_DISSECT_RET_OUT_GOOD; + break; + } + + fdret = FLOW_DISSECT_RET_PROTO_AGAIN; + break; + - goto ipv6; case IPPROTO_MPLS: proto = htons(ETH_P_MPLS_UC); - goto mpls; + fdret = FLOW_DISSECT_RET_PROTO_AGAIN; + break; + case IPPROTO_TCP: __skb_flow_dissect_tcp(skb, flow_dissector, target_container, data, nhoff, hlen); break; + default: break; } @@ -773,6 +955,24 @@ ip_proto_again: key_icmp->icmp = skb_flow_get_be16(skb, nhoff, data, hlen); } + /* Process result of IP proto processing */ + switch (fdret) { + case FLOW_DISSECT_RET_PROTO_AGAIN: + if (skb_flow_dissect_allowed(&num_hdrs)) + goto proto_again; + break; + case FLOW_DISSECT_RET_IPPROTO_AGAIN: + if (skb_flow_dissect_allowed(&num_hdrs)) + goto ip_proto_again; + break; + case FLOW_DISSECT_RET_OUT_GOOD: + case FLOW_DISSECT_RET_CONTINUE: + break; + case FLOW_DISSECT_RET_OUT_BAD: + default: + goto out_bad; + } + out_good: ret = true; @@ -824,8 +1024,8 @@ static inline size_t flow_keys_hash_length(const struct flow_keys *flow) case FLOW_DISSECTOR_KEY_IPV6_ADDRS: diff -= sizeof(flow->addrs.v6addrs); break; - case FLOW_DISSECTOR_KEY_TIPC_ADDRS: - diff -= sizeof(flow->addrs.tipcaddrs); + case FLOW_DISSECTOR_KEY_TIPC: + diff -= sizeof(flow->addrs.tipckey); break; } return (sizeof(*flow) - diff) / sizeof(u32); @@ -839,8 +1039,8 @@ __be32 flow_get_u32_src(const struct flow_keys *flow) case FLOW_DISSECTOR_KEY_IPV6_ADDRS: return (__force __be32)ipv6_addr_hash( &flow->addrs.v6addrs.src); - case FLOW_DISSECTOR_KEY_TIPC_ADDRS: - return flow->addrs.tipcaddrs.srcnode; + case FLOW_DISSECTOR_KEY_TIPC: + return flow->addrs.tipckey.key; default: return 0; } @@ -998,51 +1198,6 @@ __u32 skb_get_hash_perturb(const struct sk_buff *skb, u32 perturb) } EXPORT_SYMBOL(skb_get_hash_perturb); -__u32 __skb_get_hash_flowi6(struct sk_buff *skb, const struct flowi6 *fl6) -{ - struct flow_keys keys; - - memset(&keys, 0, sizeof(keys)); - - memcpy(&keys.addrs.v6addrs.src, &fl6->saddr, - sizeof(keys.addrs.v6addrs.src)); - memcpy(&keys.addrs.v6addrs.dst, &fl6->daddr, - sizeof(keys.addrs.v6addrs.dst)); - keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; - keys.ports.src = fl6->fl6_sport; - keys.ports.dst = fl6->fl6_dport; - keys.keyid.keyid = fl6->fl6_gre_key; - keys.tags.flow_label = (__force u32)fl6->flowlabel; - keys.basic.ip_proto = fl6->flowi6_proto; - - __skb_set_sw_hash(skb, flow_hash_from_keys(&keys), - flow_keys_have_l4(&keys)); - - return skb->hash; -} -EXPORT_SYMBOL(__skb_get_hash_flowi6); - -__u32 __skb_get_hash_flowi4(struct sk_buff *skb, const struct flowi4 *fl4) -{ - struct flow_keys keys; - - memset(&keys, 0, sizeof(keys)); - - keys.addrs.v4addrs.src = fl4->saddr; - keys.addrs.v4addrs.dst = fl4->daddr; - keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; - keys.ports.src = fl4->fl4_sport; - keys.ports.dst = fl4->fl4_dport; - keys.keyid.keyid = fl4->fl4_gre_key; - keys.basic.ip_proto = fl4->flowi4_proto; - - __skb_set_sw_hash(skb, flow_hash_from_keys(&keys), - flow_keys_have_l4(&keys)); - - return skb->hash; -} -EXPORT_SYMBOL(__skb_get_hash_flowi4); - u32 __skb_get_poff(const struct sk_buff *skb, void *data, const struct flow_keys *keys, int hlen) { @@ -1166,8 +1321,8 @@ static const struct flow_dissector_key flow_keys_dissector_keys[] = { .offset = offsetof(struct flow_keys, addrs.v6addrs), }, { - .key_id = FLOW_DISSECTOR_KEY_TIPC_ADDRS, - .offset = offsetof(struct flow_keys, addrs.tipcaddrs), + .key_id = FLOW_DISSECTOR_KEY_TIPC, + .offset = offsetof(struct flow_keys, addrs.tipckey), }, { .key_id = FLOW_DISSECTOR_KEY_PORTS, diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 0385dece1f6f..7c1ffd6f9501 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -83,10 +83,10 @@ static void est_timer(unsigned long arg) u64 rate, brate; est_fetch_counters(est, &b); - brate = (b.bytes - est->last_bytes) << (8 - est->ewma_log); + brate = (b.bytes - est->last_bytes) << (10 - est->ewma_log - est->intvl_log); brate -= (est->avbps >> est->ewma_log); - rate = (u64)(b.packets - est->last_packets) << (8 - est->ewma_log); + rate = (u64)(b.packets - est->last_packets) << (10 - est->ewma_log - est->intvl_log); rate -= (est->avpps >> est->ewma_log); write_seqcount_begin(&est->seq); diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c index 814e58a3ce8b..4b54e5f107c6 100644 --- a/net/core/gro_cells.c +++ b/net/core/gro_cells.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/skbuff.h> #include <linux/slab.h> #include <linux/netdevice.h> diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index 1307731ddfe4..e7e626fb87bb 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -51,7 +51,7 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt, */ preempt_disable(); rcu_read_lock(); - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); ret = bpf_prog_run_save_cb(lwt->prog, skb); rcu_read_unlock(); diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index d9cb3532f1dd..0b171756453c 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -44,6 +44,8 @@ static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type) return "SEG6"; case LWTUNNEL_ENCAP_BPF: return "BPF"; + case LWTUNNEL_ENCAP_SEG6_LOCAL: + return "SEG6LOCAL"; case LWTUNNEL_ENCAP_IP6: case LWTUNNEL_ENCAP_IP: case LWTUNNEL_ENCAP_NONE: @@ -65,7 +67,7 @@ struct lwtunnel_state *lwtunnel_state_alloc(int encap_len) return lws; } -EXPORT_SYMBOL(lwtunnel_state_alloc); +EXPORT_SYMBOL_GPL(lwtunnel_state_alloc); static const struct lwtunnel_encap_ops __rcu * lwtun_encaps[LWTUNNEL_ENCAP_MAX + 1] __read_mostly; @@ -80,7 +82,7 @@ int lwtunnel_encap_add_ops(const struct lwtunnel_encap_ops *ops, &lwtun_encaps[num], NULL, ops) ? 0 : -1; } -EXPORT_SYMBOL(lwtunnel_encap_add_ops); +EXPORT_SYMBOL_GPL(lwtunnel_encap_add_ops); int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops, unsigned int encap_type) @@ -99,7 +101,7 @@ int lwtunnel_encap_del_ops(const struct lwtunnel_encap_ops *ops, return ret; } -EXPORT_SYMBOL(lwtunnel_encap_del_ops); +EXPORT_SYMBOL_GPL(lwtunnel_encap_del_ops); int lwtunnel_build_state(u16 encap_type, struct nlattr *encap, unsigned int family, @@ -138,7 +140,7 @@ int lwtunnel_build_state(u16 encap_type, return ret; } -EXPORT_SYMBOL(lwtunnel_build_state); +EXPORT_SYMBOL_GPL(lwtunnel_build_state); int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack) { @@ -175,7 +177,7 @@ int lwtunnel_valid_encap_type(u16 encap_type, struct netlink_ext_ack *extack) return ret; } -EXPORT_SYMBOL(lwtunnel_valid_encap_type); +EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type); int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining, struct netlink_ext_ack *extack) @@ -205,7 +207,7 @@ int lwtunnel_valid_encap_type_attr(struct nlattr *attr, int remaining, return 0; } -EXPORT_SYMBOL(lwtunnel_valid_encap_type_attr); +EXPORT_SYMBOL_GPL(lwtunnel_valid_encap_type_attr); void lwtstate_free(struct lwtunnel_state *lws) { @@ -219,7 +221,7 @@ void lwtstate_free(struct lwtunnel_state *lws) } module_put(ops->owner); } -EXPORT_SYMBOL(lwtstate_free); +EXPORT_SYMBOL_GPL(lwtstate_free); int lwtunnel_fill_encap(struct sk_buff *skb, struct lwtunnel_state *lwtstate) { @@ -259,7 +261,7 @@ nla_put_failure: return (ret == -EOPNOTSUPP ? 0 : ret); } -EXPORT_SYMBOL(lwtunnel_fill_encap); +EXPORT_SYMBOL_GPL(lwtunnel_fill_encap); int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate) { @@ -281,7 +283,7 @@ int lwtunnel_get_encap_size(struct lwtunnel_state *lwtstate) return ret; } -EXPORT_SYMBOL(lwtunnel_get_encap_size); +EXPORT_SYMBOL_GPL(lwtunnel_get_encap_size); int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b) { @@ -309,7 +311,7 @@ int lwtunnel_cmp_encap(struct lwtunnel_state *a, struct lwtunnel_state *b) return ret; } -EXPORT_SYMBOL(lwtunnel_cmp_encap); +EXPORT_SYMBOL_GPL(lwtunnel_cmp_encap); int lwtunnel_output(struct net *net, struct sock *sk, struct sk_buff *skb) { @@ -343,7 +345,7 @@ drop: return ret; } -EXPORT_SYMBOL(lwtunnel_output); +EXPORT_SYMBOL_GPL(lwtunnel_output); int lwtunnel_xmit(struct sk_buff *skb) { @@ -378,7 +380,7 @@ drop: return ret; } -EXPORT_SYMBOL(lwtunnel_xmit); +EXPORT_SYMBOL_GPL(lwtunnel_xmit); int lwtunnel_input(struct sk_buff *skb) { @@ -412,4 +414,4 @@ drop: return ret; } -EXPORT_SYMBOL(lwtunnel_input); +EXPORT_SYMBOL_GPL(lwtunnel_input); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index d0713627deb6..6ea3a1a7f36a 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -457,7 +457,7 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, const void *pkey) { struct neighbour *n; - int key_len = tbl->key_len; + unsigned int key_len = tbl->key_len; u32 hash_val; struct neigh_hash_table *nht; @@ -488,7 +488,7 @@ struct neighbour *__neigh_create(struct neigh_table *tbl, const void *pkey, struct net_device *dev, bool want_ref) { u32 hash_val; - int key_len = tbl->key_len; + unsigned int key_len = tbl->key_len; int error; struct neighbour *n1, *rc, *n = neigh_alloc(tbl, dev); struct neigh_hash_table *nht; @@ -572,7 +572,7 @@ out_neigh_release: } EXPORT_SYMBOL(__neigh_create); -static u32 pneigh_hash(const void *pkey, int key_len) +static u32 pneigh_hash(const void *pkey, unsigned int key_len) { u32 hash_val = *(u32 *)(pkey + key_len - 4); hash_val ^= (hash_val >> 16); @@ -585,7 +585,7 @@ static u32 pneigh_hash(const void *pkey, int key_len) static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n, struct net *net, const void *pkey, - int key_len, + unsigned int key_len, struct net_device *dev) { while (n) { @@ -601,7 +601,7 @@ static struct pneigh_entry *__pneigh_lookup_1(struct pneigh_entry *n, struct pneigh_entry *__pneigh_lookup(struct neigh_table *tbl, struct net *net, const void *pkey, struct net_device *dev) { - int key_len = tbl->key_len; + unsigned int key_len = tbl->key_len; u32 hash_val = pneigh_hash(pkey, key_len); return __pneigh_lookup_1(tbl->phash_buckets[hash_val], @@ -614,7 +614,7 @@ struct pneigh_entry * pneigh_lookup(struct neigh_table *tbl, struct net_device *dev, int creat) { struct pneigh_entry *n; - int key_len = tbl->key_len; + unsigned int key_len = tbl->key_len; u32 hash_val = pneigh_hash(pkey, key_len); read_lock_bh(&tbl->lock); @@ -659,7 +659,7 @@ int pneigh_delete(struct neigh_table *tbl, struct net *net, const void *pkey, struct net_device *dev) { struct pneigh_entry *n, **np; - int key_len = tbl->key_len; + unsigned int key_len = tbl->key_len; u32 hash_val = pneigh_hash(pkey, key_len); write_lock_bh(&tbl->lock); @@ -1662,7 +1662,7 @@ static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh, if (tbl == NULL) return -EAFNOSUPPORT; - if (nla_len(dst_attr) < tbl->key_len) + if (nla_len(dst_attr) < (int)tbl->key_len) goto out; if (ndm->ndm_flags & NTF_PROXY) { @@ -1730,7 +1730,7 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, if (tbl == NULL) return -EAFNOSUPPORT; - if (nla_len(tb[NDA_DST]) < tbl->key_len) + if (nla_len(tb[NDA_DST]) < (int)tbl->key_len) goto out; dst = nla_data(tb[NDA_DST]); lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL; @@ -3261,13 +3261,13 @@ EXPORT_SYMBOL(neigh_sysctl_unregister); static int __init neigh_init(void) { - rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, NULL); - rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, NULL); - rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, NULL); + rtnl_register(PF_UNSPEC, RTM_NEWNEIGH, neigh_add, NULL, 0); + rtnl_register(PF_UNSPEC, RTM_DELNEIGH, neigh_delete, NULL, 0); + rtnl_register(PF_UNSPEC, RTM_GETNEIGH, NULL, neigh_dump_info, 0); rtnl_register(PF_UNSPEC, RTM_GETNEIGHTBL, NULL, neightbl_dump_info, - NULL); - rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, NULL); + 0); + rtnl_register(PF_UNSPEC, RTM_SETNEIGHTBL, neightbl_set, NULL, 0); return 0; } diff --git a/net/core/net-procfs.c b/net/core/net-procfs.c index 4847964931df..615ccab55f38 100644 --- a/net/core/net-procfs.c +++ b/net/core/net-procfs.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/netdevice.h> #include <linux/proc_fs.h> #include <linux/seq_file.h> diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index b4f9922b6f23..799b75268291 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -97,7 +97,8 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr, return restart_syscall(); if (dev_isalive(netdev)) { - if ((ret = (*set)(netdev, new)) == 0) + ret = (*set)(netdev, new); + if (ret == 0) ret = len; } rtnl_unlock(); @@ -160,6 +161,7 @@ static ssize_t broadcast_show(struct device *dev, struct device_attribute *attr, char *buf) { struct net_device *ndev = to_net_dev(dev); + if (dev_isalive(ndev)) return sysfs_format_mac(buf, ndev->broadcast, ndev->addr_len); return -EINVAL; @@ -170,7 +172,7 @@ static int change_carrier(struct net_device *dev, unsigned long new_carrier) { if (!netif_running(dev)) return -EINVAL; - return dev_change_carrier(dev, (bool) new_carrier); + return dev_change_carrier(dev, (bool)new_carrier); } static ssize_t carrier_store(struct device *dev, struct device_attribute *attr, @@ -183,9 +185,10 @@ static ssize_t carrier_show(struct device *dev, struct device_attribute *attr, char *buf) { struct net_device *netdev = to_net_dev(dev); - if (netif_running(netdev)) { + + if (netif_running(netdev)) return sprintf(buf, fmt_dec, !!netif_carrier_ok(netdev)); - } + return -EINVAL; } static DEVICE_ATTR_RW(carrier); @@ -290,6 +293,7 @@ static ssize_t carrier_changes_show(struct device *dev, char *buf) { struct net_device *netdev = to_net_dev(dev); + return sprintf(buf, fmt_dec, atomic_read(&netdev->carrier_changes)); } @@ -299,7 +303,7 @@ static DEVICE_ATTR_RO(carrier_changes); static int change_mtu(struct net_device *dev, unsigned long new_mtu) { - return dev_set_mtu(dev, (int) new_mtu); + return dev_set_mtu(dev, (int)new_mtu); } static ssize_t mtu_store(struct device *dev, struct device_attribute *attr, @@ -311,7 +315,7 @@ NETDEVICE_SHOW_RW(mtu, fmt_dec); static int change_flags(struct net_device *dev, unsigned long new_flags) { - return dev_change_flags(dev, (unsigned int) new_flags); + return dev_change_flags(dev, (unsigned int)new_flags); } static ssize_t flags_store(struct device *dev, struct device_attribute *attr, @@ -362,8 +366,8 @@ static int change_gro_flush_timeout(struct net_device *dev, unsigned long val) } static ssize_t gro_flush_timeout_store(struct device *dev, - struct device_attribute *attr, - const char *buf, size_t len) + struct device_attribute *attr, + const char *buf, size_t len) { if (!capable(CAP_NET_ADMIN)) return -EPERM; @@ -378,7 +382,7 @@ static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr, struct net_device *netdev = to_net_dev(dev); struct net *net = dev_net(netdev); size_t count = len; - ssize_t ret; + ssize_t ret = 0; if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; @@ -389,30 +393,37 @@ static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr, if (!rtnl_trylock()) return restart_syscall(); - ret = dev_set_alias(netdev, buf, count); + + if (dev_isalive(netdev)) { + ret = dev_set_alias(netdev, buf, count); + if (ret < 0) + goto err; + ret = len; + netdev_state_change(netdev); + } +err: rtnl_unlock(); - return ret < 0 ? ret : len; + return ret; } static ssize_t ifalias_show(struct device *dev, struct device_attribute *attr, char *buf) { const struct net_device *netdev = to_net_dev(dev); + char tmp[IFALIASZ]; ssize_t ret = 0; - if (!rtnl_trylock()) - return restart_syscall(); - if (netdev->ifalias) - ret = sprintf(buf, "%s\n", netdev->ifalias); - rtnl_unlock(); + ret = dev_get_alias(netdev, tmp, sizeof(tmp)); + if (ret > 0) + ret = sprintf(buf, "%s\n", tmp); return ret; } static DEVICE_ATTR_RW(ifalias); static int change_group(struct net_device *dev, unsigned long new_group) { - dev_set_group(dev, (int) new_group); + dev_set_group(dev, (int)new_group); return 0; } @@ -426,7 +437,7 @@ static DEVICE_ATTR(netdev_group, S_IRUGO | S_IWUSR, group_show, group_store); static int change_proto_down(struct net_device *dev, unsigned long proto_down) { - return dev_change_proto_down(dev, (bool) proto_down); + return dev_change_proto_down(dev, (bool)proto_down); } static ssize_t proto_down_store(struct device *dev, @@ -508,7 +519,7 @@ static ssize_t phys_switch_id_show(struct device *dev, } static DEVICE_ATTR_RO(phys_switch_id); -static struct attribute *net_class_attrs[] = { +static struct attribute *net_class_attrs[] __ro_after_init = { &dev_attr_netdev_group.attr, &dev_attr_type.attr, &dev_attr_dev_id.attr, @@ -549,14 +560,14 @@ static ssize_t netstat_show(const struct device *d, ssize_t ret = -EINVAL; WARN_ON(offset > sizeof(struct rtnl_link_stats64) || - offset % sizeof(u64) != 0); + offset % sizeof(u64) != 0); read_lock(&dev_base_lock); if (dev_isalive(dev)) { struct rtnl_link_stats64 temp; const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp); - ret = sprintf(buf, fmt_u64, *(u64 *)(((u8 *) stats) + offset)); + ret = sprintf(buf, fmt_u64, *(u64 *)(((u8 *)stats) + offset)); } read_unlock(&dev_base_lock); return ret; @@ -565,7 +576,7 @@ static ssize_t netstat_show(const struct device *d, /* generate a read-only statistics attribute */ #define NETSTAT_ENTRY(name) \ static ssize_t name##_show(struct device *d, \ - struct device_attribute *attr, char *buf) \ + struct device_attribute *attr, char *buf) \ { \ return netstat_show(d, attr, buf, \ offsetof(struct rtnl_link_stats64, name)); \ @@ -597,7 +608,7 @@ NETSTAT_ENTRY(rx_compressed); NETSTAT_ENTRY(tx_compressed); NETSTAT_ENTRY(rx_nohandler); -static struct attribute *netstat_attrs[] = { +static struct attribute *netstat_attrs[] __ro_after_init = { &dev_attr_rx_packets.attr, &dev_attr_tx_packets.attr, &dev_attr_rx_bytes.attr, @@ -625,7 +636,6 @@ static struct attribute *netstat_attrs[] = { NULL }; - static const struct attribute_group netstat_group = { .name = "statistics", .attrs = netstat_attrs, @@ -647,33 +657,33 @@ static const struct attribute_group wireless_group = { #endif /* CONFIG_SYSFS */ #ifdef CONFIG_SYSFS -#define to_rx_queue_attr(_attr) container_of(_attr, \ - struct rx_queue_attribute, attr) +#define to_rx_queue_attr(_attr) \ + container_of(_attr, struct rx_queue_attribute, attr) #define to_rx_queue(obj) container_of(obj, struct netdev_rx_queue, kobj) static ssize_t rx_queue_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { - struct rx_queue_attribute *attribute = to_rx_queue_attr(attr); + const struct rx_queue_attribute *attribute = to_rx_queue_attr(attr); struct netdev_rx_queue *queue = to_rx_queue(kobj); if (!attribute->show) return -EIO; - return attribute->show(queue, attribute, buf); + return attribute->show(queue, buf); } static ssize_t rx_queue_attr_store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) { - struct rx_queue_attribute *attribute = to_rx_queue_attr(attr); + const struct rx_queue_attribute *attribute = to_rx_queue_attr(attr); struct netdev_rx_queue *queue = to_rx_queue(kobj); if (!attribute->store) return -EIO; - return attribute->store(queue, attribute, buf, count); + return attribute->store(queue, buf, count); } static const struct sysfs_ops rx_queue_sysfs_ops = { @@ -682,8 +692,7 @@ static const struct sysfs_ops rx_queue_sysfs_ops = { }; #ifdef CONFIG_RPS -static ssize_t show_rps_map(struct netdev_rx_queue *queue, - struct rx_queue_attribute *attribute, char *buf) +static ssize_t show_rps_map(struct netdev_rx_queue *queue, char *buf) { struct rps_map *map; cpumask_var_t mask; @@ -706,8 +715,7 @@ static ssize_t show_rps_map(struct netdev_rx_queue *queue, } static ssize_t store_rps_map(struct netdev_rx_queue *queue, - struct rx_queue_attribute *attribute, - const char *buf, size_t len) + const char *buf, size_t len) { struct rps_map *old_map, *map; cpumask_var_t mask; @@ -727,8 +735,8 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue, } map = kzalloc(max_t(unsigned int, - RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES), - GFP_KERNEL); + RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES), + GFP_KERNEL); if (!map) { free_cpumask_var(mask); return -ENOMEM; @@ -738,9 +746,9 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue, for_each_cpu_and(cpu, mask, cpu_online_mask) map->cpus[i++] = cpu; - if (i) + if (i) { map->len = i; - else { + } else { kfree(map); map = NULL; } @@ -765,7 +773,6 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue, } static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, - struct rx_queue_attribute *attr, char *buf) { struct rps_dev_flow_table *flow_table; @@ -788,8 +795,7 @@ static void rps_dev_flow_table_release(struct rcu_head *rcu) } static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, - struct rx_queue_attribute *attr, - const char *buf, size_t len) + const char *buf, size_t len) { unsigned long mask, count; struct rps_dev_flow_table *table, *old_table; @@ -831,8 +837,9 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, table->mask = mask; for (count = 0; count <= mask; count++) table->flows[count].cpu = RPS_NO_CPU; - } else + } else { table = NULL; + } spin_lock(&rps_dev_flow_lock); old_table = rcu_dereference_protected(queue->rps_flow_table, @@ -846,16 +853,15 @@ static ssize_t store_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, return len; } -static struct rx_queue_attribute rps_cpus_attribute = - __ATTR(rps_cpus, S_IRUGO | S_IWUSR, show_rps_map, store_rps_map); - +static struct rx_queue_attribute rps_cpus_attribute __ro_after_init + = __ATTR(rps_cpus, S_IRUGO | S_IWUSR, show_rps_map, store_rps_map); -static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute = - __ATTR(rps_flow_cnt, S_IRUGO | S_IWUSR, - show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt); +static struct rx_queue_attribute rps_dev_flow_table_cnt_attribute __ro_after_init + = __ATTR(rps_flow_cnt, S_IRUGO | S_IWUSR, + show_rps_dev_flow_table_cnt, store_rps_dev_flow_table_cnt); #endif /* CONFIG_RPS */ -static struct attribute *rx_queue_default_attrs[] = { +static struct attribute *rx_queue_default_attrs[] __ro_after_init = { #ifdef CONFIG_RPS &rps_cpus_attribute.attr, &rps_dev_flow_table_cnt_attribute.attr, @@ -870,7 +876,6 @@ static void rx_queue_release(struct kobject *kobj) struct rps_map *map; struct rps_dev_flow_table *flow_table; - map = rcu_dereference_protected(queue->rps_map, 1); if (map) { RCU_INIT_POINTER(queue->rps_map, NULL); @@ -900,7 +905,7 @@ static const void *rx_queue_namespace(struct kobject *kobj) return ns; } -static struct kobj_type rx_queue_ktype = { +static struct kobj_type rx_queue_ktype __ro_after_init = { .sysfs_ops = &rx_queue_sysfs_ops, .release = rx_queue_release, .default_attrs = rx_queue_default_attrs, @@ -915,23 +920,22 @@ static int rx_queue_add_kobject(struct net_device *dev, int index) kobj->kset = dev->queues_kset; error = kobject_init_and_add(kobj, &rx_queue_ktype, NULL, - "rx-%u", index); + "rx-%u", index); if (error) - goto exit; + return error; if (dev->sysfs_rx_queue_group) { error = sysfs_create_group(kobj, dev->sysfs_rx_queue_group); - if (error) - goto exit; + if (error) { + kobject_put(kobj); + return error; + } } kobject_uevent(kobj, KOBJ_ADD); dev_hold(queue->dev); return error; -exit: - kobject_put(kobj); - return error; } #endif /* CONFIG_SYSFS */ @@ -976,39 +980,40 @@ net_rx_queue_update_kobjects(struct net_device *dev, int old_num, int new_num) */ struct netdev_queue_attribute { struct attribute attr; - ssize_t (*show)(struct netdev_queue *queue, - struct netdev_queue_attribute *attr, char *buf); + ssize_t (*show)(struct netdev_queue *queue, char *buf); ssize_t (*store)(struct netdev_queue *queue, - struct netdev_queue_attribute *attr, const char *buf, size_t len); + const char *buf, size_t len); }; -#define to_netdev_queue_attr(_attr) container_of(_attr, \ - struct netdev_queue_attribute, attr) +#define to_netdev_queue_attr(_attr) \ + container_of(_attr, struct netdev_queue_attribute, attr) #define to_netdev_queue(obj) container_of(obj, struct netdev_queue, kobj) static ssize_t netdev_queue_attr_show(struct kobject *kobj, struct attribute *attr, char *buf) { - struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr); + const struct netdev_queue_attribute *attribute + = to_netdev_queue_attr(attr); struct netdev_queue *queue = to_netdev_queue(kobj); if (!attribute->show) return -EIO; - return attribute->show(queue, attribute, buf); + return attribute->show(queue, buf); } static ssize_t netdev_queue_attr_store(struct kobject *kobj, struct attribute *attr, const char *buf, size_t count) { - struct netdev_queue_attribute *attribute = to_netdev_queue_attr(attr); + const struct netdev_queue_attribute *attribute + = to_netdev_queue_attr(attr); struct netdev_queue *queue = to_netdev_queue(kobj); if (!attribute->store) return -EIO; - return attribute->store(queue, attribute, buf, count); + return attribute->store(queue, buf, count); } static const struct sysfs_ops netdev_queue_sysfs_ops = { @@ -1016,9 +1021,7 @@ static const struct sysfs_ops netdev_queue_sysfs_ops = { .store = netdev_queue_attr_store, }; -static ssize_t show_trans_timeout(struct netdev_queue *queue, - struct netdev_queue_attribute *attribute, - char *buf) +static ssize_t tx_timeout_show(struct netdev_queue *queue, char *buf) { unsigned long trans_timeout; @@ -1040,8 +1043,7 @@ static unsigned int get_netdev_queue_index(struct netdev_queue *queue) return i; } -static ssize_t show_traffic_class(struct netdev_queue *queue, - struct netdev_queue_attribute *attribute, +static ssize_t traffic_class_show(struct netdev_queue *queue, char *buf) { struct net_device *dev = queue->dev; @@ -1055,16 +1057,14 @@ static ssize_t show_traffic_class(struct netdev_queue *queue, } #ifdef CONFIG_XPS -static ssize_t show_tx_maxrate(struct netdev_queue *queue, - struct netdev_queue_attribute *attribute, +static ssize_t tx_maxrate_show(struct netdev_queue *queue, char *buf) { return sprintf(buf, "%lu\n", queue->tx_maxrate); } -static ssize_t set_tx_maxrate(struct netdev_queue *queue, - struct netdev_queue_attribute *attribute, - const char *buf, size_t len) +static ssize_t tx_maxrate_store(struct netdev_queue *queue, + const char *buf, size_t len) { struct net_device *dev = queue->dev; int err, index = get_netdev_queue_index(queue); @@ -1089,16 +1089,15 @@ static ssize_t set_tx_maxrate(struct netdev_queue *queue, return err; } -static struct netdev_queue_attribute queue_tx_maxrate = - __ATTR(tx_maxrate, S_IRUGO | S_IWUSR, - show_tx_maxrate, set_tx_maxrate); +static struct netdev_queue_attribute queue_tx_maxrate __ro_after_init + = __ATTR_RW(tx_maxrate); #endif -static struct netdev_queue_attribute queue_trans_timeout = - __ATTR(tx_timeout, S_IRUGO, show_trans_timeout, NULL); +static struct netdev_queue_attribute queue_trans_timeout __ro_after_init + = __ATTR_RO(tx_timeout); -static struct netdev_queue_attribute queue_traffic_class = - __ATTR(traffic_class, S_IRUGO, show_traffic_class, NULL); +static struct netdev_queue_attribute queue_traffic_class __ro_after_init + = __ATTR_RO(traffic_class); #ifdef CONFIG_BQL /* @@ -1115,9 +1114,9 @@ static ssize_t bql_set(const char *buf, const size_t count, unsigned int value; int err; - if (!strcmp(buf, "max") || !strcmp(buf, "max\n")) + if (!strcmp(buf, "max") || !strcmp(buf, "max\n")) { value = DQL_MAX_LIMIT; - else { + } else { err = kstrtouint(buf, 10, &value); if (err < 0) return err; @@ -1131,7 +1130,6 @@ static ssize_t bql_set(const char *buf, const size_t count, } static ssize_t bql_show_hold_time(struct netdev_queue *queue, - struct netdev_queue_attribute *attr, char *buf) { struct dql *dql = &queue->dql; @@ -1140,7 +1138,6 @@ static ssize_t bql_show_hold_time(struct netdev_queue *queue, } static ssize_t bql_set_hold_time(struct netdev_queue *queue, - struct netdev_queue_attribute *attribute, const char *buf, size_t len) { struct dql *dql = &queue->dql; @@ -1156,12 +1153,11 @@ static ssize_t bql_set_hold_time(struct netdev_queue *queue, return len; } -static struct netdev_queue_attribute bql_hold_time_attribute = - __ATTR(hold_time, S_IRUGO | S_IWUSR, bql_show_hold_time, - bql_set_hold_time); +static struct netdev_queue_attribute bql_hold_time_attribute __ro_after_init + = __ATTR(hold_time, S_IRUGO | S_IWUSR, + bql_show_hold_time, bql_set_hold_time); static ssize_t bql_show_inflight(struct netdev_queue *queue, - struct netdev_queue_attribute *attr, char *buf) { struct dql *dql = &queue->dql; @@ -1169,33 +1165,31 @@ static ssize_t bql_show_inflight(struct netdev_queue *queue, return sprintf(buf, "%u\n", dql->num_queued - dql->num_completed); } -static struct netdev_queue_attribute bql_inflight_attribute = +static struct netdev_queue_attribute bql_inflight_attribute __ro_after_init = __ATTR(inflight, S_IRUGO, bql_show_inflight, NULL); #define BQL_ATTR(NAME, FIELD) \ static ssize_t bql_show_ ## NAME(struct netdev_queue *queue, \ - struct netdev_queue_attribute *attr, \ char *buf) \ { \ return bql_show(buf, queue->dql.FIELD); \ } \ \ static ssize_t bql_set_ ## NAME(struct netdev_queue *queue, \ - struct netdev_queue_attribute *attr, \ const char *buf, size_t len) \ { \ return bql_set(buf, len, &queue->dql.FIELD); \ } \ \ -static struct netdev_queue_attribute bql_ ## NAME ## _attribute = \ - __ATTR(NAME, S_IRUGO | S_IWUSR, bql_show_ ## NAME, \ - bql_set_ ## NAME); +static struct netdev_queue_attribute bql_ ## NAME ## _attribute __ro_after_init \ + = __ATTR(NAME, S_IRUGO | S_IWUSR, \ + bql_show_ ## NAME, bql_set_ ## NAME) -BQL_ATTR(limit, limit) -BQL_ATTR(limit_max, max_limit) -BQL_ATTR(limit_min, min_limit) +BQL_ATTR(limit, limit); +BQL_ATTR(limit_max, max_limit); +BQL_ATTR(limit_min, min_limit); -static struct attribute *dql_attrs[] = { +static struct attribute *dql_attrs[] __ro_after_init = { &bql_limit_attribute.attr, &bql_limit_max_attribute.attr, &bql_limit_min_attribute.attr, @@ -1211,8 +1205,8 @@ static const struct attribute_group dql_group = { #endif /* CONFIG_BQL */ #ifdef CONFIG_XPS -static ssize_t show_xps_map(struct netdev_queue *queue, - struct netdev_queue_attribute *attribute, char *buf) +static ssize_t xps_cpus_show(struct netdev_queue *queue, + char *buf) { struct net_device *dev = queue->dev; int cpu, len, num_tc = 1, tc = 0; @@ -1258,9 +1252,8 @@ static ssize_t show_xps_map(struct netdev_queue *queue, return len < PAGE_SIZE ? len : -EINVAL; } -static ssize_t store_xps_map(struct netdev_queue *queue, - struct netdev_queue_attribute *attribute, - const char *buf, size_t len) +static ssize_t xps_cpus_store(struct netdev_queue *queue, + const char *buf, size_t len) { struct net_device *dev = queue->dev; unsigned long index; @@ -1288,11 +1281,11 @@ static ssize_t store_xps_map(struct netdev_queue *queue, return err ? : len; } -static struct netdev_queue_attribute xps_cpus_attribute = - __ATTR(xps_cpus, S_IRUGO | S_IWUSR, show_xps_map, store_xps_map); +static struct netdev_queue_attribute xps_cpus_attribute __ro_after_init + = __ATTR_RW(xps_cpus); #endif /* CONFIG_XPS */ -static struct attribute *netdev_queue_default_attrs[] = { +static struct attribute *netdev_queue_default_attrs[] __ro_after_init = { &queue_trans_timeout.attr, &queue_traffic_class.attr, #ifdef CONFIG_XPS @@ -1322,7 +1315,7 @@ static const void *netdev_queue_namespace(struct kobject *kobj) return ns; } -static struct kobj_type netdev_queue_ktype = { +static struct kobj_type netdev_queue_ktype __ro_after_init = { .sysfs_ops = &netdev_queue_sysfs_ops, .release = netdev_queue_release, .default_attrs = netdev_queue_default_attrs, @@ -1337,23 +1330,22 @@ static int netdev_queue_add_kobject(struct net_device *dev, int index) kobj->kset = dev->queues_kset; error = kobject_init_and_add(kobj, &netdev_queue_ktype, NULL, - "tx-%u", index); + "tx-%u", index); if (error) - goto exit; + return error; #ifdef CONFIG_BQL error = sysfs_create_group(kobj, &dql_group); - if (error) - goto exit; + if (error) { + kobject_put(kobj); + return error; + } #endif kobject_uevent(kobj, KOBJ_ADD); dev_hold(queue->dev); return 0; -exit: - kobject_put(kobj); - return error; } #endif /* CONFIG_SYSFS */ @@ -1395,7 +1387,7 @@ static int register_queue_kobjects(struct net_device *dev) #ifdef CONFIG_SYSFS dev->queues_kset = kset_create_and_add("queues", - NULL, &dev->dev.kobj); + NULL, &dev->dev.kobj); if (!dev->queues_kset) return -ENOMEM; real_rx = dev->real_num_rx_queues; @@ -1463,7 +1455,7 @@ static const void *net_netlink_ns(struct sock *sk) return sock_net(sk); } -struct kobj_ns_type_operations net_ns_type_operations = { +const struct kobj_ns_type_operations net_ns_type_operations = { .type = KOBJ_NS_TYPE_NET, .current_may_mount = net_current_may_mount, .grab_current_ns = net_grab_current_ns, @@ -1485,7 +1477,8 @@ static int netdev_uevent(struct device *d, struct kobj_uevent_env *env) /* pass ifindex to uevent. * ifindex is useful as it won't change (interface name may change) - * and is what RtNetlink uses natively. */ + * and is what RtNetlink uses natively. + */ retval = add_uevent_var(env, "IFINDEX=%d", dev->ifindex); exit: @@ -1502,7 +1495,10 @@ static void netdev_release(struct device *d) BUG_ON(dev->reg_state != NETREG_RELEASED); - kfree(dev->ifalias); + /* no need to wait for rcu grace period: + * device is dead and about to be freed. + */ + kfree(rcu_access_pointer(dev->ifalias)); netdev_freemem(dev); } @@ -1513,7 +1509,7 @@ static const void *net_namespace(struct device *d) return dev_net(dev); } -static struct class net_class = { +static struct class net_class __ro_after_init = { .name = "net", .dev_release = netdev_release, .dev_groups = net_class_groups, @@ -1560,7 +1556,7 @@ EXPORT_SYMBOL(of_find_net_device_by_node); */ void netdev_unregister_kobject(struct net_device *ndev) { - struct device *dev = &(ndev->dev); + struct device *dev = &ndev->dev; if (!atomic_read(&dev_net(ndev)->count)) dev_set_uevent_suppress(dev, 1); @@ -1577,7 +1573,7 @@ void netdev_unregister_kobject(struct net_device *ndev) /* Create sysfs entries for network device. */ int netdev_register_kobject(struct net_device *ndev) { - struct device *dev = &(ndev->dev); + struct device *dev = &ndev->dev; const struct attribute_group **groups = ndev->sysfs_groups; int error = 0; @@ -1620,14 +1616,14 @@ int netdev_register_kobject(struct net_device *ndev) return error; } -int netdev_class_create_file_ns(struct class_attribute *class_attr, +int netdev_class_create_file_ns(const struct class_attribute *class_attr, const void *ns) { return class_create_file_ns(&net_class, class_attr, ns); } EXPORT_SYMBOL(netdev_class_create_file_ns); -void netdev_class_remove_file_ns(struct class_attribute *class_attr, +void netdev_class_remove_file_ns(const struct class_attribute *class_attr, const void *ns) { class_remove_file_ns(&net_class, class_attr, ns); diff --git a/net/core/net-sysfs.h b/net/core/net-sysfs.h index 2745a1b51e03..006876c7b78d 100644 --- a/net/core/net-sysfs.h +++ b/net/core/net-sysfs.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_SYSFS_H__ #define __NET_SYSFS_H__ diff --git a/net/core/net-traces.c b/net/core/net-traces.c index 92da5e4ceb4f..380934580fa1 100644 --- a/net/core/net-traces.c +++ b/net/core/net-traces.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * consolidates trace point definitions * @@ -31,12 +32,23 @@ #include <trace/events/napi.h> #include <trace/events/sock.h> #include <trace/events/udp.h> +#include <trace/events/tcp.h> #include <trace/events/fib.h> +#include <trace/events/qdisc.h> #if IS_ENABLED(CONFIG_IPV6) #include <trace/events/fib6.h> EXPORT_TRACEPOINT_SYMBOL_GPL(fib6_table_lookup); #endif +#if IS_ENABLED(CONFIG_BRIDGE) +#include <trace/events/bridge.h> +EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_add); +EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_external_learn_add); +EXPORT_TRACEPOINT_SYMBOL_GPL(fdb_delete); +EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_update); +#endif EXPORT_TRACEPOINT_SYMBOL_GPL(kfree_skb); EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll); + +EXPORT_TRACEPOINT_SYMBOL_GPL(tcp_send_reset); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 8726d051f31d..b797832565d3 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -234,6 +234,7 @@ int peernet2id_alloc(struct net *net, struct net *peer) rtnl_net_notifyid(net, RTM_NEWNSID, id); return id; } +EXPORT_SYMBOL_GPL(peernet2id_alloc); /* This function returns, if assigned, the id of a peer netns. */ int peernet2id(struct net *net, struct net *peer) @@ -855,9 +856,10 @@ static int __init net_ns_init(void) register_pernet_subsys(&net_ns_ops); - rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, + RTNL_FLAG_DOIT_UNLOCKED); rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, rtnl_net_dumpid, - NULL); + RTNL_FLAG_DOIT_UNLOCKED); return 0; } diff --git a/net/core/netclassid_cgroup.c b/net/core/netclassid_cgroup.c index 029a61ac6cdd..5e4f04004a49 100644 --- a/net/core/netclassid_cgroup.c +++ b/net/core/netclassid_cgroup.c @@ -100,7 +100,7 @@ static int write_classid(struct cgroup_subsys_state *css, struct cftype *cft, cs->classid = (u32)value; - css_task_iter_start(css, &it); + css_task_iter_start(css, 0, &it); while ((p = css_task_iter_next(&it))) { task_lock(p); iterate_fd(p->files, 0, update_classid_sock, diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 912731bed7b7..57557a6a950c 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -334,7 +334,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb, /* It is up to the caller to keep npinfo alive. */ struct netpoll_info *npinfo; - WARN_ON_ONCE(!irqs_disabled()); + lockdep_assert_irqs_disabled(); npinfo = rcu_dereference_bh(np->dev->npinfo); if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) { diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 6e1e10ff433a..f95a15086225 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2165,7 +2165,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) + pkt_dev->pkt_overhead; } - for (i = 0; i < IN6_ADDR_HSIZE; i++) + for (i = 0; i < sizeof(struct in6_addr); i++) if (pkt_dev->cur_in6_saddr.s6_addr[i]) { set = 1; break; @@ -2711,7 +2711,7 @@ static inline __be16 build_tci(unsigned int id, unsigned int cfi, static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, int datalen) { - struct timeval timestamp; + struct timespec64 timestamp; struct pktgen_hdr *pgh; pgh = skb_put(skb, sizeof(*pgh)); @@ -2773,9 +2773,17 @@ static void pktgen_finalize_skb(struct pktgen_dev *pkt_dev, struct sk_buff *skb, pgh->tv_sec = 0; pgh->tv_usec = 0; } else { - do_gettimeofday(×tamp); + /* + * pgh->tv_sec wraps in y2106 when interpreted as unsigned + * as done by wireshark, or y2038 when interpreted as signed. + * This is probably harmless, but if anyone wants to improve + * it, we could introduce a variant that puts 64-bit nanoseconds + * into the respective header bytes. + * This would also be slightly faster to read. + */ + ktime_get_real_ts64(×tamp); pgh->tv_sec = htonl(timestamp.tv_sec); - pgh->tv_usec = htonl(timestamp.tv_usec); + pgh->tv_usec = htonl(timestamp.tv_nsec / NSEC_PER_USEC); } } @@ -3377,7 +3385,7 @@ static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev) static void pktgen_xmit(struct pktgen_dev *pkt_dev) { - unsigned int burst = ACCESS_ONCE(pkt_dev->burst); + unsigned int burst = READ_ONCE(pkt_dev->burst); struct net_device *odev = pkt_dev->odev; struct netdev_queue *txq; struct sk_buff *skb; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 9201e3621351..dabba2a91fc8 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -62,7 +62,7 @@ struct rtnl_link { rtnl_doit_func doit; rtnl_dumpit_func dumpit; - rtnl_calcit_func calcit; + unsigned int flags; }; static DEFINE_MUTEX(rtnl_mutex); @@ -127,7 +127,8 @@ bool lockdep_rtnl_is_held(void) EXPORT_SYMBOL(lockdep_rtnl_is_held); #endif /* #ifdef CONFIG_PROVE_LOCKING */ -static struct rtnl_link *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1]; +static struct rtnl_link __rcu *rtnl_msg_handlers[RTNL_FAMILY_MAX + 1]; +static refcount_t rtnl_msg_handlers_ref[RTNL_FAMILY_MAX + 1]; static inline int rtm_msgindex(int msgtype) { @@ -143,58 +144,13 @@ static inline int rtm_msgindex(int msgtype) return msgindex; } -static rtnl_doit_func rtnl_get_doit(int protocol, int msgindex) -{ - struct rtnl_link *tab; - - if (protocol <= RTNL_FAMILY_MAX) - tab = rtnl_msg_handlers[protocol]; - else - tab = NULL; - - if (tab == NULL || tab[msgindex].doit == NULL) - tab = rtnl_msg_handlers[PF_UNSPEC]; - - return tab[msgindex].doit; -} - -static rtnl_dumpit_func rtnl_get_dumpit(int protocol, int msgindex) -{ - struct rtnl_link *tab; - - if (protocol <= RTNL_FAMILY_MAX) - tab = rtnl_msg_handlers[protocol]; - else - tab = NULL; - - if (tab == NULL || tab[msgindex].dumpit == NULL) - tab = rtnl_msg_handlers[PF_UNSPEC]; - - return tab[msgindex].dumpit; -} - -static rtnl_calcit_func rtnl_get_calcit(int protocol, int msgindex) -{ - struct rtnl_link *tab; - - if (protocol <= RTNL_FAMILY_MAX) - tab = rtnl_msg_handlers[protocol]; - else - tab = NULL; - - if (tab == NULL || tab[msgindex].calcit == NULL) - tab = rtnl_msg_handlers[PF_UNSPEC]; - - return tab[msgindex].calcit; -} - /** * __rtnl_register - Register a rtnetlink message type * @protocol: Protocol family or PF_UNSPEC * @msgtype: rtnetlink message type * @doit: Function pointer called for each request message * @dumpit: Function pointer called for each dump request (NLM_F_DUMP) message - * @calcit: Function pointer to calc size of dump message + * @flags: rtnl_link_flags to modifiy behaviour of doit/dumpit functions * * Registers the specified function pointers (at least one of them has * to be non-NULL) to be called whenever a request message for the @@ -208,7 +164,7 @@ static rtnl_calcit_func rtnl_get_calcit(int protocol, int msgindex) */ int __rtnl_register(int protocol, int msgtype, rtnl_doit_func doit, rtnl_dumpit_func dumpit, - rtnl_calcit_func calcit) + unsigned int flags) { struct rtnl_link *tab; int msgindex; @@ -216,23 +172,20 @@ int __rtnl_register(int protocol, int msgtype, BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); msgindex = rtm_msgindex(msgtype); - tab = rtnl_msg_handlers[protocol]; + tab = rcu_dereference_raw(rtnl_msg_handlers[protocol]); if (tab == NULL) { tab = kcalloc(RTM_NR_MSGTYPES, sizeof(*tab), GFP_KERNEL); if (tab == NULL) return -ENOBUFS; - rtnl_msg_handlers[protocol] = tab; + rcu_assign_pointer(rtnl_msg_handlers[protocol], tab); } if (doit) tab[msgindex].doit = doit; - if (dumpit) tab[msgindex].dumpit = dumpit; - - if (calcit) - tab[msgindex].calcit = calcit; + tab[msgindex].flags |= flags; return 0; } @@ -249,9 +202,9 @@ EXPORT_SYMBOL_GPL(__rtnl_register); */ void rtnl_register(int protocol, int msgtype, rtnl_doit_func doit, rtnl_dumpit_func dumpit, - rtnl_calcit_func calcit) + unsigned int flags) { - if (__rtnl_register(protocol, msgtype, doit, dumpit, calcit) < 0) + if (__rtnl_register(protocol, msgtype, doit, dumpit, flags) < 0) panic("Unable to register rtnetlink message handler, " "protocol = %d, message type = %d\n", protocol, msgtype); @@ -267,17 +220,23 @@ EXPORT_SYMBOL_GPL(rtnl_register); */ int rtnl_unregister(int protocol, int msgtype) { + struct rtnl_link *handlers; int msgindex; BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); msgindex = rtm_msgindex(msgtype); - if (rtnl_msg_handlers[protocol] == NULL) + rtnl_lock(); + handlers = rtnl_dereference(rtnl_msg_handlers[protocol]); + if (!handlers) { + rtnl_unlock(); return -ENOENT; + } - rtnl_msg_handlers[protocol][msgindex].doit = NULL; - rtnl_msg_handlers[protocol][msgindex].dumpit = NULL; - rtnl_msg_handlers[protocol][msgindex].calcit = NULL; + handlers[msgindex].doit = NULL; + handlers[msgindex].dumpit = NULL; + handlers[msgindex].flags = 0; + rtnl_unlock(); return 0; } @@ -292,10 +251,20 @@ EXPORT_SYMBOL_GPL(rtnl_unregister); */ void rtnl_unregister_all(int protocol) { + struct rtnl_link *handlers; + BUG_ON(protocol < 0 || protocol > RTNL_FAMILY_MAX); - kfree(rtnl_msg_handlers[protocol]); - rtnl_msg_handlers[protocol] = NULL; + rtnl_lock(); + handlers = rtnl_dereference(rtnl_msg_handlers[protocol]); + RCU_INIT_POINTER(rtnl_msg_handlers[protocol], NULL); + rtnl_unlock(); + + synchronize_net(); + + while (refcount_read(&rtnl_msg_handlers_ref[protocol]) > 1) + schedule(); + kfree(handlers); } EXPORT_SYMBOL_GPL(rtnl_unregister_all); @@ -433,16 +402,24 @@ static size_t rtnl_link_get_slave_info_data_size(const struct net_device *dev) { struct net_device *master_dev; const struct rtnl_link_ops *ops; + size_t size = 0; - master_dev = netdev_master_upper_dev_get((struct net_device *) dev); + rcu_read_lock(); + + master_dev = netdev_master_upper_dev_get_rcu((struct net_device *)dev); if (!master_dev) - return 0; + goto out; + ops = master_dev->rtnl_link_ops; if (!ops || !ops->get_slave_size) - return 0; + goto out; /* IFLA_INFO_SLAVE_DATA + nested data */ - return nla_total_size(sizeof(struct nlattr)) + + size = nla_total_size(sizeof(struct nlattr)) + ops->get_slave_size(master_dev, dev); + +out: + rcu_read_unlock(); + return size; } static size_t rtnl_link_get_size(const struct net_device *dev) @@ -476,7 +453,7 @@ static const struct rtnl_af_ops *rtnl_af_lookup(const int family) { const struct rtnl_af_ops *ops; - list_for_each_entry(ops, &rtnl_af_ops, list) { + list_for_each_entry_rcu(ops, &rtnl_af_ops, list) { if (ops->family == family) return ops; } @@ -493,32 +470,22 @@ static const struct rtnl_af_ops *rtnl_af_lookup(const int family) void rtnl_af_register(struct rtnl_af_ops *ops) { rtnl_lock(); - list_add_tail(&ops->list, &rtnl_af_ops); + list_add_tail_rcu(&ops->list, &rtnl_af_ops); rtnl_unlock(); } EXPORT_SYMBOL_GPL(rtnl_af_register); /** - * __rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink. - * @ops: struct rtnl_af_ops * to unregister - * - * The caller must hold the rtnl_mutex. - */ -void __rtnl_af_unregister(struct rtnl_af_ops *ops) -{ - list_del(&ops->list); -} -EXPORT_SYMBOL_GPL(__rtnl_af_unregister); - -/** * rtnl_af_unregister - Unregister rtnl_af_ops from rtnetlink. * @ops: struct rtnl_af_ops * to unregister */ void rtnl_af_unregister(struct rtnl_af_ops *ops) { rtnl_lock(); - __rtnl_af_unregister(ops); + list_del_rcu(&ops->list); rtnl_unlock(); + + synchronize_rcu(); } EXPORT_SYMBOL_GPL(rtnl_af_unregister); @@ -531,13 +498,15 @@ static size_t rtnl_link_get_af_size(const struct net_device *dev, /* IFLA_AF_SPEC */ size = nla_total_size(sizeof(struct nlattr)); - list_for_each_entry(af_ops, &rtnl_af_ops, list) { + rcu_read_lock(); + list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) { if (af_ops->get_link_af_size) { /* AF_* + nested data */ size += nla_total_size(sizeof(struct nlattr)) + af_ops->get_link_af_size(dev, ext_filter_mask); } } + rcu_read_unlock(); return size; } @@ -545,11 +514,15 @@ static size_t rtnl_link_get_af_size(const struct net_device *dev, static bool rtnl_have_link_slave_info(const struct net_device *dev) { struct net_device *master_dev; + bool ret = false; - master_dev = netdev_master_upper_dev_get((struct net_device *) dev); + rcu_read_lock(); + + master_dev = netdev_master_upper_dev_get_rcu((struct net_device *)dev); if (master_dev && master_dev->rtnl_link_ops) - return true; - return false; + ret = true; + rcu_read_unlock(); + return ret; } static int rtnl_link_slave_info_fill(struct sk_buff *skb, @@ -946,8 +919,10 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */ + rtnl_xdp_size() /* IFLA_XDP */ + nla_total_size(4) /* IFLA_EVENT */ - + nla_total_size(1); /* IFLA_PROTO_DOWN */ - + + nla_total_size(4) /* IFLA_NEW_NETNSID */ + + nla_total_size(1) /* IFLA_PROTO_DOWN */ + + nla_total_size(4) /* IFLA_IF_NETNSID */ + + 0; } static int rtnl_vf_ports_fill(struct sk_buff *skb, struct net_device *dev) @@ -1234,6 +1209,36 @@ nla_put_vfinfo_failure: return -EMSGSIZE; } +static noinline_for_stack int rtnl_fill_vf(struct sk_buff *skb, + struct net_device *dev, + u32 ext_filter_mask) +{ + struct nlattr *vfinfo; + int i, num_vfs; + + if (!dev->dev.parent || ((ext_filter_mask & RTEXT_FILTER_VF) == 0)) + return 0; + + num_vfs = dev_num_vf(dev->dev.parent); + if (nla_put_u32(skb, IFLA_NUM_VF, num_vfs)) + return -EMSGSIZE; + + if (!dev->netdev_ops->ndo_get_vf_config) + return 0; + + vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST); + if (!vfinfo) + return -EMSGSIZE; + + for (i = 0; i < num_vfs; i++) { + if (rtnl_fill_vfinfo(skb, dev, i, vfinfo)) + return -EMSGSIZE; + } + + nla_nest_end(skb, vfinfo); + return 0; +} + static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev) { struct rtnl_link_ifmap map; @@ -1265,10 +1270,10 @@ static u8 rtnl_xdp_attached_mode(struct net_device *dev, u32 *prog_id) *prog_id = generic_xdp_prog->aux->id; return XDP_ATTACHED_SKB; } - if (!ops->ndo_xdp) + if (!ops->ndo_bpf) return XDP_ATTACHED_NONE; - return __dev_xdp_attached(dev, ops->ndo_xdp, prog_id); + return __dev_xdp_attached(dev, ops->ndo_bpf, prog_id); } static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev) @@ -1330,16 +1335,108 @@ static u32 rtnl_get_event(unsigned long event) return rtnl_event_type; } -static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, +static int put_master_ifindex(struct sk_buff *skb, struct net_device *dev) +{ + const struct net_device *upper_dev; + int ret = 0; + + rcu_read_lock(); + + upper_dev = netdev_master_upper_dev_get_rcu(dev); + if (upper_dev) + ret = nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex); + + rcu_read_unlock(); + return ret; +} + +static int nla_put_iflink(struct sk_buff *skb, const struct net_device *dev) +{ + int ifindex = dev_get_iflink(dev); + + if (dev->ifindex == ifindex) + return 0; + + return nla_put_u32(skb, IFLA_LINK, ifindex); +} + +static noinline_for_stack int nla_put_ifalias(struct sk_buff *skb, + struct net_device *dev) +{ + char buf[IFALIASZ]; + int ret; + + ret = dev_get_alias(dev, buf, sizeof(buf)); + return ret > 0 ? nla_put_string(skb, IFLA_IFALIAS, buf) : 0; +} + +static int rtnl_fill_link_netnsid(struct sk_buff *skb, + const struct net_device *dev, + struct net *src_net) +{ + if (dev->rtnl_link_ops && dev->rtnl_link_ops->get_link_net) { + struct net *link_net = dev->rtnl_link_ops->get_link_net(dev); + + if (!net_eq(dev_net(dev), link_net)) { + int id = peernet2id_alloc(src_net, link_net); + + if (nla_put_s32(skb, IFLA_LINK_NETNSID, id)) + return -EMSGSIZE; + } + } + + return 0; +} + +static int rtnl_fill_link_af(struct sk_buff *skb, + const struct net_device *dev, + u32 ext_filter_mask) +{ + const struct rtnl_af_ops *af_ops; + struct nlattr *af_spec; + + af_spec = nla_nest_start(skb, IFLA_AF_SPEC); + if (!af_spec) + return -EMSGSIZE; + + list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) { + struct nlattr *af; + int err; + + if (!af_ops->fill_link_af) + continue; + + af = nla_nest_start(skb, af_ops->family); + if (!af) + return -EMSGSIZE; + + err = af_ops->fill_link_af(skb, dev, ext_filter_mask); + /* + * Caller may return ENODATA to indicate that there + * was no data to be dumped. This is not an error, it + * means we should trim the attribute header and + * continue. + */ + if (err == -ENODATA) + nla_nest_cancel(skb, af); + else if (err < 0) + return -EMSGSIZE; + + nla_nest_end(skb, af); + } + + nla_nest_end(skb, af_spec); + return 0; +} + +static int rtnl_fill_ifinfo(struct sk_buff *skb, + struct net_device *dev, struct net *src_net, int type, u32 pid, u32 seq, u32 change, unsigned int flags, u32 ext_filter_mask, - u32 event) + u32 event, int *new_nsid, int tgt_netnsid) { struct ifinfomsg *ifm; struct nlmsghdr *nlh; - struct nlattr *af_spec; - struct rtnl_af_ops *af_ops; - struct net_device *upper_dev = netdev_master_upper_dev_get(dev); ASSERT_RTNL(); nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ifm), flags); @@ -1354,6 +1451,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, ifm->ifi_flags = dev_get_flags(dev); ifm->ifi_change = change; + if (tgt_netnsid >= 0 && nla_put_s32(skb, IFLA_IF_NETNSID, tgt_netnsid)) + goto nla_put_failure; + if (nla_put_string(skb, IFLA_IFNAME, dev->name) || nla_put_u32(skb, IFLA_TXQLEN, dev->tx_queue_len) || nla_put_u8(skb, IFLA_OPERSTATE, @@ -1368,15 +1468,12 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, #ifdef CONFIG_RPS nla_put_u32(skb, IFLA_NUM_RX_QUEUES, dev->num_rx_queues) || #endif - (dev->ifindex != dev_get_iflink(dev) && - nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev))) || - (upper_dev && - nla_put_u32(skb, IFLA_MASTER, upper_dev->ifindex)) || + nla_put_iflink(skb, dev) || + put_master_ifindex(skb, dev) || nla_put_u8(skb, IFLA_CARRIER, netif_carrier_ok(dev)) || (dev->qdisc && nla_put_string(skb, IFLA_QDISC, dev->qdisc->ops->id)) || - (dev->ifalias && - nla_put_string(skb, IFLA_IFALIAS, dev->ifalias)) || + nla_put_ifalias(skb, dev) || nla_put_u32(skb, IFLA_CARRIER_CHANGES, atomic_read(&dev->carrier_changes)) || nla_put_u8(skb, IFLA_PROTO_DOWN, dev->proto_down)) @@ -1408,27 +1505,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (rtnl_fill_stats(skb, dev)) goto nla_put_failure; - if (dev->dev.parent && (ext_filter_mask & RTEXT_FILTER_VF) && - nla_put_u32(skb, IFLA_NUM_VF, dev_num_vf(dev->dev.parent))) + if (rtnl_fill_vf(skb, dev, ext_filter_mask)) goto nla_put_failure; - if (dev->netdev_ops->ndo_get_vf_config && dev->dev.parent && - ext_filter_mask & RTEXT_FILTER_VF) { - int i; - struct nlattr *vfinfo; - int num_vfs = dev_num_vf(dev->dev.parent); - - vfinfo = nla_nest_start(skb, IFLA_VFINFO_LIST); - if (!vfinfo) - goto nla_put_failure; - for (i = 0; i < num_vfs; i++) { - if (rtnl_fill_vfinfo(skb, dev, i, vfinfo)) - goto nla_put_failure; - } - - nla_nest_end(skb, vfinfo); - } - if (rtnl_port_fill(skb, dev, ext_filter_mask)) goto nla_put_failure; @@ -1440,51 +1519,23 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, goto nla_put_failure; } - if (dev->rtnl_link_ops && - dev->rtnl_link_ops->get_link_net) { - struct net *link_net = dev->rtnl_link_ops->get_link_net(dev); - - if (!net_eq(dev_net(dev), link_net)) { - int id = peernet2id_alloc(dev_net(dev), link_net); - - if (nla_put_s32(skb, IFLA_LINK_NETNSID, id)) - goto nla_put_failure; - } - } - - if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC))) + if (rtnl_fill_link_netnsid(skb, dev, src_net)) goto nla_put_failure; - list_for_each_entry(af_ops, &rtnl_af_ops, list) { - if (af_ops->fill_link_af) { - struct nlattr *af; - int err; - - if (!(af = nla_nest_start(skb, af_ops->family))) - goto nla_put_failure; - - err = af_ops->fill_link_af(skb, dev, ext_filter_mask); - - /* - * Caller may return ENODATA to indicate that there - * was no data to be dumped. This is not an error, it - * means we should trim the attribute header and - * continue. - */ - if (err == -ENODATA) - nla_nest_cancel(skb, af); - else if (err < 0) - goto nla_put_failure; - - nla_nest_end(skb, af); - } - } + if (new_nsid && + nla_put_s32(skb, IFLA_NEW_NETNSID, *new_nsid) < 0) + goto nla_put_failure; - nla_nest_end(skb, af_spec); + rcu_read_lock(); + if (rtnl_fill_link_af(skb, dev, ext_filter_mask)) + goto nla_put_failure_rcu; + rcu_read_unlock(); nlmsg_end(skb, nlh); return 0; +nla_put_failure_rcu: + rcu_read_unlock(); nla_put_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; @@ -1506,7 +1557,10 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_LINKINFO] = { .type = NLA_NESTED }, [IFLA_NET_NS_PID] = { .type = NLA_U32 }, [IFLA_NET_NS_FD] = { .type = NLA_U32 }, - [IFLA_IFALIAS] = { .type = NLA_STRING, .len = IFALIASZ-1 }, + /* IFLA_IFALIAS is a string, but policy is set to NLA_BINARY to + * allow 0-length string (needed to remove an alias). + */ + [IFLA_IFALIAS] = { .type = NLA_BINARY, .len = IFALIASZ - 1 }, [IFLA_VFINFO_LIST] = {. type = NLA_NESTED }, [IFLA_VF_PORTS] = { .type = NLA_NESTED }, [IFLA_PORT_SELF] = { .type = NLA_NESTED }, @@ -1523,6 +1577,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_XDP] = { .type = NLA_NESTED }, [IFLA_EVENT] = { .type = NLA_U32 }, [IFLA_GROUP] = { .type = NLA_U32 }, + [IFLA_IF_NETNSID] = { .type = NLA_S32 }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { @@ -1626,9 +1681,28 @@ static bool link_dump_filtered(struct net_device *dev, return false; } +static struct net *get_target_net(struct sk_buff *skb, int netnsid) +{ + struct net *net; + + net = get_net_ns_by_id(sock_net(skb->sk), netnsid); + if (!net) + return ERR_PTR(-EINVAL); + + /* For now, the caller is required to have CAP_NET_ADMIN in + * the user namespace owning the target net ns. + */ + if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) { + put_net(net); + return ERR_PTR(-EACCES); + } + return net; +} + static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); + struct net *tgt_net = net; int h, s_h; int idx = 0, s_idx; struct net_device *dev; @@ -1638,14 +1712,13 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) const struct rtnl_link_ops *kind_ops = NULL; unsigned int flags = NLM_F_MULTI; int master_idx = 0; + int netnsid = -1; int err; int hdrlen; s_h = cb->args[0]; s_idx = cb->args[1]; - cb->seq = net->dev_base_seq; - /* A hack to preserve kernel<->userspace interface. * The correct header is ifinfomsg. It is consistent with rtnl_getlink. * However, before Linux v3.9 the code here assumed rtgenmsg and that's @@ -1658,6 +1731,15 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) if (nlmsg_parse(cb->nlh, hdrlen, tb, IFLA_MAX, ifla_policy, NULL) >= 0) { + if (tb[IFLA_IF_NETNSID]) { + netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]); + tgt_net = get_target_net(skb, netnsid); + if (IS_ERR(tgt_net)) { + tgt_net = net; + netnsid = -1; + } + } + if (tb[IFLA_EXT_MASK]) ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); @@ -1673,17 +1755,19 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) { idx = 0; - head = &net->dev_index_head[h]; + head = &tgt_net->dev_index_head[h]; hlist_for_each_entry(dev, head, index_hlist) { if (link_dump_filtered(dev, master_idx, kind_ops)) goto cont; if (idx < s_idx) goto cont; - err = rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, + err = rtnl_fill_ifinfo(skb, dev, net, + RTM_NEWLINK, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 0, flags, - ext_filter_mask, 0); + ext_filter_mask, 0, NULL, + netnsid); if (err < 0) { if (likely(skb->len)) @@ -1691,8 +1775,6 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) goto out_err; } - - nl_dump_check_consistent(cb, nlmsg_hdr(skb)); cont: idx++; } @@ -1702,6 +1784,10 @@ out: out_err: cb->args[1] = idx; cb->args[0] = h; + cb->seq = net->dev_base_seq; + nl_dump_check_consistent(cb, nlmsg_hdr(skb)); + if (netnsid >= 0) + put_net(tgt_net); return err; } @@ -1748,17 +1834,27 @@ static int validate_linkmsg(struct net_device *dev, struct nlattr *tb[]) nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) { const struct rtnl_af_ops *af_ops; - if (!(af_ops = rtnl_af_lookup(nla_type(af)))) + rcu_read_lock(); + af_ops = rtnl_af_lookup(nla_type(af)); + if (!af_ops) { + rcu_read_unlock(); return -EAFNOSUPPORT; + } - if (!af_ops->set_link_af) + if (!af_ops->set_link_af) { + rcu_read_unlock(); return -EOPNOTSUPP; + } if (af_ops->validate_link_af) { err = af_ops->validate_link_af(dev, af); - if (err < 0) + if (err < 0) { + rcu_read_unlock(); return err; + } } + + rcu_read_unlock(); } } @@ -1934,7 +2030,8 @@ static int do_setvfinfo(struct net_device *dev, struct nlattr **tb) return err; } -static int do_set_master(struct net_device *dev, int ifindex) +static int do_set_master(struct net_device *dev, int ifindex, + struct netlink_ext_ack *extack) { struct net_device *upper_dev = netdev_master_upper_dev_get(dev); const struct net_device_ops *ops; @@ -1959,7 +2056,7 @@ static int do_set_master(struct net_device *dev, int ifindex) return -EINVAL; ops = upper_dev->netdev_ops; if (ops->ndo_add_slave) { - err = ops->ndo_add_slave(upper_dev, dev); + err = ops->ndo_add_slave(upper_dev, dev, extack); if (err) return err; } else { @@ -2092,7 +2189,7 @@ static int do_setlink(const struct sk_buff *skb, } if (tb[IFLA_MASTER]) { - err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER])); + err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), extack); if (err) goto errout; status |= DO_SETLINK_MODIFIED; @@ -2118,7 +2215,7 @@ static int do_setlink(const struct sk_buff *skb, dev->tx_queue_len = orig_len; goto errout; } - status |= DO_SETLINK_NOTIFY; + status |= DO_SETLINK_MODIFIED; } } @@ -2215,13 +2312,17 @@ static int do_setlink(const struct sk_buff *skb, nla_for_each_nested(af, tb[IFLA_AF_SPEC], rem) { const struct rtnl_af_ops *af_ops; - if (!(af_ops = rtnl_af_lookup(nla_type(af)))) - BUG(); + rcu_read_lock(); + + BUG_ON(!(af_ops = rtnl_af_lookup(nla_type(af)))); err = af_ops->set_link_af(dev, af); - if (err < 0) + if (err < 0) { + rcu_read_unlock(); goto errout; + } + rcu_read_unlock(); status |= DO_SETLINK_NOTIFY; } } @@ -2273,7 +2374,7 @@ static int do_setlink(const struct sk_buff *skb, errout: if (status & DO_SETLINK_MODIFIED) { - if (status & DO_SETLINK_NOTIFY) + if ((status & DO_SETLINK_NOTIFY) == DO_SETLINK_NOTIFY) netdev_state_change(dev); if (err < 0) @@ -2299,6 +2400,9 @@ static int rtnl_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) goto errout; + if (tb[IFLA_IF_NETNSID]) + return -EOPNOTSUPP; + if (tb[IFLA_IFNAME]) nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); else @@ -2393,6 +2497,9 @@ static int rtnl_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) return err; + if (tb[IFLA_IF_NETNSID]) + return -EOPNOTSUPP; + if (tb[IFLA_IFNAME]) nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); @@ -2524,6 +2631,9 @@ replay: if (err < 0) return err; + if (tb[IFLA_IF_NETNSID]) + return -EOPNOTSUPP; + if (tb[IFLA_IFNAME]) nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); else @@ -2601,12 +2711,6 @@ replay: return err; slave_data = slave_attr; } - if (m_ops->slave_validate) { - err = m_ops->slave_validate(tb, slave_data, - extack); - if (err < 0) - return err; - } } if (dev) { @@ -2736,7 +2840,8 @@ replay: goto out_unregister; } if (tb[IFLA_MASTER]) { - err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER])); + err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), + extack); if (err) goto out_unregister; } @@ -2762,11 +2867,13 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); + struct net *tgt_net = net; struct ifinfomsg *ifm; char ifname[IFNAMSIZ]; struct nlattr *tb[IFLA_MAX+1]; struct net_device *dev = NULL; struct sk_buff *nskb; + int netnsid = -1; int err; u32 ext_filter_mask = 0; @@ -2774,35 +2881,50 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) return err; + if (tb[IFLA_IF_NETNSID]) { + netnsid = nla_get_s32(tb[IFLA_IF_NETNSID]); + tgt_net = get_target_net(skb, netnsid); + if (IS_ERR(tgt_net)) + return PTR_ERR(tgt_net); + } + if (tb[IFLA_IFNAME]) nla_strlcpy(ifname, tb[IFLA_IFNAME], IFNAMSIZ); if (tb[IFLA_EXT_MASK]) ext_filter_mask = nla_get_u32(tb[IFLA_EXT_MASK]); + err = -EINVAL; ifm = nlmsg_data(nlh); if (ifm->ifi_index > 0) - dev = __dev_get_by_index(net, ifm->ifi_index); + dev = __dev_get_by_index(tgt_net, ifm->ifi_index); else if (tb[IFLA_IFNAME]) - dev = __dev_get_by_name(net, ifname); + dev = __dev_get_by_name(tgt_net, ifname); else - return -EINVAL; + goto out; + err = -ENODEV; if (dev == NULL) - return -ENODEV; + goto out; + err = -ENOBUFS; nskb = nlmsg_new(if_nlmsg_size(dev, ext_filter_mask), GFP_KERNEL); if (nskb == NULL) - return -ENOBUFS; + goto out; - err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).portid, - nlh->nlmsg_seq, 0, 0, ext_filter_mask, 0); + err = rtnl_fill_ifinfo(nskb, dev, net, + RTM_NEWLINK, NETLINK_CB(skb).portid, + nlh->nlmsg_seq, 0, 0, ext_filter_mask, + 0, NULL, netnsid); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size */ WARN_ON(err == -EMSGSIZE); kfree_skb(nskb); } else err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid); +out: + if (netnsid >= 0) + put_net(tgt_net); return err; } @@ -2831,11 +2953,13 @@ static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh) * traverse the list of net devices and compute the minimum * buffer size based upon the filter mask. */ - list_for_each_entry(dev, &net->dev_base_head, dev_list) { + rcu_read_lock(); + for_each_netdev_rcu(net, dev) { min_ifinfo_dump_size = max_t(u16, min_ifinfo_dump_size, if_nlmsg_size(dev, ext_filter_mask)); } + rcu_read_unlock(); return nlmsg_total_size(min_ifinfo_dump_size); } @@ -2847,19 +2971,29 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) if (s_idx == 0) s_idx = 1; + for (idx = 1; idx <= RTNL_FAMILY_MAX; idx++) { int type = cb->nlh->nlmsg_type-RTM_BASE; + struct rtnl_link *handlers; + rtnl_dumpit_func dumpit; + if (idx < s_idx || idx == PF_PACKET) continue; - if (rtnl_msg_handlers[idx] == NULL || - rtnl_msg_handlers[idx][type].dumpit == NULL) + + handlers = rtnl_dereference(rtnl_msg_handlers[idx]); + if (!handlers) + continue; + + dumpit = READ_ONCE(handlers[type].dumpit); + if (!dumpit) continue; + if (idx > s_idx) { memset(&cb->args[0], 0, sizeof(cb->args)); cb->prev_seq = 0; cb->seq = 0; } - if (rtnl_msg_handlers[idx][type].dumpit(skb, cb)) + if (dumpit(skb, cb)) break; } cb->family = idx; @@ -2869,7 +3003,7 @@ static int rtnl_dump_all(struct sk_buff *skb, struct netlink_callback *cb) struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, unsigned int change, - u32 event, gfp_t flags) + u32 event, gfp_t flags, int *new_nsid) { struct net *net = dev_net(dev); struct sk_buff *skb; @@ -2880,7 +3014,9 @@ struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, if (skb == NULL) goto errout; - err = rtnl_fill_ifinfo(skb, dev, type, 0, 0, change, 0, 0, event); + err = rtnl_fill_ifinfo(skb, dev, dev_net(dev), + type, 0, 0, change, 0, 0, event, + new_nsid, -1); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); @@ -2903,14 +3039,14 @@ void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags) static void rtmsg_ifinfo_event(int type, struct net_device *dev, unsigned int change, u32 event, - gfp_t flags) + gfp_t flags, int *new_nsid) { struct sk_buff *skb; if (dev->reg_state != NETREG_REGISTERED) return; - skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags); + skb = rtmsg_ifinfo_build_skb(type, dev, change, event, flags, new_nsid); if (skb) rtmsg_ifinfo_send(skb, dev, flags); } @@ -2918,9 +3054,15 @@ static void rtmsg_ifinfo_event(int type, struct net_device *dev, void rtmsg_ifinfo(int type, struct net_device *dev, unsigned int change, gfp_t flags) { - rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags); + rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags, NULL); +} + +void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change, + gfp_t flags, int *new_nsid) +{ + rtmsg_ifinfo_event(type, dev, change, rtnl_get_event(0), flags, + new_nsid); } -EXPORT_SYMBOL(rtmsg_ifinfo); static int nlmsg_populate_fdb_fill(struct sk_buff *skb, struct net_device *dev, @@ -3027,21 +3169,21 @@ int ndo_dflt_fdb_add(struct ndmsg *ndm, } EXPORT_SYMBOL(ndo_dflt_fdb_add); -static int fdb_vid_parse(struct nlattr *vlan_attr, u16 *p_vid) +static int fdb_vid_parse(struct nlattr *vlan_attr, u16 *p_vid, + struct netlink_ext_ack *extack) { u16 vid = 0; if (vlan_attr) { if (nla_len(vlan_attr) != sizeof(u16)) { - pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid vlan\n"); + NL_SET_ERR_MSG(extack, "invalid vlan attribute size"); return -EINVAL; } vid = nla_get_u16(vlan_attr); if (!vid || vid >= VLAN_VID_MASK) { - pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid vlan id %d\n", - vid); + NL_SET_ERR_MSG(extack, "invalid vlan id"); return -EINVAL; } } @@ -3066,24 +3208,24 @@ static int rtnl_fdb_add(struct sk_buff *skb, struct nlmsghdr *nlh, ndm = nlmsg_data(nlh); if (ndm->ndm_ifindex == 0) { - pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid ifindex\n"); + NL_SET_ERR_MSG(extack, "invalid ifindex"); return -EINVAL; } dev = __dev_get_by_index(net, ndm->ndm_ifindex); if (dev == NULL) { - pr_info("PF_BRIDGE: RTM_NEWNEIGH with unknown ifindex\n"); + NL_SET_ERR_MSG(extack, "unknown ifindex"); return -ENODEV; } if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) { - pr_info("PF_BRIDGE: RTM_NEWNEIGH with invalid address\n"); + NL_SET_ERR_MSG(extack, "invalid address"); return -EINVAL; } addr = nla_data(tb[NDA_LLADDR]); - err = fdb_vid_parse(tb[NDA_VLAN], &vid); + err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack); if (err) return err; @@ -3170,24 +3312,24 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh, ndm = nlmsg_data(nlh); if (ndm->ndm_ifindex == 0) { - pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid ifindex\n"); + NL_SET_ERR_MSG(extack, "invalid ifindex"); return -EINVAL; } dev = __dev_get_by_index(net, ndm->ndm_ifindex); if (dev == NULL) { - pr_info("PF_BRIDGE: RTM_DELNEIGH with unknown ifindex\n"); + NL_SET_ERR_MSG(extack, "unknown ifindex"); return -ENODEV; } if (!tb[NDA_LLADDR] || nla_len(tb[NDA_LLADDR]) != ETH_ALEN) { - pr_info("PF_BRIDGE: RTM_DELNEIGH with invalid address\n"); + NL_SET_ERR_MSG(extack, "invalid address"); return -EINVAL; } addr = nla_data(tb[NDA_LLADDR]); - err = fdb_vid_parse(tb[NDA_VLAN], &vid); + err = fdb_vid_parse(tb[NDA_VLAN], &vid, extack); if (err) return err; @@ -3627,7 +3769,7 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh, dev = __dev_get_by_index(net, ifm->ifi_index); if (!dev) { - pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n"); + NL_SET_ERR_MSG(extack, "unknown ifindex"); return -ENODEV; } @@ -3702,7 +3844,7 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh, dev = __dev_get_by_index(net, ifm->ifi_index); if (!dev) { - pr_info("PF_BRIDGE: RTM_SETLINK with unknown ifindex\n"); + NL_SET_ERR_MSG(extack, "unknown ifindex"); return -ENODEV; } @@ -3867,6 +4009,9 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, return -EMSGSIZE; ifsm = nlmsg_data(nlh); + ifsm->family = PF_UNSPEC; + ifsm->pad1 = 0; + ifsm->pad2 = 0; ifsm->ifindex = dev->ifindex; ifsm->filter_mask = filter_mask; @@ -3950,25 +4095,30 @@ static int rtnl_fill_statsinfo(struct sk_buff *skb, struct net_device *dev, if (!attr) goto nla_put_failure; - list_for_each_entry(af_ops, &rtnl_af_ops, list) { + rcu_read_lock(); + list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) { if (af_ops->fill_stats_af) { struct nlattr *af; int err; af = nla_nest_start(skb, af_ops->family); - if (!af) + if (!af) { + rcu_read_unlock(); goto nla_put_failure; - + } err = af_ops->fill_stats_af(skb, dev); - if (err == -ENODATA) + if (err == -ENODATA) { nla_nest_cancel(skb, af); - else if (err < 0) + } else if (err < 0) { + rcu_read_unlock(); goto nla_put_failure; + } nla_nest_end(skb, af); } } + rcu_read_unlock(); nla_nest_end(skb, attr); @@ -4037,7 +4187,8 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev, /* for IFLA_STATS_AF_SPEC */ size += nla_total_size(0); - list_for_each_entry(af_ops, &rtnl_af_ops, list) { + rcu_read_lock(); + list_for_each_entry_rcu(af_ops, &rtnl_af_ops, list) { if (af_ops->get_stats_af_size) { size += nla_total_size( af_ops->get_stats_af_size(dev)); @@ -4046,6 +4197,7 @@ static size_t if_nlmsg_stats_size(const struct net_device *dev, size += nla_total_size(0); } } + rcu_read_unlock(); } return size; @@ -4162,11 +4314,13 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); + struct rtnl_link *handlers; + int err = -EOPNOTSUPP; rtnl_doit_func doit; + unsigned int flags; int kind; int family; int type; - int err; type = nlh->nlmsg_type; if (type > RTM_MAX) @@ -4184,20 +4338,40 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, if (kind != 2 && !netlink_net_capable(skb, CAP_NET_ADMIN)) return -EPERM; + if (family >= ARRAY_SIZE(rtnl_msg_handlers)) + family = PF_UNSPEC; + + rcu_read_lock(); + handlers = rcu_dereference(rtnl_msg_handlers[family]); + if (!handlers) { + family = PF_UNSPEC; + handlers = rcu_dereference(rtnl_msg_handlers[family]); + } + if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { struct sock *rtnl; rtnl_dumpit_func dumpit; - rtnl_calcit_func calcit; u16 min_dump_alloc = 0; - dumpit = rtnl_get_dumpit(family, type); - if (dumpit == NULL) - return -EOPNOTSUPP; - calcit = rtnl_get_calcit(family, type); - if (calcit) - min_dump_alloc = calcit(skb, nlh); + dumpit = READ_ONCE(handlers[type].dumpit); + if (!dumpit) { + family = PF_UNSPEC; + handlers = rcu_dereference(rtnl_msg_handlers[PF_UNSPEC]); + if (!handlers) + goto err_unlock; + + dumpit = READ_ONCE(handlers[type].dumpit); + if (!dumpit) + goto err_unlock; + } + + refcount_inc(&rtnl_msg_handlers_ref[family]); + + if (type == RTM_GETLINK - RTM_BASE) + min_dump_alloc = rtnl_calcit(skb, nlh); + + rcu_read_unlock(); - __rtnl_unlock(); rtnl = net->rtnl; { struct netlink_dump_control c = { @@ -4206,22 +4380,47 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, }; err = netlink_dump_start(rtnl, skb, nlh, &c); } - rtnl_lock(); + refcount_dec(&rtnl_msg_handlers_ref[family]); return err; } - doit = rtnl_get_doit(family, type); - if (doit == NULL) - return -EOPNOTSUPP; + doit = READ_ONCE(handlers[type].doit); + if (!doit) { + family = PF_UNSPEC; + handlers = rcu_dereference(rtnl_msg_handlers[family]); + } + + flags = READ_ONCE(handlers[type].flags); + if (flags & RTNL_FLAG_DOIT_UNLOCKED) { + refcount_inc(&rtnl_msg_handlers_ref[family]); + doit = READ_ONCE(handlers[type].doit); + rcu_read_unlock(); + if (doit) + err = doit(skb, nlh, extack); + refcount_dec(&rtnl_msg_handlers_ref[family]); + return err; + } + + rcu_read_unlock(); + + rtnl_lock(); + handlers = rtnl_dereference(rtnl_msg_handlers[family]); + if (handlers) { + doit = READ_ONCE(handlers[type].doit); + if (doit) + err = doit(skb, nlh, extack); + } + rtnl_unlock(); + return err; - return doit(skb, nlh, extack); +err_unlock: + rcu_read_unlock(); + return -EOPNOTSUPP; } static void rtnetlink_rcv(struct sk_buff *skb) { - rtnl_lock(); netlink_rcv_skb(skb, &rtnetlink_rcv_msg); - rtnl_unlock(); } static int rtnetlink_bind(struct net *net, int group) @@ -4242,15 +4441,20 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi switch (event) { case NETDEV_REBOOT: + case NETDEV_CHANGEMTU: case NETDEV_CHANGEADDR: case NETDEV_CHANGENAME: case NETDEV_FEAT_CHANGE: case NETDEV_BONDING_FAILOVER: + case NETDEV_POST_TYPE_CHANGE: case NETDEV_NOTIFY_PEERS: + case NETDEV_CHANGEUPPER: case NETDEV_RESEND_IGMP: case NETDEV_CHANGEINFODATA: + case NETDEV_CHANGELOWERSTATE: + case NETDEV_CHANGE_TX_QUEUE_LEN: rtmsg_ifinfo_event(RTM_NEWLINK, dev, 0, rtnl_get_event(event), - GFP_KERNEL); + GFP_KERNEL, NULL); break; default: break; @@ -4294,29 +4498,34 @@ static struct pernet_operations rtnetlink_net_ops = { void __init rtnetlink_init(void) { + int i; + + for (i = 0; i < ARRAY_SIZE(rtnl_msg_handlers_ref); i++) + refcount_set(&rtnl_msg_handlers_ref[i], 1); + if (register_pernet_subsys(&rtnetlink_net_ops)) panic("rtnetlink_init: cannot initialize rtnetlink\n"); register_netdevice_notifier(&rtnetlink_dev_notifier); rtnl_register(PF_UNSPEC, RTM_GETLINK, rtnl_getlink, - rtnl_dump_ifinfo, rtnl_calcit); - rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL, NULL); - rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL, NULL); - rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL, NULL); + rtnl_dump_ifinfo, 0); + rtnl_register(PF_UNSPEC, RTM_SETLINK, rtnl_setlink, NULL, 0); + rtnl_register(PF_UNSPEC, RTM_NEWLINK, rtnl_newlink, NULL, 0); + rtnl_register(PF_UNSPEC, RTM_DELLINK, rtnl_dellink, NULL, 0); - rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, NULL); - rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, NULL); - rtnl_register(PF_UNSPEC, RTM_GETNETCONF, NULL, rtnl_dump_all, NULL); + rtnl_register(PF_UNSPEC, RTM_GETADDR, NULL, rtnl_dump_all, 0); + rtnl_register(PF_UNSPEC, RTM_GETROUTE, NULL, rtnl_dump_all, 0); + rtnl_register(PF_UNSPEC, RTM_GETNETCONF, NULL, rtnl_dump_all, 0); - rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, NULL); - rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, NULL); - rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, NULL); + rtnl_register(PF_BRIDGE, RTM_NEWNEIGH, rtnl_fdb_add, NULL, 0); + rtnl_register(PF_BRIDGE, RTM_DELNEIGH, rtnl_fdb_del, NULL, 0); + rtnl_register(PF_BRIDGE, RTM_GETNEIGH, NULL, rtnl_fdb_dump, 0); - rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, NULL); - rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, NULL); - rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, NULL); + rtnl_register(PF_BRIDGE, RTM_GETLINK, NULL, rtnl_bridge_getlink, 0); + rtnl_register(PF_BRIDGE, RTM_DELLINK, rtnl_bridge_dellink, NULL, 0); + rtnl_register(PF_BRIDGE, RTM_SETLINK, rtnl_bridge_setlink, NULL, 0); rtnl_register(PF_UNSPEC, RTM_GETSTATS, rtnl_stats_get, rtnl_stats_dump, - NULL); + 0); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f990eb8b30a9..6b0ff396fa9d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -41,7 +41,6 @@ #include <linux/module.h> #include <linux/types.h> #include <linux/kernel.h> -#include <linux/kmemcheck.h> #include <linux/mm.h> #include <linux/interrupt.h> #include <linux/in.h> @@ -158,31 +157,6 @@ out: * */ -struct sk_buff *__alloc_skb_head(gfp_t gfp_mask, int node) -{ - struct sk_buff *skb; - - /* Get the HEAD */ - skb = kmem_cache_alloc_node(skbuff_head_cache, - gfp_mask & ~__GFP_DMA, node); - if (!skb) - goto out; - - /* - * Only clear those fields we need to clear, not those that we will - * actually initialise below. Hence, don't put any more fields after - * the tail pointer in struct sk_buff! - */ - memset(skb, 0, offsetof(struct sk_buff, tail)); - skb->head = NULL; - skb->truesize = sizeof(struct sk_buff); - refcount_set(&skb->users, 1); - - skb->mac_header = (typeof(skb->mac_header))~0U; -out: - return skb; -} - /** * __alloc_skb - allocate a network buffer * @size: size to allocate @@ -259,14 +233,12 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask, shinfo = skb_shinfo(skb); memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); atomic_set(&shinfo->dataref, 1); - kmemcheck_annotate_variable(shinfo->destructor_arg); if (flags & SKB_ALLOC_FCLONE) { struct sk_buff_fclones *fclones; fclones = container_of(skb, struct sk_buff_fclones, skb1); - kmemcheck_annotate_bitfield(&fclones->skb2, flags1); skb->fclone = SKB_FCLONE_ORIG; refcount_set(&fclones->fclone_ref, 1); @@ -326,7 +298,6 @@ struct sk_buff *__build_skb(void *data, unsigned int frag_size) shinfo = skb_shinfo(skb); memset(shinfo, 0, offsetof(struct skb_shared_info, dataref)); atomic_set(&shinfo->dataref, 1); - kmemcheck_annotate_variable(shinfo->destructor_arg); return skb; } @@ -382,7 +353,7 @@ static void *__netdev_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) */ void *netdev_alloc_frag(unsigned int fragsz) { - return __netdev_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD); + return __netdev_alloc_frag(fragsz, GFP_ATOMIC); } EXPORT_SYMBOL(netdev_alloc_frag); @@ -395,7 +366,7 @@ static void *__napi_alloc_frag(unsigned int fragsz, gfp_t gfp_mask) void *napi_alloc_frag(unsigned int fragsz) { - return __napi_alloc_frag(fragsz, GFP_ATOMIC | __GFP_COLD); + return __napi_alloc_frag(fragsz, GFP_ATOMIC); } EXPORT_SYMBOL(napi_alloc_frag); @@ -592,21 +563,10 @@ static void skb_release_data(struct sk_buff *skb) for (i = 0; i < shinfo->nr_frags; i++) __skb_frag_unref(&shinfo->frags[i]); - /* - * If skb buf is from userspace, we need to notify the caller - * the lower device DMA has done; - */ - if (shinfo->tx_flags & SKBTX_DEV_ZEROCOPY) { - struct ubuf_info *uarg; - - uarg = shinfo->destructor_arg; - if (uarg->callback) - uarg->callback(uarg, true); - } - if (shinfo->frag_list) kfree_skb_list(shinfo->frag_list); + skb_zcopy_clear(skb, true); skb_free_head(skb); } @@ -720,14 +680,7 @@ EXPORT_SYMBOL(kfree_skb_list); */ void skb_tx_error(struct sk_buff *skb) { - if (skb_shinfo(skb)->tx_flags & SKBTX_DEV_ZEROCOPY) { - struct ubuf_info *uarg; - - uarg = skb_shinfo(skb)->destructor_arg; - if (uarg->callback) - uarg->callback(uarg, false); - skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; - } + skb_zcopy_clear(skb, true); } EXPORT_SYMBOL(skb_tx_error); @@ -753,17 +706,13 @@ EXPORT_SYMBOL(consume_skb); * consume_stateless_skb - free an skbuff, assuming it is stateless * @skb: buffer to free * - * Works like consume_skb(), but this variant assumes that all the head - * states have been already dropped. + * Alike consume_skb(), but this variant assumes that this is the last + * skb reference and all the head states have been already dropped */ -void consume_stateless_skb(struct sk_buff *skb) +void __consume_stateless_skb(struct sk_buff *skb) { - if (!skb_unref(skb)) - return; - trace_consume_skb(skb); - if (likely(skb->head)) - skb_release_data(skb); + skb_release_data(skb); kfree_skbmem(skb); } @@ -941,6 +890,271 @@ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src) } EXPORT_SYMBOL_GPL(skb_morph); +static int mm_account_pinned_pages(struct mmpin *mmp, size_t size) +{ + unsigned long max_pg, num_pg, new_pg, old_pg; + struct user_struct *user; + + if (capable(CAP_IPC_LOCK) || !size) + return 0; + + num_pg = (size >> PAGE_SHIFT) + 2; /* worst case */ + max_pg = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + user = mmp->user ? : current_user(); + + do { + old_pg = atomic_long_read(&user->locked_vm); + new_pg = old_pg + num_pg; + if (new_pg > max_pg) + return -ENOBUFS; + } while (atomic_long_cmpxchg(&user->locked_vm, old_pg, new_pg) != + old_pg); + + if (!mmp->user) { + mmp->user = get_uid(user); + mmp->num_pg = num_pg; + } else { + mmp->num_pg += num_pg; + } + + return 0; +} + +static void mm_unaccount_pinned_pages(struct mmpin *mmp) +{ + if (mmp->user) { + atomic_long_sub(mmp->num_pg, &mmp->user->locked_vm); + free_uid(mmp->user); + } +} + +struct ubuf_info *sock_zerocopy_alloc(struct sock *sk, size_t size) +{ + struct ubuf_info *uarg; + struct sk_buff *skb; + + WARN_ON_ONCE(!in_task()); + + if (!sock_flag(sk, SOCK_ZEROCOPY)) + return NULL; + + skb = sock_omalloc(sk, 0, GFP_KERNEL); + if (!skb) + return NULL; + + BUILD_BUG_ON(sizeof(*uarg) > sizeof(skb->cb)); + uarg = (void *)skb->cb; + uarg->mmp.user = NULL; + + if (mm_account_pinned_pages(&uarg->mmp, size)) { + kfree_skb(skb); + return NULL; + } + + uarg->callback = sock_zerocopy_callback; + uarg->id = ((u32)atomic_inc_return(&sk->sk_zckey)) - 1; + uarg->len = 1; + uarg->bytelen = size; + uarg->zerocopy = 1; + refcount_set(&uarg->refcnt, 1); + sock_hold(sk); + + return uarg; +} +EXPORT_SYMBOL_GPL(sock_zerocopy_alloc); + +static inline struct sk_buff *skb_from_uarg(struct ubuf_info *uarg) +{ + return container_of((void *)uarg, struct sk_buff, cb); +} + +struct ubuf_info *sock_zerocopy_realloc(struct sock *sk, size_t size, + struct ubuf_info *uarg) +{ + if (uarg) { + const u32 byte_limit = 1 << 19; /* limit to a few TSO */ + u32 bytelen, next; + + /* realloc only when socket is locked (TCP, UDP cork), + * so uarg->len and sk_zckey access is serialized + */ + if (!sock_owned_by_user(sk)) { + WARN_ON_ONCE(1); + return NULL; + } + + bytelen = uarg->bytelen + size; + if (uarg->len == USHRT_MAX - 1 || bytelen > byte_limit) { + /* TCP can create new skb to attach new uarg */ + if (sk->sk_type == SOCK_STREAM) + goto new_alloc; + return NULL; + } + + next = (u32)atomic_read(&sk->sk_zckey); + if ((u32)(uarg->id + uarg->len) == next) { + if (mm_account_pinned_pages(&uarg->mmp, size)) + return NULL; + uarg->len++; + uarg->bytelen = bytelen; + atomic_set(&sk->sk_zckey, ++next); + sock_zerocopy_get(uarg); + return uarg; + } + } + +new_alloc: + return sock_zerocopy_alloc(sk, size); +} +EXPORT_SYMBOL_GPL(sock_zerocopy_realloc); + +static bool skb_zerocopy_notify_extend(struct sk_buff *skb, u32 lo, u16 len) +{ + struct sock_exterr_skb *serr = SKB_EXT_ERR(skb); + u32 old_lo, old_hi; + u64 sum_len; + + old_lo = serr->ee.ee_info; + old_hi = serr->ee.ee_data; + sum_len = old_hi - old_lo + 1ULL + len; + + if (sum_len >= (1ULL << 32)) + return false; + + if (lo != old_hi + 1) + return false; + + serr->ee.ee_data += len; + return true; +} + +void sock_zerocopy_callback(struct ubuf_info *uarg, bool success) +{ + struct sk_buff *tail, *skb = skb_from_uarg(uarg); + struct sock_exterr_skb *serr; + struct sock *sk = skb->sk; + struct sk_buff_head *q; + unsigned long flags; + u32 lo, hi; + u16 len; + + mm_unaccount_pinned_pages(&uarg->mmp); + + /* if !len, there was only 1 call, and it was aborted + * so do not queue a completion notification + */ + if (!uarg->len || sock_flag(sk, SOCK_DEAD)) + goto release; + + len = uarg->len; + lo = uarg->id; + hi = uarg->id + len - 1; + + serr = SKB_EXT_ERR(skb); + memset(serr, 0, sizeof(*serr)); + serr->ee.ee_errno = 0; + serr->ee.ee_origin = SO_EE_ORIGIN_ZEROCOPY; + serr->ee.ee_data = hi; + serr->ee.ee_info = lo; + if (!success) + serr->ee.ee_code |= SO_EE_CODE_ZEROCOPY_COPIED; + + q = &sk->sk_error_queue; + spin_lock_irqsave(&q->lock, flags); + tail = skb_peek_tail(q); + if (!tail || SKB_EXT_ERR(tail)->ee.ee_origin != SO_EE_ORIGIN_ZEROCOPY || + !skb_zerocopy_notify_extend(tail, lo, len)) { + __skb_queue_tail(q, skb); + skb = NULL; + } + spin_unlock_irqrestore(&q->lock, flags); + + sk->sk_error_report(sk); + +release: + consume_skb(skb); + sock_put(sk); +} +EXPORT_SYMBOL_GPL(sock_zerocopy_callback); + +void sock_zerocopy_put(struct ubuf_info *uarg) +{ + if (uarg && refcount_dec_and_test(&uarg->refcnt)) { + if (uarg->callback) + uarg->callback(uarg, uarg->zerocopy); + else + consume_skb(skb_from_uarg(uarg)); + } +} +EXPORT_SYMBOL_GPL(sock_zerocopy_put); + +void sock_zerocopy_put_abort(struct ubuf_info *uarg) +{ + if (uarg) { + struct sock *sk = skb_from_uarg(uarg)->sk; + + atomic_dec(&sk->sk_zckey); + uarg->len--; + + sock_zerocopy_put(uarg); + } +} +EXPORT_SYMBOL_GPL(sock_zerocopy_put_abort); + +extern int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb, + struct iov_iter *from, size_t length); + +int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, + struct msghdr *msg, int len, + struct ubuf_info *uarg) +{ + struct ubuf_info *orig_uarg = skb_zcopy(skb); + struct iov_iter orig_iter = msg->msg_iter; + int err, orig_len = skb->len; + + /* An skb can only point to one uarg. This edge case happens when + * TCP appends to an skb, but zerocopy_realloc triggered a new alloc. + */ + if (orig_uarg && uarg != orig_uarg) + return -EEXIST; + + err = __zerocopy_sg_from_iter(sk, skb, &msg->msg_iter, len); + if (err == -EFAULT || (err == -EMSGSIZE && skb->len == orig_len)) { + struct sock *save_sk = skb->sk; + + /* Streams do not free skb on error. Reset to prev state. */ + msg->msg_iter = orig_iter; + skb->sk = sk; + ___pskb_trim(skb, orig_len); + skb->sk = save_sk; + return err; + } + + skb_zcopy_set(skb, uarg); + return skb->len - orig_len; +} +EXPORT_SYMBOL_GPL(skb_zerocopy_iter_stream); + +static int skb_zerocopy_clone(struct sk_buff *nskb, struct sk_buff *orig, + gfp_t gfp_mask) +{ + if (skb_zcopy(orig)) { + if (skb_zcopy(nskb)) { + /* !gfp_mask callers are verified to !skb_zcopy(nskb) */ + if (!gfp_mask) { + WARN_ON_ONCE(1); + return -ENOMEM; + } + if (skb_uarg(nskb) == skb_uarg(orig)) + return 0; + if (skb_copy_ubufs(nskb, GFP_ATOMIC)) + return -EIO; + } + skb_zcopy_set(nskb, skb_uarg(orig)); + } + return 0; +} + /** * skb_copy_ubufs - copy userspace skb frags buffers to kernel * @skb: the skb to modify @@ -958,15 +1172,19 @@ EXPORT_SYMBOL_GPL(skb_morph); */ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) { - int i; int num_frags = skb_shinfo(skb)->nr_frags; struct page *page, *head = NULL; - struct ubuf_info *uarg = skb_shinfo(skb)->destructor_arg; + int i, new_frags; + u32 d_off; - for (i = 0; i < num_frags; i++) { - u8 *vaddr; - skb_frag_t *f = &skb_shinfo(skb)->frags[i]; + if (!num_frags) + return 0; + if (skb_shared(skb) || skb_unclone(skb, gfp_mask)) + return -EINVAL; + + new_frags = (__skb_pagelen(skb) + PAGE_SIZE - 1) >> PAGE_SHIFT; + for (i = 0; i < new_frags; i++) { page = alloc_page(gfp_mask); if (!page) { while (head) { @@ -976,28 +1194,51 @@ int skb_copy_ubufs(struct sk_buff *skb, gfp_t gfp_mask) } return -ENOMEM; } - vaddr = kmap_atomic(skb_frag_page(f)); - memcpy(page_address(page), - vaddr + f->page_offset, skb_frag_size(f)); - kunmap_atomic(vaddr); set_page_private(page, (unsigned long)head); head = page; } + page = head; + d_off = 0; + for (i = 0; i < num_frags; i++) { + skb_frag_t *f = &skb_shinfo(skb)->frags[i]; + u32 p_off, p_len, copied; + struct page *p; + u8 *vaddr; + + skb_frag_foreach_page(f, f->page_offset, skb_frag_size(f), + p, p_off, p_len, copied) { + u32 copy, done = 0; + vaddr = kmap_atomic(p); + + while (done < p_len) { + if (d_off == PAGE_SIZE) { + d_off = 0; + page = (struct page *)page_private(page); + } + copy = min_t(u32, PAGE_SIZE - d_off, p_len - done); + memcpy(page_address(page) + d_off, + vaddr + p_off + done, copy); + done += copy; + d_off += copy; + } + kunmap_atomic(vaddr); + } + } + /* skb frags release userspace buffers */ for (i = 0; i < num_frags; i++) skb_frag_unref(skb, i); - uarg->callback(uarg, false); - /* skb frags point to kernel buffers */ - for (i = num_frags - 1; i >= 0; i--) { - __skb_fill_page_desc(skb, i, head, 0, - skb_shinfo(skb)->frags[i].size); + for (i = 0; i < new_frags - 1; i++) { + __skb_fill_page_desc(skb, i, head, 0, PAGE_SIZE); head = (struct page *)page_private(head); } + __skb_fill_page_desc(skb, new_frags - 1, head, 0, d_off); + skb_shinfo(skb)->nr_frags = new_frags; - skb_shinfo(skb)->tx_flags &= ~SKBTX_DEV_ZEROCOPY; + skb_zcopy_clear(skb, false); return 0; } EXPORT_SYMBOL_GPL(skb_copy_ubufs); @@ -1038,7 +1279,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask) if (!n) return NULL; - kmemcheck_annotate_bitfield(n, flags1); n->fclone = SKB_FCLONE_UNAVAILABLE; } @@ -1109,8 +1349,7 @@ struct sk_buff *skb_copy(const struct sk_buff *skb, gfp_t gfp_mask) /* Set the tail pointer and length */ skb_put(n, skb->len); - if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len)) - BUG(); + BUG_ON(skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len)); copy_skb_header(n, skb); return n; @@ -1158,7 +1397,8 @@ struct sk_buff *__pskb_copy_fclone(struct sk_buff *skb, int headroom, if (skb_shinfo(skb)->nr_frags) { int i; - if (skb_orphan_frags(skb, gfp_mask)) { + if (skb_orphan_frags(skb, gfp_mask) || + skb_zerocopy_clone(n, skb, gfp_mask)) { kfree_skb(n); n = NULL; goto out; @@ -1207,8 +1447,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, BUG_ON(nhead < 0); - if (skb_shared(skb)) - BUG(); + BUG_ON(skb_shared(skb)); size = SKB_DATA_ALIGN(size); @@ -1235,9 +1474,10 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, * be since all we did is relocate the values */ if (skb_cloned(skb)) { - /* copy this zero copy skb frags */ if (skb_orphan_frags(skb, gfp_mask)) goto nofrags; + if (skb_zcopy(skb)) + refcount_inc(&skb_uarg(skb)->refcnt); for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) skb_frag_ref(skb, i); @@ -1266,6 +1506,8 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, skb->nohdr = 0; atomic_set(&skb_shinfo(skb)->dataref, 1); + skb_metadata_clear(skb); + /* It is not generally safe to change skb->truesize. * For the moment, we really care of rx path, or * when skb is orphaned (not attached to a socket). @@ -1350,9 +1592,8 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, head_copy_off = newheadroom - head_copy_len; /* Copy the linear header and data. */ - if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off, - skb->len + head_copy_len)) - BUG(); + BUG_ON(skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off, + skb->len + head_copy_len)); copy_skb_header(n, skb); @@ -1363,18 +1604,20 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, EXPORT_SYMBOL(skb_copy_expand); /** - * skb_pad - zero pad the tail of an skb + * __skb_pad - zero pad the tail of an skb * @skb: buffer to pad * @pad: space to pad + * @free_on_error: free buffer on error * * Ensure that a buffer is followed by a padding area that is zero * filled. Used by network drivers which may DMA or transfer data * beyond the buffer end onto the wire. * - * May return error in out of memory cases. The skb is freed on error. + * May return error in out of memory cases. The skb is freed on error + * if @free_on_error is true. */ -int skb_pad(struct sk_buff *skb, int pad) +int __skb_pad(struct sk_buff *skb, int pad, bool free_on_error) { int err; int ntail; @@ -1403,10 +1646,11 @@ int skb_pad(struct sk_buff *skb, int pad) return 0; free_skb: - kfree_skb(skb); + if (free_on_error) + kfree_skb(skb); return err; } -EXPORT_SYMBOL(skb_pad); +EXPORT_SYMBOL(__skb_pad); /** * pskb_put - add data to the tail of a potentially fragmented buffer @@ -1630,8 +1874,8 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta) return NULL; } - if (skb_copy_bits(skb, skb_headlen(skb), skb_tail_pointer(skb), delta)) - BUG(); + BUG_ON(skb_copy_bits(skb, skb_headlen(skb), + skb_tail_pointer(skb), delta)); /* Optimization: no fragments, no reasons to preestimate * size of pulled pages. Superb. @@ -1650,7 +1894,7 @@ void *__pskb_pull_tail(struct sk_buff *skb, int delta) } /* If we need update frag list, we are in troubles. - * Certainly, it possible to add an offset to skb data, + * Certainly, it is possible to add an offset to skb data, * but taking into account that pulling is expected to * be very rare operation, it is worth to fight against * further bloating skb head and crucify ourselves here instead. @@ -1719,6 +1963,8 @@ pull_pages: if (eat) { skb_shinfo(skb)->frags[k].page_offset += eat; skb_frag_size_sub(&skb_shinfo(skb)->frags[k], eat); + if (!i) + goto end; eat = 0; } k++; @@ -1726,9 +1972,13 @@ pull_pages: } skb_shinfo(skb)->nr_frags = k; +end: skb->tail += delta; skb->data_len -= delta; + if (!skb->data_len) + skb_zcopy_clear(skb, false); + return skb_tail_pointer(skb); } EXPORT_SYMBOL(__pskb_pull_tail); @@ -1776,16 +2026,20 @@ int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) end = start + skb_frag_size(f); if ((copy = end - offset) > 0) { + u32 p_off, p_len, copied; + struct page *p; u8 *vaddr; if (copy > len) copy = len; - vaddr = kmap_atomic(skb_frag_page(f)); - memcpy(to, - vaddr + f->page_offset + offset - start, - copy); - kunmap_atomic(vaddr); + skb_frag_foreach_page(f, + f->page_offset + offset - start, + copy, p, p_off, p_len, copied) { + vaddr = kmap_atomic(p); + memcpy(to + copied, vaddr + p_off, p_len); + kunmap_atomic(vaddr); + } if ((len -= copy) == 0) return 0; @@ -2005,6 +2259,107 @@ int skb_splice_bits(struct sk_buff *skb, struct sock *sk, unsigned int offset, } EXPORT_SYMBOL_GPL(skb_splice_bits); +/* Send skb data on a socket. Socket must be locked. */ +int skb_send_sock_locked(struct sock *sk, struct sk_buff *skb, int offset, + int len) +{ + unsigned int orig_len = len; + struct sk_buff *head = skb; + unsigned short fragidx; + int slen, ret; + +do_frag_list: + + /* Deal with head data */ + while (offset < skb_headlen(skb) && len) { + struct kvec kv; + struct msghdr msg; + + slen = min_t(int, len, skb_headlen(skb) - offset); + kv.iov_base = skb->data + offset; + kv.iov_len = slen; + memset(&msg, 0, sizeof(msg)); + + ret = kernel_sendmsg_locked(sk, &msg, &kv, 1, slen); + if (ret <= 0) + goto error; + + offset += ret; + len -= ret; + } + + /* All the data was skb head? */ + if (!len) + goto out; + + /* Make offset relative to start of frags */ + offset -= skb_headlen(skb); + + /* Find where we are in frag list */ + for (fragidx = 0; fragidx < skb_shinfo(skb)->nr_frags; fragidx++) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[fragidx]; + + if (offset < frag->size) + break; + + offset -= frag->size; + } + + for (; len && fragidx < skb_shinfo(skb)->nr_frags; fragidx++) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[fragidx]; + + slen = min_t(size_t, len, frag->size - offset); + + while (slen) { + ret = kernel_sendpage_locked(sk, frag->page.p, + frag->page_offset + offset, + slen, MSG_DONTWAIT); + if (ret <= 0) + goto error; + + len -= ret; + offset += ret; + slen -= ret; + } + + offset = 0; + } + + if (len) { + /* Process any frag lists */ + + if (skb == head) { + if (skb_has_frag_list(skb)) { + skb = skb_shinfo(skb)->frag_list; + goto do_frag_list; + } + } else if (skb->next) { + skb = skb->next; + goto do_frag_list; + } + } + +out: + return orig_len - len; + +error: + return orig_len == len ? ret : orig_len - len; +} +EXPORT_SYMBOL_GPL(skb_send_sock_locked); + +/* Send skb data on a socket. */ +int skb_send_sock(struct sock *sk, struct sk_buff *skb, int offset, int len) +{ + int ret = 0; + + lock_sock(sk); + ret = skb_send_sock_locked(sk, skb, offset, len); + release_sock(sk); + + return ret; +} +EXPORT_SYMBOL_GPL(skb_send_sock); + /** * skb_store_bits - store bits from kernel buffer to skb * @skb: destination buffer @@ -2044,15 +2399,20 @@ int skb_store_bits(struct sk_buff *skb, int offset, const void *from, int len) end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { + u32 p_off, p_len, copied; + struct page *p; u8 *vaddr; if (copy > len) copy = len; - vaddr = kmap_atomic(skb_frag_page(frag)); - memcpy(vaddr + frag->page_offset + offset - start, - from, copy); - kunmap_atomic(vaddr); + skb_frag_foreach_page(frag, + frag->page_offset + offset - start, + copy, p, p_off, p_len, copied) { + vaddr = kmap_atomic(p); + memcpy(vaddr + p_off, from + copied, p_len); + kunmap_atomic(vaddr); + } if ((len -= copy) == 0) return 0; @@ -2117,20 +2477,27 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len, end = start + skb_frag_size(frag); if ((copy = end - offset) > 0) { + u32 p_off, p_len, copied; + struct page *p; __wsum csum2; u8 *vaddr; if (copy > len) copy = len; - vaddr = kmap_atomic(skb_frag_page(frag)); - csum2 = ops->update(vaddr + frag->page_offset + - offset - start, copy, 0); - kunmap_atomic(vaddr); - csum = ops->combine(csum, csum2, pos, copy); + + skb_frag_foreach_page(frag, + frag->page_offset + offset - start, + copy, p, p_off, p_len, copied) { + vaddr = kmap_atomic(p); + csum2 = ops->update(vaddr + p_off, p_len, 0); + kunmap_atomic(vaddr); + csum = ops->combine(csum, csum2, pos, p_len); + pos += p_len; + } + if (!(len -= copy)) return csum; offset += copy; - pos += copy; } start = end; } @@ -2203,24 +2570,31 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, end = start + skb_frag_size(&skb_shinfo(skb)->frags[i]); if ((copy = end - offset) > 0) { + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + u32 p_off, p_len, copied; + struct page *p; __wsum csum2; u8 *vaddr; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; if (copy > len) copy = len; - vaddr = kmap_atomic(skb_frag_page(frag)); - csum2 = csum_partial_copy_nocheck(vaddr + - frag->page_offset + - offset - start, to, - copy, 0); - kunmap_atomic(vaddr); - csum = csum_block_add(csum, csum2, pos); + + skb_frag_foreach_page(frag, + frag->page_offset + offset - start, + copy, p, p_off, p_len, copied) { + vaddr = kmap_atomic(p); + csum2 = csum_partial_copy_nocheck(vaddr + p_off, + to + copied, + p_len, 0); + kunmap_atomic(vaddr); + csum = csum_block_add(csum, csum2, pos); + pos += p_len; + } + if (!(len -= copy)) return csum; offset += copy; to += copy; - pos += copy; } start = end; } @@ -2360,6 +2734,7 @@ skb_zerocopy(struct sk_buff *to, struct sk_buff *from, int len, int hlen) skb_tx_error(from); return -ENOMEM; } + skb_zerocopy_clone(to, from, GFP_ATOMIC); for (i = 0; i < skb_shinfo(from)->nr_frags; i++) { if (!len) @@ -2471,12 +2846,15 @@ EXPORT_SYMBOL(skb_queue_purge); */ void skb_rbtree_purge(struct rb_root *root) { - struct sk_buff *skb, *next; + struct rb_node *p = rb_first(root); - rbtree_postorder_for_each_entry_safe(skb, next, root, rbnode) - kfree_skb(skb); + while (p) { + struct sk_buff *skb = rb_entry(p, struct sk_buff, rbnode); - *root = RB_ROOT; + p = rb_next(p); + rb_erase(&skb->rbnode, root); + kfree_skb(skb); + } } /** @@ -2657,6 +3035,7 @@ void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) skb_shinfo(skb1)->tx_flags |= skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG; + skb_zerocopy_clone(skb1, skb, 0); if (len < pos) /* Split line is inside header. */ skb_split_inside_header(skb, skb1, len, pos); else /* Second chunk has no header, nothing to copy. */ @@ -2700,6 +3079,8 @@ int skb_shift(struct sk_buff *tgt, struct sk_buff *skb, int shiftlen) if (skb_headlen(skb)) return 0; + if (skb_zcopy(tgt) || skb_zcopy(skb)) + return 0; todo = shiftlen; from = 0; @@ -3273,6 +3654,8 @@ normal: skb_shinfo(nskb)->tx_flags |= skb_shinfo(head_skb)->tx_flags & SKBTX_SHARED_FRAG; + if (skb_zerocopy_clone(nskb, head_skb, GFP_ATOMIC)) + goto err; while (pos < offset + len) { if (i >= nfrags) { @@ -4380,6 +4763,7 @@ EXPORT_SYMBOL(kfree_skb_partial); bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, bool *fragstolen, int *delta_truesize) { + struct skb_shared_info *to_shinfo, *from_shinfo; int i, delta, len = from->len; *fragstolen = false; @@ -4394,15 +4778,19 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, return true; } - if (skb_has_frag_list(to) || skb_has_frag_list(from)) + to_shinfo = skb_shinfo(to); + from_shinfo = skb_shinfo(from); + if (to_shinfo->frag_list || from_shinfo->frag_list) + return false; + if (skb_zcopy(to) || skb_zcopy(from)) return false; if (skb_headlen(from) != 0) { struct page *page; unsigned int offset; - if (skb_shinfo(to)->nr_frags + - skb_shinfo(from)->nr_frags >= MAX_SKB_FRAGS) + if (to_shinfo->nr_frags + + from_shinfo->nr_frags >= MAX_SKB_FRAGS) return false; if (skb_head_is_locked(from)) @@ -4413,12 +4801,12 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, page = virt_to_head_page(from->head); offset = from->data - (unsigned char *)page_address(page); - skb_fill_page_desc(to, skb_shinfo(to)->nr_frags, + skb_fill_page_desc(to, to_shinfo->nr_frags, page, offset, skb_headlen(from)); *fragstolen = true; } else { - if (skb_shinfo(to)->nr_frags + - skb_shinfo(from)->nr_frags > MAX_SKB_FRAGS) + if (to_shinfo->nr_frags + + from_shinfo->nr_frags > MAX_SKB_FRAGS) return false; delta = from->truesize - SKB_TRUESIZE(skb_end_offset(from)); @@ -4426,19 +4814,19 @@ bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from, WARN_ON_ONCE(delta < len); - memcpy(skb_shinfo(to)->frags + skb_shinfo(to)->nr_frags, - skb_shinfo(from)->frags, - skb_shinfo(from)->nr_frags * sizeof(skb_frag_t)); - skb_shinfo(to)->nr_frags += skb_shinfo(from)->nr_frags; + memcpy(to_shinfo->frags + to_shinfo->nr_frags, + from_shinfo->frags, + from_shinfo->nr_frags * sizeof(skb_frag_t)); + to_shinfo->nr_frags += from_shinfo->nr_frags; if (!skb_cloned(from)) - skb_shinfo(from)->nr_frags = 0; + from_shinfo->nr_frags = 0; /* if the skb is not cloned this does nothing * since we set nr_frags to 0. */ - for (i = 0; i < skb_shinfo(from)->nr_frags; i++) - skb_frag_ref(from, i); + for (i = 0; i < from_shinfo->nr_frags; i++) + __skb_frag_ref(&from_shinfo->frags[i]); to->truesize += delta; to->len += len; @@ -4476,6 +4864,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) if (!xnet) return; + ipvs_reset(skb); skb_orphan(skb); skb->mark = 0; } diff --git a/net/core/sock.c b/net/core/sock.c index ac2a404c73eb..c0b5b2f17412 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -307,16 +307,6 @@ static struct lock_class_key af_wlock_keys[AF_MAX]; static struct lock_class_key af_elock_keys[AF_MAX]; static struct lock_class_key af_kern_callback_keys[AF_MAX]; -/* Take into consideration the size of the struct sk_buff overhead in the - * determination of these values, since that is non-constant across - * platforms. This makes socket queueing behavior and performance - * not depend upon such differences. - */ -#define _SK_MEM_PACKETS 256 -#define _SK_MEM_OVERHEAD SKB_TRUESIZE(256) -#define SK_WMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) -#define SK_RMEM_MAX (_SK_MEM_OVERHEAD * _SK_MEM_PACKETS) - /* Run time adjustable parameters. */ __u32 sysctl_wmem_max __read_mostly = SK_WMEM_MAX; EXPORT_SYMBOL(sysctl_wmem_max); @@ -1055,6 +1045,20 @@ set_rcvbuf: if (val == 1) dst_negative_advice(sk); break; + + case SO_ZEROCOPY: + if (sk->sk_family != PF_INET && sk->sk_family != PF_INET6) + ret = -ENOTSUPP; + else if (sk->sk_protocol != IPPROTO_TCP) + ret = -ENOTSUPP; + else if (sk->sk_state != TCP_CLOSE) + ret = -EBUSY; + else if (val < 0 || val > 1) + ret = -EINVAL; + else + sock_valbool_flag(sk, SOCK_ZEROCOPY, valbool); + break; + default: ret = -ENOPROTOOPT; break; @@ -1383,6 +1387,10 @@ int sock_getsockopt(struct socket *sock, int level, int optname, v.val64 = sock_gen_cookie(sk); break; + case SO_ZEROCOPY: + v.val = sock_flag(sk, SOCK_ZEROCOPY); + break; + default: /* We implement the SO_SNDLOWAT etc to not be settable * (1003.1g 7). @@ -1461,8 +1469,6 @@ static struct sock *sk_prot_alloc(struct proto *prot, gfp_t priority, sk = kmalloc(prot->obj_size, priority); if (sk != NULL) { - kmemcheck_annotate_bitfield(sk, flags); - if (security_sk_alloc(sk, family, priority)) goto out_free; @@ -1646,6 +1652,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) sock_copy(newsk, sk); + newsk->sk_prot_creator = sk->sk_prot; + /* SANITY */ if (likely(newsk->sk_net_refcnt)) get_net(sock_net(newsk)); @@ -1667,19 +1675,28 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) newsk->sk_dst_pending_confirm = 0; newsk->sk_wmem_queued = 0; newsk->sk_forward_alloc = 0; + + /* sk->sk_memcg will be populated at accept() time */ + newsk->sk_memcg = NULL; + atomic_set(&newsk->sk_drops, 0); newsk->sk_send_head = NULL; newsk->sk_userlocks = sk->sk_userlocks & ~SOCK_BINDPORT_LOCK; + atomic_set(&newsk->sk_zckey, 0); sock_reset_flag(newsk, SOCK_DONE); + cgroup_sk_alloc(&newsk->sk_cgrp_data); - filter = rcu_dereference_protected(newsk->sk_filter, 1); + rcu_read_lock(); + filter = rcu_dereference(sk->sk_filter); if (filter != NULL) /* though it's an empty new sock, the charging may fail * if sysctl_optmem_max was changed between creation of * original socket and cloning */ is_charged = sk_filter_charge(newsk, filter); + RCU_INIT_POINTER(newsk->sk_filter, filter); + rcu_read_unlock(); if (unlikely(!is_charged || xfrm_sk_clone_policy(newsk, sk))) { /* We need to make sure that we don't uncharge the new @@ -1700,9 +1717,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) newsk->sk_incoming_cpu = raw_smp_processor_id(); atomic64_set(&newsk->sk_cookie, 0); - mem_cgroup_sk_alloc(newsk); - cgroup_sk_alloc(&newsk->sk_cgrp_data); - /* * Before updating sk_refcnt, we must commit prior changes to memory * (Documentation/RCU/rculist_nulls.txt for details) @@ -1757,7 +1771,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst) sk->sk_route_caps |= NETIF_F_GSO_SOFTWARE; sk->sk_route_caps &= ~sk->sk_route_nocaps; if (sk_can_gso(sk)) { - if (dst->header_len) { + if (dst->header_len && !xfrm_dst_offload_ok(dst)) { sk->sk_route_caps &= ~NETIF_F_GSO_MASK; } else { sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM; @@ -1923,6 +1937,33 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, } EXPORT_SYMBOL(sock_wmalloc); +static void sock_ofree(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + + atomic_sub(skb->truesize, &sk->sk_omem_alloc); +} + +struct sk_buff *sock_omalloc(struct sock *sk, unsigned long size, + gfp_t priority) +{ + struct sk_buff *skb; + + /* small safe race: SKB_TRUESIZE may differ from final skb->truesize */ + if (atomic_read(&sk->sk_omem_alloc) + SKB_TRUESIZE(size) > + sysctl_optmem_max) + return NULL; + + skb = alloc_skb(size, priority); + if (!skb) + return NULL; + + atomic_add(skb->truesize, &sk->sk_omem_alloc); + skb->sk = sk; + skb->destructor = sock_ofree; + return skb; +} + /* * Allocate a memory block from the socket's option memory buffer. */ @@ -2303,16 +2344,18 @@ int __sk_mem_raise_allocated(struct sock *sk, int size, int amt, int kind) /* guarantee minimum buffer size under pressure */ if (kind == SK_MEM_RECV) { - if (atomic_read(&sk->sk_rmem_alloc) < prot->sysctl_rmem[0]) + if (atomic_read(&sk->sk_rmem_alloc) < sk_get_rmem0(sk, prot)) return 1; } else { /* SK_MEM_SEND */ + int wmem0 = sk_get_wmem0(sk, prot); + if (sk->sk_type == SOCK_STREAM) { - if (sk->sk_wmem_queued < prot->sysctl_wmem[0]) + if (sk->sk_wmem_queued < wmem0) return 1; - } else if (refcount_read(&sk->sk_wmem_alloc) < - prot->sysctl_wmem[0]) + } else if (refcount_read(&sk->sk_wmem_alloc) < wmem0) { return 1; + } } if (sk_has_memory_pressure(sk)) { @@ -2408,9 +2451,6 @@ EXPORT_SYMBOL(__sk_mem_reclaim); int sk_set_peek_off(struct sock *sk, int val) { - if (val < 0) - return -EINVAL; - sk->sk_peek_off = val; return 0; } @@ -2500,6 +2540,12 @@ int sock_no_sendmsg(struct socket *sock, struct msghdr *m, size_t len) } EXPORT_SYMBOL(sock_no_sendmsg); +int sock_no_sendmsg_locked(struct sock *sk, struct msghdr *m, size_t len) +{ + return -EOPNOTSUPP; +} +EXPORT_SYMBOL(sock_no_sendmsg_locked); + int sock_no_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int flags) { @@ -2528,6 +2574,22 @@ ssize_t sock_no_sendpage(struct socket *sock, struct page *page, int offset, siz } EXPORT_SYMBOL(sock_no_sendpage); +ssize_t sock_no_sendpage_locked(struct sock *sk, struct page *page, + int offset, size_t size, int flags) +{ + ssize_t res; + struct msghdr msg = {.msg_flags = flags}; + struct kvec iov; + char *kaddr = kmap(page); + + iov.iov_base = kaddr + offset; + iov.iov_len = size; + res = kernel_sendmsg_locked(sk, &msg, &iov, 1, size); + kunmap(page); + return res; +} +EXPORT_SYMBOL(sock_no_sendpage_locked); + /* * Default Socket Callbacks */ @@ -2623,7 +2685,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk_init_common(sk); sk->sk_send_head = NULL; - init_timer(&sk->sk_timer); + timer_setup(&sk->sk_timer, NULL, 0); sk->sk_allocation = GFP_KERNEL; sk->sk_rcvbuf = sysctl_rmem_default; @@ -2673,6 +2735,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT; sk->sk_stamp = SK_DEFAULT_STAMP; + atomic_set(&sk->sk_zckey, 0); #ifdef CONFIG_NET_RX_BUSY_POLL sk->sk_napi_id = 0; @@ -2681,6 +2744,7 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_max_pacing_rate = ~0U; sk->sk_pacing_rate = ~0U; + sk->sk_pacing_shift = 10; sk->sk_incoming_cpu = -1; /* * Before updating sk_refcnt, we must commit prior changes to memory @@ -2979,7 +3043,6 @@ struct prot_inuse { static DECLARE_BITMAP(proto_inuse_idx, PROTO_INUSE_NR); -#ifdef CONFIG_NET_NS void sock_prot_inuse_add(struct net *net, struct proto *prot, int val) { __this_cpu_add(net->core.inuse->val[prot->inuse_idx], val); @@ -3023,27 +3086,6 @@ static __init int net_inuse_init(void) } core_initcall(net_inuse_init); -#else -static DEFINE_PER_CPU(struct prot_inuse, prot_inuse); - -void sock_prot_inuse_add(struct net *net, struct proto *prot, int val) -{ - __this_cpu_add(prot_inuse.val[prot->inuse_idx], val); -} -EXPORT_SYMBOL_GPL(sock_prot_inuse_add); - -int sock_prot_inuse_get(struct net *net, struct proto *prot) -{ - int cpu, idx = prot->inuse_idx; - int res = 0; - - for_each_possible_cpu(cpu) - res += per_cpu(prot_inuse, cpu).val[idx]; - - return res >= 0 ? res : 0; -} -EXPORT_SYMBOL_GPL(sock_prot_inuse_get); -#endif static void assign_proto_idx(struct proto *prot) { diff --git a/net/core/sock_reuseport.c b/net/core/sock_reuseport.c index eed1ebf7f29d..5eeb1d20cc38 100644 --- a/net/core/sock_reuseport.c +++ b/net/core/sock_reuseport.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * To speed up listener socket lookup, create an array to store all sockets * listening on the same port. This allows a decision to be made after finding @@ -36,9 +37,14 @@ int reuseport_alloc(struct sock *sk) * soft irq of receive path or setsockopt from process context */ spin_lock_bh(&reuseport_lock); - WARN_ONCE(rcu_dereference_protected(sk->sk_reuseport_cb, - lockdep_is_held(&reuseport_lock)), - "multiple allocations for the same socket"); + + /* Allocation attempts can occur concurrently via the setsockopt path + * and the bind/hash path. Nothing to do when we lose the race. + */ + if (rcu_dereference_protected(sk->sk_reuseport_cb, + lockdep_is_held(&reuseport_lock))) + goto out; + reuse = __reuseport_alloc(INIT_SOCKS); if (!reuse) { spin_unlock_bh(&reuseport_lock); @@ -49,6 +55,7 @@ int reuseport_alloc(struct sock *sk) reuse->num_socks = 1; rcu_assign_pointer(sk->sk_reuseport_cb, reuse); +out: spin_unlock_bh(&reuseport_lock); return 0; diff --git a/net/core/stream.c b/net/core/stream.c index 20231dbb1da0..1cff9c6270c6 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * SUCS NET3: * diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index b7cd9aafe99e..cbc3dde4cfcc 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* -*- linux-c -*- * sysctl_net_core.c: sysctl interface to net core subsystem. * diff --git a/net/core/tso.c b/net/core/tso.c index 5dca7ce8ee9f..43f4eba61933 100644 --- a/net/core/tso.c +++ b/net/core/tso.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/export.h> #include <linux/if_vlan.h> #include <net/ip.h> diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 733f523707ac..bae7d78aa068 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1938,8 +1938,8 @@ static int __init dcbnl_init(void) { INIT_LIST_HEAD(&dcb_app_list); - rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL, NULL); - rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_GETDCB, dcb_doit, NULL, 0); + rtnl_register(PF_UNSPEC, RTM_SETDCB, dcb_doit, NULL, 0); return 0; } diff --git a/net/dccp/Makefile b/net/dccp/Makefile index 5c8362b037ed..2e7b56097bc4 100644 --- a/net/dccp/Makefile +++ b/net/dccp/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_IP_DCCP) += dccp.o dccp_ipv4.o dccp-y := ccid.o feat.o input.o minisocks.o options.o output.o proto.o timer.o \ diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index e1295d5f2c56..1c75cd1255f6 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -126,10 +126,10 @@ static void ccid2_change_l_seq_window(struct sock *sk, u64 val) DCCPF_SEQ_WMAX)); } -static void ccid2_hc_tx_rto_expire(unsigned long data) +static void ccid2_hc_tx_rto_expire(struct timer_list *t) { - struct sock *sk = (struct sock *)data; - struct ccid2_hc_tx_sock *hc = ccid2_hc_tx_sk(sk); + struct ccid2_hc_tx_sock *hc = from_timer(hc, t, tx_rtotimer); + struct sock *sk = hc->sk; const bool sender_was_blocked = ccid2_cwnd_network_limited(hc); bh_lock_sock(sk); @@ -733,8 +733,8 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) hc->tx_rpdupack = -1; hc->tx_last_cong = hc->tx_lsndtime = hc->tx_cwnd_stamp = ccid2_jiffies32; hc->tx_cwnd_used = 0; - setup_timer(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, - (unsigned long)sk); + hc->sk = sk; + timer_setup(&hc->tx_rtotimer, ccid2_hc_tx_rto_expire, 0); INIT_LIST_HEAD(&hc->tx_av_chunks); return 0; } diff --git a/net/dccp/ccids/ccid2.h b/net/dccp/ccids/ccid2.h index 6e50ef2898fb..1af0116dc6ce 100644 --- a/net/dccp/ccids/ccid2.h +++ b/net/dccp/ccids/ccid2.h @@ -85,6 +85,7 @@ struct ccid2_hc_tx_sock { tx_rto; u64 tx_rtt_seq:48; struct timer_list tx_rtotimer; + struct sock *sk; /* Congestion Window validation (optional, RFC 2861) */ u32 tx_cwnd_used, diff --git a/net/dccp/ccids/ccid3.c b/net/dccp/ccids/ccid3.c index 119c04317d48..8b5ba6dffac7 100644 --- a/net/dccp/ccids/ccid3.c +++ b/net/dccp/ccids/ccid3.c @@ -195,10 +195,10 @@ static inline void ccid3_hc_tx_update_win_count(struct ccid3_hc_tx_sock *hc, } } -static void ccid3_hc_tx_no_feedback_timer(unsigned long data) +static void ccid3_hc_tx_no_feedback_timer(struct timer_list *t) { - struct sock *sk = (struct sock *)data; - struct ccid3_hc_tx_sock *hc = ccid3_hc_tx_sk(sk); + struct ccid3_hc_tx_sock *hc = from_timer(hc, t, tx_no_feedback_timer); + struct sock *sk = hc->sk; unsigned long t_nfb = USEC_PER_SEC / 5; bh_lock_sock(sk); @@ -505,8 +505,9 @@ static int ccid3_hc_tx_init(struct ccid *ccid, struct sock *sk) hc->tx_state = TFRC_SSTATE_NO_SENT; hc->tx_hist = NULL; - setup_timer(&hc->tx_no_feedback_timer, - ccid3_hc_tx_no_feedback_timer, (unsigned long)sk); + hc->sk = sk; + timer_setup(&hc->tx_no_feedback_timer, + ccid3_hc_tx_no_feedback_timer, 0); return 0; } diff --git a/net/dccp/ccids/ccid3.h b/net/dccp/ccids/ccid3.h index 1a9933c29672..813d91c6e1e2 100644 --- a/net/dccp/ccids/ccid3.h +++ b/net/dccp/ccids/ccid3.h @@ -106,6 +106,7 @@ struct ccid3_hc_tx_sock { u8 tx_last_win_count; ktime_t tx_t_last_win_count; struct timer_list tx_no_feedback_timer; + struct sock *sk; ktime_t tx_t_ld; ktime_t tx_t_nom; struct tfrc_tx_hist_entry *tx_hist; diff --git a/net/dccp/ccids/lib/packet_history.c b/net/dccp/ccids/lib/packet_history.c index 08df7a3acb3d..876e18592d71 100644 --- a/net/dccp/ccids/lib/packet_history.c +++ b/net/dccp/ccids/lib/packet_history.c @@ -149,10 +149,8 @@ static void tfrc_rx_hist_swap(struct tfrc_rx_hist *h, const u8 a, const u8 b) { const u8 idx_a = tfrc_rx_hist_index(h, a), idx_b = tfrc_rx_hist_index(h, b); - struct tfrc_rx_hist_entry *tmp = h->ring[idx_a]; - h->ring[idx_a] = h->ring[idx_b]; - h->ring[idx_b] = tmp; + swap(h->ring[idx_a], h->ring[idx_b]); } /* diff --git a/net/dccp/ccids/lib/tfrc.c b/net/dccp/ccids/lib/tfrc.c index 62b5828acde0..d7f265e1f50c 100644 --- a/net/dccp/ccids/lib/tfrc.c +++ b/net/dccp/ccids/lib/tfrc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * TFRC library initialisation * diff --git a/net/dccp/input.c b/net/dccp/input.c index fa6be9750bb4..d28d46bff6ab 100644 --- a/net/dccp/input.c +++ b/net/dccp/input.c @@ -534,6 +534,7 @@ static int dccp_rcv_respond_partopen_state_process(struct sock *sk, case DCCP_PKT_DATA: if (sk->sk_state == DCCP_RESPOND) break; + /* fall through */ case DCCP_PKT_DATAACK: case DCCP_PKT_ACK: /* diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 1b202f16531f..e65fcb45c3f6 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -256,7 +256,7 @@ static void dccp_v4_err(struct sk_buff *skb, u32 info) sk = __inet_lookup_established(net, &dccp_hashinfo, iph->daddr, dh->dccph_dport, iph->saddr, ntohs(dh->dccph_sport), - inet_iif(skb)); + inet_iif(skb), 0); if (!sk) { __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); return; @@ -414,8 +414,7 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk, sk_daddr_set(newsk, ireq->ir_rmt_addr); sk_rcv_saddr_set(newsk, ireq->ir_loc_addr); newinet->inet_saddr = ireq->ir_loc_addr; - newinet->inet_opt = ireq->opt; - ireq->opt = NULL; + RCU_INIT_POINTER(newinet->inet_opt, rcu_dereference(ireq->ireq_opt)); newinet->mc_index = inet_iif(skb); newinet->mc_ttl = ip_hdr(skb)->ttl; newinet->inet_id = jiffies; @@ -430,7 +429,10 @@ struct sock *dccp_v4_request_recv_sock(const struct sock *sk, if (__inet_inherit_port(sk, newsk) < 0) goto put_and_exit; *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); - + if (*own_req) + ireq->ireq_opt = NULL; + else + newinet->inet_opt = NULL; return newsk; exit_overflow: @@ -441,6 +443,7 @@ exit: __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENDROPS); return NULL; put_and_exit: + newinet->inet_opt = NULL; inet_csk_prepare_forced_close(newsk); dccp_done(newsk); goto exit; @@ -492,7 +495,7 @@ static int dccp_v4_send_response(const struct sock *sk, struct request_sock *req ireq->ir_rmt_addr); err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, ireq->ir_rmt_addr, - ireq->opt); + ireq_opt_deref(ireq)); err = net_xmit_eval(err); } @@ -548,7 +551,7 @@ out: static void dccp_v4_reqsk_destructor(struct request_sock *req) { dccp_feat_list_purge(&dccp_rsk(req)->dreq_featneg); - kfree(inet_rsk(req)->opt); + kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1)); } void dccp_syn_ack_timeout(const struct request_sock *req) @@ -804,7 +807,7 @@ static int dccp_v4_rcv(struct sk_buff *skb) lookup: sk = __inet_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh), - dh->dccph_sport, dh->dccph_dport, &refcounted); + dh->dccph_sport, dh->dccph_dport, 0, &refcounted); if (!sk) { dccp_pr_debug("failed to look up flow ID in table and " "get corresponding socket\n"); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 1b58eac8aad3..5df7857fc0f3 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -16,6 +16,7 @@ #include <linux/random.h> #include <linux/slab.h> #include <linux/xfrm.h> +#include <linux/string.h> #include <net/addrconf.h> #include <net/inet_common.h> @@ -30,6 +31,7 @@ #include <net/ip6_checksum.h> #include <net/xfrm.h> #include <net/secure_seq.h> +#include <net/sock.h> #include "dccp.h" #include "ipv6.h" @@ -89,7 +91,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, sk = __inet6_lookup_established(net, &dccp_hashinfo, &hdr->daddr, dh->dccph_dport, &hdr->saddr, ntohs(dh->dccph_sport), - inet6_iif(skb)); + inet6_iif(skb), 0); if (!sk) { __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), @@ -597,19 +599,13 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) --ANK (980728) */ if (np->rxopt.all) - /* - * FIXME: Add handling of IPV6_PKTOPTIONS skb. See the comments below - * (wrt ipv6_pktopions) and net/ipv6/tcp_ipv6.c for an example. - */ opt_skb = skb_clone(skb, GFP_ATOMIC); if (sk->sk_state == DCCP_OPEN) { /* Fast path */ if (dccp_rcv_established(sk, skb, dccp_hdr(skb), skb->len)) goto reset; - if (opt_skb) { - /* XXX This is where we would goto ipv6_pktoptions. */ - __kfree_skb(opt_skb); - } + if (opt_skb) + goto ipv6_pktoptions; return 0; } @@ -640,10 +636,8 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (dccp_rcv_state_process(sk, skb, dccp_hdr(skb), skb->len)) goto reset; - if (opt_skb) { - /* XXX This is where we would goto ipv6_pktoptions. */ - __kfree_skb(opt_skb); - } + if (opt_skb) + goto ipv6_pktoptions; return 0; reset: @@ -653,6 +647,35 @@ discard: __kfree_skb(opt_skb); kfree_skb(skb); return 0; + +/* Handling IPV6_PKTOPTIONS skb the similar + * way it's done for net/ipv6/tcp_ipv6.c + */ +ipv6_pktoptions: + if (!((1 << sk->sk_state) & (DCCPF_CLOSED | DCCPF_LISTEN))) { + if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo) + np->mcast_oif = inet6_iif(opt_skb); + if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) + np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit; + if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass) + np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb)); + if (np->repflow) + np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb)); + if (ipv6_opt_accepted(sk, opt_skb, + &DCCP_SKB_CB(opt_skb)->header.h6)) { + skb_set_owner_r(opt_skb, sk); + memmove(IP6CB(opt_skb), + &DCCP_SKB_CB(opt_skb)->header.h6, + sizeof(struct inet6_skb_parm)); + opt_skb = xchg(&np->pktoptions, opt_skb); + } else { + __kfree_skb(opt_skb); + opt_skb = xchg(&np->pktoptions, NULL); + } + } + + kfree_skb(opt_skb); + return 0; } static int dccp_v6_rcv(struct sk_buff *skb) @@ -687,7 +710,7 @@ static int dccp_v6_rcv(struct sk_buff *skb) lookup: sk = __inet6_lookup_skb(&dccp_hashinfo, skb, __dccp_hdr_len(dh), dh->dccph_sport, dh->dccph_dport, - inet6_iif(skb), &refcounted); + inet6_iif(skb), 0, &refcounted); if (!sk) { dccp_pr_debug("failed to look up flow ID in table and " "get corresponding socket\n"); diff --git a/net/dccp/options.c b/net/dccp/options.c index 51cdfc3bd8ca..4e40db017e19 100644 --- a/net/dccp/options.c +++ b/net/dccp/options.c @@ -227,8 +227,8 @@ int dccp_parse_options(struct sock *sk, struct dccp_request_sock *dreq, * Ack vectors are processed by the TX CCID if it is * interested. The RX CCID need not parse Ack Vectors, * since it is only interested in clearing old state. - * Fall through. */ + /* fall through */ case DCCPO_MIN_TX_CCID_SPECIFIC ... DCCPO_MAX_TX_CCID_SPECIFIC: if (ccid_hc_tx_parse_options(dp->dccps_hc_tx_ccid, sk, pkt_type, opt, value, len)) diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 86bc40ba6ba5..b68168fcc06a 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -24,6 +24,7 @@ #include <net/checksum.h> #include <net/inet_sock.h> +#include <net/inet_common.h> #include <net/sock.h> #include <net/xfrm.h> @@ -170,6 +171,15 @@ const char *dccp_packet_name(const int type) EXPORT_SYMBOL_GPL(dccp_packet_name); +static void dccp_sk_destruct(struct sock *sk) +{ + struct dccp_sock *dp = dccp_sk(sk); + + ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); + dp->dccps_hc_tx_ccid = NULL; + inet_sock_destruct(sk); +} + int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) { struct dccp_sock *dp = dccp_sk(sk); @@ -179,6 +189,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) icsk->icsk_syn_retries = sysctl_dccp_request_retries; sk->sk_state = DCCP_CLOSED; sk->sk_write_space = dccp_write_space; + sk->sk_destruct = dccp_sk_destruct; icsk->icsk_sync_mss = dccp_sync_mss; dp->dccps_mss_cache = 536; dp->dccps_rate_last = jiffies; @@ -219,8 +230,7 @@ void dccp_destroy_sock(struct sock *sk) dp->dccps_hc_rx_ackvec = NULL; } ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk); - ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); - dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; + dp->dccps_hc_rx_ccid = NULL; /* clean up feature negotiation state */ dccp_feat_list_purge(&dp->dccps_featneg); diff --git a/net/dccp/timer.c b/net/dccp/timer.c index 3a2c34027758..b50a8732ff43 100644 --- a/net/dccp/timer.c +++ b/net/dccp/timer.c @@ -125,10 +125,11 @@ static void dccp_retransmit_timer(struct sock *sk) __sk_dst_reset(sk); } -static void dccp_write_timer(unsigned long data) +static void dccp_write_timer(struct timer_list *t) { - struct sock *sk = (struct sock *)data; - struct inet_connection_sock *icsk = inet_csk(sk); + struct inet_connection_sock *icsk = + from_timer(icsk, t, icsk_retransmit_timer); + struct sock *sk = &icsk->icsk_inet.sk; int event = 0; bh_lock_sock(sk); @@ -161,19 +162,20 @@ out: sock_put(sk); } -static void dccp_keepalive_timer(unsigned long data) +static void dccp_keepalive_timer(struct timer_list *t) { - struct sock *sk = (struct sock *)data; + struct sock *sk = from_timer(sk, t, sk_timer); pr_err("dccp should not use a keepalive timer !\n"); sock_put(sk); } /* This is the same as tcp_delack_timer, sans prequeue & mem_reclaim stuff */ -static void dccp_delack_timer(unsigned long data) +static void dccp_delack_timer(struct timer_list *t) { - struct sock *sk = (struct sock *)data; - struct inet_connection_sock *icsk = inet_csk(sk); + struct inet_connection_sock *icsk = + from_timer(icsk, t, icsk_delack_timer); + struct sock *sk = &icsk->icsk_inet.sk; bh_lock_sock(sk); if (sock_owned_by_user(sk)) { @@ -232,10 +234,13 @@ static void dccp_write_xmitlet(unsigned long data) bh_unlock_sock(sk); } -static void dccp_write_xmit_timer(unsigned long data) +static void dccp_write_xmit_timer(struct timer_list *t) { - dccp_write_xmitlet(data); - sock_put((struct sock *)data); + struct dccp_sock *dp = from_timer(dp, t, dccps_xmit_timer); + struct sock *sk = &dp->dccps_inet_connection.icsk_inet.sk; + + dccp_write_xmitlet((unsigned long)sk); + sock_put(sk); } void dccp_init_xmit_timers(struct sock *sk) @@ -243,8 +248,7 @@ void dccp_init_xmit_timers(struct sock *sk) struct dccp_sock *dp = dccp_sk(sk); tasklet_init(&dp->dccps_xmitlet, dccp_write_xmitlet, (unsigned long)sk); - setup_timer(&dp->dccps_xmit_timer, dccp_write_xmit_timer, - (unsigned long)sk); + timer_setup(&dp->dccps_xmit_timer, dccp_write_xmit_timer, 0); inet_csk_init_xmit_timers(sk, &dccp_write_timer, &dccp_delack_timer, &dccp_keepalive_timer); } diff --git a/net/decnet/Makefile b/net/decnet/Makefile index e44003af71f6..9e38122d942b 100644 --- a/net/decnet/Makefile +++ b/net/decnet/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_DECNET) += decnet.o diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 73a0399dc7a2..518cea17b811 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -533,10 +533,6 @@ static struct sock *dn_alloc_sock(struct net *net, struct socket *sock, gfp_t gf scp->keepalive = 10 * HZ; scp->keepalive_fxn = dn_keepalive; - init_timer(&scp->delack_timer); - scp->delack_pending = 0; - scp->delack_fxn = dn_nsp_delayed_ack; - dn_start_slow_timer(sk); out: return sk; @@ -634,10 +630,12 @@ static void dn_destroy_sock(struct sock *sk) goto disc_reject; case DN_RUN: scp->state = DN_DI; + /* fall through */ case DN_DI: case DN_DR: disc_reject: dn_nsp_send_disc(sk, NSP_DISCINIT, 0, sk->sk_allocation); + /* fall through */ case DN_NC: case DN_NR: case DN_RJ: @@ -651,6 +649,7 @@ disc_reject: break; default: printk(KERN_DEBUG "DECnet: dn_destroy_sock passed socket in invalid state\n"); + /* fall through */ case DN_O: dn_stop_slow_timer(sk); diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index fa0110b57ca1..9153247dad28 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * DECnet An implementation of the DECnet protocol suite for the LINUX * operating system. DECnet is implemented using the BSD Socket @@ -1038,14 +1039,14 @@ static void dn_eth_down(struct net_device *dev) static void dn_dev_set_timer(struct net_device *dev); -static void dn_dev_timer_func(unsigned long arg) +static void dn_dev_timer_func(struct timer_list *t) { - struct net_device *dev = (struct net_device *)arg; - struct dn_dev *dn_db; + struct dn_dev *dn_db = from_timer(dn_db, t, timer); + struct net_device *dev; struct dn_ifaddr *ifa; rcu_read_lock(); - dn_db = rcu_dereference(dev->dn_ptr); + dev = dn_db->dev; if (dn_db->t3 <= dn_db->parms.t2) { if (dn_db->parms.timer3) { for (ifa = rcu_dereference(dn_db->ifa_list); @@ -1070,8 +1071,6 @@ static void dn_dev_set_timer(struct net_device *dev) if (dn_db->parms.t2 > dn_db->parms.t3) dn_db->parms.t2 = dn_db->parms.t3; - dn_db->timer.data = (unsigned long)dev; - dn_db->timer.function = dn_dev_timer_func; dn_db->timer.expires = jiffies + (dn_db->parms.t2 * HZ); add_timer(&dn_db->timer); @@ -1100,7 +1099,7 @@ static struct dn_dev *dn_dev_create(struct net_device *dev, int *err) rcu_assign_pointer(dev->dn_ptr, dn_db); dn_db->dev = dev; - init_timer(&dn_db->timer); + timer_setup(&dn_db->timer, dn_dev_timer_func, 0); dn_db->uptime = jiffies; @@ -1419,9 +1418,9 @@ void __init dn_dev_init(void) dn_dev_devices_on(); - rtnl_register(PF_DECnet, RTM_NEWADDR, dn_nl_newaddr, NULL, NULL); - rtnl_register(PF_DECnet, RTM_DELADDR, dn_nl_deladdr, NULL, NULL); - rtnl_register(PF_DECnet, RTM_GETADDR, NULL, dn_nl_dump_ifaddr, NULL); + rtnl_register(PF_DECnet, RTM_NEWADDR, dn_nl_newaddr, NULL, 0); + rtnl_register(PF_DECnet, RTM_DELADDR, dn_nl_deladdr, NULL, 0); + rtnl_register(PF_DECnet, RTM_GETADDR, NULL, dn_nl_dump_ifaddr, 0); proc_create("decnet_dev", S_IRUGO, init_net.proc_net, &dn_dev_seq_fops); diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index f9f6fb3f3c5b..b37a1b833c77 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * DECnet An implementation of the DECnet protocol suite for the LINUX * operating system. DECnet is implemented using the BSD Socket @@ -791,8 +792,8 @@ void __init dn_fib_init(void) register_dnaddr_notifier(&dn_fib_dnaddr_notifier); - rtnl_register(PF_DECnet, RTM_NEWROUTE, dn_fib_rtm_newroute, NULL, NULL); - rtnl_register(PF_DECnet, RTM_DELROUTE, dn_fib_rtm_delroute, NULL, NULL); + rtnl_register(PF_DECnet, RTM_NEWROUTE, dn_fib_rtm_newroute, NULL, 0); + rtnl_register(PF_DECnet, RTM_DELROUTE, dn_fib_rtm_delroute, NULL, 0); } diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 21dedf6fd0f7..528119a5618e 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * DECnet An implementation of the DECnet protocol suite for the LINUX * operating system. DECnet is implemented using the BSD Socket @@ -94,7 +95,7 @@ struct neigh_table dn_neigh_table = { [NEIGH_VAR_BASE_REACHABLE_TIME] = 30 * HZ, [NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ, [NEIGH_VAR_GC_STALETIME] = 60 * HZ, - [NEIGH_VAR_QUEUE_LEN_BYTES] = 64*1024, + [NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX, [NEIGH_VAR_PROXY_QLEN] = 0, [NEIGH_VAR_ANYCAST_DELAY] = 0, [NEIGH_VAR_PROXY_DELAY] = 0, diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c index 7ac086d5c0c0..1b2120645730 100644 --- a/net/decnet/dn_nsp_in.c +++ b/net/decnet/dn_nsp_in.c @@ -776,12 +776,8 @@ static int dn_nsp_rx_packet(struct net *net, struct sock *sk2, * Swap src & dst and look up in the normal way. */ if (unlikely(cb->rt_flags & DN_RT_F_RTS)) { - __le16 tmp = cb->dst_port; - cb->dst_port = cb->src_port; - cb->src_port = tmp; - tmp = cb->dst; - cb->dst = cb->src; - cb->src = tmp; + swap(cb->dst_port, cb->src_port); + swap(cb->dst, cb->src); } /* diff --git a/net/decnet/dn_nsp_out.c b/net/decnet/dn_nsp_out.c index 66f035e476ea..56a52a004c56 100644 --- a/net/decnet/dn_nsp_out.c +++ b/net/decnet/dn_nsp_out.c @@ -313,11 +313,8 @@ static __le16 *dn_mk_ack_header(struct sock *sk, struct sk_buff *skb, unsigned c ackcrs |= 0x8000; /* If this is an "other data/ack" message, swap acknum and ackcrs */ - if (other) { - unsigned short tmp = acknum; - acknum = ackcrs; - ackcrs = tmp; - } + if (other) + swap(acknum, ackcrs); /* Set "cross subchannel" bit in ackcrs */ ackcrs |= 0x2000; @@ -491,17 +488,6 @@ void dn_send_conn_ack (struct sock *sk) dn_nsp_send(skb); } -void dn_nsp_delayed_ack(struct sock *sk) -{ - struct dn_scp *scp = DN_SK(sk); - - if (scp->ackxmt_oth != scp->numoth_rcv) - dn_nsp_send_oth_ack(sk); - - if (scp->ackxmt_dat != scp->numdat_rcv) - dn_nsp_send_data_ack(sk); -} - static int dn_nsp_retrans_conn_conf(struct sock *sk) { struct dn_scp *scp = DN_SK(sk); diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index bcbe548f8854..b36dceab0dc1 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -131,7 +131,7 @@ static struct dn_rt_hash_bucket *dn_rt_hash_table; static unsigned int dn_rt_hash_mask; static struct timer_list dn_route_timer; -static DEFINE_TIMER(dn_rt_flush_timer, dn_run_flush, 0, 0); +static DEFINE_TIMER(dn_rt_flush_timer, dn_run_flush); int decnet_dst_gc_interval = 2; static struct dst_ops dn_dst_ops = { @@ -338,7 +338,7 @@ static int dn_insert_route(struct dn_route *rt, unsigned int hash, struct dn_rou dn_rt_hash_table[hash].chain); rcu_assign_pointer(dn_rt_hash_table[hash].chain, rth); - dst_use(&rth->dst, now); + dst_hold_and_use(&rth->dst, now); spin_unlock_bh(&dn_rt_hash_table[hash].lock); dst_release_immediate(&rt->dst); @@ -351,7 +351,7 @@ static int dn_insert_route(struct dn_route *rt, unsigned int hash, struct dn_rou rcu_assign_pointer(rt->dst.dn_next, dn_rt_hash_table[hash].chain); rcu_assign_pointer(dn_rt_hash_table[hash].chain, rt); - dst_use(&rt->dst, now); + dst_hold_and_use(&rt->dst, now); spin_unlock_bh(&dn_rt_hash_table[hash].lock); *rp = rt; return 0; @@ -1258,7 +1258,7 @@ static int __dn_route_output_key(struct dst_entry **pprt, const struct flowidn * (flp->flowidn_mark == rt->fld.flowidn_mark) && dn_is_output_route(rt) && (rt->fld.flowidn_oif == flp->flowidn_oif)) { - dst_use(&rt->dst, jiffies); + dst_hold_and_use(&rt->dst, jiffies); rcu_read_unlock_bh(); *pprt = &rt->dst; return 0; @@ -1535,7 +1535,7 @@ static int dn_route_input(struct sk_buff *skb) (rt->fld.flowidn_oif == 0) && (rt->fld.flowidn_mark == skb->mark) && (rt->fld.flowidn_iif == cb->iif)) { - dst_use(&rt->dst, jiffies); + dst_hold_and_use(&rt->dst, jiffies); rcu_read_unlock(); skb_dst_set(skb, (struct dst_entry *)rt); return 0; @@ -1922,10 +1922,10 @@ void __init dn_route_init(void) #ifdef CONFIG_DECNET_ROUTER rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute, - dn_fib_dump, NULL); + dn_fib_dump, 0); #else rtnl_register(PF_DECnet, RTM_GETROUTE, dn_cache_getroute, - dn_cache_dump, NULL); + dn_cache_dump, 0); #endif } diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index 295bbd6a56f2..c795c3f509c9 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * DECnet An implementation of the DECnet protocol suite for the LINUX diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 232675480756..f0710b5d037d 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * DECnet An implementation of the DECnet protocol suite for the LINUX * operating system. DECnet is implemented using the BSD Socket @@ -155,6 +156,7 @@ static void dn_rehash_zone(struct dn_zone *dz) default: printk(KERN_DEBUG "DECnet: dn_rehash_zone: BUG! %d\n", old_divisor); + /* fall through */ case 256: new_divisor = 1024; new_hashmask = 0x3FF; diff --git a/net/decnet/dn_timer.c b/net/decnet/dn_timer.c index 1d330fd43dc7..f430daed24a0 100644 --- a/net/decnet/dn_timer.c +++ b/net/decnet/dn_timer.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * DECnet An implementation of the DECnet protocol suite for the LINUX * operating system. DECnet is implemented using the BSD Socket diff --git a/net/decnet/netfilter/dn_rtmsg.c b/net/decnet/netfilter/dn_rtmsg.c index aa8ffecc46a4..ab395e55cd78 100644 --- a/net/decnet/netfilter/dn_rtmsg.c +++ b/net/decnet/netfilter/dn_rtmsg.c @@ -115,7 +115,7 @@ static inline void dnrmg_receive_user_skb(struct sk_buff *skb) RCV_SKB_FAIL(-EINVAL); } -static struct nf_hook_ops dnrmg_ops __read_mostly = { +static const struct nf_hook_ops dnrmg_ops = { .hook = dnrmg_hook, .pf = NFPROTO_DECNET, .hooknum = NF_DN_ROUTE, diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c index 6c7da6c29bf0..55bf64a22b59 100644 --- a/net/decnet/sysctl_net_decnet.c +++ b/net/decnet/sysctl_net_decnet.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * DECnet An implementation of the DECnet protocol suite for the LINUX * operating system. DECnet is implemented using the BSD Socket diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c index 8737412c7b27..e1d4d898a007 100644 --- a/net/dns_resolver/dns_key.c +++ b/net/dns_resolver/dns_key.c @@ -224,7 +224,7 @@ static int dns_resolver_match_preparse(struct key_match_data *match_data) static void dns_resolver_describe(const struct key *key, struct seq_file *m) { seq_puts(m, key->description); - if (key_is_instantiated(key)) { + if (key_is_positive(key)) { int err = PTR_ERR(key->payload.data[dns_key_error]); if (err) diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig index cc5f8f971689..03c3bdf25468 100644 --- a/net/dsa/Kconfig +++ b/net/dsa/Kconfig @@ -7,6 +7,7 @@ config HAVE_NET_DSA config NET_DSA tristate "Distributed Switch Architecture" depends on HAVE_NET_DSA && MAY_USE_DEVLINK + depends on BRIDGE || BRIDGE=n select NET_SWITCHDEV select PHYLIB ---help--- @@ -19,6 +20,9 @@ if NET_DSA config NET_DSA_TAG_BRCM bool +config NET_DSA_TAG_BRCM_PREPEND + bool + config NET_DSA_TAG_DSA bool diff --git a/net/dsa/Makefile b/net/dsa/Makefile index fcce25da937c..0e13c1f95d13 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -1,9 +1,11 @@ +# SPDX-License-Identifier: GPL-2.0 # the core obj-$(CONFIG_NET_DSA) += dsa_core.o -dsa_core-y += dsa.o dsa2.o legacy.o port.o slave.o switch.o +dsa_core-y += dsa.o dsa2.o legacy.o master.o port.o slave.o switch.o # tagging formats dsa_core-$(CONFIG_NET_DSA_TAG_BRCM) += tag_brcm.o +dsa_core-$(CONFIG_NET_DSA_TAG_BRCM_PREPEND) += tag_brcm.o dsa_core-$(CONFIG_NET_DSA_TAG_DSA) += tag_dsa.o dsa_core-$(CONFIG_NET_DSA_TAG_EDSA) += tag_edsa.o dsa_core-$(CONFIG_NET_DSA_TAG_KSZ) += tag_ksz.o diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 416ac4ef9ba9..6a9d0f50fbee 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -14,6 +14,7 @@ #include <linux/platform_device.h> #include <linux/slab.h> #include <linux/module.h> +#include <linux/notifier.h> #include <linux/of.h> #include <linux/of_mdio.h> #include <linux/of_platform.h> @@ -43,6 +44,9 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = { #ifdef CONFIG_NET_DSA_TAG_BRCM [DSA_TAG_PROTO_BRCM] = &brcm_netdev_ops, #endif +#ifdef CONFIG_NET_DSA_TAG_BRCM_PREPEND + [DSA_TAG_PROTO_BRCM_PREPEND] = &brcm_prepend_netdev_ops, +#endif #ifdef CONFIG_NET_DSA_TAG_DSA [DSA_TAG_PROTO_DSA] = &dsa_netdev_ops, #endif @@ -67,37 +71,6 @@ const struct dsa_device_ops *dsa_device_ops[DSA_TAG_LAST] = { [DSA_TAG_PROTO_NONE] = &none_ops, }; -int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct device *dev, - struct dsa_port *dport, int port) -{ - struct device_node *port_dn = dport->dn; - struct phy_device *phydev; - int ret, mode; - - if (of_phy_is_fixed_link(port_dn)) { - ret = of_phy_register_fixed_link(port_dn); - if (ret) { - dev_err(dev, "failed to register fixed PHY\n"); - return ret; - } - phydev = of_phy_find_device(port_dn); - - mode = of_get_phy_mode(port_dn); - if (mode < 0) - mode = PHY_INTERFACE_MODE_NA; - phydev->interface = mode; - - genphy_config_init(phydev); - genphy_read_status(phydev); - if (ds->ops->adjust_link) - ds->ops->adjust_link(ds, port, phydev); - - put_device(&phydev->mdio.dev); - } - - return 0; -} - const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol) { const struct dsa_device_ops *ops; @@ -112,42 +85,6 @@ const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol) return ops; } -int dsa_cpu_port_ethtool_setup(struct dsa_port *cpu_dp) -{ - struct dsa_switch *ds = cpu_dp->ds; - struct net_device *master; - struct ethtool_ops *cpu_ops; - - master = cpu_dp->netdev; - - cpu_ops = devm_kzalloc(ds->dev, sizeof(*cpu_ops), GFP_KERNEL); - if (!cpu_ops) - return -ENOMEM; - - memcpy(&cpu_dp->ethtool_ops, master->ethtool_ops, - sizeof(struct ethtool_ops)); - cpu_dp->orig_ethtool_ops = master->ethtool_ops; - memcpy(cpu_ops, &cpu_dp->ethtool_ops, - sizeof(struct ethtool_ops)); - dsa_cpu_port_ethtool_init(cpu_ops); - master->ethtool_ops = cpu_ops; - - return 0; -} - -void dsa_cpu_port_ethtool_restore(struct dsa_port *cpu_dp) -{ - cpu_dp->netdev->ethtool_ops = cpu_dp->orig_ethtool_ops; -} - -void dsa_cpu_dsa_destroy(struct dsa_port *port) -{ - struct device_node *port_dn = port->dn; - - if (of_phy_is_fixed_link(port_dn)) - of_phy_deregister_fixed_link(port_dn); -} - static int dev_is_class(struct device *dev, void *class) { if (dev->class != NULL && !strcmp(dev->class->name, class)) @@ -186,12 +123,14 @@ struct net_device *dsa_dev_to_net_device(struct device *dev) EXPORT_SYMBOL_GPL(dsa_dev_to_net_device); static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) + struct packet_type *pt, struct net_device *unused) { - struct dsa_switch_tree *dst = dev->dsa_ptr; + struct dsa_port *cpu_dp = dev->dsa_ptr; struct sk_buff *nskb = NULL; + struct pcpu_sw_netstats *s; + struct dsa_slave_priv *p; - if (unlikely(dst == NULL)) { + if (unlikely(!cpu_dp)) { kfree_skb(skb); return 0; } @@ -200,19 +139,23 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, if (!skb) return 0; - nskb = dst->rcv(skb, dev, pt, orig_dev); + nskb = cpu_dp->rcv(skb, dev, pt); if (!nskb) { kfree_skb(skb); return 0; } skb = nskb; + p = netdev_priv(skb->dev); skb_push(skb, ETH_HLEN); skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, skb->dev); - skb->dev->stats.rx_packets++; - skb->dev->stats.rx_bytes += skb->len; + s = this_cpu_ptr(p->stats64); + u64_stats_update_begin(&s->syncp); + s->rx_packets++; + s->rx_bytes += skb->len; + u64_stats_update_end(&s->syncp); netif_receive_skb(skb); @@ -220,6 +163,11 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, } #ifdef CONFIG_PM_SLEEP +static bool dsa_is_port_initialized(struct dsa_switch *ds, int p) +{ + return dsa_is_user_port(ds, p) && ds->ports[p].slave; +} + int dsa_switch_suspend(struct dsa_switch *ds) { int i, ret = 0; @@ -229,7 +177,7 @@ int dsa_switch_suspend(struct dsa_switch *ds) if (!dsa_is_port_initialized(ds, i)) continue; - ret = dsa_slave_suspend(ds->ports[i].netdev); + ret = dsa_slave_suspend(ds->ports[i].slave); if (ret) return ret; } @@ -256,7 +204,7 @@ int dsa_switch_resume(struct dsa_switch *ds) if (!dsa_is_port_initialized(ds, i)) continue; - ret = dsa_slave_resume(ds->ports[i].netdev); + ret = dsa_slave_resume(ds->ports[i].slave); if (ret) return ret; } @@ -271,10 +219,44 @@ static struct packet_type dsa_pack_type __read_mostly = { .func = dsa_switch_rcv, }; +static struct workqueue_struct *dsa_owq; + +bool dsa_schedule_work(struct work_struct *work) +{ + return queue_work(dsa_owq, work); +} + +static ATOMIC_NOTIFIER_HEAD(dsa_notif_chain); + +int register_dsa_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_register(&dsa_notif_chain, nb); +} +EXPORT_SYMBOL_GPL(register_dsa_notifier); + +int unregister_dsa_notifier(struct notifier_block *nb) +{ + return atomic_notifier_chain_unregister(&dsa_notif_chain, nb); +} +EXPORT_SYMBOL_GPL(unregister_dsa_notifier); + +int call_dsa_notifiers(unsigned long val, struct net_device *dev, + struct dsa_notifier_info *info) +{ + info->dev = dev; + return atomic_notifier_call_chain(&dsa_notif_chain, val, info); +} +EXPORT_SYMBOL_GPL(call_dsa_notifiers); + static int __init dsa_init_module(void) { int rc; + dsa_owq = alloc_ordered_workqueue("dsa_ordered", + WQ_MEM_RECLAIM); + if (!dsa_owq) + return -ENOMEM; + rc = dsa_slave_register_notifier(); if (rc) return rc; @@ -294,6 +276,7 @@ static void __exit dsa_cleanup_module(void) dsa_slave_unregister_notifier(); dev_remove_pack(&dsa_pack_type); dsa_legacy_unregister(); + destroy_workqueue(dsa_owq); } module_exit(dsa_cleanup_module); diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index c442051d5a55..44e3fb7dec8c 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -21,293 +21,297 @@ #include "dsa_priv.h" -static LIST_HEAD(dsa_switch_trees); +static LIST_HEAD(dsa_tree_list); static DEFINE_MUTEX(dsa2_mutex); static const struct devlink_ops dsa_devlink_ops = { }; -static struct dsa_switch_tree *dsa_get_dst(u32 tree) +static struct dsa_switch_tree *dsa_tree_find(int index) { struct dsa_switch_tree *dst; - list_for_each_entry(dst, &dsa_switch_trees, list) - if (dst->tree == tree) { - kref_get(&dst->refcount); + list_for_each_entry(dst, &dsa_tree_list, list) + if (dst->index == index) return dst; - } + return NULL; } -static void dsa_free_dst(struct kref *ref) +static struct dsa_switch_tree *dsa_tree_alloc(int index) { - struct dsa_switch_tree *dst = container_of(ref, struct dsa_switch_tree, - refcount); + struct dsa_switch_tree *dst; - list_del(&dst->list); - kfree(dst); + dst = kzalloc(sizeof(*dst), GFP_KERNEL); + if (!dst) + return NULL; + + dst->index = index; + + INIT_LIST_HEAD(&dst->list); + list_add_tail(&dsa_tree_list, &dst->list); + + /* Initialize the reference counter to the number of switches, not 1 */ + kref_init(&dst->refcount); + refcount_set(&dst->refcount.refcount, 0); + + return dst; } -static void dsa_put_dst(struct dsa_switch_tree *dst) +static void dsa_tree_free(struct dsa_switch_tree *dst) { - kref_put(&dst->refcount, dsa_free_dst); + list_del(&dst->list); + kfree(dst); } -static struct dsa_switch_tree *dsa_add_dst(u32 tree) +static struct dsa_switch_tree *dsa_tree_touch(int index) { struct dsa_switch_tree *dst; - dst = kzalloc(sizeof(*dst), GFP_KERNEL); + dst = dsa_tree_find(index); if (!dst) - return NULL; - dst->tree = tree; - INIT_LIST_HEAD(&dst->list); - list_add_tail(&dsa_switch_trees, &dst->list); - kref_init(&dst->refcount); + dst = dsa_tree_alloc(index); return dst; } -static void dsa_dst_add_ds(struct dsa_switch_tree *dst, - struct dsa_switch *ds, u32 index) +static void dsa_tree_get(struct dsa_switch_tree *dst) { kref_get(&dst->refcount); - dst->ds[index] = ds; } -static void dsa_dst_del_ds(struct dsa_switch_tree *dst, - struct dsa_switch *ds, u32 index) +static void dsa_tree_release(struct kref *ref) { - dst->ds[index] = NULL; - kref_put(&dst->refcount, dsa_free_dst); + struct dsa_switch_tree *dst; + + dst = container_of(ref, struct dsa_switch_tree, refcount); + + dsa_tree_free(dst); } -/* For platform data configurations, we need to have a valid name argument to - * differentiate a disabled port from an enabled one - */ -static bool dsa_port_is_valid(struct dsa_port *port) +static void dsa_tree_put(struct dsa_switch_tree *dst) { - return !!(port->dn || port->name); + kref_put(&dst->refcount, dsa_tree_release); } static bool dsa_port_is_dsa(struct dsa_port *port) { - if (port->name && !strcmp(port->name, "dsa")) - return true; - else - return !!of_parse_phandle(port->dn, "link", 0); + return port->type == DSA_PORT_TYPE_DSA; } static bool dsa_port_is_cpu(struct dsa_port *port) { - if (port->name && !strcmp(port->name, "cpu")) - return true; - else - return !!of_parse_phandle(port->dn, "ethernet", 0); + return port->type == DSA_PORT_TYPE_CPU; } -static bool dsa_ds_find_port_dn(struct dsa_switch *ds, - struct device_node *port) +static bool dsa_port_is_user(struct dsa_port *dp) { - u32 index; - - for (index = 0; index < ds->num_ports; index++) - if (ds->ports[index].dn == port) - return true; - return false; + return dp->type == DSA_PORT_TYPE_USER; } -static struct dsa_switch *dsa_dst_find_port_dn(struct dsa_switch_tree *dst, - struct device_node *port) +static struct dsa_port *dsa_tree_find_port_by_node(struct dsa_switch_tree *dst, + struct device_node *dn) { struct dsa_switch *ds; - u32 index; + struct dsa_port *dp; + int device, port; - for (index = 0; index < DSA_MAX_SWITCHES; index++) { - ds = dst->ds[index]; + for (device = 0; device < DSA_MAX_SWITCHES; device++) { + ds = dst->ds[device]; if (!ds) continue; - if (dsa_ds_find_port_dn(ds, port)) - return ds; + for (port = 0; port < ds->num_ports; port++) { + dp = &ds->ports[port]; + + if (dp->dn == dn) + return dp; + } } return NULL; } -static int dsa_port_complete(struct dsa_switch_tree *dst, - struct dsa_switch *src_ds, - struct dsa_port *port, - u32 src_port) +static bool dsa_port_setup_routing_table(struct dsa_port *dp) { - struct device_node *link; - int index; - struct dsa_switch *dst_ds; - - for (index = 0;; index++) { - link = of_parse_phandle(port->dn, "link", index); - if (!link) - break; - - dst_ds = dsa_dst_find_port_dn(dst, link); - of_node_put(link); + struct dsa_switch *ds = dp->ds; + struct dsa_switch_tree *dst = ds->dst; + struct device_node *dn = dp->dn; + struct of_phandle_iterator it; + struct dsa_port *link_dp; + int err; - if (!dst_ds) - return 1; + of_for_each_phandle(&it, err, dn, "link", NULL, 0) { + link_dp = dsa_tree_find_port_by_node(dst, it.node); + if (!link_dp) { + of_node_put(it.node); + return false; + } - src_ds->rtable[dst_ds->index] = src_port; + ds->rtable[link_dp->ds->index] = dp->index; } - return 0; + return true; } -/* A switch is complete if all the DSA ports phandles point to ports - * known in the tree. A return value of 1 means the tree is not - * complete. This is not an error condition. A value of 0 is - * success. - */ -static int dsa_ds_complete(struct dsa_switch_tree *dst, struct dsa_switch *ds) +static bool dsa_switch_setup_routing_table(struct dsa_switch *ds) { - struct dsa_port *port; - u32 index; - int err; - - for (index = 0; index < ds->num_ports; index++) { - port = &ds->ports[index]; - if (!dsa_port_is_valid(port)) - continue; + bool complete = true; + struct dsa_port *dp; + int i; - if (!dsa_port_is_dsa(port)) - continue; + for (i = 0; i < DSA_MAX_SWITCHES; i++) + ds->rtable[i] = DSA_RTABLE_NONE; - err = dsa_port_complete(dst, ds, port, index); - if (err != 0) - return err; + for (i = 0; i < ds->num_ports; i++) { + dp = &ds->ports[i]; - ds->dsa_port_mask |= BIT(index); + if (dsa_port_is_dsa(dp)) { + complete = dsa_port_setup_routing_table(dp); + if (!complete) + break; + } } - return 0; + return complete; } -/* A tree is complete if all the DSA ports phandles point to ports - * known in the tree. A return value of 1 means the tree is not - * complete. This is not an error condition. A value of 0 is - * success. - */ -static int dsa_dst_complete(struct dsa_switch_tree *dst) +static bool dsa_tree_setup_routing_table(struct dsa_switch_tree *dst) { struct dsa_switch *ds; - u32 index; - int err; + bool complete = true; + int device; - for (index = 0; index < DSA_MAX_SWITCHES; index++) { - ds = dst->ds[index]; + for (device = 0; device < DSA_MAX_SWITCHES; device++) { + ds = dst->ds[device]; if (!ds) continue; - err = dsa_ds_complete(dst, ds); - if (err != 0) - return err; + complete = dsa_switch_setup_routing_table(ds); + if (!complete) + break; } - return 0; + return complete; } -static int dsa_dsa_port_apply(struct dsa_port *port) +static struct dsa_port *dsa_tree_find_first_cpu(struct dsa_switch_tree *dst) { - struct dsa_switch *ds = port->ds; - int err; + struct dsa_switch *ds; + struct dsa_port *dp; + int device, port; - err = dsa_cpu_dsa_setup(ds, ds->dev, port, port->index); - if (err) { - dev_warn(ds->dev, "Failed to setup dsa port %d: %d\n", - port->index, err); - return err; - } + for (device = 0; device < DSA_MAX_SWITCHES; device++) { + ds = dst->ds[device]; + if (!ds) + continue; - memset(&port->devlink_port, 0, sizeof(port->devlink_port)); + for (port = 0; port < ds->num_ports; port++) { + dp = &ds->ports[port]; - return devlink_port_register(ds->devlink, &port->devlink_port, - port->index); -} + if (dsa_port_is_cpu(dp)) + return dp; + } + } -static void dsa_dsa_port_unapply(struct dsa_port *port) -{ - devlink_port_unregister(&port->devlink_port); - dsa_cpu_dsa_destroy(port); + return NULL; } -static int dsa_cpu_port_apply(struct dsa_port *port) +static int dsa_tree_setup_default_cpu(struct dsa_switch_tree *dst) { - struct dsa_switch *ds = port->ds; - int err; + struct dsa_switch *ds; + struct dsa_port *dp; + int device, port; - err = dsa_cpu_dsa_setup(ds, ds->dev, port, port->index); - if (err) { - dev_warn(ds->dev, "Failed to setup cpu port %d: %d\n", - port->index, err); - return err; + /* DSA currently only supports a single CPU port */ + dst->cpu_dp = dsa_tree_find_first_cpu(dst); + if (!dst->cpu_dp) { + pr_warn("Tree has no master device\n"); + return -EINVAL; } - memset(&port->devlink_port, 0, sizeof(port->devlink_port)); - err = devlink_port_register(ds->devlink, &port->devlink_port, - port->index); - return err; + /* Assign the default CPU port to all ports of the fabric */ + for (device = 0; device < DSA_MAX_SWITCHES; device++) { + ds = dst->ds[device]; + if (!ds) + continue; + + for (port = 0; port < ds->num_ports; port++) { + dp = &ds->ports[port]; + + if (dsa_port_is_user(dp)) + dp->cpu_dp = dst->cpu_dp; + } + } + + return 0; } -static void dsa_cpu_port_unapply(struct dsa_port *port) +static void dsa_tree_teardown_default_cpu(struct dsa_switch_tree *dst) { - devlink_port_unregister(&port->devlink_port); - dsa_cpu_dsa_destroy(port); - port->ds->cpu_port_mask &= ~BIT(port->index); - + /* DSA currently only supports a single CPU port */ + dst->cpu_dp = NULL; } -static int dsa_user_port_apply(struct dsa_port *port) +static int dsa_port_setup(struct dsa_port *dp) { - struct dsa_switch *ds = port->ds; - const char *name = port->name; + struct dsa_switch *ds = dp->ds; int err; - if (port->dn) - name = of_get_property(port->dn, "label", NULL); - if (!name) - name = "eth%d"; + memset(&dp->devlink_port, 0, sizeof(dp->devlink_port)); - err = dsa_slave_create(ds, ds->dev, port->index, name); - if (err) { - dev_warn(ds->dev, "Failed to create slave %d: %d\n", - port->index, err); - port->netdev = NULL; - return err; - } - - memset(&port->devlink_port, 0, sizeof(port->devlink_port)); - err = devlink_port_register(ds->devlink, &port->devlink_port, - port->index); + err = devlink_port_register(ds->devlink, &dp->devlink_port, dp->index); if (err) return err; - devlink_port_type_eth_set(&port->devlink_port, port->netdev); + switch (dp->type) { + case DSA_PORT_TYPE_UNUSED: + break; + case DSA_PORT_TYPE_CPU: + case DSA_PORT_TYPE_DSA: + err = dsa_port_fixed_link_register_of(dp); + if (err) { + dev_err(ds->dev, "failed to register fixed link for port %d.%d\n", + ds->index, dp->index); + return err; + } + + break; + case DSA_PORT_TYPE_USER: + err = dsa_slave_create(dp); + if (err) + dev_err(ds->dev, "failed to create slave for port %d.%d\n", + ds->index, dp->index); + else + devlink_port_type_eth_set(&dp->devlink_port, dp->slave); + break; + } return 0; } -static void dsa_user_port_unapply(struct dsa_port *port) +static void dsa_port_teardown(struct dsa_port *dp) { - devlink_port_unregister(&port->devlink_port); - if (port->netdev) { - dsa_slave_destroy(port->netdev); - port->netdev = NULL; - port->ds->enabled_port_mask &= ~(1 << port->index); + devlink_port_unregister(&dp->devlink_port); + + switch (dp->type) { + case DSA_PORT_TYPE_UNUSED: + break; + case DSA_PORT_TYPE_CPU: + case DSA_PORT_TYPE_DSA: + dsa_port_fixed_link_unregister_of(dp); + break; + case DSA_PORT_TYPE_USER: + if (dp->slave) { + dsa_slave_destroy(dp->slave); + dp->slave = NULL; + } + break; } } -static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds) +static int dsa_switch_setup(struct dsa_switch *ds) { - struct dsa_port *port; - u32 index; int err; /* Initialize ds->phys_mii_mask before registering the slave MDIO bus @@ -315,7 +319,7 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds) * the slave MDIO bus driver rely on these values for probing PHY * devices or not */ - ds->phys_mii_mask = ds->enabled_port_mask; + ds->phys_mii_mask |= dsa_user_ports(ds); /* Add the switch to devlink before calling setup, so that setup can * add dpipe tables @@ -336,12 +340,6 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds) if (err) return err; - if (ds->ops->set_addr) { - err = ds->ops->set_addr(ds, dst->cpu_dp->netdev->dev_addr); - if (err < 0) - return err; - } - if (!ds->slave_mii_bus && ds->ops->phy_read) { ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev); if (!ds->slave_mii_bus) @@ -354,56 +352,11 @@ static int dsa_ds_apply(struct dsa_switch_tree *dst, struct dsa_switch *ds) return err; } - for (index = 0; index < ds->num_ports; index++) { - port = &ds->ports[index]; - if (!dsa_port_is_valid(port)) - continue; - - if (dsa_port_is_dsa(port)) { - err = dsa_dsa_port_apply(port); - if (err) - return err; - continue; - } - - if (dsa_port_is_cpu(port)) { - err = dsa_cpu_port_apply(port); - if (err) - return err; - continue; - } - - err = dsa_user_port_apply(port); - if (err) - continue; - } - return 0; } -static void dsa_ds_unapply(struct dsa_switch_tree *dst, struct dsa_switch *ds) +static void dsa_switch_teardown(struct dsa_switch *ds) { - struct dsa_port *port; - u32 index; - - for (index = 0; index < ds->num_ports; index++) { - port = &ds->ports[index]; - if (!dsa_port_is_valid(port)) - continue; - - if (dsa_port_is_dsa(port)) { - dsa_dsa_port_unapply(port); - continue; - } - - if (dsa_port_is_cpu(port)) { - dsa_cpu_port_unapply(port); - continue; - } - - dsa_user_port_unapply(port); - } - if (ds->slave_mii_bus && ds->ops->phy_read) mdiobus_unregister(ds->slave_mii_bus); @@ -417,198 +370,228 @@ static void dsa_ds_unapply(struct dsa_switch_tree *dst, struct dsa_switch *ds) } -static int dsa_dst_apply(struct dsa_switch_tree *dst) +static int dsa_tree_setup_switches(struct dsa_switch_tree *dst) { struct dsa_switch *ds; - u32 index; + struct dsa_port *dp; + int device, port; int err; - for (index = 0; index < DSA_MAX_SWITCHES; index++) { - ds = dst->ds[index]; + for (device = 0; device < DSA_MAX_SWITCHES; device++) { + ds = dst->ds[device]; if (!ds) continue; - err = dsa_ds_apply(dst, ds); + err = dsa_switch_setup(ds); if (err) return err; - } - if (dst->cpu_dp) { - err = dsa_cpu_port_ethtool_setup(dst->cpu_dp); - if (err) - return err; - } + for (port = 0; port < ds->num_ports; port++) { + dp = &ds->ports[port]; - /* If we use a tagging format that doesn't have an ethertype - * field, make sure that all packets from this point on get - * sent to the tag format's receive function. - */ - wmb(); - dst->cpu_dp->netdev->dsa_ptr = dst; - dst->applied = true; + err = dsa_port_setup(dp); + if (err) + return err; + } + } return 0; } -static void dsa_dst_unapply(struct dsa_switch_tree *dst) +static void dsa_tree_teardown_switches(struct dsa_switch_tree *dst) { struct dsa_switch *ds; - u32 index; - - if (!dst->applied) - return; - - dst->cpu_dp->netdev->dsa_ptr = NULL; - - /* If we used a tagging format that doesn't have an ethertype - * field, make sure that all packets from this point get sent - * without the tag and go through the regular receive path. - */ - wmb(); + struct dsa_port *dp; + int device, port; - for (index = 0; index < DSA_MAX_SWITCHES; index++) { - ds = dst->ds[index]; + for (device = 0; device < DSA_MAX_SWITCHES; device++) { + ds = dst->ds[device]; if (!ds) continue; - dsa_ds_unapply(dst, ds); - } + for (port = 0; port < ds->num_ports; port++) { + dp = &ds->ports[port]; + + dsa_port_teardown(dp); + } - if (dst->cpu_dp) { - dsa_cpu_port_ethtool_restore(dst->cpu_dp); - dst->cpu_dp = NULL; + dsa_switch_teardown(ds); } +} + +static int dsa_tree_setup_master(struct dsa_switch_tree *dst) +{ + struct dsa_port *cpu_dp = dst->cpu_dp; + struct net_device *master = cpu_dp->master; - pr_info("DSA: tree %d unapplied\n", dst->tree); - dst->applied = false; + /* DSA currently supports a single pair of CPU port and master device */ + return dsa_master_setup(master, cpu_dp); } -static int dsa_cpu_parse(struct dsa_port *port, u32 index, - struct dsa_switch_tree *dst, - struct dsa_switch *ds) +static void dsa_tree_teardown_master(struct dsa_switch_tree *dst) { - enum dsa_tag_protocol tag_protocol; - struct net_device *ethernet_dev; - struct device_node *ethernet; + struct dsa_port *cpu_dp = dst->cpu_dp; + struct net_device *master = cpu_dp->master; - if (port->dn) { - ethernet = of_parse_phandle(port->dn, "ethernet", 0); - if (!ethernet) - return -EINVAL; - ethernet_dev = of_find_net_device_by_node(ethernet); - } else { - ethernet_dev = dsa_dev_to_net_device(ds->cd->netdev[index]); - dev_put(ethernet_dev); - } + return dsa_master_teardown(master); +} - if (!ethernet_dev) - return -EPROBE_DEFER; +static int dsa_tree_setup(struct dsa_switch_tree *dst) +{ + bool complete; + int err; - if (!dst->cpu_dp) { - dst->cpu_dp = port; - dst->cpu_dp->netdev = ethernet_dev; + if (dst->setup) { + pr_err("DSA: tree %d already setup! Disjoint trees?\n", + dst->index); + return -EEXIST; } - /* Initialize cpu_port_mask now for drv->setup() - * to have access to a correct value, just like what - * net/dsa/dsa.c::dsa_switch_setup_one does. - */ - ds->cpu_port_mask |= BIT(index); + complete = dsa_tree_setup_routing_table(dst); + if (!complete) + return 0; - tag_protocol = ds->ops->get_tag_protocol(ds); - dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol); - if (IS_ERR(dst->tag_ops)) { - dev_warn(ds->dev, "No tagger for this switch\n"); - ds->cpu_port_mask &= ~BIT(index); - return PTR_ERR(dst->tag_ops); - } + err = dsa_tree_setup_default_cpu(dst); + if (err) + return err; - dst->rcv = dst->tag_ops->rcv; + err = dsa_tree_setup_switches(dst); + if (err) + return err; + + err = dsa_tree_setup_master(dst); + if (err) + return err; + + dst->setup = true; + + pr_info("DSA: tree %d setup\n", dst->index); return 0; } -static int dsa_ds_parse(struct dsa_switch_tree *dst, struct dsa_switch *ds) +static void dsa_tree_teardown(struct dsa_switch_tree *dst) { - struct dsa_port *port; - u32 index; + if (!dst->setup) + return; + + dsa_tree_teardown_master(dst); + + dsa_tree_teardown_switches(dst); + + dsa_tree_teardown_default_cpu(dst); + + pr_info("DSA: tree %d torn down\n", dst->index); + + dst->setup = false; +} + +static void dsa_tree_remove_switch(struct dsa_switch_tree *dst, + unsigned int index) +{ + dsa_tree_teardown(dst); + + dst->ds[index] = NULL; + dsa_tree_put(dst); +} + +static int dsa_tree_add_switch(struct dsa_switch_tree *dst, + struct dsa_switch *ds) +{ + unsigned int index = ds->index; int err; - for (index = 0; index < ds->num_ports; index++) { - port = &ds->ports[index]; - if (!dsa_port_is_valid(port) || - dsa_port_is_dsa(port)) - continue; + if (dst->ds[index]) + return -EBUSY; - if (dsa_port_is_cpu(port)) { - err = dsa_cpu_parse(port, index, dst, ds); - if (err) - return err; - } else { - /* Initialize enabled_port_mask now for drv->setup() - * to have access to a correct value, just like what - * net/dsa/dsa.c::dsa_switch_setup_one does. - */ - ds->enabled_port_mask |= BIT(index); - } + dsa_tree_get(dst); + dst->ds[index] = ds; - } + err = dsa_tree_setup(dst); + if (err) + dsa_tree_remove_switch(dst, index); - pr_info("DSA: switch %d %d parsed\n", dst->tree, ds->index); + return err; +} + +static int dsa_port_parse_user(struct dsa_port *dp, const char *name) +{ + if (!name) + name = "eth%d"; + + dp->type = DSA_PORT_TYPE_USER; + dp->name = name; return 0; } -static int dsa_dst_parse(struct dsa_switch_tree *dst) +static int dsa_port_parse_dsa(struct dsa_port *dp) { - struct dsa_switch *ds; - struct dsa_port *dp; - u32 index; - int port; - int err; + dp->type = DSA_PORT_TYPE_DSA; - for (index = 0; index < DSA_MAX_SWITCHES; index++) { - ds = dst->ds[index]; - if (!ds) - continue; + return 0; +} - err = dsa_ds_parse(dst, ds); - if (err) - return err; - } +static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master) +{ + struct dsa_switch *ds = dp->ds; + struct dsa_switch_tree *dst = ds->dst; + const struct dsa_device_ops *tag_ops; + enum dsa_tag_protocol tag_protocol; - if (!dst->cpu_dp->netdev) { - pr_warn("Tree has no master device\n"); - return -EINVAL; + tag_protocol = ds->ops->get_tag_protocol(ds, dp->index); + tag_ops = dsa_resolve_tag_protocol(tag_protocol); + if (IS_ERR(tag_ops)) { + dev_warn(ds->dev, "No tagger for this switch\n"); + return PTR_ERR(tag_ops); } - /* Assign the default CPU port to all ports of the fabric */ - for (index = 0; index < DSA_MAX_SWITCHES; index++) { - ds = dst->ds[index]; - if (!ds) - continue; + dp->type = DSA_PORT_TYPE_CPU; + dp->rcv = tag_ops->rcv; + dp->tag_ops = tag_ops; + dp->master = master; + dp->dst = dst; - for (port = 0; port < ds->num_ports; port++) { - dp = &ds->ports[port]; - if (!dsa_port_is_valid(dp) || - dsa_port_is_dsa(dp) || - dsa_port_is_cpu(dp)) - continue; + return 0; +} - dp->cpu_dp = dst->cpu_dp; - } +static int dsa_port_parse_of(struct dsa_port *dp, struct device_node *dn) +{ + struct device_node *ethernet = of_parse_phandle(dn, "ethernet", 0); + const char *name = of_get_property(dn, "label", NULL); + bool link = of_property_read_bool(dn, "link"); + + dp->dn = dn; + + if (ethernet) { + struct net_device *master; + + master = of_find_net_device_by_node(ethernet); + if (!master) + return -EPROBE_DEFER; + + return dsa_port_parse_cpu(dp, master); } - pr_info("DSA: tree %d parsed\n", dst->tree); + if (link) + return dsa_port_parse_dsa(dp); - return 0; + return dsa_port_parse_user(dp, name); } -static int dsa_parse_ports_dn(struct device_node *ports, struct dsa_switch *ds) +static int dsa_switch_parse_ports_of(struct dsa_switch *ds, + struct device_node *dn) { - struct device_node *port; - int err; + struct device_node *ports, *port; + struct dsa_port *dp; u32 reg; + int err; + + ports = of_get_child_by_name(dn, "ports"); + if (!ports) { + dev_err(ds->dev, "no ports child node found\n"); + return -EINVAL; + } for_each_available_child_of_node(ports, port) { err = of_property_read_u32(port, "reg", ®); @@ -618,174 +601,140 @@ static int dsa_parse_ports_dn(struct device_node *ports, struct dsa_switch *ds) if (reg >= ds->num_ports) return -EINVAL; - ds->ports[reg].dn = port; + dp = &ds->ports[reg]; + + err = dsa_port_parse_of(dp, port); + if (err) + return err; } return 0; } -static int dsa_parse_ports(struct dsa_chip_data *cd, struct dsa_switch *ds) +static int dsa_switch_parse_member_of(struct dsa_switch *ds, + struct device_node *dn) { - bool valid_name_found = false; - unsigned int i; + u32 m[2] = { 0, 0 }; + int sz; - for (i = 0; i < DSA_MAX_PORTS; i++) { - if (!cd->port_names[i]) - continue; + /* Don't error out if this optional property isn't found */ + sz = of_property_read_variable_u32_array(dn, "dsa,member", m, 2, 2); + if (sz < 0 && sz != -EINVAL) + return sz; - ds->ports[i].name = cd->port_names[i]; - valid_name_found = true; - } - - if (!valid_name_found && i == DSA_MAX_PORTS) + ds->index = m[1]; + if (ds->index >= DSA_MAX_SWITCHES) return -EINVAL; + ds->dst = dsa_tree_touch(m[0]); + if (!ds->dst) + return -ENOMEM; + return 0; } -static int dsa_parse_member_dn(struct device_node *np, u32 *tree, u32 *index) +static int dsa_switch_parse_of(struct dsa_switch *ds, struct device_node *dn) { int err; - *tree = *index = 0; - - err = of_property_read_u32_index(np, "dsa,member", 0, tree); - if (err) { - /* Does not exist, but it is optional */ - if (err == -EINVAL) - return 0; - return err; - } - - err = of_property_read_u32_index(np, "dsa,member", 1, index); + err = dsa_switch_parse_member_of(ds, dn); if (err) return err; - if (*index >= DSA_MAX_SWITCHES) - return -EINVAL; - - return 0; + return dsa_switch_parse_ports_of(ds, dn); } -static int dsa_parse_member(struct dsa_chip_data *pd, u32 *tree, u32 *index) +static int dsa_port_parse(struct dsa_port *dp, const char *name, + struct device *dev) { - if (!pd) - return -ENODEV; + if (!strcmp(name, "cpu")) { + struct net_device *master; - /* We do not support complex trees with dsa_chip_data */ - *tree = 0; - *index = 0; + master = dsa_dev_to_net_device(dev); + if (!master) + return -EPROBE_DEFER; - return 0; -} - -static struct device_node *dsa_get_ports(struct dsa_switch *ds, - struct device_node *np) -{ - struct device_node *ports; + dev_put(master); - ports = of_get_child_by_name(np, "ports"); - if (!ports) { - dev_err(ds->dev, "no ports child node found\n"); - return ERR_PTR(-EINVAL); + return dsa_port_parse_cpu(dp, master); } - return ports; + if (!strcmp(name, "dsa")) + return dsa_port_parse_dsa(dp); + + return dsa_port_parse_user(dp, name); } -static int _dsa_register_switch(struct dsa_switch *ds) +static int dsa_switch_parse_ports(struct dsa_switch *ds, + struct dsa_chip_data *cd) { - struct dsa_chip_data *pdata = ds->dev->platform_data; - struct device_node *np = ds->dev->of_node; - struct dsa_switch_tree *dst; - struct device_node *ports; - u32 tree, index; - int i, err; - - if (np) { - err = dsa_parse_member_dn(np, &tree, &index); - if (err) - return err; + bool valid_name_found = false; + struct dsa_port *dp; + struct device *dev; + const char *name; + unsigned int i; + int err; - ports = dsa_get_ports(ds, np); - if (IS_ERR(ports)) - return PTR_ERR(ports); + for (i = 0; i < DSA_MAX_PORTS; i++) { + name = cd->port_names[i]; + dev = cd->netdev[i]; + dp = &ds->ports[i]; - err = dsa_parse_ports_dn(ports, ds); - if (err) - return err; - } else { - err = dsa_parse_member(pdata, &tree, &index); - if (err) - return err; + if (!name) + continue; - err = dsa_parse_ports(pdata, ds); + err = dsa_port_parse(dp, name, dev); if (err) return err; - } - dst = dsa_get_dst(tree); - if (!dst) { - dst = dsa_add_dst(tree); - if (!dst) - return -ENOMEM; - } - - if (dst->ds[index]) { - err = -EBUSY; - goto out; + valid_name_found = true; } - ds->dst = dst; - ds->index = index; - ds->cd = pdata; - - /* Initialize the routing table */ - for (i = 0; i < DSA_MAX_SWITCHES; ++i) - ds->rtable[i] = DSA_RTABLE_NONE; + if (!valid_name_found && i == DSA_MAX_PORTS) + return -EINVAL; - dsa_dst_add_ds(dst, ds, index); + return 0; +} - err = dsa_dst_complete(dst); - if (err < 0) - goto out_del_dst; +static int dsa_switch_parse(struct dsa_switch *ds, struct dsa_chip_data *cd) +{ + ds->cd = cd; - if (err == 1) { - /* Not all switches registered yet */ - err = 0; - goto out; - } + /* We don't support interconnected switches nor multiple trees via + * platform data, so this is the unique switch of the tree. + */ + ds->index = 0; + ds->dst = dsa_tree_touch(0); + if (!ds->dst) + return -ENOMEM; - if (dst->applied) { - pr_info("DSA: Disjoint trees?\n"); - return -EINVAL; - } + return dsa_switch_parse_ports(ds, cd); +} - err = dsa_dst_parse(dst); - if (err) { - if (err == -EPROBE_DEFER) { - dsa_dst_del_ds(dst, ds, ds->index); - return err; - } +static int dsa_switch_add(struct dsa_switch *ds) +{ + struct dsa_switch_tree *dst = ds->dst; - goto out_del_dst; - } + return dsa_tree_add_switch(dst, ds); +} - err = dsa_dst_apply(dst); - if (err) { - dsa_dst_unapply(dst); - goto out_del_dst; - } +static int dsa_switch_probe(struct dsa_switch *ds) +{ + struct dsa_chip_data *pdata = ds->dev->platform_data; + struct device_node *np = ds->dev->of_node; + int err; - dsa_put_dst(dst); - return 0; + if (np) + err = dsa_switch_parse_of(ds, np); + else if (pdata) + err = dsa_switch_parse(ds, pdata); + else + err = -ENODEV; -out_del_dst: - dsa_dst_del_ds(dst, ds, ds->index); -out: - dsa_put_dst(dst); + if (err) + return err; - return err; + return dsa_switch_add(ds); } struct dsa_switch *dsa_switch_alloc(struct device *dev, size_t n) @@ -815,26 +764,25 @@ int dsa_register_switch(struct dsa_switch *ds) int err; mutex_lock(&dsa2_mutex); - err = _dsa_register_switch(ds); + err = dsa_switch_probe(ds); mutex_unlock(&dsa2_mutex); return err; } EXPORT_SYMBOL_GPL(dsa_register_switch); -static void _dsa_unregister_switch(struct dsa_switch *ds) +static void dsa_switch_remove(struct dsa_switch *ds) { struct dsa_switch_tree *dst = ds->dst; + unsigned int index = ds->index; - dsa_dst_unapply(dst); - - dsa_dst_del_ds(dst, ds, ds->index); + dsa_tree_remove_switch(dst, index); } void dsa_unregister_switch(struct dsa_switch *ds) { mutex_lock(&dsa2_mutex); - _dsa_unregister_switch(ds); + dsa_switch_remove(ds); mutex_unlock(&dsa2_mutex); } EXPORT_SYMBOL_GPL(dsa_unregister_switch); diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 55982cc39b24..7d036696e8c4 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -43,10 +43,10 @@ struct dsa_notifier_bridge_info { /* DSA_NOTIFIER_FDB_* */ struct dsa_notifier_fdb_info { - const struct switchdev_obj_port_fdb *fdb; - struct switchdev_trans *trans; int sw_index; int port; + const unsigned char *addr; + u16 vid; }; /* DSA_NOTIFIER_MDB_* */ @@ -65,18 +65,13 @@ struct dsa_notifier_vlan_info { int port; }; -struct dsa_device_ops { - struct sk_buff *(*xmit)(struct sk_buff *skb, struct net_device *dev); - struct sk_buff *(*rcv)(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, - struct net_device *orig_dev); -}; - struct dsa_slave_priv { - /* Copy of dp->ds->dst->tag_ops->xmit for faster access in hot path */ + /* Copy of CPU port xmit for faster access in slave transmit hot path */ struct sk_buff * (*xmit)(struct sk_buff *skb, struct net_device *dev); + struct pcpu_sw_netstats *stats64; + /* DSA port data, such as switch, port index, etc. */ struct dsa_port *dp; @@ -84,7 +79,6 @@ struct dsa_slave_priv { * The phylib phy_device pointer for the PHY connected * to this port. */ - struct phy_device *phy; phy_interface_t phy_interface; int old_link; int old_pause; @@ -99,68 +93,105 @@ struct dsa_slave_priv { }; /* dsa.c */ -int dsa_cpu_dsa_setup(struct dsa_switch *ds, struct device *dev, - struct dsa_port *dport, int port); -void dsa_cpu_dsa_destroy(struct dsa_port *dport); const struct dsa_device_ops *dsa_resolve_tag_protocol(int tag_protocol); -int dsa_cpu_port_ethtool_setup(struct dsa_port *cpu_dp); -void dsa_cpu_port_ethtool_restore(struct dsa_port *cpu_dp); +bool dsa_schedule_work(struct work_struct *work); /* legacy.c */ int dsa_legacy_register(void); void dsa_legacy_unregister(void); +int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, u16 vid, + u16 flags); +int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, u16 vid); + +/* master.c */ +int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp); +void dsa_master_teardown(struct net_device *dev); + +static inline struct net_device *dsa_master_find_slave(struct net_device *dev, + int device, int port) +{ + struct dsa_port *cpu_dp = dev->dsa_ptr; + struct dsa_switch_tree *dst = cpu_dp->dst; + struct dsa_switch *ds; + + if (device < 0 || device >= DSA_MAX_SWITCHES) + return NULL; + + ds = dst->ds[device]; + if (!ds) + return NULL; + + if (port < 0 || port >= ds->num_ports) + return NULL; + + return ds->ports[port].slave; +} /* port.c */ int dsa_port_set_state(struct dsa_port *dp, u8 state, struct switchdev_trans *trans); -void dsa_port_set_state_now(struct dsa_port *dp, u8 state); +int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy); +void dsa_port_disable(struct dsa_port *dp, struct phy_device *phy); int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br); void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br); int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering, struct switchdev_trans *trans); int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock, struct switchdev_trans *trans); -int dsa_port_fdb_add(struct dsa_port *dp, - const struct switchdev_obj_port_fdb *fdb, - struct switchdev_trans *trans); -int dsa_port_fdb_del(struct dsa_port *dp, - const struct switchdev_obj_port_fdb *fdb); -int dsa_port_fdb_dump(struct dsa_port *dp, struct switchdev_obj_port_fdb *fdb, - switchdev_obj_dump_cb_t *cb); -int dsa_port_mdb_add(struct dsa_port *dp, +int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr, + u16 vid); +int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr, + u16 vid); +int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data); +int dsa_port_mdb_add(const struct dsa_port *dp, const struct switchdev_obj_port_mdb *mdb, struct switchdev_trans *trans); -int dsa_port_mdb_del(struct dsa_port *dp, +int dsa_port_mdb_del(const struct dsa_port *dp, const struct switchdev_obj_port_mdb *mdb); -int dsa_port_mdb_dump(struct dsa_port *dp, struct switchdev_obj_port_mdb *mdb, - switchdev_obj_dump_cb_t *cb); int dsa_port_vlan_add(struct dsa_port *dp, const struct switchdev_obj_port_vlan *vlan, struct switchdev_trans *trans); int dsa_port_vlan_del(struct dsa_port *dp, const struct switchdev_obj_port_vlan *vlan); -int dsa_port_vlan_dump(struct dsa_port *dp, - struct switchdev_obj_port_vlan *vlan, - switchdev_obj_dump_cb_t *cb); +int dsa_port_fixed_link_register_of(struct dsa_port *dp); +void dsa_port_fixed_link_unregister_of(struct dsa_port *dp); /* slave.c */ extern const struct dsa_device_ops notag_netdev_ops; void dsa_slave_mii_bus_init(struct dsa_switch *ds); -void dsa_cpu_port_ethtool_init(struct ethtool_ops *ops); -int dsa_slave_create(struct dsa_switch *ds, struct device *parent, - int port, const char *name); +int dsa_slave_create(struct dsa_port *dp); void dsa_slave_destroy(struct net_device *slave_dev); int dsa_slave_suspend(struct net_device *slave_dev); int dsa_slave_resume(struct net_device *slave_dev); int dsa_slave_register_notifier(void); void dsa_slave_unregister_notifier(void); +static inline struct dsa_port *dsa_slave_to_port(const struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + + return p->dp; +} + +static inline struct net_device * +dsa_slave_to_master(const struct net_device *dev) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + + return dp->cpu_dp->master; +} + /* switch.c */ int dsa_switch_register_notifier(struct dsa_switch *ds); void dsa_switch_unregister_notifier(struct dsa_switch *ds); /* tag_brcm.c */ extern const struct dsa_device_ops brcm_netdev_ops; +extern const struct dsa_device_ops brcm_prepend_netdev_ops; /* tag_dsa.c */ extern const struct dsa_device_ops dsa_netdev_ops; @@ -183,14 +214,4 @@ extern const struct dsa_device_ops qca_netdev_ops; /* tag_trailer.c */ extern const struct dsa_device_ops trailer_netdev_ops; -static inline struct net_device *dsa_master_netdev(struct dsa_slave_priv *p) -{ - return p->dp->cpu_dp->netdev; -} - -static inline struct dsa_port *dsa_get_cpu_port(struct dsa_switch_tree *dst) -{ - return dst->cpu_dp; -} - #endif diff --git a/net/dsa/legacy.c b/net/dsa/legacy.c index 1d7a3282f2a7..84611d7fcfa2 100644 --- a/net/dsa/legacy.c +++ b/net/dsa/legacy.c @@ -78,31 +78,30 @@ dsa_switch_probe(struct device *parent, struct device *host_dev, int sw_addr, } /* basic switch operations **************************************************/ -static int dsa_cpu_dsa_setups(struct dsa_switch *ds, struct device *dev) +static int dsa_cpu_dsa_setups(struct dsa_switch *ds) { - struct dsa_port *dport; int ret, port; for (port = 0; port < ds->num_ports; port++) { if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))) continue; - dport = &ds->ports[port]; - ret = dsa_cpu_dsa_setup(ds, dev, dport, port); + ret = dsa_port_fixed_link_register_of(&ds->ports[port]); if (ret) return ret; } return 0; } -static int dsa_switch_setup_one(struct dsa_switch *ds, struct net_device *master, - struct device *parent) +static int dsa_switch_setup_one(struct dsa_switch *ds, + struct net_device *master) { const struct dsa_switch_ops *ops = ds->ops; struct dsa_switch_tree *dst = ds->dst; struct dsa_chip_data *cd = ds->cd; bool valid_name_found = false; int index = ds->index; + struct dsa_port *dp; int i, ret; /* @@ -111,9 +110,12 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct net_device *master for (i = 0; i < ds->num_ports; i++) { char *name; + dp = &ds->ports[i]; + name = cd->port_names[i]; if (name == NULL) continue; + dp->name = name; if (!strcmp(name, "cpu")) { if (dst->cpu_dp) { @@ -122,12 +124,12 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct net_device *master return -EINVAL; } dst->cpu_dp = &ds->ports[i]; - dst->cpu_dp->netdev = master; - ds->cpu_port_mask |= 1 << i; + dst->cpu_dp->master = master; + dp->type = DSA_PORT_TYPE_CPU; } else if (!strcmp(name, "dsa")) { - ds->dsa_port_mask |= 1 << i; + dp->type = DSA_PORT_TYPE_DSA; } else { - ds->enabled_port_mask |= 1 << i; + dp->type = DSA_PORT_TYPE_USER; } valid_name_found = true; } @@ -138,7 +140,7 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct net_device *master /* Make the built-in MII bus mask match the number of ports, * switch drivers can override this later */ - ds->phys_mii_mask = ds->enabled_port_mask; + ds->phys_mii_mask |= dsa_user_ports(ds); /* * If the CPU connects to this switch, set the switch tree @@ -146,14 +148,19 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct net_device *master * switch. */ if (dst->cpu_dp->ds == ds) { + const struct dsa_device_ops *tag_ops; enum dsa_tag_protocol tag_protocol; - tag_protocol = ops->get_tag_protocol(ds); - dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol); - if (IS_ERR(dst->tag_ops)) - return PTR_ERR(dst->tag_ops); + tag_protocol = ops->get_tag_protocol(ds, dst->cpu_dp->index); + tag_ops = dsa_resolve_tag_protocol(tag_protocol); + if (IS_ERR(tag_ops)) + return PTR_ERR(tag_ops); + + dst->cpu_dp->tag_ops = tag_ops; - dst->rcv = dst->tag_ops->rcv; + /* Few copies for faster access in master receive hot path */ + dst->cpu_dp->rcv = dst->cpu_dp->tag_ops->rcv; + dst->cpu_dp->dst = dst; } memcpy(ds->rtable, cd->rtable, sizeof(ds->rtable)); @@ -169,14 +176,8 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct net_device *master if (ret) return ret; - if (ops->set_addr) { - ret = ops->set_addr(ds, master->dev_addr); - if (ret < 0) - return ret; - } - if (!ds->slave_mii_bus && ops->phy_read) { - ds->slave_mii_bus = devm_mdiobus_alloc(parent); + ds->slave_mii_bus = devm_mdiobus_alloc(ds->dev); if (!ds->slave_mii_bus) return -ENOMEM; dsa_slave_mii_bus_init(ds); @@ -193,25 +194,21 @@ static int dsa_switch_setup_one(struct dsa_switch *ds, struct net_device *master ds->ports[i].dn = cd->port_dn[i]; ds->ports[i].cpu_dp = dst->cpu_dp; - if (!(ds->enabled_port_mask & (1 << i))) + if (dsa_is_user_port(ds, i)) continue; - ret = dsa_slave_create(ds, parent, i, cd->port_names[i]); + ret = dsa_slave_create(&ds->ports[i]); if (ret < 0) netdev_err(master, "[%d]: can't create dsa slave device for port %d(%s): %d\n", index, i, cd->port_names[i], ret); } /* Perform configuration of the CPU and DSA ports */ - ret = dsa_cpu_dsa_setups(ds, parent); + ret = dsa_cpu_dsa_setups(ds); if (ret < 0) netdev_err(master, "[%d] : can't configure CPU and DSA ports\n", index); - ret = dsa_cpu_port_ethtool_setup(ds->dst->cpu_dp); - if (ret) - return ret; - return 0; } @@ -252,7 +249,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, struct net_device *master, ds->ops = ops; ds->priv = priv; - ret = dsa_switch_setup_one(ds, master, parent); + ret = dsa_switch_setup_one(ds, master); if (ret) return ERR_PTR(ret); @@ -265,24 +262,20 @@ static void dsa_switch_destroy(struct dsa_switch *ds) /* Destroy network devices for physical switch ports. */ for (port = 0; port < ds->num_ports; port++) { - if (!(ds->enabled_port_mask & (1 << port))) + if (!dsa_is_user_port(ds, port)) continue; - if (!ds->ports[port].netdev) + if (!ds->ports[port].slave) continue; - dsa_slave_destroy(ds->ports[port].netdev); + dsa_slave_destroy(ds->ports[port].slave); } /* Disable configuration of the CPU and DSA ports */ for (port = 0; port < ds->num_ports; port++) { if (!(dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port))) continue; - dsa_cpu_dsa_destroy(&ds->ports[port]); - - /* Clearing a bit which is not set does no harm */ - ds->cpu_port_mask |= ~(1 << port); - ds->dsa_port_mask |= ~(1 << port); + dsa_port_fixed_link_unregister_of(&ds->ports[port]); } if (ds->slave_mii_bus && ds->ops->phy_read) @@ -600,15 +593,7 @@ static int dsa_setup_dst(struct dsa_switch_tree *dst, struct net_device *dev, if (!configured) return -EPROBE_DEFER; - /* - * If we use a tagging format that doesn't have an ethertype - * field, make sure that all packets from this point on get - * sent to the tag format's receive function. - */ - wmb(); - dev->dsa_ptr = dst; - - return 0; + return dsa_master_setup(dst->cpu_dp->master, dst->cpu_dp); } static int dsa_probe(struct platform_device *pdev) @@ -673,13 +658,7 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst) { int i; - dst->cpu_dp->netdev->dsa_ptr = NULL; - - /* If we used a tagging format that doesn't have an ethertype - * field, make sure that all packets from this point get sent - * without the tag and go through the regular receive path. - */ - wmb(); + dsa_master_teardown(dst->cpu_dp->master); for (i = 0; i < dst->pd->nr_chips; i++) { struct dsa_switch *ds = dst->ds[i]; @@ -688,9 +667,7 @@ static void dsa_remove_dst(struct dsa_switch_tree *dst) dsa_switch_destroy(ds); } - dsa_cpu_port_ethtool_restore(dst->cpu_dp); - - dev_put(dst->cpu_dp->netdev); + dev_put(dst->cpu_dp->master); } static int dsa_remove(struct platform_device *pdev) @@ -741,6 +718,26 @@ static int dsa_resume(struct device *d) } #endif +/* legacy way, bypassing the bridge *****************************************/ +int dsa_legacy_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, u16 vid, + u16 flags) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + + return dsa_port_fdb_add(dp, addr, vid); +} + +int dsa_legacy_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, u16 vid) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + + return dsa_port_fdb_del(dp, addr, vid); +} + static SIMPLE_DEV_PM_OPS(dsa_pm_ops, dsa_suspend, dsa_resume); static const struct of_device_id dsa_of_match_table[] = { diff --git a/net/dsa/master.c b/net/dsa/master.c new file mode 100644 index 000000000000..00589147f042 --- /dev/null +++ b/net/dsa/master.c @@ -0,0 +1,143 @@ +/* + * Handling of a master device, switching frames via its switch fabric CPU port + * + * Copyright (c) 2017 Savoir-faire Linux Inc. + * Vivien Didelot <vivien.didelot@savoirfairelinux.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include "dsa_priv.h" + +static void dsa_master_get_ethtool_stats(struct net_device *dev, + struct ethtool_stats *stats, + uint64_t *data) +{ + struct dsa_port *cpu_dp = dev->dsa_ptr; + const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; + struct dsa_switch *ds = cpu_dp->ds; + int port = cpu_dp->index; + int count = 0; + + if (ops && ops->get_sset_count && ops->get_ethtool_stats) { + count = ops->get_sset_count(dev, ETH_SS_STATS); + ops->get_ethtool_stats(dev, stats, data); + } + + if (ds->ops->get_ethtool_stats) + ds->ops->get_ethtool_stats(ds, port, data + count); +} + +static int dsa_master_get_sset_count(struct net_device *dev, int sset) +{ + struct dsa_port *cpu_dp = dev->dsa_ptr; + const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; + struct dsa_switch *ds = cpu_dp->ds; + int count = 0; + + if (ops && ops->get_sset_count) + count += ops->get_sset_count(dev, sset); + + if (sset == ETH_SS_STATS && ds->ops->get_sset_count) + count += ds->ops->get_sset_count(ds); + + return count; +} + +static void dsa_master_get_strings(struct net_device *dev, uint32_t stringset, + uint8_t *data) +{ + struct dsa_port *cpu_dp = dev->dsa_ptr; + const struct ethtool_ops *ops = cpu_dp->orig_ethtool_ops; + struct dsa_switch *ds = cpu_dp->ds; + int port = cpu_dp->index; + int len = ETH_GSTRING_LEN; + int mcount = 0, count; + unsigned int i; + uint8_t pfx[4]; + uint8_t *ndata; + + snprintf(pfx, sizeof(pfx), "p%.2d", port); + /* We do not want to be NULL-terminated, since this is a prefix */ + pfx[sizeof(pfx) - 1] = '_'; + + if (ops && ops->get_sset_count && ops->get_strings) { + mcount = ops->get_sset_count(dev, ETH_SS_STATS); + ops->get_strings(dev, stringset, data); + } + + if (stringset == ETH_SS_STATS && ds->ops->get_strings) { + ndata = data + mcount * len; + /* This function copies ETH_GSTRINGS_LEN bytes, we will mangle + * the output after to prepend our CPU port prefix we + * constructed earlier + */ + ds->ops->get_strings(ds, port, ndata); + count = ds->ops->get_sset_count(ds); + for (i = 0; i < count; i++) { + memmove(ndata + (i * len + sizeof(pfx)), + ndata + i * len, len - sizeof(pfx)); + memcpy(ndata + i * len, pfx, sizeof(pfx)); + } + } +} + +static int dsa_master_ethtool_setup(struct net_device *dev) +{ + struct dsa_port *cpu_dp = dev->dsa_ptr; + struct dsa_switch *ds = cpu_dp->ds; + struct ethtool_ops *ops; + + ops = devm_kzalloc(ds->dev, sizeof(*ops), GFP_KERNEL); + if (!ops) + return -ENOMEM; + + cpu_dp->orig_ethtool_ops = dev->ethtool_ops; + if (cpu_dp->orig_ethtool_ops) + memcpy(ops, cpu_dp->orig_ethtool_ops, sizeof(*ops)); + + ops->get_sset_count = dsa_master_get_sset_count; + ops->get_ethtool_stats = dsa_master_get_ethtool_stats; + ops->get_strings = dsa_master_get_strings; + + dev->ethtool_ops = ops; + + return 0; +} + +static void dsa_master_ethtool_teardown(struct net_device *dev) +{ + struct dsa_port *cpu_dp = dev->dsa_ptr; + + dev->ethtool_ops = cpu_dp->orig_ethtool_ops; + cpu_dp->orig_ethtool_ops = NULL; +} + +int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) +{ + /* If we use a tagging format that doesn't have an ethertype + * field, make sure that all packets from this point on get + * sent to the tag format's receive function. + */ + wmb(); + + dev->dsa_ptr = cpu_dp; + + return dsa_master_ethtool_setup(dev); +} + +void dsa_master_teardown(struct net_device *dev) +{ + dsa_master_ethtool_teardown(dev); + + dev->dsa_ptr = NULL; + + /* If we used a tagging format that doesn't have an ethertype + * field, make sure that all packets from this point get sent + * without the tag and go through the regular receive path. + */ + wmb(); +} diff --git a/net/dsa/port.c b/net/dsa/port.c index efc3bce3a89d..bb4be2679904 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -12,10 +12,12 @@ #include <linux/if_bridge.h> #include <linux/notifier.h> +#include <linux/of_mdio.h> +#include <linux/of_net.h> #include "dsa_priv.h" -static int dsa_port_notify(struct dsa_port *dp, unsigned long e, void *v) +static int dsa_port_notify(const struct dsa_port *dp, unsigned long e, void *v) { struct raw_notifier_head *nh = &dp->ds->dst->nh; int err; @@ -56,7 +58,7 @@ int dsa_port_set_state(struct dsa_port *dp, u8 state, return 0; } -void dsa_port_set_state_now(struct dsa_port *dp, u8 state) +static void dsa_port_set_state_now(struct dsa_port *dp, u8 state) { int err; @@ -65,6 +67,35 @@ void dsa_port_set_state_now(struct dsa_port *dp, u8 state) pr_err("DSA: failed to set STP state %u (%d)\n", state, err); } +int dsa_port_enable(struct dsa_port *dp, struct phy_device *phy) +{ + u8 stp_state = dp->bridge_dev ? BR_STATE_BLOCKING : BR_STATE_FORWARDING; + struct dsa_switch *ds = dp->ds; + int port = dp->index; + int err; + + if (ds->ops->port_enable) { + err = ds->ops->port_enable(ds, port, phy); + if (err) + return err; + } + + dsa_port_set_state_now(dp, stp_state); + + return 0; +} + +void dsa_port_disable(struct dsa_port *dp, struct phy_device *phy) +{ + struct dsa_switch *ds = dp->ds; + int port = dp->index; + + dsa_port_set_state_now(dp, BR_STATE_DISABLED); + + if (ds->ops->port_disable) + ds->ops->port_disable(ds, port, phy); +} + int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br) { struct dsa_notifier_bridge_info info = { @@ -146,44 +177,45 @@ int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock, return dsa_port_notify(dp, DSA_NOTIFIER_AGEING_TIME, &info); } -int dsa_port_fdb_add(struct dsa_port *dp, - const struct switchdev_obj_port_fdb *fdb, - struct switchdev_trans *trans) +int dsa_port_fdb_add(struct dsa_port *dp, const unsigned char *addr, + u16 vid) { struct dsa_notifier_fdb_info info = { .sw_index = dp->ds->index, .port = dp->index, - .trans = trans, - .fdb = fdb, + .addr = addr, + .vid = vid, }; return dsa_port_notify(dp, DSA_NOTIFIER_FDB_ADD, &info); } -int dsa_port_fdb_del(struct dsa_port *dp, - const struct switchdev_obj_port_fdb *fdb) +int dsa_port_fdb_del(struct dsa_port *dp, const unsigned char *addr, + u16 vid) { struct dsa_notifier_fdb_info info = { .sw_index = dp->ds->index, .port = dp->index, - .fdb = fdb, + .addr = addr, + .vid = vid, + }; return dsa_port_notify(dp, DSA_NOTIFIER_FDB_DEL, &info); } -int dsa_port_fdb_dump(struct dsa_port *dp, struct switchdev_obj_port_fdb *fdb, - switchdev_obj_dump_cb_t *cb) +int dsa_port_fdb_dump(struct dsa_port *dp, dsa_fdb_dump_cb_t *cb, void *data) { struct dsa_switch *ds = dp->ds; + int port = dp->index; - if (ds->ops->port_fdb_dump) - return ds->ops->port_fdb_dump(ds, dp->index, fdb, cb); + if (!ds->ops->port_fdb_dump) + return -EOPNOTSUPP; - return -EOPNOTSUPP; + return ds->ops->port_fdb_dump(ds, port, cb, data); } -int dsa_port_mdb_add(struct dsa_port *dp, +int dsa_port_mdb_add(const struct dsa_port *dp, const struct switchdev_obj_port_mdb *mdb, struct switchdev_trans *trans) { @@ -197,7 +229,7 @@ int dsa_port_mdb_add(struct dsa_port *dp, return dsa_port_notify(dp, DSA_NOTIFIER_MDB_ADD, &info); } -int dsa_port_mdb_del(struct dsa_port *dp, +int dsa_port_mdb_del(const struct dsa_port *dp, const struct switchdev_obj_port_mdb *mdb) { struct dsa_notifier_mdb_info info = { @@ -209,17 +241,6 @@ int dsa_port_mdb_del(struct dsa_port *dp, return dsa_port_notify(dp, DSA_NOTIFIER_MDB_DEL, &info); } -int dsa_port_mdb_dump(struct dsa_port *dp, struct switchdev_obj_port_mdb *mdb, - switchdev_obj_dump_cb_t *cb) -{ - struct dsa_switch *ds = dp->ds; - - if (ds->ops->port_mdb_dump) - return ds->ops->port_mdb_dump(ds, dp->index, mdb, cb); - - return -EOPNOTSUPP; -} - int dsa_port_vlan_add(struct dsa_port *dp, const struct switchdev_obj_port_vlan *vlan, struct switchdev_trans *trans) @@ -231,7 +252,10 @@ int dsa_port_vlan_add(struct dsa_port *dp, .vlan = vlan, }; - return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info); + if (br_vlan_enabled(dp->bridge_dev)) + return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_ADD, &info); + + return 0; } int dsa_port_vlan_del(struct dsa_port *dp, @@ -243,17 +267,53 @@ int dsa_port_vlan_del(struct dsa_port *dp, .vlan = vlan, }; - return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info); + if (br_vlan_enabled(dp->bridge_dev)) + return dsa_port_notify(dp, DSA_NOTIFIER_VLAN_DEL, &info); + + return 0; } -int dsa_port_vlan_dump(struct dsa_port *dp, - struct switchdev_obj_port_vlan *vlan, - switchdev_obj_dump_cb_t *cb) +int dsa_port_fixed_link_register_of(struct dsa_port *dp) { + struct device_node *dn = dp->dn; struct dsa_switch *ds = dp->ds; + struct phy_device *phydev; + int port = dp->index; + int mode; + int err; - if (ds->ops->port_vlan_dump) - return ds->ops->port_vlan_dump(ds, dp->index, vlan, cb); + if (of_phy_is_fixed_link(dn)) { + err = of_phy_register_fixed_link(dn); + if (err) { + dev_err(ds->dev, + "failed to register the fixed PHY of port %d\n", + port); + return err; + } + + phydev = of_phy_find_device(dn); + + mode = of_get_phy_mode(dn); + if (mode < 0) + mode = PHY_INTERFACE_MODE_NA; + phydev->interface = mode; + + genphy_config_init(phydev); + genphy_read_status(phydev); + + if (ds->ops->adjust_link) + ds->ops->adjust_link(ds, port, phydev); + + put_device(&phydev->mdio.dev); + } + + return 0; +} + +void dsa_port_fixed_link_unregister_of(struct dsa_port *dp) +{ + struct device_node *dn = dp->dn; - return -EOPNOTSUPP; + if (of_phy_is_fixed_link(dn)) + of_phy_deregister_fixed_link(dn); } diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 9507bd38cf04..d6e7a642493b 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -55,7 +55,7 @@ void dsa_slave_mii_bus_init(struct dsa_switch *ds) ds->slave_mii_bus->read = dsa_slave_phy_read; ds->slave_mii_bus->write = dsa_slave_phy_write; snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "dsa-%d.%d", - ds->dst->tree, ds->index); + ds->dst->index, ds->index); ds->slave_mii_bus->parent = ds->dev; ds->slave_mii_bus->phy_mask = ~ds->phys_mii_mask; } @@ -64,18 +64,13 @@ void dsa_slave_mii_bus_init(struct dsa_switch *ds) /* slave device handling ****************************************************/ static int dsa_slave_get_iflink(const struct net_device *dev) { - struct dsa_slave_priv *p = netdev_priv(dev); - - return dsa_master_netdev(p)->ifindex; + return dsa_slave_to_master(dev)->ifindex; } static int dsa_slave_open(struct net_device *dev) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_port *dp = p->dp; - struct dsa_switch *ds = dp->ds; - struct net_device *master = dsa_master_netdev(p); - u8 stp_state = dp->bridge_dev ? BR_STATE_BLOCKING : BR_STATE_FORWARDING; + struct net_device *master = dsa_slave_to_master(dev); + struct dsa_port *dp = dsa_slave_to_port(dev); int err; if (!(master->flags & IFF_UP)) @@ -98,16 +93,12 @@ static int dsa_slave_open(struct net_device *dev) goto clear_allmulti; } - if (ds->ops->port_enable) { - err = ds->ops->port_enable(ds, p->dp->index, p->phy); - if (err) - goto clear_promisc; - } - - dsa_port_set_state_now(p->dp, stp_state); + err = dsa_port_enable(dp, dev->phydev); + if (err) + goto clear_promisc; - if (p->phy) - phy_start(p->phy); + if (dev->phydev) + phy_start(dev->phydev); return 0; @@ -126,12 +117,13 @@ out: static int dsa_slave_close(struct net_device *dev) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct net_device *master = dsa_master_netdev(p); - struct dsa_switch *ds = p->dp->ds; + struct net_device *master = dsa_slave_to_master(dev); + struct dsa_port *dp = dsa_slave_to_port(dev); + + if (dev->phydev) + phy_stop(dev->phydev); - if (p->phy) - phy_stop(p->phy); + dsa_port_disable(dp, dev->phydev); dev_mc_unsync(master, dev); dev_uc_unsync(master, dev); @@ -143,18 +135,12 @@ static int dsa_slave_close(struct net_device *dev) if (!ether_addr_equal(dev->dev_addr, master->dev_addr)) dev_uc_del(master, dev->dev_addr); - if (ds->ops->port_disable) - ds->ops->port_disable(ds, p->dp->index, p->phy); - - dsa_port_set_state_now(p->dp, BR_STATE_DISABLED); - return 0; } static void dsa_slave_change_rx_flags(struct net_device *dev, int change) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct net_device *master = dsa_master_netdev(p); + struct net_device *master = dsa_slave_to_master(dev); if (change & IFF_ALLMULTI) dev_set_allmulti(master, dev->flags & IFF_ALLMULTI ? 1 : -1); @@ -164,8 +150,7 @@ static void dsa_slave_change_rx_flags(struct net_device *dev, int change) static void dsa_slave_set_rx_mode(struct net_device *dev) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct net_device *master = dsa_master_netdev(p); + struct net_device *master = dsa_slave_to_master(dev); dev_mc_sync(master, dev); dev_uc_sync(master, dev); @@ -173,8 +158,7 @@ static void dsa_slave_set_rx_mode(struct net_device *dev) static int dsa_slave_set_mac_address(struct net_device *dev, void *a) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct net_device *master = dsa_master_netdev(p); + struct net_device *master = dsa_slave_to_master(dev); struct sockaddr *addr = a; int err; @@ -199,22 +183,90 @@ out: return 0; } -static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +struct dsa_slave_dump_ctx { + struct net_device *dev; + struct sk_buff *skb; + struct netlink_callback *cb; + int idx; +}; + +static int +dsa_slave_port_fdb_do_dump(const unsigned char *addr, u16 vid, + bool is_static, void *data) { - struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_slave_dump_ctx *dump = data; + u32 portid = NETLINK_CB(dump->cb->skb).portid; + u32 seq = dump->cb->nlh->nlmsg_seq; + struct nlmsghdr *nlh; + struct ndmsg *ndm; + + if (dump->idx < dump->cb->args[2]) + goto skip; + + nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH, + sizeof(*ndm), NLM_F_MULTI); + if (!nlh) + return -EMSGSIZE; + + ndm = nlmsg_data(nlh); + ndm->ndm_family = AF_BRIDGE; + ndm->ndm_pad1 = 0; + ndm->ndm_pad2 = 0; + ndm->ndm_flags = NTF_SELF; + ndm->ndm_type = 0; + ndm->ndm_ifindex = dump->dev->ifindex; + ndm->ndm_state = is_static ? NUD_NOARP : NUD_REACHABLE; + + if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, addr)) + goto nla_put_failure; + + if (vid && nla_put_u16(dump->skb, NDA_VLAN, vid)) + goto nla_put_failure; + + nlmsg_end(dump->skb, nlh); + +skip: + dump->idx++; + return 0; - if (p->phy != NULL) - return phy_mii_ioctl(p->phy, ifr, cmd); +nla_put_failure: + nlmsg_cancel(dump->skb, nlh); + return -EMSGSIZE; +} - return -EOPNOTSUPP; +static int +dsa_slave_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, + struct net_device *dev, struct net_device *filter_dev, + int *idx) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_slave_dump_ctx dump = { + .dev = dev, + .skb = skb, + .cb = cb, + .idx = *idx, + }; + int err; + + err = dsa_port_fdb_dump(dp, dsa_slave_port_fdb_do_dump, &dump); + *idx = dump.idx; + + return err; +} + +static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) +{ + if (!dev->phydev) + return -ENODEV; + + return phy_mii_ioctl(dev->phydev, ifr, cmd); } static int dsa_slave_port_attr_set(struct net_device *dev, const struct switchdev_attr *attr, struct switchdev_trans *trans) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_port *dp = p->dp; + struct dsa_port *dp = dsa_slave_to_port(dev); int ret; switch (attr->id) { @@ -240,8 +292,7 @@ static int dsa_slave_port_obj_add(struct net_device *dev, const struct switchdev_obj *obj, struct switchdev_trans *trans) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_port *dp = p->dp; + struct dsa_port *dp = dsa_slave_to_port(dev); int err; /* For the prepare phase, ensure the full set of changes is feasable in @@ -250,12 +301,16 @@ static int dsa_slave_port_obj_add(struct net_device *dev, */ switch (obj->id) { - case SWITCHDEV_OBJ_ID_PORT_FDB: - err = dsa_port_fdb_add(dp, SWITCHDEV_OBJ_PORT_FDB(obj), trans); - break; case SWITCHDEV_OBJ_ID_PORT_MDB: err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj), trans); break; + case SWITCHDEV_OBJ_ID_HOST_MDB: + /* DSA can directly translate this to a normal MDB add, + * but on the CPU port. + */ + err = dsa_port_mdb_add(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj), + trans); + break; case SWITCHDEV_OBJ_ID_PORT_VLAN: err = dsa_port_vlan_add(dp, SWITCHDEV_OBJ_PORT_VLAN(obj), trans); @@ -271,45 +326,21 @@ static int dsa_slave_port_obj_add(struct net_device *dev, static int dsa_slave_port_obj_del(struct net_device *dev, const struct switchdev_obj *obj) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_port *dp = p->dp; + struct dsa_port *dp = dsa_slave_to_port(dev); int err; switch (obj->id) { - case SWITCHDEV_OBJ_ID_PORT_FDB: - err = dsa_port_fdb_del(dp, SWITCHDEV_OBJ_PORT_FDB(obj)); - break; case SWITCHDEV_OBJ_ID_PORT_MDB: err = dsa_port_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj)); break; - case SWITCHDEV_OBJ_ID_PORT_VLAN: - err = dsa_port_vlan_del(dp, SWITCHDEV_OBJ_PORT_VLAN(obj)); - break; - default: - err = -EOPNOTSUPP; - break; - } - - return err; -} - -static int dsa_slave_port_obj_dump(struct net_device *dev, - struct switchdev_obj *obj, - switchdev_obj_dump_cb_t *cb) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_port *dp = p->dp; - int err; - - switch (obj->id) { - case SWITCHDEV_OBJ_ID_PORT_FDB: - err = dsa_port_fdb_dump(dp, SWITCHDEV_OBJ_PORT_FDB(obj), cb); - break; - case SWITCHDEV_OBJ_ID_PORT_MDB: - err = dsa_port_mdb_dump(dp, SWITCHDEV_OBJ_PORT_MDB(obj), cb); + case SWITCHDEV_OBJ_ID_HOST_MDB: + /* DSA can directly translate this to a normal MDB add, + * but on the CPU port. + */ + err = dsa_port_mdb_del(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj)); break; case SWITCHDEV_OBJ_ID_PORT_VLAN: - err = dsa_port_vlan_dump(dp, SWITCHDEV_OBJ_PORT_VLAN(obj), cb); + err = dsa_port_vlan_del(dp, SWITCHDEV_OBJ_PORT_VLAN(obj)); break; default: err = -EOPNOTSUPP; @@ -322,13 +353,17 @@ static int dsa_slave_port_obj_dump(struct net_device *dev, static int dsa_slave_port_attr_get(struct net_device *dev, struct switchdev_attr *attr) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + struct dsa_switch_tree *dst = ds->dst; switch (attr->id) { case SWITCHDEV_ATTR_ID_PORT_PARENT_ID: - attr->u.ppid.id_len = sizeof(ds->index); - memcpy(&attr->u.ppid.id, &ds->index, attr->u.ppid.id_len); + attr->u.ppid.id_len = sizeof(dst->index); + memcpy(&attr->u.ppid.id, &dst->index, attr->u.ppid.id_len); + break; + case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS_SUPPORT: + attr->u.brport_flags_support = 0; break; default: return -EOPNOTSUPP; @@ -337,10 +372,12 @@ static int dsa_slave_port_attr_get(struct net_device *dev, return 0; } -static inline netdev_tx_t dsa_netpoll_send_skb(struct dsa_slave_priv *p, - struct sk_buff *skb) +static inline netdev_tx_t dsa_slave_netpoll_send_skb(struct net_device *dev, + struct sk_buff *skb) { #ifdef CONFIG_NET_POLL_CONTROLLER + struct dsa_slave_priv *p = netdev_priv(dev); + if (p->netpoll) netpoll_send_skb(p->netpoll, skb); #else @@ -352,10 +389,14 @@ static inline netdev_tx_t dsa_netpoll_send_skb(struct dsa_slave_priv *p, static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); + struct pcpu_sw_netstats *s; struct sk_buff *nskb; - dev->stats.tx_packets++; - dev->stats.tx_bytes += skb->len; + s = this_cpu_ptr(p->stats64); + u64_stats_update_begin(&s->syncp); + s->tx_packets++; + s->tx_bytes += skb->len; + u64_stats_update_end(&s->syncp); /* Transmit function may have to reallocate the original SKB, * in which case it must have freed it. Only free it here on error. @@ -370,43 +411,18 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev) * tag to be successfully transmitted */ if (unlikely(netpoll_tx_running(dev))) - return dsa_netpoll_send_skb(p, nskb); + return dsa_slave_netpoll_send_skb(dev, nskb); /* Queue the SKB for transmission on the parent interface, but * do not modify its EtherType */ - nskb->dev = dsa_master_netdev(p); + nskb->dev = dsa_slave_to_master(dev); dev_queue_xmit(nskb); return NETDEV_TX_OK; } /* ethtool operations *******************************************************/ -static int -dsa_slave_get_link_ksettings(struct net_device *dev, - struct ethtool_link_ksettings *cmd) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - - if (!p->phy) - return -EOPNOTSUPP; - - phy_ethtool_ksettings_get(p->phy, cmd); - - return 0; -} - -static int -dsa_slave_set_link_ksettings(struct net_device *dev, - const struct ethtool_link_ksettings *cmd) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - - if (p->phy != NULL) - return phy_ethtool_ksettings_set(p->phy, cmd); - - return -EOPNOTSUPP; -} static void dsa_slave_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) @@ -418,11 +434,11 @@ static void dsa_slave_get_drvinfo(struct net_device *dev, static int dsa_slave_get_regs_len(struct net_device *dev) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; if (ds->ops->get_regs_len) - return ds->ops->get_regs_len(ds, p->dp->index); + return ds->ops->get_regs_len(ds, dp->index); return -EOPNOTSUPP; } @@ -430,39 +446,27 @@ static int dsa_slave_get_regs_len(struct net_device *dev) static void dsa_slave_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *_p) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; if (ds->ops->get_regs) - ds->ops->get_regs(ds, p->dp->index, regs, _p); -} - -static int dsa_slave_nway_reset(struct net_device *dev) -{ - struct dsa_slave_priv *p = netdev_priv(dev); - - if (p->phy != NULL) - return genphy_restart_aneg(p->phy); - - return -EOPNOTSUPP; + ds->ops->get_regs(ds, dp->index, regs, _p); } static u32 dsa_slave_get_link(struct net_device *dev) { - struct dsa_slave_priv *p = netdev_priv(dev); + if (!dev->phydev) + return -ENODEV; - if (p->phy != NULL) { - genphy_update_link(p->phy); - return p->phy->link; - } + genphy_update_link(dev->phydev); - return -EOPNOTSUPP; + return dev->phydev->link; } static int dsa_slave_get_eeprom_len(struct net_device *dev) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; if (ds->cd && ds->cd->eeprom_len) return ds->cd->eeprom_len; @@ -476,8 +480,8 @@ static int dsa_slave_get_eeprom_len(struct net_device *dev) static int dsa_slave_get_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, u8 *data) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; if (ds->ops->get_eeprom) return ds->ops->get_eeprom(ds, eeprom, data); @@ -488,8 +492,8 @@ static int dsa_slave_get_eeprom(struct net_device *dev, static int dsa_slave_set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, u8 *data) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; if (ds->ops->set_eeprom) return ds->ops->set_eeprom(ds, eeprom, data); @@ -500,8 +504,8 @@ static int dsa_slave_set_eeprom(struct net_device *dev, static void dsa_slave_get_strings(struct net_device *dev, uint32_t stringset, uint8_t *data) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; if (stringset == ETH_SS_STATS) { int len = ETH_GSTRING_LEN; @@ -511,80 +515,7 @@ static void dsa_slave_get_strings(struct net_device *dev, strncpy(data + 2 * len, "rx_packets", len); strncpy(data + 3 * len, "rx_bytes", len); if (ds->ops->get_strings) - ds->ops->get_strings(ds, p->dp->index, data + 4 * len); - } -} - -static void dsa_cpu_port_get_ethtool_stats(struct net_device *dev, - struct ethtool_stats *stats, - uint64_t *data) -{ - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_port *cpu_dp = dsa_get_cpu_port(dst); - struct dsa_switch *ds = cpu_dp->ds; - s8 cpu_port = cpu_dp->index; - int count = 0; - - if (cpu_dp->ethtool_ops.get_sset_count) { - count = cpu_dp->ethtool_ops.get_sset_count(dev, ETH_SS_STATS); - cpu_dp->ethtool_ops.get_ethtool_stats(dev, stats, data); - } - - if (ds->ops->get_ethtool_stats) - ds->ops->get_ethtool_stats(ds, cpu_port, data + count); -} - -static int dsa_cpu_port_get_sset_count(struct net_device *dev, int sset) -{ - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_port *cpu_dp = dsa_get_cpu_port(dst); - struct dsa_switch *ds = cpu_dp->ds; - int count = 0; - - if (cpu_dp->ethtool_ops.get_sset_count) - count += cpu_dp->ethtool_ops.get_sset_count(dev, sset); - - if (sset == ETH_SS_STATS && ds->ops->get_sset_count) - count += ds->ops->get_sset_count(ds); - - return count; -} - -static void dsa_cpu_port_get_strings(struct net_device *dev, - uint32_t stringset, uint8_t *data) -{ - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_port *cpu_dp = dsa_get_cpu_port(dst); - struct dsa_switch *ds = cpu_dp->ds; - s8 cpu_port = cpu_dp->index; - int len = ETH_GSTRING_LEN; - int mcount = 0, count; - unsigned int i; - uint8_t pfx[4]; - uint8_t *ndata; - - snprintf(pfx, sizeof(pfx), "p%.2d", cpu_port); - /* We do not want to be NULL-terminated, since this is a prefix */ - pfx[sizeof(pfx) - 1] = '_'; - - if (cpu_dp->ethtool_ops.get_sset_count) { - mcount = cpu_dp->ethtool_ops.get_sset_count(dev, ETH_SS_STATS); - cpu_dp->ethtool_ops.get_strings(dev, stringset, data); - } - - if (stringset == ETH_SS_STATS && ds->ops->get_strings) { - ndata = data + mcount * len; - /* This function copies ETH_GSTRINGS_LEN bytes, we will mangle - * the output after to prepend our CPU port prefix we - * constructed earlier - */ - ds->ops->get_strings(ds, cpu_port, ndata); - count = ds->ops->get_sset_count(ds); - for (i = 0; i < count; i++) { - memmove(ndata + (i * len + sizeof(pfx)), - ndata + i * len, len - sizeof(pfx)); - memcpy(ndata + i * len, pfx, sizeof(pfx)); - } + ds->ops->get_strings(ds, dp->index, data + 4 * len); } } @@ -592,21 +523,37 @@ static void dsa_slave_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, uint64_t *data) { + struct dsa_port *dp = dsa_slave_to_port(dev); struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; - - data[0] = dev->stats.tx_packets; - data[1] = dev->stats.tx_bytes; - data[2] = dev->stats.rx_packets; - data[3] = dev->stats.rx_bytes; + struct dsa_switch *ds = dp->ds; + struct pcpu_sw_netstats *s; + unsigned int start; + int i; + + for_each_possible_cpu(i) { + u64 tx_packets, tx_bytes, rx_packets, rx_bytes; + + s = per_cpu_ptr(p->stats64, i); + do { + start = u64_stats_fetch_begin_irq(&s->syncp); + tx_packets = s->tx_packets; + tx_bytes = s->tx_bytes; + rx_packets = s->rx_packets; + rx_bytes = s->rx_bytes; + } while (u64_stats_fetch_retry_irq(&s->syncp, start)); + data[0] += tx_packets; + data[1] += tx_bytes; + data[2] += rx_packets; + data[3] += rx_bytes; + } if (ds->ops->get_ethtool_stats) - ds->ops->get_ethtool_stats(ds, p->dp->index, data + 4); + ds->ops->get_ethtool_stats(ds, dp->index, data + 4); } static int dsa_slave_get_sset_count(struct net_device *dev, int sset) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; if (sset == ETH_SS_STATS) { int count; @@ -623,69 +570,77 @@ static int dsa_slave_get_sset_count(struct net_device *dev, int sset) static void dsa_slave_get_wol(struct net_device *dev, struct ethtool_wolinfo *w) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; if (ds->ops->get_wol) - ds->ops->get_wol(ds, p->dp->index, w); + ds->ops->get_wol(ds, dp->index, w); } static int dsa_slave_set_wol(struct net_device *dev, struct ethtool_wolinfo *w) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; int ret = -EOPNOTSUPP; if (ds->ops->set_wol) - ret = ds->ops->set_wol(ds, p->dp->index, w); + ret = ds->ops->set_wol(ds, dp->index, w); return ret; } static int dsa_slave_set_eee(struct net_device *dev, struct ethtool_eee *e) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; int ret; - if (!ds->ops->set_eee) + /* Port's PHY and MAC both need to be EEE capable */ + if (!dev->phydev) + return -ENODEV; + + if (!ds->ops->set_mac_eee) return -EOPNOTSUPP; - ret = ds->ops->set_eee(ds, p->dp->index, p->phy, e); + ret = ds->ops->set_mac_eee(ds, dp->index, e); if (ret) return ret; - if (p->phy) - ret = phy_ethtool_set_eee(p->phy, e); + if (e->eee_enabled) { + ret = phy_init_eee(dev->phydev, 0); + if (ret) + return ret; + } - return ret; + return phy_ethtool_set_eee(dev->phydev, e); } static int dsa_slave_get_eee(struct net_device *dev, struct ethtool_eee *e) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; int ret; - if (!ds->ops->get_eee) + /* Port's PHY and MAC both need to be EEE capable */ + if (!dev->phydev) + return -ENODEV; + + if (!ds->ops->get_mac_eee) return -EOPNOTSUPP; - ret = ds->ops->get_eee(ds, p->dp->index, e); + ret = ds->ops->get_mac_eee(ds, dp->index, e); if (ret) return ret; - if (p->phy) - ret = phy_ethtool_get_eee(p->phy, e); - - return ret; + return phy_ethtool_get_eee(dev->phydev, e); } #ifdef CONFIG_NET_POLL_CONTROLLER static int dsa_slave_netpoll_setup(struct net_device *dev, struct netpoll_info *ni) { + struct net_device *master = dsa_slave_to_master(dev); struct dsa_slave_priv *p = netdev_priv(dev); - struct net_device *master = dsa_master_netdev(p); struct netpoll *netpoll; int err = 0; @@ -725,18 +680,18 @@ static void dsa_slave_poll_controller(struct net_device *dev) static int dsa_slave_get_phys_port_name(struct net_device *dev, char *name, size_t len) { - struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_port *dp = dsa_slave_to_port(dev); - if (snprintf(name, len, "p%d", p->dp->index) >= len) + if (snprintf(name, len, "p%d", dp->index) >= len) return -EINVAL; return 0; } static struct dsa_mall_tc_entry * -dsa_slave_mall_tc_entry_find(struct dsa_slave_priv *p, - unsigned long cookie) +dsa_slave_mall_tc_entry_find(struct net_device *dev, unsigned long cookie) { + struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_mall_tc_entry *mall_tc_entry; list_for_each_entry(mall_tc_entry, &p->mall_tc_list, list) @@ -747,17 +702,18 @@ dsa_slave_mall_tc_entry_find(struct dsa_slave_priv *p, } static int dsa_slave_add_cls_matchall(struct net_device *dev, - __be16 protocol, struct tc_cls_matchall_offload *cls, bool ingress) { + struct dsa_port *dp = dsa_slave_to_port(dev); struct dsa_slave_priv *p = netdev_priv(dev); struct dsa_mall_tc_entry *mall_tc_entry; - struct dsa_switch *ds = p->dp->ds; + __be16 protocol = cls->common.protocol; struct net *net = dev_net(dev); - struct dsa_slave_priv *to_p; + struct dsa_switch *ds = dp->ds; struct net_device *to_dev; const struct tc_action *a; + struct dsa_port *to_dp; int err = -EOPNOTSUPP; LIST_HEAD(actions); int ifindex; @@ -765,7 +721,7 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev, if (!ds->ops->port_mirror_add) return err; - if (!tc_single_action(cls->exts)) + if (!tcf_exts_has_one_action(cls->exts)) return err; tcf_exts_to_list(cls->exts, &actions); @@ -790,13 +746,12 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev, mall_tc_entry->type = DSA_PORT_MALL_MIRROR; mirror = &mall_tc_entry->mirror; - to_p = netdev_priv(to_dev); + to_dp = dsa_slave_to_port(to_dev); - mirror->to_local_port = to_p->dp->index; + mirror->to_local_port = to_dp->index; mirror->ingress = ingress; - err = ds->ops->port_mirror_add(ds, p->dp->index, mirror, - ingress); + err = ds->ops->port_mirror_add(ds, dp->index, mirror, ingress); if (err) { kfree(mall_tc_entry); return err; @@ -811,14 +766,14 @@ static int dsa_slave_add_cls_matchall(struct net_device *dev, static void dsa_slave_del_cls_matchall(struct net_device *dev, struct tc_cls_matchall_offload *cls) { - struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_port *dp = dsa_slave_to_port(dev); struct dsa_mall_tc_entry *mall_tc_entry; - struct dsa_switch *ds = p->dp->ds; + struct dsa_switch *ds = dp->ds; if (!ds->ops->port_mirror_del) return; - mall_tc_entry = dsa_slave_mall_tc_entry_find(p, cls->cookie); + mall_tc_entry = dsa_slave_mall_tc_entry_find(dev, cls->cookie); if (!mall_tc_entry) return; @@ -826,8 +781,7 @@ static void dsa_slave_del_cls_matchall(struct net_device *dev, switch (mall_tc_entry->type) { case DSA_PORT_MALL_MIRROR: - ds->ops->port_mirror_del(ds, p->dp->index, - &mall_tc_entry->mirror); + ds->ops->port_mirror_del(ds, dp->index, &mall_tc_entry->mirror); break; default: WARN_ON(1); @@ -836,67 +790,143 @@ static void dsa_slave_del_cls_matchall(struct net_device *dev, kfree(mall_tc_entry); } -static int dsa_slave_setup_tc(struct net_device *dev, u32 handle, - u32 chain_index, __be16 protocol, - struct tc_to_netdev *tc) +static int dsa_slave_setup_tc_cls_matchall(struct net_device *dev, + struct tc_cls_matchall_offload *cls, + bool ingress) +{ + if (cls->common.chain_index) + return -EOPNOTSUPP; + + switch (cls->command) { + case TC_CLSMATCHALL_REPLACE: + return dsa_slave_add_cls_matchall(dev, cls, ingress); + case TC_CLSMATCHALL_DESTROY: + dsa_slave_del_cls_matchall(dev, cls); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static int dsa_slave_setup_tc_block_cb(enum tc_setup_type type, void *type_data, + void *cb_priv, bool ingress) { - bool ingress = TC_H_MAJ(handle) == TC_H_MAJ(TC_H_INGRESS); + struct net_device *dev = cb_priv; - if (chain_index) + if (!tc_can_offload(dev)) return -EOPNOTSUPP; - switch (tc->type) { - case TC_SETUP_MATCHALL: - switch (tc->cls_mall->command) { - case TC_CLSMATCHALL_REPLACE: - return dsa_slave_add_cls_matchall(dev, protocol, - tc->cls_mall, - ingress); - case TC_CLSMATCHALL_DESTROY: - dsa_slave_del_cls_matchall(dev, tc->cls_mall); - return 0; - } + switch (type) { + case TC_SETUP_CLSMATCHALL: + return dsa_slave_setup_tc_cls_matchall(dev, type_data, ingress); default: return -EOPNOTSUPP; } } -void dsa_cpu_port_ethtool_init(struct ethtool_ops *ops) +static int dsa_slave_setup_tc_block_cb_ig(enum tc_setup_type type, + void *type_data, void *cb_priv) +{ + return dsa_slave_setup_tc_block_cb(type, type_data, cb_priv, true); +} + +static int dsa_slave_setup_tc_block_cb_eg(enum tc_setup_type type, + void *type_data, void *cb_priv) { - ops->get_sset_count = dsa_cpu_port_get_sset_count; - ops->get_ethtool_stats = dsa_cpu_port_get_ethtool_stats; - ops->get_strings = dsa_cpu_port_get_strings; + return dsa_slave_setup_tc_block_cb(type, type_data, cb_priv, false); +} + +static int dsa_slave_setup_tc_block(struct net_device *dev, + struct tc_block_offload *f) +{ + tc_setup_cb_t *cb; + + if (f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + cb = dsa_slave_setup_tc_block_cb_ig; + else if (f->binder_type == TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS) + cb = dsa_slave_setup_tc_block_cb_eg; + else + return -EOPNOTSUPP; + + switch (f->command) { + case TC_BLOCK_BIND: + return tcf_block_cb_register(f->block, cb, dev, dev); + case TC_BLOCK_UNBIND: + tcf_block_cb_unregister(f->block, cb, dev); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static int dsa_slave_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) +{ + switch (type) { + case TC_SETUP_BLOCK: + return dsa_slave_setup_tc_block(dev, type_data); + default: + return -EOPNOTSUPP; + } +} + +static void dsa_slave_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct pcpu_sw_netstats *s; + unsigned int start; + int i; + + netdev_stats_to_stats64(stats, &dev->stats); + for_each_possible_cpu(i) { + u64 tx_packets, tx_bytes, rx_packets, rx_bytes; + + s = per_cpu_ptr(p->stats64, i); + do { + start = u64_stats_fetch_begin_irq(&s->syncp); + tx_packets = s->tx_packets; + tx_bytes = s->tx_bytes; + rx_packets = s->rx_packets; + rx_bytes = s->rx_bytes; + } while (u64_stats_fetch_retry_irq(&s->syncp, start)); + + stats->tx_packets += tx_packets; + stats->tx_bytes += tx_bytes; + stats->rx_packets += rx_packets; + stats->rx_bytes += rx_bytes; + } } static int dsa_slave_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *nfc, u32 *rule_locs) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; if (!ds->ops->get_rxnfc) return -EOPNOTSUPP; - return ds->ops->get_rxnfc(ds, p->dp->index, nfc, rule_locs); + return ds->ops->get_rxnfc(ds, dp->index, nfc, rule_locs); } static int dsa_slave_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *nfc) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; if (!ds->ops->set_rxnfc) return -EOPNOTSUPP; - return ds->ops->set_rxnfc(ds, p->dp->index, nfc); + return ds->ops->set_rxnfc(ds, dp->index, nfc); } static const struct ethtool_ops dsa_slave_ethtool_ops = { .get_drvinfo = dsa_slave_get_drvinfo, .get_regs_len = dsa_slave_get_regs_len, .get_regs = dsa_slave_get_regs, - .nway_reset = dsa_slave_nway_reset, + .nway_reset = phy_ethtool_nway_reset, .get_link = dsa_slave_get_link, .get_eeprom_len = dsa_slave_get_eeprom_len, .get_eeprom = dsa_slave_get_eeprom, @@ -908,8 +938,8 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = { .get_wol = dsa_slave_get_wol, .set_eee = dsa_slave_set_eee, .get_eee = dsa_slave_get_eee, - .get_link_ksettings = dsa_slave_get_link_ksettings, - .set_link_ksettings = dsa_slave_set_link_ksettings, + .get_link_ksettings = phy_ethtool_get_link_ksettings, + .set_link_ksettings = phy_ethtool_set_link_ksettings, .get_rxnfc = dsa_slave_get_rxnfc, .set_rxnfc = dsa_slave_set_rxnfc, }; @@ -921,9 +951,9 @@ static const struct net_device_ops dsa_slave_netdev_ops = { .ndo_change_rx_flags = dsa_slave_change_rx_flags, .ndo_set_rx_mode = dsa_slave_set_rx_mode, .ndo_set_mac_address = dsa_slave_set_mac_address, - .ndo_fdb_add = switchdev_port_fdb_add, - .ndo_fdb_del = switchdev_port_fdb_del, - .ndo_fdb_dump = switchdev_port_fdb_dump, + .ndo_fdb_add = dsa_legacy_fdb_add, + .ndo_fdb_del = dsa_legacy_fdb_del, + .ndo_fdb_dump = dsa_slave_fdb_dump, .ndo_do_ioctl = dsa_slave_ioctl, .ndo_get_iflink = dsa_slave_get_iflink, #ifdef CONFIG_NET_POLL_CONTROLLER @@ -931,11 +961,9 @@ static const struct net_device_ops dsa_slave_netdev_ops = { .ndo_netpoll_cleanup = dsa_slave_netpoll_cleanup, .ndo_poll_controller = dsa_slave_poll_controller, #endif - .ndo_bridge_getlink = switchdev_port_bridge_getlink, - .ndo_bridge_setlink = switchdev_port_bridge_setlink, - .ndo_bridge_dellink = switchdev_port_bridge_dellink, .ndo_get_phys_port_name = dsa_slave_get_phys_port_name, .ndo_setup_tc = dsa_slave_setup_tc, + .ndo_get_stats64 = dsa_slave_get_stats64, }; static const struct switchdev_ops dsa_slave_switchdev_ops = { @@ -943,7 +971,6 @@ static const struct switchdev_ops dsa_slave_switchdev_ops = { .switchdev_port_attr_set = dsa_slave_port_attr_set, .switchdev_port_obj_add = dsa_slave_port_obj_add, .switchdev_port_obj_del = dsa_slave_port_obj_del, - .switchdev_port_obj_dump = dsa_slave_port_obj_dump, }; static struct device_type dsa_type = { @@ -952,78 +979,81 @@ static struct device_type dsa_type = { static void dsa_slave_adjust_link(struct net_device *dev) { + struct dsa_port *dp = dsa_slave_to_port(dev); struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_switch *ds = p->dp->ds; + struct dsa_switch *ds = dp->ds; unsigned int status_changed = 0; - if (p->old_link != p->phy->link) { + if (p->old_link != dev->phydev->link) { status_changed = 1; - p->old_link = p->phy->link; + p->old_link = dev->phydev->link; } - if (p->old_duplex != p->phy->duplex) { + if (p->old_duplex != dev->phydev->duplex) { status_changed = 1; - p->old_duplex = p->phy->duplex; + p->old_duplex = dev->phydev->duplex; } - if (p->old_pause != p->phy->pause) { + if (p->old_pause != dev->phydev->pause) { status_changed = 1; - p->old_pause = p->phy->pause; + p->old_pause = dev->phydev->pause; } if (ds->ops->adjust_link && status_changed) - ds->ops->adjust_link(ds, p->dp->index, p->phy); + ds->ops->adjust_link(ds, dp->index, dev->phydev); if (status_changed) - phy_print_status(p->phy); + phy_print_status(dev->phydev); } static int dsa_slave_fixed_link_update(struct net_device *dev, struct fixed_phy_status *status) { - struct dsa_slave_priv *p; struct dsa_switch *ds; + struct dsa_port *dp; if (dev) { - p = netdev_priv(dev); - ds = p->dp->ds; + dp = dsa_slave_to_port(dev); + ds = dp->ds; if (ds->ops->fixed_link_update) - ds->ops->fixed_link_update(ds, p->dp->index, status); + ds->ops->fixed_link_update(ds, dp->index, status); } return 0; } /* slave device setup *******************************************************/ -static int dsa_slave_phy_connect(struct dsa_slave_priv *p, - struct net_device *slave_dev, - int addr) +static int dsa_slave_phy_connect(struct net_device *slave_dev, int addr) { - struct dsa_switch *ds = p->dp->ds; + struct dsa_port *dp = dsa_slave_to_port(slave_dev); + struct dsa_slave_priv *p = netdev_priv(slave_dev); + struct dsa_switch *ds = dp->ds; - p->phy = mdiobus_get_phy(ds->slave_mii_bus, addr); - if (!p->phy) { + slave_dev->phydev = mdiobus_get_phy(ds->slave_mii_bus, addr); + if (!slave_dev->phydev) { netdev_err(slave_dev, "no phy at %d\n", addr); return -ENODEV; } /* Use already configured phy mode */ if (p->phy_interface == PHY_INTERFACE_MODE_NA) - p->phy_interface = p->phy->interface; - return phy_connect_direct(slave_dev, p->phy, dsa_slave_adjust_link, - p->phy_interface); + p->phy_interface = slave_dev->phydev->interface; + + return phy_connect_direct(slave_dev, slave_dev->phydev, + dsa_slave_adjust_link, p->phy_interface); } -static int dsa_slave_phy_setup(struct dsa_slave_priv *p, - struct net_device *slave_dev) +static int dsa_slave_phy_setup(struct net_device *slave_dev) { - struct dsa_switch *ds = p->dp->ds; - struct device_node *phy_dn, *port_dn; + struct dsa_port *dp = dsa_slave_to_port(slave_dev); + struct dsa_slave_priv *p = netdev_priv(slave_dev); + struct device_node *port_dn = dp->dn; + struct dsa_switch *ds = dp->ds; + struct device_node *phy_dn; bool phy_is_fixed = false; u32 phy_flags = 0; int mode, ret; - port_dn = p->dp->dn; mode = of_get_phy_mode(port_dn); if (mode < 0) mode = PHY_INTERFACE_MODE_NA; @@ -1044,52 +1074,35 @@ static int dsa_slave_phy_setup(struct dsa_slave_priv *p, } if (ds->ops->get_phy_flags) - phy_flags = ds->ops->get_phy_flags(ds, p->dp->index); + phy_flags = ds->ops->get_phy_flags(ds, dp->index); if (phy_dn) { - int phy_id = of_mdio_parse_addr(&slave_dev->dev, phy_dn); - - /* If this PHY address is part of phys_mii_mask, which means - * that we need to divert reads and writes to/from it, then we - * want to bind this device using the slave MII bus created by - * DSA to make that happen. - */ - if (!phy_is_fixed && phy_id >= 0 && - (ds->phys_mii_mask & (1 << phy_id))) { - ret = dsa_slave_phy_connect(p, slave_dev, phy_id); - if (ret) { - netdev_err(slave_dev, "failed to connect to phy%d: %d\n", phy_id, ret); - of_node_put(phy_dn); - return ret; - } - } else { - p->phy = of_phy_connect(slave_dev, phy_dn, - dsa_slave_adjust_link, - phy_flags, - p->phy_interface); - } - + slave_dev->phydev = of_phy_connect(slave_dev, phy_dn, + dsa_slave_adjust_link, + phy_flags, + p->phy_interface); of_node_put(phy_dn); } - if (p->phy && phy_is_fixed) - fixed_phy_set_link_update(p->phy, dsa_slave_fixed_link_update); + if (slave_dev->phydev && phy_is_fixed) + fixed_phy_set_link_update(slave_dev->phydev, + dsa_slave_fixed_link_update); /* We could not connect to a designated PHY, so use the switch internal * MDIO bus instead */ - if (!p->phy) { - ret = dsa_slave_phy_connect(p, slave_dev, p->dp->index); + if (!slave_dev->phydev) { + ret = dsa_slave_phy_connect(slave_dev, dp->index); if (ret) { netdev_err(slave_dev, "failed to connect to port %d: %d\n", - p->dp->index, ret); + dp->index, ret); if (phy_is_fixed) of_phy_deregister_fixed_link(port_dn); return ret; } } - phy_attached_info(p->phy); + phy_attached_info(slave_dev->phydev); return 0; } @@ -1109,12 +1122,12 @@ int dsa_slave_suspend(struct net_device *slave_dev) netif_device_detach(slave_dev); - if (p->phy) { - phy_stop(p->phy); + if (slave_dev->phydev) { + phy_stop(slave_dev->phydev); p->old_pause = -1; p->old_link = -1; p->old_duplex = -1; - phy_suspend(p->phy); + phy_suspend(slave_dev->phydev); } return 0; @@ -1122,33 +1135,46 @@ int dsa_slave_suspend(struct net_device *slave_dev) int dsa_slave_resume(struct net_device *slave_dev) { - struct dsa_slave_priv *p = netdev_priv(slave_dev); - netif_device_attach(slave_dev); - if (p->phy) { - phy_resume(p->phy); - phy_start(p->phy); + if (slave_dev->phydev) { + phy_resume(slave_dev->phydev); + phy_start(slave_dev->phydev); } return 0; } -int dsa_slave_create(struct dsa_switch *ds, struct device *parent, - int port, const char *name) +static void dsa_slave_notify(struct net_device *dev, unsigned long val) { - struct dsa_switch_tree *dst = ds->dst; - struct net_device *master; + struct net_device *master = dsa_slave_to_master(dev); + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_notifier_register_info rinfo = { + .switch_number = dp->ds->index, + .port_number = dp->index, + .master = master, + .info.dev = dev, + }; + + call_dsa_notifiers(val, dev, &rinfo.info); +} + +int dsa_slave_create(struct dsa_port *port) +{ + const struct dsa_port *cpu_dp = port->cpu_dp; + struct net_device *master = cpu_dp->master; + struct dsa_switch *ds = port->ds; + const char *name = port->name; struct net_device *slave_dev; struct dsa_slave_priv *p; - struct dsa_port *cpu_dp; int ret; - cpu_dp = ds->dst->cpu_dp; - master = cpu_dp->netdev; + if (!ds->num_tx_queues) + ds->num_tx_queues = 1; - slave_dev = alloc_netdev(sizeof(struct dsa_slave_priv), name, - NET_NAME_UNKNOWN, ether_setup); + slave_dev = alloc_netdev_mqs(sizeof(struct dsa_slave_priv), name, + NET_NAME_UNKNOWN, ether_setup, + ds->num_tx_queues, 1); if (slave_dev == NULL) return -ENOMEM; @@ -1166,57 +1192,72 @@ int dsa_slave_create(struct dsa_switch *ds, struct device *parent, netdev_for_each_tx_queue(slave_dev, dsa_slave_set_lockdep_class_one, NULL); - SET_NETDEV_DEV(slave_dev, parent); - slave_dev->dev.of_node = ds->ports[port].dn; + SET_NETDEV_DEV(slave_dev, port->ds->dev); + slave_dev->dev.of_node = port->dn; slave_dev->vlan_features = master->vlan_features; p = netdev_priv(slave_dev); - p->dp = &ds->ports[port]; + p->stats64 = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); + if (!p->stats64) { + free_netdev(slave_dev); + return -ENOMEM; + } + p->dp = port; INIT_LIST_HEAD(&p->mall_tc_list); - p->xmit = dst->tag_ops->xmit; + p->xmit = cpu_dp->tag_ops->xmit; p->old_pause = -1; p->old_link = -1; p->old_duplex = -1; - ds->ports[port].netdev = slave_dev; - ret = register_netdev(slave_dev); - if (ret) { - netdev_err(master, "error %d registering interface %s\n", - ret, slave_dev->name); - ds->ports[port].netdev = NULL; - free_netdev(slave_dev); - return ret; - } + port->slave = slave_dev; netif_carrier_off(slave_dev); - ret = dsa_slave_phy_setup(p, slave_dev); + ret = dsa_slave_phy_setup(slave_dev); if (ret) { netdev_err(master, "error %d setting up slave phy\n", ret); - unregister_netdev(slave_dev); - free_netdev(slave_dev); - return ret; + goto out_free; + } + + dsa_slave_notify(slave_dev, DSA_PORT_REGISTER); + + ret = register_netdev(slave_dev); + if (ret) { + netdev_err(master, "error %d registering interface %s\n", + ret, slave_dev->name); + goto out_phy; } return 0; + +out_phy: + phy_disconnect(slave_dev->phydev); + if (of_phy_is_fixed_link(port->dn)) + of_phy_deregister_fixed_link(port->dn); +out_free: + free_percpu(p->stats64); + free_netdev(slave_dev); + port->slave = NULL; + return ret; } void dsa_slave_destroy(struct net_device *slave_dev) { + struct dsa_port *dp = dsa_slave_to_port(slave_dev); struct dsa_slave_priv *p = netdev_priv(slave_dev); - struct device_node *port_dn; - - port_dn = p->dp->dn; + struct device_node *port_dn = dp->dn; netif_carrier_off(slave_dev); - if (p->phy) { - phy_disconnect(p->phy); + if (slave_dev->phydev) { + phy_disconnect(slave_dev->phydev); if (of_phy_is_fixed_link(port_dn)) of_phy_deregister_fixed_link(port_dn); } + dsa_slave_notify(slave_dev, DSA_PORT_UNREGISTER); unregister_netdev(slave_dev); + free_percpu(p->stats64); free_netdev(slave_dev); } @@ -1228,8 +1269,7 @@ static bool dsa_slave_dev_check(struct net_device *dev) static int dsa_slave_changeupper(struct net_device *dev, struct netdev_notifier_changeupper_info *info) { - struct dsa_slave_priv *p = netdev_priv(dev); - struct dsa_port *dp = p->dp; + struct dsa_port *dp = dsa_slave_to_port(dev); int err = NOTIFY_DONE; if (netif_is_bridge_master(info->upper_dev)) { @@ -1250,7 +1290,7 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb, { struct net_device *dev = netdev_notifier_info_to_dev(ptr); - if (dev->netdev_ops != &dsa_slave_netdev_ops) + if (!dsa_slave_dev_check(dev)) return NOTIFY_DONE; if (event == NETDEV_CHANGEUPPER) @@ -1259,19 +1299,142 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb, return NOTIFY_DONE; } +struct dsa_switchdev_event_work { + struct work_struct work; + struct switchdev_notifier_fdb_info fdb_info; + struct net_device *dev; + unsigned long event; +}; + +static void dsa_slave_switchdev_event_work(struct work_struct *work) +{ + struct dsa_switchdev_event_work *switchdev_work = + container_of(work, struct dsa_switchdev_event_work, work); + struct net_device *dev = switchdev_work->dev; + struct switchdev_notifier_fdb_info *fdb_info; + struct dsa_port *dp = dsa_slave_to_port(dev); + int err; + + rtnl_lock(); + switch (switchdev_work->event) { + case SWITCHDEV_FDB_ADD_TO_DEVICE: + fdb_info = &switchdev_work->fdb_info; + err = dsa_port_fdb_add(dp, fdb_info->addr, fdb_info->vid); + if (err) { + netdev_dbg(dev, "fdb add failed err=%d\n", err); + break; + } + call_switchdev_notifiers(SWITCHDEV_FDB_OFFLOADED, dev, + &fdb_info->info); + break; + + case SWITCHDEV_FDB_DEL_TO_DEVICE: + fdb_info = &switchdev_work->fdb_info; + err = dsa_port_fdb_del(dp, fdb_info->addr, fdb_info->vid); + if (err) { + netdev_dbg(dev, "fdb del failed err=%d\n", err); + dev_close(dev); + } + break; + } + rtnl_unlock(); + + kfree(switchdev_work->fdb_info.addr); + kfree(switchdev_work); + dev_put(dev); +} + +static int +dsa_slave_switchdev_fdb_work_init(struct dsa_switchdev_event_work * + switchdev_work, + const struct switchdev_notifier_fdb_info * + fdb_info) +{ + memcpy(&switchdev_work->fdb_info, fdb_info, + sizeof(switchdev_work->fdb_info)); + switchdev_work->fdb_info.addr = kzalloc(ETH_ALEN, GFP_ATOMIC); + if (!switchdev_work->fdb_info.addr) + return -ENOMEM; + ether_addr_copy((u8 *)switchdev_work->fdb_info.addr, + fdb_info->addr); + return 0; +} + +/* Called under rcu_read_lock() */ +static int dsa_slave_switchdev_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = switchdev_notifier_info_to_dev(ptr); + struct dsa_switchdev_event_work *switchdev_work; + + if (!dsa_slave_dev_check(dev)) + return NOTIFY_DONE; + + switchdev_work = kzalloc(sizeof(*switchdev_work), GFP_ATOMIC); + if (!switchdev_work) + return NOTIFY_BAD; + + INIT_WORK(&switchdev_work->work, + dsa_slave_switchdev_event_work); + switchdev_work->dev = dev; + switchdev_work->event = event; + + switch (event) { + case SWITCHDEV_FDB_ADD_TO_DEVICE: /* fall through */ + case SWITCHDEV_FDB_DEL_TO_DEVICE: + if (dsa_slave_switchdev_fdb_work_init(switchdev_work, + ptr)) + goto err_fdb_work_init; + dev_hold(dev); + break; + default: + kfree(switchdev_work); + return NOTIFY_DONE; + } + + dsa_schedule_work(&switchdev_work->work); + return NOTIFY_OK; + +err_fdb_work_init: + kfree(switchdev_work); + return NOTIFY_BAD; +} + static struct notifier_block dsa_slave_nb __read_mostly = { - .notifier_call = dsa_slave_netdevice_event, + .notifier_call = dsa_slave_netdevice_event, +}; + +static struct notifier_block dsa_slave_switchdev_notifier = { + .notifier_call = dsa_slave_switchdev_event, }; int dsa_slave_register_notifier(void) { - return register_netdevice_notifier(&dsa_slave_nb); + int err; + + err = register_netdevice_notifier(&dsa_slave_nb); + if (err) + return err; + + err = register_switchdev_notifier(&dsa_slave_switchdev_notifier); + if (err) + goto err_switchdev_nb; + + return 0; + +err_switchdev_nb: + unregister_netdevice_notifier(&dsa_slave_nb); + return err; } void dsa_slave_unregister_notifier(void) { int err; + err = unregister_switchdev_notifier(&dsa_slave_switchdev_notifier); + if (err) + pr_err("DSA: failed to unregister switchdev notifier (%d)\n", err); + err = unregister_netdevice_notifier(&dsa_slave_nb); if (err) pr_err("DSA: failed to unregister slave notifier (%d)\n", err); diff --git a/net/dsa/switch.c b/net/dsa/switch.c index 97e2e9c8cf3f..29608d087a7c 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -83,30 +83,20 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds, static int dsa_switch_fdb_add(struct dsa_switch *ds, struct dsa_notifier_fdb_info *info) { - const struct switchdev_obj_port_fdb *fdb = info->fdb; - struct switchdev_trans *trans = info->trans; - /* Do not care yet about other switch chips of the fabric */ if (ds->index != info->sw_index) return 0; - if (switchdev_trans_ph_prepare(trans)) { - if (!ds->ops->port_fdb_prepare || !ds->ops->port_fdb_add) - return -EOPNOTSUPP; - - return ds->ops->port_fdb_prepare(ds, info->port, fdb, trans); - } - - ds->ops->port_fdb_add(ds, info->port, fdb, trans); + if (!ds->ops->port_fdb_add) + return -EOPNOTSUPP; - return 0; + return ds->ops->port_fdb_add(ds, info->port, info->addr, + info->vid); } static int dsa_switch_fdb_del(struct dsa_switch *ds, struct dsa_notifier_fdb_info *info) { - const struct switchdev_obj_port_fdb *fdb = info->fdb; - /* Do not care yet about other switch chips of the fabric */ if (ds->index != info->sw_index) return 0; @@ -114,7 +104,8 @@ static int dsa_switch_fdb_del(struct dsa_switch *ds, if (!ds->ops->port_fdb_del) return -EOPNOTSUPP; - return ds->ops->port_fdb_del(ds, info->port, fdb); + return ds->ops->port_fdb_del(ds, info->port, info->addr, + info->vid); } static int dsa_switch_mdb_add(struct dsa_switch *ds, @@ -130,7 +121,7 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds, if (ds->index == info->sw_index) set_bit(info->port, group); for (port = 0; port < ds->num_ports; port++) - if (dsa_is_cpu_port(ds, port) || dsa_is_dsa_port(ds, port)) + if (dsa_is_dsa_port(ds, port)) set_bit(port, group); if (switchdev_trans_ph_prepare(trans)) { @@ -142,6 +133,8 @@ static int dsa_switch_mdb_add(struct dsa_switch *ds, if (err) return err; } + + return 0; } for_each_set_bit(port, group, ds->num_ports) @@ -189,6 +182,8 @@ static int dsa_switch_vlan_add(struct dsa_switch *ds, if (err) return err; } + + return 0; } for_each_set_bit(port, members, ds->num_ports) diff --git a/net/dsa/tag_brcm.c b/net/dsa/tag_brcm.c index c697d9815177..e6e0b7b6025c 100644 --- a/net/dsa/tag_brcm.c +++ b/net/dsa/tag_brcm.c @@ -59,9 +59,12 @@ #define BRCM_EG_TC_MASK 0x7 #define BRCM_EG_PID_MASK 0x1f -static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev) +static struct sk_buff *brcm_tag_xmit_ll(struct sk_buff *skb, + struct net_device *dev, + unsigned int offset) { - struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_port *dp = dsa_slave_to_port(dev); + u16 queue = skb_get_queue_mapping(skb); u8 *brcm_tag; if (skb_cow_head(skb, BRCM_TAG_LEN) < 0) @@ -69,40 +72,42 @@ static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, struct net_device *dev skb_push(skb, BRCM_TAG_LEN); - memmove(skb->data, skb->data + BRCM_TAG_LEN, 2 * ETH_ALEN); + if (offset) + memmove(skb->data, skb->data + BRCM_TAG_LEN, offset); - /* Build the tag after the MAC Source Address */ - brcm_tag = skb->data + 2 * ETH_ALEN; + brcm_tag = skb->data + offset; /* Set the ingress opcode, traffic class, tag enforcment is * deprecated */ brcm_tag[0] = (1 << BRCM_OPCODE_SHIFT) | - ((skb->priority << BRCM_IG_TC_SHIFT) & BRCM_IG_TC_MASK); + ((queue & BRCM_IG_TC_MASK) << BRCM_IG_TC_SHIFT); brcm_tag[1] = 0; brcm_tag[2] = 0; - if (p->dp->index == 8) + if (dp->index == 8) brcm_tag[2] = BRCM_IG_DSTMAP2_MASK; - brcm_tag[3] = (1 << p->dp->index) & BRCM_IG_DSTMAP1_MASK; + brcm_tag[3] = (1 << dp->index) & BRCM_IG_DSTMAP1_MASK; + + /* Now tell the master network device about the desired output queue + * as well + */ + skb_set_queue_mapping(skb, BRCM_TAG_SET_PORT_QUEUE(dp->index, queue)); return skb; } -static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, - struct net_device *orig_dev) +static struct sk_buff *brcm_tag_rcv_ll(struct sk_buff *skb, + struct net_device *dev, + struct packet_type *pt, + unsigned int offset) { - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_port *cpu_dp = dsa_get_cpu_port(dst); - struct dsa_switch *ds = cpu_dp->ds; int source_port; u8 *brcm_tag; if (unlikely(!pskb_may_pull(skb, BRCM_TAG_LEN))) return NULL; - /* skb->data points to the EtherType, the tag is right before it */ - brcm_tag = skb->data - 2; + brcm_tag = skb->data - offset; /* The opcode should never be different than 0b000 */ if (unlikely((brcm_tag[0] >> BRCM_OPCODE_SHIFT) & BRCM_OPCODE_MASK)) @@ -117,24 +122,67 @@ static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev, /* Locate which port this is coming from */ source_port = brcm_tag[3] & BRCM_EG_PID_MASK; - /* Validate port against switch setup, either the port is totally */ - if (source_port >= ds->num_ports || !ds->ports[source_port].netdev) + skb->dev = dsa_master_find_slave(dev, 0, source_port); + if (!skb->dev) return NULL; /* Remove Broadcom tag and update checksum */ skb_pull_rcsum(skb, BRCM_TAG_LEN); + return skb; +} + +#ifdef CONFIG_NET_DSA_TAG_BRCM +static struct sk_buff *brcm_tag_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + /* Build the tag after the MAC Source Address */ + return brcm_tag_xmit_ll(skb, dev, 2 * ETH_ALEN); +} + + +static struct sk_buff *brcm_tag_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt) +{ + struct sk_buff *nskb; + + /* skb->data points to the EtherType, the tag is right before it */ + nskb = brcm_tag_rcv_ll(skb, dev, pt, 2); + if (!nskb) + return nskb; + /* Move the Ethernet DA and SA */ - memmove(skb->data - ETH_HLEN, - skb->data - ETH_HLEN - BRCM_TAG_LEN, + memmove(nskb->data - ETH_HLEN, + nskb->data - ETH_HLEN - BRCM_TAG_LEN, 2 * ETH_ALEN); - skb->dev = ds->ports[source_port].netdev; - - return skb; + return nskb; } const struct dsa_device_ops brcm_netdev_ops = { .xmit = brcm_tag_xmit, .rcv = brcm_tag_rcv, }; +#endif + +#ifdef CONFIG_NET_DSA_TAG_BRCM_PREPEND +static struct sk_buff *brcm_tag_xmit_prepend(struct sk_buff *skb, + struct net_device *dev) +{ + /* tag is prepended to the packet */ + return brcm_tag_xmit_ll(skb, dev, 0); +} + +static struct sk_buff *brcm_tag_rcv_prepend(struct sk_buff *skb, + struct net_device *dev, + struct packet_type *pt) +{ + /* tag is prepended to the packet */ + return brcm_tag_rcv_ll(skb, dev, pt, ETH_HLEN); +} + +const struct dsa_device_ops brcm_prepend_netdev_ops = { + .xmit = brcm_tag_xmit_prepend, + .rcv = brcm_tag_rcv_prepend, +}; +#endif diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index 12867a4b458f..cd13cfc542ce 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -18,7 +18,7 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev) { - struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_port *dp = dsa_slave_to_port(dev); u8 *dsa_header; /* @@ -34,8 +34,8 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev) * Construct tagged FROM_CPU DSA tag from 802.1q tag. */ dsa_header = skb->data + 2 * ETH_ALEN; - dsa_header[0] = 0x60 | p->dp->ds->index; - dsa_header[1] = p->dp->index << 3; + dsa_header[0] = 0x60 | dp->ds->index; + dsa_header[1] = dp->index << 3; /* * Move CFI field from byte 2 to byte 1. @@ -55,8 +55,8 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev) * Construct untagged FROM_CPU DSA tag. */ dsa_header = skb->data + 2 * ETH_ALEN; - dsa_header[0] = 0x40 | p->dp->ds->index; - dsa_header[1] = p->dp->index << 3; + dsa_header[0] = 0x40 | dp->ds->index; + dsa_header[1] = dp->index << 3; dsa_header[2] = 0x00; dsa_header[3] = 0x00; } @@ -65,11 +65,8 @@ static struct sk_buff *dsa_xmit(struct sk_buff *skb, struct net_device *dev) } static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, - struct net_device *orig_dev) + struct packet_type *pt) { - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_switch *ds; u8 *dsa_header; int source_device; int source_port; @@ -94,18 +91,8 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev, source_device = dsa_header[0] & 0x1f; source_port = (dsa_header[1] >> 3) & 0x1f; - /* - * Check that the source device exists and that the source - * port is a registered DSA port. - */ - if (source_device >= DSA_MAX_SWITCHES) - return NULL; - - ds = dst->ds[source_device]; - if (!ds) - return NULL; - - if (source_port >= ds->num_ports || !ds->ports[source_port].netdev) + skb->dev = dsa_master_find_slave(dev, source_device, source_port); + if (!skb->dev) return NULL; /* @@ -154,7 +141,7 @@ static struct sk_buff *dsa_rcv(struct sk_buff *skb, struct net_device *dev, 2 * ETH_ALEN); } - skb->dev = ds->ports[source_port].netdev; + skb->offload_fwd_mark = 1; return skb; } diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c index 67a9d26f9075..4083326b806e 100644 --- a/net/dsa/tag_edsa.c +++ b/net/dsa/tag_edsa.c @@ -19,7 +19,7 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev) { - struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_port *dp = dsa_slave_to_port(dev); u8 *edsa_header; /* @@ -43,8 +43,8 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev) edsa_header[1] = ETH_P_EDSA & 0xff; edsa_header[2] = 0x00; edsa_header[3] = 0x00; - edsa_header[4] = 0x60 | p->dp->ds->index; - edsa_header[5] = p->dp->index << 3; + edsa_header[4] = 0x60 | dp->ds->index; + edsa_header[5] = dp->index << 3; /* * Move CFI field from byte 6 to byte 5. @@ -68,8 +68,8 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev) edsa_header[1] = ETH_P_EDSA & 0xff; edsa_header[2] = 0x00; edsa_header[3] = 0x00; - edsa_header[4] = 0x40 | p->dp->ds->index; - edsa_header[5] = p->dp->index << 3; + edsa_header[4] = 0x40 | dp->ds->index; + edsa_header[5] = dp->index << 3; edsa_header[6] = 0x00; edsa_header[7] = 0x00; } @@ -78,11 +78,8 @@ static struct sk_buff *edsa_xmit(struct sk_buff *skb, struct net_device *dev) } static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, - struct net_device *orig_dev) + struct packet_type *pt) { - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_switch *ds; u8 *edsa_header; int source_device; int source_port; @@ -107,18 +104,8 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev, source_device = edsa_header[0] & 0x1f; source_port = (edsa_header[1] >> 3) & 0x1f; - /* - * Check that the source device exists and that the source - * port is a registered DSA port. - */ - if (source_device >= DSA_MAX_SWITCHES) - return NULL; - - ds = dst->ds[source_device]; - if (!ds) - return NULL; - - if (source_port >= ds->num_ports || !ds->ports[source_port].netdev) + skb->dev = dsa_master_find_slave(dev, source_device, source_port); + if (!skb->dev) return NULL; /* @@ -173,7 +160,7 @@ static struct sk_buff *edsa_rcv(struct sk_buff *skb, struct net_device *dev, 2 * ETH_ALEN); } - skb->dev = ds->ports[source_port].netdev; + skb->offload_fwd_mark = 1; return skb; } diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c index de66ca8e6201..0f62effad88f 100644 --- a/net/dsa/tag_ksz.c +++ b/net/dsa/tag_ksz.c @@ -34,7 +34,7 @@ static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev) { - struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_port *dp = dsa_slave_to_port(dev); struct sk_buff *nskb; int padlen; u8 *tag; @@ -42,7 +42,8 @@ static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev) padlen = (skb->len >= ETH_ZLEN) ? 0 : ETH_ZLEN - skb->len; if (skb_tailroom(skb) >= padlen + KSZ_INGRESS_TAG_LEN) { - if (skb_put_padto(skb, skb->len + padlen)) + /* Let dsa_slave_xmit() free skb */ + if (__skb_put_padto(skb, skb->len + padlen, false)) return NULL; nskb = skb; @@ -60,41 +61,38 @@ static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev) skb_transport_header(skb) - skb->head); skb_copy_and_csum_dev(skb, skb_put(nskb, skb->len)); - if (skb_put_padto(nskb, nskb->len + padlen)) { - kfree_skb(nskb); + /* Let skb_put_padto() free nskb, and let dsa_slave_xmit() free + * skb + */ + if (skb_put_padto(nskb, nskb->len + padlen)) return NULL; - } - kfree_skb(skb); + consume_skb(skb); } tag = skb_put(nskb, KSZ_INGRESS_TAG_LEN); tag[0] = 0; - tag[1] = 1 << p->dp->index; /* destination port */ + tag[1] = 1 << dp->index; /* destination port */ return nskb; } static struct sk_buff *ksz_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, - struct net_device *orig_dev) + struct packet_type *pt) { - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_port *cpu_dp = dsa_get_cpu_port(dst); - struct dsa_switch *ds = cpu_dp->ds; u8 *tag; int source_port; tag = skb_tail_pointer(skb) - KSZ_EGRESS_TAG_LEN; source_port = tag[0] & 7; - if (source_port >= ds->num_ports || !ds->ports[source_port].netdev) + + skb->dev = dsa_master_find_slave(dev, 0, source_port); + if (!skb->dev) return NULL; pskb_trim_rcsum(skb, skb->len - KSZ_EGRESS_TAG_LEN); - skb->dev = ds->ports[source_port].netdev; - return skb; } diff --git a/net/dsa/tag_lan9303.c b/net/dsa/tag_lan9303.c index 247774d149f9..548c00254c07 100644 --- a/net/dsa/tag_lan9303.c +++ b/net/dsa/tag_lan9303.c @@ -11,6 +11,7 @@ * GNU General Public License for more details. * */ +#include <linux/dsa/lan9303.h> #include <linux/etherdevice.h> #include <linux/list.h> #include <linux/slab.h> @@ -39,11 +40,30 @@ */ #define LAN9303_TAG_LEN 4 -#define LAN9303_MAX_PORTS 3 +# define LAN9303_TAG_TX_USE_ALR BIT(3) +# define LAN9303_TAG_TX_STP_OVERRIDE BIT(4) +# define LAN9303_TAG_RX_IGMP BIT(3) +# define LAN9303_TAG_RX_STP BIT(4) +# define LAN9303_TAG_RX_TRAPPED_TO_CPU (LAN9303_TAG_RX_IGMP | \ + LAN9303_TAG_RX_STP) + +/* Decide whether to transmit using ALR lookup, or transmit directly to + * port using tag. ALR learning is performed only when using ALR lookup. + * If the two external ports are bridged and the frame is unicast, + * then use ALR lookup to allow ALR learning on CPU port. + * Otherwise transmit directly to port with STP state override. + * See also: lan9303_separate_ports() and lan9303.pdf 6.4.10.1 + */ +static int lan9303_xmit_use_arl(struct dsa_port *dp, u8 *dest_addr) +{ + struct lan9303 *chip = dp->ds->priv; + + return chip->is_bridged && !is_multicast_ether_addr(dest_addr); +} static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev) { - struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_port *dp = dsa_slave_to_port(dev); u16 *lan9303_tag; /* insert a special VLAN tag between the MAC addresses @@ -63,26 +83,21 @@ static struct sk_buff *lan9303_xmit(struct sk_buff *skb, struct net_device *dev) lan9303_tag = (u16 *)(skb->data + 2 * ETH_ALEN); lan9303_tag[0] = htons(ETH_P_8021Q); - lan9303_tag[1] = htons(p->dp->index | BIT(4)); + lan9303_tag[1] = lan9303_xmit_use_arl(dp, skb->data) ? + LAN9303_TAG_TX_USE_ALR : + dp->index | LAN9303_TAG_TX_STP_OVERRIDE; + lan9303_tag[1] = htons(lan9303_tag[1]); return skb; } static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, struct net_device *orig_dev) + struct packet_type *pt) { u16 *lan9303_tag; - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_switch *ds; + u16 lan9303_tag1; unsigned int source_port; - ds = dst->ds[0]; - - if (unlikely(!ds)) { - dev_warn_ratelimited(&dev->dev, "Dropping packet, due to missing DSA switch device\n"); - return NULL; - } - if (unlikely(!pskb_may_pull(skb, LAN9303_TAG_LEN))) { dev_warn_ratelimited(&dev->dev, "Dropping packet, cannot pull\n"); @@ -102,27 +117,22 @@ static struct sk_buff *lan9303_rcv(struct sk_buff *skb, struct net_device *dev, return NULL; } - source_port = ntohs(lan9303_tag[1]) & 0x3; + lan9303_tag1 = ntohs(lan9303_tag[1]); + source_port = lan9303_tag1 & 0x3; - if (source_port >= LAN9303_MAX_PORTS) { + skb->dev = dsa_master_find_slave(dev, 0, source_port); + if (!skb->dev) { dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid source port\n"); return NULL; } - if (!ds->ports[source_port].netdev) { - dev_warn_ratelimited(&dev->dev, "Dropping packet due to invalid netdev or device\n"); - return NULL; - } - /* remove the special VLAN tag between the MAC addresses * and the current ethertype field. */ skb_pull_rcsum(skb, 2 + 2); memmove(skb->data - ETH_HLEN, skb->data - (ETH_HLEN + LAN9303_TAG_LEN), 2 * ETH_ALEN); - - /* forward the packet to the dedicated interface */ - skb->dev = ds->ports[source_port].netdev; + skb->offload_fwd_mark = !(lan9303_tag1 & LAN9303_TAG_RX_TRAPPED_TO_CPU); return skb; } diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c index 2f32b7ea3365..8475434af7d5 100644 --- a/net/dsa/tag_mtk.c +++ b/net/dsa/tag_mtk.c @@ -23,7 +23,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, struct net_device *dev) { - struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_port *dp = dsa_slave_to_port(dev); u8 *mtk_tag; if (skb_cow_head(skb, MTK_HDR_LEN) < 0) @@ -36,7 +36,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, /* Build the tag after the MAC Source Address */ mtk_tag = skb->data + 2 * ETH_ALEN; mtk_tag[0] = 0; - mtk_tag[1] = (1 << p->dp->index) & MTK_HDR_XMIT_DP_BIT_MASK; + mtk_tag[1] = (1 << dp->index) & MTK_HDR_XMIT_DP_BIT_MASK; mtk_tag[2] = 0; mtk_tag[3] = 0; @@ -44,11 +44,8 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, } static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, - struct net_device *orig_dev) + struct packet_type *pt) { - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_switch *ds; int port; __be16 *phdr, hdr; @@ -69,25 +66,27 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev, skb->data - ETH_HLEN - MTK_HDR_LEN, 2 * ETH_ALEN); - /* This protocol doesn't support cascading multiple - * switches so it's safe to assume the switch is first - * in the tree. - */ - ds = dst->ds[0]; - if (!ds) - return NULL; - /* Get source port information */ port = (hdr & MTK_HDR_RECV_SOURCE_PORT_MASK); - if (!ds->ports[port].netdev) - return NULL; - skb->dev = ds->ports[port].netdev; + skb->dev = dsa_master_find_slave(dev, 0, port); + if (!skb->dev) + return NULL; return skb; } +static int mtk_tag_flow_dissect(const struct sk_buff *skb, __be16 *proto, + int *offset) +{ + *offset = 4; + *proto = ((__be16 *)skb->data)[1]; + + return 0; +} + const struct dsa_device_ops mtk_netdev_ops = { - .xmit = mtk_tag_xmit, - .rcv = mtk_tag_rcv, + .xmit = mtk_tag_xmit, + .rcv = mtk_tag_rcv, + .flow_dissect = mtk_tag_flow_dissect, }; diff --git a/net/dsa/tag_qca.c b/net/dsa/tag_qca.c index 1867a3d11f28..613f4ee97771 100644 --- a/net/dsa/tag_qca.c +++ b/net/dsa/tag_qca.c @@ -38,7 +38,7 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev) { - struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_port *dp = dsa_slave_to_port(dev); u16 *phdr, hdr; dev->stats.tx_packets++; @@ -54,8 +54,7 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev) /* Set the version field, and set destination port information */ hdr = QCA_HDR_VERSION << QCA_HDR_XMIT_VERSION_S | - QCA_HDR_XMIT_FROM_CPU | - BIT(p->dp->index); + QCA_HDR_XMIT_FROM_CPU | BIT(dp->index); *phdr = htons(hdr); @@ -63,12 +62,8 @@ static struct sk_buff *qca_tag_xmit(struct sk_buff *skb, struct net_device *dev) } static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, - struct net_device *orig_dev) + struct packet_type *pt) { - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_port *cpu_dp = dsa_get_cpu_port(dst); - struct dsa_switch *ds; u8 ver; int port; __be16 *phdr, hdr; @@ -93,20 +88,12 @@ static struct sk_buff *qca_tag_rcv(struct sk_buff *skb, struct net_device *dev, memmove(skb->data - ETH_HLEN, skb->data - ETH_HLEN - QCA_HDR_LEN, ETH_HLEN - QCA_HDR_LEN); - /* This protocol doesn't support cascading multiple switches so it's - * safe to assume the switch is first in the tree - */ - ds = cpu_dp->ds; - if (!ds) - return NULL; - /* Get source port information */ port = (hdr & QCA_HDR_RECV_SOURCE_PORT_MASK); - if (!ds->ports[port].netdev) - return NULL; - /* Update skb & forward the frame accordingly */ - skb->dev = ds->ports[port].netdev; + skb->dev = dsa_master_find_slave(dev, 0, port); + if (!skb->dev) + return NULL; return skb; } diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c index b09e56214005..7d20e1f3de28 100644 --- a/net/dsa/tag_trailer.c +++ b/net/dsa/tag_trailer.c @@ -16,7 +16,7 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev) { - struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_port *dp = dsa_slave_to_port(dev); struct sk_buff *nskb; int padlen; u8 *trailer; @@ -40,7 +40,7 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev) skb_set_network_header(nskb, skb_network_header(skb) - skb->head); skb_set_transport_header(nskb, skb_transport_header(skb) - skb->head); skb_copy_and_csum_dev(skb, skb_put(nskb, skb->len)); - kfree_skb(skb); + consume_skb(skb); if (padlen) { skb_put_zero(nskb, padlen); @@ -48,7 +48,7 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev) trailer = skb_put(nskb, 4); trailer[0] = 0x80; - trailer[1] = 1 << p->dp->index; + trailer[1] = 1 << dp->index; trailer[2] = 0x10; trailer[3] = 0x00; @@ -56,12 +56,8 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev) } static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *pt, - struct net_device *orig_dev) + struct packet_type *pt) { - struct dsa_switch_tree *dst = dev->dsa_ptr; - struct dsa_port *cpu_dp = dsa_get_cpu_port(dst); - struct dsa_switch *ds = cpu_dp->ds; u8 *trailer; int source_port; @@ -74,13 +70,13 @@ static struct sk_buff *trailer_rcv(struct sk_buff *skb, struct net_device *dev, return NULL; source_port = trailer[1] & 7; - if (source_port >= ds->num_ports || !ds->ports[source_port].netdev) + + skb->dev = dsa_master_find_slave(dev, 0, source_port); + if (!skb->dev) return NULL; pskb_trim_rcsum(skb, skb->len - 4); - skb->dev = ds->ports[source_port].netdev; - return skb; } diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 4e7bdb213cd0..b8cd43c9ed5b 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -314,7 +314,8 @@ static void send_hsr_supervision_frame(struct hsr_port *master, hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload)); ether_addr_copy(hsr_sp->MacAddressA, master->dev->dev_addr); - skb_put_padto(skb, ETH_ZLEN + HSR_HLEN); + if (skb_put_padto(skb, ETH_ZLEN + HSR_HLEN)) + return; hsr_forward_skb(skb, master); return; @@ -327,12 +328,12 @@ out: /* Announce (supervision frame) timer function */ -static void hsr_announce(unsigned long data) +static void hsr_announce(struct timer_list *t) { struct hsr_priv *hsr; struct hsr_port *master; - hsr = (struct hsr_priv *) data; + hsr = from_timer(hsr, t, announce_timer); rcu_read_lock(); master = hsr_port_get_hsr(hsr, HSR_PT_MASTER); @@ -462,9 +463,8 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2], hsr->sequence_nr = HSR_SEQNR_START; hsr->sup_sequence_nr = HSR_SUP_SEQNR_START; - setup_timer(&hsr->announce_timer, hsr_announce, (unsigned long)hsr); - - setup_timer(&hsr->prune_timer, hsr_prune_nodes, (unsigned long)hsr); + timer_setup(&hsr->announce_timer, hsr_announce, 0); + timer_setup(&hsr->prune_timer, hsr_prune_nodes, 0); ether_addr_copy(hsr->sup_multicast_addr, def_multicast_addr); hsr->sup_multicast_addr[ETH_ALEN - 1] = multicast_spec; diff --git a/net/hsr/hsr_framereg.c b/net/hsr/hsr_framereg.c index 284a9b820df8..286ceb41ac0c 100644 --- a/net/hsr/hsr_framereg.c +++ b/net/hsr/hsr_framereg.c @@ -365,16 +365,14 @@ static struct hsr_port *get_late_port(struct hsr_priv *hsr, /* Remove stale sequence_nr records. Called by timer every * HSR_LIFE_CHECK_INTERVAL (two seconds or so). */ -void hsr_prune_nodes(unsigned long data) +void hsr_prune_nodes(struct timer_list *t) { - struct hsr_priv *hsr; + struct hsr_priv *hsr = from_timer(hsr, t, prune_timer); struct hsr_node *node; struct hsr_port *port; unsigned long timestamp; unsigned long time_a, time_b; - hsr = (struct hsr_priv *) data; - rcu_read_lock(); list_for_each_entry_rcu(node, &hsr->node_db, mac_list) { /* Shorthand */ diff --git a/net/hsr/hsr_framereg.h b/net/hsr/hsr_framereg.h index 4e04f0e868e9..370b45998121 100644 --- a/net/hsr/hsr_framereg.h +++ b/net/hsr/hsr_framereg.h @@ -33,7 +33,7 @@ void hsr_register_frame_in(struct hsr_node *node, struct hsr_port *port, int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node, u16 sequence_nr); -void hsr_prune_nodes(unsigned long data); +void hsr_prune_nodes(struct timer_list *t); int hsr_create_self_node(struct list_head *self_node_db, unsigned char addr_a[ETH_ALEN], diff --git a/net/ieee802154/6lowpan/6lowpan_i.h b/net/ieee802154/6lowpan/6lowpan_i.h index ac7c96b73ad5..d8de3bcfb103 100644 --- a/net/ieee802154/6lowpan/6lowpan_i.h +++ b/net/ieee802154/6lowpan/6lowpan_i.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __IEEE802154_6LOWPAN_I_H__ #define __IEEE802154_6LOWPAN_I_H__ diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c index de2661cd0328..974765b7d92a 100644 --- a/net/ieee802154/6lowpan/core.c +++ b/net/ieee802154/6lowpan/core.c @@ -54,7 +54,7 @@ static int open_count; -static struct header_ops lowpan_header_ops = { +static const struct header_ops lowpan_header_ops = { .create = lowpan_header_create, }; diff --git a/net/ieee802154/6lowpan/reassembly.c b/net/ieee802154/6lowpan/reassembly.c index 30d875dff6b5..85bf86ad6b18 100644 --- a/net/ieee802154/6lowpan/reassembly.c +++ b/net/ieee802154/6lowpan/reassembly.c @@ -80,12 +80,13 @@ static void lowpan_frag_init(struct inet_frag_queue *q, const void *a) fq->daddr = *arg->dst; } -static void lowpan_frag_expire(unsigned long data) +static void lowpan_frag_expire(struct timer_list *t) { + struct inet_frag_queue *frag = from_timer(frag, t, timer); struct frag_queue *fq; struct net *net; - fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); + fq = container_of(frag, struct frag_queue, q); net = container_of(fq->q.net, struct net, ieee802154_lowpan.frags); spin_lock(&fq->q.lock); @@ -580,19 +581,14 @@ static int __net_init lowpan_frags_init_net(struct net *net) { struct netns_ieee802154_lowpan *ieee802154_lowpan = net_ieee802154_lowpan(net); - int res; ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH; ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH; ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT; - res = inet_frags_init_net(&ieee802154_lowpan->frags); - if (res) - return res; - res = lowpan_frags_ns_sysctl_register(net); - if (res) - inet_frags_uninit_net(&ieee802154_lowpan->frags); - return res; + inet_frags_init_net(&ieee802154_lowpan->frags); + + return lowpan_frags_ns_sysctl_register(net); } static void __net_exit lowpan_frags_exit_net(struct net *net) diff --git a/net/ieee802154/Makefile b/net/ieee802154/Makefile index 9b92ade687a3..f05b7bdae2aa 100644 --- a/net/ieee802154/Makefile +++ b/net/ieee802154/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_IEEE802154) += ieee802154.o obj-$(CONFIG_IEEE802154_SOCKET) += ieee802154_socket.o obj-y += 6lowpan/ diff --git a/net/ieee802154/core.h b/net/ieee802154/core.h index 81141f58d079..1c19f575d574 100644 --- a/net/ieee802154/core.h +++ b/net/ieee802154/core.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __IEEE802154_CORE_H #define __IEEE802154_CORE_H diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index 6bde9e5a5503..96636e3b7aa9 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -89,7 +89,7 @@ int ieee802154_nl_reply(struct sk_buff *msg, struct genl_info *info) return genlmsg_reply(msg, info); } -static const struct genl_ops ieee8021154_ops[] = { +static const struct genl_ops ieee802154_ops[] = { /* see nl-phy.c */ IEEE802154_DUMP(IEEE802154_LIST_PHY, ieee802154_list_phy, ieee802154_dump_phy), @@ -137,8 +137,8 @@ struct genl_family nl802154_family __ro_after_init = { .version = 1, .maxattr = IEEE802154_ATTR_MAX, .module = THIS_MODULE, - .ops = ieee8021154_ops, - .n_ops = ARRAY_SIZE(ieee8021154_ops), + .ops = ieee802154_ops, + .n_ops = ARRAY_SIZE(ieee802154_ops), .mcgrps = ieee802154_mcgrps, .n_mcgrps = ARRAY_SIZE(ieee802154_mcgrps), }; diff --git a/net/ieee802154/nl802154.h b/net/ieee802154/nl802154.h index 3846a89d0958..8c4b6d08954c 100644 --- a/net/ieee802154/nl802154.h +++ b/net/ieee802154/nl802154.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __IEEE802154_NL802154_H #define __IEEE802154_NL802154_H diff --git a/net/ieee802154/rdev-ops.h b/net/ieee802154/rdev-ops.h index 4441c63b3ea6..598f5af49775 100644 --- a/net/ieee802154/rdev-ops.h +++ b/net/ieee802154/rdev-ops.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __CFG802154_RDEV_OPS #define __CFG802154_RDEV_OPS diff --git a/net/ieee802154/sysfs.h b/net/ieee802154/sysfs.h index aa42e39ecbec..337545b639e9 100644 --- a/net/ieee802154/sysfs.h +++ b/net/ieee802154/sysfs.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __IEEE802154_SYSFS_H #define __IEEE802154_SYSFS_H diff --git a/net/ieee802154/trace.h b/net/ieee802154/trace.h index 9a471e41ec73..19c2e5d60e76 100644 --- a/net/ieee802154/trace.h +++ b/net/ieee802154/trace.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Based on net/wireless/trace.h */ #undef TRACE_SYSTEM diff --git a/net/ife/ife.c b/net/ife/ife.c index f360341c72eb..7d1ec76e7f43 100644 --- a/net/ife/ife.c +++ b/net/ife/ife.c @@ -137,6 +137,6 @@ int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, const void *dval) EXPORT_SYMBOL_GPL(ife_tlv_meta_encode); MODULE_AUTHOR("Jamal Hadi Salim <jhs@mojatatu.com>"); -MODULE_AUTHOR("Yotam Gigi <yotamg@mellanox.com>"); +MODULE_AUTHOR("Yotam Gigi <yotam.gi@gmail.com>"); MODULE_DESCRIPTION("Inter-FE LFB action"); MODULE_LICENSE("GPL"); diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 91a2557942fa..f48fe6fc7e8c 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -70,11 +70,9 @@ config IP_MULTIPLE_TABLES address into account. Furthermore, the TOS (Type-Of-Service) field of the packet can be used for routing decisions as well. - If you are interested in this, please see the preliminary - documentation at <http://www.compendium.com.ar/policy-routing.txt> - and <ftp://post.tepkom.ru/pub/vol2/Linux/docs/advanced-routing.tex>. - You will need supporting software from - <ftp://ftp.tux.org/pub/net/ip-routing/>. + If you need more information, see the Linux Advanced + Routing and Traffic Control documentation at + <http://lartc.org/howto/lartc.rpdb.html> If unsure, say N. diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index afcb435adfbe..c6c8ad1d4b6d 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the Linux TCP/IP (INET) layer. # diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 2e548eca3489..ce4aa827be05 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -195,7 +195,7 @@ int inet_listen(struct socket *sock, int backlog) { struct sock *sk = sock->sk; unsigned char old_state; - int err; + int err, tcp_fastopen; lock_sock(sk); @@ -217,11 +217,12 @@ int inet_listen(struct socket *sock, int backlog) * because the socket was in TCP_LISTEN state previously but * was shutdown() rather than close(). */ - if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) && - (sysctl_tcp_fastopen & TFO_SERVER_ENABLE) && + tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen; + if ((tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) && + (tcp_fastopen & TFO_SERVER_ENABLE) && !inet_csk(sk)->icsk_accept_queue.fastopenq.max_qlen) { fastopen_queue_tune(sk, backlog); - tcp_fastopen_init_key_once(true); + tcp_fastopen_init_key_once(sock_net(sk)); } err = inet_csk_listen_start(sk, backlog); @@ -826,6 +827,7 @@ int inet_shutdown(struct socket *sock, int how) err = -ENOTCONN; /* Hack to wake up other listeners, who can poll for POLLHUP, even on eg. unconnected UDP sockets -- RR */ + /* fall through */ default: sk->sk_shutdown |= how; if (sk->sk_prot->shutdown) @@ -839,7 +841,7 @@ int inet_shutdown(struct socket *sock, int how) case TCP_LISTEN: if (!(how & RCV_SHUTDOWN)) break; - /* Fall through */ + /* fall through */ case TCP_SYN_SENT: err = sk->sk_prot->disconnect(sk, O_NONBLOCK); sock->state = err ? SS_DISCONNECTING : SS_UNCONNECTED; @@ -944,6 +946,8 @@ const struct proto_ops inet_stream_ops = { .sendpage = inet_sendpage, .splice_read = tcp_splice_read, .read_sock = tcp_read_sock, + .sendmsg_locked = tcp_sendmsg_locked, + .sendpage_locked = tcp_sendpage_locked, .peek_len = tcp_peek_len, #ifdef CONFIG_COMPAT .compat_setsockopt = compat_sock_common_setsockopt, @@ -1219,10 +1223,9 @@ EXPORT_SYMBOL(inet_sk_rebuild_header); struct sk_buff *inet_gso_segment(struct sk_buff *skb, netdev_features_t features) { - bool udpfrag = false, fixedid = false, gso_partial, encap; + bool fixedid = false, gso_partial, encap; struct sk_buff *segs = ERR_PTR(-EINVAL); const struct net_offload *ops; - unsigned int offset = 0; struct iphdr *iph; int proto, tot_len; int nhoff; @@ -1257,7 +1260,6 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb, segs = ERR_PTR(-EPROTONOSUPPORT); if (!skb->encapsulation || encap) { - udpfrag = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP); fixedid = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TCP_FIXEDID); /* fixed ID is invalid if DF bit is not set */ @@ -1277,13 +1279,7 @@ struct sk_buff *inet_gso_segment(struct sk_buff *skb, skb = segs; do { iph = (struct iphdr *)(skb_mac_header(skb) + nhoff); - if (udpfrag) { - iph->frag_off = htons(offset >> 3); - if (skb->next) - iph->frag_off |= htons(IP_MF); - offset += skb->len - nhoff - ihl; - tot_len = skb->len - nhoff; - } else if (skb_is_gso(skb)) { + if (skb_is_gso(skb)) { if (!fixedid) { iph->id = htons(id); id += skb_shinfo(skb)->gso_segs; @@ -1602,6 +1598,9 @@ static const struct net_protocol igmp_protocol = { }; #endif +/* thinking of making this const? Don't. + * early_demux can change based on sysctl. + */ static struct net_protocol tcp_protocol = { .early_demux = tcp_v4_early_demux, .early_demux_handler = tcp_v4_early_demux, @@ -1612,6 +1611,9 @@ static struct net_protocol tcp_protocol = { .icmp_strict_tag_validation = 1, }; +/* thinking of making this const? Don't. + * early_demux can change based on sysctl. + */ static struct net_protocol udp_protocol = { .early_demux = udp_v4_early_demux, .early_demux_handler = udp_v4_early_demux, @@ -1778,6 +1780,11 @@ static const struct net_offload ipip_offload = { }, }; +static int __init ipip_offload_init(void) +{ + return inet_add_offload(&ipip_offload, IPPROTO_IPIP); +} + static int __init ipv4_offload_init(void) { /* @@ -1787,9 +1794,10 @@ static int __init ipv4_offload_init(void) pr_crit("%s: Cannot add UDP protocol offload\n", __func__); if (tcpv4_offload_init() < 0) pr_crit("%s: Cannot add TCP protocol offload\n", __func__); + if (ipip_offload_init() < 0) + pr_crit("%s: Cannot add IPIP protocol offload\n", __func__); dev_add_offload(&ip_packet_offload); - inet_add_offload(&ipip_offload, IPPROTO_IPIP); return 0; } diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 37db44f60718..4dd95cdd8070 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -240,7 +240,7 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb) if (err == -EINPROGRESS) goto out; - if (err == -EBUSY) + if (err == -ENOSPC) err = NET_XMIT_DROP; goto out_free; } diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 8b52179ddc6e..a8d7c5a9fb05 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -171,7 +171,7 @@ struct neigh_table arp_tbl = { [NEIGH_VAR_BASE_REACHABLE_TIME] = 30 * HZ, [NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ, [NEIGH_VAR_GC_STALETIME] = 60 * HZ, - [NEIGH_VAR_QUEUE_LEN_BYTES] = 64 * 1024, + [NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX, [NEIGH_VAR_PROXY_QLEN] = 64, [NEIGH_VAR_ANYCAST_DELAY] = 1 * HZ, [NEIGH_VAR_PROXY_DELAY] = (8 * HZ) / 10, @@ -1180,6 +1180,7 @@ int arp_ioctl(struct net *net, unsigned int cmd, void __user *arg) case SIOCSARP: if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; + /* fall through */ case SIOCGARP: err = copy_from_user(&r, arg, sizeof(struct arpreq)); if (err) diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 2ae8f54cb321..82178cc69c96 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1951,7 +1951,7 @@ int cipso_v4_req_setattr(struct request_sock *req, buf = NULL; req_inet = inet_rsk(req); - opt = xchg(&req_inet->opt, opt); + opt = xchg((__force struct ip_options_rcu **)&req_inet->ireq_opt, opt); if (opt) kfree_rcu(opt, rcu); @@ -1973,11 +1973,13 @@ req_setattr_failure: * values on failure. * */ -static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr) +static int cipso_v4_delopt(struct ip_options_rcu __rcu **opt_ptr) { + struct ip_options_rcu *opt = rcu_dereference_protected(*opt_ptr, 1); int hdr_delta = 0; - struct ip_options_rcu *opt = *opt_ptr; + if (!opt || opt->opt.cipso == 0) + return 0; if (opt->opt.srr || opt->opt.rr || opt->opt.ts || opt->opt.router_alert) { u8 cipso_len; u8 cipso_off; @@ -2039,14 +2041,10 @@ static int cipso_v4_delopt(struct ip_options_rcu **opt_ptr) */ void cipso_v4_sock_delattr(struct sock *sk) { - int hdr_delta; - struct ip_options_rcu *opt; struct inet_sock *sk_inet; + int hdr_delta; sk_inet = inet_sk(sk); - opt = rcu_dereference_protected(sk_inet->inet_opt, 1); - if (!opt || opt->opt.cipso == 0) - return; hdr_delta = cipso_v4_delopt(&sk_inet->inet_opt); if (sk_inet->is_icsk && hdr_delta > 0) { @@ -2066,15 +2064,7 @@ void cipso_v4_sock_delattr(struct sock *sk) */ void cipso_v4_req_delattr(struct request_sock *req) { - struct ip_options_rcu *opt; - struct inet_request_sock *req_inet; - - req_inet = inet_rsk(req); - opt = req_inet->opt; - if (!opt || opt->opt.cipso == 0) - return; - - cipso_v4_delopt(&req_inet->opt); + cipso_v4_delopt(&inet_rsk(req)->ireq_opt); } /** diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 38d9af9b917c..a4573bccd6da 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -137,22 +137,12 @@ static void inet_hash_remove(struct in_ifaddr *ifa) */ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) { - u32 hash = inet_addr_hash(net, addr); struct net_device *result = NULL; struct in_ifaddr *ifa; rcu_read_lock(); - hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) { - if (ifa->ifa_local == addr) { - struct net_device *dev = ifa->ifa_dev->dev; - - if (!net_eq(dev_net(dev), net)) - continue; - result = dev; - break; - } - } - if (!result) { + ifa = inet_lookup_ifaddr_rcu(net, addr); + if (!ifa) { struct flowi4 fl4 = { .daddr = addr }; struct fib_result res = { 0 }; struct fib_table *local; @@ -165,6 +155,8 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) !fib_table_lookup(local, &fl4, &res, FIB_LOOKUP_NOREF) && res.type == RTN_LOCAL) result = FIB_RES_DEV(res); + } else { + result = ifa->ifa_dev->dev; } if (result && devref) dev_hold(result); @@ -173,6 +165,20 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) } EXPORT_SYMBOL(__ip_dev_find); +/* called under RCU lock */ +struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr) +{ + u32 hash = inet_addr_hash(net, addr); + struct in_ifaddr *ifa; + + hlist_for_each_entry_rcu(ifa, &inet_addr_lst[hash], hash) + if (ifa->ifa_local == addr && + net_eq(dev_net(ifa->ifa_dev->dev), net)) + return ifa; + + return NULL; +} + static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32); static BLOCKING_NOTIFIER_HEAD(inetaddr_chain); @@ -438,7 +444,7 @@ static void check_lifetime(struct work_struct *work); static DECLARE_DELAYED_WORK(check_lifetime_work, check_lifetime); static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, - u32 portid) + u32 portid, struct netlink_ext_ack *extack) { struct in_device *in_dev = ifa->ifa_dev; struct in_ifaddr *ifa1, **ifap, **last_primary; @@ -483,6 +489,7 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, */ ivi.ivi_addr = ifa->ifa_address; ivi.ivi_dev = ifa->ifa_dev; + ivi.extack = extack; ret = blocking_notifier_call_chain(&inetaddr_validator_chain, NETDEV_UP, &ivi); ret = notifier_to_errno(ret); @@ -515,7 +522,7 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, static int inet_insert_ifa(struct in_ifaddr *ifa) { - return __inet_insert_ifa(ifa, NULL, 0); + return __inet_insert_ifa(ifa, NULL, 0, NULL); } static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa) @@ -896,7 +903,8 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, return ret; } } - return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid); + return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid, + extack); } else { inet_free_ifa(ifa); @@ -1516,6 +1524,7 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, if (inetdev_valid_mtu(dev->mtu)) break; /* disable IP when MTU is not enough */ + /* fall through */ case NETDEV_UNREGISTER: inetdev_destroy(in_dev); break; @@ -1751,7 +1760,7 @@ static int inet_validate_link_af(const struct net_device *dev, struct nlattr *a, *tb[IFLA_INET_MAX+1]; int err, rem; - if (dev && !__in_dev_get_rtnl(dev)) + if (dev && !__in_dev_get_rcu(dev)) return -EAFNOSUPPORT; err = nla_parse_nested(tb, IFLA_INET_MAX, nla, inet_af_policy, NULL); @@ -1775,7 +1784,7 @@ static int inet_validate_link_af(const struct net_device *dev, static int inet_set_link_af(struct net_device *dev, const struct nlattr *nla) { - struct in_device *in_dev = __in_dev_get_rtnl(dev); + struct in_device *in_dev = __in_dev_get_rcu(dev); struct nlattr *a, *tb[IFLA_INET_MAX+1]; int rem; @@ -2491,9 +2500,9 @@ void __init devinet_init(void) rtnl_af_register(&inet_af_ops); - rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, NULL); - rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, NULL); - rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, NULL); + rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL, 0); + rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL, 0); + rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr, 0); rtnl_register(PF_INET, RTM_GETNETCONF, inet_netconf_get_devconf, - inet_netconf_dump_devconf, NULL); + inet_netconf_dump_devconf, 0); } diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 0cbee0a666ff..d57aa64fa7c7 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -258,7 +258,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * esp_output_udp_encap(x, skb, esp); if (!skb_cloned(skb)) { - if (tailen <= skb_availroom(skb)) { + if (tailen <= skb_tailroom(skb)) { nfrags = 1; trailer = skb; tail = skb_tail_pointer(trailer); @@ -292,8 +292,6 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * kunmap_atomic(vaddr); - spin_unlock_bh(&x->lock); - nfrags = skb_shinfo(skb)->nr_frags; __skb_fill_page_desc(skb, nfrags, page, pfrag->offset, @@ -301,6 +299,9 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * skb_shinfo(skb)->nr_frags = ++nfrags; pfrag->offset = pfrag->offset + allocsize; + + spin_unlock_bh(&x->lock); + nfrags++; skb->len += tailen; @@ -381,7 +382,7 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * (unsigned char *)esph - skb->data, assoclen + ivlen + esp->clen + alen); if (unlikely(err < 0)) - goto error; + goto error_free; if (!esp->inplace) { int allocsize; @@ -392,7 +393,7 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * spin_lock_bh(&x->lock); if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) { spin_unlock_bh(&x->lock); - goto error; + goto error_free; } skb_shinfo(skb)->nr_frags = 1; @@ -409,7 +410,7 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * (unsigned char *)esph - skb->data, assoclen + ivlen + esp->clen + alen); if (unlikely(err < 0)) - goto error; + goto error_free; } if ((x->props.flags & XFRM_STATE_ESN)) @@ -431,7 +432,7 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * case -EINPROGRESS: goto error; - case -EBUSY: + case -ENOSPC: err = NET_XMIT_DROP; break; @@ -442,8 +443,9 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * if (sg != dsg) esp_ssg_unref(x, tmp); - kfree(tmp); +error_free: + kfree(tmp); error: return err; } @@ -499,18 +501,59 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) return esp_output_tail(x, skb, &esp); } +static inline int esp_remove_trailer(struct sk_buff *skb) +{ + struct xfrm_state *x = xfrm_input_state(skb); + struct xfrm_offload *xo = xfrm_offload(skb); + struct crypto_aead *aead = x->data; + int alen, hlen, elen; + int padlen, trimlen; + __wsum csumdiff; + u8 nexthdr[2]; + int ret; + + alen = crypto_aead_authsize(aead); + hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead); + elen = skb->len - hlen; + + if (xo && (xo->flags & XFRM_ESP_NO_TRAILER)) { + ret = xo->proto; + goto out; + } + + if (skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2)) + BUG(); + + ret = -EINVAL; + padlen = nexthdr[0]; + if (padlen + 2 + alen >= elen) { + net_dbg_ratelimited("ipsec esp packet is garbage padlen=%d, elen=%d\n", + padlen + 2, elen - alen); + goto out; + } + + trimlen = alen + padlen + 2; + if (skb->ip_summed == CHECKSUM_COMPLETE) { + csumdiff = skb_checksum(skb, skb->len - trimlen, trimlen, 0); + skb->csum = csum_block_sub(skb->csum, csumdiff, + skb->len - trimlen); + } + pskb_trim(skb, skb->len - trimlen); + + ret = nexthdr[1]; + +out: + return ret; +} + int esp_input_done2(struct sk_buff *skb, int err) { const struct iphdr *iph; struct xfrm_state *x = xfrm_input_state(skb); struct xfrm_offload *xo = xfrm_offload(skb); struct crypto_aead *aead = x->data; - int alen = crypto_aead_authsize(aead); int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead); - int elen = skb->len - hlen; int ihl; - u8 nexthdr[2]; - int padlen; if (!xo || (xo && !(xo->flags & CRYPTO_DONE))) kfree(ESP_SKB_CB(skb)->tmp); @@ -518,16 +561,10 @@ int esp_input_done2(struct sk_buff *skb, int err) if (unlikely(err)) goto out; - if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2)) - BUG(); - - err = -EINVAL; - padlen = nexthdr[0]; - if (padlen + 2 + alen >= elen) + err = esp_remove_trailer(skb); + if (unlikely(err < 0)) goto out; - /* ... check padding bits here. Silly. :-) */ - iph = ip_hdr(skb); ihl = iph->ihl * 4; @@ -568,15 +605,12 @@ int esp_input_done2(struct sk_buff *skb, int err) skb->ip_summed = CHECKSUM_UNNECESSARY; } - pskb_trim(skb, skb->len - alen - padlen - 2); - __skb_pull(skb, hlen); + skb_pull_rcsum(skb, hlen); if (x->props.mode == XFRM_MODE_TUNNEL) skb_reset_transport_header(skb); else skb_set_transport_header(skb, -ihl); - err = nexthdr[1]; - /* RFC4303: Drop dummy packets without any error */ if (err == IPPROTO_NONE) err = -EINVAL; @@ -695,8 +729,10 @@ skip_cow: sg_init_table(sg, nfrags); err = skb_to_sgvec(skb, sg, 0, skb->len); - if (unlikely(err < 0)) + if (unlikely(err < 0)) { + kfree(tmp); goto out; + } skb->ip_summed = CHECKSUM_NONE; diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index e0666016a764..f8b918c766b0 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -182,11 +182,13 @@ out: static int esp_input_tail(struct xfrm_state *x, struct sk_buff *skb) { struct crypto_aead *aead = x->data; + struct xfrm_offload *xo = xfrm_offload(skb); if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead))) return -EINVAL; - skb->ip_summed = CHECKSUM_NONE; + if (!(xo->flags & CRYPTO_DONE)) + skb->ip_summed = CHECKSUM_NONE; return esp_input_done2(skb, 0); } @@ -257,7 +259,7 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_ esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32)); err = esp_output_tail(x, skb, &esp); - if (err < 0) + if (err) return err; secpath_reset(skb); @@ -303,3 +305,4 @@ module_init(esp4_offload_init); module_exit(esp4_offload_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Steffen Klassert <steffen.klassert@secunet.com>"); +MODULE_ALIAS_XFRM_OFFLOAD_TYPE(AF_INET, XFRM_PROTO_ESP); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 044d2a159a3c..f52d27a422c3 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -73,6 +73,11 @@ fail: fib_free_table(main_table); return -ENOMEM; } + +static bool fib4_has_custom_rules(struct net *net) +{ + return false; +} #else struct fib_table *fib_new_table(struct net *net, u32 id) @@ -128,6 +133,11 @@ struct fib_table *fib_get_table(struct net *net, u32 id) } return NULL; } + +static bool fib4_has_custom_rules(struct net *net) +{ + return net->ipv4.fib_has_custom_rules; +} #endif /* CONFIG_IP_MULTIPLE_TABLES */ static void fib_replace_table(struct net *net, struct fib_table *old, @@ -345,9 +355,6 @@ static int __fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, if (res.type != RTN_UNICAST && (res.type != RTN_LOCAL || !IN_DEV_ACCEPT_LOCAL(idev))) goto e_inval; - if (!rpf && !fib_num_tclassid_users(net) && - (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev))) - goto last_resort; fib_combine_itag(itag, &res); dev_match = false; @@ -402,13 +409,28 @@ int fib_validate_source(struct sk_buff *skb, __be32 src, __be32 dst, struct in_device *idev, u32 *itag) { int r = secpath_exists(skb) ? 0 : IN_DEV_RPFILTER(idev); + struct net *net = dev_net(dev); - if (!r && !fib_num_tclassid_users(dev_net(dev)) && - IN_DEV_ACCEPT_LOCAL(idev) && + if (!r && !fib_num_tclassid_users(net) && (dev->ifindex != oif || !IN_DEV_TX_REDIRECTS(idev))) { + if (IN_DEV_ACCEPT_LOCAL(idev)) + goto ok; + /* with custom local routes in place, checking local addresses + * only will be too optimistic, with custom rules, checking + * local addresses only can be too strict, e.g. due to vrf + */ + if (net->ipv4.fib_has_custom_local_routes || + fib4_has_custom_rules(net)) + goto full_check; + if (inet_lookup_ifaddr_rcu(net, src)) + return -EINVAL; + +ok: *itag = 0; return 0; } + +full_check: return __fib_validate_source(skb, src, dst, tos, oif, dev, r, idev, itag); } @@ -759,6 +781,8 @@ static int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, } err = fib_table_insert(net, tb, &cfg, extack); + if (!err && cfg.fc_type == RTN_LOCAL) + net->ipv4.fib_has_custom_local_routes = true; errout: return err; } @@ -1247,22 +1271,28 @@ static int __net_init ip_fib_net_init(struct net *net) int err; size_t size = sizeof(struct hlist_head) * FIB_TABLE_HASHSZ; - net->ipv4.fib_seq = 0; + err = fib4_notifier_init(net); + if (err) + return err; /* Avoid false sharing : Use at least a full cache line */ size = max_t(size_t, size, L1_CACHE_BYTES); net->ipv4.fib_table_hash = kzalloc(size, GFP_KERNEL); - if (!net->ipv4.fib_table_hash) - return -ENOMEM; + if (!net->ipv4.fib_table_hash) { + err = -ENOMEM; + goto err_table_hash_alloc; + } err = fib4_rules_init(net); if (err < 0) - goto fail; + goto err_rules_init; return 0; -fail: +err_rules_init: kfree(net->ipv4.fib_table_hash); +err_table_hash_alloc: + fib4_notifier_exit(net); return err; } @@ -1292,6 +1322,7 @@ static void ip_fib_net_exit(struct net *net) #endif rtnl_unlock(); kfree(net->ipv4.fib_table_hash); + fib4_notifier_exit(net); } static int __net_init fib_net_init(struct net *net) @@ -1341,7 +1372,7 @@ void __init ip_fib_init(void) register_netdevice_notifier(&fib_netdev_notifier); register_inetaddr_notifier(&fib_inetaddr_notifier); - rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, NULL); - rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, NULL); - rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, NULL); + rtnl_register(PF_INET, RTM_NEWROUTE, inet_rtm_newroute, NULL, 0); + rtnl_register(PF_INET, RTM_DELROUTE, inet_rtm_delroute, NULL, 0); + rtnl_register(PF_INET, RTM_GETROUTE, NULL, inet_dump_fib, 0); } diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index 769ab87ebc4b..e6ff282bb7f4 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _FIB_LOOKUP_H #define _FIB_LOOKUP_H @@ -32,6 +33,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg, struct netlink_ext_ack *extack); int fib_nh_match(struct fib_config *cfg, struct fib_info *fi, struct netlink_ext_ack *extack); +bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi); int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos, struct fib_info *fi, unsigned int); diff --git a/net/ipv4/fib_notifier.c b/net/ipv4/fib_notifier.c index e0714d975947..b804ccbdb241 100644 --- a/net/ipv4/fib_notifier.c +++ b/net/ipv4/fib_notifier.c @@ -1,86 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/rtnetlink.h> #include <linux/notifier.h> -#include <linux/rcupdate.h> +#include <linux/socket.h> #include <linux/kernel.h> +#include <linux/export.h> #include <net/net_namespace.h> +#include <net/fib_notifier.h> #include <net/netns/ipv4.h> #include <net/ip_fib.h> -static ATOMIC_NOTIFIER_HEAD(fib_chain); - -int call_fib_notifier(struct notifier_block *nb, struct net *net, - enum fib_event_type event_type, - struct fib_notifier_info *info) +int call_fib4_notifier(struct notifier_block *nb, struct net *net, + enum fib_event_type event_type, + struct fib_notifier_info *info) { - info->net = net; - return nb->notifier_call(nb, event_type, info); + info->family = AF_INET; + return call_fib_notifier(nb, net, event_type, info); } -int call_fib_notifiers(struct net *net, enum fib_event_type event_type, - struct fib_notifier_info *info) +int call_fib4_notifiers(struct net *net, enum fib_event_type event_type, + struct fib_notifier_info *info) { + ASSERT_RTNL(); + + info->family = AF_INET; net->ipv4.fib_seq++; - info->net = net; - return atomic_notifier_call_chain(&fib_chain, event_type, info); + return call_fib_notifiers(net, event_type, info); } -static unsigned int fib_seq_sum(void) +static unsigned int fib4_seq_read(struct net *net) { - unsigned int fib_seq = 0; - struct net *net; - - rtnl_lock(); - for_each_net(net) - fib_seq += net->ipv4.fib_seq; - rtnl_unlock(); + ASSERT_RTNL(); - return fib_seq; + return net->ipv4.fib_seq + fib4_rules_seq_read(net); } -static bool fib_dump_is_consistent(struct notifier_block *nb, - void (*cb)(struct notifier_block *nb), - unsigned int fib_seq) +static int fib4_dump(struct net *net, struct notifier_block *nb) { - atomic_notifier_chain_register(&fib_chain, nb); - if (fib_seq == fib_seq_sum()) - return true; - atomic_notifier_chain_unregister(&fib_chain, nb); - if (cb) - cb(nb); - return false; + int err; + + err = fib4_rules_dump(net, nb); + if (err) + return err; + + fib_notify(net, nb); + + return 0; } -#define FIB_DUMP_MAX_RETRIES 5 -int register_fib_notifier(struct notifier_block *nb, - void (*cb)(struct notifier_block *nb)) -{ - int retries = 0; +static const struct fib_notifier_ops fib4_notifier_ops_template = { + .family = AF_INET, + .fib_seq_read = fib4_seq_read, + .fib_dump = fib4_dump, + .owner = THIS_MODULE, +}; - do { - unsigned int fib_seq = fib_seq_sum(); - struct net *net; +int __net_init fib4_notifier_init(struct net *net) +{ + struct fib_notifier_ops *ops; - /* Mutex semantics guarantee that every change done to - * FIB tries before we read the change sequence counter - * is now visible to us. - */ - rcu_read_lock(); - for_each_net_rcu(net) { - fib_rules_notify(net, nb); - fib_notify(net, nb); - } - rcu_read_unlock(); + net->ipv4.fib_seq = 0; - if (fib_dump_is_consistent(nb, cb, fib_seq)) - return 0; - } while (++retries < FIB_DUMP_MAX_RETRIES); + ops = fib_notifier_ops_register(&fib4_notifier_ops_template, net); + if (IS_ERR(ops)) + return PTR_ERR(ops); + net->ipv4.notifier_ops = ops; - return -EBUSY; + return 0; } -EXPORT_SYMBOL(register_fib_notifier); -int unregister_fib_notifier(struct notifier_block *nb) +void __net_exit fib4_notifier_exit(struct net *net) { - return atomic_notifier_chain_unregister(&fib_chain, nb); + fib_notifier_ops_unregister(net->ipv4.notifier_ops); } -EXPORT_SYMBOL(unregister_fib_notifier); diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 778ecf977eb2..35d646a62ad4 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -68,6 +68,16 @@ bool fib4_rule_default(const struct fib_rule *rule) } EXPORT_SYMBOL_GPL(fib4_rule_default); +int fib4_rules_dump(struct net *net, struct notifier_block *nb) +{ + return fib_rules_dump(net, nb, AF_INET); +} + +unsigned int fib4_rules_seq_read(struct net *net) +{ + return fib_rules_seq_read(net, AF_INET); +} + int __fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res, unsigned int flags) { @@ -185,38 +195,6 @@ static struct fib_table *fib_empty_table(struct net *net) return NULL; } -static int call_fib_rule_notifier(struct notifier_block *nb, struct net *net, - enum fib_event_type event_type, - struct fib_rule *rule) -{ - struct fib_rule_notifier_info info = { - .rule = rule, - }; - - return call_fib_notifier(nb, net, event_type, &info.info); -} - -static int call_fib_rule_notifiers(struct net *net, - enum fib_event_type event_type, - struct fib_rule *rule) -{ - struct fib_rule_notifier_info info = { - .rule = rule, - }; - - return call_fib_notifiers(net, event_type, &info.info); -} - -/* Called with rcu_read_lock() */ -void fib_rules_notify(struct net *net, struct notifier_block *nb) -{ - struct fib_rules_ops *ops = net->ipv4.rules_ops; - struct fib_rule *rule; - - list_for_each_entry_rcu(rule, &ops->rules_list, list) - call_fib_rule_notifier(nb, net, FIB_EVENT_RULE_ADD, rule); -} - static const struct nla_policy fib4_rule_policy[FRA_MAX+1] = { FRA_GENERIC_POLICY, [FRA_FLOW] = { .type = NLA_U32 }, @@ -273,7 +251,6 @@ static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, rule4->tos = frh->tos; net->ipv4.fib_has_custom_rules = true; - call_fib_rule_notifiers(net, FIB_EVENT_RULE_ADD, rule); err = 0; errout: @@ -295,7 +272,6 @@ static int fib4_rule_delete(struct fib_rule *rule) net->ipv4.fib_num_tclassid_users--; #endif net->ipv4.fib_has_custom_rules = true; - call_fib_rule_notifiers(net, FIB_EVENT_RULE_DEL, rule); errout: return err; } diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index ec3a9ce281a6..f04d944f8abe 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -44,6 +44,7 @@ #include <net/netlink.h> #include <net/nexthop.h> #include <net/lwtunnel.h> +#include <net/fib_notifier.h> #include "fib_lookup.h" @@ -219,7 +220,7 @@ static void free_fib_info_rcu(struct rcu_head *head) } endfor_nexthops(fi); m = fi->fib_metrics; - if (m != &dst_default_metrics && atomic_dec_and_test(&m->refcnt)) + if (m != &dst_default_metrics && refcount_dec_and_test(&m->refcnt)) kfree(m); kfree(fi); } @@ -600,17 +601,9 @@ static void fib_rebalance(struct fib_info *fi) atomic_set(&nexthop_nh->nh_upper_bound, upper_bound); } endfor_nexthops(fi); } - -static inline void fib_add_weight(struct fib_info *fi, - const struct fib_nh *nh) -{ - fi->fib_weight += nh->nh_weight; -} - #else /* CONFIG_IP_ROUTE_MULTIPATH */ #define fib_rebalance(fi) do { } while (0) -#define fib_add_weight(fi, nh) do { } while (0) #endif /* CONFIG_IP_ROUTE_MULTIPATH */ @@ -695,6 +688,40 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi, return 0; } +bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi) +{ + struct nlattr *nla; + int remaining; + + if (!cfg->fc_mx) + return true; + + nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { + int type = nla_type(nla); + u32 val; + + if (!type) + continue; + if (type > RTAX_MAX) + return false; + + if (type == RTAX_CC_ALGO) { + char tmp[TCP_CA_NAME_MAX]; + bool ecn_ca = false; + + nla_strlcpy(tmp, nla, sizeof(tmp)); + val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca); + } else { + val = nla_get_u32(nla); + } + + if (fi->fib_metrics->metrics[type - 1] != val) + return false; + } + + return true; +} + /* * Picture @@ -739,8 +766,8 @@ int fib_nh_match(struct fib_config *cfg, struct fib_info *fi, * | * |-> {local prefix} (terminal node) */ -static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, - struct fib_nh *nh, struct netlink_ext_ack *extack) +static int fib_check_nh(struct fib_config *cfg, struct fib_nh *nh, + struct netlink_ext_ack *extack) { int err = 0; struct net *net; @@ -1003,7 +1030,7 @@ fib_convert_metrics(struct fib_info *fi, const struct fib_config *cfg) char tmp[TCP_CA_NAME_MAX]; nla_strlcpy(tmp, nla, sizeof(tmp)); - val = tcp_ca_get_key_by_name(tmp, &ecn_ca); + val = tcp_ca_get_key_by_name(fi->fib_net, tmp, &ecn_ca); if (val == TCP_CA_UNSPEC) return -EINVAL; } else { @@ -1089,7 +1116,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg, kfree(fi); return ERR_PTR(err); } - atomic_set(&fi->fib_metrics->refcnt, 1); + refcount_set(&fi->fib_metrics->refcnt, 1); } else { fi->fib_metrics = (struct dst_metrics *)&dst_default_metrics; } @@ -1223,7 +1250,7 @@ struct fib_info *fib_create_info(struct fib_config *cfg, int linkdown = 0; change_nexthops(fi) { - err = fib_check_nh(cfg, fi, nexthop_nh, extack); + err = fib_check_nh(cfg, nexthop_nh, extack); if (err != 0) goto failure; if (nexthop_nh->nh_flags & RTNH_F_LINKDOWN) @@ -1240,7 +1267,6 @@ struct fib_info *fib_create_info(struct fib_config *cfg, change_nexthops(fi) { fib_info_update_nh_saddr(net, nexthop_nh); - fib_add_weight(fi, nexthop_nh); } endfor_nexthops(fi) fib_rebalance(fi); @@ -1330,8 +1356,6 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, nla_put_in_addr(skb, RTA_PREFSRC, fi->fib_prefsrc)) goto nla_put_failure; if (fi->fib_nhs == 1) { - struct in_device *in_dev; - if (fi->fib_nh->nh_gw && nla_put_in_addr(skb, RTA_GATEWAY, fi->fib_nh->nh_gw)) goto nla_put_failure; @@ -1339,11 +1363,17 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, nla_put_u32(skb, RTA_OIF, fi->fib_nh->nh_oif)) goto nla_put_failure; if (fi->fib_nh->nh_flags & RTNH_F_LINKDOWN) { - in_dev = __in_dev_get_rtnl(fi->fib_nh->nh_dev); + struct in_device *in_dev; + + rcu_read_lock(); + in_dev = __in_dev_get_rcu(fi->fib_nh->nh_dev); if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev)) rtm->rtm_flags |= RTNH_F_DEAD; + rcu_read_unlock(); } + if (fi->fib_nh->nh_flags & RTNH_F_OFFLOAD) + rtm->rtm_flags |= RTNH_F_OFFLOAD; #ifdef CONFIG_IP_ROUTE_CLASSID if (fi->fib_nh[0].nh_tclassid && nla_put_u32(skb, RTA_FLOW, fi->fib_nh[0].nh_tclassid)) @@ -1363,18 +1393,20 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, goto nla_put_failure; for_nexthops(fi) { - struct in_device *in_dev; - rtnh = nla_reserve_nohdr(skb, sizeof(*rtnh)); if (!rtnh) goto nla_put_failure; rtnh->rtnh_flags = nh->nh_flags & 0xFF; if (nh->nh_flags & RTNH_F_LINKDOWN) { - in_dev = __in_dev_get_rtnl(nh->nh_dev); + struct in_device *in_dev; + + rcu_read_lock(); + in_dev = __in_dev_get_rcu(nh->nh_dev); if (in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev)) rtnh->rtnh_flags |= RTNH_F_DEAD; + rcu_read_unlock(); } rtnh->rtnh_hops = nh->nh_weight - 1; rtnh->rtnh_ifindex = nh->nh_oif; @@ -1451,14 +1483,14 @@ static int call_fib_nh_notifiers(struct fib_nh *fib_nh, if (IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && fib_nh->nh_flags & RTNH_F_LINKDOWN) break; - return call_fib_notifiers(dev_net(fib_nh->nh_dev), event_type, - &info.info); + return call_fib4_notifiers(dev_net(fib_nh->nh_dev), event_type, + &info.info); case FIB_EVENT_NH_DEL: if ((in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && fib_nh->nh_flags & RTNH_F_LINKDOWN) || (fib_nh->nh_flags & RTNH_F_DEAD)) - return call_fib_notifiers(dev_net(fib_nh->nh_dev), - event_type, &info.info); + return call_fib4_notifiers(dev_net(fib_nh->nh_dev), + event_type, &info.info); default: break; } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 64668c69dda6..5ddc4aefff12 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -81,39 +81,40 @@ #include <net/tcp.h> #include <net/sock.h> #include <net/ip_fib.h> +#include <net/fib_notifier.h> #include <trace/events/fib.h> #include "fib_lookup.h" static int call_fib_entry_notifier(struct notifier_block *nb, struct net *net, enum fib_event_type event_type, u32 dst, - int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 tb_id) + int dst_len, struct fib_alias *fa) { struct fib_entry_notifier_info info = { .dst = dst, .dst_len = dst_len, - .fi = fi, - .tos = tos, - .type = type, - .tb_id = tb_id, + .fi = fa->fa_info, + .tos = fa->fa_tos, + .type = fa->fa_type, + .tb_id = fa->tb_id, }; - return call_fib_notifier(nb, net, event_type, &info.info); + return call_fib4_notifier(nb, net, event_type, &info.info); } static int call_fib_entry_notifiers(struct net *net, enum fib_event_type event_type, u32 dst, - int dst_len, struct fib_info *fi, - u8 tos, u8 type, u32 tb_id) + int dst_len, struct fib_alias *fa, + struct netlink_ext_ack *extack) { struct fib_entry_notifier_info info = { + .info.extack = extack, .dst = dst, .dst_len = dst_len, - .fi = fi, - .tos = tos, - .type = type, - .tb_id = tb_id, + .fi = fa->fa_info, + .tos = fa->fa_tos, + .type = fa->fa_type, + .tb_id = fa->tb_id, }; - return call_fib_notifiers(net, event_type, &info.info); + return call_fib4_notifiers(net, event_type, &info.info); } #define MAX_STAT_DEPTH 32 @@ -1215,9 +1216,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb, new_fa->fa_default = -1; call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, - key, plen, fi, - new_fa->fa_tos, cfg->fc_type, - tb->tb_id); + key, plen, new_fa, extack); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, &cfg->fc_nlinfo, nlflags); @@ -1272,8 +1271,7 @@ int fib_table_insert(struct net *net, struct fib_table *tb, tb->tb_num_default++; rt_cache_flush(cfg->fc_nlinfo.nl_net); - call_fib_entry_notifiers(net, event, key, plen, fi, tos, cfg->fc_type, - tb->tb_id); + call_fib_entry_notifiers(net, event, key, plen, new_fa, extack); rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, new_fa->tb_id, &cfg->fc_nlinfo, nlflags); succeeded: @@ -1562,7 +1560,8 @@ int fib_table_delete(struct net *net, struct fib_table *tb, fi->fib_prefsrc == cfg->fc_prefsrc) && (!cfg->fc_protocol || fi->fib_protocol == cfg->fc_protocol) && - fib_nh_match(cfg, fi, extack) == 0) { + fib_nh_match(cfg, fi, extack) == 0 && + fib_metrics_match(cfg, fi)) { fa_to_delete = fa; break; } @@ -1572,8 +1571,7 @@ int fib_table_delete(struct net *net, struct fib_table *tb, return -ESRCH; call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, key, plen, - fa_to_delete->fa_info, tos, - fa_to_delete->fa_type, tb->tb_id); + fa_to_delete, extack); rtmsg_fib(RTM_DELROUTE, htonl(key), fa_to_delete, plen, tb->tb_id, &cfg->fc_nlinfo, 0); @@ -1890,9 +1888,8 @@ int fib_table_flush(struct net *net, struct fib_table *tb) call_fib_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, n->key, - KEYLENGTH - fa->fa_slen, - fi, fa->fa_tos, fa->fa_type, - tb->tb_id); + KEYLENGTH - fa->fa_slen, fa, + NULL); hlist_del_rcu(&fa->fa_list); fib_release_info(fa->fa_info); alias_free_mem_rcu(fa); @@ -1930,8 +1927,7 @@ static void fib_leaf_notify(struct net *net, struct key_vector *l, continue; call_fib_entry_notifier(nb, net, FIB_EVENT_ENTRY_ADD, l->key, - KEYLENGTH - fa->fa_slen, fi, fa->fa_tos, - fa->fa_type, fa->tb_id); + KEYLENGTH - fa->fa_slen, fa); } } diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index d5cac99170b1..1859c473b21a 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -24,7 +24,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, __be16 protocol = skb->protocol; u16 mac_len = skb->mac_len; int gre_offset, outer_hlen; - bool need_csum, ufo, gso_partial; + bool need_csum, gso_partial; if (!skb->encapsulation) goto out; @@ -47,20 +47,8 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, need_csum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_GRE_CSUM); skb->encap_hdr_csum = need_csum; - ufo = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP); - features &= skb->dev->hw_enc_features; - /* The only checksum offload we care about from here on out is the - * outer one so strip the existing checksum feature flags based - * on the fact that we will be computing our checksum in software. - */ - if (ufo) { - features &= ~NETIF_F_CSUM_MASK; - if (!need_csum) - features |= NETIF_F_HW_CSUM; - } - /* segment inner packet. */ segs = skb_mac_gso_segment(skb, features); if (IS_ERR_OR_NULL(segs)) { @@ -98,7 +86,7 @@ static struct sk_buff *gre_gso_segment(struct sk_buff *skb, greh = (struct gre_base_hdr *)skb_transport_header(skb); pcsum = (__sum16 *)(greh + 1); - if (gso_partial) { + if (gso_partial && skb_is_gso(skb)) { unsigned int partial_adj; /* Adjust checksum to account for the fact that diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index c2be26b98b5f..1617604c9284 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -412,7 +412,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) int type = icmp_param->data.icmph.type; int code = icmp_param->data.icmph.code; - if (ip_options_echo(&icmp_param->replyopts.opt.opt, skb)) + if (ip_options_echo(net, &icmp_param->replyopts.opt.opt, skb)) return; /* Needed by both icmp_global_allow and icmp_xmit_lock */ @@ -694,7 +694,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) iph->tos; mark = IP4_REPLY_MARK(net, skb_in->mark); - if (ip_options_echo(&icmp_param.replyopts.opt.opt, skb_in)) + if (ip_options_echo(net, &icmp_param.replyopts.opt.opt, skb_in)) goto out_unlock; @@ -782,7 +782,7 @@ static bool icmp_tag_validation(int proto) } /* - * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEED, ICMP_QUENCH, and + * Handle ICMP_DEST_UNREACH, ICMP_TIME_EXCEEDED, ICMP_QUENCH, and * ICMP_PARAMETERPROB. */ @@ -810,7 +810,8 @@ static bool icmp_unreach(struct sk_buff *skb) if (iph->ihl < 5) /* Mangled header, drop. */ goto out_err; - if (icmph->type == ICMP_DEST_UNREACH) { + switch (icmph->type) { + case ICMP_DEST_UNREACH: switch (icmph->code & 15) { case ICMP_NET_UNREACH: case ICMP_HOST_UNREACH: @@ -846,8 +847,16 @@ static bool icmp_unreach(struct sk_buff *skb) } if (icmph->code > NR_ICMP_UNREACH) goto out; - } else if (icmph->type == ICMP_PARAMETERPROB) + break; + case ICMP_PARAMETERPROB: info = ntohl(icmph->un.gateway) >> 24; + break; + case ICMP_TIME_EXCEEDED: + __ICMP_INC_STATS(net, ICMP_MIB_INTIMEEXCDS); + if (icmph->code == ICMP_EXC_FRAGTIME) + goto out; + break; + } /* * Throw it at our lower layers @@ -959,8 +968,9 @@ static bool icmp_timestamp(struct sk_buff *skb) */ icmp_param.data.times[1] = inet_current_timestamp(); icmp_param.data.times[2] = icmp_param.data.times[1]; - if (skb_copy_bits(skb, 0, &icmp_param.data.times[0], 4)) - BUG(); + + BUG_ON(skb_copy_bits(skb, 0, &icmp_param.data.times[0], 4)); + icmp_param.data.icmph = *icmp_hdr(skb); icmp_param.data.icmph.type = ICMP_TIMESTAMPREPLY; icmp_param.data.icmph.code = 0; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 498706b072fb..ab183af0b5b6 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1007,10 +1007,18 @@ int igmp_rcv(struct sk_buff *skb) { /* This basically follows the spec line by line -- see RFC1112 */ struct igmphdr *ih; - struct in_device *in_dev = __in_dev_get_rcu(skb->dev); + struct net_device *dev = skb->dev; + struct in_device *in_dev; int len = skb->len; bool dropped = true; + if (netif_is_l3_master(dev)) { + dev = dev_get_by_index_rcu(dev_net(dev), IPCB(skb)->iif); + if (!dev) + goto drop; + } + + in_dev = __in_dev_get_rcu(dev); if (!in_dev) goto drop; @@ -2549,7 +2557,8 @@ done: /* * check if a multicast source filter allows delivery for a given <src,dst,intf> */ -int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif) +int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, + int dif, int sdif) { struct inet_sock *inet = inet_sk(sk); struct ip_mc_socklist *pmc; @@ -2564,7 +2573,8 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif) rcu_read_lock(); for_each_pmc_rcu(inet, pmc) { if (pmc->multi.imr_multiaddr.s_addr == loc_addr && - pmc->multi.imr_ifindex == dif) + (pmc->multi.imr_ifindex == dif || + (sdif && pmc->multi.imr_ifindex == sdif))) break; } ret = inet->mc_all; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 4089c013cb03..4ca46dc08e63 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -39,11 +39,11 @@ EXPORT_SYMBOL(inet_csk_timer_bug_msg); * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY, * and 0.0.0.0 equals to 0.0.0.0 only */ -static int ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6, - const struct in6_addr *sk2_rcv_saddr6, - __be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr, - bool sk1_ipv6only, bool sk2_ipv6only, - bool match_wildcard) +static bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6, + const struct in6_addr *sk2_rcv_saddr6, + __be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr, + bool sk1_ipv6only, bool sk2_ipv6only, + bool match_wildcard) { int addr_type = ipv6_addr_type(sk1_rcv_saddr6); int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; @@ -52,29 +52,29 @@ static int ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6, if (addr_type == IPV6_ADDR_MAPPED && addr_type2 == IPV6_ADDR_MAPPED) { if (!sk2_ipv6only) { if (sk1_rcv_saddr == sk2_rcv_saddr) - return 1; + return true; if (!sk1_rcv_saddr || !sk2_rcv_saddr) return match_wildcard; } - return 0; + return false; } if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY) - return 1; + return true; if (addr_type2 == IPV6_ADDR_ANY && match_wildcard && !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) - return 1; + return true; if (addr_type == IPV6_ADDR_ANY && match_wildcard && !(sk1_ipv6only && addr_type2 == IPV6_ADDR_MAPPED)) - return 1; + return true; if (sk2_rcv_saddr6 && ipv6_addr_equal(sk1_rcv_saddr6, sk2_rcv_saddr6)) - return 1; + return true; - return 0; + return false; } #endif @@ -82,20 +82,20 @@ static int ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6, * match_wildcard == false: addresses must be exactly the same, i.e. * 0.0.0.0 only equals to 0.0.0.0 */ -static int ipv4_rcv_saddr_equal(__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr, - bool sk2_ipv6only, bool match_wildcard) +static bool ipv4_rcv_saddr_equal(__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr, + bool sk2_ipv6only, bool match_wildcard) { if (!sk2_ipv6only) { if (sk1_rcv_saddr == sk2_rcv_saddr) - return 1; + return true; if (!sk1_rcv_saddr || !sk2_rcv_saddr) return match_wildcard; } - return 0; + return false; } -int inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, - bool match_wildcard) +bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, + bool match_wildcard) { #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == AF_INET6) @@ -266,7 +266,7 @@ static inline int sk_reuseport_match(struct inet_bind_bucket *tb, #if IS_ENABLED(CONFIG_IPV6) if (tb->fast_sk_family == AF_INET6) return ipv6_rcv_saddr_equal(&tb->fast_v6_rcv_saddr, - &sk->sk_v6_rcv_saddr, + inet6_rcv_saddr(sk), tb->fast_rcv_saddr, sk->sk_rcv_saddr, tb->fast_ipv6_only, @@ -321,13 +321,14 @@ tb_found: goto fail_unlock; } success: - if (!hlist_empty(&tb->owners)) { + if (hlist_empty(&tb->owners)) { tb->fastreuse = reuse; if (sk->sk_reuseport) { tb->fastreuseport = FASTREUSEPORT_ANY; tb->fastuid = uid; tb->fast_rcv_saddr = sk->sk_rcv_saddr; tb->fast_ipv6_only = ipv6_only_sock(sk); + tb->fast_sk_family = sk->sk_family; #if IS_ENABLED(CONFIG_IPV6) tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr; #endif @@ -354,6 +355,7 @@ success: tb->fastuid = uid; tb->fast_rcv_saddr = sk->sk_rcv_saddr; tb->fast_ipv6_only = ipv6_only_sock(sk); + tb->fast_sk_family = sk->sk_family; #if IS_ENABLED(CONFIG_IPV6) tb->fast_v6_rcv_saddr = sk->sk_v6_rcv_saddr; #endif @@ -473,6 +475,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err, bool kern) } spin_unlock_bh(&queue->fastopenq.lock); } + mem_cgroup_sk_alloc(newsk); out: release_sock(sk); if (req) @@ -492,17 +495,15 @@ EXPORT_SYMBOL(inet_csk_accept); * to optimize. */ void inet_csk_init_xmit_timers(struct sock *sk, - void (*retransmit_handler)(unsigned long), - void (*delack_handler)(unsigned long), - void (*keepalive_handler)(unsigned long)) + void (*retransmit_handler)(struct timer_list *t), + void (*delack_handler)(struct timer_list *t), + void (*keepalive_handler)(struct timer_list *t)) { struct inet_connection_sock *icsk = inet_csk(sk); - setup_timer(&icsk->icsk_retransmit_timer, retransmit_handler, - (unsigned long)sk); - setup_timer(&icsk->icsk_delack_timer, delack_handler, - (unsigned long)sk); - setup_timer(&sk->sk_timer, keepalive_handler, (unsigned long)sk); + timer_setup(&icsk->icsk_retransmit_timer, retransmit_handler, 0); + timer_setup(&icsk->icsk_delack_timer, delack_handler, 0); + timer_setup(&sk->sk_timer, keepalive_handler, 0); icsk->icsk_pending = icsk->icsk_ack.pending = 0; } EXPORT_SYMBOL(inet_csk_init_xmit_timers); @@ -537,9 +538,11 @@ struct dst_entry *inet_csk_route_req(const struct sock *sk, { const struct inet_request_sock *ireq = inet_rsk(req); struct net *net = read_pnet(&ireq->ireq_net); - struct ip_options_rcu *opt = ireq->opt; + struct ip_options_rcu *opt; struct rtable *rt; + opt = ireq_opt_deref(ireq); + flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), @@ -573,10 +576,9 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, struct flowi4 *fl4; struct rtable *rt; + opt = rcu_dereference(ireq->ireq_opt); fl4 = &newinet->cork.fl.u.ip4; - rcu_read_lock(); - opt = rcu_dereference(newinet->inet_opt); flowi4_init_output(fl4, ireq->ir_iif, ireq->ir_mark, RT_CONN_FLAGS(sk), RT_SCOPE_UNIVERSE, sk->sk_protocol, inet_sk_flowi_flags(sk), @@ -589,13 +591,11 @@ struct dst_entry *inet_csk_route_child_sock(const struct sock *sk, goto no_route; if (opt && opt->opt.is_strictroute && rt->rt_uses_gateway) goto route_err; - rcu_read_unlock(); return &rt->dst; route_err: ip_rt_put(rt); no_route: - rcu_read_unlock(); __IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); return NULL; } @@ -674,9 +674,9 @@ void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req } EXPORT_SYMBOL(inet_csk_reqsk_queue_drop_and_put); -static void reqsk_timer_handler(unsigned long data) +static void reqsk_timer_handler(struct timer_list *t) { - struct request_sock *req = (struct request_sock *)data; + struct request_sock *req = from_timer(req, t, rsk_timer); struct sock *sk_listener = req->rsk_listener; struct net *net = sock_net(sk_listener); struct inet_connection_sock *icsk = inet_csk(sk_listener); @@ -747,8 +747,7 @@ static void reqsk_queue_hash_req(struct request_sock *req, req->num_timeout = 0; req->sk = NULL; - setup_pinned_timer(&req->rsk_timer, reqsk_timer_handler, - (unsigned long)req); + timer_setup(&req->rsk_timer, reqsk_timer_handler, TIMER_PINNED); mod_timer(&req->rsk_timer, jiffies + timeout); inet_ehash_insert(req_to_sk(req), NULL); @@ -916,7 +915,6 @@ static void inet_child_forget(struct sock *sk, struct request_sock *req, tcp_sk(child)->fastopen_rsk = NULL; } inet_csk_destroy_sock(child); - reqsk_put(req); } struct sock *inet_csk_reqsk_queue_add(struct sock *sk, @@ -987,6 +985,7 @@ void inet_csk_listen_stop(struct sock *sk) sock_hold(child); inet_child_forget(sk, req, child); + reqsk_put(req); bh_unlock_sock(child); local_bh_enable(); sock_put(child); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 3828b3a805cd..c9c35b61a027 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -93,8 +93,17 @@ void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk) } EXPORT_SYMBOL_GPL(inet_diag_msg_common_fill); -static size_t inet_sk_attr_size(void) +static size_t inet_sk_attr_size(struct sock *sk, + const struct inet_diag_req_v2 *req, + bool net_admin) { + const struct inet_diag_handler *handler; + size_t aux = 0; + + handler = inet_diag_table[req->sdiag_protocol]; + if (handler && handler->idiag_get_aux_size) + aux = handler->idiag_get_aux_size(sk, net_admin); + return nla_total_size(sizeof(struct tcp_info)) + nla_total_size(1) /* INET_DIAG_SHUTDOWN */ + nla_total_size(1) /* INET_DIAG_TOS */ @@ -105,6 +114,7 @@ static size_t inet_sk_attr_size(void) + nla_total_size(SK_MEMINFO_VARS * sizeof(u32)) + nla_total_size(TCP_CA_NAME_MAX) + nla_total_size(sizeof(struct tcpvegas_info)) + + aux + 64; } @@ -260,6 +270,10 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, handler->idiag_get_info(sk, r, info); + if (ext & (1 << (INET_DIAG_INFO - 1)) && handler->idiag_get_aux) + if (handler->idiag_get_aux(sk, net_admin, skb) < 0) + goto errout; + if (sk->sk_state < TCP_TIME_WAIT) { union tcp_cc_info info; size_t sz = 0; @@ -274,6 +288,17 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, goto errout; } + if (ext & (1 << (INET_DIAG_CLASS_ID - 1))) { + u32 classid = 0; + +#ifdef CONFIG_SOCK_CGROUP_DATA + classid = sock_cgroup_classid(&sk->sk_cgrp_data); +#endif + + if (nla_put_u32(skb, INET_DIAG_CLASS_ID, classid)) + goto errout; + } + out: nlmsg_end(skb, nlh); return 0; @@ -438,6 +463,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, const struct nlmsghdr *nlh, const struct inet_diag_req_v2 *req) { + bool net_admin = netlink_net_capable(in_skb, CAP_NET_ADMIN); struct net *net = sock_net(in_skb->sk); struct sk_buff *rep; struct sock *sk; @@ -447,7 +473,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, if (IS_ERR(sk)) return PTR_ERR(sk); - rep = nlmsg_new(inet_sk_attr_size(), GFP_KERNEL); + rep = nlmsg_new(inet_sk_attr_size(sk, req, net_admin), GFP_KERNEL); if (!rep) { err = -ENOMEM; goto out; @@ -456,8 +482,7 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, err = sk_diag_fill(sk, rep, req, sk_user_ns(NETLINK_CB(in_skb).sk), NETLINK_CB(in_skb).portid, - nlh->nlmsg_seq, 0, nlh, - netlink_net_capable(in_skb, CAP_NET_ADMIN)); + nlh->nlmsg_seq, 0, nlh, net_admin); if (err < 0) { WARN_ON(err == -EMSGSIZE); nlmsg_free(rep); diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 96e95e83cc61..26a3d0315728 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -147,7 +147,7 @@ inet_evict_bucket(struct inet_frags *f, struct inet_frag_bucket *hb) spin_unlock(&hb->chain_lock); hlist_for_each_entry_safe(fq, n, &expired, list_evictor) - f->frag_expire((unsigned long) fq); + f->frag_expire(&fq->timer); return evicted; } @@ -164,7 +164,7 @@ static void inet_frag_worker(struct work_struct *work) local_bh_disable(); - for (i = ACCESS_ONCE(f->next_bucket); budget; --budget) { + for (i = READ_ONCE(f->next_bucket); budget; --budget) { evicted += inet_evict_bucket(f, &f->hash[i]); i = (i + 1) & (INETFRAGS_HASHSZ - 1); if (evicted > INETFRAGS_EVICT_MAX) @@ -234,10 +234,8 @@ evict_again: cond_resched(); if (read_seqretry(&f->rnd_seqlock, seq) || - percpu_counter_sum(&nf->mem)) + sum_frag_mem_limit(nf)) goto evict_again; - - percpu_counter_destroy(&nf->mem); } EXPORT_SYMBOL(inet_frags_exit_net); @@ -368,7 +366,7 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf, f->constructor(q, arg); add_frag_mem_limit(nf, f->qsize); - setup_timer(&q->timer, f->frag_expire, (unsigned long)q); + timer_setup(&q->timer, f->frag_expire, 0); spin_lock_init(&q->lock); refcount_set(&q->refcnt, 1); diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 2e3389d614d1..e7d15fb0d94d 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -170,7 +170,7 @@ EXPORT_SYMBOL_GPL(__inet_inherit_port); static inline int compute_score(struct sock *sk, struct net *net, const unsigned short hnum, const __be32 daddr, - const int dif, bool exact_dif) + const int dif, const int sdif, bool exact_dif) { int score = -1; struct inet_sock *inet = inet_sk(sk); @@ -185,9 +185,13 @@ static inline int compute_score(struct sock *sk, struct net *net, score += 4; } if (sk->sk_bound_dev_if || exact_dif) { - if (sk->sk_bound_dev_if != dif) + bool dev_match = (sk->sk_bound_dev_if == dif || + sk->sk_bound_dev_if == sdif); + + if (exact_dif && !dev_match) return -1; - score += 4; + if (sk->sk_bound_dev_if && dev_match) + score += 4; } if (sk->sk_incoming_cpu == raw_smp_processor_id()) score++; @@ -208,7 +212,7 @@ struct sock *__inet_lookup_listener(struct net *net, struct sk_buff *skb, int doff, const __be32 saddr, __be16 sport, const __be32 daddr, const unsigned short hnum, - const int dif) + const int dif, const int sdif) { unsigned int hash = inet_lhashfn(net, hnum); struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; @@ -218,7 +222,8 @@ struct sock *__inet_lookup_listener(struct net *net, u32 phash = 0; sk_for_each_rcu(sk, &ilb->head) { - score = compute_score(sk, net, hnum, daddr, dif, exact_dif); + score = compute_score(sk, net, hnum, daddr, + dif, sdif, exact_dif); if (score > hiscore) { reuseport = sk->sk_reuseport; if (reuseport) { @@ -268,7 +273,7 @@ struct sock *__inet_lookup_established(struct net *net, struct inet_hashinfo *hashinfo, const __be32 saddr, const __be16 sport, const __be32 daddr, const u16 hnum, - const int dif) + const int dif, const int sdif) { INET_ADDR_COOKIE(acookie, saddr, daddr); const __portpair ports = INET_COMBINED_PORTS(sport, hnum); @@ -286,11 +291,12 @@ begin: if (sk->sk_hash != hash) continue; if (likely(INET_MATCH(sk, net, acookie, - saddr, daddr, ports, dif))) { + saddr, daddr, ports, dif, sdif))) { if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt))) goto out; if (unlikely(!INET_MATCH(sk, net, acookie, - saddr, daddr, ports, dif))) { + saddr, daddr, ports, + dif, sdif))) { sock_gen_put(sk); goto begin; } @@ -321,9 +327,10 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, __be32 daddr = inet->inet_rcv_saddr; __be32 saddr = inet->inet_daddr; int dif = sk->sk_bound_dev_if; + struct net *net = sock_net(sk); + int sdif = l3mdev_master_ifindex_by_index(net, dif); INET_ADDR_COOKIE(acookie, saddr, daddr); const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport); - struct net *net = sock_net(sk); unsigned int hash = inet_ehashfn(net, daddr, lport, saddr, inet->inet_dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); @@ -339,7 +346,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, continue; if (likely(INET_MATCH(sk2, net, acookie, - saddr, daddr, ports, dif))) { + saddr, daddr, ports, dif, sdif))) { if (sk2->sk_state == TCP_TIME_WAIT) { tw = inet_twsk(sk2); if (twsk_unique(sk, sk2, twp)) @@ -449,10 +456,7 @@ static int inet_reuseport_add_sock(struct sock *sk, return reuseport_add_sock(sk, sk2); } - /* Initial allocation may have already happened via setsockopt */ - if (!rcu_access_pointer(sk->sk_reuseport_cb)) - return reuseport_alloc(sk); - return 0; + return reuseport_alloc(sk); } int __inet_hash(struct sock *sk, struct sock *osk) diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 5b039159e67a..c690cd0d9b3f 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -9,7 +9,6 @@ */ #include <linux/kernel.h> -#include <linux/kmemcheck.h> #include <linux/slab.h> #include <linux/module.h> #include <net/inet_hashtables.h> @@ -142,9 +141,9 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk, } EXPORT_SYMBOL_GPL(__inet_twsk_hashdance); -static void tw_timer_handler(unsigned long data) +static void tw_timer_handler(struct timer_list *t) { - struct inet_timewait_sock *tw = (struct inet_timewait_sock *)data; + struct inet_timewait_sock *tw = from_timer(tw, t, tw_timer); if (tw->tw_kill) __NET_INC_STATS(twsk_net(tw), LINUX_MIB_TIMEWAITKILLED); @@ -167,8 +166,6 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, if (tw) { const struct inet_sock *inet = inet_sk(sk); - kmemcheck_annotate_bitfield(tw, flags); - tw->tw_dr = dr; /* Give us an identity. */ tw->tw_daddr = inet->inet_daddr; @@ -188,8 +185,7 @@ struct inet_timewait_sock *inet_twsk_alloc(const struct sock *sk, tw->tw_prot = sk->sk_prot_creator; atomic64_set(&tw->tw_cookie, atomic64_read(&sk->sk_cookie)); twsk_net_set(tw, sock_net(sk)); - setup_pinned_timer(&tw->tw_timer, tw_timer_handler, - (unsigned long)tw); + timer_setup(&tw->tw_timer, tw_timer_handler, TIMER_PINNED); /* * Because we use RCU lookups, we should not set tw_refcnt * to a non null value before everything is setup for this diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index c5a117cc6619..914d56928578 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -33,7 +33,7 @@ * also be removed if the pool is overloaded i.e. if the total amount of * entries is greater-or-equal than the threshold. * - * Node pool is organised as an AVL tree. + * Node pool is organised as an RB tree. * Such an implementation has been chosen not just for fun. It's a way to * prevent easy and efficient DoS attacks by creating hash collisions. A huge * amount of long living nodes in a single hash slot would significantly delay @@ -45,7 +45,7 @@ * AND reference count being 0. * 3. Global variable peer_total is modified under the pool lock. * 4. struct inet_peer fields modification: - * avl_left, avl_right, avl_parent, avl_height: pool lock + * rb_node: pool lock * refcnt: atomically against modifications on other CPU; * usually under some other lock to prevent node disappearing * daddr: unchangeable @@ -53,30 +53,15 @@ static struct kmem_cache *peer_cachep __read_mostly; -static LIST_HEAD(gc_list); -static const int gc_delay = 60 * HZ; -static struct delayed_work gc_work; -static DEFINE_SPINLOCK(gc_lock); - -#define node_height(x) x->avl_height - -#define peer_avl_empty ((struct inet_peer *)&peer_fake_node) -#define peer_avl_empty_rcu ((struct inet_peer __rcu __force *)&peer_fake_node) -static const struct inet_peer peer_fake_node = { - .avl_left = peer_avl_empty_rcu, - .avl_right = peer_avl_empty_rcu, - .avl_height = 0 -}; - void inet_peer_base_init(struct inet_peer_base *bp) { - bp->root = peer_avl_empty_rcu; + bp->rb_root = RB_ROOT; seqlock_init(&bp->lock); bp->total = 0; } EXPORT_SYMBOL_GPL(inet_peer_base_init); -#define PEER_MAXDEPTH 40 /* sufficient for about 2^27 nodes */ +#define PEER_MAX_GC 32 /* Exported for sysctl_net_ipv4. */ int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries more @@ -84,53 +69,6 @@ int inet_peer_threshold __read_mostly = 65536 + 128; /* start to throw entries m int inet_peer_minttl __read_mostly = 120 * HZ; /* TTL under high load: 120 sec */ int inet_peer_maxttl __read_mostly = 10 * 60 * HZ; /* usual time to live: 10 min */ -static void inetpeer_gc_worker(struct work_struct *work) -{ - struct inet_peer *p, *n, *c; - struct list_head list; - - spin_lock_bh(&gc_lock); - list_replace_init(&gc_list, &list); - spin_unlock_bh(&gc_lock); - - if (list_empty(&list)) - return; - - list_for_each_entry_safe(p, n, &list, gc_list) { - - if (need_resched()) - cond_resched(); - - c = rcu_dereference_protected(p->avl_left, 1); - if (c != peer_avl_empty) { - list_add_tail(&c->gc_list, &list); - p->avl_left = peer_avl_empty_rcu; - } - - c = rcu_dereference_protected(p->avl_right, 1); - if (c != peer_avl_empty) { - list_add_tail(&c->gc_list, &list); - p->avl_right = peer_avl_empty_rcu; - } - - n = list_entry(p->gc_list.next, struct inet_peer, gc_list); - - if (refcount_read(&p->refcnt) == 1) { - list_del(&p->gc_list); - kmem_cache_free(peer_cachep, p); - } - } - - if (list_empty(&list)) - return; - - spin_lock_bh(&gc_lock); - list_splice(&list, &gc_list); - spin_unlock_bh(&gc_lock); - - schedule_delayed_work(&gc_work, gc_delay); -} - /* Called from ip_output.c:ip_init */ void __init inet_initpeers(void) { @@ -153,225 +91,65 @@ void __init inet_initpeers(void) sizeof(struct inet_peer), 0, SLAB_HWCACHE_ALIGN | SLAB_PANIC, NULL); - - INIT_DEFERRABLE_WORK(&gc_work, inetpeer_gc_worker); } -#define rcu_deref_locked(X, BASE) \ - rcu_dereference_protected(X, lockdep_is_held(&(BASE)->lock.lock)) - -/* - * Called with local BH disabled and the pool lock held. - */ -#define lookup(_daddr, _stack, _base) \ -({ \ - struct inet_peer *u; \ - struct inet_peer __rcu **v; \ - \ - stackptr = _stack; \ - *stackptr++ = &_base->root; \ - for (u = rcu_deref_locked(_base->root, _base); \ - u != peer_avl_empty;) { \ - int cmp = inetpeer_addr_cmp(_daddr, &u->daddr); \ - if (cmp == 0) \ - break; \ - if (cmp == -1) \ - v = &u->avl_left; \ - else \ - v = &u->avl_right; \ - *stackptr++ = v; \ - u = rcu_deref_locked(*v, _base); \ - } \ - u; \ -}) - -/* - * Called with rcu_read_lock() - * Because we hold no lock against a writer, its quite possible we fall - * in an endless loop. - * But every pointer we follow is guaranteed to be valid thanks to RCU. - * We exit from this function if number of links exceeds PEER_MAXDEPTH - */ -static struct inet_peer *lookup_rcu(const struct inetpeer_addr *daddr, - struct inet_peer_base *base) +/* Called with rcu_read_lock() or base->lock held */ +static struct inet_peer *lookup(const struct inetpeer_addr *daddr, + struct inet_peer_base *base, + unsigned int seq, + struct inet_peer *gc_stack[], + unsigned int *gc_cnt, + struct rb_node **parent_p, + struct rb_node ***pp_p) { - struct inet_peer *u = rcu_dereference(base->root); - int count = 0; + struct rb_node **pp, *parent, *next; + struct inet_peer *p; - while (u != peer_avl_empty) { - int cmp = inetpeer_addr_cmp(daddr, &u->daddr); + pp = &base->rb_root.rb_node; + parent = NULL; + while (1) { + int cmp; + + next = rcu_dereference_raw(*pp); + if (!next) + break; + parent = next; + p = rb_entry(parent, struct inet_peer, rb_node); + cmp = inetpeer_addr_cmp(daddr, &p->daddr); if (cmp == 0) { - /* Before taking a reference, check if this entry was - * deleted (refcnt=0) - */ - if (!refcount_inc_not_zero(&u->refcnt)) { - u = NULL; - } - return u; + if (!refcount_inc_not_zero(&p->refcnt)) + break; + return p; + } + if (gc_stack) { + if (*gc_cnt < PEER_MAX_GC) + gc_stack[(*gc_cnt)++] = p; + } else if (unlikely(read_seqretry(&base->lock, seq))) { + break; } if (cmp == -1) - u = rcu_dereference(u->avl_left); + pp = &next->rb_left; else - u = rcu_dereference(u->avl_right); - if (unlikely(++count == PEER_MAXDEPTH)) - break; + pp = &next->rb_right; } + *parent_p = parent; + *pp_p = pp; return NULL; } -/* Called with local BH disabled and the pool lock held. */ -#define lookup_rightempty(start, base) \ -({ \ - struct inet_peer *u; \ - struct inet_peer __rcu **v; \ - *stackptr++ = &start->avl_left; \ - v = &start->avl_left; \ - for (u = rcu_deref_locked(*v, base); \ - u->avl_right != peer_avl_empty_rcu;) { \ - v = &u->avl_right; \ - *stackptr++ = v; \ - u = rcu_deref_locked(*v, base); \ - } \ - u; \ -}) - -/* Called with local BH disabled and the pool lock held. - * Variable names are the proof of operation correctness. - * Look into mm/map_avl.c for more detail description of the ideas. - */ -static void peer_avl_rebalance(struct inet_peer __rcu **stack[], - struct inet_peer __rcu ***stackend, - struct inet_peer_base *base) -{ - struct inet_peer __rcu **nodep; - struct inet_peer *node, *l, *r; - int lh, rh; - - while (stackend > stack) { - nodep = *--stackend; - node = rcu_deref_locked(*nodep, base); - l = rcu_deref_locked(node->avl_left, base); - r = rcu_deref_locked(node->avl_right, base); - lh = node_height(l); - rh = node_height(r); - if (lh > rh + 1) { /* l: RH+2 */ - struct inet_peer *ll, *lr, *lrl, *lrr; - int lrh; - ll = rcu_deref_locked(l->avl_left, base); - lr = rcu_deref_locked(l->avl_right, base); - lrh = node_height(lr); - if (lrh <= node_height(ll)) { /* ll: RH+1 */ - RCU_INIT_POINTER(node->avl_left, lr); /* lr: RH or RH+1 */ - RCU_INIT_POINTER(node->avl_right, r); /* r: RH */ - node->avl_height = lrh + 1; /* RH+1 or RH+2 */ - RCU_INIT_POINTER(l->avl_left, ll); /* ll: RH+1 */ - RCU_INIT_POINTER(l->avl_right, node); /* node: RH+1 or RH+2 */ - l->avl_height = node->avl_height + 1; - RCU_INIT_POINTER(*nodep, l); - } else { /* ll: RH, lr: RH+1 */ - lrl = rcu_deref_locked(lr->avl_left, base);/* lrl: RH or RH-1 */ - lrr = rcu_deref_locked(lr->avl_right, base);/* lrr: RH or RH-1 */ - RCU_INIT_POINTER(node->avl_left, lrr); /* lrr: RH or RH-1 */ - RCU_INIT_POINTER(node->avl_right, r); /* r: RH */ - node->avl_height = rh + 1; /* node: RH+1 */ - RCU_INIT_POINTER(l->avl_left, ll); /* ll: RH */ - RCU_INIT_POINTER(l->avl_right, lrl); /* lrl: RH or RH-1 */ - l->avl_height = rh + 1; /* l: RH+1 */ - RCU_INIT_POINTER(lr->avl_left, l); /* l: RH+1 */ - RCU_INIT_POINTER(lr->avl_right, node); /* node: RH+1 */ - lr->avl_height = rh + 2; - RCU_INIT_POINTER(*nodep, lr); - } - } else if (rh > lh + 1) { /* r: LH+2 */ - struct inet_peer *rr, *rl, *rlr, *rll; - int rlh; - rr = rcu_deref_locked(r->avl_right, base); - rl = rcu_deref_locked(r->avl_left, base); - rlh = node_height(rl); - if (rlh <= node_height(rr)) { /* rr: LH+1 */ - RCU_INIT_POINTER(node->avl_right, rl); /* rl: LH or LH+1 */ - RCU_INIT_POINTER(node->avl_left, l); /* l: LH */ - node->avl_height = rlh + 1; /* LH+1 or LH+2 */ - RCU_INIT_POINTER(r->avl_right, rr); /* rr: LH+1 */ - RCU_INIT_POINTER(r->avl_left, node); /* node: LH+1 or LH+2 */ - r->avl_height = node->avl_height + 1; - RCU_INIT_POINTER(*nodep, r); - } else { /* rr: RH, rl: RH+1 */ - rlr = rcu_deref_locked(rl->avl_right, base);/* rlr: LH or LH-1 */ - rll = rcu_deref_locked(rl->avl_left, base);/* rll: LH or LH-1 */ - RCU_INIT_POINTER(node->avl_right, rll); /* rll: LH or LH-1 */ - RCU_INIT_POINTER(node->avl_left, l); /* l: LH */ - node->avl_height = lh + 1; /* node: LH+1 */ - RCU_INIT_POINTER(r->avl_right, rr); /* rr: LH */ - RCU_INIT_POINTER(r->avl_left, rlr); /* rlr: LH or LH-1 */ - r->avl_height = lh + 1; /* r: LH+1 */ - RCU_INIT_POINTER(rl->avl_right, r); /* r: LH+1 */ - RCU_INIT_POINTER(rl->avl_left, node); /* node: LH+1 */ - rl->avl_height = lh + 2; - RCU_INIT_POINTER(*nodep, rl); - } - } else { - node->avl_height = (lh > rh ? lh : rh) + 1; - } - } -} - -/* Called with local BH disabled and the pool lock held. */ -#define link_to_pool(n, base) \ -do { \ - n->avl_height = 1; \ - n->avl_left = peer_avl_empty_rcu; \ - n->avl_right = peer_avl_empty_rcu; \ - /* lockless readers can catch us now */ \ - rcu_assign_pointer(**--stackptr, n); \ - peer_avl_rebalance(stack, stackptr, base); \ -} while (0) - static void inetpeer_free_rcu(struct rcu_head *head) { kmem_cache_free(peer_cachep, container_of(head, struct inet_peer, rcu)); } -static void unlink_from_pool(struct inet_peer *p, struct inet_peer_base *base, - struct inet_peer __rcu **stack[PEER_MAXDEPTH]) -{ - struct inet_peer __rcu ***stackptr, ***delp; - - if (lookup(&p->daddr, stack, base) != p) - BUG(); - delp = stackptr - 1; /* *delp[0] == p */ - if (p->avl_left == peer_avl_empty_rcu) { - *delp[0] = p->avl_right; - --stackptr; - } else { - /* look for a node to insert instead of p */ - struct inet_peer *t; - t = lookup_rightempty(p, base); - BUG_ON(rcu_deref_locked(*stackptr[-1], base) != t); - **--stackptr = t->avl_left; - /* t is removed, t->daddr > x->daddr for any - * x in p->avl_left subtree. - * Put t in the old place of p. */ - RCU_INIT_POINTER(*delp[0], t); - t->avl_left = p->avl_left; - t->avl_right = p->avl_right; - t->avl_height = p->avl_height; - BUG_ON(delp[1] != &p->avl_left); - delp[1] = &t->avl_left; /* was &p->avl_left */ - } - peer_avl_rebalance(stack, stackptr, base); - base->total--; - call_rcu(&p->rcu, inetpeer_free_rcu); -} - /* perform garbage collect on all items stacked during a lookup */ -static int inet_peer_gc(struct inet_peer_base *base, - struct inet_peer __rcu **stack[PEER_MAXDEPTH], - struct inet_peer __rcu ***stackptr) +static void inet_peer_gc(struct inet_peer_base *base, + struct inet_peer *gc_stack[], + unsigned int gc_cnt) { - struct inet_peer *p, *gchead = NULL; + struct inet_peer *p; __u32 delta, ttl; - int cnt = 0; + int i; if (base->total >= inet_peer_threshold) ttl = 0; /* be aggressive */ @@ -379,43 +157,38 @@ static int inet_peer_gc(struct inet_peer_base *base, ttl = inet_peer_maxttl - (inet_peer_maxttl - inet_peer_minttl) / HZ * base->total / inet_peer_threshold * HZ; - stackptr--; /* last stack slot is peer_avl_empty */ - while (stackptr > stack) { - stackptr--; - p = rcu_deref_locked(**stackptr, base); - if (refcount_read(&p->refcnt) == 1) { - smp_rmb(); - delta = (__u32)jiffies - p->dtime; - if (delta >= ttl && refcount_dec_if_one(&p->refcnt)) { - p->gc_next = gchead; - gchead = p; - } - } + for (i = 0; i < gc_cnt; i++) { + p = gc_stack[i]; + delta = (__u32)jiffies - p->dtime; + if (delta < ttl || !refcount_dec_if_one(&p->refcnt)) + gc_stack[i] = NULL; } - while ((p = gchead) != NULL) { - gchead = p->gc_next; - cnt++; - unlink_from_pool(p, base, stack); + for (i = 0; i < gc_cnt; i++) { + p = gc_stack[i]; + if (p) { + rb_erase(&p->rb_node, &base->rb_root); + base->total--; + call_rcu(&p->rcu, inetpeer_free_rcu); + } } - return cnt; } struct inet_peer *inet_getpeer(struct inet_peer_base *base, const struct inetpeer_addr *daddr, int create) { - struct inet_peer __rcu **stack[PEER_MAXDEPTH], ***stackptr; - struct inet_peer *p; - unsigned int sequence; - int invalidated, gccnt = 0; + struct inet_peer *p, *gc_stack[PEER_MAX_GC]; + struct rb_node **pp, *parent; + unsigned int gc_cnt, seq; + int invalidated; /* Attempt a lockless lookup first. * Because of a concurrent writer, we might not find an existing entry. */ rcu_read_lock(); - sequence = read_seqbegin(&base->lock); - p = lookup_rcu(daddr, base); - invalidated = read_seqretry(&base->lock, sequence); + seq = read_seqbegin(&base->lock); + p = lookup(daddr, base, seq, NULL, &gc_cnt, &parent, &pp); + invalidated = read_seqretry(&base->lock, seq); rcu_read_unlock(); if (p) @@ -428,36 +201,31 @@ struct inet_peer *inet_getpeer(struct inet_peer_base *base, /* retry an exact lookup, taking the lock before. * At least, nodes should be hot in our cache. */ + parent = NULL; write_seqlock_bh(&base->lock); -relookup: - p = lookup(daddr, stack, base); - if (p != peer_avl_empty) { - refcount_inc(&p->refcnt); - write_sequnlock_bh(&base->lock); - return p; - } - if (!gccnt) { - gccnt = inet_peer_gc(base, stack, stackptr); - if (gccnt && create) - goto relookup; - } - p = create ? kmem_cache_alloc(peer_cachep, GFP_ATOMIC) : NULL; - if (p) { - p->daddr = *daddr; - refcount_set(&p->refcnt, 2); - atomic_set(&p->rid, 0); - p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; - p->rate_tokens = 0; - /* 60*HZ is arbitrary, but chosen enough high so that the first - * calculation of tokens is at its maximum. - */ - p->rate_last = jiffies - 60*HZ; - INIT_LIST_HEAD(&p->gc_list); - - /* Link the node. */ - link_to_pool(p, base); - base->total++; + + gc_cnt = 0; + p = lookup(daddr, base, seq, gc_stack, &gc_cnt, &parent, &pp); + if (!p && create) { + p = kmem_cache_alloc(peer_cachep, GFP_ATOMIC); + if (p) { + p->daddr = *daddr; + refcount_set(&p->refcnt, 2); + atomic_set(&p->rid, 0); + p->metrics[RTAX_LOCK-1] = INETPEER_METRICS_NEW; + p->rate_tokens = 0; + /* 60*HZ is arbitrary, but chosen enough high so that the first + * calculation of tokens is at its maximum. + */ + p->rate_last = jiffies - 60*HZ; + + rb_link_node(&p->rb_node, parent, pp); + rb_insert_color(&p->rb_node, &base->rb_root); + base->total++; + } } + if (gc_cnt) + inet_peer_gc(base, gc_stack, gc_cnt); write_sequnlock_bh(&base->lock); return p; @@ -467,8 +235,9 @@ EXPORT_SYMBOL_GPL(inet_getpeer); void inet_putpeer(struct inet_peer *p) { p->dtime = (__u32)jiffies; - smp_mb__before_atomic(); - refcount_dec(&p->refcnt); + + if (refcount_dec_and_test(&p->refcnt)) + call_rcu(&p->rcu, inetpeer_free_rcu); } EXPORT_SYMBOL_GPL(inet_putpeer); @@ -513,30 +282,19 @@ bool inet_peer_xrlim_allow(struct inet_peer *peer, int timeout) } EXPORT_SYMBOL(inet_peer_xrlim_allow); -static void inetpeer_inval_rcu(struct rcu_head *head) -{ - struct inet_peer *p = container_of(head, struct inet_peer, gc_rcu); - - spin_lock_bh(&gc_lock); - list_add_tail(&p->gc_list, &gc_list); - spin_unlock_bh(&gc_lock); - - schedule_delayed_work(&gc_work, gc_delay); -} - void inetpeer_invalidate_tree(struct inet_peer_base *base) { - struct inet_peer *root; + struct rb_node *p = rb_first(&base->rb_root); - write_seqlock_bh(&base->lock); + while (p) { + struct inet_peer *peer = rb_entry(p, struct inet_peer, rb_node); - root = rcu_deref_locked(base->root, base); - if (root != peer_avl_empty) { - base->root = peer_avl_empty_rcu; - base->total = 0; - call_rcu(&root->gc_rcu, inetpeer_inval_rcu); + p = rb_next(p); + rb_erase(&peer->rb_node, &base->rb_root); + inet_putpeer(peer); + cond_resched(); } - write_sequnlock_bh(&base->lock); + base->total = 0; } EXPORT_SYMBOL(inetpeer_invalidate_tree); diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 9f0a7b96646f..2dd21c3281a1 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 9a8cfac503dc..bbf1b94942c0 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket @@ -190,12 +191,13 @@ static bool frag_expire_skip_icmp(u32 user) /* * Oops, a fragment queue timed out. Kill it and send an ICMP reply. */ -static void ip_expire(unsigned long arg) +static void ip_expire(struct timer_list *t) { + struct inet_frag_queue *frag = from_timer(frag, t, timer); struct ipq *qp; struct net *net; - qp = container_of((struct inet_frag_queue *) arg, struct ipq, q); + qp = container_of(frag, struct ipq, q); net = container_of(qp->q.net, struct net, ipv4.frags); rcu_read_lock(); @@ -844,8 +846,6 @@ static void __init ip4_frags_ctl_register(void) static int __net_init ipv4_frags_init_net(struct net *net) { - int res; - /* Fragment cache limits. * * The fragment memory accounting code, (tries to) account for @@ -871,13 +871,9 @@ static int __net_init ipv4_frags_init_net(struct net *net) net->ipv4.frags.max_dist = 64; - res = inet_frags_init_net(&net->ipv4.frags); - if (res) - return res; - res = ip4_frags_ns_ctl_register(net); - if (res) - inet_frags_uninit_net(&net->ipv4.frags); - return res; + inet_frags_init_net(&net->ipv4.frags); + + return ip4_frags_ns_ctl_register(net); } static void __net_exit ipv4_frags_exit_net(struct net *net) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 7a7829e839c2..bb6239169b1a 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -48,6 +48,7 @@ #include <net/rtnetlink.h> #include <net/gre.h> #include <net/dst_metadata.h> +#include <net/erspan.h> /* Problems & solutions @@ -112,9 +113,12 @@ MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); static struct rtnl_link_ops ipgre_link_ops __read_mostly; static int ipgre_tunnel_init(struct net_device *dev); +static void erspan_build_header(struct sk_buff *skb, + __be32 id, u32 index, bool truncate); static unsigned int ipgre_net_id __read_mostly; static unsigned int gre_tap_net_id __read_mostly; +static unsigned int erspan_net_id __read_mostly; static void ipgre_err(struct sk_buff *skb, u32 info, const struct tnl_ptk_info *tpi) @@ -246,6 +250,79 @@ static void gre_err(struct sk_buff *skb, u32 info) ipgre_err(skb, info, &tpi); } +static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi, + int gre_hdr_len) +{ + struct net *net = dev_net(skb->dev); + struct metadata_dst *tun_dst = NULL; + struct ip_tunnel_net *itn; + struct ip_tunnel *tunnel; + struct erspanhdr *ershdr; + const struct iphdr *iph; + __be32 index; + int len; + + itn = net_generic(net, erspan_net_id); + len = gre_hdr_len + sizeof(*ershdr); + + if (unlikely(!pskb_may_pull(skb, len))) + return -ENOMEM; + + iph = ip_hdr(skb); + ershdr = (struct erspanhdr *)(skb->data + gre_hdr_len); + + /* The original GRE header does not have key field, + * Use ERSPAN 10-bit session ID as key. + */ + tpi->key = cpu_to_be32(ntohs(ershdr->session_id) & ID_MASK); + index = ershdr->md.index; + tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex, + tpi->flags | TUNNEL_KEY, + iph->saddr, iph->daddr, tpi->key); + + if (tunnel) { + if (__iptunnel_pull_header(skb, + gre_hdr_len + sizeof(*ershdr), + htons(ETH_P_TEB), + false, false) < 0) + goto drop; + + if (tunnel->collect_md) { + struct ip_tunnel_info *info; + struct erspan_metadata *md; + __be64 tun_id; + __be16 flags; + + tpi->flags |= TUNNEL_KEY; + flags = tpi->flags; + tun_id = key32_to_tunnel_id(tpi->key); + + tun_dst = ip_tun_rx_dst(skb, flags, + tun_id, sizeof(*md)); + if (!tun_dst) + return PACKET_REJECT; + + md = ip_tunnel_info_opts(&tun_dst->u.tun_info); + if (!md) + return PACKET_REJECT; + + md->index = index; + info = &tun_dst->u.tun_info; + info->key.tun_flags |= TUNNEL_ERSPAN_OPT; + info->options_len = sizeof(*md); + } else { + tunnel->index = ntohl(index); + } + + skb_reset_mac_header(skb); + ip_tunnel_rcv(tunnel, skb, tpi, tun_dst, log_ecn_error); + return PACKET_RCVD; + } +drop: + kfree_skb(skb); + return PACKET_RCVD; +} + static int __ipgre_rcv(struct sk_buff *skb, const struct tnl_ptk_info *tpi, struct ip_tunnel_net *itn, int hdr_len, bool raw_proto) { @@ -328,6 +405,11 @@ static int gre_rcv(struct sk_buff *skb) if (hdr_len < 0) goto drop; + if (unlikely(tpi.proto == htons(ETH_P_ERSPAN))) { + if (erspan_rcv(skb, &tpi, hdr_len) == PACKET_RCVD) + return 0; + } + if (ipgre_rcv(skb, &tpi, hdr_len) == PACKET_RCVD) return 0; @@ -376,39 +458,33 @@ static struct rtable *gre_get_rt(struct sk_buff *skb, return ip_route_output_key(net, fl); } -static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, - __be16 proto) +static struct rtable *prepare_fb_xmit(struct sk_buff *skb, + struct net_device *dev, + struct flowi4 *fl, + int tunnel_hlen) { struct ip_tunnel_info *tun_info; const struct ip_tunnel_key *key; struct rtable *rt = NULL; - struct flowi4 fl; int min_headroom; - int tunnel_hlen; - __be16 df, flags; bool use_cache; int err; tun_info = skb_tunnel_info(skb); - if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || - ip_tunnel_info_af(tun_info) != AF_INET)) - goto err_free_skb; - key = &tun_info->key; use_cache = ip_tunnel_dst_cache_usable(skb, tun_info); + if (use_cache) - rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl.saddr); + rt = dst_cache_get_ip4(&tun_info->dst_cache, &fl->saddr); if (!rt) { - rt = gre_get_rt(skb, dev, &fl, key); + rt = gre_get_rt(skb, dev, fl, key); if (IS_ERR(rt)) - goto err_free_skb; + goto err_free_skb; if (use_cache) dst_cache_set_ip4(&tun_info->dst_cache, &rt->dst, - fl.saddr); + fl->saddr); } - tunnel_hlen = gre_calc_hlen(key->tun_flags); - min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len + tunnel_hlen + sizeof(struct iphdr); if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) { @@ -420,6 +496,37 @@ static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, if (unlikely(err)) goto err_free_rt; } + return rt; + +err_free_rt: + ip_rt_put(rt); +err_free_skb: + kfree_skb(skb); + dev->stats.tx_dropped++; + return NULL; +} + +static void gre_fb_xmit(struct sk_buff *skb, struct net_device *dev, + __be16 proto) +{ + struct ip_tunnel_info *tun_info; + const struct ip_tunnel_key *key; + struct rtable *rt = NULL; + struct flowi4 fl; + int tunnel_hlen; + __be16 df, flags; + + tun_info = skb_tunnel_info(skb); + if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || + ip_tunnel_info_af(tun_info) != AF_INET)) + goto err_free_skb; + + key = &tun_info->key; + tunnel_hlen = gre_calc_hlen(key->tun_flags); + + rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen); + if (!rt) + return; /* Push Tunnel header. */ if (gre_handle_offloads(skb, !!(tun_info->key.tun_flags & TUNNEL_CSUM))) @@ -442,6 +549,64 @@ err_free_skb: dev->stats.tx_dropped++; } +static void erspan_fb_xmit(struct sk_buff *skb, struct net_device *dev, + __be16 proto) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + struct ip_tunnel_info *tun_info; + const struct ip_tunnel_key *key; + struct erspan_metadata *md; + struct rtable *rt = NULL; + bool truncate = false; + struct flowi4 fl; + int tunnel_hlen; + __be16 df; + + tun_info = skb_tunnel_info(skb); + if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) || + ip_tunnel_info_af(tun_info) != AF_INET)) + goto err_free_skb; + + key = &tun_info->key; + + /* ERSPAN has fixed 8 byte GRE header */ + tunnel_hlen = 8 + sizeof(struct erspanhdr); + + rt = prepare_fb_xmit(skb, dev, &fl, tunnel_hlen); + if (!rt) + return; + + if (gre_handle_offloads(skb, false)) + goto err_free_rt; + + if (skb->len > dev->mtu + dev->hard_header_len) { + pskb_trim(skb, dev->mtu + dev->hard_header_len); + truncate = true; + } + + md = ip_tunnel_info_opts(tun_info); + if (!md) + goto err_free_rt; + + erspan_build_header(skb, tunnel_id_to_key32(key->tun_id), + ntohl(md->index), truncate); + + gre_build_header(skb, 8, TUNNEL_SEQ, + htons(ETH_P_ERSPAN), 0, htonl(tunnel->o_seqno++)); + + df = key->tun_flags & TUNNEL_DONT_FRAGMENT ? htons(IP_DF) : 0; + + iptunnel_xmit(skb->sk, rt, skb, fl.saddr, key->u.ipv4.dst, IPPROTO_GRE, + key->tos, key->ttl, df, false); + return; + +err_free_rt: + ip_rt_put(rt); +err_free_skb: + kfree_skb(skb); + dev->stats.tx_dropped++; +} + static int gre_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb) { struct ip_tunnel_info *info = skb_tunnel_info(skb); @@ -503,6 +668,86 @@ free_skb: return NETDEV_TX_OK; } +static inline u8 tos_to_cos(u8 tos) +{ + u8 dscp, cos; + + dscp = tos >> 2; + cos = dscp >> 3; + return cos; +} + +static void erspan_build_header(struct sk_buff *skb, + __be32 id, u32 index, bool truncate) +{ + struct iphdr *iphdr = ip_hdr(skb); + struct ethhdr *eth = eth_hdr(skb); + enum erspan_encap_type enc_type; + struct erspanhdr *ershdr; + struct qtag_prefix { + __be16 eth_type; + __be16 tci; + } *qp; + u16 vlan_tci = 0; + + enc_type = ERSPAN_ENCAP_NOVLAN; + + /* If mirrored packet has vlan tag, extract tci and + * perserve vlan header in the mirrored frame. + */ + if (eth->h_proto == htons(ETH_P_8021Q)) { + qp = (struct qtag_prefix *)(skb->data + 2 * ETH_ALEN); + vlan_tci = ntohs(qp->tci); + enc_type = ERSPAN_ENCAP_INFRAME; + } + + skb_push(skb, sizeof(*ershdr)); + ershdr = (struct erspanhdr *)skb->data; + memset(ershdr, 0, sizeof(*ershdr)); + + ershdr->ver_vlan = htons((vlan_tci & VLAN_MASK) | + (ERSPAN_VERSION << VER_OFFSET)); + ershdr->session_id = htons((u16)(ntohl(id) & ID_MASK) | + ((tos_to_cos(iphdr->tos) << COS_OFFSET) & COS_MASK) | + (enc_type << EN_OFFSET & EN_MASK) | + ((truncate << T_OFFSET) & T_MASK)); + ershdr->md.index = htonl(index & INDEX_MASK); +} + +static netdev_tx_t erspan_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + bool truncate = false; + + if (tunnel->collect_md) { + erspan_fb_xmit(skb, dev, skb->protocol); + return NETDEV_TX_OK; + } + + if (gre_handle_offloads(skb, false)) + goto free_skb; + + if (skb_cow_head(skb, dev->needed_headroom)) + goto free_skb; + + if (skb->len > dev->mtu + dev->hard_header_len) { + pskb_trim(skb, dev->mtu + dev->hard_header_len); + truncate = true; + } + + /* Push ERSPAN header */ + erspan_build_header(skb, tunnel->parms.o_key, tunnel->index, truncate); + tunnel->parms.o_flags &= ~TUNNEL_KEY; + __gre_xmit(skb, dev, &tunnel->parms.iph, htons(ETH_P_ERSPAN)); + return NETDEV_TX_OK; + +free_skb: + kfree_skb(skb); + dev->stats.tx_dropped++; + return NETDEV_TX_OK; +} + static netdev_tx_t gre_tap_xmit(struct sk_buff *skb, struct net_device *dev) { @@ -528,20 +773,46 @@ free_skb: return NETDEV_TX_OK; } +static void ipgre_link_update(struct net_device *dev, bool set_mtu) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + int len; + + len = tunnel->tun_hlen; + tunnel->tun_hlen = gre_calc_hlen(tunnel->parms.o_flags); + len = tunnel->tun_hlen - len; + tunnel->hlen = tunnel->hlen + len; + + dev->needed_headroom = dev->needed_headroom + len; + if (set_mtu) + dev->mtu = max_t(int, dev->mtu - len, 68); + + if (!(tunnel->parms.o_flags & TUNNEL_SEQ)) { + if (!(tunnel->parms.o_flags & TUNNEL_CSUM) || + tunnel->encap.type == TUNNEL_ENCAP_NONE) { + dev->features |= NETIF_F_GSO_SOFTWARE; + dev->hw_features |= NETIF_F_GSO_SOFTWARE; + } + dev->features |= NETIF_F_LLTX; + } +} + static int ipgre_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { - int err; struct ip_tunnel_parm p; + int err; if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) return -EFAULT; + if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) { if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE || - p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)) || - ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING))) + p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) || + ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING))) return -EINVAL; } + p.i_flags = gre_flags_to_tnl_flags(p.i_flags); p.o_flags = gre_flags_to_tnl_flags(p.o_flags); @@ -549,11 +820,22 @@ static int ipgre_tunnel_ioctl(struct net_device *dev, if (err) return err; + if (cmd == SIOCCHGTUNNEL) { + struct ip_tunnel *t = netdev_priv(dev); + + t->parms.i_flags = p.i_flags; + t->parms.o_flags = p.o_flags; + + if (strcmp(dev->rtnl_link_ops->kind, "erspan")) + ipgre_link_update(dev, true); + } + p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags); p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags); if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) return -EFAULT; + return 0; } @@ -766,15 +1048,14 @@ static int __net_init ipgre_init_net(struct net *net) return ip_tunnel_init_net(net, ipgre_net_id, &ipgre_link_ops, NULL); } -static void __net_exit ipgre_exit_net(struct net *net) +static void __net_exit ipgre_exit_batch_net(struct list_head *list_net) { - struct ip_tunnel_net *itn = net_generic(net, ipgre_net_id); - ip_tunnel_delete_net(itn, &ipgre_link_ops); + ip_tunnel_delete_nets(list_net, ipgre_net_id, &ipgre_link_ops); } static struct pernet_operations ipgre_net_ops = { .init = ipgre_init_net, - .exit = ipgre_exit_net, + .exit_batch = ipgre_exit_batch_net, .id = &ipgre_net_id, .size = sizeof(struct ip_tunnel_net), }; @@ -828,6 +1109,42 @@ out: return ipgre_tunnel_validate(tb, data, extack); } +static int erspan_validate(struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + __be16 flags = 0; + int ret; + + if (!data) + return 0; + + ret = ipgre_tap_validate(tb, data, extack); + if (ret) + return ret; + + /* ERSPAN should only have GRE sequence and key flag */ + if (data[IFLA_GRE_OFLAGS]) + flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]); + if (data[IFLA_GRE_IFLAGS]) + flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]); + if (!data[IFLA_GRE_COLLECT_METADATA] && + flags != (GRE_SEQ | GRE_KEY)) + return -EINVAL; + + /* ERSPAN Session ID only has 10-bit. Since we reuse + * 32-bit key field as ID, check it's range. + */ + if (data[IFLA_GRE_IKEY] && + (ntohl(nla_get_be32(data[IFLA_GRE_IKEY])) & ~ID_MASK)) + return -EINVAL; + + if (data[IFLA_GRE_OKEY] && + (ntohl(nla_get_be32(data[IFLA_GRE_OKEY])) & ~ID_MASK)) + return -EINVAL; + + return 0; +} + static int ipgre_netlink_parms(struct net_device *dev, struct nlattr *data[], struct nlattr *tb[], @@ -892,6 +1209,13 @@ static int ipgre_netlink_parms(struct net_device *dev, if (data[IFLA_GRE_FWMARK]) *fwmark = nla_get_u32(data[IFLA_GRE_FWMARK]); + if (data[IFLA_GRE_ERSPAN_INDEX]) { + t->index = nla_get_u32(data[IFLA_GRE_ERSPAN_INDEX]); + + if (t->index & ~INDEX_MASK) + return -EINVAL; + } + return 0; } @@ -933,6 +1257,7 @@ static int gre_tap_init(struct net_device *dev) { __gre_tunnel_init(dev); dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + netif_keep_dst(dev); return ip_tunnel_init(dev); } @@ -949,6 +1274,39 @@ static const struct net_device_ops gre_tap_netdev_ops = { .ndo_fill_metadata_dst = gre_fill_metadata_dst, }; +static int erspan_tunnel_init(struct net_device *dev) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + int t_hlen; + + tunnel->tun_hlen = 8; + tunnel->parms.iph.protocol = IPPROTO_GRE; + tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen + + sizeof(struct erspanhdr); + t_hlen = tunnel->hlen + sizeof(struct iphdr); + + dev->needed_headroom = LL_MAX_HEADER + t_hlen + 4; + dev->mtu = ETH_DATA_LEN - t_hlen - 4; + dev->features |= GRE_FEATURES; + dev->hw_features |= GRE_FEATURES; + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + netif_keep_dst(dev); + + return ip_tunnel_init(dev); +} + +static const struct net_device_ops erspan_netdev_ops = { + .ndo_init = erspan_tunnel_init, + .ndo_uninit = ip_tunnel_uninit, + .ndo_start_xmit = erspan_xmit, + .ndo_set_mac_address = eth_mac_addr, + .ndo_validate_addr = eth_validate_addr, + .ndo_change_mtu = ip_tunnel_change_mtu, + .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_get_iflink = ip_tunnel_get_iflink, + .ndo_fill_metadata_dst = gre_fill_metadata_dst, +}; + static void ipgre_tap_setup(struct net_device *dev) { ether_setup(dev); @@ -986,9 +1344,9 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], struct netlink_ext_ack *extack) { struct ip_tunnel *t = netdev_priv(dev); - struct ip_tunnel_parm p; struct ip_tunnel_encap ipencap; __u32 fwmark = t->fwmark; + struct ip_tunnel_parm p; int err; if (ipgre_netlink_encap_parms(data, &ipencap)) { @@ -1001,7 +1359,18 @@ static int ipgre_changelink(struct net_device *dev, struct nlattr *tb[], err = ipgre_netlink_parms(dev, data, tb, &p, &fwmark); if (err < 0) return err; - return ip_tunnel_changelink(dev, tb, &p, fwmark); + + err = ip_tunnel_changelink(dev, tb, &p, fwmark); + if (err < 0) + return err; + + t->parms.i_flags = p.i_flags; + t->parms.o_flags = p.o_flags; + + if (strcmp(dev->rtnl_link_ops->kind, "erspan")) + ipgre_link_update(dev, !tb[IFLA_MTU]); + + return 0; } static size_t ipgre_get_size(const struct net_device *dev) @@ -1041,6 +1410,8 @@ static size_t ipgre_get_size(const struct net_device *dev) nla_total_size(1) + /* IFLA_GRE_FWMARK */ nla_total_size(4) + + /* IFLA_GRE_ERSPAN_INDEX */ + nla_total_size(4) + 0; } @@ -1083,12 +1454,25 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev) goto nla_put_failure; } + if (t->index) + if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index)) + goto nla_put_failure; + return 0; nla_put_failure: return -EMSGSIZE; } +static void erspan_setup(struct net_device *dev) +{ + ether_setup(dev); + dev->netdev_ops = &erspan_netdev_ops; + dev->priv_flags &= ~IFF_TX_SKB_SHARING; + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + ip_tunnel_setup(dev, erspan_net_id); +} + static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = { [IFLA_GRE_LINK] = { .type = NLA_U32 }, [IFLA_GRE_IFLAGS] = { .type = NLA_U16 }, @@ -1107,6 +1491,7 @@ static const struct nla_policy ipgre_policy[IFLA_GRE_MAX + 1] = { [IFLA_GRE_COLLECT_METADATA] = { .type = NLA_FLAG }, [IFLA_GRE_IGNORE_DF] = { .type = NLA_U8 }, [IFLA_GRE_FWMARK] = { .type = NLA_U32 }, + [IFLA_GRE_ERSPAN_INDEX] = { .type = NLA_U32 }, }; static struct rtnl_link_ops ipgre_link_ops __read_mostly = { @@ -1139,6 +1524,21 @@ static struct rtnl_link_ops ipgre_tap_ops __read_mostly = { .get_link_net = ip_tunnel_get_link_net, }; +static struct rtnl_link_ops erspan_link_ops __read_mostly = { + .kind = "erspan", + .maxtype = IFLA_GRE_MAX, + .policy = ipgre_policy, + .priv_size = sizeof(struct ip_tunnel), + .setup = erspan_setup, + .validate = erspan_validate, + .newlink = ipgre_newlink, + .changelink = ipgre_changelink, + .dellink = ip_tunnel_dellink, + .get_size = ipgre_get_size, + .fill_info = ipgre_fill_info, + .get_link_net = ip_tunnel_get_link_net, +}; + struct net_device *gretap_fb_dev_create(struct net *net, const char *name, u8 name_assign_type) { @@ -1189,19 +1589,36 @@ static int __net_init ipgre_tap_init_net(struct net *net) return ip_tunnel_init_net(net, gre_tap_net_id, &ipgre_tap_ops, "gretap0"); } -static void __net_exit ipgre_tap_exit_net(struct net *net) +static void __net_exit ipgre_tap_exit_batch_net(struct list_head *list_net) { - struct ip_tunnel_net *itn = net_generic(net, gre_tap_net_id); - ip_tunnel_delete_net(itn, &ipgre_tap_ops); + ip_tunnel_delete_nets(list_net, gre_tap_net_id, &ipgre_tap_ops); } static struct pernet_operations ipgre_tap_net_ops = { .init = ipgre_tap_init_net, - .exit = ipgre_tap_exit_net, + .exit_batch = ipgre_tap_exit_batch_net, .id = &gre_tap_net_id, .size = sizeof(struct ip_tunnel_net), }; +static int __net_init erspan_init_net(struct net *net) +{ + return ip_tunnel_init_net(net, erspan_net_id, + &erspan_link_ops, "erspan0"); +} + +static void __net_exit erspan_exit_batch_net(struct list_head *net_list) +{ + ip_tunnel_delete_nets(net_list, erspan_net_id, &erspan_link_ops); +} + +static struct pernet_operations erspan_net_ops = { + .init = erspan_init_net, + .exit_batch = erspan_exit_batch_net, + .id = &erspan_net_id, + .size = sizeof(struct ip_tunnel_net), +}; + static int __init ipgre_init(void) { int err; @@ -1214,7 +1631,11 @@ static int __init ipgre_init(void) err = register_pernet_device(&ipgre_tap_net_ops); if (err < 0) - goto pnet_tap_faied; + goto pnet_tap_failed; + + err = register_pernet_device(&erspan_net_ops); + if (err < 0) + goto pnet_erspan_failed; err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO); if (err < 0) { @@ -1230,15 +1651,23 @@ static int __init ipgre_init(void) if (err < 0) goto tap_ops_failed; + err = rtnl_link_register(&erspan_link_ops); + if (err < 0) + goto erspan_link_failed; + return 0; +erspan_link_failed: + rtnl_link_unregister(&ipgre_tap_ops); tap_ops_failed: rtnl_link_unregister(&ipgre_link_ops); rtnl_link_failed: gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); add_proto_failed: + unregister_pernet_device(&erspan_net_ops); +pnet_erspan_failed: unregister_pernet_device(&ipgre_tap_net_ops); -pnet_tap_faied: +pnet_tap_failed: unregister_pernet_device(&ipgre_net_ops); return err; } @@ -1247,9 +1676,11 @@ static void __exit ipgre_fini(void) { rtnl_link_unregister(&ipgre_tap_ops); rtnl_link_unregister(&ipgre_link_ops); + rtnl_link_unregister(&erspan_link_ops); gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); unregister_pernet_device(&ipgre_tap_net_ops); unregister_pernet_device(&ipgre_net_ops); + unregister_pernet_device(&erspan_net_ops); } module_init(ipgre_init); @@ -1257,5 +1688,7 @@ module_exit(ipgre_fini); MODULE_LICENSE("GPL"); MODULE_ALIAS_RTNL_LINK("gre"); MODULE_ALIAS_RTNL_LINK("gretap"); +MODULE_ALIAS_RTNL_LINK("erspan"); MODULE_ALIAS_NETDEV("gre0"); MODULE_ALIAS_NETDEV("gretap0"); +MODULE_ALIAS_NETDEV("erspan0"); diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index fa2dc8f692c6..57fc13c6ab2b 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -311,9 +311,10 @@ drop: static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); - struct rtable *rt; + int (*edemux)(struct sk_buff *skb); struct net_device *dev = skb->dev; - void (*edemux)(struct sk_buff *skb); + struct rtable *rt; + int err; /* if ingress device is enslaved to an L3 master device pass the * skb to its handler for processing @@ -331,7 +332,9 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) ipprot = rcu_dereference(inet_protos[protocol]); if (ipprot && (edemux = READ_ONCE(ipprot->early_demux))) { - edemux(skb); + err = edemux(skb); + if (unlikely(err)) + goto drop_error; /* must reload iph, skb->head might have changed */ iph = ip_hdr(skb); } @@ -342,13 +345,10 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) * how the packet travels inside Linux networking. */ if (!skb_valid_dst(skb)) { - int err = ip_route_input_noref(skb, iph->daddr, iph->saddr, - iph->tos, dev); - if (unlikely(err)) { - if (err == -EXDEV) - __NET_INC_STATS(net, LINUX_MIB_IPRPFILTER); - goto drop; - } + err = ip_route_input_noref(skb, iph->daddr, iph->saddr, + iph->tos, dev); + if (unlikely(err)) + goto drop_error; } #ifdef CONFIG_IP_ROUTE_CLASSID @@ -399,6 +399,11 @@ static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) drop: kfree_skb(skb); return NET_RX_DROP; + +drop_error: + if (err == -EXDEV) + __NET_INC_STATS(net, LINUX_MIB_IPRPFILTER); + goto drop; } /* diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 93157f2f4758..ed194d46c00e 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket @@ -86,8 +87,8 @@ void ip_options_build(struct sk_buff *skb, struct ip_options *opt, * NOTE: dopt cannot point to skb. */ -int __ip_options_echo(struct ip_options *dopt, struct sk_buff *skb, - const struct ip_options *sopt) +int __ip_options_echo(struct net *net, struct ip_options *dopt, + struct sk_buff *skb, const struct ip_options *sopt) { unsigned char *sptr, *dptr; int soffset, doffset; @@ -140,7 +141,7 @@ int __ip_options_echo(struct ip_options *dopt, struct sk_buff *skb, __be32 addr; memcpy(&addr, dptr+soffset-1, 4); - if (inet_addr_type(dev_net(skb_dst(skb)->dev), addr) != RTN_UNICAST) { + if (inet_addr_type(net, addr) != RTN_UNICAST) { dopt->ts_needtime = 1; soffset += 8; } @@ -174,9 +175,6 @@ int __ip_options_echo(struct ip_options *dopt, struct sk_buff *skb, doffset -= 4; } if (doffset > 3) { - __be32 daddr = fib_compute_spec_dst(skb); - - memcpy(&start[doffset-1], &daddr, 4); dopt->faddr = faddr; dptr[0] = start[0]; dptr[1] = doffset+3; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e153c40c2436..e8e675be60ec 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -853,61 +853,6 @@ csum_page(struct page *page, int offset, int copy) return csum; } -static inline int ip_ufo_append_data(struct sock *sk, - struct sk_buff_head *queue, - int getfrag(void *from, char *to, int offset, int len, - int odd, struct sk_buff *skb), - void *from, int length, int hh_len, int fragheaderlen, - int transhdrlen, int maxfraglen, unsigned int flags) -{ - struct sk_buff *skb; - int err; - - /* There is support for UDP fragmentation offload by network - * device, so create one single skb packet containing complete - * udp datagram - */ - skb = skb_peek_tail(queue); - if (!skb) { - skb = sock_alloc_send_skb(sk, - hh_len + fragheaderlen + transhdrlen + 20, - (flags & MSG_DONTWAIT), &err); - - if (!skb) - return err; - - /* reserve space for Hardware header */ - skb_reserve(skb, hh_len); - - /* create space for UDP/IP header */ - skb_put(skb, fragheaderlen + transhdrlen); - - /* initialize network header pointer */ - skb_reset_network_header(skb); - - /* initialize protocol header pointer */ - skb->transport_header = skb->network_header + fragheaderlen; - - skb->csum = 0; - - if (flags & MSG_CONFIRM) - skb_set_dst_pending_confirm(skb, 1); - - __skb_queue_tail(queue, skb); - } else if (skb_is_gso(skb)) { - goto append; - } - - skb->ip_summed = CHECKSUM_PARTIAL; - /* specify the length of each IP datagram fragment */ - skb_shinfo(skb)->gso_size = maxfraglen - fragheaderlen; - skb_shinfo(skb)->gso_type = SKB_GSO_UDP; - -append: - return skb_append_datato_frags(sk, skb, getfrag, from, - (length - transhdrlen)); -} - static int __ip_append_data(struct sock *sk, struct flowi4 *fl4, struct sk_buff_head *queue, @@ -965,19 +910,6 @@ static int __ip_append_data(struct sock *sk, csummode = CHECKSUM_PARTIAL; cork->length += length; - if ((skb && skb_is_gso(skb)) || - (((length + (skb ? skb->len : fragheaderlen)) > mtu) && - (skb_queue_len(queue) <= 1) && - (sk->sk_protocol == IPPROTO_UDP) && - (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) && - (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx)) { - err = ip_ufo_append_data(sk, queue, getfrag, from, length, - hh_len, fragheaderlen, transhdrlen, - maxfraglen, flags); - if (err) - goto error; - return 0; - } /* So, what's going on in the loop below? * @@ -1288,28 +1220,14 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, if (!skb) return -EINVAL; - if ((size + skb->len > mtu) && - (skb_queue_len(&sk->sk_write_queue) == 1) && - (sk->sk_protocol == IPPROTO_UDP) && - (rt->dst.dev->features & NETIF_F_UFO)) { - if (skb->ip_summed != CHECKSUM_PARTIAL) - return -EOPNOTSUPP; - - skb_shinfo(skb)->gso_size = mtu - fragheaderlen; - skb_shinfo(skb)->gso_type = SKB_GSO_UDP; - } cork->length += size; while (size > 0) { - if (skb_is_gso(skb)) { - len = size; - } else { + /* Check if the remaining data fits into current packet. */ + len = mtu - skb->len; + if (len < size) + len = maxfraglen - skb->len; - /* Check if the remaining data fits into current packet. */ - len = mtu - skb->len; - if (len < size) - len = maxfraglen - skb->len; - } if (len <= 0) { struct sk_buff *skb_prev; int alloclen; @@ -1603,7 +1521,7 @@ void ip_send_unicast_reply(struct sock *sk, struct sk_buff *skb, int err; int oif; - if (__ip_options_echo(&replyopts.opt.opt, skb, sopt)) + if (__ip_options_echo(net, &replyopts.opt.opt, skb, sopt)) return; ipc.addr = daddr; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index ecc4b4a2413e..60fb1eb7d7d8 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket @@ -80,7 +81,8 @@ static void ip_cmsg_recv_opts(struct msghdr *msg, struct sk_buff *skb) } -static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) +static void ip_cmsg_recv_retopts(struct net *net, struct msghdr *msg, + struct sk_buff *skb) { unsigned char optbuf[sizeof(struct ip_options) + 40]; struct ip_options *opt = (struct ip_options *)optbuf; @@ -88,7 +90,7 @@ static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) if (IPCB(skb)->opt.optlen == 0) return; - if (ip_options_echo(opt, skb)) { + if (ip_options_echo(net, opt, skb)) { msg->msg_flags |= MSG_CTRUNC; return; } @@ -204,7 +206,7 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sock *sk, } if (flags & IP_CMSG_RETOPTS) { - ip_cmsg_recv_retopts(msg, skb); + ip_cmsg_recv_retopts(sock_net(sk), msg, skb); flags &= ~IP_CMSG_RETOPTS; if (!flags) @@ -1219,22 +1221,20 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb) * (e.g., process binds socket to eth0 for Tx which is * redirected to loopback in the rtable/dst). */ + struct rtable *rt = skb_rtable(skb); + bool l3slave = ipv4_l3mdev_skb(IPCB(skb)->flags); + if (pktinfo->ipi_ifindex == LOOPBACK_IFINDEX) pktinfo->ipi_ifindex = inet_iif(skb); + else if (l3slave && rt && rt->rt_iif) + pktinfo->ipi_ifindex = rt->rt_iif; pktinfo->ipi_spec_dst.s_addr = fib_compute_spec_dst(skb); } else { pktinfo->ipi_ifindex = 0; pktinfo->ipi_spec_dst.s_addr = 0; } - /* We need to keep the dst for __ip_options_echo() - * We could restrict the test to opt.ts_needtime || opt.srr, - * but the following is good enough as IP options are not often used. - */ - if (unlikely(IPCB(skb)->opt.optlen)) - skb_dst_force(skb); - else - skb_dst_drop(skb); + skb_dst_drop(skb); } int ip_setsockopt(struct sock *sk, int level, diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index 129d1a3616f8..fe6fee728ce4 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -176,7 +176,7 @@ skip_key_lookup: return cand; t = rcu_dereference(itn->collect_md_tun); - if (t) + if (t && t->dev->flags & IFF_UP) return t; if (itn->fb_tunnel_dev && itn->fb_tunnel_dev->flags & IFF_UP) @@ -618,8 +618,8 @@ void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev, u8 proto) ip_rt_put(rt); goto tx_dropped; } - iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, key->tos, - key->ttl, df, !net_eq(tunnel->net, dev_net(dev))); + iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, proto, tos, ttl, + df, !net_eq(tunnel->net, dev_net(dev))); return; tx_error: dev->stats.tx_errors++; @@ -1061,16 +1061,22 @@ static void ip_tunnel_destroy(struct ip_tunnel_net *itn, struct list_head *head, } } -void ip_tunnel_delete_net(struct ip_tunnel_net *itn, struct rtnl_link_ops *ops) +void ip_tunnel_delete_nets(struct list_head *net_list, unsigned int id, + struct rtnl_link_ops *ops) { + struct ip_tunnel_net *itn; + struct net *net; LIST_HEAD(list); rtnl_lock(); - ip_tunnel_destroy(itn, &list, ops); + list_for_each_entry(net, net_list, exit_list) { + itn = net_generic(net, id); + ip_tunnel_destroy(itn, &list, ops); + } unregister_netdevice_many(&list); rtnl_unlock(); } -EXPORT_SYMBOL_GPL(ip_tunnel_delete_net); +EXPORT_SYMBOL_GPL(ip_tunnel_delete_nets); int ip_tunnel_newlink(struct net_device *dev, struct nlattr *tb[], struct ip_tunnel_parm *p, __u32 fwmark) diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 0192c255e508..949f432a5f04 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -168,6 +168,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, struct ip_tunnel_parm *parms = &tunnel->parms; struct dst_entry *dst = skb_dst(skb); struct net_device *tdev; /* Device to other host */ + int pkt_len = skb->len; int err; int mtu; @@ -197,15 +198,6 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, goto tx_error; } - if (tunnel->err_count > 0) { - if (time_before(jiffies, - tunnel->err_time + IPTUNNEL_ERR_TIMEO)) { - tunnel->err_count--; - dst_link_failure(skb); - } else - tunnel->err_count = 0; - } - mtu = dst_mtu(dst); if (skb->len > mtu) { skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); @@ -229,7 +221,7 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev, err = dst_output(tunnel->net, skb->sk, skb); if (net_xmit_eval(err) == 0) - err = skb->len; + err = pkt_len; iptunnel_xmit_stats(dev, err); return NETDEV_TX_OK; @@ -452,15 +444,14 @@ static int __net_init vti_init_net(struct net *net) return 0; } -static void __net_exit vti_exit_net(struct net *net) +static void __net_exit vti_exit_batch_net(struct list_head *list_net) { - struct ip_tunnel_net *itn = net_generic(net, vti_net_id); - ip_tunnel_delete_net(itn, &vti_link_ops); + ip_tunnel_delete_nets(list_net, vti_net_id, &vti_link_ops); } static struct pernet_operations vti_net_ops = { .init = vti_init_net, - .exit = vti_exit_net, + .exit_batch = vti_exit_batch_net, .id = &vti_net_id, .size = sizeof(struct ip_tunnel_net), }; @@ -584,33 +575,6 @@ static struct rtnl_link_ops vti_link_ops __read_mostly = { .get_link_net = ip_tunnel_get_link_net, }; -static bool is_vti_tunnel(const struct net_device *dev) -{ - return dev->netdev_ops == &vti_netdev_ops; -} - -static int vti_device_event(struct notifier_block *unused, - unsigned long event, void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct ip_tunnel *tunnel = netdev_priv(dev); - - if (!is_vti_tunnel(dev)) - return NOTIFY_DONE; - - switch (event) { - case NETDEV_DOWN: - if (!net_eq(tunnel->net, dev_net(dev))) - xfrm_garbage_collect(tunnel->net); - break; - } - return NOTIFY_DONE; -} - -static struct notifier_block vti_notifier_block __read_mostly = { - .notifier_call = vti_device_event, -}; - static int __init vti_init(void) { const char *msg; @@ -618,8 +582,6 @@ static int __init vti_init(void) pr_info("IPv4 over IPsec tunneling driver\n"); - register_netdevice_notifier(&vti_notifier_block); - msg = "tunnel device"; err = register_pernet_device(&vti_net_ops); if (err < 0) @@ -652,7 +614,6 @@ xfrm_proto_ah_failed: xfrm_proto_esp_failed: unregister_pernet_device(&vti_net_ops); pernet_dev_failed: - unregister_netdevice_notifier(&vti_notifier_block); pr_err("vti init: failed to register %s\n", msg); return err; } @@ -664,7 +625,6 @@ static void __exit vti_fini(void) xfrm4_protocol_deregister(&vti_ah4_protocol, IPPROTO_AH); xfrm4_protocol_deregister(&vti_esp4_protocol, IPPROTO_ESP); unregister_pernet_device(&vti_net_ops); - unregister_netdevice_notifier(&vti_notifier_block); } module_init(vti_init); diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 4c5dfe6bd34d..abdebca848c9 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Automatic Configuration of IP -- use DHCP, BOOTP, RARP, or * user-supplied information to configure own IP address and routes. diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index fb1ad22b5e29..c891235b4966 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -128,43 +128,68 @@ static struct rtnl_link_ops ipip_link_ops __read_mostly; static int ipip_err(struct sk_buff *skb, u32 info) { - -/* All the routers (except for Linux) return only - 8 bytes of packet payload. It means, that precise relaying of - ICMP in the real Internet is absolutely infeasible. - */ + /* All the routers (except for Linux) return only + * 8 bytes of packet payload. It means, that precise relaying of + * ICMP in the real Internet is absolutely infeasible. + */ struct net *net = dev_net(skb->dev); struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); const struct iphdr *iph = (const struct iphdr *)skb->data; - struct ip_tunnel *t; - int err; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; + struct ip_tunnel *t; + int err = 0; + + switch (type) { + case ICMP_DEST_UNREACH: + switch (code) { + case ICMP_SR_FAILED: + /* Impossible event. */ + goto out; + default: + /* All others are translated to HOST_UNREACH. + * rfc2003 contains "deep thoughts" about NET_UNREACH, + * I believe they are just ether pollution. --ANK + */ + break; + } + break; + + case ICMP_TIME_EXCEEDED: + if (code != ICMP_EXC_TTL) + goto out; + break; + + case ICMP_REDIRECT: + break; + + default: + goto out; + } - err = -ENOENT; t = ip_tunnel_lookup(itn, skb->dev->ifindex, TUNNEL_NO_KEY, iph->daddr, iph->saddr, 0); - if (!t) + if (!t) { + err = -ENOENT; goto out; + } if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { - ipv4_update_pmtu(skb, dev_net(skb->dev), info, - t->parms.link, 0, iph->protocol, 0); - err = 0; + ipv4_update_pmtu(skb, net, info, t->parms.link, 0, + iph->protocol, 0); goto out; } if (type == ICMP_REDIRECT) { - ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0, - iph->protocol, 0); - err = 0; + ipv4_redirect(skb, net, t->parms.link, 0, iph->protocol, 0); goto out; } - if (t->parms.iph.daddr == 0) + if (t->parms.iph.daddr == 0) { + err = -ENOENT; goto out; + } - err = 0; if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) goto out; @@ -634,15 +659,14 @@ static int __net_init ipip_init_net(struct net *net) return ip_tunnel_init_net(net, ipip_net_id, &ipip_link_ops, "tunl0"); } -static void __net_exit ipip_exit_net(struct net *net) +static void __net_exit ipip_exit_batch_net(struct list_head *list_net) { - struct ip_tunnel_net *itn = net_generic(net, ipip_net_id); - ip_tunnel_delete_net(itn, &ipip_link_ops); + ip_tunnel_delete_nets(list_net, ipip_net_id, &ipip_link_ops); } static struct pernet_operations ipip_net_ops = { .init = ipip_init_net, - .exit = ipip_exit_net, + .exit_batch = ipip_exit_batch_net, .id = &ipip_net_id, .size = sizeof(struct ip_tunnel_net), }; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 06863ea3fc5b..40a43ad294cb 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -67,6 +67,7 @@ #include <net/fib_rules.h> #include <linux/netconf.h> #include <net/nexthop.h> +#include <net/switchdev.h> struct ipmr_rule { struct fib_rule common; @@ -264,6 +265,22 @@ static void __net_exit ipmr_rules_exit(struct net *net) fib_rules_unregister(net->ipv4.mr_rules_ops); rtnl_unlock(); } + +static int ipmr_rules_dump(struct net *net, struct notifier_block *nb) +{ + return fib_rules_dump(net, nb, RTNL_FAMILY_IPMR); +} + +static unsigned int ipmr_rules_seq_read(struct net *net) +{ + return fib_rules_seq_read(net, RTNL_FAMILY_IPMR); +} + +bool ipmr_rule_default(const struct fib_rule *rule) +{ + return fib_rule_matchall(rule) && rule->table == RT_TABLE_DEFAULT; +} +EXPORT_SYMBOL(ipmr_rule_default); #else #define ipmr_for_each_table(mrt, net) \ for (mrt = net->ipv4.mrt; mrt; mrt = NULL) @@ -298,6 +315,22 @@ static void __net_exit ipmr_rules_exit(struct net *net) net->ipv4.mrt = NULL; rtnl_unlock(); } + +static int ipmr_rules_dump(struct net *net, struct notifier_block *nb) +{ + return 0; +} + +static unsigned int ipmr_rules_seq_read(struct net *net) +{ + return 0; +} + +bool ipmr_rule_default(const struct fib_rule *rule) +{ + return true; +} +EXPORT_SYMBOL(ipmr_rule_default); #endif static inline int ipmr_hash_cmp(struct rhashtable_compare_arg *arg, @@ -587,6 +620,82 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) } #endif +static int call_ipmr_vif_entry_notifier(struct notifier_block *nb, + struct net *net, + enum fib_event_type event_type, + struct vif_device *vif, + vifi_t vif_index, u32 tb_id) +{ + struct vif_entry_notifier_info info = { + .info = { + .family = RTNL_FAMILY_IPMR, + .net = net, + }, + .dev = vif->dev, + .vif_index = vif_index, + .vif_flags = vif->flags, + .tb_id = tb_id, + }; + + return call_fib_notifier(nb, net, event_type, &info.info); +} + +static int call_ipmr_vif_entry_notifiers(struct net *net, + enum fib_event_type event_type, + struct vif_device *vif, + vifi_t vif_index, u32 tb_id) +{ + struct vif_entry_notifier_info info = { + .info = { + .family = RTNL_FAMILY_IPMR, + .net = net, + }, + .dev = vif->dev, + .vif_index = vif_index, + .vif_flags = vif->flags, + .tb_id = tb_id, + }; + + ASSERT_RTNL(); + net->ipv4.ipmr_seq++; + return call_fib_notifiers(net, event_type, &info.info); +} + +static int call_ipmr_mfc_entry_notifier(struct notifier_block *nb, + struct net *net, + enum fib_event_type event_type, + struct mfc_cache *mfc, u32 tb_id) +{ + struct mfc_entry_notifier_info info = { + .info = { + .family = RTNL_FAMILY_IPMR, + .net = net, + }, + .mfc = mfc, + .tb_id = tb_id + }; + + return call_fib_notifier(nb, net, event_type, &info.info); +} + +static int call_ipmr_mfc_entry_notifiers(struct net *net, + enum fib_event_type event_type, + struct mfc_cache *mfc, u32 tb_id) +{ + struct mfc_entry_notifier_info info = { + .info = { + .family = RTNL_FAMILY_IPMR, + .net = net, + }, + .mfc = mfc, + .tb_id = tb_id + }; + + ASSERT_RTNL(); + net->ipv4.ipmr_seq++; + return call_fib_notifiers(net, event_type, &info.info); +} + /** * vif_delete - Delete a VIF entry * @notify: Set to 1, if the caller is a notifier_call @@ -594,6 +703,7 @@ static struct net_device *ipmr_reg_vif(struct net *net, struct mr_table *mrt) static int vif_delete(struct mr_table *mrt, int vifi, int notify, struct list_head *head) { + struct net *net = read_pnet(&mrt->net); struct vif_device *v; struct net_device *dev; struct in_device *in_dev; @@ -603,6 +713,10 @@ static int vif_delete(struct mr_table *mrt, int vifi, int notify, v = &mrt->vif_table[vifi]; + if (VIF_EXISTS(mrt, vifi)) + call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_DEL, v, vifi, + mrt->id); + write_lock_bh(&mrt_lock); dev = v->dev; v->dev = NULL; @@ -652,10 +766,11 @@ static void ipmr_cache_free_rcu(struct rcu_head *head) kmem_cache_free(mrt_cachep, c); } -static inline void ipmr_cache_free(struct mfc_cache *c) +void ipmr_cache_free(struct mfc_cache *c) { call_rcu(&c->rcu, ipmr_cache_free_rcu); } +EXPORT_SYMBOL(ipmr_cache_free); /* Destroy an unresolved cache entry, killing queued skbs * and reporting error to netlink readers. @@ -754,6 +869,9 @@ static int vif_add(struct net *net, struct mr_table *mrt, struct vifctl *vifc, int mrtsock) { int vifi = vifc->vifc_vifi; + struct switchdev_attr attr = { + .id = SWITCHDEV_ATTR_ID_PORT_PARENT_ID, + }; struct vif_device *v = &mrt->vif_table[vifi]; struct net_device *dev; struct in_device *in_dev; @@ -828,6 +946,13 @@ static int vif_add(struct net *net, struct mr_table *mrt, /* Fill in the VIF structures */ + attr.orig_dev = dev; + if (!switchdev_port_attr_get(dev, &attr)) { + memcpy(v->dev_parent_id.id, attr.u.ppid.id, attr.u.ppid.id_len); + v->dev_parent_id.id_len = attr.u.ppid.id_len; + } else { + v->dev_parent_id.id_len = 0; + } v->rate_limit = vifc->vifc_rate_limit; v->local = vifc->vifc_lcl_addr.s_addr; v->remote = vifc->vifc_rmt_addr.s_addr; @@ -851,6 +976,7 @@ static int vif_add(struct net *net, struct mr_table *mrt, if (vifi+1 > mrt->maxvif) mrt->maxvif = vifi+1; write_unlock_bh(&mrt_lock); + call_ipmr_vif_entry_notifiers(net, FIB_EVENT_VIF_ADD, v, vifi, mrt->id); return 0; } @@ -949,6 +1075,7 @@ static struct mfc_cache *ipmr_cache_alloc(void) if (c) { c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; c->mfc_un.res.minvif = MAXVIFS; + refcount_set(&c->mfc_un.res.refcount, 1); } return c; } @@ -1150,6 +1277,7 @@ static int ipmr_cache_unresolved(struct mr_table *mrt, vifi_t vifi, static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent) { + struct net *net = read_pnet(&mrt->net); struct mfc_cache *c; /* The entries are added/deleted only under RTNL */ @@ -1161,8 +1289,9 @@ static int ipmr_mfc_delete(struct mr_table *mrt, struct mfcctl *mfc, int parent) return -ENOENT; rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params); list_del_rcu(&c->list); + call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, mrt->id); mroute_netlink_event(mrt, c, RTM_DELROUTE); - ipmr_cache_free(c); + ipmr_cache_put(c); return 0; } @@ -1189,6 +1318,8 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, if (!mrtsock) c->mfc_flags |= MFC_STATIC; write_unlock_bh(&mrt_lock); + call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_REPLACE, c, + mrt->id); mroute_netlink_event(mrt, c, RTM_NEWROUTE); return 0; } @@ -1238,6 +1369,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, ipmr_cache_resolve(net, mrt, uc, c); ipmr_cache_free(uc); } + call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_ADD, c, mrt->id); mroute_netlink_event(mrt, c, RTM_NEWROUTE); return 0; } @@ -1245,6 +1377,7 @@ static int ipmr_mfc_add(struct net *net, struct mr_table *mrt, /* Close the multicast socket, and clear the vif tables etc */ static void mroute_clean_tables(struct mr_table *mrt, bool all) { + struct net *net = read_pnet(&mrt->net); struct mfc_cache *c, *tmp; LIST_HEAD(list); int i; @@ -1263,8 +1396,10 @@ static void mroute_clean_tables(struct mr_table *mrt, bool all) continue; rhltable_remove(&mrt->mfc_hash, &c->mnode, ipmr_rht_params); list_del_rcu(&c->list); + call_ipmr_mfc_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, c, + mrt->id); mroute_netlink_event(mrt, c, RTM_DELROUTE); - ipmr_cache_free(c); + ipmr_cache_put(c); } if (atomic_read(&mrt->cache_resolve_queue_len) != 0) { @@ -1393,6 +1528,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, case MRT_ADD_MFC: case MRT_DEL_MFC: parent = -1; + /* fall through */ case MRT_ADD_MFC_PROXY: case MRT_DEL_MFC_PROXY: if (optlen != sizeof(mfc)) { @@ -1724,10 +1860,33 @@ static inline int ipmr_forward_finish(struct net *net, struct sock *sk, return dst_output(net, sk, skb); } +#ifdef CONFIG_NET_SWITCHDEV +static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt, + int in_vifi, int out_vifi) +{ + struct vif_device *out_vif = &mrt->vif_table[out_vifi]; + struct vif_device *in_vif = &mrt->vif_table[in_vifi]; + + if (!skb->offload_mr_fwd_mark) + return false; + if (!out_vif->dev_parent_id.id_len || !in_vif->dev_parent_id.id_len) + return false; + return netdev_phys_item_id_same(&out_vif->dev_parent_id, + &in_vif->dev_parent_id); +} +#else +static bool ipmr_forward_offloaded(struct sk_buff *skb, struct mr_table *mrt, + int in_vifi, int out_vifi) +{ + return false; +} +#endif + /* Processing handlers for ipmr_forward */ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, - struct sk_buff *skb, struct mfc_cache *c, int vifi) + int in_vifi, struct sk_buff *skb, + struct mfc_cache *c, int vifi) { const struct iphdr *iph = ip_hdr(skb); struct vif_device *vif = &mrt->vif_table[vifi]; @@ -1748,6 +1907,9 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, goto out_free; } + if (ipmr_forward_offloaded(skb, mrt, in_vifi, vifi)) + goto out_free; + if (vif->flags & VIFF_TUNNEL) { rt = ip_route_output_ports(net, &fl4, NULL, vif->remote, vif->local, @@ -1925,8 +2087,8 @@ forward: struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) - ipmr_queue_xmit(net, mrt, skb2, cache, - psend); + ipmr_queue_xmit(net, mrt, true_vifi, + skb2, cache, psend); } psend = ct; } @@ -1937,9 +2099,10 @@ last_forward: struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2) - ipmr_queue_xmit(net, mrt, skb2, cache, psend); + ipmr_queue_xmit(net, mrt, true_vifi, skb2, + cache, psend); } else { - ipmr_queue_xmit(net, mrt, skb, cache, psend); + ipmr_queue_xmit(net, mrt, true_vifi, skb, cache, psend); return; } } @@ -2156,6 +2319,9 @@ static int __ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, nla_put_u32(skb, RTA_IIF, mrt->vif_table[c->mfc_parent].dev->ifindex) < 0) return -EMSGSIZE; + if (c->mfc_flags & MFC_OFFLOAD) + rtm->rtm_flags |= RTNH_F_OFFLOAD; + if (!(mp_attr = nla_nest_start(skb, RTA_MULTIPATH))) return -EMSGSIZE; @@ -3048,14 +3214,87 @@ static const struct net_protocol pim_protocol = { }; #endif +static unsigned int ipmr_seq_read(struct net *net) +{ + ASSERT_RTNL(); + + return net->ipv4.ipmr_seq + ipmr_rules_seq_read(net); +} + +static int ipmr_dump(struct net *net, struct notifier_block *nb) +{ + struct mr_table *mrt; + int err; + + err = ipmr_rules_dump(net, nb); + if (err) + return err; + + ipmr_for_each_table(mrt, net) { + struct vif_device *v = &mrt->vif_table[0]; + struct mfc_cache *mfc; + int vifi; + + /* Notifiy on table VIF entries */ + read_lock(&mrt_lock); + for (vifi = 0; vifi < mrt->maxvif; vifi++, v++) { + if (!v->dev) + continue; + + call_ipmr_vif_entry_notifier(nb, net, FIB_EVENT_VIF_ADD, + v, vifi, mrt->id); + } + read_unlock(&mrt_lock); + + /* Notify on table MFC entries */ + list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) + call_ipmr_mfc_entry_notifier(nb, net, + FIB_EVENT_ENTRY_ADD, mfc, + mrt->id); + } + + return 0; +} + +static const struct fib_notifier_ops ipmr_notifier_ops_template = { + .family = RTNL_FAMILY_IPMR, + .fib_seq_read = ipmr_seq_read, + .fib_dump = ipmr_dump, + .owner = THIS_MODULE, +}; + +static int __net_init ipmr_notifier_init(struct net *net) +{ + struct fib_notifier_ops *ops; + + net->ipv4.ipmr_seq = 0; + + ops = fib_notifier_ops_register(&ipmr_notifier_ops_template, net); + if (IS_ERR(ops)) + return PTR_ERR(ops); + net->ipv4.ipmr_notifier_ops = ops; + + return 0; +} + +static void __net_exit ipmr_notifier_exit(struct net *net) +{ + fib_notifier_ops_unregister(net->ipv4.ipmr_notifier_ops); + net->ipv4.ipmr_notifier_ops = NULL; +} + /* Setup for IP multicast routing */ static int __net_init ipmr_net_init(struct net *net) { int err; + err = ipmr_notifier_init(net); + if (err) + goto ipmr_notifier_fail; + err = ipmr_rules_init(net); if (err < 0) - goto fail; + goto ipmr_rules_fail; #ifdef CONFIG_PROC_FS err = -ENOMEM; @@ -3072,7 +3311,9 @@ proc_cache_fail: proc_vif_fail: ipmr_rules_exit(net); #endif -fail: +ipmr_rules_fail: + ipmr_notifier_exit(net); +ipmr_notifier_fail: return err; } @@ -3082,6 +3323,7 @@ static void __net_exit ipmr_net_exit(struct net *net) remove_proc_entry("ip_mr_cache", net->proc_net); remove_proc_entry("ip_mr_vif", net->proc_net); #endif + ipmr_notifier_exit(net); ipmr_rules_exit(net); } @@ -3114,14 +3356,14 @@ int __init ip_mr_init(void) } #endif rtnl_register(RTNL_FAMILY_IPMR, RTM_GETROUTE, - ipmr_rtm_getroute, ipmr_rtm_dumproute, NULL); + ipmr_rtm_getroute, ipmr_rtm_dumproute, 0); rtnl_register(RTNL_FAMILY_IPMR, RTM_NEWROUTE, - ipmr_rtm_route, NULL, NULL); + ipmr_rtm_route, NULL, 0); rtnl_register(RTNL_FAMILY_IPMR, RTM_DELROUTE, - ipmr_rtm_route, NULL, NULL); + ipmr_rtm_route, NULL, 0); rtnl_register(RTNL_FAMILY_IPMR, RTM_GETLINK, - NULL, ipmr_rtm_dumplink, NULL); + NULL, ipmr_rtm_dumplink, 0); return 0; #ifdef CONFIG_IP_PIMSM_V2 diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index f462fee66ac8..adcdae358365 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the netfilter modules on top of IPv4. # diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 0bc3c3d73e61..f88221aebc9d 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -268,14 +268,14 @@ unsigned int arpt_do_table(struct sk_buff *skb, acpar.targinfo = t->data; verdict = t->u.kernel.target->target(skb, &acpar); - /* Target might have changed stuff. */ - arp = arp_hdr(skb); - - if (verdict == XT_CONTINUE) + if (verdict == XT_CONTINUE) { + /* Target might have changed stuff. */ + arp = arp_hdr(skb); e = arpt_next_entry(e); - else + } else { /* Verdict */ break; + } } while (!acpar.hotdrop); xt_write_recseq_end(addend); local_bh_enable(); @@ -629,7 +629,27 @@ static void get_counters(const struct xt_table_info *t, ADD_COUNTER(counters[i], bcnt, pcnt); ++i; + cond_resched(); + } + } +} + +static void get_old_counters(const struct xt_table_info *t, + struct xt_counters counters[]) +{ + struct arpt_entry *iter; + unsigned int cpu, i; + + for_each_possible_cpu(cpu) { + i = 0; + xt_entry_foreach(iter, t->entries, t->size) { + struct xt_counters *tmp; + + tmp = xt_get_per_cpu_counter(&iter->counters, cpu); + ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt); + ++i; } + cond_resched(); } } @@ -909,8 +929,7 @@ static int __do_replace(struct net *net, const char *name, (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); - /* Get the old counters, and synchronize with replace */ - get_counters(oldinfo, counters); + get_old_counters(oldinfo, counters); /* Decrease module usage counts and free resource */ loc_cpu_old_entry = oldinfo->entries; @@ -1117,7 +1136,6 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr, struct xt_table_info *newinfo, unsigned char *base) { struct xt_entry_target *t; - struct xt_target *target; struct arpt_entry *de; unsigned int origsize; int h; @@ -1132,7 +1150,6 @@ compat_copy_entry_from_user(struct compat_arpt_entry *e, void **dstptr, de->target_offset = e->target_offset - (origsize - *size); t = compat_arpt_get_target(e); - target = t->u.kernel.target; xt_compat_target_from_user(t, dstptr, size); de->next_offset = e->next_offset - (origsize - *size); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 2a55a40211cb..4cbe5e80f3bf 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -35,12 +35,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("IPv4 packet filter"); -#ifdef CONFIG_NETFILTER_DEBUG -#define IP_NF_ASSERT(x) WARN_ON(!(x)) -#else -#define IP_NF_ASSERT(x) -#endif - void *ipt_alloc_initial_table(const struct xt_table *info) { return xt_alloc_initial_table(ipt, IPT); @@ -151,7 +145,7 @@ static const char *const comments[] = { [NF_IP_TRACE_COMMENT_POLICY] = "policy", }; -static struct nf_loginfo trace_loginfo = { +static const struct nf_loginfo trace_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { .log = { @@ -263,7 +257,7 @@ ipt_do_table(struct sk_buff *skb, acpar.hotdrop = false; acpar.state = state; - IP_NF_ASSERT(table->valid_hooks & (1 << hook)); + WARN_ON(!(table->valid_hooks & (1 << hook))); local_bh_disable(); addend = xt_write_recseq_begin(); private = table->private; @@ -293,7 +287,7 @@ ipt_do_table(struct sk_buff *skb, const struct xt_entry_match *ematch; struct xt_counters *counter; - IP_NF_ASSERT(e); + WARN_ON(!e); if (!ip_packet_match(ip, indev, outdev, &e->ip, acpar.fragoff)) { no_match: @@ -312,7 +306,7 @@ ipt_do_table(struct sk_buff *skb, ADD_COUNTER(*counter, skb->len, 1); t = ipt_get_target(e); - IP_NF_ASSERT(t->u.kernel.target); + WARN_ON(!t->u.kernel.target); #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) /* The packet is traced: log it */ @@ -352,13 +346,14 @@ ipt_do_table(struct sk_buff *skb, acpar.targinfo = t->data; verdict = t->u.kernel.target->target(skb, &acpar); - /* Target might have changed stuff. */ - ip = ip_hdr(skb); - if (verdict == XT_CONTINUE) + if (verdict == XT_CONTINUE) { + /* Target might have changed stuff. */ + ip = ip_hdr(skb); e = ipt_next_entry(e); - else + } else { /* Verdict */ break; + } } while (!acpar.hotdrop); xt_write_recseq_end(addend); @@ -781,10 +776,31 @@ get_counters(const struct xt_table_info *t, ADD_COUNTER(counters[i], bcnt, pcnt); ++i; /* macro does multi eval of i */ + cond_resched(); } } } +static void get_old_counters(const struct xt_table_info *t, + struct xt_counters counters[]) +{ + struct ipt_entry *iter; + unsigned int cpu, i; + + for_each_possible_cpu(cpu) { + i = 0; + xt_entry_foreach(iter, t->entries, t->size) { + const struct xt_counters *tmp; + + tmp = xt_get_per_cpu_counter(&iter->counters, cpu); + ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt); + ++i; /* macro does multi eval of i */ + } + + cond_resched(); + } +} + static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; @@ -1074,8 +1090,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); - /* Get the old counters, and synchronize with replace */ - get_counters(oldinfo, counters); + get_old_counters(oldinfo, counters); /* Decrease module usage counts and free resource */ xt_entry_foreach(iter, oldinfo->entries, oldinfo->size) @@ -1355,7 +1370,6 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, struct xt_table_info *newinfo, unsigned char *base) { struct xt_entry_target *t; - struct xt_target *target; struct ipt_entry *de; unsigned int origsize; int h; @@ -1374,7 +1388,6 @@ compat_copy_entry_from_user(struct compat_ipt_entry *e, void **dstptr, de->target_offset = e->target_offset - (origsize - *size); t = compat_ipt_get_target(e); - target = t->u.kernel.target; xt_compat_target_from_user(t, dstptr, size); de->next_offset = e->next_offset - (origsize - *size); diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 7d72decb80f9..17b4ca562944 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -117,7 +117,8 @@ clusterip_config_entry_put(struct net *net, struct clusterip_config *c) * functions are also incrementing the refcount on their own, * so it's safe to remove the entry even if it's in use. */ #ifdef CONFIG_PROC_FS - proc_remove(c->pde); + if (cn->procdir) + proc_remove(c->pde); #endif return; } @@ -624,7 +625,7 @@ arp_mangle(void *priv, return NF_ACCEPT; } -static struct nf_hook_ops cip_arp_ops __read_mostly = { +static const struct nf_hook_ops cip_arp_ops = { .hook = arp_mangle, .pf = NFPROTO_ARP, .hooknum = NF_ARP_OUT, @@ -815,6 +816,7 @@ static void clusterip_net_exit(struct net *net) #ifdef CONFIG_PROC_FS struct clusterip_net *cn = net_generic(net, clusterip_net_id); proc_remove(cn->procdir); + cn->procdir = NULL; #endif nf_unregister_net_hook(net, &cip_arp_ops); } diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index f1528f7175a8..f75fc6b53115 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -330,7 +330,8 @@ static unsigned int ipv4_synproxy_hook(void *priv, if (synproxy == NULL) return NF_ACCEPT; - if (nf_is_loopback_packet(skb)) + if (nf_is_loopback_packet(skb) || + ip_hdr(skb)->protocol != IPPROTO_TCP) return NF_ACCEPT; thoff = ip_hdrlen(skb); @@ -416,7 +417,7 @@ static unsigned int ipv4_synproxy_hook(void *priv, return NF_ACCEPT; } -static struct nf_hook_ops ipv4_synproxy_ops[] __read_mostly = { +static const struct nf_hook_ops ipv4_synproxy_ops[] = { { .hook = ipv4_synproxy_hook, .pf = NFPROTO_IPV4, diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index 138a24bc76ad..a1a07b338ccf 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -67,7 +67,7 @@ static unsigned int iptable_nat_ipv4_local_fn(void *priv, return nf_nat_ipv4_local_fn(priv, skb, state, iptable_nat_do_chain); } -static struct nf_hook_ops nf_nat_ipv4_ops[] __read_mostly = { +static const struct nf_hook_ops nf_nat_ipv4_ops[] = { /* Before packet filtering, change destination */ { .hook = iptable_nat_ipv4_in, diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index 2e14ed11a35c..89af9d88ca21 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -63,13 +63,6 @@ static bool ipv4_invert_tuple(struct nf_conntrack_tuple *tuple, return true; } -static void ipv4_print_tuple(struct seq_file *s, - const struct nf_conntrack_tuple *tuple) -{ - seq_printf(s, "src=%pI4 dst=%pI4 ", - &tuple->src.u3.ip, &tuple->dst.u3.ip); -} - static int ipv4_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, unsigned int *dataoff, u_int8_t *protonum) { @@ -174,7 +167,7 @@ static unsigned int ipv4_conntrack_local(void *priv, /* Connection tracking may drop packets, but never alters them, so make it the first hook. */ -static struct nf_hook_ops ipv4_conntrack_ops[] __read_mostly = { +static const struct nf_hook_ops ipv4_conntrack_ops[] = { { .hook = ipv4_conntrack_in, .pf = NFPROTO_IPV4, @@ -303,11 +296,6 @@ static int ipv4_nlattr_to_tuple(struct nlattr *tb[], return 0; } - -static int ipv4_nlattr_tuple_size(void) -{ - return nla_policy_len(ipv4_nla_policy, CTA_IP_MAX + 1); -} #endif static struct nf_sockopt_ops so_getorigdst = { @@ -356,18 +344,17 @@ static void ipv4_hooks_unregister(struct net *net) mutex_unlock(®ister_ipv4_hooks); } -struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 __read_mostly = { +const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv4 = { .l3proto = PF_INET, - .name = "ipv4", .pkt_to_tuple = ipv4_pkt_to_tuple, .invert_tuple = ipv4_invert_tuple, - .print_tuple = ipv4_print_tuple, .get_l4proto = ipv4_get_l4proto, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .tuple_to_nlattr = ipv4_tuple_to_nlattr, - .nlattr_tuple_size = ipv4_nlattr_tuple_size, .nlattr_to_tuple = ipv4_nlattr_to_tuple, .nla_policy = ipv4_nla_policy, + .nla_size = NLA_ALIGN(NLA_HDRLEN + sizeof(u32)) + /* CTA_IP_V4_SRC */ + NLA_ALIGN(NLA_HDRLEN + sizeof(u32)), /* CTA_IP_V4_DST */ #endif .net_ns_get = ipv4_hooks_register, .net_ns_put = ipv4_hooks_unregister, @@ -398,24 +385,12 @@ static struct nf_conntrack_l4proto *builtin_l4proto4[] = { static int ipv4_net_init(struct net *net) { - int ret = 0; - - ret = nf_ct_l4proto_pernet_register(net, builtin_l4proto4, - ARRAY_SIZE(builtin_l4proto4)); - if (ret < 0) - return ret; - ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv4); - if (ret < 0) { - pr_err("nf_conntrack_ipv4: pernet registration failed\n"); - nf_ct_l4proto_pernet_unregister(net, builtin_l4proto4, - ARRAY_SIZE(builtin_l4proto4)); - } - return ret; + return nf_ct_l4proto_pernet_register(net, builtin_l4proto4, + ARRAY_SIZE(builtin_l4proto4)); } static void ipv4_net_exit(struct net *net) { - nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv4); nf_ct_l4proto_pernet_unregister(net, builtin_l4proto4, ARRAY_SIZE(builtin_l4proto4)); } @@ -433,6 +408,11 @@ static int __init nf_conntrack_l3proto_ipv4_init(void) need_conntrack(); +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) + if (WARN_ON(nla_policy_len(ipv4_nla_policy, CTA_IP_MAX + 1) != + nf_conntrack_l3proto_ipv4.nla_size)) + return -EINVAL; +#endif ret = nf_register_sockopt(&so_getorigdst); if (ret < 0) { pr_err("Unable to register netfilter socket option\n"); diff --git a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c index 73c591d8a9a8..1849fedd9b81 100644 --- a/net/ipv4/netfilter/nf_conntrack_proto_icmp.c +++ b/net/ipv4/netfilter/nf_conntrack_proto_icmp.c @@ -71,16 +71,6 @@ static bool icmp_invert_tuple(struct nf_conntrack_tuple *tuple, return true; } -/* Print out the per-protocol part of the tuple. */ -static void icmp_print_tuple(struct seq_file *s, - const struct nf_conntrack_tuple *tuple) -{ - seq_printf(s, "type=%u code=%u id=%u ", - tuple->dst.u.icmp.type, - tuple->dst.u.icmp.code, - ntohs(tuple->src.u.icmp.id)); -} - static unsigned int *icmp_get_timeouts(struct net *net) { return &icmp_pernet(net)->timeout; @@ -91,8 +81,6 @@ static int icmp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - u_int8_t pf, - unsigned int hooknum, unsigned int *timeout) { /* Do not immediately delete the connection after the first @@ -137,7 +125,7 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, enum ip_conntrack_info ctinfo; struct nf_conntrack_zone tmp; - NF_CT_ASSERT(!skb_nfct(skb)); + WARN_ON(skb_nfct(skb)); zone = nf_ct_zone_tmpl(tmpl, skb, &tmp); /* Are they talking about one of our connections? */ @@ -176,6 +164,12 @@ icmp_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, return NF_ACCEPT; } +static void icmp_error_log(const struct sk_buff *skb, struct net *net, + u8 pf, const char *msg) +{ + nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMP, "%s", msg); +} + /* Small and modified version of icmp_rcv */ static int icmp_error(struct net *net, struct nf_conn *tmpl, @@ -188,18 +182,14 @@ icmp_error(struct net *net, struct nf_conn *tmpl, /* Not enough header? */ icmph = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_ih), &_ih); if (icmph == NULL) { - if (LOG_INVALID(net, IPPROTO_ICMP)) - nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, - NULL, "nf_ct_icmp: short packet "); + icmp_error_log(skb, net, pf, "short packet"); return -NF_ACCEPT; } /* See ip_conntrack_proto_tcp.c */ if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_ip_checksum(skb, hooknum, dataoff, 0)) { - if (LOG_INVALID(net, IPPROTO_ICMP)) - nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL, - "nf_ct_icmp: bad HW ICMP checksum "); + icmp_error_log(skb, net, pf, "bad hw icmp checksum"); return -NF_ACCEPT; } @@ -210,9 +200,7 @@ icmp_error(struct net *net, struct nf_conn *tmpl, * discarded. */ if (icmph->type > NR_ICMP_TYPES) { - if (LOG_INVALID(net, IPPROTO_ICMP)) - nf_log_packet(net, PF_INET, 0, skb, NULL, NULL, NULL, - "nf_ct_icmp: invalid ICMP type "); + icmp_error_log(skb, net, pf, "invalid icmp type"); return -NF_ACCEPT; } @@ -270,9 +258,14 @@ static int icmp_nlattr_to_tuple(struct nlattr *tb[], return 0; } -static int icmp_nlattr_tuple_size(void) +static unsigned int icmp_nlattr_tuple_size(void) { - return nla_policy_len(icmp_nla_policy, CTA_PROTO_MAX + 1); + static unsigned int size __read_mostly; + + if (!size) + size = nla_policy_len(icmp_nla_policy, CTA_PROTO_MAX + 1); + + return size; } #endif @@ -362,10 +355,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmp __read_mostly = { .l3proto = PF_INET, .l4proto = IPPROTO_ICMP, - .name = "icmp", .pkt_to_tuple = icmp_pkt_to_tuple, .invert_tuple = icmp_invert_tuple, - .print_tuple = icmp_print_tuple, .packet = icmp_packet, .get_timeouts = icmp_get_timeouts, .new = icmp_new, diff --git a/net/ipv4/netfilter/nf_defrag_ipv4.c b/net/ipv4/netfilter/nf_defrag_ipv4.c index 346bf7ccac08..37fe1616ca0b 100644 --- a/net/ipv4/netfilter/nf_defrag_ipv4.c +++ b/net/ipv4/netfilter/nf_defrag_ipv4.c @@ -90,7 +90,7 @@ static unsigned int ipv4_conntrack_defrag(void *priv, return NF_ACCEPT; } -static struct nf_hook_ops ipv4_defrag_ops[] = { +static const struct nf_hook_ops ipv4_defrag_ops[] = { { .hook = ipv4_conntrack_defrag, .pf = NFPROTO_IPV4, diff --git a/net/ipv4/netfilter/nf_log_arp.c b/net/ipv4/netfilter/nf_log_arp.c index 2f3895ddc275..df5c2a2061a4 100644 --- a/net/ipv4/netfilter/nf_log_arp.c +++ b/net/ipv4/netfilter/nf_log_arp.c @@ -25,7 +25,7 @@ #include <linux/netfilter/xt_LOG.h> #include <net/netfilter/nf_log.h> -static struct nf_loginfo default_loginfo = { +static const struct nf_loginfo default_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { .log = { diff --git a/net/ipv4/netfilter/nf_log_ipv4.c b/net/ipv4/netfilter/nf_log_ipv4.c index c83a9963269b..4388de0e5380 100644 --- a/net/ipv4/netfilter/nf_log_ipv4.c +++ b/net/ipv4/netfilter/nf_log_ipv4.c @@ -24,7 +24,7 @@ #include <linux/netfilter/xt_LOG.h> #include <net/netfilter/nf_log.h> -static struct nf_loginfo default_loginfo = { +static const struct nf_loginfo default_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { .log = { diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index 574f7ebba0b6..ac8342dcb55e 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -252,16 +252,16 @@ static int nat_rtp_rtcp(struct sk_buff *skb, struct nf_conn *ct, if (set_h245_addr(skb, protoff, data, dataoff, taddr, &ct->tuplehash[!dir].tuple.dst.u3, htons((port & htons(1)) ? nated_port + 1 : - nated_port)) == 0) { - /* Save ports */ - info->rtp_port[i][dir] = rtp_port; - info->rtp_port[i][!dir] = htons(nated_port); - } else { + nated_port))) { nf_ct_unexpect_related(rtp_exp); nf_ct_unexpect_related(rtcp_exp); return -1; } + /* Save ports */ + info->rtp_port[i][dir] = rtp_port; + info->rtp_port[i][!dir] = htons(nated_port); + /* Success */ pr_debug("nf_nat_h323: expect RTP %pI4:%hu->%pI4:%hu\n", &rtp_exp->tuple.src.u3.ip, @@ -370,15 +370,15 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct, /* Modify signal */ if (set_h225_addr(skb, protoff, data, dataoff, taddr, &ct->tuplehash[!dir].tuple.dst.u3, - htons(nated_port)) == 0) { - /* Save ports */ - info->sig_port[dir] = port; - info->sig_port[!dir] = htons(nated_port); - } else { + htons(nated_port))) { nf_ct_unexpect_related(exp); return -1; } + /* Save ports */ + info->sig_port[dir] = port; + info->sig_port[!dir] = htons(nated_port); + pr_debug("nf_nat_q931: expect H.245 %pI4:%hu->%pI4:%hu\n", &exp->tuple.src.u3.ip, ntohs(exp->tuple.src.u.tcp.port), @@ -462,24 +462,27 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, /* Modify signal */ if (set_h225_addr(skb, protoff, data, 0, &taddr[idx], &ct->tuplehash[!dir].tuple.dst.u3, - htons(nated_port)) == 0) { - /* Save ports */ - info->sig_port[dir] = port; - info->sig_port[!dir] = htons(nated_port); - - /* Fix for Gnomemeeting */ - if (idx > 0 && - get_h225_addr(ct, *data, &taddr[0], &addr, &port) && - (ntohl(addr.ip) & 0xff000000) == 0x7f000000) { - set_h225_addr(skb, protoff, data, 0, &taddr[0], - &ct->tuplehash[!dir].tuple.dst.u3, - info->sig_port[!dir]); - } - } else { + htons(nated_port))) { nf_ct_unexpect_related(exp); return -1; } + /* Save ports */ + info->sig_port[dir] = port; + info->sig_port[!dir] = htons(nated_port); + + /* Fix for Gnomemeeting */ + if (idx > 0 && + get_h225_addr(ct, *data, &taddr[0], &addr, &port) && + (ntohl(addr.ip) & 0xff000000) == 0x7f000000) { + if (set_h225_addr(skb, protoff, data, 0, &taddr[0], + &ct->tuplehash[!dir].tuple.dst.u3, + info->sig_port[!dir])) { + nf_ct_unexpect_related(exp); + return -1; + } + } + /* Success */ pr_debug("nf_nat_ras: expect Q.931 %pI4:%hu->%pI4:%hu\n", &exp->tuple.src.u3.ip, @@ -550,9 +553,9 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct, } /* Modify signal */ - if (!set_h225_addr(skb, protoff, data, dataoff, taddr, - &ct->tuplehash[!dir].tuple.dst.u3, - htons(nated_port)) == 0) { + if (set_h225_addr(skb, protoff, data, dataoff, taddr, + &ct->tuplehash[!dir].tuple.dst.u3, + htons(nated_port))) { nf_ct_unexpect_related(exp); return -1; } diff --git a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c index feedd759ca80..0443ca4120b0 100644 --- a/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_l3proto_ipv4.c @@ -190,7 +190,7 @@ int nf_nat_icmp_reply_translation(struct sk_buff *skb, struct nf_conntrack_tuple target; unsigned long statusbit; - NF_CT_ASSERT(ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY); + WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY); if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) return 0; @@ -276,7 +276,8 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb, else return NF_ACCEPT; } - /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ + /* Only ICMPs can be IP_CT_IS_REPLY: */ + /* fall through */ case IP_CT_NEW: /* Seen it before? This can happen for loopback, retrans, * or local packets. @@ -306,8 +307,8 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb, default: /* ESTABLISHED */ - NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || - ctinfo == IP_CT_ESTABLISHED_REPLY); + WARN_ON(ctinfo != IP_CT_ESTABLISHED && + ctinfo != IP_CT_ESTABLISHED_REPLY); if (nf_nat_oif_changed(state->hook, ctinfo, nat, state->out)) goto oif_changed; } diff --git a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c index f39037fca923..0c366aad89cb 100644 --- a/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c +++ b/net/ipv4/netfilter/nf_nat_masquerade_ipv4.c @@ -34,12 +34,12 @@ nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, const struct rtable *rt; __be32 newsrc, nh; - NF_CT_ASSERT(hooknum == NF_INET_POST_ROUTING); + WARN_ON(hooknum != NF_INET_POST_ROUTING); ct = nf_ct_get(skb, &ctinfo); - NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || - ctinfo == IP_CT_RELATED_REPLY)); + WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || + ctinfo == IP_CT_RELATED_REPLY))); /* Source address is 0.0.0.0 - locally generated packet that is * probably not supposed to be masqueraded. @@ -96,7 +96,7 @@ static int masq_device_event(struct notifier_block *this, * conntracks which were associated with that device, * and forget them. */ - NF_CT_ASSERT(dev->ifindex != 0); + WARN_ON(dev->ifindex == 0); nf_ct_iterate_cleanup_net(net, device_cmp, (void *)(long)dev->ifindex, 0, 0); diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c index eeacbdaf7cdf..5cd06ba3535d 100644 --- a/net/ipv4/netfilter/nf_reject_ipv4.c +++ b/net/ipv4/netfilter/nf_reject_ipv4.c @@ -132,6 +132,8 @@ void nf_send_reset(struct net *net, struct sk_buff *oldskb, int hook) if (ip_route_me_harder(net, nskb, RTN_UNSPEC)) goto free_nskb; + niph = ip_hdr(nskb); + /* "Never happens" */ if (nskb->len > dst_mtu(skb_dst(nskb))) goto free_nskb; diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c index de3681df2ce7..e50976e3c213 100644 --- a/net/ipv4/netfilter/nft_fib_ipv4.c +++ b/net/ipv4/netfilter/nft_fib_ipv4.c @@ -32,9 +32,10 @@ void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_fib *priv = nft_expr_priv(expr); + int noff = skb_network_offset(pkt->skb); u32 *dst = ®s->data[priv->dreg]; const struct net_device *dev = NULL; - const struct iphdr *iph; + struct iphdr *iph, _iph; __be32 addr; if (priv->flags & NFTA_FIB_F_IIF) @@ -42,7 +43,12 @@ void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs, else if (priv->flags & NFTA_FIB_F_OIF) dev = nft_out(pkt); - iph = ip_hdr(pkt->skb); + iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph); + if (!iph) { + regs->verdict.code = NFT_BREAK; + return; + } + if (priv->flags & NFTA_FIB_F_DADDR) addr = iph->daddr; else @@ -61,8 +67,9 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_fib *priv = nft_expr_priv(expr); + int noff = skb_network_offset(pkt->skb); u32 *dest = ®s->data[priv->dreg]; - const struct iphdr *iph; + struct iphdr *iph, _iph; struct fib_result res; struct flowi4 fl4 = { .flowi4_scope = RT_SCOPE_UNIVERSE, @@ -95,7 +102,12 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, return; } - iph = ip_hdr(pkt->skb); + iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph); + if (!iph) { + regs->verdict.code = NFT_BREAK; + return; + } + if (ipv4_is_zeronet(iph->saddr)) { if (ipv4_is_lbcast(iph->daddr) || ipv4_is_local_multicast(iph->daddr)) { diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 43eb6567b3a0..9f37c4727861 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -206,18 +206,12 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("DelayedACKLost", LINUX_MIB_DELAYEDACKLOST), SNMP_MIB_ITEM("ListenOverflows", LINUX_MIB_LISTENOVERFLOWS), SNMP_MIB_ITEM("ListenDrops", LINUX_MIB_LISTENDROPS), - SNMP_MIB_ITEM("TCPPrequeued", LINUX_MIB_TCPPREQUEUED), - SNMP_MIB_ITEM("TCPDirectCopyFromBacklog", LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG), - SNMP_MIB_ITEM("TCPDirectCopyFromPrequeue", LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE), - SNMP_MIB_ITEM("TCPPrequeueDropped", LINUX_MIB_TCPPREQUEUEDROPPED), SNMP_MIB_ITEM("TCPHPHits", LINUX_MIB_TCPHPHITS), - SNMP_MIB_ITEM("TCPHPHitsToUser", LINUX_MIB_TCPHPHITSTOUSER), SNMP_MIB_ITEM("TCPPureAcks", LINUX_MIB_TCPPUREACKS), SNMP_MIB_ITEM("TCPHPAcks", LINUX_MIB_TCPHPACKS), SNMP_MIB_ITEM("TCPRenoRecovery", LINUX_MIB_TCPRENORECOVERY), SNMP_MIB_ITEM("TCPSackRecovery", LINUX_MIB_TCPSACKRECOVERY), SNMP_MIB_ITEM("TCPSACKReneging", LINUX_MIB_TCPSACKRENEGING), - SNMP_MIB_ITEM("TCPFACKReorder", LINUX_MIB_TCPFACKREORDER), SNMP_MIB_ITEM("TCPSACKReorder", LINUX_MIB_TCPSACKREORDER), SNMP_MIB_ITEM("TCPRenoReorder", LINUX_MIB_TCPRENOREORDER), SNMP_MIB_ITEM("TCPTSReorder", LINUX_MIB_TCPTSREORDER), @@ -230,14 +224,12 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPSackFailures", LINUX_MIB_TCPSACKFAILURES), SNMP_MIB_ITEM("TCPLossFailures", LINUX_MIB_TCPLOSSFAILURES), SNMP_MIB_ITEM("TCPFastRetrans", LINUX_MIB_TCPFASTRETRANS), - SNMP_MIB_ITEM("TCPForwardRetrans", LINUX_MIB_TCPFORWARDRETRANS), SNMP_MIB_ITEM("TCPSlowStartRetrans", LINUX_MIB_TCPSLOWSTARTRETRANS), SNMP_MIB_ITEM("TCPTimeouts", LINUX_MIB_TCPTIMEOUTS), SNMP_MIB_ITEM("TCPLossProbes", LINUX_MIB_TCPLOSSPROBES), SNMP_MIB_ITEM("TCPLossProbeRecovery", LINUX_MIB_TCPLOSSPROBERECOVERY), SNMP_MIB_ITEM("TCPRenoRecoveryFail", LINUX_MIB_TCPRENORECOVERYFAIL), SNMP_MIB_ITEM("TCPSackRecoveryFail", LINUX_MIB_TCPSACKRECOVERYFAIL), - SNMP_MIB_ITEM("TCPSchedulerFailed", LINUX_MIB_TCPSCHEDULERFAILED), SNMP_MIB_ITEM("TCPRcvCollapsed", LINUX_MIB_TCPRCVCOLLAPSED), SNMP_MIB_ITEM("TCPDSACKOldSent", LINUX_MIB_TCPDSACKOLDSENT), SNMP_MIB_ITEM("TCPDSACKOfoSent", LINUX_MIB_TCPDSACKOFOSENT), diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index b0bb5d0a30bd..33b70bfd1122 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -122,7 +122,8 @@ void raw_unhash_sk(struct sock *sk) EXPORT_SYMBOL_GPL(raw_unhash_sk); struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, - unsigned short num, __be32 raddr, __be32 laddr, int dif) + unsigned short num, __be32 raddr, __be32 laddr, + int dif, int sdif) { sk_for_each_from(sk) { struct inet_sock *inet = inet_sk(sk); @@ -130,7 +131,8 @@ struct sock *__raw_v4_lookup(struct net *net, struct sock *sk, if (net_eq(sock_net(sk), net) && inet->inet_num == num && !(inet->inet_daddr && inet->inet_daddr != raddr) && !(inet->inet_rcv_saddr && inet->inet_rcv_saddr != laddr) && - !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) + !(sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif && + sk->sk_bound_dev_if != sdif)) goto found; /* gotcha */ } sk = NULL; @@ -171,6 +173,7 @@ static int icmp_filter(const struct sock *sk, const struct sk_buff *skb) */ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash) { + int sdif = inet_sdif(skb); struct sock *sk; struct hlist_head *head; int delivered = 0; @@ -184,13 +187,13 @@ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash) net = dev_net(skb->dev); sk = __raw_v4_lookup(net, __sk_head(head), iph->protocol, iph->saddr, iph->daddr, - skb->dev->ifindex); + skb->dev->ifindex, sdif); while (sk) { delivered = 1; if ((iph->protocol != IPPROTO_ICMP || !icmp_filter(sk, skb)) && ip_mc_sf_allow(sk, iph->daddr, iph->saddr, - skb->dev->ifindex)) { + skb->dev->ifindex, sdif)) { struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); /* Not releasing hash table! */ @@ -199,7 +202,7 @@ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash) } sk = __raw_v4_lookup(net, sk_next(sk), iph->protocol, iph->saddr, iph->daddr, - skb->dev->ifindex); + skb->dev->ifindex, sdif); } out: read_unlock(&raw_v4_hashinfo.lock); @@ -297,12 +300,15 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info) read_lock(&raw_v4_hashinfo.lock); raw_sk = sk_head(&raw_v4_hashinfo.ht[hash]); if (raw_sk) { + int dif = skb->dev->ifindex; + int sdif = inet_sdif(skb); + iph = (const struct iphdr *)skb->data; net = dev_net(skb->dev); while ((raw_sk = __raw_v4_lookup(net, raw_sk, protocol, iph->daddr, iph->saddr, - skb->dev->ifindex)) != NULL) { + dif, sdif)) != NULL) { raw_err(raw_sk, skb, info); raw_sk = sk_next(raw_sk); iph = (const struct iphdr *)skb->data; diff --git a/net/ipv4/raw_diag.c b/net/ipv4/raw_diag.c index e1a51ca68d23..c200065ef9a5 100644 --- a/net/ipv4/raw_diag.c +++ b/net/ipv4/raw_diag.c @@ -46,13 +46,13 @@ static struct sock *raw_lookup(struct net *net, struct sock *from, sk = __raw_v4_lookup(net, from, r->sdiag_raw_protocol, r->id.idiag_dst[0], r->id.idiag_src[0], - r->id.idiag_if); + r->id.idiag_if, 0); #if IS_ENABLED(CONFIG_IPV6) else sk = __raw_v6_lookup(net, from, r->sdiag_raw_protocol, (const struct in6_addr *)r->id.idiag_src, (const struct in6_addr *)r->id.idiag_dst, - r->id.idiag_if); + r->id.idiag_if, 0); #endif return sk; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 7effa62beed3..43b69af242e1 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -495,7 +495,7 @@ u32 ip_idents_reserve(u32 hash, int segs) { u32 *p_tstamp = ip_tstamps + hash % IP_IDENTS_SZ; atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ; - u32 old = ACCESS_ONCE(*p_tstamp); + u32 old = READ_ONCE(*p_tstamp); u32 now = (u32)jiffies; u32 new, delta = 0; @@ -651,9 +651,12 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, struct fnhe_hash_bucket *hash; struct fib_nh_exception *fnhe; struct rtable *rt; + u32 genid, hval; unsigned int i; int depth; - u32 hval = fnhe_hashfun(daddr); + + genid = fnhe_genid(dev_net(nh->nh_dev)); + hval = fnhe_hashfun(daddr); spin_lock_bh(&fnhe_lock); @@ -676,12 +679,13 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, } if (fnhe) { + if (fnhe->fnhe_genid != genid) + fnhe->fnhe_genid = genid; if (gw) fnhe->fnhe_gw = gw; - if (pmtu) { + if (pmtu) fnhe->fnhe_pmtu = pmtu; - fnhe->fnhe_expires = max(1UL, expires); - } + fnhe->fnhe_expires = max(1UL, expires); /* Update all cached dsts too */ rt = rcu_dereference(fnhe->fnhe_rth_input); if (rt) @@ -700,7 +704,7 @@ static void update_or_create_fnhe(struct fib_nh *nh, __be32 daddr, __be32 gw, fnhe->fnhe_next = hash->chain; rcu_assign_pointer(hash->chain, fnhe); } - fnhe->fnhe_genid = fnhe_genid(dev_net(nh->nh_dev)); + fnhe->fnhe_genid = genid; fnhe->fnhe_daddr = daddr; fnhe->fnhe_gw = gw; fnhe->fnhe_pmtu = pmtu; @@ -1250,7 +1254,7 @@ static void set_class_tag(struct rtable *rt, u32 tag) static unsigned int ipv4_default_advmss(const struct dst_entry *dst) { unsigned int header_size = sizeof(struct tcphdr) + sizeof(struct iphdr); - unsigned int advmss = max_t(unsigned int, dst->dev->mtu - header_size, + unsigned int advmss = max_t(unsigned int, ipv4_mtu(dst) - header_size, ip_rt_min_advmss); return min(advmss, IPV4_MAX_PMTU - header_size); @@ -1267,7 +1271,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) if (mtu) return mtu; - mtu = dst->dev->mtu; + mtu = READ_ONCE(dst->dev->mtu); if (unlikely(dst_metric_locked(dst, RTAX_MTU))) { if (rt->rt_uses_gateway && mtu > 576) @@ -1398,7 +1402,7 @@ static void ipv4_dst_destroy(struct dst_entry *dst) struct dst_metrics *p = (struct dst_metrics *)DST_METRICS_PTR(dst); struct rtable *rt = (struct rtable *) dst; - if (p != &dst_default_metrics && atomic_dec_and_test(&p->refcnt)) + if (p != &dst_default_metrics && refcount_dec_and_test(&p->refcnt)) kfree(p); if (!list_empty(&rt->rt_uncached)) { @@ -1456,7 +1460,7 @@ static void rt_set_nexthop(struct rtable *rt, __be32 daddr, dst_init_metrics(&rt->dst, fi->fib_metrics->metrics, true); if (fi->fib_metrics != &dst_default_metrics) { rt->dst._metrics |= DST_METRICS_REFCOUNTED; - atomic_inc(&fi->fib_metrics->refcnt); + refcount_inc(&fi->fib_metrics->refcnt); } #ifdef CONFIG_IP_ROUTE_CLASSID rt->dst.tclassid = nh->nh_tclassid; @@ -1520,43 +1524,56 @@ struct rtable *rt_dst_alloc(struct net_device *dev, EXPORT_SYMBOL(rt_dst_alloc); /* called in rcu_read_lock() section */ -static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, - u8 tos, struct net_device *dev, int our) +int ip_mc_validate_source(struct sk_buff *skb, __be32 daddr, __be32 saddr, + u8 tos, struct net_device *dev, + struct in_device *in_dev, u32 *itag) { - struct rtable *rth; - struct in_device *in_dev = __in_dev_get_rcu(dev); - unsigned int flags = RTCF_MULTICAST; - u32 itag = 0; int err; /* Primary sanity checks. */ - if (!in_dev) return -EINVAL; if (ipv4_is_multicast(saddr) || ipv4_is_lbcast(saddr) || skb->protocol != htons(ETH_P_IP)) - goto e_inval; + return -EINVAL; if (ipv4_is_loopback(saddr) && !IN_DEV_ROUTE_LOCALNET(in_dev)) - goto e_inval; + return -EINVAL; if (ipv4_is_zeronet(saddr)) { if (!ipv4_is_local_multicast(daddr)) - goto e_inval; + return -EINVAL; } else { err = fib_validate_source(skb, saddr, 0, tos, 0, dev, - in_dev, &itag); + in_dev, itag); if (err < 0) - goto e_err; + return err; } + return 0; +} + +/* called in rcu_read_lock() section */ +static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, + u8 tos, struct net_device *dev, int our) +{ + struct in_device *in_dev = __in_dev_get_rcu(dev); + unsigned int flags = RTCF_MULTICAST; + struct rtable *rth; + u32 itag = 0; + int err; + + err = ip_mc_validate_source(skb, daddr, saddr, tos, dev, in_dev, &itag); + if (err) + return err; + if (our) flags |= RTCF_LOCAL; rth = rt_dst_alloc(dev_net(dev)->loopback_dev, flags, RTN_MULTICAST, IN_DEV_CONF_GET(in_dev, NOPOLICY), false, false); if (!rth) - goto e_nobufs; + return -ENOBUFS; #ifdef CONFIG_IP_ROUTE_CLASSID rth->dst.tclassid = itag; @@ -1572,13 +1589,6 @@ static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, skb_dst_set(skb, &rth->dst); return 0; - -e_nobufs: - return -ENOBUFS; -e_inval: - return -EINVAL; -e_err: - return err; } @@ -2236,7 +2246,7 @@ add: if (!rth) return ERR_PTR(-ENOBUFS); - rth->rt_iif = orig_oif ? : 0; + rth->rt_iif = orig_oif; if (res->table) rth->rt_table_id = res->table->tb_id; @@ -2439,6 +2449,12 @@ struct rtable *ip_route_output_key_hash_rcu(struct net *net, struct flowi4 *fl4, /* L3 master device is the loopback for that domain */ dev_out = l3mdev_master_dev_rcu(FIB_RES_DEV(*res)) ? : net->loopback_dev; + + /* make sure orig_oif points to fib result device even + * though packet rx/tx happens over loopback or l3mdev + */ + orig_oif = FIB_RES_OIF(*res); + fl4->flowi4_oif = dev_out->ifindex; flags |= RTCF_LOCAL; goto make_route; @@ -2501,7 +2517,7 @@ struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_or struct rtable *ort = (struct rtable *) dst_orig; struct rtable *rt; - rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE, 0); + rt = dst_alloc(&ipv4_dst_blackhole_ops, NULL, 1, DST_OBSOLETE_DEAD, 0); if (rt) { struct dst_entry *new = &rt->dst; @@ -2763,14 +2779,21 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE) table_id = rt->rt_table_id; - if (rtm->rtm_flags & RTM_F_FIB_MATCH) + if (rtm->rtm_flags & RTM_F_FIB_MATCH) { + if (!res.fi) { + err = fib_props[res.type].error; + if (!err) + err = -EHOSTUNREACH; + goto errout_free; + } err = fib_dump_info(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWROUTE, table_id, rt->rt_type, res.prefix, res.prefixlen, fl4.flowi4_tos, res.fi, 0); - else + } else { err = rt_fill_info(net, dst, src, table_id, &fl4, skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq); + } if (err < 0) goto errout_free; @@ -3019,7 +3042,6 @@ struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; int __init ip_rt_init(void) { - int rc = 0; int cpu; ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL); @@ -3068,14 +3090,15 @@ int __init ip_rt_init(void) xfrm_init(); xfrm4_init(); #endif - rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, NULL); + rtnl_register(PF_INET, RTM_GETROUTE, inet_rtm_getroute, NULL, + RTNL_FLAG_DOIT_UNLOCKED); #ifdef CONFIG_SYSCTL register_pernet_subsys(&sysctl_route_ops); #endif register_pernet_subsys(&rt_genid_ops); register_pernet_subsys(&ipv4_inetpeer_ops); - return rc; + return 0; } #ifdef CONFIG_SYSCTL diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 03ad8778c395..fda37f2862c9 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -355,7 +355,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) */ - ireq->opt = tcp_v4_save_options(skb); + RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(sock_net(sk), skb)); if (security_inet_conn_request(sk, skb, req)) { reqsk_free(req); @@ -385,7 +385,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb) /* Try to redo what tcp_v4_send_synack did. */ req->rsk_window_clamp = tp->window_clamp ? :dst_metric(&rt->dst, RTAX_WINDOW); - tcp_select_initial_window(tcp_full_space(sk), req->mss, + tcp_select_initial_window(sk, tcp_full_space(sk), req->mss, &req->rsk_rcv_wnd, &req->rsk_window_clamp, ireq->wscale_ok, &rcv_wscale, dst_metric(&rt->dst, RTAX_INITRWND)); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 9bf809726066..93e172118a94 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * sysctl_net_ipv4.c: sysctl interface to net IPV4 subsystem. * @@ -25,6 +26,7 @@ #include <net/inet_frag.h> #include <net/ping.h> #include <net/protocol.h> +#include <net/netevent.h> static int zero; static int one = 1; @@ -45,6 +47,9 @@ static int tcp_syn_retries_max = MAX_TCP_SYNCNT; static int ip_ping_group_range_min[] = { 0, 0 }; static int ip_ping_group_range_max[] = { GID_T_MAX, GID_T_MAX }; +/* obsolete */ +static int sysctl_tcp_low_latency __read_mostly; + /* Update system visible IP port range */ static void set_local_port_range(struct net *net, int range[2]) { @@ -196,6 +201,8 @@ static int ipv4_ping_group_range(struct ctl_table *table, int write, static int proc_tcp_congestion_control(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { + struct net *net = container_of(ctl->data, struct net, + ipv4.tcp_congestion_control); char val[TCP_CA_NAME_MAX]; struct ctl_table tbl = { .data = val, @@ -203,11 +210,11 @@ static int proc_tcp_congestion_control(struct ctl_table *ctl, int write, }; int ret; - tcp_get_default_congestion_control(val); + tcp_get_default_congestion_control(net, val); ret = proc_dostring(&tbl, write, buffer, lenp, ppos); if (write && ret == 0) - ret = tcp_set_default_congestion_control(val); + ret = tcp_set_default_congestion_control(net, val); return ret; } @@ -248,10 +255,12 @@ static int proc_allowed_congestion_control(struct ctl_table *ctl, return ret; } -static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write, +static int proc_tcp_fastopen_key(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { + struct net *net = container_of(table->data, struct net, + ipv4.sysctl_tcp_fastopen); struct ctl_table tbl = { .maxlen = (TCP_FASTOPEN_KEY_LENGTH * 2 + 10) }; struct tcp_fastopen_context *ctxt; int ret; @@ -262,7 +271,7 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write, return -ENOMEM; rcu_read_lock(); - ctxt = rcu_dereference(tcp_fastopen_ctx); + ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx); if (ctxt) memcpy(user_key, ctxt->key, TCP_FASTOPEN_KEY_LENGTH); else @@ -279,12 +288,8 @@ static int proc_tcp_fastopen_key(struct ctl_table *ctl, int write, ret = -EINVAL; goto bad_key; } - /* Generate a dummy secret but don't publish it. This - * is needed so we don't regenerate a new key on the - * first invocation of tcp_fastopen_cookie_gen - */ - tcp_fastopen_init_key_once(false); - tcp_fastopen_reset_cipher(user_key, TCP_FASTOPEN_KEY_LENGTH); + tcp_fastopen_reset_cipher(net, NULL, user_key, + TCP_FASTOPEN_KEY_LENGTH); } bad_key: @@ -355,11 +360,13 @@ static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table, void __user *buffer, size_t *lenp, loff_t *ppos) { + struct net *net = container_of(table->data, struct net, + ipv4.sysctl_tcp_fastopen_blackhole_timeout); int ret; ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (write && ret == 0) - tcp_fastopen_active_timeout_reset(); + atomic_set(&net->ipv4.tfo_active_disable_times, 0); return ret; } @@ -382,15 +389,25 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl, return ret; } +#ifdef CONFIG_IP_ROUTE_MULTIPATH +static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + struct net *net = container_of(table->data, struct net, + ipv4.sysctl_fib_multipath_hash_policy); + int ret; + + ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); + if (write && ret == 0) + call_netevent_notifiers(NETEVENT_MULTIPATH_HASH_UPDATE, net); + + return ret; +} +#endif + static struct ctl_table ipv4_table[] = { { - .procname = "tcp_retrans_collapse", - .data = &sysctl_tcp_retrans_collapse, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { .procname = "tcp_max_orphans", .data = &sysctl_tcp_max_orphans, .maxlen = sizeof(int), @@ -398,48 +415,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { - .procname = "tcp_fastopen", - .data = &sysctl_tcp_fastopen, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "tcp_fastopen_key", - .mode = 0600, - .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10), - .proc_handler = proc_tcp_fastopen_key, - }, - { - .procname = "tcp_fastopen_blackhole_timeout_sec", - .data = &sysctl_tcp_fastopen_blackhole_timeout, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_tfo_blackhole_detect_timeout, - .extra1 = &zero, - }, - { - .procname = "tcp_abort_on_overflow", - .data = &sysctl_tcp_abort_on_overflow, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { - .procname = "tcp_stdurg", - .data = &sysctl_tcp_stdurg, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { - .procname = "tcp_rfc1337", - .data = &sysctl_tcp_rfc1337, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { .procname = "inet_peer_threshold", .data = &inet_peer_threshold, .maxlen = sizeof(int), @@ -461,34 +436,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec_jiffies, }, { - .procname = "tcp_fack", - .data = &sysctl_tcp_fack, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { - .procname = "tcp_recovery", - .data = &sysctl_tcp_recovery, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "tcp_max_reordering", - .data = &sysctl_tcp_max_reordering, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { - .procname = "tcp_dsack", - .data = &sysctl_tcp_dsack, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { .procname = "tcp_mem", .maxlen = sizeof(sysctl_tcp_mem), .data = &sysctl_tcp_mem, @@ -496,113 +443,12 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_doulongvec_minmax, }, { - .procname = "tcp_wmem", - .data = &sysctl_tcp_wmem, - .maxlen = sizeof(sysctl_tcp_wmem), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &one, - }, - { - .procname = "tcp_rmem", - .data = &sysctl_tcp_rmem, - .maxlen = sizeof(sysctl_tcp_rmem), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &one, - }, - { - .procname = "tcp_app_win", - .data = &sysctl_tcp_app_win, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { - .procname = "tcp_adv_win_scale", - .data = &sysctl_tcp_adv_win_scale, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &tcp_adv_win_scale_min, - .extra2 = &tcp_adv_win_scale_max, - }, - { - .procname = "tcp_frto", - .data = &sysctl_tcp_frto, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { - .procname = "tcp_min_rtt_wlen", - .data = &sysctl_tcp_min_rtt_wlen, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { .procname = "tcp_low_latency", .data = &sysctl_tcp_low_latency, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "tcp_no_metrics_save", - .data = &sysctl_tcp_nometrics_save, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "tcp_moderate_rcvbuf", - .data = &sysctl_tcp_moderate_rcvbuf, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "tcp_tso_win_divisor", - .data = &sysctl_tcp_tso_win_divisor, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "tcp_congestion_control", - .mode = 0644, - .maxlen = TCP_CA_NAME_MAX, - .proc_handler = proc_tcp_congestion_control, - }, - { - .procname = "tcp_workaround_signed_windows", - .data = &sysctl_tcp_workaround_signed_windows, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { - .procname = "tcp_limit_output_bytes", - .data = &sysctl_tcp_limit_output_bytes, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { - .procname = "tcp_challenge_ack_limit", - .data = &sysctl_tcp_challenge_ack_limit, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { - .procname = "tcp_slow_start_after_idle", - .data = &sysctl_tcp_slow_start_after_idle, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, #ifdef CONFIG_NETLABEL { .procname = "cipso_cache_enable", @@ -646,65 +492,6 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_allowed_congestion_control, }, { - .procname = "tcp_thin_linear_timeouts", - .data = &sysctl_tcp_thin_linear_timeouts, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { - .procname = "tcp_early_retrans", - .data = &sysctl_tcp_early_retrans, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &four, - }, - { - .procname = "tcp_min_tso_segs", - .data = &sysctl_tcp_min_tso_segs, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &one, - .extra2 = &gso_max_segs, - }, - { - .procname = "tcp_pacing_ss_ratio", - .data = &sysctl_tcp_pacing_ss_ratio, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &thousand, - }, - { - .procname = "tcp_pacing_ca_ratio", - .data = &sysctl_tcp_pacing_ca_ratio, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &thousand, - }, - { - .procname = "tcp_autocorking", - .data = &sysctl_tcp_autocorking, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &zero, - .extra2 = &one, - }, - { - .procname = "tcp_invalid_ratelimit", - .data = &sysctl_tcp_invalid_ratelimit, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_ms_jiffies, - }, - { .procname = "tcp_available_ulp", .maxlen = TCP_ULP_BUF_MAX, .mode = 0444, @@ -973,6 +760,13 @@ static struct ctl_table ipv4_net_table[] = { }, #endif { + .procname = "tcp_congestion_control", + .data = &init_net.ipv4.tcp_congestion_control, + .mode = 0644, + .maxlen = TCP_CA_NAME_MAX, + .proc_handler = proc_tcp_congestion_control, + }, + { .procname = "tcp_keepalive_time", .data = &init_net.ipv4.sysctl_tcp_keepalive_time, .maxlen = sizeof(int), @@ -1082,6 +876,28 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_fastopen", + .data = &init_net.ipv4.sysctl_tcp_fastopen, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "tcp_fastopen_key", + .mode = 0600, + .data = &init_net.ipv4.sysctl_tcp_fastopen, + .maxlen = ((TCP_FASTOPEN_KEY_LENGTH * 2) + 10), + .proc_handler = proc_tcp_fastopen_key, + }, + { + .procname = "tcp_fastopen_blackhole_timeout_sec", + .data = &init_net.ipv4.sysctl_tcp_fastopen_blackhole_timeout, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_tfo_blackhole_detect_timeout, + .extra1 = &zero, + }, #ifdef CONFIG_IP_ROUTE_MULTIPATH { .procname = "fib_multipath_use_neigh", @@ -1097,7 +913,7 @@ static struct ctl_table ipv4_net_table[] = { .data = &init_net.ipv4.sysctl_fib_multipath_hash_policy, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = proc_dointvec_minmax, + .proc_handler = proc_fib_multipath_hash_policy, .extra1 = &zero, .extra2 = &one, }, @@ -1141,6 +957,216 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tcp_early_retrans", + .data = &init_net.ipv4.sysctl_tcp_early_retrans, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &four, + }, + { + .procname = "tcp_recovery", + .data = &init_net.ipv4.sysctl_tcp_recovery, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "tcp_thin_linear_timeouts", + .data = &init_net.ipv4.sysctl_tcp_thin_linear_timeouts, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "tcp_slow_start_after_idle", + .data = &init_net.ipv4.sysctl_tcp_slow_start_after_idle, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "tcp_retrans_collapse", + .data = &init_net.ipv4.sysctl_tcp_retrans_collapse, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "tcp_stdurg", + .data = &init_net.ipv4.sysctl_tcp_stdurg, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "tcp_rfc1337", + .data = &init_net.ipv4.sysctl_tcp_rfc1337, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "tcp_abort_on_overflow", + .data = &init_net.ipv4.sysctl_tcp_abort_on_overflow, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "tcp_fack", + .data = &init_net.ipv4.sysctl_tcp_fack, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "tcp_max_reordering", + .data = &init_net.ipv4.sysctl_tcp_max_reordering, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "tcp_dsack", + .data = &init_net.ipv4.sysctl_tcp_dsack, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "tcp_app_win", + .data = &init_net.ipv4.sysctl_tcp_app_win, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "tcp_adv_win_scale", + .data = &init_net.ipv4.sysctl_tcp_adv_win_scale, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &tcp_adv_win_scale_min, + .extra2 = &tcp_adv_win_scale_max, + }, + { + .procname = "tcp_frto", + .data = &init_net.ipv4.sysctl_tcp_frto, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "tcp_no_metrics_save", + .data = &init_net.ipv4.sysctl_tcp_nometrics_save, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "tcp_moderate_rcvbuf", + .data = &init_net.ipv4.sysctl_tcp_moderate_rcvbuf, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "tcp_tso_win_divisor", + .data = &init_net.ipv4.sysctl_tcp_tso_win_divisor, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "tcp_workaround_signed_windows", + .data = &init_net.ipv4.sysctl_tcp_workaround_signed_windows, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "tcp_limit_output_bytes", + .data = &init_net.ipv4.sysctl_tcp_limit_output_bytes, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "tcp_challenge_ack_limit", + .data = &init_net.ipv4.sysctl_tcp_challenge_ack_limit, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "tcp_min_tso_segs", + .data = &init_net.ipv4.sysctl_tcp_min_tso_segs, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, + .extra2 = &gso_max_segs, + }, + { + .procname = "tcp_min_rtt_wlen", + .data = &init_net.ipv4.sysctl_tcp_min_rtt_wlen, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "tcp_autocorking", + .data = &init_net.ipv4.sysctl_tcp_autocorking, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, + { + .procname = "tcp_invalid_ratelimit", + .data = &init_net.ipv4.sysctl_tcp_invalid_ratelimit, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_ms_jiffies, + }, + { + .procname = "tcp_pacing_ss_ratio", + .data = &init_net.ipv4.sysctl_tcp_pacing_ss_ratio, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &thousand, + }, + { + .procname = "tcp_pacing_ca_ratio", + .data = &init_net.ipv4.sysctl_tcp_pacing_ca_ratio, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &thousand, + }, + { + .procname = "tcp_wmem", + .data = &init_net.ipv4.sysctl_tcp_wmem, + .maxlen = sizeof(init_net.ipv4.sysctl_tcp_wmem), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, + }, + { + .procname = "tcp_rmem", + .data = &init_net.ipv4.sysctl_tcp_rmem, + .maxlen = sizeof(init_net.ipv4.sysctl_tcp_rmem), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one, + }, { } }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 71ce33decd97..bf97317e6c97 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -269,6 +269,8 @@ #include <linux/err.h> #include <linux/time.h> #include <linux/slab.h> +#include <linux/errqueue.h> +#include <linux/static_key.h> #include <net/icmp.h> #include <net/inet_common.h> @@ -281,24 +283,22 @@ #include <asm/ioctls.h> #include <net/busy_poll.h> -int sysctl_tcp_min_tso_segs __read_mostly = 2; - -int sysctl_tcp_autocorking __read_mostly = 1; +#include <trace/events/tcp.h> struct percpu_counter tcp_orphan_count; EXPORT_SYMBOL_GPL(tcp_orphan_count); long sysctl_tcp_mem[3] __read_mostly; -int sysctl_tcp_wmem[3] __read_mostly; -int sysctl_tcp_rmem[3] __read_mostly; - EXPORT_SYMBOL(sysctl_tcp_mem); -EXPORT_SYMBOL(sysctl_tcp_rmem); -EXPORT_SYMBOL(sysctl_tcp_wmem); atomic_long_t tcp_memory_allocated; /* Current allocated memory. */ EXPORT_SYMBOL(tcp_memory_allocated); +#if IS_ENABLED(CONFIG_SMC) +DEFINE_STATIC_KEY_FALSE(tcp_have_smc); +EXPORT_SYMBOL(tcp_have_smc); +#endif + /* * Current number of TCP sockets. */ @@ -388,6 +388,19 @@ static int retrans_to_secs(u8 retrans, int timeout, int rto_max) return period; } +static u64 tcp_compute_delivery_rate(const struct tcp_sock *tp) +{ + u32 rate = READ_ONCE(tp->rate_delivered); + u32 intv = READ_ONCE(tp->rate_interval_us); + u64 rate64 = 0; + + if (rate && intv) { + rate64 = (u64)rate * tp->mss_cache * USEC_PER_SEC; + do_div(rate64, intv); + } + return rate64; +} + /* Address-family independent initialization for a tcp_sock. * * NOTE: A lot of things set to zero explicitly by call to @@ -399,9 +412,10 @@ void tcp_init_sock(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); tp->out_of_order_queue = RB_ROOT; + sk->tcp_rtx_queue = RB_ROOT; tcp_init_xmit_timers(sk); - tcp_prequeue_init(tp); INIT_LIST_HEAD(&tp->tsq_node); + INIT_LIST_HEAD(&tp->tsorted_sent_queue); icsk->icsk_rto = TCP_TIMEOUT_INIT; tp->mdev_us = jiffies_to_usecs(TCP_TIMEOUT_INIT); @@ -428,6 +442,7 @@ void tcp_init_sock(struct sock *sk) tcp_assign_congestion_control(sk); tp->tsoffset = 0; + tp->rack.reo_wnd_steps = 1; sk->sk_state = TCP_CLOSE; @@ -436,15 +451,29 @@ void tcp_init_sock(struct sock *sk) icsk->icsk_sync_mss = tcp_sync_mss; - sk->sk_sndbuf = sysctl_tcp_wmem[1]; - sk->sk_rcvbuf = sysctl_tcp_rmem[1]; + sk->sk_sndbuf = sock_net(sk)->ipv4.sysctl_tcp_wmem[1]; + sk->sk_rcvbuf = sock_net(sk)->ipv4.sysctl_tcp_rmem[1]; sk_sockets_allocated_inc(sk); } EXPORT_SYMBOL(tcp_init_sock); -static void tcp_tx_timestamp(struct sock *sk, u16 tsflags, struct sk_buff *skb) +void tcp_init_transfer(struct sock *sk, int bpf_op) { + struct inet_connection_sock *icsk = inet_csk(sk); + + tcp_mtup_init(sk); + icsk->icsk_af_ops->rebuild_header(sk); + tcp_init_metrics(sk); + tcp_call_bpf(sk, bpf_op); + tcp_init_congestion_control(sk); + tcp_init_buffer_space(sk); +} + +static void tcp_tx_timestamp(struct sock *sk, u16 tsflags) +{ + struct sk_buff *skb = tcp_write_queue_tail(sk); + if (tsflags && skb) { struct skb_shared_info *shinfo = skb_shinfo(skb); struct tcp_skb_cb *tcb = TCP_SKB_CB(skb); @@ -662,7 +691,7 @@ static bool tcp_should_autocork(struct sock *sk, struct sk_buff *skb, int size_goal) { return skb->len < size_goal && - sysctl_tcp_autocorking && + sock_net(sk)->ipv4.sysctl_tcp_autocorking && skb != tcp_write_queue_head(sk) && refcount_read(&sk->sk_wmem_alloc) > skb->truesize; } @@ -673,10 +702,9 @@ static void tcp_push(struct sock *sk, int flags, int mss_now, struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; - if (!tcp_send_head(sk)) - return; - skb = tcp_write_queue_tail(sk); + if (!skb) + return; if (!(flags & MSG_MORE) || forced_push(tp)) tcp_mark_push(tp, skb); @@ -856,6 +884,7 @@ struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, * available to the caller, no more, no less. */ skb->reserved_tailroom = skb->end - skb->tail - size; + INIT_LIST_HEAD(&skb->tcp_tsorted_anchor); return skb; } __kfree_skb(skb); @@ -935,14 +964,14 @@ ssize_t do_tcp_sendpages(struct sock *sk, struct page *page, int offset, int copy, i; bool can_coalesce; - if (!tcp_send_head(sk) || (copy = size_goal - skb->len) <= 0 || + if (!skb || (copy = size_goal - skb->len) <= 0 || !tcp_skb_can_collapse_to(skb)) { new_segment: if (!sk_stream_memory_free(sk)) goto wait_for_sndbuf; skb = sk_stream_alloc_skb(sk, 0, sk->sk_allocation, - skb_queue_empty(&sk->sk_write_queue)); + tcp_rtx_and_write_queues_empty(sk)); if (!skb) goto wait_for_memory; @@ -1014,7 +1043,7 @@ wait_for_memory: out: if (copied) { - tcp_tx_timestamp(sk, sk->sk_tsflags, tcp_write_queue_tail(sk)); + tcp_tx_timestamp(sk, sk->sk_tsflags); if (!(flags & MSG_SENDPAGE_NOTLAST)) tcp_push(sk, flags, mss_now, tp->nonagle, size_goal); } @@ -1034,23 +1063,29 @@ out_err: } EXPORT_SYMBOL_GPL(do_tcp_sendpages); -int tcp_sendpage(struct sock *sk, struct page *page, int offset, - size_t size, int flags) +int tcp_sendpage_locked(struct sock *sk, struct page *page, int offset, + size_t size, int flags) { - ssize_t res; - if (!(sk->sk_route_caps & NETIF_F_SG) || !sk_check_csum_caps(sk)) - return sock_no_sendpage(sk->sk_socket, page, offset, size, - flags); - - lock_sock(sk); + return sock_no_sendpage_locked(sk, page, offset, size, flags); tcp_rate_check_app_limited(sk); /* is sending application-limited? */ - res = do_tcp_sendpages(sk, page, offset, size, flags); + return do_tcp_sendpages(sk, page, offset, size, flags); +} +EXPORT_SYMBOL_GPL(tcp_sendpage_locked); + +int tcp_sendpage(struct sock *sk, struct page *page, int offset, + size_t size, int flags) +{ + int ret; + + lock_sock(sk); + ret = tcp_sendpage_locked(sk, page, offset, size, flags); release_sock(sk); - return res; + + return ret; } EXPORT_SYMBOL(tcp_sendpage); @@ -1107,7 +1142,7 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, struct sockaddr *uaddr = msg->msg_name; int err, flags; - if (!(sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) || + if (!(sock_net(sk)->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) || (uaddr && msg->msg_namelen >= sizeof(uaddr->sa_family) && uaddr->sa_family == AF_UNSPEC)) return -EOPNOTSUPP; @@ -1144,9 +1179,10 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, return err; } -int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) +int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) { struct tcp_sock *tp = tcp_sk(sk); + struct ubuf_info *uarg = NULL; struct sk_buff *skb; struct sockcm_cookie sockc; int flags, err, copied = 0; @@ -1155,9 +1191,25 @@ int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) bool sg; long timeo; - lock_sock(sk); - flags = msg->msg_flags; + + if (flags & MSG_ZEROCOPY && size) { + if (sk->sk_state != TCP_ESTABLISHED) { + err = -EINVAL; + goto out_err; + } + + skb = tcp_write_queue_tail(sk); + uarg = sock_zerocopy_realloc(sk, size, skb_zcopy(skb)); + if (!uarg) { + err = -ENOBUFS; + goto out_err; + } + + if (!(sk_check_csum_caps(sk) && sk->sk_route_caps & NETIF_F_SG)) + uarg->zerocopy = 0; + } + if (unlikely(flags & MSG_FASTOPEN || inet_sk(sk)->defer_connect)) { err = tcp_sendmsg_fastopen(sk, msg, &copied_syn, size); if (err == -EINPROGRESS && copied_syn > 0) @@ -1223,7 +1275,7 @@ restart: int max = size_goal; skb = tcp_write_queue_tail(sk); - if (tcp_send_head(sk)) { + if (skb) { if (skb->ip_summed == CHECKSUM_NONE) max = mss_now; copy = max - skb->len; @@ -1243,7 +1295,7 @@ new_segment: process_backlog = false; goto restart; } - first_skb = skb_queue_empty(&sk->sk_write_queue); + first_skb = tcp_rtx_and_write_queues_empty(sk); skb = sk_stream_alloc_skb(sk, select_size(sk, sg, first_skb), sk->sk_allocation, @@ -1281,7 +1333,7 @@ new_segment: err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy); if (err) goto do_fault; - } else { + } else if (!uarg || !uarg->zerocopy) { bool merge = true; int i = skb_shinfo(skb)->nr_frags; struct page_frag *pfrag = sk_page_frag(sk); @@ -1319,6 +1371,13 @@ new_segment: page_ref_inc(pfrag->page); } pfrag->offset += copy; + } else { + err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg); + if (err == -EMSGSIZE || err == -EEXIST) + goto new_segment; + if (err < 0) + goto do_error; + copy = err; } if (!copied) @@ -1361,11 +1420,11 @@ wait_for_memory: out: if (copied) { - tcp_tx_timestamp(sk, sockc.tsflags, tcp_write_queue_tail(sk)); + tcp_tx_timestamp(sk, sockc.tsflags); tcp_push(sk, flags, mss_now, tp->nonagle, size_goal); } out_nopush: - release_sock(sk); + sock_zerocopy_put(uarg); return copied + copied_syn; do_fault: @@ -1382,6 +1441,7 @@ do_error: if (copied + copied_syn) goto out; out_err: + sock_zerocopy_put_abort(uarg); err = sk_stream_error(sk, flags, err); /* make sure we wake any epoll edge trigger waiter */ if (unlikely(skb_queue_len(&sk->sk_write_queue) == 0 && @@ -1389,9 +1449,20 @@ out_err: sk->sk_write_space(sk); tcp_chrono_stop(sk, TCP_CHRONO_SNDBUF_LIMITED); } - release_sock(sk); return err; } +EXPORT_SYMBOL_GPL(tcp_sendmsg_locked); + +int tcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) +{ + int ret; + + lock_sock(sk); + ret = tcp_sendmsg_locked(sk, msg, size); + release_sock(sk); + + return ret; +} EXPORT_SYMBOL(tcp_sendmsg); /* @@ -1450,6 +1521,13 @@ static int tcp_peek_sndq(struct sock *sk, struct msghdr *msg, int len) /* XXX -- need to support SO_PEEK_OFF */ + skb_rbtree_walk(skb, &sk->tcp_rtx_queue) { + err = skb_copy_datagram_msg(skb, 0, msg, skb->len); + if (err) + return err; + copied += skb->len; + } + skb_queue_walk(&sk->sk_write_queue, skb) { err = skb_copy_datagram_msg(skb, 0, msg, skb->len); if (err) @@ -1525,20 +1603,6 @@ static void tcp_cleanup_rbuf(struct sock *sk, int copied) tcp_send_ack(sk); } -static void tcp_prequeue_process(struct sock *sk) -{ - struct sk_buff *skb; - struct tcp_sock *tp = tcp_sk(sk); - - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUED); - - while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) - sk_backlog_rcv(sk, skb); - - /* Clear memory counter. */ - tp->ucopy.memory = 0; -} - static struct sk_buff *tcp_recv_skb(struct sock *sk, u32 seq, u32 *off) { struct sk_buff *skb; @@ -1652,6 +1716,61 @@ int tcp_peek_len(struct socket *sock) } EXPORT_SYMBOL(tcp_peek_len); +static void tcp_update_recv_tstamps(struct sk_buff *skb, + struct scm_timestamping *tss) +{ + if (skb->tstamp) + tss->ts[0] = ktime_to_timespec(skb->tstamp); + else + tss->ts[0] = (struct timespec) {0}; + + if (skb_hwtstamps(skb)->hwtstamp) + tss->ts[2] = ktime_to_timespec(skb_hwtstamps(skb)->hwtstamp); + else + tss->ts[2] = (struct timespec) {0}; +} + +/* Similar to __sock_recv_timestamp, but does not require an skb */ +void tcp_recv_timestamp(struct msghdr *msg, const struct sock *sk, + struct scm_timestamping *tss) +{ + struct timeval tv; + bool has_timestamping = false; + + if (tss->ts[0].tv_sec || tss->ts[0].tv_nsec) { + if (sock_flag(sk, SOCK_RCVTSTAMP)) { + if (sock_flag(sk, SOCK_RCVTSTAMPNS)) { + put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPNS, + sizeof(tss->ts[0]), &tss->ts[0]); + } else { + tv.tv_sec = tss->ts[0].tv_sec; + tv.tv_usec = tss->ts[0].tv_nsec / 1000; + + put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMP, + sizeof(tv), &tv); + } + } + + if (sk->sk_tsflags & SOF_TIMESTAMPING_SOFTWARE) + has_timestamping = true; + else + tss->ts[0] = (struct timespec) {0}; + } + + if (tss->ts[2].tv_sec || tss->ts[2].tv_nsec) { + if (sk->sk_tsflags & SOF_TIMESTAMPING_RAW_HARDWARE) + has_timestamping = true; + else + tss->ts[2] = (struct timespec) {0}; + } + + if (has_timestamping) { + tss->ts[1] = (struct timespec) {0}; + put_cmsg(msg, SOL_SOCKET, SCM_TIMESTAMPING, + sizeof(*tss), tss); + } +} + /* * This routine copies from a sock struct into the user buffer. * @@ -1671,9 +1790,10 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, int err; int target; /* Read at least this many bytes */ long timeo; - struct task_struct *user_recv = NULL; struct sk_buff *skb, *last; u32 urg_hole = 0; + struct scm_timestamping tss; + bool has_tss = false; if (unlikely(flags & MSG_ERRQUEUE)) return inet_recv_error(sk, msg, len, addr_len); @@ -1806,51 +1926,6 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, tcp_cleanup_rbuf(sk, copied); - if (!sysctl_tcp_low_latency && tp->ucopy.task == user_recv) { - /* Install new reader */ - if (!user_recv && !(flags & (MSG_TRUNC | MSG_PEEK))) { - user_recv = current; - tp->ucopy.task = user_recv; - tp->ucopy.msg = msg; - } - - tp->ucopy.len = len; - - WARN_ON(tp->copied_seq != tp->rcv_nxt && - !(flags & (MSG_PEEK | MSG_TRUNC))); - - /* Ugly... If prequeue is not empty, we have to - * process it before releasing socket, otherwise - * order will be broken at second iteration. - * More elegant solution is required!!! - * - * Look: we have the following (pseudo)queues: - * - * 1. packets in flight - * 2. backlog - * 3. prequeue - * 4. receive_queue - * - * Each queue can be processed only if the next ones - * are empty. At this point we have empty receive_queue. - * But prequeue _can_ be not empty after 2nd iteration, - * when we jumped to start of loop because backlog - * processing added something to receive_queue. - * We cannot release_sock(), because backlog contains - * packets arrived _after_ prequeued ones. - * - * Shortly, algorithm is clear --- to process all - * the queues in order. We could make it more directly, - * requeueing packets from backlog to prequeue, if - * is not empty. It is more elegant, but eats cycles, - * unfortunately. - */ - if (!skb_queue_empty(&tp->ucopy.prequeue)) - goto do_prequeue; - - /* __ Set realtime policy in scheduler __ */ - } - if (copied >= target) { /* Do not sleep, just process backlog. */ release_sock(sk); @@ -1859,31 +1934,6 @@ int tcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int nonblock, sk_wait_data(sk, &timeo, last); } - if (user_recv) { - int chunk; - - /* __ Restore normal policy in scheduler __ */ - - chunk = len - tp->ucopy.len; - if (chunk != 0) { - NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMBACKLOG, chunk); - len -= chunk; - copied += chunk; - } - - if (tp->rcv_nxt == tp->copied_seq && - !skb_queue_empty(&tp->ucopy.prequeue)) { -do_prequeue: - tcp_prequeue_process(sk); - - chunk = len - tp->ucopy.len; - if (chunk != 0) { - NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk); - len -= chunk; - copied += chunk; - } - } - } if ((flags & MSG_PEEK) && (peek_seq - copied - urg_hole != tp->copied_seq)) { net_dbg_ratelimited("TCP(%s:%d): Application bug, race in MSG_PEEK\n", @@ -1941,6 +1991,10 @@ skip_copy: if (used + offset < skb->len) continue; + if (TCP_SKB_CB(skb)->has_rxtstamp) { + tcp_update_recv_tstamps(skb, &tss); + has_tss = true; + } if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) goto found_fin_ok; if (!(flags & MSG_PEEK)) @@ -1955,29 +2009,13 @@ skip_copy: break; } while (len > 0); - if (user_recv) { - if (!skb_queue_empty(&tp->ucopy.prequeue)) { - int chunk; - - tp->ucopy.len = copied > 0 ? len : 0; - - tcp_prequeue_process(sk); - - if (copied > 0 && (chunk = len - tp->ucopy.len) != 0) { - NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPDIRECTCOPYFROMPREQUEUE, chunk); - len -= chunk; - copied += chunk; - } - } - - tp->ucopy.task = NULL; - tp->ucopy.len = 0; - } - /* According to UNIX98, msg_name/msg_namelen are ignored * on connected socket. I was just happy when found this 8) --ANK */ + if (has_tss) + tcp_recv_timestamp(msg, sk, &tss); + /* Clean up data we have read: This will do ACK frames. */ tcp_cleanup_rbuf(sk, copied); @@ -2002,6 +2040,8 @@ void tcp_set_state(struct sock *sk, int state) { int oldstate = sk->sk_state; + trace_tcp_set_state(sk, oldstate, state); + switch (state) { case TCP_ESTABLISHED: if (oldstate != TCP_ESTABLISHED) @@ -2289,6 +2329,37 @@ static inline bool tcp_need_reset(int state) TCPF_FIN_WAIT2 | TCPF_SYN_RECV); } +static void tcp_rtx_queue_purge(struct sock *sk) +{ + struct rb_node *p = rb_first(&sk->tcp_rtx_queue); + + while (p) { + struct sk_buff *skb = rb_to_skb(p); + + p = rb_next(p); + /* Since we are deleting whole queue, no need to + * list_del(&skb->tcp_tsorted_anchor) + */ + tcp_rtx_queue_unlink(skb, sk); + sk_wmem_free_skb(sk, skb); + } +} + +void tcp_write_queue_purge(struct sock *sk) +{ + struct sk_buff *skb; + + tcp_chrono_stop(sk, TCP_CHRONO_BUSY); + while ((skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { + tcp_skb_tsorted_anchor_cleanup(skb); + sk_wmem_free_skb(sk, skb); + } + tcp_rtx_queue_purge(sk); + INIT_LIST_HEAD(&tcp_sk(sk)->tsorted_sent_queue); + sk_mem_reclaim(sk); + tcp_clear_all_retrans_hints(tcp_sk(sk)); +} + int tcp_disconnect(struct sock *sk, int flags) { struct inet_sock *inet = inet_sk(sk); @@ -2347,7 +2418,6 @@ int tcp_disconnect(struct sock *sk, int flags) * issue in __tcp_select_window() */ icsk->icsk_ack.rcv_mss = TCP_MIN_MSS; - tcp_init_send_head(sk); memset(&tp->rx_opt, 0, sizeof(tp->rx_opt)); __sk_dst_reset(sk); dst_release(sk->sk_rx_dst); @@ -2439,8 +2509,6 @@ static int tcp_repair_options_est(struct sock *sk, return -EINVAL; tp->rx_opt.sack_ok |= TCP_SACK_SEEN; - if (sysctl_tcp_fack) - tcp_enable_fack(tp); break; case TCPOPT_TIMESTAMP: if (opt.opt_val != 0) @@ -2481,7 +2549,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, name[val] = 0; lock_sock(sk); - err = tcp_set_congestion_control(sk, name, true); + err = tcp_set_congestion_control(sk, name, true, true); release_sock(sk); return err; } @@ -2503,6 +2571,17 @@ static int do_tcp_setsockopt(struct sock *sk, int level, release_sock(sk); return err; } + case TCP_FASTOPEN_KEY: { + __u8 key[TCP_FASTOPEN_KEY_LENGTH]; + + if (optlen != sizeof(key)) + return -EINVAL; + + if (copy_from_user(key, optval, optlen)) + return -EFAULT; + + return tcp_fastopen_reset_cipher(net, sk, key, sizeof(key)); + } default: /* fallthru */ break; @@ -2734,7 +2813,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, case TCP_FASTOPEN: if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) { - tcp_fastopen_init_key_once(true); + tcp_fastopen_init_key_once(net); fastopen_queue_tune(sk, val); } else { @@ -2744,7 +2823,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, case TCP_FASTOPEN_CONNECT: if (val > 1 || val < 0) { err = -EINVAL; - } else if (sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) { + } else if (net->ipv4.sysctl_tcp_fastopen & TFO_CLIENT_ENABLE) { if (sk->sk_state == TCP_CLOSE) tp->fastopen_connect = val; else @@ -2753,6 +2832,14 @@ static int do_tcp_setsockopt(struct sock *sk, int level, err = -EOPNOTSUPP; } break; + case TCP_FASTOPEN_NO_COOKIE: + if (val > 1 || val < 0) + err = -EINVAL; + else if (!((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) + err = -EINVAL; + else + tp->fastopen_no_cookie = val; + break; case TCP_TIMESTAMP: if (!tp->repair) err = -EPERM; @@ -2823,7 +2910,7 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) { const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */ const struct inet_connection_sock *icsk = inet_csk(sk); - u32 now, intv; + u32 now; u64 rate64; bool slow; u32 rate; @@ -2890,7 +2977,6 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_lost = tp->lost_out; info->tcpi_retrans = tp->retrans_out; - info->tcpi_fackets = tp->fackets_out; now = tcp_jiffies32; info->tcpi_last_data_sent = jiffies_to_msecs(now - tp->lsndtime); @@ -2922,13 +3008,9 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_data_segs_out = tp->data_segs_out; info->tcpi_delivery_rate_app_limited = tp->rate_app_limited ? 1 : 0; - rate = READ_ONCE(tp->rate_delivered); - intv = READ_ONCE(tp->rate_interval_us); - if (rate && intv) { - rate64 = (u64)rate * tp->mss_cache * USEC_PER_SEC; - do_div(rate64, intv); + rate64 = tcp_compute_delivery_rate(tp); + if (rate64) info->tcpi_delivery_rate = rate64; - } unlock_sock_fast(sk, slow); } EXPORT_SYMBOL_GPL(tcp_get_info); @@ -2938,8 +3020,12 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk) const struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *stats; struct tcp_info info; + u64 rate64; + u32 rate; - stats = alloc_skb(5 * nla_total_size_64bit(sizeof(u64)), GFP_ATOMIC); + stats = alloc_skb(7 * nla_total_size_64bit(sizeof(u64)) + + 3 * nla_total_size(sizeof(u32)) + + 2 * nla_total_size(sizeof(u8)), GFP_ATOMIC); if (!stats) return NULL; @@ -2954,6 +3040,20 @@ struct sk_buff *tcp_get_timestamping_opt_stats(const struct sock *sk) tp->data_segs_out, TCP_NLA_PAD); nla_put_u64_64bit(stats, TCP_NLA_TOTAL_RETRANS, tp->total_retrans, TCP_NLA_PAD); + + rate = READ_ONCE(sk->sk_pacing_rate); + rate64 = rate != ~0U ? rate : ~0ULL; + nla_put_u64_64bit(stats, TCP_NLA_PACING_RATE, rate64, TCP_NLA_PAD); + + rate64 = tcp_compute_delivery_rate(tp); + nla_put_u64_64bit(stats, TCP_NLA_DELIVERY_RATE, rate64, TCP_NLA_PAD); + + nla_put_u32(stats, TCP_NLA_SND_CWND, tp->snd_cwnd); + nla_put_u32(stats, TCP_NLA_REORDERING, tp->reordering); + nla_put_u32(stats, TCP_NLA_MIN_RTT, tcp_min_rtt(tp)); + + nla_put_u8(stats, TCP_NLA_RECUR_RETRANS, inet_csk(sk)->icsk_retransmits); + nla_put_u8(stats, TCP_NLA_DELIVERY_RATE_APP_LMT, !!tp->rate_app_limited); return stats; } @@ -3075,6 +3175,28 @@ static int do_tcp_getsockopt(struct sock *sk, int level, return -EFAULT; return 0; + case TCP_FASTOPEN_KEY: { + __u8 key[TCP_FASTOPEN_KEY_LENGTH]; + struct tcp_fastopen_context *ctx; + + if (get_user(len, optlen)) + return -EFAULT; + + rcu_read_lock(); + ctx = rcu_dereference(icsk->icsk_accept_queue.fastopenq.ctx); + if (ctx) + memcpy(key, ctx->key, sizeof(key)); + else + len = 0; + rcu_read_unlock(); + + len = min_t(unsigned int, len, sizeof(key)); + if (put_user(len, optlen)) + return -EFAULT; + if (copy_to_user(optval, key, len)) + return -EFAULT; + return 0; + } case TCP_THIN_LINEAR_TIMEOUTS: val = tp->thin_lto; break; @@ -3137,6 +3259,10 @@ static int do_tcp_getsockopt(struct sock *sk, int level, val = tp->fastopen_connect; break; + case TCP_FASTOPEN_NO_COOKIE: + val = tp->fastopen_no_cookie; + break; + case TCP_TIMESTAMP: val = tcp_time_stamp_raw() + tp->tsoffset; break; @@ -3502,13 +3628,13 @@ void __init tcp_init(void) max_wshare = min(4UL*1024*1024, limit); max_rshare = min(6UL*1024*1024, limit); - sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; - sysctl_tcp_wmem[1] = 16*1024; - sysctl_tcp_wmem[2] = max(64*1024, max_wshare); + init_net.ipv4.sysctl_tcp_wmem[0] = SK_MEM_QUANTUM; + init_net.ipv4.sysctl_tcp_wmem[1] = 16*1024; + init_net.ipv4.sysctl_tcp_wmem[2] = max(64*1024, max_wshare); - sysctl_tcp_rmem[0] = SK_MEM_QUANTUM; - sysctl_tcp_rmem[1] = 87380; - sysctl_tcp_rmem[2] = max(87380, max_rshare); + init_net.ipv4.sysctl_tcp_rmem[0] = SK_MEM_QUANTUM; + init_net.ipv4.sysctl_tcp_rmem[1] = 87380; + init_net.ipv4.sysctl_tcp_rmem[2] = max(87380, max_rshare); pr_info("Hash tables configured (established %u bind %u)\n", tcp_hashinfo.ehash_mask + 1, tcp_hashinfo.bhash_size); diff --git a/net/ipv4/tcp_bic.c b/net/ipv4/tcp_bic.c index 609965f0e298..fc3614377413 100644 --- a/net/ipv4/tcp_bic.c +++ b/net/ipv4/tcp_bic.c @@ -49,7 +49,6 @@ MODULE_PARM_DESC(smooth_part, "log(B/(B*Smin))/log(B/(B-1))+B, # of RTT from Wma struct bictcp { u32 cnt; /* increase cwnd by 1 after ACKs */ u32 last_max_cwnd; /* last maximum snd_cwnd */ - u32 loss_cwnd; /* congestion window at last loss */ u32 last_cwnd; /* the last snd_cwnd */ u32 last_time; /* time when updated last_cwnd */ u32 epoch_start; /* beginning of an epoch */ @@ -72,7 +71,6 @@ static void bictcp_init(struct sock *sk) struct bictcp *ca = inet_csk_ca(sk); bictcp_reset(ca); - ca->loss_cwnd = 0; if (initial_ssthresh) tcp_sk(sk)->snd_ssthresh = initial_ssthresh; @@ -172,22 +170,12 @@ static u32 bictcp_recalc_ssthresh(struct sock *sk) else ca->last_max_cwnd = tp->snd_cwnd; - ca->loss_cwnd = tp->snd_cwnd; - if (tp->snd_cwnd <= low_window) return max(tp->snd_cwnd >> 1U, 2U); else return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U); } -static u32 bictcp_undo_cwnd(struct sock *sk) -{ - const struct tcp_sock *tp = tcp_sk(sk); - const struct bictcp *ca = inet_csk_ca(sk); - - return max(tp->snd_cwnd, ca->loss_cwnd); -} - static void bictcp_state(struct sock *sk, u8 new_state) { if (new_state == TCP_CA_Loss) @@ -214,7 +202,7 @@ static struct tcp_congestion_ops bictcp __read_mostly = { .ssthresh = bictcp_recalc_ssthresh, .cong_avoid = bictcp_cong_avoid, .set_state = bictcp_state, - .undo_cwnd = bictcp_undo_cwnd, + .undo_cwnd = tcp_reno_undo_cwnd, .pkts_acked = bictcp_acked, .owner = THIS_MODULE, .name = "bic", diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c index 50a0f3e51d5b..06fbe102a425 100644 --- a/net/ipv4/tcp_cdg.c +++ b/net/ipv4/tcp_cdg.c @@ -85,7 +85,6 @@ struct cdg { u8 state; u8 delack; u32 rtt_seq; - u32 undo_cwnd; u32 shadow_wnd; u16 backoff_cnt; u16 sample_cnt; @@ -330,8 +329,6 @@ static u32 tcp_cdg_ssthresh(struct sock *sk) struct cdg *ca = inet_csk_ca(sk); struct tcp_sock *tp = tcp_sk(sk); - ca->undo_cwnd = tp->snd_cwnd; - if (ca->state == CDG_BACKOFF) return max(2U, (tp->snd_cwnd * min(1024U, backoff_beta)) >> 10); @@ -344,13 +341,6 @@ static u32 tcp_cdg_ssthresh(struct sock *sk) return max(2U, tp->snd_cwnd >> 1); } -static u32 tcp_cdg_undo_cwnd(struct sock *sk) -{ - struct cdg *ca = inet_csk_ca(sk); - - return max(tcp_sk(sk)->snd_cwnd, ca->undo_cwnd); -} - static void tcp_cdg_cwnd_event(struct sock *sk, const enum tcp_ca_event ev) { struct cdg *ca = inet_csk_ca(sk); @@ -399,11 +389,11 @@ static void tcp_cdg_release(struct sock *sk) kfree(ca->gradients); } -struct tcp_congestion_ops tcp_cdg __read_mostly = { +static struct tcp_congestion_ops tcp_cdg __read_mostly = { .cong_avoid = tcp_cdg_cong_avoid, .cwnd_event = tcp_cdg_cwnd_event, .pkts_acked = tcp_cdg_acked, - .undo_cwnd = tcp_cdg_undo_cwnd, + .undo_cwnd = tcp_reno_undo_cwnd, .ssthresh = tcp_cdg_ssthresh, .release = tcp_cdg_release, .init = tcp_cdg_init, diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index fde983f6376b..bc6c02f16243 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -33,9 +33,11 @@ static struct tcp_congestion_ops *tcp_ca_find(const char *name) } /* Must be called with rcu lock held */ -static const struct tcp_congestion_ops *__tcp_ca_find_autoload(const char *name) +static struct tcp_congestion_ops *tcp_ca_find_autoload(struct net *net, + const char *name) { - const struct tcp_congestion_ops *ca = tcp_ca_find(name); + struct tcp_congestion_ops *ca = tcp_ca_find(name); + #ifdef CONFIG_MODULES if (!ca && capable(CAP_NET_ADMIN)) { rcu_read_unlock(); @@ -115,7 +117,7 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca) } EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control); -u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca) +u32 tcp_ca_get_key_by_name(struct net *net, const char *name, bool *ecn_ca) { const struct tcp_congestion_ops *ca; u32 key = TCP_CA_UNSPEC; @@ -123,7 +125,7 @@ u32 tcp_ca_get_key_by_name(const char *name, bool *ecn_ca) might_sleep(); rcu_read_lock(); - ca = __tcp_ca_find_autoload(name); + ca = tcp_ca_find_autoload(net, name); if (ca) { key = ca->key; *ecn_ca = ca->flags & TCP_CONG_NEEDS_ECN; @@ -153,23 +155,18 @@ EXPORT_SYMBOL_GPL(tcp_ca_get_name_by_key); /* Assign choice of congestion control. */ void tcp_assign_congestion_control(struct sock *sk) { + struct net *net = sock_net(sk); struct inet_connection_sock *icsk = inet_csk(sk); - struct tcp_congestion_ops *ca; + const struct tcp_congestion_ops *ca; rcu_read_lock(); - list_for_each_entry_rcu(ca, &tcp_cong_list, list) { - if (likely(try_module_get(ca->owner))) { - icsk->icsk_ca_ops = ca; - goto out; - } - /* Fallback to next available. The last really - * guaranteed fallback is Reno from this list. - */ - } -out: + ca = rcu_dereference(net->ipv4.tcp_congestion_control); + if (unlikely(!try_module_get(ca->owner))) + ca = &tcp_reno; + icsk->icsk_ca_ops = ca; rcu_read_unlock(); - memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); + memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv)); if (ca->flags & TCP_CONG_NEEDS_ECN) INET_ECN_xmit(sk); else @@ -189,8 +186,8 @@ void tcp_init_congestion_control(struct sock *sk) INET_ECN_dontxmit(sk); } -void tcp_reinit_congestion_control(struct sock *sk, - const struct tcp_congestion_ops *ca) +static void tcp_reinit_congestion_control(struct sock *sk, + const struct tcp_congestion_ops *ca) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -214,29 +211,27 @@ void tcp_cleanup_congestion_control(struct sock *sk) } /* Used by sysctl to change default congestion control */ -int tcp_set_default_congestion_control(const char *name) +int tcp_set_default_congestion_control(struct net *net, const char *name) { struct tcp_congestion_ops *ca; - int ret = -ENOENT; - - spin_lock(&tcp_cong_list_lock); - ca = tcp_ca_find(name); -#ifdef CONFIG_MODULES - if (!ca && capable(CAP_NET_ADMIN)) { - spin_unlock(&tcp_cong_list_lock); + const struct tcp_congestion_ops *prev; + int ret; - request_module("tcp_%s", name); - spin_lock(&tcp_cong_list_lock); - ca = tcp_ca_find(name); - } -#endif + rcu_read_lock(); + ca = tcp_ca_find_autoload(net, name); + if (!ca) { + ret = -ENOENT; + } else if (!try_module_get(ca->owner)) { + ret = -EBUSY; + } else { + prev = xchg(&net->ipv4.tcp_congestion_control, ca); + if (prev) + module_put(prev->owner); - if (ca) { - ca->flags |= TCP_CONG_NON_RESTRICTED; /* default is always allowed */ - list_move(&ca->list, &tcp_cong_list); + ca->flags |= TCP_CONG_NON_RESTRICTED; ret = 0; } - spin_unlock(&tcp_cong_list_lock); + rcu_read_unlock(); return ret; } @@ -244,7 +239,8 @@ int tcp_set_default_congestion_control(const char *name) /* Set default value from kernel configuration at bootup */ static int __init tcp_congestion_default(void) { - return tcp_set_default_congestion_control(CONFIG_DEFAULT_TCP_CONG); + return tcp_set_default_congestion_control(&init_net, + CONFIG_DEFAULT_TCP_CONG); } late_initcall(tcp_congestion_default); @@ -264,14 +260,12 @@ void tcp_get_available_congestion_control(char *buf, size_t maxlen) } /* Get current default congestion control */ -void tcp_get_default_congestion_control(char *name) +void tcp_get_default_congestion_control(struct net *net, char *name) { - struct tcp_congestion_ops *ca; - /* We will always have reno... */ - BUG_ON(list_empty(&tcp_cong_list)); + const struct tcp_congestion_ops *ca; rcu_read_lock(); - ca = list_entry(tcp_cong_list.next, struct tcp_congestion_ops, list); + ca = rcu_dereference(net->ipv4.tcp_congestion_control); strncpy(name, ca->name, TCP_CA_NAME_MAX); rcu_read_unlock(); } @@ -338,7 +332,7 @@ out: * tcp_reinit_congestion_control (if the current congestion control was * already initialized. */ -int tcp_set_congestion_control(struct sock *sk, const char *name, bool load) +int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, bool reinit) { struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_congestion_ops *ca; @@ -351,18 +345,29 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load) if (!load) ca = tcp_ca_find(name); else - ca = __tcp_ca_find_autoload(name); + ca = tcp_ca_find_autoload(sock_net(sk), name); + /* No change asking for existing value */ if (ca == icsk->icsk_ca_ops) { icsk->icsk_ca_setsockopt = 1; goto out; } + if (!ca) { err = -ENOENT; } else if (!load) { - icsk->icsk_ca_ops = ca; - if (!try_module_get(ca->owner)) + const struct tcp_congestion_ops *old_ca = icsk->icsk_ca_ops; + + if (try_module_get(ca->owner)) { + if (reinit) { + tcp_reinit_congestion_control(sk, ca); + } else { + icsk->icsk_ca_ops = ca; + module_put(old_ca->owner); + } + } else { err = -EBUSY; + } } else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))) { err = -EPERM; @@ -456,7 +461,7 @@ u32 tcp_reno_undo_cwnd(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); - return max(tp->snd_cwnd, tp->snd_ssthresh << 1); + return max(tp->snd_cwnd, tp->prior_cwnd); } EXPORT_SYMBOL_GPL(tcp_reno_undo_cwnd); diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c index 57ae5b5ae643..78bfadfcf342 100644 --- a/net/ipv4/tcp_cubic.c +++ b/net/ipv4/tcp_cubic.c @@ -83,7 +83,6 @@ MODULE_PARM_DESC(hystart_ack_delta, "spacing between ack's indicating train (mse struct bictcp { u32 cnt; /* increase cwnd by 1 after ACKs */ u32 last_max_cwnd; /* last maximum snd_cwnd */ - u32 loss_cwnd; /* congestion window at last loss */ u32 last_cwnd; /* the last snd_cwnd */ u32 last_time; /* time when updated last_cwnd */ u32 bic_origin_point;/* origin point of bic function */ @@ -142,7 +141,6 @@ static void bictcp_init(struct sock *sk) struct bictcp *ca = inet_csk_ca(sk); bictcp_reset(ca); - ca->loss_cwnd = 0; if (hystart) bictcp_hystart_reset(sk); @@ -366,18 +364,9 @@ static u32 bictcp_recalc_ssthresh(struct sock *sk) else ca->last_max_cwnd = tp->snd_cwnd; - ca->loss_cwnd = tp->snd_cwnd; - return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U); } -static u32 bictcp_undo_cwnd(struct sock *sk) -{ - struct bictcp *ca = inet_csk_ca(sk); - - return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd); -} - static void bictcp_state(struct sock *sk, u8 new_state) { if (new_state == TCP_CA_Loss) { @@ -470,7 +459,7 @@ static struct tcp_congestion_ops cubictcp __read_mostly = { .ssthresh = bictcp_recalc_ssthresh, .cong_avoid = bictcp_cong_avoid, .set_state = bictcp_state, - .undo_cwnd = bictcp_undo_cwnd, + .undo_cwnd = tcp_reno_undo_cwnd, .cwnd_event = bictcp_cwnd_event, .pkts_acked = bictcp_acked, .owner = THIS_MODULE, diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index a748c74aa8b7..abbf0edcf6c2 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -16,6 +16,7 @@ #include <linux/tcp.h> +#include <net/netlink.h> #include <net/tcp.h> static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, @@ -36,6 +37,100 @@ static void tcp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, tcp_get_info(sk, info); } +#ifdef CONFIG_TCP_MD5SIG +static void tcp_diag_md5sig_fill(struct tcp_diag_md5sig *info, + const struct tcp_md5sig_key *key) +{ + info->tcpm_family = key->family; + info->tcpm_prefixlen = key->prefixlen; + info->tcpm_keylen = key->keylen; + memcpy(info->tcpm_key, key->key, key->keylen); + + if (key->family == AF_INET) + info->tcpm_addr[0] = key->addr.a4.s_addr; + #if IS_ENABLED(CONFIG_IPV6) + else if (key->family == AF_INET6) + memcpy(&info->tcpm_addr, &key->addr.a6, + sizeof(info->tcpm_addr)); + #endif +} + +static int tcp_diag_put_md5sig(struct sk_buff *skb, + const struct tcp_md5sig_info *md5sig) +{ + const struct tcp_md5sig_key *key; + struct tcp_diag_md5sig *info; + struct nlattr *attr; + int md5sig_count = 0; + + hlist_for_each_entry_rcu(key, &md5sig->head, node) + md5sig_count++; + if (md5sig_count == 0) + return 0; + + attr = nla_reserve(skb, INET_DIAG_MD5SIG, + md5sig_count * sizeof(struct tcp_diag_md5sig)); + if (!attr) + return -EMSGSIZE; + + info = nla_data(attr); + memset(info, 0, md5sig_count * sizeof(struct tcp_diag_md5sig)); + hlist_for_each_entry_rcu(key, &md5sig->head, node) { + tcp_diag_md5sig_fill(info++, key); + if (--md5sig_count == 0) + break; + } + + return 0; +} +#endif + +static int tcp_diag_get_aux(struct sock *sk, bool net_admin, + struct sk_buff *skb) +{ +#ifdef CONFIG_TCP_MD5SIG + if (net_admin) { + struct tcp_md5sig_info *md5sig; + int err = 0; + + rcu_read_lock(); + md5sig = rcu_dereference(tcp_sk(sk)->md5sig_info); + if (md5sig) + err = tcp_diag_put_md5sig(skb, md5sig); + rcu_read_unlock(); + if (err < 0) + return err; + } +#endif + + return 0; +} + +static size_t tcp_diag_get_aux_size(struct sock *sk, bool net_admin) +{ + size_t size = 0; + +#ifdef CONFIG_TCP_MD5SIG + if (net_admin && sk_fullsock(sk)) { + const struct tcp_md5sig_info *md5sig; + const struct tcp_md5sig_key *key; + size_t md5sig_count = 0; + + rcu_read_lock(); + md5sig = rcu_dereference(tcp_sk(sk)->md5sig_info); + if (md5sig) { + hlist_for_each_entry_rcu(key, &md5sig->head, node) + md5sig_count++; + } + rcu_read_unlock(); + size += nla_total_size(md5sig_count * + sizeof(struct tcp_diag_md5sig)); + } +#endif + + return size; +} + static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r, struct nlattr *bc) { @@ -68,13 +163,15 @@ static int tcp_diag_destroy(struct sk_buff *in_skb, #endif static const struct inet_diag_handler tcp_diag_handler = { - .dump = tcp_diag_dump, - .dump_one = tcp_diag_dump_one, - .idiag_get_info = tcp_diag_get_info, - .idiag_type = IPPROTO_TCP, - .idiag_info_size = sizeof(struct tcp_info), + .dump = tcp_diag_dump, + .dump_one = tcp_diag_dump_one, + .idiag_get_info = tcp_diag_get_info, + .idiag_get_aux = tcp_diag_get_aux, + .idiag_get_aux_size = tcp_diag_get_aux_size, + .idiag_type = IPPROTO_TCP, + .idiag_info_size = sizeof(struct tcp_info), #ifdef CONFIG_INET_DIAG_DESTROY - .destroy = tcp_diag_destroy, + .destroy = tcp_diag_destroy, #endif }; diff --git a/net/ipv4/tcp_fastopen.c b/net/ipv4/tcp_fastopen.c index ce9c7fef200f..78c192ee03a4 100644 --- a/net/ipv4/tcp_fastopen.c +++ b/net/ipv4/tcp_fastopen.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/crypto.h> #include <linux/err.h> #include <linux/init.h> @@ -9,15 +10,18 @@ #include <net/inetpeer.h> #include <net/tcp.h> -int sysctl_tcp_fastopen __read_mostly = TFO_CLIENT_ENABLE; - -struct tcp_fastopen_context __rcu *tcp_fastopen_ctx; - -static DEFINE_SPINLOCK(tcp_fastopen_ctx_lock); - -void tcp_fastopen_init_key_once(bool publish) +void tcp_fastopen_init_key_once(struct net *net) { - static u8 key[TCP_FASTOPEN_KEY_LENGTH]; + u8 key[TCP_FASTOPEN_KEY_LENGTH]; + struct tcp_fastopen_context *ctxt; + + rcu_read_lock(); + ctxt = rcu_dereference(net->ipv4.tcp_fastopen_ctx); + if (ctxt) { + rcu_read_unlock(); + return; + } + rcu_read_unlock(); /* tcp_fastopen_reset_cipher publishes the new context * atomically, so we allow this race happening here. @@ -25,8 +29,8 @@ void tcp_fastopen_init_key_once(bool publish) * All call sites of tcp_fastopen_cookie_gen also check * for a valid cookie, so this is an acceptable risk. */ - if (net_get_random_once(key, sizeof(key)) && publish) - tcp_fastopen_reset_cipher(key, sizeof(key)); + get_random_bytes(key, sizeof(key)); + tcp_fastopen_reset_cipher(net, NULL, key, sizeof(key)); } static void tcp_fastopen_ctx_free(struct rcu_head *head) @@ -37,10 +41,37 @@ static void tcp_fastopen_ctx_free(struct rcu_head *head) kfree(ctx); } -int tcp_fastopen_reset_cipher(void *key, unsigned int len) +void tcp_fastopen_destroy_cipher(struct sock *sk) +{ + struct tcp_fastopen_context *ctx; + + ctx = rcu_dereference_protected( + inet_csk(sk)->icsk_accept_queue.fastopenq.ctx, 1); + if (ctx) + call_rcu(&ctx->rcu, tcp_fastopen_ctx_free); +} + +void tcp_fastopen_ctx_destroy(struct net *net) +{ + struct tcp_fastopen_context *ctxt; + + spin_lock(&net->ipv4.tcp_fastopen_ctx_lock); + + ctxt = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx, + lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock)); + rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, NULL); + spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock); + + if (ctxt) + call_rcu(&ctxt->rcu, tcp_fastopen_ctx_free); +} + +int tcp_fastopen_reset_cipher(struct net *net, struct sock *sk, + void *key, unsigned int len) { - int err; struct tcp_fastopen_context *ctx, *octx; + struct fastopen_queue *q; + int err; ctx = kmalloc(sizeof(*ctx), GFP_KERNEL); if (!ctx) @@ -61,26 +92,37 @@ error: kfree(ctx); } memcpy(ctx->key, key, len); - spin_lock(&tcp_fastopen_ctx_lock); - octx = rcu_dereference_protected(tcp_fastopen_ctx, - lockdep_is_held(&tcp_fastopen_ctx_lock)); - rcu_assign_pointer(tcp_fastopen_ctx, ctx); - spin_unlock(&tcp_fastopen_ctx_lock); + spin_lock(&net->ipv4.tcp_fastopen_ctx_lock); + if (sk) { + q = &inet_csk(sk)->icsk_accept_queue.fastopenq; + octx = rcu_dereference_protected(q->ctx, + lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock)); + rcu_assign_pointer(q->ctx, ctx); + } else { + octx = rcu_dereference_protected(net->ipv4.tcp_fastopen_ctx, + lockdep_is_held(&net->ipv4.tcp_fastopen_ctx_lock)); + rcu_assign_pointer(net->ipv4.tcp_fastopen_ctx, ctx); + } + spin_unlock(&net->ipv4.tcp_fastopen_ctx_lock); if (octx) call_rcu(&octx->rcu, tcp_fastopen_ctx_free); return err; } -static bool __tcp_fastopen_cookie_gen(const void *path, +static bool __tcp_fastopen_cookie_gen(struct sock *sk, const void *path, struct tcp_fastopen_cookie *foc) { struct tcp_fastopen_context *ctx; bool ok = false; rcu_read_lock(); - ctx = rcu_dereference(tcp_fastopen_ctx); + + ctx = rcu_dereference(inet_csk(sk)->icsk_accept_queue.fastopenq.ctx); + if (!ctx) + ctx = rcu_dereference(sock_net(sk)->ipv4.tcp_fastopen_ctx); + if (ctx) { crypto_cipher_encrypt_one(ctx->tfm, foc->val, path); foc->len = TCP_FASTOPEN_COOKIE_SIZE; @@ -96,7 +138,8 @@ static bool __tcp_fastopen_cookie_gen(const void *path, * * XXX (TFO) - refactor when TCP_FASTOPEN_COOKIE_SIZE != AES_BLOCK_SIZE. */ -static bool tcp_fastopen_cookie_gen(struct request_sock *req, +static bool tcp_fastopen_cookie_gen(struct sock *sk, + struct request_sock *req, struct sk_buff *syn, struct tcp_fastopen_cookie *foc) { @@ -104,7 +147,7 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req, const struct iphdr *iph = ip_hdr(syn); __be32 path[4] = { iph->saddr, iph->daddr, 0, 0 }; - return __tcp_fastopen_cookie_gen(path, foc); + return __tcp_fastopen_cookie_gen(sk, path, foc); } #if IS_ENABLED(CONFIG_IPV6) @@ -112,13 +155,13 @@ static bool tcp_fastopen_cookie_gen(struct request_sock *req, const struct ipv6hdr *ip6h = ipv6_hdr(syn); struct tcp_fastopen_cookie tmp; - if (__tcp_fastopen_cookie_gen(&ip6h->saddr, &tmp)) { + if (__tcp_fastopen_cookie_gen(sk, &ip6h->saddr, &tmp)) { struct in6_addr *buf = &tmp.addr; int i; for (i = 0; i < 4; i++) buf->s6_addr32[i] ^= ip6h->daddr.s6_addr32[i]; - return __tcp_fastopen_cookie_gen(buf, foc); + return __tcp_fastopen_cookie_gen(sk, buf, foc); } } #endif @@ -171,7 +214,6 @@ void tcp_fastopen_add_skb(struct sock *sk, struct sk_buff *skb) static struct sock *tcp_fastopen_create_child(struct sock *sk, struct sk_buff *skb, - struct dst_entry *dst, struct request_sock *req) { struct tcp_sock *tp; @@ -217,12 +259,7 @@ static struct sock *tcp_fastopen_create_child(struct sock *sk, refcount_set(&req->rsk_refcnt, 2); /* Now finish processing the fastopen child socket. */ - inet_csk(child)->icsk_af_ops->rebuild_header(child); - tcp_init_congestion_control(child); - tcp_mtup_init(child); - tcp_init_metrics(child); - tcp_call_bpf(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB); - tcp_init_buffer_space(child); + tcp_init_transfer(child, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB); tp->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; @@ -272,6 +309,15 @@ static bool tcp_fastopen_queue_check(struct sock *sk) return true; } +static bool tcp_fastopen_no_cookie(const struct sock *sk, + const struct dst_entry *dst, + int flag) +{ + return (sock_net(sk)->ipv4.sysctl_tcp_fastopen & flag) || + tcp_sk(sk)->fastopen_no_cookie || + (dst && dst_metric(dst, RTAX_FASTOPEN_NO_COOKIE)); +} + /* Returns true if we should perform Fast Open on the SYN. The cookie (foc) * may be updated and return the client in the SYN-ACK later. E.g., Fast Open * cookie request (foc->len == 0). @@ -279,27 +325,29 @@ static bool tcp_fastopen_queue_check(struct sock *sk) struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct tcp_fastopen_cookie *foc, - struct dst_entry *dst) + const struct dst_entry *dst) { - struct tcp_fastopen_cookie valid_foc = { .len = -1 }; bool syn_data = TCP_SKB_CB(skb)->end_seq != TCP_SKB_CB(skb)->seq + 1; + int tcp_fastopen = sock_net(sk)->ipv4.sysctl_tcp_fastopen; + struct tcp_fastopen_cookie valid_foc = { .len = -1 }; struct sock *child; if (foc->len == 0) /* Client requests a cookie */ NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENCOOKIEREQD); - if (!((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) && + if (!((tcp_fastopen & TFO_SERVER_ENABLE) && (syn_data || foc->len >= 0) && tcp_fastopen_queue_check(sk))) { foc->len = -1; return NULL; } - if (syn_data && (sysctl_tcp_fastopen & TFO_SERVER_COOKIE_NOT_REQD)) + if (syn_data && + tcp_fastopen_no_cookie(sk, dst, TFO_SERVER_COOKIE_NOT_REQD)) goto fastopen; if (foc->len >= 0 && /* Client presents or requests a cookie */ - tcp_fastopen_cookie_gen(req, skb, &valid_foc) && + tcp_fastopen_cookie_gen(sk, req, skb, &valid_foc) && foc->len == TCP_FASTOPEN_COOKIE_SIZE && foc->len == valid_foc.len && !memcmp(foc->val, valid_foc.val, foc->len)) { @@ -312,7 +360,7 @@ struct sock *tcp_try_fastopen(struct sock *sk, struct sk_buff *skb, * data in SYN_RECV state. */ fastopen: - child = tcp_fastopen_create_child(sk, skb, dst, req); + child = tcp_fastopen_create_child(sk, skb, req); if (child) { foc->len = -1; NET_INC_STATS(sock_net(sk), @@ -332,6 +380,7 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, struct tcp_fastopen_cookie *cookie) { unsigned long last_syn_loss = 0; + const struct dst_entry *dst; int syn_loss = 0; tcp_fastopen_cache_get(sk, mss, cookie, &syn_loss, &last_syn_loss); @@ -349,7 +398,9 @@ bool tcp_fastopen_cookie_check(struct sock *sk, u16 *mss, return false; } - if (sysctl_tcp_fastopen & TFO_CLIENT_NO_COOKIE) { + dst = __sk_dst_get(sk); + + if (tcp_fastopen_no_cookie(sk, dst, TFO_CLIENT_NO_COOKIE)) { cookie->len = -1; return true; } @@ -403,25 +454,16 @@ EXPORT_SYMBOL(tcp_fastopen_defer_connect); * TFO connection with data exchanges. */ -/* Default to 1hr */ -unsigned int sysctl_tcp_fastopen_blackhole_timeout __read_mostly = 60 * 60; -static atomic_t tfo_active_disable_times __read_mostly = ATOMIC_INIT(0); -static unsigned long tfo_active_disable_stamp __read_mostly; - /* Disable active TFO and record current jiffies and * tfo_active_disable_times */ void tcp_fastopen_active_disable(struct sock *sk) { - atomic_inc(&tfo_active_disable_times); - tfo_active_disable_stamp = jiffies; - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPFASTOPENBLACKHOLE); -} + struct net *net = sock_net(sk); -/* Reset tfo_active_disable_times to 0 */ -void tcp_fastopen_active_timeout_reset(void) -{ - atomic_set(&tfo_active_disable_times, 0); + atomic_inc(&net->ipv4.tfo_active_disable_times); + net->ipv4.tfo_active_disable_stamp = jiffies; + NET_INC_STATS(net, LINUX_MIB_TCPFASTOPENBLACKHOLE); } /* Calculate timeout for tfo active disable @@ -430,17 +472,18 @@ void tcp_fastopen_active_timeout_reset(void) */ bool tcp_fastopen_active_should_disable(struct sock *sk) { - int tfo_da_times = atomic_read(&tfo_active_disable_times); - int multiplier; + unsigned int tfo_bh_timeout = sock_net(sk)->ipv4.sysctl_tcp_fastopen_blackhole_timeout; + int tfo_da_times = atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times); unsigned long timeout; + int multiplier; if (!tfo_da_times) return false; /* Limit timout to max: 2^6 * initial timeout */ multiplier = 1 << min(tfo_da_times - 1, 6); - timeout = multiplier * sysctl_tcp_fastopen_blackhole_timeout * HZ; - if (time_before(jiffies, tfo_active_disable_stamp + timeout)) + timeout = multiplier * tfo_bh_timeout * HZ; + if (time_before(jiffies, sock_net(sk)->ipv4.tfo_active_disable_stamp + timeout)) return true; /* Mark check bit so we can check for successful active TFO @@ -459,27 +502,25 @@ bool tcp_fastopen_active_should_disable(struct sock *sk) void tcp_fastopen_active_disable_ofo_check(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); - struct rb_node *p; - struct sk_buff *skb; struct dst_entry *dst; + struct sk_buff *skb; if (!tp->syn_fastopen) return; if (!tp->data_segs_in) { - p = rb_first(&tp->out_of_order_queue); - if (p && !rb_next(p)) { - skb = rb_entry(p, struct sk_buff, rbnode); + skb = skb_rb_first(&tp->out_of_order_queue); + if (skb && !skb_rb_next(skb)) { if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) { tcp_fastopen_active_disable(sk); return; } } } else if (tp->syn_fastopen_ch && - atomic_read(&tfo_active_disable_times)) { + atomic_read(&sock_net(sk)->ipv4.tfo_active_disable_times)) { dst = sk_dst_get(sk); if (!(dst && dst->dev && (dst->dev->flags & IFF_LOOPBACK))) - tcp_fastopen_active_timeout_reset(); + atomic_set(&sock_net(sk)->ipv4.tfo_active_disable_times, 0); dst_release(dst); } } diff --git a/net/ipv4/tcp_highspeed.c b/net/ipv4/tcp_highspeed.c index 6d9879e93648..d1c33c91eadc 100644 --- a/net/ipv4/tcp_highspeed.c +++ b/net/ipv4/tcp_highspeed.c @@ -94,7 +94,6 @@ static const struct hstcp_aimd_val { struct hstcp { u32 ai; - u32 loss_cwnd; }; static void hstcp_init(struct sock *sk) @@ -153,22 +152,14 @@ static u32 hstcp_ssthresh(struct sock *sk) const struct tcp_sock *tp = tcp_sk(sk); struct hstcp *ca = inet_csk_ca(sk); - ca->loss_cwnd = tp->snd_cwnd; /* Do multiplicative decrease */ return max(tp->snd_cwnd - ((tp->snd_cwnd * hstcp_aimd_vals[ca->ai].md) >> 8), 2U); } -static u32 hstcp_cwnd_undo(struct sock *sk) -{ - const struct hstcp *ca = inet_csk_ca(sk); - - return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd); -} - static struct tcp_congestion_ops tcp_highspeed __read_mostly = { .init = hstcp_init, .ssthresh = hstcp_ssthresh, - .undo_cwnd = hstcp_cwnd_undo, + .undo_cwnd = tcp_reno_undo_cwnd, .cong_avoid = hstcp_cong_avoid, .owner = THIS_MODULE, diff --git a/net/ipv4/tcp_htcp.c b/net/ipv4/tcp_htcp.c index 3eb78cde6ff0..082d479462fa 100644 --- a/net/ipv4/tcp_htcp.c +++ b/net/ipv4/tcp_htcp.c @@ -66,7 +66,6 @@ static inline void htcp_reset(struct htcp *ca) static u32 htcp_cwnd_undo(struct sock *sk) { - const struct tcp_sock *tp = tcp_sk(sk); struct htcp *ca = inet_csk_ca(sk); if (ca->undo_last_cong) { @@ -76,7 +75,7 @@ static u32 htcp_cwnd_undo(struct sock *sk) ca->undo_last_cong = 0; } - return max(tp->snd_cwnd, (tp->snd_ssthresh << 7) / ca->beta); + return tcp_reno_undo_cwnd(sk); } static inline void measure_rtt(struct sock *sk, u32 srtt) diff --git a/net/ipv4/tcp_illinois.c b/net/ipv4/tcp_illinois.c index 60352ff4f5a8..7c843578f233 100644 --- a/net/ipv4/tcp_illinois.c +++ b/net/ipv4/tcp_illinois.c @@ -48,7 +48,6 @@ struct illinois { u32 end_seq; /* right edge of current RTT */ u32 alpha; /* Additive increase */ u32 beta; /* Muliplicative decrease */ - u32 loss_cwnd; /* cwnd on loss */ u16 acked; /* # packets acked by current ACK */ u8 rtt_above; /* average rtt has gone above threshold */ u8 rtt_low; /* # of rtts measurements below threshold */ @@ -297,18 +296,10 @@ static u32 tcp_illinois_ssthresh(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct illinois *ca = inet_csk_ca(sk); - ca->loss_cwnd = tp->snd_cwnd; /* Multiplicative decrease */ return max(tp->snd_cwnd - ((tp->snd_cwnd * ca->beta) >> BETA_SHIFT), 2U); } -static u32 tcp_illinois_cwnd_undo(struct sock *sk) -{ - const struct illinois *ca = inet_csk_ca(sk); - - return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd); -} - /* Extract info for Tcp socket info provided via netlink. */ static size_t tcp_illinois_info(struct sock *sk, u32 ext, int *attr, union tcp_cc_info *info) @@ -336,7 +327,7 @@ static size_t tcp_illinois_info(struct sock *sk, u32 ext, int *attr, static struct tcp_congestion_ops tcp_illinois __read_mostly = { .init = tcp_illinois_init, .ssthresh = tcp_illinois_ssthresh, - .undo_cwnd = tcp_illinois_cwnd_undo, + .undo_cwnd = tcp_reno_undo_cwnd, .cong_avoid = tcp_illinois_cong_avoid, .set_state = tcp_illinois_state, .get_info = tcp_illinois_info, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 53de1424c13c..f844c06c0676 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * INET An implementation of the TCP/IP protocol suite for the LINUX * operating system. INET is implemented using the BSD Socket @@ -75,25 +76,10 @@ #include <linux/ipsec.h> #include <asm/unaligned.h> #include <linux/errqueue.h> +#include <trace/events/tcp.h> +#include <linux/static_key.h> -int sysctl_tcp_fack __read_mostly; -int sysctl_tcp_max_reordering __read_mostly = 300; -int sysctl_tcp_dsack __read_mostly = 1; -int sysctl_tcp_app_win __read_mostly = 31; -int sysctl_tcp_adv_win_scale __read_mostly = 1; -EXPORT_SYMBOL(sysctl_tcp_adv_win_scale); - -/* rfc5961 challenge ack rate limiting */ -int sysctl_tcp_challenge_ack_limit = 1000; - -int sysctl_tcp_stdurg __read_mostly; -int sysctl_tcp_rfc1337 __read_mostly; int sysctl_tcp_max_orphans __read_mostly = NR_FILE; -int sysctl_tcp_frto __read_mostly = 2; -int sysctl_tcp_min_rtt_wlen __read_mostly = 300; -int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; -int sysctl_tcp_early_retrans __read_mostly = 3; -int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; #define FLAG_DATA 0x01 /* Incoming frame contained data. */ #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ @@ -114,7 +100,7 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; #define FLAG_ACKED (FLAG_DATA_ACKED|FLAG_SYN_ACKED) #define FLAG_NOT_DUP (FLAG_DATA|FLAG_WIN_UPDATE|FLAG_ACKED) -#define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE) +#define FLAG_CA_ALERT (FLAG_DATA_SACKED|FLAG_ECE|FLAG_DSACKING_ACK) #define FLAG_FORWARD_PROGRESS (FLAG_ACKED|FLAG_DATA_SACKED) #define TCP_REMNANT (TCP_FLAG_FIN|TCP_FLAG_URG|TCP_FLAG_SYN|TCP_FLAG_PSH) @@ -334,7 +320,7 @@ static void tcp_sndbuf_expand(struct sock *sk) sndmem *= nr_segs * per_mss; if (sk->sk_sndbuf < sndmem) - sk->sk_sndbuf = min(sndmem, sysctl_tcp_wmem[2]); + sk->sk_sndbuf = min(sndmem, sock_net(sk)->ipv4.sysctl_tcp_wmem[2]); } /* 2. Tuning advertised window (window_clamp, rcv_ssthresh) @@ -367,8 +353,8 @@ static int __tcp_grow_window(const struct sock *sk, const struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); /* Optimize this! */ - int truesize = tcp_win_from_space(skb->truesize) >> 1; - int window = tcp_win_from_space(sysctl_tcp_rmem[2]) >> 1; + int truesize = tcp_win_from_space(sk, skb->truesize) >> 1; + int window = tcp_win_from_space(sk, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]) >> 1; while (tp->rcv_ssthresh <= window) { if (truesize <= skb->len) @@ -393,7 +379,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb) /* Check #2. Increase window, if skb with such overhead * will fit to rcvbuf in future. */ - if (tcp_win_from_space(skb->truesize) <= skb->len) + if (tcp_win_from_space(sk, skb->truesize) <= skb->len) incr = 2 * tp->advmss; else incr = __tcp_grow_window(sk, skb); @@ -419,11 +405,11 @@ static void tcp_fixup_rcvbuf(struct sock *sk) /* Dynamic Right Sizing (DRS) has 2 to 3 RTT latency * Allow enough cushion so that sender is not limited by our window */ - if (sysctl_tcp_moderate_rcvbuf) + if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf) rcvmem <<= 2; if (sk->sk_rcvbuf < rcvmem) - sk->sk_rcvbuf = min(rcvmem, sysctl_tcp_rmem[2]); + sk->sk_rcvbuf = min(rcvmem, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]); } /* 4. Try to fixup all. It is made immediately after connection enters @@ -431,6 +417,7 @@ static void tcp_fixup_rcvbuf(struct sock *sk) */ void tcp_init_buffer_space(struct sock *sk) { + int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win; struct tcp_sock *tp = tcp_sk(sk); int maxwin; @@ -449,14 +436,14 @@ void tcp_init_buffer_space(struct sock *sk) if (tp->window_clamp >= maxwin) { tp->window_clamp = maxwin; - if (sysctl_tcp_app_win && maxwin > 4 * tp->advmss) + if (tcp_app_win && maxwin > 4 * tp->advmss) tp->window_clamp = max(maxwin - - (maxwin >> sysctl_tcp_app_win), + (maxwin >> tcp_app_win), 4 * tp->advmss); } /* Force reservation of one segment. */ - if (sysctl_tcp_app_win && + if (tcp_app_win && tp->window_clamp > 2 * tp->advmss && tp->window_clamp + tp->advmss > maxwin) tp->window_clamp = max(2 * tp->advmss, maxwin - tp->advmss); @@ -470,15 +457,16 @@ static void tcp_clamp_window(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); + struct net *net = sock_net(sk); icsk->icsk_ack.quick = 0; - if (sk->sk_rcvbuf < sysctl_tcp_rmem[2] && + if (sk->sk_rcvbuf < net->ipv4.sysctl_tcp_rmem[2] && !(sk->sk_userlocks & SOCK_RCVBUF_LOCK) && !tcp_under_memory_pressure(sk) && sk_memory_allocated(sk) < sk_prot_mem_limits(sk, 0)) { sk->sk_rcvbuf = min(atomic_read(&sk->sk_rmem_alloc), - sysctl_tcp_rmem[2]); + net->ipv4.sysctl_tcp_rmem[2]); } if (atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) tp->rcv_ssthresh = min(tp->window_clamp, 2U * tp->advmss); @@ -609,7 +597,7 @@ void tcp_rcv_space_adjust(struct sock *sk) * <prev RTT . ><current RTT .. ><next RTT .... > */ - if (sysctl_tcp_moderate_rcvbuf && + if (sock_net(sk)->ipv4.sysctl_tcp_moderate_rcvbuf && !(sk->sk_userlocks & SOCK_RCVBUF_LOCK)) { int rcvwin, rcvmem, rcvbuf; @@ -633,10 +621,11 @@ void tcp_rcv_space_adjust(struct sock *sk) } rcvmem = SKB_TRUESIZE(tp->advmss + MAX_TCP_HEADER); - while (tcp_win_from_space(rcvmem) < tp->advmss) + while (tcp_win_from_space(sk, rcvmem) < tp->advmss) rcvmem += 128; - rcvbuf = min(rcvwin / tp->advmss * rcvmem, sysctl_tcp_rmem[2]); + rcvbuf = min(rcvwin / tp->advmss * rcvmem, + sock_net(sk)->ipv4.sysctl_tcp_rmem[2]); if (rcvbuf > sk->sk_rcvbuf) { sk->sk_rcvbuf = rcvbuf; @@ -780,15 +769,6 @@ static void tcp_rtt_estimator(struct sock *sk, long mrtt_us) tp->srtt_us = max(1U, srtt); } -/* Set the sk_pacing_rate to allow proper sizing of TSO packets. - * Note: TCP stack does not yet implement pacing. - * FQ packet scheduler can be used to implement cheap but effective - * TCP pacing, to smooth the burst on large writes when packets - * in flight is significantly lower than cwnd (or rwin) - */ -int sysctl_tcp_pacing_ss_ratio __read_mostly = 200; -int sysctl_tcp_pacing_ca_ratio __read_mostly = 120; - static void tcp_update_pacing_rate(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); @@ -806,21 +786,21 @@ static void tcp_update_pacing_rate(struct sock *sk) * end of slow start and should slow down. */ if (tp->snd_cwnd < tp->snd_ssthresh / 2) - rate *= sysctl_tcp_pacing_ss_ratio; + rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ss_ratio; else - rate *= sysctl_tcp_pacing_ca_ratio; + rate *= sock_net(sk)->ipv4.sysctl_tcp_pacing_ca_ratio; rate *= max(tp->snd_cwnd, tp->packets_out); if (likely(tp->srtt_us)) do_div(rate, tp->srtt_us); - /* ACCESS_ONCE() is needed because sch_fq fetches sk_pacing_rate + /* WRITE_ONCE() is needed because sch_fq fetches sk_pacing_rate * without any lock. We want to make sure compiler wont store * intermediate values in this location. */ - ACCESS_ONCE(sk->sk_pacing_rate) = min_t(u64, rate, - sk->sk_max_pacing_rate); + WRITE_ONCE(sk->sk_pacing_rate, min_t(u64, rate, + sk->sk_max_pacing_rate)); } /* Calculate rto without backoff. This is the second half of Van Jacobson's @@ -862,60 +842,46 @@ __u32 tcp_init_cwnd(const struct tcp_sock *tp, const struct dst_entry *dst) return min_t(__u32, cwnd, tp->snd_cwnd_clamp); } -/* - * Packet counting of FACK is based on in-order assumptions, therefore TCP - * disables it when reordering is detected - */ -void tcp_disable_fack(struct tcp_sock *tp) -{ - /* RFC3517 uses different metric in lost marker => reset on change */ - if (tcp_is_fack(tp)) - tp->lost_skb_hint = NULL; - tp->rx_opt.sack_ok &= ~TCP_FACK_ENABLED; -} - /* Take a notice that peer is sending D-SACKs */ static void tcp_dsack_seen(struct tcp_sock *tp) { tp->rx_opt.sack_ok |= TCP_DSACK_SEEN; + tp->rack.dsack_seen = 1; } -static void tcp_update_reordering(struct sock *sk, const int metric, - const int ts) +/* It's reordering when higher sequence was delivered (i.e. sacked) before + * some lower never-retransmitted sequence ("low_seq"). The maximum reordering + * distance is approximated in full-mss packet distance ("reordering"). + */ +static void tcp_check_sack_reordering(struct sock *sk, const u32 low_seq, + const int ts) { struct tcp_sock *tp = tcp_sk(sk); - int mib_idx; + const u32 mss = tp->mss_cache; + u32 fack, metric; - if (WARN_ON_ONCE(metric < 0)) + fack = tcp_highest_sack_seq(tp); + if (!before(low_seq, fack)) return; - if (metric > tp->reordering) { - tp->reordering = min(sysctl_tcp_max_reordering, metric); - + metric = fack - low_seq; + if ((metric > tp->reordering * mss) && mss) { #if FASTRETRANS_DEBUG > 1 pr_debug("Disorder%d %d %u f%u s%u rr%d\n", tp->rx_opt.sack_ok, inet_csk(sk)->icsk_ca_state, tp->reordering, - tp->fackets_out, + 0, tp->sacked_out, tp->undo_marker ? tp->undo_retrans : 0); #endif - tcp_disable_fack(tp); + tp->reordering = min_t(u32, (metric + mss - 1) / mss, + sock_net(sk)->ipv4.sysctl_tcp_max_reordering); } tp->rack.reord = 1; - /* This exciting event is worth to be remembered. 8) */ - if (ts) - mib_idx = LINUX_MIB_TCPTSREORDER; - else if (tcp_is_reno(tp)) - mib_idx = LINUX_MIB_TCPRENOREORDER; - else if (tcp_is_fack(tp)) - mib_idx = LINUX_MIB_TCPFACKREORDER; - else - mib_idx = LINUX_MIB_TCPSACKREORDER; - - NET_INC_STATS(sock_net(sk), mib_idx); + NET_INC_STATS(sock_net(sk), + ts ? LINUX_MIB_TCPTSREORDER : LINUX_MIB_TCPSACKREORDER); } /* This must be called before lost_out is incremented */ @@ -989,7 +955,6 @@ void tcp_skb_mark_lost_uncond_verify(struct tcp_sock *tp, struct sk_buff *skb) * 3. Loss detection event of two flavors: * A. Scoreboard estimator decided the packet is lost. * A'. Reno "three dupacks" marks head of queue lost. - * A''. Its FACK modification, head until snd.fack is lost. * B. SACK arrives sacking SND.NXT at the moment, when the * segment was retransmitted. * 4. D-SACK added new rule: D-SACK changes any tag to S. @@ -1132,8 +1097,7 @@ static bool tcp_check_dsack(struct sock *sk, const struct sk_buff *ack_skb, } struct tcp_sacktag_state { - int reord; - int fack_count; + u32 reord; /* Timestamps for earliest and latest never-retransmitted segment * that was SACKed. RTO needs the earliest RTT to stay conservative, * but congestion control should still get an accurate delay signal. @@ -1142,6 +1106,7 @@ struct tcp_sacktag_state { u64 last_sackt; struct rate_sample *rate; int flag; + unsigned int mss_now; }; /* Check if skb is fully within the SACK block. In presence of GSO skbs, @@ -1191,7 +1156,8 @@ static int tcp_match_skb_to_sack(struct sock *sk, struct sk_buff *skb, if (pkt_len >= skb->len && !in_sack) return 0; - err = tcp_fragment(sk, skb, pkt_len, mss, GFP_ATOMIC); + err = tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, + pkt_len, mss, GFP_ATOMIC); if (err < 0) return err; } @@ -1207,15 +1173,15 @@ static u8 tcp_sacktag_one(struct sock *sk, u64 xmit_time) { struct tcp_sock *tp = tcp_sk(sk); - int fack_count = state->fack_count; /* Account D-SACK for retransmitted packet. */ if (dup_sack && (sacked & TCPCB_RETRANS)) { if (tp->undo_marker && tp->undo_retrans > 0 && after(end_seq, tp->undo_marker)) tp->undo_retrans--; - if (sacked & TCPCB_SACKED_ACKED) - state->reord = min(fack_count, state->reord); + if ((sacked & TCPCB_SACKED_ACKED) && + before(start_seq, state->reord)) + state->reord = start_seq; } /* Nothing to do; acked frame is about to be dropped (was ACKed). */ @@ -1241,9 +1207,10 @@ static u8 tcp_sacktag_one(struct sock *sk, * which was in hole. It is reordering. */ if (before(start_seq, - tcp_highest_sack_seq(tp))) - state->reord = min(fack_count, - state->reord); + tcp_highest_sack_seq(tp)) && + before(start_seq, state->reord)) + state->reord = start_seq; + if (!after(end_seq, tp->high_seq)) state->flag |= FLAG_ORIG_SACK_ACKED; if (state->first_sackt == 0) @@ -1262,15 +1229,10 @@ static u8 tcp_sacktag_one(struct sock *sk, tp->sacked_out += pcount; tp->delivered += pcount; /* Out-of-order packets delivered */ - fack_count += pcount; - /* Lost marker hint past SACKed? Tweak RFC3517 cnt */ - if (!tcp_is_fack(tp) && tp->lost_skb_hint && + if (tp->lost_skb_hint && before(start_seq, TCP_SKB_CB(tp->lost_skb_hint)->seq)) tp->lost_cnt_hint += pcount; - - if (fack_count > tp->fackets_out) - tp->fackets_out = fack_count; } /* D-SACK. We can detect redundant retransmission in S|R and plain R @@ -1288,13 +1250,13 @@ static u8 tcp_sacktag_one(struct sock *sk, /* Shift newly-SACKed bytes from this skb to the immediately previous * already-SACKed sk_buff. Mark the newly-SACKed bytes as such. */ -static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, +static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *prev, + struct sk_buff *skb, struct tcp_sacktag_state *state, unsigned int pcount, int shifted, int mss, bool dup_sack) { struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *prev = tcp_write_queue_prev(sk, skb); u32 start_seq = TCP_SKB_CB(skb)->seq; /* start of newly-SACKed */ u32 end_seq = start_seq + shifted; /* end of newly-SACKed */ @@ -1363,8 +1325,7 @@ static bool tcp_shifted_skb(struct sock *sk, struct sk_buff *skb, if (unlikely(TCP_SKB_CB(prev)->tx.delivered_mstamp)) TCP_SKB_CB(prev)->tx.delivered_mstamp = 0; - tcp_unlink_write_queue(skb, sk); - sk_wmem_free_skb(sk, skb); + tcp_rtx_queue_unlink_and_free(skb, sk); NET_INC_STATS(sock_net(sk), LINUX_MIB_SACKMERGED); @@ -1414,9 +1375,9 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, goto fallback; /* Can only happen with delayed DSACK + discard craziness */ - if (unlikely(skb == tcp_write_queue_head(sk))) + prev = skb_rb_prev(skb); + if (!prev) goto fallback; - prev = tcp_write_queue_prev(sk, skb); if ((TCP_SKB_CB(prev)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) goto fallback; @@ -1495,18 +1456,17 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, if (!skb_shift(prev, skb, len)) goto fallback; - if (!tcp_shifted_skb(sk, skb, state, pcount, len, mss, dup_sack)) + if (!tcp_shifted_skb(sk, prev, skb, state, pcount, len, mss, dup_sack)) goto out; /* Hole filled allows collapsing with the next as well, this is very * useful when hole on every nth skb pattern happens */ - if (prev == tcp_write_queue_tail(sk)) + skb = skb_rb_next(prev); + if (!skb) goto out; - skb = tcp_write_queue_next(sk, prev); if (!skb_can_shift(skb) || - (skb == tcp_send_head(sk)) || ((TCP_SKB_CB(skb)->sacked & TCPCB_TAGBITS) != TCPCB_SACKED_ACKED) || (mss != tcp_skb_seglen(skb))) goto out; @@ -1514,11 +1474,11 @@ static struct sk_buff *tcp_shift_skb_data(struct sock *sk, struct sk_buff *skb, len = skb->len; if (skb_shift(prev, skb, len)) { pcount += tcp_skb_pcount(skb); - tcp_shifted_skb(sk, skb, state, tcp_skb_pcount(skb), len, mss, 0); + tcp_shifted_skb(sk, prev, skb, state, tcp_skb_pcount(skb), + len, mss, 0); } out: - state->fack_count += pcount; return prev; noop: @@ -1538,13 +1498,10 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *tmp; - tcp_for_write_queue_from(skb, sk) { + skb_rbtree_walk_from(skb) { int in_sack = 0; bool dup_sack = dup_sack_in; - if (skb == tcp_send_head(sk)) - break; - /* queue is in-order => we can short-circuit the walk early */ if (!before(TCP_SKB_CB(skb)->seq, end_seq)) break; @@ -1593,34 +1550,48 @@ static struct sk_buff *tcp_sacktag_walk(struct sk_buff *skb, struct sock *sk, tcp_skb_pcount(skb), skb->skb_mstamp); tcp_rate_skb_delivered(sk, skb, state->rate); + if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) + list_del_init(&skb->tcp_tsorted_anchor); if (!before(TCP_SKB_CB(skb)->seq, tcp_highest_sack_seq(tp))) tcp_advance_highest_sack(sk, skb); } - - state->fack_count += tcp_skb_pcount(skb); } return skb; } -/* Avoid all extra work that is being done by sacktag while walking in - * a normal way - */ +static struct sk_buff *tcp_sacktag_bsearch(struct sock *sk, + struct tcp_sacktag_state *state, + u32 seq) +{ + struct rb_node *parent, **p = &sk->tcp_rtx_queue.rb_node; + struct sk_buff *skb; + + while (*p) { + parent = *p; + skb = rb_to_skb(parent); + if (before(seq, TCP_SKB_CB(skb)->seq)) { + p = &parent->rb_left; + continue; + } + if (!before(seq, TCP_SKB_CB(skb)->end_seq)) { + p = &parent->rb_right; + continue; + } + return skb; + } + return NULL; +} + static struct sk_buff *tcp_sacktag_skip(struct sk_buff *skb, struct sock *sk, struct tcp_sacktag_state *state, u32 skip_to_seq) { - tcp_for_write_queue_from(skb, sk) { - if (skb == tcp_send_head(sk)) - break; - - if (after(TCP_SKB_CB(skb)->end_seq, skip_to_seq)) - break; + if (skb && after(TCP_SKB_CB(skb)->seq, skip_to_seq)) + return skb; - state->fack_count += tcp_skb_pcount(skb); - } - return skb; + return tcp_sacktag_bsearch(sk, state, skip_to_seq); } static struct sk_buff *tcp_maybe_skipping_dsack(struct sk_buff *skb, @@ -1665,13 +1636,10 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, int first_sack_index; state->flag = 0; - state->reord = tp->packets_out; + state->reord = tp->snd_nxt; - if (!tp->sacked_out) { - if (WARN_ON(tp->fackets_out)) - tp->fackets_out = 0; + if (!tp->sacked_out) tcp_highest_sack_reset(sk); - } found_dup_sack = tcp_check_dsack(sk, ack_skb, sp_wire, num_sacks, prior_snd_una); @@ -1742,8 +1710,8 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, } } - skb = tcp_write_queue_head(sk); - state->fack_count = 0; + state->mss_now = tcp_current_mss(sk); + skb = NULL; i = 0; if (!tp->sacked_out) { @@ -1800,7 +1768,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, skb = tcp_highest_sack(sk); if (!skb) break; - state->fack_count = tp->fackets_out; cache++; goto walk; } @@ -1815,7 +1782,6 @@ tcp_sacktag_write_queue(struct sock *sk, const struct sk_buff *ack_skb, skb = tcp_highest_sack(sk); if (!skb) break; - state->fack_count = tp->fackets_out; } skb = tcp_sacktag_skip(skb, sk, state, start_seq); @@ -1835,9 +1801,8 @@ advance_sp: for (j = 0; j < used_sacks; j++) tp->recv_sack_cache[i++] = sp[j]; - if ((state->reord < tp->fackets_out) && - ((inet_csk(sk)->icsk_ca_state != TCP_CA_Loss) || tp->undo_marker)) - tcp_update_reordering(sk, tp->fackets_out - state->reord, 0); + if (inet_csk(sk)->icsk_ca_state != TCP_CA_Loss || tp->undo_marker) + tcp_check_sack_reordering(sk, state->reord, 0); tcp_verify_left_out(tp); out: @@ -1875,8 +1840,13 @@ static bool tcp_limit_reno_sacked(struct tcp_sock *tp) static void tcp_check_reno_reordering(struct sock *sk, const int addend) { struct tcp_sock *tp = tcp_sk(sk); - if (tcp_limit_reno_sacked(tp)) - tcp_update_reordering(sk, tp->packets_out + addend, 0); + + if (!tcp_limit_reno_sacked(tp)) + return; + + tp->reordering = min_t(u32, tp->packets_out + addend, + sock_net(sk)->ipv4.sysctl_tcp_max_reordering); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRENOREORDER); } /* Emulate SACKs for SACKless connection: account for a new dupack. */ @@ -1922,7 +1892,6 @@ void tcp_clear_retrans(struct tcp_sock *tp) tp->lost_out = 0; tp->undo_marker = 0; tp->undo_retrans = -1; - tp->fackets_out = 0; tp->sacked_out = 0; } @@ -1952,6 +1921,7 @@ void tcp_enter_loss(struct sock *sk) !after(tp->high_seq, tp->snd_una) || (icsk->icsk_ca_state == TCP_CA_Loss && !icsk->icsk_retransmits)) { tp->prior_ssthresh = tcp_current_ssthresh(sk); + tp->prior_cwnd = tp->snd_cwnd; tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); tcp_ca_event(sk, CA_EVENT_LOSS); tcp_init_undo(tp); @@ -1966,19 +1936,15 @@ void tcp_enter_loss(struct sock *sk) if (tcp_is_reno(tp)) tcp_reset_reno_sack(tp); - skb = tcp_write_queue_head(sk); + skb = tcp_rtx_queue_head(sk); is_reneg = skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED); if (is_reneg) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSACKRENEGING); tp->sacked_out = 0; - tp->fackets_out = 0; } tcp_clear_all_retrans_hints(tp); - tcp_for_write_queue(skb, sk) { - if (skb == tcp_send_head(sk)) - break; - + skb_rbtree_walk_from(skb) { mark_lost = (!(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) || is_reneg); if (mark_lost) @@ -2012,7 +1978,7 @@ void tcp_enter_loss(struct sock *sk) * falsely raise the receive window, which results in repeated * timeouts and stop-and-go behavior. */ - tp->frto = sysctl_tcp_frto && + tp->frto = net->ipv4.sysctl_tcp_frto && (new_recovery || icsk->icsk_retransmits) && !inet_csk(sk)->icsk_mtup.probe_size; } @@ -2041,19 +2007,10 @@ static bool tcp_check_sack_reneging(struct sock *sk, int flag) return false; } -static inline int tcp_fackets_out(const struct tcp_sock *tp) -{ - return tcp_is_reno(tp) ? tp->sacked_out + 1 : tp->fackets_out; -} - /* Heurestics to calculate number of duplicate ACKs. There's no dupACKs * counter when SACK is enabled (without SACK, sacked_out is used for * that purpose). * - * Instead, with FACK TCP uses fackets_out that includes both SACKed - * segments up to the highest received SACK block so far and holes in - * between them. - * * With reordering, holes may still be in flight, so RFC3517 recovery * uses pure sacked_out (total number of SACKed segments) even though * it violates the RFC that uses duplicate ACKs, often these are equal @@ -2063,10 +2020,10 @@ static inline int tcp_fackets_out(const struct tcp_sock *tp) */ static inline int tcp_dupack_heuristics(const struct tcp_sock *tp) { - return tcp_is_fack(tp) ? tp->fackets_out : tp->sacked_out + 1; + return tp->sacked_out + 1; } -/* Linux NewReno/SACK/FACK/ECN state machine. +/* Linux NewReno/SACK/ECN state machine. * -------------------------------------- * * "Open" Normal state, no dubious events, fast path. @@ -2131,16 +2088,6 @@ static inline int tcp_dupack_heuristics(const struct tcp_sock *tp) * dynamically measured and adjusted. This is implemented in * tcp_rack_mark_lost. * - * FACK (Disabled by default. Subsumbed by RACK): - * It is the simplest heuristics. As soon as we decided - * that something is lost, we decide that _all_ not SACKed - * packets until the most forward SACK are lost. I.e. - * lost_out = fackets_out - sacked_out and left_out = fackets_out. - * It is absolutely correct estimate, if network does not reorder - * packets. And it loses any connection to reality when reordering - * takes place. We use FACK by default until reordering - * is suspected on the path to this destination. - * * If the receiver does not support SACK: * * NewReno (RFC6582): in Recovery we assume that one segment @@ -2189,7 +2136,7 @@ static bool tcp_time_to_recover(struct sock *sk, int flag) } /* Detect loss in event "A" above by marking head of queue up as lost. - * For FACK or non-SACK(Reno) senders, the first "packets" number of segments + * For non-SACK(Reno) senders, the first "packets" number of segments * are considered lost. For RFC3517 SACK, a segment is considered lost if it * has at least tp->reordering SACKed seqments above it; "packets" refers to * the maximum SACKed segments to pass before reaching this limit. @@ -2204,20 +2151,18 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) const u32 loss_high = tcp_is_sack(tp) ? tp->snd_nxt : tp->high_seq; WARN_ON(packets > tp->packets_out); - if (tp->lost_skb_hint) { - skb = tp->lost_skb_hint; - cnt = tp->lost_cnt_hint; + skb = tp->lost_skb_hint; + if (skb) { /* Head already handled? */ - if (mark_head && skb != tcp_write_queue_head(sk)) + if (mark_head && after(TCP_SKB_CB(skb)->seq, tp->snd_una)) return; + cnt = tp->lost_cnt_hint; } else { - skb = tcp_write_queue_head(sk); + skb = tcp_rtx_queue_head(sk); cnt = 0; } - tcp_for_write_queue_from(skb, sk) { - if (skb == tcp_send_head(sk)) - break; + skb_rbtree_walk_from(skb) { /* TODO: do this better */ /* this is not the most efficient way to do this... */ tp->lost_skb_hint = skb; @@ -2227,12 +2172,12 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) break; oldcnt = cnt; - if (tcp_is_fack(tp) || tcp_is_reno(tp) || + if (tcp_is_reno(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) cnt += tcp_skb_pcount(skb); if (cnt > packets) { - if ((tcp_is_sack(tp) && !tcp_is_fack(tp)) || + if (tcp_is_sack(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) || (oldcnt >= packets)) break; @@ -2241,7 +2186,8 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head) /* If needed, chop off the prefix to mark as lost. */ lost = (packets - oldcnt) * mss; if (lost < skb->len && - tcp_fragment(sk, skb, lost, mss, GFP_ATOMIC) < 0) + tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, + lost, mss, GFP_ATOMIC) < 0) break; cnt = packets; } @@ -2262,11 +2208,6 @@ static void tcp_update_scoreboard(struct sock *sk, int fast_rexmit) if (tcp_is_reno(tp)) { tcp_mark_head_lost(sk, 1, 1); - } else if (tcp_is_fack(tp)) { - int lost = tp->fackets_out - tp->reordering; - if (lost <= 0) - lost = 1; - tcp_mark_head_lost(sk, lost, 0); } else { int sacked_upto = tp->sacked_out - tp->reordering; if (sacked_upto >= 0) @@ -2325,16 +2266,16 @@ static bool tcp_any_retrans_done(const struct sock *sk) if (tp->retrans_out) return true; - skb = tcp_write_queue_head(sk); + skb = tcp_rtx_queue_head(sk); if (unlikely(skb && TCP_SKB_CB(skb)->sacked & TCPCB_EVER_RETRANS)) return true; return false; } -#if FASTRETRANS_DEBUG > 1 static void DBGUNDO(struct sock *sk, const char *msg) { +#if FASTRETRANS_DEBUG > 1 struct tcp_sock *tp = tcp_sk(sk); struct inet_sock *inet = inet_sk(sk); @@ -2356,10 +2297,8 @@ static void DBGUNDO(struct sock *sk, const char *msg) tp->packets_out); } #endif -} -#else -#define DBGUNDO(x...) do { } while (0) #endif +} static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss) { @@ -2368,9 +2307,7 @@ static void tcp_undo_cwnd_reduction(struct sock *sk, bool unmark_loss) if (unmark_loss) { struct sk_buff *skb; - tcp_for_write_queue(skb, sk) { - if (skb == tcp_send_head(sk)) - break; + skb_rbtree_walk(skb, &sk->tcp_rtx_queue) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_LOST; } tp->lost_out = 0; @@ -2415,6 +2352,8 @@ static bool tcp_try_undo_recovery(struct sock *sk) mib_idx = LINUX_MIB_TCPFULLUNDO; NET_INC_STATS(sock_net(sk), mib_idx); + } else if (tp->rack.reo_wnd_persist) { + tp->rack.reo_wnd_persist--; } if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) { /* Hold old state until something *above* high_seq @@ -2434,6 +2373,8 @@ static bool tcp_try_undo_dsack(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); if (tp->undo_marker && !tp->undo_retrans) { + tp->rack.reo_wnd_persist = min(TCP_RACK_RECOVERY_THRESH, + tp->rack.reo_wnd_persist + 1); DBGUNDO(sk, "D-SACK"); tcp_undo_cwnd_reduction(sk, false); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDSACKUNDO); @@ -2613,11 +2554,8 @@ void tcp_simple_retransmit(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; unsigned int mss = tcp_current_mss(sk); - u32 prior_lost = tp->lost_out; - tcp_for_write_queue(skb, sk) { - if (skb == tcp_send_head(sk)) - break; + skb_rbtree_walk(skb, &sk->tcp_rtx_queue) { if (tcp_skb_seglen(skb) > mss && !(TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) { if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) { @@ -2630,7 +2568,7 @@ void tcp_simple_retransmit(struct sock *sk) tcp_clear_retrans_hints_partial(tp); - if (prior_lost == tp->lost_out) + if (!tp->lost_out) return; if (tcp_is_reno(tp)) @@ -2711,7 +2649,7 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack, * is updated in tcp_ack()). Otherwise fall back to * the conventional recovery. */ - if (tcp_send_head(sk) && + if (!tcp_write_queue_empty(sk) && after(tcp_wnd_end(tp), tp->snd_nxt)) { *rexmit = REXMIT_NEW; return; @@ -2738,15 +2676,15 @@ static void tcp_process_loss(struct sock *sk, int flag, bool is_dupack, } /* Undo during fast recovery after partial ACK. */ -static bool tcp_try_undo_partial(struct sock *sk, const int acked) +static bool tcp_try_undo_partial(struct sock *sk, u32 prior_snd_una) { struct tcp_sock *tp = tcp_sk(sk); if (tp->undo_marker && tcp_packet_delayed(tp)) { /* Plain luck! Hole if filled with delayed - * packet, rather than with a retransmit. + * packet, rather than with a retransmit. Check reordering. */ - tcp_update_reordering(sk, tcp_fackets_out(tp) + acked, 1); + tcp_check_sack_reordering(sk, prior_snd_una, 1); /* We are getting evidence that the reordering degree is higher * than we realized. If there are no retransmits out then we @@ -2773,7 +2711,7 @@ static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag) struct tcp_sock *tp = tcp_sk(sk); /* Use RACK to detect loss */ - if (sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) { + if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_LOSS_DETECTION) { u32 prior_retrans = tp->retrans_out; tcp_rack_mark_lost(sk); @@ -2782,6 +2720,14 @@ static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag) } } +static bool tcp_force_fast_retransmit(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + return after(tcp_highest_sack_seq(tp), + tp->snd_una + tp->reordering * tp->mss_cache); +} + /* Process an event, which can update packets-in-flight not trivially. * Main goal of this function is to calculate new estimate for left_out, * taking into account both packets sitting in receiver's buffer and @@ -2794,19 +2740,17 @@ static void tcp_rack_identify_loss(struct sock *sk, int *ack_flag) * It does _not_ decide what to send, it is made in function * tcp_xmit_retransmit_queue(). */ -static void tcp_fastretrans_alert(struct sock *sk, const int acked, +static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una, bool is_dupack, int *ack_flag, int *rexmit) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); int fast_rexmit = 0, flag = *ack_flag; bool do_lost = is_dupack || ((flag & FLAG_DATA_SACKED) && - (tcp_fackets_out(tp) > tp->reordering)); + tcp_force_fast_retransmit(sk)); - if (WARN_ON(!tp->packets_out && tp->sacked_out)) + if (!tp->packets_out && tp->sacked_out) tp->sacked_out = 0; - if (WARN_ON(!tp->sacked_out && tp->fackets_out)) - tp->fackets_out = 0; /* Now state machine starts. * A. ECE, hence prohibit cwnd undoing, the reduction is required. */ @@ -2853,11 +2797,11 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, if (tcp_is_reno(tp) && is_dupack) tcp_add_reno_sack(sk); } else { - if (tcp_try_undo_partial(sk, acked)) + if (tcp_try_undo_partial(sk, prior_snd_una)) return; /* Partial ACK arrived. Force fast retransmit. */ do_lost = tcp_is_reno(tp) || - tcp_fackets_out(tp) > tp->reordering; + tcp_force_fast_retransmit(sk); } if (tcp_try_undo_dsack(sk)) { tcp_try_keep_open(sk); @@ -2872,6 +2816,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, (*ack_flag & FLAG_LOST_RETRANS))) return; /* Change state if cwnd is undone or retransmits are lost */ + /* fall through */ default: if (tcp_is_reno(tp)) { if (flag & FLAG_SND_UNA_ADVANCED) @@ -2912,8 +2857,8 @@ static void tcp_fastretrans_alert(struct sock *sk, const int acked, static void tcp_update_rtt_min(struct sock *sk, u32 rtt_us) { + u32 wlen = sock_net(sk)->ipv4.sysctl_tcp_min_rtt_wlen * HZ; struct tcp_sock *tp = tcp_sk(sk); - u32 wlen = sysctl_tcp_min_rtt_wlen * HZ; minmax_running_min(&tp->rtt_min, wlen, tcp_jiffies32, rtt_us ? : jiffies_to_usecs(1)); @@ -3009,8 +2954,7 @@ void tcp_rearm_rto(struct sock *sk) /* delta_us may not be positive if the socket is locked * when the retrans timer fires and is rescheduled. */ - if (delta_us > 0) - rto = usecs_to_jiffies(delta_us); + rto = usecs_to_jiffies(max_t(int, delta_us, 1)); } inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto, TCP_RTO_MAX); @@ -3056,28 +3000,31 @@ static void tcp_ack_tstamp(struct sock *sk, struct sk_buff *skb, shinfo = skb_shinfo(skb); if (!before(shinfo->tskey, prior_snd_una) && - before(shinfo->tskey, tcp_sk(sk)->snd_una)) - __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK); + before(shinfo->tskey, tcp_sk(sk)->snd_una)) { + tcp_skb_tsorted_save(skb) { + __skb_tstamp_tx(skb, NULL, sk, SCM_TSTAMP_ACK); + } tcp_skb_tsorted_restore(skb); + } } /* Remove acknowledged frames from the retransmission queue. If our packet * is before the ack sequence we can discard it as it's confirmed to have * arrived at the other end. */ -static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, - u32 prior_snd_una, int *acked, +static int tcp_clean_rtx_queue(struct sock *sk, u32 prior_fack, + u32 prior_snd_una, struct tcp_sacktag_state *sack) { const struct inet_connection_sock *icsk = inet_csk(sk); u64 first_ackt, last_ackt; struct tcp_sock *tp = tcp_sk(sk); u32 prior_sacked = tp->sacked_out; - u32 reord = tp->packets_out; + u32 reord = tp->snd_nxt; /* lowest acked un-retx un-sacked seq */ + struct sk_buff *skb, *next; bool fully_acked = true; long sack_rtt_us = -1L; long seq_rtt_us = -1L; long ca_rtt_us = -1L; - struct sk_buff *skb; u32 pkts_acked = 0; u32 last_in_flight = 0; bool rtt_update; @@ -3085,8 +3032,9 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, first_ackt = 0; - while ((skb = tcp_write_queue_head(sk)) && skb != tcp_send_head(sk)) { + for (skb = skb_rb_first(&sk->tcp_rtx_queue); skb; skb = next) { struct tcp_skb_cb *scb = TCP_SKB_CB(skb); + const u32 start_seq = scb->seq; u8 sacked = scb->sacked; u32 acked_pcount; @@ -3103,8 +3051,6 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, break; fully_acked = false; } else { - /* Speedup tcp_unlink_write_queue() and next loop */ - prefetchw(skb->next); acked_pcount = tcp_skb_pcount(skb); } @@ -3119,7 +3065,8 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, first_ackt = last_ackt; last_in_flight = TCP_SKB_CB(skb)->tx.in_flight; - reord = min(pkts_acked, reord); + if (before(start_seq, reord)) + reord = start_seq; if (!after(scb->end_seq, tp->high_seq)) flag |= FLAG_ORIG_SACK_ACKED; } @@ -3156,12 +3103,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, if (!fully_acked) break; - tcp_unlink_write_queue(skb, sk); - sk_wmem_free_skb(sk, skb); + next = skb_rb_next(skb); if (unlikely(skb == tp->retransmit_skb_hint)) tp->retransmit_skb_hint = NULL; if (unlikely(skb == tp->lost_skb_hint)) tp->lost_skb_hint = NULL; + tcp_rtx_queue_unlink_and_free(skb, sk); } if (!skb) @@ -3197,16 +3144,12 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, int delta; /* Non-retransmitted hole got filled? That's reordering */ - if (reord < prior_fackets && reord <= tp->fackets_out) - tcp_update_reordering(sk, tp->fackets_out - reord, 0); + if (before(reord, prior_fack)) + tcp_check_sack_reordering(sk, reord, 0); - delta = tcp_is_fack(tp) ? pkts_acked : - prior_sacked - tp->sacked_out; + delta = prior_sacked - tp->sacked_out; tp->lost_cnt_hint -= min(tp->lost_cnt_hint, delta); } - - tp->fackets_out -= min(pkts_acked, tp->fackets_out); - } else if (skb && rtt_update && sack_rtt_us >= 0 && sack_rtt_us > tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp)) { /* Do not re-arm RTO if the sack RTT is measured from data sent @@ -3247,18 +3190,19 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, } } #endif - *acked = pkts_acked; return flag; } static void tcp_ack_probe(struct sock *sk) { - const struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); + struct sk_buff *head = tcp_send_head(sk); + const struct tcp_sock *tp = tcp_sk(sk); /* Was it a usable window open? */ - - if (!after(TCP_SKB_CB(tcp_send_head(sk))->end_seq, tcp_wnd_end(tp))) { + if (!head) + return; + if (!after(TCP_SKB_CB(head)->end_seq, tcp_wnd_end(tp))) { icsk->icsk_backoff = 0; inet_csk_clear_xmit_timer(sk, ICSK_TIME_PROBE0); /* Socket must be waked up by subsequent tcp_data_snd_check(). @@ -3378,7 +3322,7 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 tp->pred_flags = 0; tcp_fast_path_check(sk); - if (tcp_send_head(sk)) + if (!tcp_write_queue_empty(sk)) tcp_slow_start_after_idle_check(sk); if (nwin > tp->max_window) { @@ -3399,7 +3343,7 @@ static bool __tcp_oow_rate_limited(struct net *net, int mib_idx, if (*last_oow_ack_time) { s32 elapsed = (s32)(tcp_jiffies32 - *last_oow_ack_time); - if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) { + if (0 <= elapsed && elapsed < net->ipv4.sysctl_tcp_invalid_ratelimit) { NET_INC_STATS(net, mib_idx); return true; /* rate-limited: don't send yet! */ } @@ -3435,10 +3379,11 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb) static u32 challenge_timestamp; static unsigned int challenge_count; struct tcp_sock *tp = tcp_sk(sk); + struct net *net = sock_net(sk); u32 count, now; /* First check our per-socket dupack rate limit. */ - if (__tcp_oow_rate_limited(sock_net(sk), + if (__tcp_oow_rate_limited(net, LINUX_MIB_TCPACKSKIPPEDCHALLENGE, &tp->last_oow_ack_time)) return; @@ -3446,16 +3391,16 @@ static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb) /* Then check host-wide RFC 5961 rate limit. */ now = jiffies / HZ; if (now != challenge_timestamp) { - u32 half = (sysctl_tcp_challenge_ack_limit + 1) >> 1; + u32 ack_limit = net->ipv4.sysctl_tcp_challenge_ack_limit; + u32 half = (ack_limit + 1) >> 1; challenge_timestamp = now; - WRITE_ONCE(challenge_count, half + - prandom_u32_max(sysctl_tcp_challenge_ack_limit)); + WRITE_ONCE(challenge_count, half + prandom_u32_max(ack_limit)); } count = READ_ONCE(challenge_count); if (count > 0) { WRITE_ONCE(challenge_count, count - 1); - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPCHALLENGEACK); + NET_INC_STATS(net, LINUX_MIB_TCPCHALLENGEACK); tcp_send_ack(sk); } } @@ -3553,18 +3498,17 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) u32 ack_seq = TCP_SKB_CB(skb)->seq; u32 ack = TCP_SKB_CB(skb)->ack_seq; bool is_dupack = false; - u32 prior_fackets; int prior_packets = tp->packets_out; u32 delivered = tp->delivered; u32 lost = tp->lost; - int acked = 0; /* Number of packets newly acked */ int rexmit = REXMIT_NONE; /* Flag to (re)transmit to recover losses */ + u32 prior_fack; sack_state.first_sackt = 0; sack_state.rate = &rs; - /* We very likely will need to access write queue head. */ - prefetchw(sk->sk_write_queue.next); + /* We very likely will need to access rtx queue. */ + prefetch(sk->tcp_rtx_queue.rb_node); /* If the ack is older than previous acks * then we can probably ignore it. @@ -3590,7 +3534,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) icsk->icsk_retransmits = 0; } - prior_fackets = tp->fackets_out; + prior_fack = tcp_is_sack(tp) ? tcp_highest_sack_seq(tp) : tp->snd_una; rs.prior_in_flight = tcp_packets_in_flight(tp); /* ts_recent update must be made after we are sure that the packet @@ -3646,8 +3590,9 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) goto no_queue; /* See if we can take anything off of the retransmit queue. */ - flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked, - &sack_state); + flag |= tcp_clean_rtx_queue(sk, prior_fack, prior_snd_una, &sack_state); + + tcp_rack_update_reo_wnd(sk, &rs); if (tp->tlp_high_seq) tcp_process_tlp_ack(sk, ack, flag); @@ -3657,7 +3602,8 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (tcp_ack_is_dubious(sk, flag)) { is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); - tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit); + tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag, + &rexmit); } if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) @@ -3673,13 +3619,13 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) no_queue: /* If data was DSACKed, see if we can undo a cwnd reduction. */ if (flag & FLAG_DSACKING_ACK) - tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit); + tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag, + &rexmit); /* If this ack opens up a zero window, clear backoff. It was * being used to time the probes, and is probably far higher than * it needs to be for normal retransmission. */ - if (tcp_send_head(sk)) - tcp_ack_probe(sk); + tcp_ack_probe(sk); if (tp->tlp_high_seq) tcp_process_tlp_ack(sk, ack, flag); @@ -3696,7 +3642,8 @@ old_ack: if (TCP_SKB_CB(skb)->sacked) { flag |= tcp_sacktag_write_queue(sk, skb, prior_snd_una, &sack_state); - tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit); + tcp_fastretrans_alert(sk, prior_snd_una, is_dupack, &flag, + &rexmit); tcp_xmit_recovery(sk, rexmit); } @@ -3721,6 +3668,21 @@ static void tcp_parse_fastopen_option(int len, const unsigned char *cookie, foc->exp = exp_opt; } +static void smc_parse_options(const struct tcphdr *th, + struct tcp_options_received *opt_rx, + const unsigned char *ptr, + int opsize) +{ +#if IS_ENABLED(CONFIG_SMC) + if (static_branch_unlikely(&tcp_have_smc)) { + if (th->syn && !(opsize & 1) && + opsize >= TCPOLEN_EXP_SMC_BASE && + get_unaligned_be32(ptr) == TCPOPT_SMC_MAGIC) + opt_rx->smc_ok = 1; + } +#endif +} + /* Look for tcp options. Normally only called on SYN and SYNACK packets. * But, this can also be called on packets in the established flow when * the fast version below fails. @@ -3828,6 +3790,9 @@ void tcp_parse_options(const struct net *net, tcp_parse_fastopen_option(opsize - TCPOLEN_EXP_FASTOPEN_BASE, ptr + 2, th->syn, foc, true); + else + smc_parse_options(th, opt_rx, ptr, + opsize); break; } @@ -3995,6 +3960,8 @@ static inline bool tcp_sequence(const struct tcp_sock *tp, u32 seq, u32 end_seq) /* When we get a reset we do this. */ void tcp_reset(struct sock *sk) { + trace_tcp_receive_reset(sk); + /* We want the right error as BSD sees it (and indeed as we do). */ switch (sk->sk_state) { case TCP_SYN_SENT: @@ -4117,7 +4084,7 @@ static void tcp_dsack_set(struct sock *sk, u32 seq, u32 end_seq) { struct tcp_sock *tp = tcp_sk(sk); - if (tcp_is_sack(tp) && sysctl_tcp_dsack) { + if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) { int mib_idx; if (before(seq, tp->rcv_nxt)) @@ -4152,7 +4119,7 @@ static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb) NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOST); tcp_enter_quickack_mode(sk); - if (tcp_is_sack(tp) && sysctl_tcp_dsack) { + if (tcp_is_sack(tp) && sock_net(sk)->ipv4.sysctl_tcp_dsack) { u32 end_seq = TCP_SKB_CB(skb)->end_seq; if (after(TCP_SKB_CB(skb)->end_seq, tp->rcv_nxt)) @@ -4271,6 +4238,7 @@ static void tcp_sack_remove(struct tcp_sock *tp) /** * tcp_try_coalesce - try to merge skb to prior one * @sk: socket + * @dest: destination queue * @to: prior buffer * @from: buffer to add in queue * @fragstolen: pointer to boolean @@ -4303,6 +4271,12 @@ static bool tcp_try_coalesce(struct sock *sk, TCP_SKB_CB(to)->end_seq = TCP_SKB_CB(from)->end_seq; TCP_SKB_CB(to)->ack_seq = TCP_SKB_CB(from)->ack_seq; TCP_SKB_CB(to)->tcp_flags |= TCP_SKB_CB(from)->tcp_flags; + + if (TCP_SKB_CB(from)->has_rxtstamp) { + TCP_SKB_CB(to)->has_rxtstamp = true; + to->tstamp = from->tstamp; + } + return true; } @@ -4325,7 +4299,7 @@ static void tcp_ofo_queue(struct sock *sk) p = rb_first(&tp->out_of_order_queue); while (p) { - skb = rb_entry(p, struct sk_buff, rbnode); + skb = rb_to_skb(p); if (after(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) break; @@ -4389,7 +4363,7 @@ static int tcp_try_rmem_schedule(struct sock *sk, struct sk_buff *skb, static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); - struct rb_node **p, *q, *parent; + struct rb_node **p, *parent; struct sk_buff *skb1; u32 seq, end_seq; bool fragstolen; @@ -4429,7 +4403,8 @@ static void tcp_data_queue_ofo(struct sock *sk, struct sk_buff *skb) /* In the typical case, we are adding an skb to the end of the list. * Use of ooo_last_skb avoids the O(Log(N)) rbtree lookup. */ - if (tcp_try_coalesce(sk, tp->ooo_last_skb, skb, &fragstolen)) { + if (tcp_try_coalesce(sk, tp->ooo_last_skb, + skb, &fragstolen)) { coalesce_done: tcp_grow_window(sk, skb); kfree_skb_partial(skb, fragstolen); @@ -4447,7 +4422,7 @@ coalesce_done: parent = NULL; while (*p) { parent = *p; - skb1 = rb_entry(parent, struct sk_buff, rbnode); + skb1 = rb_to_skb(parent); if (before(seq, TCP_SKB_CB(skb1)->seq)) { p = &parent->rb_left; continue; @@ -4479,7 +4454,8 @@ coalesce_done: __kfree_skb(skb1); goto merge_right; } - } else if (tcp_try_coalesce(sk, skb1, skb, &fragstolen)) { + } else if (tcp_try_coalesce(sk, skb1, + skb, &fragstolen)) { goto coalesce_done; } p = &parent->rb_right; @@ -4491,9 +4467,7 @@ insert: merge_right: /* Remove other segments covered by skb. */ - while ((q = rb_next(&skb->rbnode)) != NULL) { - skb1 = rb_entry(q, struct sk_buff, rbnode); - + while ((skb1 = skb_rb_next(skb)) != NULL) { if (!after(end_seq, TCP_SKB_CB(skb1)->seq)) break; if (before(end_seq, TCP_SKB_CB(skb1)->end_seq)) { @@ -4508,7 +4482,7 @@ merge_right: tcp_drop(sk, skb1); } /* If there is no skb after us, we are the last_skb ! */ - if (!q) + if (!skb1) tp->ooo_last_skb = skb; add_sack: @@ -4530,7 +4504,8 @@ static int __must_check tcp_queue_rcv(struct sock *sk, struct sk_buff *skb, int __skb_pull(skb, hdrlen); eaten = (tail && - tcp_try_coalesce(sk, tail, skb, fragstolen)) ? 1 : 0; + tcp_try_coalesce(sk, tail, + skb, fragstolen)) ? 1 : 0; tcp_rcv_nxt_update(tcp_sk(sk), TCP_SKB_CB(skb)->end_seq); if (!eaten) { __skb_queue_tail(&sk->sk_receive_queue, skb); @@ -4592,8 +4567,8 @@ err: static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); - bool fragstolen = false; - int eaten = -1; + bool fragstolen; + int eaten; if (TCP_SKB_CB(skb)->seq == TCP_SKB_CB(skb)->end_seq) { __kfree_skb(skb); @@ -4615,32 +4590,13 @@ static void tcp_data_queue(struct sock *sk, struct sk_buff *skb) goto out_of_window; /* Ok. In sequence. In window. */ - if (tp->ucopy.task == current && - tp->copied_seq == tp->rcv_nxt && tp->ucopy.len && - sock_owned_by_user(sk) && !tp->urg_data) { - int chunk = min_t(unsigned int, skb->len, - tp->ucopy.len); - - __set_current_state(TASK_RUNNING); - - if (!skb_copy_datagram_msg(skb, 0, tp->ucopy.msg, chunk)) { - tp->ucopy.len -= chunk; - tp->copied_seq += chunk; - eaten = (chunk == skb->len); - tcp_rcv_space_adjust(sk); - } - } - - if (eaten <= 0) { queue_and_out: - if (eaten < 0) { - if (skb_queue_len(&sk->sk_receive_queue) == 0) - sk_forced_mem_schedule(sk, skb->truesize); - else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) - goto drop; - } - eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen); - } + if (skb_queue_len(&sk->sk_receive_queue) == 0) + sk_forced_mem_schedule(sk, skb->truesize); + else if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) + goto drop; + + eaten = tcp_queue_rcv(sk, skb, 0, &fragstolen); tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); if (skb->len) tcp_event_data_recv(sk, skb); @@ -4712,7 +4668,7 @@ static struct sk_buff *tcp_skb_next(struct sk_buff *skb, struct sk_buff_head *li if (list) return !skb_queue_is_last(list, skb) ? skb->next : NULL; - return rb_entry_safe(rb_next(&skb->rbnode), struct sk_buff, rbnode); + return skb_rb_next(skb); } static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb, @@ -4733,7 +4689,7 @@ static struct sk_buff *tcp_collapse_one(struct sock *sk, struct sk_buff *skb, } /* Insert skb into rb tree, ordered by TCP_SKB_CB(skb)->seq */ -static void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb) +void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb) { struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; @@ -4741,7 +4697,7 @@ static void tcp_rbtree_insert(struct rb_root *root, struct sk_buff *skb) while (*p) { parent = *p; - skb1 = rb_entry(parent, struct sk_buff, rbnode); + skb1 = rb_to_skb(parent); if (before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb1)->seq)) p = &parent->rb_left; else @@ -4788,7 +4744,7 @@ restart: * overlaps to the next one. */ if (!(TCP_SKB_CB(skb)->tcp_flags & (TCPHDR_SYN | TCPHDR_FIN)) && - (tcp_win_from_space(skb->truesize) > skb->len || + (tcp_win_from_space(sk, skb->truesize) > skb->len || before(TCP_SKB_CB(skb)->seq, start))) { end_of_skbs = false; break; @@ -4860,26 +4816,19 @@ static void tcp_collapse_ofo_queue(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb, *head; - struct rb_node *p; u32 start, end; - p = rb_first(&tp->out_of_order_queue); - skb = rb_entry_safe(p, struct sk_buff, rbnode); + skb = skb_rb_first(&tp->out_of_order_queue); new_range: if (!skb) { - p = rb_last(&tp->out_of_order_queue); - /* Note: This is possible p is NULL here. We do not - * use rb_entry_safe(), as ooo_last_skb is valid only - * if rbtree is not empty. - */ - tp->ooo_last_skb = rb_entry(p, struct sk_buff, rbnode); + tp->ooo_last_skb = skb_rb_last(&tp->out_of_order_queue); return; } start = TCP_SKB_CB(skb)->seq; end = TCP_SKB_CB(skb)->end_seq; for (head = skb;;) { - skb = tcp_skb_next(skb, NULL); + skb = skb_rb_next(skb); /* Range is terminated when we see a gap or when * we are at the queue end. @@ -4922,14 +4871,14 @@ static bool tcp_prune_ofo_queue(struct sock *sk) do { prev = rb_prev(node); rb_erase(node, &tp->out_of_order_queue); - tcp_drop(sk, rb_entry(node, struct sk_buff, rbnode)); + tcp_drop(sk, rb_to_skb(node)); sk_mem_reclaim(sk); if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf && !tcp_under_memory_pressure(sk)) break; node = prev; } while (node); - tp->ooo_last_skb = rb_entry(prev, struct sk_buff, rbnode); + tp->ooo_last_skb = rb_to_skb(prev); /* Reset SACK state. A conforming SACK implementation will * do the same at a timeout based retransmit. When a connection @@ -5104,7 +5053,7 @@ static void tcp_check_urg(struct sock *sk, const struct tcphdr *th) struct tcp_sock *tp = tcp_sk(sk); u32 ptr = ntohs(th->urg_ptr); - if (ptr && !sysctl_tcp_stdurg) + if (ptr && !sock_net(sk)->ipv4.sysctl_tcp_stdurg) ptr--; ptr += ntohl(th->seq); @@ -5190,26 +5139,6 @@ static void tcp_urg(struct sock *sk, struct sk_buff *skb, const struct tcphdr *t } } -static int tcp_copy_to_iovec(struct sock *sk, struct sk_buff *skb, int hlen) -{ - struct tcp_sock *tp = tcp_sk(sk); - int chunk = skb->len - hlen; - int err; - - if (skb_csum_unnecessary(skb)) - err = skb_copy_datagram_msg(skb, hlen, tp->ucopy.msg, chunk); - else - err = skb_copy_and_csum_datagram_msg(skb, hlen, tp->ucopy.msg); - - if (!err) { - tp->ucopy.len -= chunk; - tp->copied_seq += chunk; - tcp_rcv_space_adjust(sk); - } - - return err; -} - /* Accept RST for rcv_nxt - 1 after a FIN. * When tcp connections are abruptly terminated from Mac OSX (via ^C), a * FIN is sent followed by a RST packet. The RST is sent with the same @@ -5362,8 +5291,9 @@ discard: * tcp_data_queue when everything is OK. */ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, - const struct tcphdr *th, unsigned int len) + const struct tcphdr *th) { + unsigned int len = skb->len; struct tcp_sock *tp = tcp_sk(sk); tcp_mstamp_refresh(tp); @@ -5449,56 +5379,28 @@ void tcp_rcv_established(struct sock *sk, struct sk_buff *skb, int eaten = 0; bool fragstolen = false; - if (tp->ucopy.task == current && - tp->copied_seq == tp->rcv_nxt && - len - tcp_header_len <= tp->ucopy.len && - sock_owned_by_user(sk)) { - __set_current_state(TASK_RUNNING); - - if (!tcp_copy_to_iovec(sk, skb, tcp_header_len)) { - /* Predicted packet is in window by definition. - * seq == rcv_nxt and rcv_wup <= rcv_nxt. - * Hence, check seq<=rcv_wup reduces to: - */ - if (tcp_header_len == - (sizeof(struct tcphdr) + - TCPOLEN_TSTAMP_ALIGNED) && - tp->rcv_nxt == tp->rcv_wup) - tcp_store_ts_recent(tp); - - tcp_rcv_rtt_measure_ts(sk, skb); - - __skb_pull(skb, tcp_header_len); - tcp_rcv_nxt_update(tp, TCP_SKB_CB(skb)->end_seq); - NET_INC_STATS(sock_net(sk), - LINUX_MIB_TCPHPHITSTOUSER); - eaten = 1; - } - } - if (!eaten) { - if (tcp_checksum_complete(skb)) - goto csum_error; + if (tcp_checksum_complete(skb)) + goto csum_error; - if ((int)skb->truesize > sk->sk_forward_alloc) - goto step5; + if ((int)skb->truesize > sk->sk_forward_alloc) + goto step5; - /* Predicted packet is in window by definition. - * seq == rcv_nxt and rcv_wup <= rcv_nxt. - * Hence, check seq<=rcv_wup reduces to: - */ - if (tcp_header_len == - (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) && - tp->rcv_nxt == tp->rcv_wup) - tcp_store_ts_recent(tp); + /* Predicted packet is in window by definition. + * seq == rcv_nxt and rcv_wup <= rcv_nxt. + * Hence, check seq<=rcv_wup reduces to: + */ + if (tcp_header_len == + (sizeof(struct tcphdr) + TCPOLEN_TSTAMP_ALIGNED) && + tp->rcv_nxt == tp->rcv_wup) + tcp_store_ts_recent(tp); - tcp_rcv_rtt_measure_ts(sk, skb); + tcp_rcv_rtt_measure_ts(sk, skb); - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS); + NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPHPHITS); - /* Bulk data transfer: receiver */ - eaten = tcp_queue_rcv(sk, skb, tcp_header_len, - &fragstolen); - } + /* Bulk data transfer: receiver */ + eaten = tcp_queue_rcv(sk, skb, tcp_header_len, + &fragstolen); tcp_event_data_recv(sk, skb); @@ -5571,20 +5473,13 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) security_inet_conn_established(sk, skb); } - /* Make sure socket is routed, for correct metrics. */ - icsk->icsk_af_ops->rebuild_header(sk); - - tcp_init_metrics(sk); - tcp_call_bpf(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB); - tcp_init_congestion_control(sk); + tcp_init_transfer(sk, BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB); /* Prevent spurious tcp_cwnd_restart() on first data * packet. */ tp->lsndtime = tcp_jiffies32; - tcp_init_buffer_space(sk); - if (sock_flag(sk, SOCK_KEEPOPEN)) inet_csk_reset_keepalive_timer(sk, keepalive_time_when(tp)); @@ -5592,14 +5487,13 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) __tcp_fast_path_on(tp, tp->snd_wnd); else tp->pred_flags = 0; - } static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, struct tcp_fastopen_cookie *cookie) { struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *data = tp->syn_data ? tcp_write_queue_head(sk) : NULL; + struct sk_buff *data = tp->syn_data ? tcp_rtx_queue_head(sk) : NULL; u16 mss = tp->rx_opt.mss_clamp, try_exp = 0; bool syn_drop = false; @@ -5634,9 +5528,8 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, tcp_fastopen_cache_set(sk, mss, cookie, syn_drop, try_exp); if (data) { /* Retransmit unacked data in SYN */ - tcp_for_write_queue_from(data, sk) { - if (data == tcp_send_head(sk) || - __tcp_retransmit_skb(sk, data, 1)) + skb_rbtree_walk_from(data) { + if (__tcp_retransmit_skb(sk, data, 1)) break; } tcp_rearm_rto(sk); @@ -5654,6 +5547,16 @@ static bool tcp_rcv_fastopen_synack(struct sock *sk, struct sk_buff *synack, return false; } +static void smc_check_reset_syn(struct tcp_sock *tp) +{ +#if IS_ENABLED(CONFIG_SMC) + if (static_branch_unlikely(&tcp_have_smc)) { + if (tp->syn_smc && !tp->rx_opt.smc_ok) + tp->syn_smc = 0; + } +#endif +} + static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, const struct tcphdr *th) { @@ -5749,10 +5652,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tp->tcp_header_len = sizeof(struct tcphdr); } - if (tcp_is_sack(tp) && sysctl_tcp_fack) - tcp_enable_fack(tp); - - tcp_mtup_init(sk); tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); tcp_initialize_rcv_mss(sk); @@ -5761,6 +5660,8 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, * is initialized. */ tp->copied_seq = tp->rcv_nxt; + smc_check_reset_syn(tp); + smp_mb(); tcp_finish_connect(sk, skb); @@ -5978,15 +5879,18 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) if (req) { inet_csk(sk)->icsk_retransmits = 0; reqsk_fastopen_remove(sk, req, false); + /* Re-arm the timer because data may have been sent out. + * This is similar to the regular data transmission case + * when new data has just been ack'ed. + * + * (TFO) - we could try to be more aggressive and + * retransmitting any data sooner based on when they + * are sent out. + */ + tcp_rearm_rto(sk); } else { - /* Make sure socket is routed, for correct metrics. */ - icsk->icsk_af_ops->rebuild_header(sk); - tcp_call_bpf(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB); - tcp_init_congestion_control(sk); - - tcp_mtup_init(sk); + tcp_init_transfer(sk, BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB); tp->copied_seq = tp->rcv_nxt; - tcp_init_buffer_space(sk); } smp_mb(); tcp_set_state(sk, TCP_ESTABLISHED); @@ -6006,19 +5910,6 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) if (tp->rx_opt.tstamp_ok) tp->advmss -= TCPOLEN_TSTAMP_ALIGNED; - if (req) { - /* Re-arm the timer because data may have been sent out. - * This is similar to the regular data transmission case - * when new data has just been ack'ed. - * - * (TFO) - we could try to be more aggressive and - * retransmitting any data sooner based on when they - * are sent out. - */ - tcp_rearm_rto(sk); - } else - tcp_init_metrics(sk); - if (!inet_csk(sk)->icsk_ca_ops->cong_control) tcp_update_pacing_rate(sk); @@ -6115,6 +6006,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb) case TCP_LAST_ACK: if (!before(TCP_SKB_CB(skb)->seq, tp->rcv_nxt)) break; + /* fall through */ case TCP_FIN_WAIT1: case TCP_FIN_WAIT2: /* RFC 793 says to queue data in these states, @@ -6223,6 +6115,9 @@ static void tcp_openreq_init(struct request_sock *req, ireq->ir_rmt_port = tcp_hdr(skb)->source; ireq->ir_num = ntohs(tcp_hdr(skb)->dest); ireq->ir_mark = inet_request_mark(sk, skb); +#if IS_ENABLED(CONFIG_SMC) + ireq->smc_ok = rx_opt->smc_ok; +#endif } struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, @@ -6235,8 +6130,7 @@ struct request_sock *inet_reqsk_alloc(const struct request_sock_ops *ops, if (req) { struct inet_request_sock *ireq = inet_rsk(req); - kmemcheck_annotate_bitfield(ireq, flags); - ireq->opt = NULL; + ireq->ireq_opt = NULL; #if IS_ENABLED(CONFIG_IPV6) ireq->pktopts = NULL; #endif @@ -6307,9 +6201,9 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, struct tcp_sock *tp = tcp_sk(sk); struct net *net = sock_net(sk); struct sock *fastopen_sk = NULL; - struct dst_entry *dst = NULL; struct request_sock *req; bool want_cookie = false; + struct dst_entry *dst; struct flowi fl; /* TW buckets are converted to open requests without @@ -6359,6 +6253,10 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, if (tmp_opt.tstamp_ok) tcp_rsk(req)->ts_off = af_ops->init_ts_off(net, skb); + dst = af_ops->route_req(sk, &fl, req); + if (!dst) + goto drop_and_free; + if (!want_cookie && !isn) { /* Kill the following clause, if you dislike this way. */ if (!net->ipv4.sysctl_tcp_syncookies && @@ -6379,11 +6277,6 @@ int tcp_conn_request(struct request_sock_ops *rsk_ops, isn = af_ops->init_seq(skb); } - if (!dst) { - dst = af_ops->route_req(sk, &fl, req); - if (!dst) - goto drop_and_free; - } tcp_ecn_create_request(req, skb, sk, dst); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e9252c7df809..c6bc0c4d19c6 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -85,7 +85,7 @@ #include <crypto/hash.h> #include <linux/scatterlist.h> -int sysctl_tcp_low_latency __read_mostly; +#include <trace/events/tcp.h> #ifdef CONFIG_TCP_MD5SIG static int tcp_v4_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key, @@ -385,7 +385,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr, th->dest, iph->saddr, ntohs(th->source), - inet_iif(icmp_skb)); + inet_iif(icmp_skb), 0); if (!sk) { __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); return; @@ -482,7 +482,7 @@ void tcp_v4_err(struct sk_buff *icmp_skb, u32 info) TCP_TIMEOUT_INIT; icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX); - skb = tcp_write_queue_head(sk); + skb = tcp_rtx_queue_head(sk); BUG_ON(!skb); tcp_mstamp_refresh(tp); @@ -661,7 +661,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0, ip_hdr(skb)->saddr, th->source, ip_hdr(skb)->daddr, - ntohs(th->source), inet_iif(skb)); + ntohs(th->source), inet_iif(skb), + tcp_v4_sdif(skb)); /* don't send rst if it can't find key */ if (!sk1) goto out; @@ -702,8 +703,10 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) * routing might fail in this case. No choice here, if we choose to force * input interface, we will misroute in case of asymmetric route. */ - if (sk) + if (sk) { arg.bound_dev_if = sk->sk_bound_dev_if; + trace_tcp_send_reset(sk, skb); + } BUILD_BUG_ON(offsetof(struct sock, sk_bound_dev_if) != offsetof(struct inet_timewait_sock, tw_bound_dev_if)); @@ -878,7 +881,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, err = ip_build_and_send_pkt(skb, sk, ireq->ir_loc_addr, ireq->ir_rmt_addr, - ireq->opt); + ireq_opt_deref(ireq)); err = net_xmit_eval(err); } @@ -890,7 +893,7 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, */ static void tcp_v4_reqsk_destructor(struct request_sock *req) { - kfree(inet_rsk(req)->opt); + kfree(rcu_dereference_protected(inet_rsk(req)->ireq_opt, 1)); } #ifdef CONFIG_TCP_MD5SIG @@ -1266,10 +1269,11 @@ static void tcp_v4_init_req(struct request_sock *req, struct sk_buff *skb) { struct inet_request_sock *ireq = inet_rsk(req); + struct net *net = sock_net(sk_listener); sk_rcv_saddr_set(req_to_sk(req), ip_hdr(skb)->daddr); sk_daddr_set(req_to_sk(req), ip_hdr(skb)->saddr); - ireq->opt = tcp_v4_save_options(skb); + RCU_INIT_POINTER(ireq->ireq_opt, tcp_v4_save_options(net, skb)); } static struct dst_entry *tcp_v4_route_req(const struct sock *sk, @@ -1356,10 +1360,9 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, sk_daddr_set(newsk, ireq->ir_rmt_addr); sk_rcv_saddr_set(newsk, ireq->ir_loc_addr); newsk->sk_bound_dev_if = ireq->ir_iif; - newinet->inet_saddr = ireq->ir_loc_addr; - inet_opt = ireq->opt; - rcu_assign_pointer(newinet->inet_opt, inet_opt); - ireq->opt = NULL; + newinet->inet_saddr = ireq->ir_loc_addr; + inet_opt = rcu_dereference(ireq->ireq_opt); + RCU_INIT_POINTER(newinet->inet_opt, inet_opt); newinet->mc_index = inet_iif(skb); newinet->mc_ttl = ip_hdr(skb)->ttl; newinet->rcv_tos = ip_hdr(skb)->tos; @@ -1404,9 +1407,12 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, if (__inet_inherit_port(sk, newsk) < 0) goto put_and_exit; *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash)); - if (*own_req) + if (likely(*own_req)) { tcp_move_syn(newtp, req); - + ireq->ireq_opt = NULL; + } else { + newinet->inet_opt = NULL; + } return newsk; exit_overflow: @@ -1417,6 +1423,7 @@ exit: tcp_listendrop(sk); return NULL; put_and_exit: + newinet->inet_opt = NULL; inet_csk_prepare_forced_close(newsk); tcp_done(newsk); goto exit; @@ -1458,7 +1465,7 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) sk->sk_rx_dst = NULL; } } - tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len); + tcp_rcv_established(sk, skb, tcp_hdr(skb)); return 0; } @@ -1504,28 +1511,28 @@ csum_err: } EXPORT_SYMBOL(tcp_v4_do_rcv); -void tcp_v4_early_demux(struct sk_buff *skb) +int tcp_v4_early_demux(struct sk_buff *skb) { const struct iphdr *iph; const struct tcphdr *th; struct sock *sk; if (skb->pkt_type != PACKET_HOST) - return; + return 0; if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr))) - return; + return 0; iph = ip_hdr(skb); th = tcp_hdr(skb); if (th->doff < sizeof(struct tcphdr) / 4) - return; + return 0; sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo, iph->saddr, th->source, iph->daddr, ntohs(th->dest), - skb->skb_iif); + skb->skb_iif, inet_sdif(skb)); if (sk) { skb->sk = sk; skb->destructor = sock_edemux; @@ -1539,63 +1546,9 @@ void tcp_v4_early_demux(struct sk_buff *skb) skb_dst_set_noref(skb, dst); } } + return 0; } -/* Packet is added to VJ-style prequeue for processing in process - * context, if a reader task is waiting. Apparently, this exciting - * idea (VJ's mail "Re: query about TCP header on tcp-ip" of 07 Sep 93) - * failed somewhere. Latency? Burstiness? Well, at least now we will - * see, why it failed. 8)8) --ANK - * - */ -bool tcp_prequeue(struct sock *sk, struct sk_buff *skb) -{ - struct tcp_sock *tp = tcp_sk(sk); - - if (sysctl_tcp_low_latency || !tp->ucopy.task) - return false; - - if (skb->len <= tcp_hdrlen(skb) && - skb_queue_len(&tp->ucopy.prequeue) == 0) - return false; - - /* Before escaping RCU protected region, we need to take care of skb - * dst. Prequeue is only enabled for established sockets. - * For such sockets, we might need the skb dst only to set sk->sk_rx_dst - * Instead of doing full sk_rx_dst validity here, let's perform - * an optimistic check. - */ - if (likely(sk->sk_rx_dst)) - skb_dst_drop(skb); - else - skb_dst_force_safe(skb); - - __skb_queue_tail(&tp->ucopy.prequeue, skb); - tp->ucopy.memory += skb->truesize; - if (skb_queue_len(&tp->ucopy.prequeue) >= 32 || - tp->ucopy.memory + atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf) { - struct sk_buff *skb1; - - BUG_ON(sock_owned_by_user(sk)); - __NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPPREQUEUEDROPPED, - skb_queue_len(&tp->ucopy.prequeue)); - - while ((skb1 = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) - sk_backlog_rcv(sk, skb1); - - tp->ucopy.memory = 0; - } else if (skb_queue_len(&tp->ucopy.prequeue) == 1) { - wake_up_interruptible_sync_poll(sk_sleep(sk), - POLLIN | POLLRDNORM | POLLRDBAND); - if (!inet_csk_ack_scheduled(sk)) - inet_csk_reset_xmit_timer(sk, ICSK_TIME_DACK, - (3 * tcp_rto_min(sk)) / 4, - TCP_RTO_MAX); - } - return true; -} -EXPORT_SYMBOL(tcp_prequeue); - bool tcp_add_backlog(struct sock *sk, struct sk_buff *skb) { u32 limit = sk->sk_rcvbuf + sk->sk_sndbuf; @@ -1645,6 +1598,7 @@ EXPORT_SYMBOL(tcp_filter); int tcp_v4_rcv(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); + int sdif = inet_sdif(skb); const struct iphdr *iph; const struct tcphdr *th; bool refcounted; @@ -1692,10 +1646,12 @@ int tcp_v4_rcv(struct sk_buff *skb) TCP_SKB_CB(skb)->tcp_tw_isn = 0; TCP_SKB_CB(skb)->ip_dsfield = ipv4_get_dsfield(iph); TCP_SKB_CB(skb)->sacked = 0; + TCP_SKB_CB(skb)->has_rxtstamp = + skb->tstamp || skb_hwtstamps(skb)->hwtstamp; lookup: sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source, - th->dest, &refcounted); + th->dest, sdif, &refcounted); if (!sk) goto no_tcp_socket; @@ -1722,9 +1678,9 @@ process: */ sock_hold(sk); refcounted = true; - if (tcp_filter(sk, skb)) - goto discard_and_relse; - nsk = tcp_check_req(sk, skb, req, false); + nsk = NULL; + if (!tcp_filter(sk, skb)) + nsk = tcp_check_req(sk, skb, req, false); if (!nsk) { reqsk_put(req); goto discard_and_relse; @@ -1770,8 +1726,7 @@ process: tcp_segs_in(tcp_sk(sk), skb); ret = 0; if (!sock_owned_by_user(sk)) { - if (!tcp_prequeue(sk, skb)) - ret = tcp_v4_do_rcv(sk, skb); + ret = tcp_v4_do_rcv(sk, skb); } else if (tcp_add_backlog(sk, skb)) { goto discard_and_relse; } @@ -1824,15 +1779,17 @@ do_time_wait: __tcp_hdrlen(th), iph->saddr, th->source, iph->daddr, th->dest, - inet_iif(skb)); + inet_iif(skb), + sdif); if (sk2) { inet_twsk_deschedule_put(inet_twsk(sk)); sk = sk2; refcounted = false; goto process; } - /* Fall through to ACK */ } + /* to ACK */ + /* fall through */ case TCP_TW_ACK: tcp_v4_timewait_ack(sk, skb); break; @@ -1912,6 +1869,8 @@ void tcp_v4_destroy_sock(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); + trace_tcp_destroy_sock(sk); + tcp_clear_xmit_timers(sk); tcp_cleanup_congestion_control(sk); @@ -1936,9 +1895,6 @@ void tcp_v4_destroy_sock(struct sock *sk) } #endif - /* Clean prequeue, it must be empty really */ - __skb_queue_purge(&tp->ucopy.prequeue); - /* Clean up a referenced TCP bind bucket. */ if (inet_csk(sk)->icsk_bind_hash) inet_put_port(sk); @@ -1947,6 +1903,7 @@ void tcp_v4_destroy_sock(struct sock *sk) /* If socket is aborted during connect operation */ tcp_free_fastopen_req(tp); + tcp_fastopen_destroy_cipher(sk); tcp_saved_syn_free(tp); sk_sockets_allocated_dec(sk); @@ -2452,8 +2409,8 @@ struct proto tcp_prot = { .memory_allocated = &tcp_memory_allocated, .memory_pressure = &tcp_memory_pressure, .sysctl_mem = sysctl_tcp_mem, - .sysctl_wmem = sysctl_tcp_wmem, - .sysctl_rmem = sysctl_tcp_rmem, + .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem), + .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem), .max_header = MAX_TCP_HEADER, .obj_size = sizeof(struct tcp_sock), .slab_flags = SLAB_TYPESAFE_BY_RCU, @@ -2473,6 +2430,8 @@ static void __net_exit tcp_sk_exit(struct net *net) { int cpu; + module_put(net->ipv4.tcp_congestion_control->owner); + for_each_possible_cpu(cpu) inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.tcp_sk, cpu)); free_percpu(net->ipv4.tcp_sk); @@ -2527,6 +2486,50 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_sack = 1; net->ipv4.sysctl_tcp_window_scaling = 1; net->ipv4.sysctl_tcp_timestamps = 1; + net->ipv4.sysctl_tcp_early_retrans = 3; + net->ipv4.sysctl_tcp_recovery = TCP_RACK_LOSS_DETECTION; + net->ipv4.sysctl_tcp_slow_start_after_idle = 1; /* By default, RFC2861 behavior. */ + net->ipv4.sysctl_tcp_retrans_collapse = 1; + net->ipv4.sysctl_tcp_max_reordering = 300; + net->ipv4.sysctl_tcp_dsack = 1; + net->ipv4.sysctl_tcp_app_win = 31; + net->ipv4.sysctl_tcp_adv_win_scale = 1; + net->ipv4.sysctl_tcp_frto = 2; + net->ipv4.sysctl_tcp_moderate_rcvbuf = 1; + /* This limits the percentage of the congestion window which we + * will allow a single TSO frame to consume. Building TSO frames + * which are too large can cause TCP streams to be bursty. + */ + net->ipv4.sysctl_tcp_tso_win_divisor = 3; + /* Default TSQ limit of four TSO segments */ + net->ipv4.sysctl_tcp_limit_output_bytes = 262144; + /* rfc5961 challenge ack rate limiting */ + net->ipv4.sysctl_tcp_challenge_ack_limit = 1000; + net->ipv4.sysctl_tcp_min_tso_segs = 2; + net->ipv4.sysctl_tcp_min_rtt_wlen = 300; + net->ipv4.sysctl_tcp_autocorking = 1; + net->ipv4.sysctl_tcp_invalid_ratelimit = HZ/2; + net->ipv4.sysctl_tcp_pacing_ss_ratio = 200; + net->ipv4.sysctl_tcp_pacing_ca_ratio = 120; + if (net != &init_net) { + memcpy(net->ipv4.sysctl_tcp_rmem, + init_net.ipv4.sysctl_tcp_rmem, + sizeof(init_net.ipv4.sysctl_tcp_rmem)); + memcpy(net->ipv4.sysctl_tcp_wmem, + init_net.ipv4.sysctl_tcp_wmem, + sizeof(init_net.ipv4.sysctl_tcp_wmem)); + } + net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE; + spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock); + net->ipv4.sysctl_tcp_fastopen_blackhole_timeout = 60 * 60; + atomic_set(&net->ipv4.tfo_active_disable_times, 0); + + /* Reno is always built in */ + if (!net_eq(net, &init_net) && + try_module_get(init_net.ipv4.tcp_congestion_control->owner)) + net->ipv4.tcp_congestion_control = init_net.ipv4.tcp_congestion_control; + else + net->ipv4.tcp_congestion_control = &tcp_reno; return 0; fail: @@ -2537,7 +2540,12 @@ fail: static void __net_exit tcp_sk_exit_batch(struct list_head *net_exit_list) { + struct net *net; + inet_twsk_purge(&tcp_hashinfo, AF_INET); + + list_for_each_entry(net, net_exit_list, exit_list) + tcp_fastopen_ctx_destroy(net); } static struct pernet_operations __net_initdata tcp_sk_ops = { diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 102b2c90bb80..7097f92d16e5 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/rcupdate.h> #include <linux/spinlock.h> #include <linux/jiffies.h> @@ -20,8 +21,6 @@ #include <net/tcp.h> #include <net/genetlink.h> -int sysctl_tcp_nometrics_save __read_mostly; - static struct tcp_metrics_block *__tcp_get_metrics(const struct inetpeer_addr *saddr, const struct inetpeer_addr *daddr, struct net *net, unsigned int hash); @@ -330,7 +329,7 @@ void tcp_update_metrics(struct sock *sk) int m; sk_dst_confirm(sk); - if (sysctl_tcp_nometrics_save || !dst) + if (net->ipv4.sysctl_tcp_nometrics_save || !dst) return; rcu_read_lock(); @@ -471,10 +470,8 @@ void tcp_init_metrics(struct sock *sk) tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; } val = tcp_metric_get(tm, TCP_METRIC_REORDERING); - if (val && tp->reordering != val) { - tcp_disable_fack(tp); + if (val && tp->reordering != val) tp->reordering = val; - } crtt = tcp_metric_get(tm, TCP_METRIC_RTT); rcu_read_unlock(); @@ -892,10 +889,14 @@ static void tcp_metrics_flush_all(struct net *net) for (row = 0; row < max_rows; row++, hb++) { struct tcp_metrics_block __rcu **pp; + bool match; + spin_lock_bh(&tcp_metrics_lock); pp = &hb->chain; for (tm = deref_locked(*pp); tm; tm = deref_locked(*pp)) { - if (net_eq(tm_net(tm), net)) { + match = net ? net_eq(tm_net(tm), net) : + !atomic_read(&tm_net(tm)->count); + if (match) { *pp = tm->tcpm_next; kfree_rcu(tm, rcu_head); } else { @@ -1018,14 +1019,14 @@ static int __net_init tcp_net_metrics_init(struct net *net) return 0; } -static void __net_exit tcp_net_metrics_exit(struct net *net) +static void __net_exit tcp_net_metrics_exit_batch(struct list_head *net_exit_list) { - tcp_metrics_flush_all(net); + tcp_metrics_flush_all(NULL); } static __net_initdata struct pernet_operations tcp_net_metrics_ops = { - .init = tcp_net_metrics_init, - .exit = tcp_net_metrics_exit, + .init = tcp_net_metrics_init, + .exit_batch = tcp_net_metrics_exit_batch, }; void __init tcp_metrics_init(void) diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 0ff83c1637d8..e36eff0403f4 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -23,13 +23,12 @@ #include <linux/slab.h> #include <linux/sysctl.h> #include <linux/workqueue.h> +#include <linux/static_key.h> #include <net/tcp.h> #include <net/inet_common.h> #include <net/xfrm.h> #include <net/busy_poll.h> -int sysctl_tcp_abort_on_overflow __read_mostly; - static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) { if (seq == s_win) @@ -180,7 +179,7 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, * Oh well... nobody has a sufficient solution to this * protocol bug yet. */ - if (sysctl_tcp_rfc1337 == 0) { + if (twsk_net(tw)->ipv4.sysctl_tcp_rfc1337 == 0) { kill: inet_twsk_deschedule_put(tw); return TCP_TW_SUCCESS; @@ -298,8 +297,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) key = tp->af_specific->md5_lookup(sk, sk); if (key) { tcptw->tw_md5_key = kmemdup(key, sizeof(*key), GFP_ATOMIC); - if (tcptw->tw_md5_key && !tcp_alloc_md5sig_pool()) - BUG(); + BUG_ON(tcptw->tw_md5_key && !tcp_alloc_md5sig_pool()); } } while (0); #endif @@ -371,7 +369,7 @@ void tcp_openreq_init_rwin(struct request_sock *req, full_space = rcv_wnd * mss; /* tcp_full_space because it is guaranteed to be the first packet */ - tcp_select_initial_window(full_space, + tcp_select_initial_window(sk_listener, full_space, mss - (ireq->tstamp_ok ? TCPOLEN_TSTAMP_ALIGNED : 0), &req->rsk_rcv_wnd, &req->rsk_window_clamp, @@ -417,6 +415,21 @@ void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst) } EXPORT_SYMBOL_GPL(tcp_ca_openreq_child); +static void smc_check_reset_syn_req(struct tcp_sock *oldtp, + struct request_sock *req, + struct tcp_sock *newtp) +{ +#if IS_ENABLED(CONFIG_SMC) + struct inet_request_sock *ireq; + + if (static_branch_unlikely(&tcp_have_smc)) { + ireq = inet_rsk(req); + if (oldtp->syn_smc && !ireq->smc_ok) + newtp->syn_smc = 0; + } +#endif +} + /* This is not only more efficient than what we used to do, it eliminates * a lot of code duplication between IPv4/IPv6 SYN recv processing. -DaveM * @@ -434,6 +447,9 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, struct tcp_request_sock *treq = tcp_rsk(req); struct inet_connection_sock *newicsk = inet_csk(newsk); struct tcp_sock *newtp = tcp_sk(newsk); + struct tcp_sock *oldtp = tcp_sk(sk); + + smc_check_reset_syn_req(oldtp, req, newtp); /* Now setup tcp_sock */ newtp->pred_flags = 0; @@ -445,8 +461,8 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->snd_sml = newtp->snd_una = newtp->snd_nxt = newtp->snd_up = treq->snt_isn + 1; - tcp_prequeue_init(newtp); INIT_LIST_HEAD(&newtp->tsq_node); + INIT_LIST_HEAD(&newtp->tsorted_sent_queue); tcp_init_wl(newtp, treq->rcv_isn); @@ -459,7 +475,6 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->packets_out = 0; newtp->retrans_out = 0; newtp->sacked_out = 0; - newtp->fackets_out = 0; newtp->snd_ssthresh = TCP_INFINITE_SSTHRESH; newtp->tlp_high_seq = 0; newtp->lsndtime = tcp_jiffies32; @@ -493,10 +508,7 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, keepalive_time_when(newtp)); newtp->rx_opt.tstamp_ok = ireq->tstamp_ok; - if ((newtp->rx_opt.sack_ok = ireq->sack_ok) != 0) { - if (sysctl_tcp_fack) - tcp_enable_fack(newtp); - } + newtp->rx_opt.sack_ok = ireq->sack_ok; newtp->window_clamp = req->rsk_window_clamp; newtp->rcv_ssthresh = req->rsk_rcv_wnd; newtp->rcv_wnd = req->rsk_rcv_wnd; @@ -535,6 +547,10 @@ struct sock *tcp_create_openreq_child(const struct sock *sk, newtp->syn_data_acked = 0; newtp->rack.mstamp = 0; newtp->rack.advanced = 0; + newtp->rack.reo_wnd_steps = 1; + newtp->rack.last_delivered = 0; + newtp->rack.reo_wnd_persist = 0; + newtp->rack.dsack_seen = 0; __TCP_INC_STATS(sock_net(sk), TCP_MIB_PASSIVEOPENS); } @@ -765,7 +781,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, return inet_csk_complete_hashdance(sk, child, req, own_req); listen_overflow: - if (!sysctl_tcp_abort_on_overflow) { + if (!sock_net(sk)->ipv4.sysctl_tcp_abort_on_overflow) { inet_rsk(req)->acked = 1; return NULL; } diff --git a/net/ipv4/tcp_nv.c b/net/ipv4/tcp_nv.c index 6d650ed3cb59..0b5a05bd82e3 100644 --- a/net/ipv4/tcp_nv.c +++ b/net/ipv4/tcp_nv.c @@ -39,7 +39,7 @@ * nv_cong_dec_mult Decrease cwnd by X% (30%) of congestion when detected * nv_ssthresh_factor On congestion set ssthresh to this * <desired cwnd> / 8 * nv_rtt_factor RTT averaging factor - * nv_loss_dec_factor Decrease cwnd by this (50%) when losses occur + * nv_loss_dec_factor Decrease cwnd to this (80%) when losses occur * nv_dec_eval_min_calls Wait this many RTT measurements before dec cwnd * nv_inc_eval_min_calls Wait this many RTT measurements before inc cwnd * nv_ssthresh_eval_min_calls Wait this many RTT measurements before stopping @@ -61,7 +61,7 @@ static int nv_min_cwnd __read_mostly = 2; static int nv_cong_dec_mult __read_mostly = 30 * 128 / 100; /* = 30% */ static int nv_ssthresh_factor __read_mostly = 8; /* = 1 */ static int nv_rtt_factor __read_mostly = 128; /* = 1/2*old + 1/2*new */ -static int nv_loss_dec_factor __read_mostly = 512; /* => 50% */ +static int nv_loss_dec_factor __read_mostly = 819; /* => 80% */ static int nv_cwnd_growth_rate_neg __read_mostly = 8; static int nv_cwnd_growth_rate_pos __read_mostly; /* 0 => fixed like Reno */ static int nv_dec_eval_min_calls __read_mostly = 60; @@ -86,7 +86,6 @@ struct tcpnv { * < 0 => less than 1 packet/RTT */ u8 available8; u16 available16; - u32 loss_cwnd; /* cwnd at last loss */ u8 nv_allow_cwnd_growth:1, /* whether cwnd can grow */ nv_reset:1, /* whether to reset values */ nv_catchup:1; /* whether we are growing because @@ -102,6 +101,11 @@ struct tcpnv { u32 nv_last_rtt; /* last rtt */ u32 nv_min_rtt; /* active min rtt. Used to determine slope */ u32 nv_min_rtt_new; /* min rtt for future use */ + u32 nv_base_rtt; /* If non-zero it represents the threshold for + * congestion */ + u32 nv_lower_bound_rtt; /* Used in conjunction with nv_base_rtt. It is + * set to 80% of nv_base_rtt. It helps reduce + * unfairness between flows */ u32 nv_rtt_max_rate; /* max rate seen during current RTT */ u32 nv_rtt_start_seq; /* current RTT ends when packet arrives * acking beyond nv_rtt_start_seq */ @@ -121,7 +125,6 @@ static inline void tcpnv_reset(struct tcpnv *ca, struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); ca->nv_reset = 0; - ca->loss_cwnd = 0; ca->nv_no_cong_cnt = 0; ca->nv_rtt_cnt = 0; ca->nv_last_rtt = 0; @@ -134,9 +137,24 @@ static inline void tcpnv_reset(struct tcpnv *ca, struct sock *sk) static void tcpnv_init(struct sock *sk) { struct tcpnv *ca = inet_csk_ca(sk); + int base_rtt; tcpnv_reset(ca, sk); + /* See if base_rtt is available from socket_ops bpf program. + * It is meant to be used in environments, such as communication + * within a datacenter, where we have reasonable estimates of + * RTTs + */ + base_rtt = tcp_call_bpf(sk, BPF_SOCK_OPS_BASE_RTT); + if (base_rtt > 0) { + ca->nv_base_rtt = base_rtt; + ca->nv_lower_bound_rtt = (base_rtt * 205) >> 8; /* 80% */ + } else { + ca->nv_base_rtt = 0; + ca->nv_lower_bound_rtt = 0; + } + ca->nv_allow_cwnd_growth = 1; ca->nv_min_rtt_reset_jiffies = jiffies + 2 * HZ; ca->nv_min_rtt = NV_INIT_RTT; @@ -146,6 +164,19 @@ static void tcpnv_init(struct sock *sk) ca->cwnd_growth_factor = 0; } +/* If provided, apply upper (base_rtt) and lower (lower_bound_rtt) + * bounds to RTT. + */ +inline u32 nv_get_bounded_rtt(struct tcpnv *ca, u32 val) +{ + if (ca->nv_lower_bound_rtt > 0 && val < ca->nv_lower_bound_rtt) + return ca->nv_lower_bound_rtt; + else if (ca->nv_base_rtt > 0 && val > ca->nv_base_rtt) + return ca->nv_base_rtt; + else + return val; +} + static void tcpnv_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); @@ -177,19 +208,10 @@ static void tcpnv_cong_avoid(struct sock *sk, u32 ack, u32 acked) static u32 tcpnv_recalc_ssthresh(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); - struct tcpnv *ca = inet_csk_ca(sk); - ca->loss_cwnd = tp->snd_cwnd; return max((tp->snd_cwnd * nv_loss_dec_factor) >> 10, 2U); } -static u32 tcpnv_undo_cwnd(struct sock *sk) -{ - struct tcpnv *ca = inet_csk_ca(sk); - - return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd); -} - static void tcpnv_state(struct sock *sk, u8 new_state) { struct tcpnv *ca = inet_csk_ca(sk); @@ -220,7 +242,7 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample) struct tcp_sock *tp = tcp_sk(sk); struct tcpnv *ca = inet_csk_ca(sk); unsigned long now = jiffies; - s64 rate64 = 0; + u64 rate64; u32 rate, max_win, cwnd_by_slope; u32 avg_rtt; u32 bytes_acked = 0; @@ -262,8 +284,9 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample) } /* rate in 100's bits per second */ - rate64 = ((u64)sample->in_flight) * 8000000; - rate = (u32)div64_u64(rate64, (u64)(avg_rtt * 100)); + rate64 = ((u64)sample->in_flight) * 80000; + do_div(rate64, avg_rtt ?: 1); + rate = (u32)rate64; /* Remember the maximum rate seen during this RTT * Note: It may be more than one RTT. This function should be @@ -276,6 +299,9 @@ static void tcpnv_acked(struct sock *sk, const struct ack_sample *sample) if (ca->nv_eval_call_cnt < 255) ca->nv_eval_call_cnt++; + /* Apply bounds to rtt. Only used to update min_rtt */ + avg_rtt = nv_get_bounded_rtt(ca, avg_rtt); + /* update min rtt if necessary */ if (avg_rtt < ca->nv_min_rtt) ca->nv_min_rtt = avg_rtt; @@ -446,7 +472,7 @@ static struct tcp_congestion_ops tcpnv __read_mostly = { .ssthresh = tcpnv_recalc_ssthresh, .cong_avoid = tcpnv_cong_avoid, .set_state = tcpnv_state, - .undo_cwnd = tcpnv_undo_cwnd, + .undo_cwnd = tcp_reno_undo_cwnd, .pkts_acked = tcpnv_acked, .get_info = tcpnv_get_info, diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index 11f69bbf9307..b6a2aa1dcf56 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -149,11 +149,19 @@ struct sk_buff *tcp_gso_segment(struct sk_buff *skb, * is freed by GSO engine */ if (copy_destructor) { + int delta; + swap(gso_skb->sk, skb->sk); swap(gso_skb->destructor, skb->destructor); sum_truesize += skb->truesize; - refcount_add(sum_truesize - gso_skb->truesize, - &skb->sk->sk_wmem_alloc); + delta = sum_truesize - gso_skb->truesize; + /* In some pathological cases, delta can be negative. + * We need to either use refcount_add() or refcount_sub_and_test() + */ + if (likely(delta >= 0)) + refcount_add(delta, &skb->sk->sk_wmem_alloc); + else + WARN_ON_ONCE(refcount_sub_and_test(-delta, &skb->sk->sk_wmem_alloc)); } delta = htonl(oldlen + (skb_tail_pointer(skb) - diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b7661a68d498..540b7d92cc70 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -41,40 +41,25 @@ #include <linux/compiler.h> #include <linux/gfp.h> #include <linux/module.h> +#include <linux/static_key.h> -/* People can turn this off for buggy TCP's found in printers etc. */ -int sysctl_tcp_retrans_collapse __read_mostly = 1; - -/* People can turn this on to work with those rare, broken TCPs that - * interpret the window field as a signed quantity. - */ -int sysctl_tcp_workaround_signed_windows __read_mostly = 0; - -/* Default TSQ limit of four TSO segments */ -int sysctl_tcp_limit_output_bytes __read_mostly = 262144; - -/* This limits the percentage of the congestion window which we - * will allow a single TSO frame to consume. Building TSO frames - * which are too large can cause TCP streams to be bursty. - */ -int sysctl_tcp_tso_win_divisor __read_mostly = 3; - -/* By default, RFC2861 behavior. */ -int sysctl_tcp_slow_start_after_idle __read_mostly = 1; +#include <trace/events/tcp.h> static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, int push_one, gfp_t gfp); /* Account for new data that has been sent to the network. */ -static void tcp_event_new_data_sent(struct sock *sk, const struct sk_buff *skb) +static void tcp_event_new_data_sent(struct sock *sk, struct sk_buff *skb) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); unsigned int prior_packets = tp->packets_out; - tcp_advance_send_head(sk, skb); tp->snd_nxt = TCP_SKB_CB(skb)->end_seq; + __skb_unlink(skb, &sk->sk_write_queue); + tcp_rbtree_insert(&sk->tcp_rtx_queue, skb); + tp->packets_out += tcp_skb_pcount(skb); if (!prior_packets || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) tcp_rearm_rto(sk); @@ -203,7 +188,7 @@ u32 tcp_default_init_rwnd(u32 mss) * be a multiple of mss if possible. We assume here that mss >= 1. * This MUST be enforced by all callers. */ -void tcp_select_initial_window(int __space, __u32 mss, +void tcp_select_initial_window(const struct sock *sk, int __space, __u32 mss, __u32 *rcv_wnd, __u32 *window_clamp, int wscale_ok, __u8 *rcv_wscale, __u32 init_rcv_wnd) @@ -227,7 +212,7 @@ void tcp_select_initial_window(int __space, __u32 mss, * which we interpret as a sign the remote TCP is not * misinterpreting the window field as a signed quantity. */ - if (sysctl_tcp_workaround_signed_windows) + if (sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows) (*rcv_wnd) = min(space, MAX_TCP_WINDOW); else (*rcv_wnd) = space; @@ -235,7 +220,7 @@ void tcp_select_initial_window(int __space, __u32 mss, (*rcv_wscale) = 0; if (wscale_ok) { /* Set window scaling on max possible window */ - space = max_t(u32, space, sysctl_tcp_rmem[2]); + space = max_t(u32, space, sock_net(sk)->ipv4.sysctl_tcp_rmem[2]); space = max_t(u32, space, sysctl_rmem_max); space = min_t(u32, space, *window_clamp); while (space > U16_MAX && (*rcv_wscale) < TCP_MAX_WSCALE) { @@ -287,7 +272,8 @@ static u16 tcp_select_window(struct sock *sk) /* Make sure we do not exceed the maximum possible * scaled window. */ - if (!tp->rx_opt.rcv_wscale && sysctl_tcp_workaround_signed_windows) + if (!tp->rx_opt.rcv_wscale && + sock_net(sk)->ipv4.sysctl_tcp_workaround_signed_windows) new_win = min(new_win, MAX_TCP_WINDOW); else new_win = min(new_win, (65535U << tp->rx_opt.rcv_wscale)); @@ -395,7 +381,6 @@ static void tcp_ecn_send(struct sock *sk, struct sk_buff *skb, static void tcp_init_nondata_skb(struct sk_buff *skb, u32 seq, u8 flags) { skb->ip_summed = CHECKSUM_PARTIAL; - skb->csum = 0; TCP_SKB_CB(skb)->tcp_flags = flags; TCP_SKB_CB(skb)->sacked = 0; @@ -418,6 +403,22 @@ static inline bool tcp_urg_mode(const struct tcp_sock *tp) #define OPTION_MD5 (1 << 2) #define OPTION_WSCALE (1 << 3) #define OPTION_FAST_OPEN_COOKIE (1 << 8) +#define OPTION_SMC (1 << 9) + +static void smc_options_write(__be32 *ptr, u16 *options) +{ +#if IS_ENABLED(CONFIG_SMC) + if (static_branch_unlikely(&tcp_have_smc)) { + if (unlikely(OPTION_SMC & *options)) { + *ptr++ = htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_EXP << 8) | + (TCPOLEN_EXP_SMC_BASE)); + *ptr++ = htonl(TCPOPT_SMC_MAGIC); + } + } +#endif +} struct tcp_out_options { u16 options; /* bit field of OPTION_* */ @@ -536,6 +537,41 @@ static void tcp_options_write(__be32 *ptr, struct tcp_sock *tp, } ptr += (len + 3) >> 2; } + + smc_options_write(ptr, &options); +} + +static void smc_set_option(const struct tcp_sock *tp, + struct tcp_out_options *opts, + unsigned int *remaining) +{ +#if IS_ENABLED(CONFIG_SMC) + if (static_branch_unlikely(&tcp_have_smc)) { + if (tp->syn_smc) { + if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) { + opts->options |= OPTION_SMC; + *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED; + } + } + } +#endif +} + +static void smc_set_option_cond(const struct tcp_sock *tp, + const struct inet_request_sock *ireq, + struct tcp_out_options *opts, + unsigned int *remaining) +{ +#if IS_ENABLED(CONFIG_SMC) + if (static_branch_unlikely(&tcp_have_smc)) { + if (tp->syn_smc && ireq->smc_ok) { + if (*remaining >= TCPOLEN_EXP_SMC_BASE_ALIGNED) { + opts->options |= OPTION_SMC; + *remaining -= TCPOLEN_EXP_SMC_BASE_ALIGNED; + } + } + } +#endif } /* Compute TCP options for SYN packets. This is not the final @@ -603,11 +639,14 @@ static unsigned int tcp_syn_options(struct sock *sk, struct sk_buff *skb, } } + smc_set_option(tp, opts, &remaining); + return MAX_TCP_OPTION_SPACE - remaining; } /* Set up TCP options for SYN-ACKs. */ -static unsigned int tcp_synack_options(struct request_sock *req, +static unsigned int tcp_synack_options(const struct sock *sk, + struct request_sock *req, unsigned int mss, struct sk_buff *skb, struct tcp_out_options *opts, const struct tcp_md5sig_key *md5, @@ -663,6 +702,8 @@ static unsigned int tcp_synack_options(struct request_sock *req, } } + smc_set_option_cond(tcp_sk(sk), ireq, opts, &remaining); + return MAX_TCP_OPTION_SPACE - remaining; } @@ -739,8 +780,10 @@ static void tcp_tsq_handler(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); if (tp->lost_out > tp->retrans_out && - tp->snd_cwnd > tcp_packets_in_flight(tp)) + tp->snd_cwnd > tcp_packets_in_flight(tp)) { + tcp_mstamp_refresh(tp); tcp_xmit_retransmit_queue(sk); + } tcp_write_xmit(sk, tcp_current_mss(sk), tp->nonagle, 0, GFP_ATOMIC); @@ -971,6 +1014,12 @@ static void tcp_internal_pacing(struct sock *sk, const struct sk_buff *skb) HRTIMER_MODE_ABS_PINNED); } +static void tcp_update_skb_after_send(struct tcp_sock *tp, struct sk_buff *skb) +{ + skb->skb_mstamp = tp->tcp_mstamp; + list_move_tail(&skb->tcp_tsorted_anchor, &tp->tsorted_sent_queue); +} + /* This routine actually transmits TCP packets queued in by * tcp_do_sendmsg(). This is used by both the initial * transmission and possible later retransmissions. @@ -991,6 +1040,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, struct tcp_skb_cb *tcb; struct tcp_out_options opts; unsigned int tcp_options_size, tcp_header_size; + struct sk_buff *oskb = NULL; struct tcp_md5sig_key *md5; struct tcphdr *th; int err; @@ -998,19 +1048,22 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, BUG_ON(!skb || !tcp_skb_pcount(skb)); tp = tcp_sk(sk); - skb->skb_mstamp = tp->tcp_mstamp; if (clone_it) { TCP_SKB_CB(skb)->tx.in_flight = TCP_SKB_CB(skb)->end_seq - tp->snd_una; - tcp_rate_skb_sent(sk, skb); + oskb = skb; + + tcp_skb_tsorted_save(oskb) { + if (unlikely(skb_cloned(oskb))) + skb = pskb_copy(oskb, gfp_mask); + else + skb = skb_clone(oskb, gfp_mask); + } tcp_skb_tsorted_restore(oskb); - if (unlikely(skb_cloned(skb))) - skb = pskb_copy(skb, gfp_mask); - else - skb = skb_clone(skb, gfp_mask); if (unlikely(!skb)) return -ENOBUFS; } + skb->skb_mstamp = tp->tcp_mstamp; inet = inet_sk(sk); tcb = TCP_SKB_CB(skb); @@ -1122,12 +1175,15 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, err = icsk->icsk_af_ops->queue_xmit(sk, skb, &inet->cork.fl); - if (likely(err <= 0)) - return err; - - tcp_enter_cwr(sk); - - return net_xmit_eval(err); + if (unlikely(err > 0)) { + tcp_enter_cwr(sk); + err = net_xmit_eval(err); + } + if (!err && oskb) { + tcp_update_skb_after_send(tp, oskb); + tcp_rate_skb_sent(sk, oskb); + } + return err; } /* This routine just queues the buffer for sending. @@ -1162,21 +1218,6 @@ static void tcp_set_skb_tso_segs(struct sk_buff *skb, unsigned int mss_now) } } -/* When a modification to fackets out becomes necessary, we need to check - * skb is counted to fackets_out or not. - */ -static void tcp_adjust_fackets_out(struct sock *sk, const struct sk_buff *skb, - int decr) -{ - struct tcp_sock *tp = tcp_sk(sk); - - if (!tp->sacked_out || tcp_is_reno(tp)) - return; - - if (after(tcp_highest_sack_seq(tp), TCP_SKB_CB(skb)->seq)) - tp->fackets_out -= decr; -} - /* Pcount in the middle of the write queue got changed, we need to do various * tweaks to fix counters */ @@ -1197,11 +1238,9 @@ static void tcp_adjust_pcount(struct sock *sk, const struct sk_buff *skb, int de if (tcp_is_reno(tp) && decr > 0) tp->sacked_out -= min_t(u32, tp->sacked_out, decr); - tcp_adjust_fackets_out(sk, skb, decr); - if (tp->lost_skb_hint && before(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(tp->lost_skb_hint)->seq) && - (tcp_is_fack(tp) || (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))) + (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)) tp->lost_cnt_hint -= decr; tcp_verify_left_out(tp); @@ -1236,12 +1275,25 @@ static void tcp_skb_fragment_eor(struct sk_buff *skb, struct sk_buff *skb2) TCP_SKB_CB(skb)->eor = 0; } +/* Insert buff after skb on the write or rtx queue of sk. */ +static void tcp_insert_write_queue_after(struct sk_buff *skb, + struct sk_buff *buff, + struct sock *sk, + enum tcp_queue tcp_queue) +{ + if (tcp_queue == TCP_FRAG_IN_WRITE_QUEUE) + __skb_queue_after(&sk->sk_write_queue, skb, buff); + else + tcp_rbtree_insert(&sk->tcp_rtx_queue, buff); +} + /* Function to create two new TCP segments. Shrinks the given segment * to the specified size and appends a new segment with the rest of the * packet to the list. This won't be called frequently, I hope. * Remember, these are still headerless SKBs at this point. */ -int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, +int tcp_fragment(struct sock *sk, enum tcp_queue tcp_queue, + struct sk_buff *skb, u32 len, unsigned int mss_now, gfp_t gfp) { struct tcp_sock *tp = tcp_sk(sk); @@ -1324,7 +1376,9 @@ int tcp_fragment(struct sock *sk, struct sk_buff *skb, u32 len, /* Link BUFF into the send queue. */ __skb_header_release(buff); - tcp_insert_write_queue_after(skb, buff, sk); + tcp_insert_write_queue_after(skb, buff, sk, tcp_queue); + if (tcp_queue == TCP_FRAG_IN_RTX_QUEUE) + list_add(&buff->tcp_tsorted_anchor, &skb->tcp_tsorted_anchor); return 0; } @@ -1602,7 +1656,7 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) if (tp->packets_out > tp->snd_cwnd_used) tp->snd_cwnd_used = tp->packets_out; - if (sysctl_tcp_slow_start_after_idle && + if (sock_net(sk)->ipv4.sysctl_tcp_slow_start_after_idle && (s32)(tcp_jiffies32 - tp->snd_cwnd_stamp) >= inet_csk(sk)->icsk_rto && !ca_ops->cong_control) tcp_cwnd_application_limited(sk); @@ -1611,10 +1665,10 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) * is caused by insufficient sender buffer: * 1) just sent some data (see tcp_write_xmit) * 2) not cwnd limited (this else condition) - * 3) no more data to send (null tcp_send_head ) + * 3) no more data to send (tcp_write_queue_empty()) * 4) application is hitting buffer limit (SOCK_NOSPACE) */ - if (!tcp_send_head(sk) && sk->sk_socket && + if (tcp_write_queue_empty(sk) && sk->sk_socket && test_bit(SOCK_NOSPACE, &sk->sk_socket->flags) && (1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) tcp_chrono_start(sk, TCP_CHRONO_SNDBUF_LIMITED); @@ -1666,7 +1720,7 @@ u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, { u32 bytes, segs; - bytes = min(sk->sk_pacing_rate >> 10, + bytes = min(sk->sk_pacing_rate >> sk->sk_pacing_shift, sk->sk_gso_max_size - 1 - MAX_TCP_HEADER); /* Goal is to send at least one packet per ms, @@ -1689,7 +1743,8 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0; return tso_segs ? : - tcp_tso_autosize(sk, mss_now, sysctl_tcp_min_tso_segs); + tcp_tso_autosize(sk, mss_now, + sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); } /* Returns the portion of skb which can be sent right away */ @@ -1803,40 +1858,6 @@ static bool tcp_snd_wnd_test(const struct tcp_sock *tp, return !after(end_seq, tcp_wnd_end(tp)); } -/* This checks if the data bearing packet SKB (usually tcp_send_head(sk)) - * should be put on the wire right now. If so, it returns the number of - * packets allowed by the congestion window. - */ -static unsigned int tcp_snd_test(const struct sock *sk, struct sk_buff *skb, - unsigned int cur_mss, int nonagle) -{ - const struct tcp_sock *tp = tcp_sk(sk); - unsigned int cwnd_quota; - - tcp_init_tso_segs(skb, cur_mss); - - if (!tcp_nagle_test(tp, skb, cur_mss, nonagle)) - return 0; - - cwnd_quota = tcp_cwnd_test(tp, skb); - if (cwnd_quota && !tcp_snd_wnd_test(tp, skb, cur_mss)) - cwnd_quota = 0; - - return cwnd_quota; -} - -/* Test if sending is allowed right now. */ -bool tcp_may_send_now(struct sock *sk) -{ - const struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb = tcp_send_head(sk); - - return skb && - tcp_snd_test(sk, skb, tcp_current_mss(sk), - (tcp_skb_is_last(sk, skb) ? - tp->nonagle : TCP_NAGLE_PUSH)); -} - /* Trim TSO SKB to LEN bytes, put the remaining data into a new packet * which is put after SKB on the list. It is very much like * tcp_fragment() except that it may make several kinds of assumptions @@ -1844,7 +1865,8 @@ bool tcp_may_send_now(struct sock *sk) * know that all the data is in scatter-gather pages, and that the * packet has never been sent out before (and thus is not cloned). */ -static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, +static int tso_fragment(struct sock *sk, enum tcp_queue tcp_queue, + struct sk_buff *skb, unsigned int len, unsigned int mss_now, gfp_t gfp) { struct sk_buff *buff; @@ -1853,7 +1875,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, /* All of a TSO frame must be composed of paged data. */ if (skb->len != skb->data_len) - return tcp_fragment(sk, skb, len, mss_now, gfp); + return tcp_fragment(sk, tcp_queue, skb, len, mss_now, gfp); buff = sk_stream_alloc_skb(sk, 0, gfp, true); if (unlikely(!buff)) @@ -1889,7 +1911,7 @@ static int tso_fragment(struct sock *sk, struct sk_buff *skb, unsigned int len, /* Link BUFF into the send queue. */ __skb_header_release(buff); - tcp_insert_write_queue_after(skb, buff, sk); + tcp_insert_write_queue_after(skb, buff, sk, tcp_queue); return 0; } @@ -1939,7 +1961,7 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, if ((skb != tcp_write_queue_tail(sk)) && (limit >= skb->len)) goto send_now; - win_divisor = ACCESS_ONCE(sysctl_tcp_tso_win_divisor); + win_divisor = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_tso_win_divisor); if (win_divisor) { u32 chunk = min(tp->snd_wnd, tp->snd_cwnd * tp->mss_cache); @@ -1959,8 +1981,10 @@ static bool tcp_tso_should_defer(struct sock *sk, struct sk_buff *skb, goto send_now; } - head = tcp_write_queue_head(sk); - + /* TODO : use tsorted_sent_queue ? */ + head = tcp_rtx_queue_head(sk); + if (!head) + goto send_now; age = tcp_stamp_us_delta(tp->tcp_mstamp, head->skb_mstamp); /* If next ACK is likely to come too late (half srtt), do not defer */ if (age < (tp->srtt_us >> 4)) @@ -2091,6 +2115,7 @@ static int tcp_mtu_probe(struct sock *sk) nskb->ip_summed = skb->ip_summed; tcp_insert_write_queue_before(nskb, skb, sk); + tcp_highest_sack_replace(sk, skb, nskb); len = 0; tcp_for_write_queue_from_safe(skb, next, sk) { @@ -2173,18 +2198,18 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, { unsigned int limit; - limit = max(2 * skb->truesize, sk->sk_pacing_rate >> 10); - limit = min_t(u32, limit, sysctl_tcp_limit_output_bytes); + limit = max(2 * skb->truesize, sk->sk_pacing_rate >> sk->sk_pacing_shift); + limit = min_t(u32, limit, + sock_net(sk)->ipv4.sysctl_tcp_limit_output_bytes); limit <<= factor; if (refcount_read(&sk->sk_wmem_alloc) > limit) { - /* Always send the 1st or 2nd skb in write queue. + /* Always send skb if rtx queue is empty. * No need to wait for TX completion to call us back, * after softirq/tasklet schedule. * This helps when TX completions are delayed too much. */ - if (skb == sk->sk_write_queue.next || - skb->prev == sk->sk_write_queue.next) + if (tcp_rtx_queue_empty(sk)) return false; set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags); @@ -2235,7 +2260,7 @@ void tcp_chrono_stop(struct sock *sk, const enum tcp_chrono type) * it's the "most interesting" or current chrono we are * tracking and starts busy chrono if we have pending data. */ - if (tcp_write_queue_empty(sk)) + if (tcp_rtx_and_write_queues_empty(sk)) tcp_chrono_set(tp, TCP_CHRONO_UNSPEC); else if (type == tp->chrono_type) tcp_chrono_set(tp, TCP_CHRONO_BUSY); @@ -2268,6 +2293,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, sent_pkts = 0; + tcp_mstamp_refresh(tp); if (!push_one) { /* Do MTU probing. */ result = tcp_mtu_probe(sk); @@ -2279,7 +2305,6 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, } max_segs = tcp_tso_segs(sk, mss_now); - tcp_mstamp_refresh(tp); while ((skb = tcp_send_head(sk))) { unsigned int limit; @@ -2291,7 +2316,7 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, if (unlikely(tp->repair) && tp->repair_queue == TCP_SEND_QUEUE) { /* "skb_mstamp" is used as a start point for the retransmit timer */ - skb->skb_mstamp = tp->tcp_mstamp; + tcp_update_skb_after_send(tp, skb); goto repair; /* Skip network transmission */ } @@ -2330,7 +2355,8 @@ static bool tcp_write_xmit(struct sock *sk, unsigned int mss_now, int nonagle, nonagle); if (skb->len > limit && - unlikely(tso_fragment(sk, skb, limit, mss_now, gfp))) + unlikely(tso_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE, + skb, limit, mss_now, gfp))) break; if (test_bit(TCP_TSQ_DEFERRED, &sk->sk_tsq_flags)) @@ -2370,15 +2396,15 @@ repair: tcp_cwnd_validate(sk, is_cwnd_limited); return false; } - return !tp->packets_out && tcp_send_head(sk); + return !tp->packets_out && !tcp_write_queue_empty(sk); } bool tcp_schedule_loss_probe(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3); u32 timeout, rto_delta_us; + int early_retrans; /* Don't do any loss probe on a Fast Open connection before 3WHS * finishes. @@ -2386,27 +2412,32 @@ bool tcp_schedule_loss_probe(struct sock *sk) if (tp->fastopen_rsk) return false; + early_retrans = sock_net(sk)->ipv4.sysctl_tcp_early_retrans; /* Schedule a loss probe in 2*RTT for SACK capable connections * in Open state, that are either limited by cwnd or application. */ - if ((sysctl_tcp_early_retrans != 3 && sysctl_tcp_early_retrans != 4) || + if ((early_retrans != 3 && early_retrans != 4) || !tp->packets_out || !tcp_is_sack(tp) || icsk->icsk_ca_state != TCP_CA_Open) return false; if ((tp->snd_cwnd > tcp_packets_in_flight(tp)) && - tcp_send_head(sk)) + !tcp_write_queue_empty(sk)) return false; - /* Probe timeout is at least 1.5*rtt + TCP_DELACK_MAX to account + /* Probe timeout is 2*rtt. Add minimum RTO to account * for delayed ack when there's one outstanding packet. If no RTT * sample is available then probe after TCP_TIMEOUT_INIT. */ - timeout = rtt << 1 ? : TCP_TIMEOUT_INIT; - if (tp->packets_out == 1) - timeout = max_t(u32, timeout, - (rtt + (rtt >> 1) + TCP_DELACK_MAX)); - timeout = max_t(u32, timeout, msecs_to_jiffies(10)); + if (tp->srtt_us) { + timeout = usecs_to_jiffies(tp->srtt_us >> 2); + if (tp->packets_out == 1) + timeout += TCP_RTO_MIN; + else + timeout += TCP_TIMEOUT_MIN; + } else { + timeout = TCP_TIMEOUT_INIT; + } /* If the RTO formula yields an earlier time, then use that time. */ rto_delta_us = tcp_rto_delta_us(sk); /* How far in future is RTO? */ @@ -2444,18 +2475,14 @@ void tcp_send_loss_probe(struct sock *sk) int mss = tcp_current_mss(sk); skb = tcp_send_head(sk); - if (skb) { - if (tcp_snd_wnd_test(tp, skb, mss)) { - pcount = tp->packets_out; - tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC); - if (tp->packets_out > pcount) - goto probe_sent; - goto rearm_timer; - } - skb = tcp_write_queue_prev(sk, skb); - } else { - skb = tcp_write_queue_tail(sk); + if (skb && tcp_snd_wnd_test(tp, skb, mss)) { + pcount = tp->packets_out; + tcp_write_xmit(sk, mss, TCP_NAGLE_OFF, 2, GFP_ATOMIC); + if (tp->packets_out > pcount) + goto probe_sent; + goto rearm_timer; } + skb = skb_rb_last(&sk->tcp_rtx_queue); /* At most one outstanding TLP retransmission. */ if (tp->tlp_high_seq) @@ -2473,10 +2500,11 @@ void tcp_send_loss_probe(struct sock *sk) goto rearm_timer; if ((pcount > 1) && (skb->len > (pcount - 1) * mss)) { - if (unlikely(tcp_fragment(sk, skb, (pcount - 1) * mss, mss, + if (unlikely(tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, + (pcount - 1) * mss, mss, GFP_ATOMIC))) goto rearm_timer; - skb = tcp_write_queue_next(sk, skb); + skb = skb_rb_next(skb); } if (WARN_ON(!skb || !tcp_skb_pcount(skb))) @@ -2676,7 +2704,7 @@ void tcp_skb_collapse_tstamp(struct sk_buff *skb, static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *next_skb = tcp_write_queue_next(sk, skb); + struct sk_buff *next_skb = skb_rb_next(skb); int skb_size, next_skb_size; skb_size = skb->len; @@ -2691,9 +2719,7 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) else if (!skb_shift(skb, next_skb, next_skb_size)) return false; } - tcp_highest_sack_combine(sk, next_skb, skb); - - tcp_unlink_write_queue(next_skb, sk); + tcp_highest_sack_replace(sk, next_skb, skb); if (next_skb->ip_summed == CHECKSUM_PARTIAL) skb->ip_summed = CHECKSUM_PARTIAL; @@ -2722,7 +2748,7 @@ static bool tcp_collapse_retrans(struct sock *sk, struct sk_buff *skb) tcp_skb_collapse_tstamp(skb, next_skb); - sk_wmem_free_skb(sk, next_skb); + tcp_rtx_queue_unlink_and_free(next_skb, sk); return true; } @@ -2733,8 +2759,6 @@ static bool tcp_can_collapse(const struct sock *sk, const struct sk_buff *skb) return false; if (skb_cloned(skb)) return false; - if (skb == tcp_send_head(sk)) - return false; /* Some heuristics for collapsing over SACK'd could be invented */ if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) return false; @@ -2752,12 +2776,12 @@ static void tcp_retrans_try_collapse(struct sock *sk, struct sk_buff *to, struct sk_buff *skb = to, *tmp; bool first = true; - if (!sysctl_tcp_retrans_collapse) + if (!sock_net(sk)->ipv4.sysctl_tcp_retrans_collapse) return; if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN) return; - tcp_for_write_queue_from_safe(skb, tmp, sk) { + skb_rbtree_walk_from_safe(skb, tmp) { if (!tcp_can_collapse(sk, skb)) break; @@ -2832,7 +2856,8 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) len = cur_mss * segs; if (skb->len > len) { - if (tcp_fragment(sk, skb, len, cur_mss, GFP_ATOMIC)) + if (tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb, len, + cur_mss, GFP_ATOMIC)) return -ENOMEM; /* We'll try again later. */ } else { if (skb_unclone(skb, GFP_ATOMIC)) @@ -2866,16 +2891,23 @@ int __tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) skb_headroom(skb) >= 0xFFFF)) { struct sk_buff *nskb; - skb->skb_mstamp = tp->tcp_mstamp; - nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC); - err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : - -ENOBUFS; + tcp_skb_tsorted_save(skb) { + nskb = __pskb_copy(skb, MAX_TCP_HEADER, GFP_ATOMIC); + err = nskb ? tcp_transmit_skb(sk, nskb, 0, GFP_ATOMIC) : + -ENOBUFS; + } tcp_skb_tsorted_restore(skb); + + if (!err) { + tcp_update_skb_after_send(tp, skb); + tcp_rate_skb_sent(sk, skb); + } } else { err = tcp_transmit_skb(sk, skb, 1, GFP_ATOMIC); } if (likely(!err)) { TCP_SKB_CB(skb)->sacked |= TCPCB_EVER_RETRANS; + trace_tcp_retransmit_skb(sk, skb); } else if (err != -EBUSY) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPRETRANSFAIL); } @@ -2912,36 +2944,25 @@ int tcp_retransmit_skb(struct sock *sk, struct sk_buff *skb, int segs) * retransmitted data is acknowledged. It tries to continue * resending the rest of the retransmit queue, until either * we've sent it all or the congestion window limit is reached. - * If doing SACK, the first ACK which comes back for a timeout - * based retransmit packet might feed us FACK information again. - * If so, we use it to avoid unnecessarily retransmissions. */ void tcp_xmit_retransmit_queue(struct sock *sk) { const struct inet_connection_sock *icsk = inet_csk(sk); + struct sk_buff *skb, *rtx_head, *hole = NULL; struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb; - struct sk_buff *hole = NULL; u32 max_segs; int mib_idx; if (!tp->packets_out) return; - if (tp->retransmit_skb_hint) { - skb = tp->retransmit_skb_hint; - } else { - skb = tcp_write_queue_head(sk); - } - + rtx_head = tcp_rtx_queue_head(sk); + skb = tp->retransmit_skb_hint ?: rtx_head; max_segs = tcp_tso_segs(sk, tcp_current_mss(sk)); - tcp_for_write_queue_from(skb, sk) { + skb_rbtree_walk_from(skb) { __u8 sacked; int segs; - if (skb == tcp_send_head(sk)) - break; - if (tcp_pacing_check(sk)) break; @@ -2986,7 +3007,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) if (tcp_in_cwnd_reduction(sk)) tp->prr_out += tcp_skb_pcount(skb); - if (skb == tcp_write_queue_head(sk) && + if (skb == rtx_head && icsk->icsk_pending != ICSK_TIME_REO_TIMEOUT) inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, inet_csk(sk)->icsk_rto, @@ -3028,12 +3049,15 @@ void tcp_send_fin(struct sock *sk) * Note: in the latter case, FIN packet will be sent after a timeout, * as TCP stack thinks it has already been transmitted. */ - if (tskb && (tcp_send_head(sk) || tcp_under_memory_pressure(sk))) { + if (!tskb && tcp_under_memory_pressure(sk)) + tskb = skb_rb_last(&sk->tcp_rtx_queue); + + if (tskb) { coalesce: TCP_SKB_CB(tskb)->tcp_flags |= TCPHDR_FIN; TCP_SKB_CB(tskb)->end_seq++; tp->write_seq++; - if (!tcp_send_head(sk)) { + if (tcp_write_queue_empty(sk)) { /* This means tskb was already sent. * Pretend we included the FIN on previous transmit. * We need to set tp->snd_nxt to the value it would have @@ -3050,6 +3074,7 @@ coalesce: goto coalesce; return; } + INIT_LIST_HEAD(&skb->tcp_tsorted_anchor); skb_reserve(skb, MAX_TCP_HEADER); sk_forced_mem_schedule(sk, skb->truesize); /* FIN eats a sequence byte, write_seq advanced by tcp_queue_skb(). */ @@ -3086,6 +3111,11 @@ void tcp_send_active_reset(struct sock *sk, gfp_t priority) /* Send it off. */ if (tcp_transmit_skb(sk, skb, 0, priority)) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPABORTFAILED); + + /* skb of trace_tcp_send_reset() keeps the skb that caused RST, + * skb here is different to the troublesome skb, so use NULL + */ + trace_tcp_send_reset(sk, NULL); } /* Send a crossed SYN-ACK during socket establishment. @@ -3098,20 +3128,24 @@ int tcp_send_synack(struct sock *sk) { struct sk_buff *skb; - skb = tcp_write_queue_head(sk); + skb = tcp_rtx_queue_head(sk); if (!skb || !(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_SYN)) { - pr_debug("%s: wrong queue state\n", __func__); + pr_err("%s: wrong queue state\n", __func__); return -EFAULT; } if (!(TCP_SKB_CB(skb)->tcp_flags & TCPHDR_ACK)) { if (skb_cloned(skb)) { - struct sk_buff *nskb = skb_copy(skb, GFP_ATOMIC); + struct sk_buff *nskb; + + tcp_skb_tsorted_save(skb) { + nskb = skb_copy(skb, GFP_ATOMIC); + } tcp_skb_tsorted_restore(skb); if (!nskb) return -ENOMEM; - tcp_unlink_write_queue(skb, sk); + INIT_LIST_HEAD(&nskb->tcp_tsorted_anchor); + tcp_rtx_queue_unlink_and_free(skb, sk); __skb_header_release(nskb); - __tcp_add_write_queue_head(sk, nskb); - sk_wmem_free_skb(sk, skb); + tcp_rbtree_insert(&sk->tcp_rtx_queue, nskb); sk->sk_wmem_queued += nskb->truesize; sk_mem_charge(sk, nskb->truesize); skb = nskb; @@ -3188,8 +3222,8 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, md5 = tcp_rsk(req)->af_specific->req_md5_lookup(sk, req_to_sk(req)); #endif skb_set_hash(skb, tcp_rsk(req)->txhash, PKT_HASH_TYPE_L4); - tcp_header_size = tcp_synack_options(req, mss, skb, &opts, md5, foc) + - sizeof(*th); + tcp_header_size = tcp_synack_options(sk, req, mss, skb, &opts, md5, + foc) + sizeof(*th); skb_push(skb, tcp_header_size); skb_reset_transport_header(skb); @@ -3202,13 +3236,8 @@ struct sk_buff *tcp_make_synack(const struct sock *sk, struct dst_entry *dst, th->source = htons(ireq->ir_num); th->dest = ireq->ir_rmt_port; skb->mark = ireq->ir_mark; - /* Setting of flags are superfluous here for callers (and ECE is - * not even correctly set) - */ - tcp_init_nondata_skb(skb, tcp_rsk(req)->snt_isn, - TCPHDR_SYN | TCPHDR_ACK); - - th->seq = htonl(TCP_SKB_CB(skb)->seq); + skb->ip_summed = CHECKSUM_PARTIAL; + th->seq = htonl(tcp_rsk(req)->snt_isn); /* XXX data is queued and acked as is. No buffer/window check */ th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt); @@ -3295,7 +3324,7 @@ static void tcp_connect_init(struct sock *sk) if (rcv_wnd == 0) rcv_wnd = dst_metric(dst, RTAX_INITRWND); - tcp_select_initial_window(tcp_full_space(sk), + tcp_select_initial_window(sk, tcp_full_space(sk), tp->advmss - (tp->rx_opt.ts_recent_stamp ? tp->tcp_header_len - sizeof(struct tcphdr) : 0), &tp->rcv_wnd, &tp->window_clamp, @@ -3334,7 +3363,6 @@ static void tcp_connect_queue_skb(struct sock *sk, struct sk_buff *skb) tcb->end_seq += skb->len; __skb_header_release(skb); - __tcp_add_write_queue_tail(sk, skb); sk->sk_wmem_queued += skb->truesize; sk_mem_charge(sk, skb->truesize); tp->write_seq = tcb->end_seq; @@ -3382,6 +3410,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) int copied = copy_from_iter(skb_put(syn_data, space), space, &fo->data->msg_iter); if (unlikely(!copied)) { + tcp_skb_tsorted_anchor_cleanup(syn_data); kfree_skb(syn_data); goto fallback; } @@ -3412,10 +3441,15 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) TCP_SKB_CB(syn_data)->tcp_flags = TCPHDR_ACK | TCPHDR_PSH; if (!err) { tp->syn_data = (fo->copied > 0); + tcp_rbtree_insert(&sk->tcp_rtx_queue, syn_data); NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPORIGDATASENT); goto done; } + /* data was not sent, put it in write_queue */ + __skb_queue_tail(&sk->sk_write_queue, syn_data); + tp->packets_out -= tcp_skb_pcount(syn_data); + fallback: /* Send a regular SYN with Fast Open cookie request option */ if (fo->cookie.len > 0) @@ -3456,6 +3490,7 @@ int tcp_connect(struct sock *sk) tp->retrans_stamp = tcp_time_stamp(tp); tcp_connect_queue_skb(sk, buff); tcp_ecn_send_syn(sk, buff); + tcp_rbtree_insert(&sk->tcp_rtx_queue, buff); /* Send off SYN; include data in Fast Open. */ err = tp->fastopen_req ? tcp_send_syn_data(sk, buff) : @@ -3468,6 +3503,11 @@ int tcp_connect(struct sock *sk) */ tp->snd_nxt = tp->write_seq; tp->pushed_seq = tp->write_seq; + buff = tcp_send_head(sk); + if (unlikely(buff)) { + tp->snd_nxt = TCP_SKB_CB(buff)->seq; + tp->pushed_seq = TCP_SKB_CB(buff)->seq; + } TCP_INC_STATS(sock_net(sk), TCP_MIB_ACTIVEOPENS); /* Timer for repeating the SYN until an answer. */ @@ -3645,7 +3685,8 @@ int tcp_write_wakeup(struct sock *sk, int mib) skb->len > mss) { seg_size = min(seg_size, mss); TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_PSH; - if (tcp_fragment(sk, skb, seg_size, mss, GFP_ATOMIC)) + if (tcp_fragment(sk, TCP_FRAG_IN_WRITE_QUEUE, + skb, seg_size, mss, GFP_ATOMIC)) return -1; } else if (!tcp_skb_pcount(skb)) tcp_set_skb_tso_segs(skb, mss); @@ -3675,7 +3716,7 @@ void tcp_send_probe0(struct sock *sk) err = tcp_write_wakeup(sk, LINUX_MIB_TCPWINPROBE); - if (tp->packets_out || !tcp_send_head(sk)) { + if (tp->packets_out || tcp_write_queue_empty(sk)) { /* Cancel probe timer, if it is not required. */ icsk->icsk_probes_out = 0; icsk->icsk_backoff = 0; @@ -3716,6 +3757,7 @@ int tcp_rtx_synack(const struct sock *sk, struct request_sock *req) __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSYNRETRANS); if (unlikely(tcp_passive_fastopen(sk))) tcp_sk(sk)->total_retrans++; + trace_tcp_retransmit_synack(sk, req); } return res; } diff --git a/net/ipv4/tcp_probe.c b/net/ipv4/tcp_probe.c index f6c50af24a64..697f4c67b2e3 100644 --- a/net/ipv4/tcp_probe.c +++ b/net/ipv4/tcp_probe.c @@ -105,8 +105,9 @@ static inline int tcp_probe_avail(void) * Note: arguments must match tcp_rcv_established()! */ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, - const struct tcphdr *th, unsigned int len) + const struct tcphdr *th) { + unsigned int len = skb->len; const struct tcp_sock *tp = tcp_sk(sk); const struct inet_sock *inet = inet_sk(sk); @@ -145,7 +146,7 @@ static void jtcp_rcv_established(struct sock *sk, struct sk_buff *skb, BUG(); } - p->length = skb->len; + p->length = len; p->snd_nxt = tp->snd_nxt; p->snd_una = tp->snd_una; p->snd_cwnd = tp->snd_cwnd; diff --git a/net/ipv4/tcp_recovery.c b/net/ipv4/tcp_recovery.c index fe9a493d0208..d3ea89020c69 100644 --- a/net/ipv4/tcp_recovery.c +++ b/net/ipv4/tcp_recovery.c @@ -1,8 +1,7 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/tcp.h> #include <net/tcp.h> -int sysctl_tcp_recovery __read_mostly = TCP_RACK_LOSS_DETECTION; - static void tcp_rack_mark_skb_lost(struct sock *sk, struct sk_buff *skb) { struct tcp_sock *tp = tcp_sk(sk); @@ -45,7 +44,8 @@ static bool tcp_rack_sent_after(u64 t1, u64 t2, u32 seq1, u32 seq2) static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout) { struct tcp_sock *tp = tcp_sk(sk); - struct sk_buff *skb; + u32 min_rtt = tcp_min_rtt(tp); + struct sk_buff *skb, *n; u32 reo_wnd; *reo_timeout = 0; @@ -55,48 +55,36 @@ static void tcp_rack_detect_loss(struct sock *sk, u32 *reo_timeout) * to queuing or delayed ACKs. */ reo_wnd = 1000; - if ((tp->rack.reord || !tp->lost_out) && tcp_min_rtt(tp) != ~0U) - reo_wnd = max(tcp_min_rtt(tp) >> 2, reo_wnd); + if ((tp->rack.reord || !tp->lost_out) && min_rtt != ~0U) { + reo_wnd = max((min_rtt >> 2) * tp->rack.reo_wnd_steps, reo_wnd); + reo_wnd = min(reo_wnd, tp->srtt_us >> 3); + } - tcp_for_write_queue(skb, sk) { + list_for_each_entry_safe(skb, n, &tp->tsorted_sent_queue, + tcp_tsorted_anchor) { struct tcp_skb_cb *scb = TCP_SKB_CB(skb); + s32 remaining; - if (skb == tcp_send_head(sk)) - break; - - /* Skip ones already (s)acked */ - if (!after(scb->end_seq, tp->snd_una) || - scb->sacked & TCPCB_SACKED_ACKED) + /* Skip ones marked lost but not yet retransmitted */ + if ((scb->sacked & TCPCB_LOST) && + !(scb->sacked & TCPCB_SACKED_RETRANS)) continue; - if (tcp_rack_sent_after(tp->rack.mstamp, skb->skb_mstamp, - tp->rack.end_seq, scb->end_seq)) { - /* Step 3 in draft-cheng-tcpm-rack-00.txt: - * A packet is lost if its elapsed time is beyond - * the recent RTT plus the reordering window. - */ - u32 elapsed = tcp_stamp_us_delta(tp->tcp_mstamp, - skb->skb_mstamp); - s32 remaining = tp->rack.rtt_us + reo_wnd - elapsed; - - if (remaining < 0) { - tcp_rack_mark_skb_lost(sk, skb); - continue; - } - - /* Skip ones marked lost but not yet retransmitted */ - if ((scb->sacked & TCPCB_LOST) && - !(scb->sacked & TCPCB_SACKED_RETRANS)) - continue; + if (!tcp_rack_sent_after(tp->rack.mstamp, skb->skb_mstamp, + tp->rack.end_seq, scb->end_seq)) + break; + /* A packet is lost if it has not been s/acked beyond + * the recent RTT plus the reordering window. + */ + remaining = tp->rack.rtt_us + reo_wnd - + tcp_stamp_us_delta(tp->tcp_mstamp, skb->skb_mstamp); + if (remaining < 0) { + tcp_rack_mark_skb_lost(sk, skb); + list_del_init(&skb->tcp_tsorted_anchor); + } else { /* Record maximum wait time (+1 to avoid 0) */ *reo_timeout = max_t(u32, *reo_timeout, 1 + remaining); - - } else if (!(scb->sacked & TCPCB_RETRANS)) { - /* Original data are sent sequentially so stop early - * b/c the rest are all sent after rack_sent - */ - break; } } } @@ -113,7 +101,7 @@ void tcp_rack_mark_lost(struct sock *sk) tp->rack.advanced = 0; tcp_rack_detect_loss(sk, &timeout); if (timeout) { - timeout = usecs_to_jiffies(timeout + TCP_REO_TIMEOUT_MIN); + timeout = usecs_to_jiffies(timeout) + TCP_TIMEOUT_MIN; inet_csk_reset_xmit_timer(sk, ICSK_TIME_REO_TIMEOUT, timeout, inet_csk(sk)->icsk_rto); } @@ -175,3 +163,44 @@ void tcp_rack_reo_timeout(struct sock *sk) if (inet_csk(sk)->icsk_pending != ICSK_TIME_RETRANS) tcp_rearm_rto(sk); } + +/* Updates the RACK's reo_wnd based on DSACK and no. of recoveries. + * + * If DSACK is received, increment reo_wnd by min_rtt/4 (upper bounded + * by srtt), since there is possibility that spurious retransmission was + * due to reordering delay longer than reo_wnd. + * + * Persist the current reo_wnd value for TCP_RACK_RECOVERY_THRESH (16) + * no. of successful recoveries (accounts for full DSACK-based loss + * recovery undo). After that, reset it to default (min_rtt/4). + * + * At max, reo_wnd is incremented only once per rtt. So that the new + * DSACK on which we are reacting, is due to the spurious retx (approx) + * after the reo_wnd has been updated last time. + * + * reo_wnd is tracked in terms of steps (of min_rtt/4), rather than + * absolute value to account for change in rtt. + */ +void tcp_rack_update_reo_wnd(struct sock *sk, struct rate_sample *rs) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (sock_net(sk)->ipv4.sysctl_tcp_recovery & TCP_RACK_STATIC_REO_WND || + !rs->prior_delivered) + return; + + /* Disregard DSACK if a rtt has not passed since we adjusted reo_wnd */ + if (before(rs->prior_delivered, tp->rack.last_delivered)) + tp->rack.dsack_seen = 0; + + /* Adjust the reo_wnd if update is pending */ + if (tp->rack.dsack_seen) { + tp->rack.reo_wnd_steps = min_t(u32, 0xFF, + tp->rack.reo_wnd_steps + 1); + tp->rack.dsack_seen = 0; + tp->rack.last_delivered = tp->delivered; + tp->rack.reo_wnd_persist = TCP_RACK_RECOVERY_THRESH; + } else if (!tp->rack.reo_wnd_persist) { + tp->rack.reo_wnd_steps = 1; + } +} diff --git a/net/ipv4/tcp_scalable.c b/net/ipv4/tcp_scalable.c index f2123075ce6e..addc122f8818 100644 --- a/net/ipv4/tcp_scalable.c +++ b/net/ipv4/tcp_scalable.c @@ -15,10 +15,6 @@ #define TCP_SCALABLE_AI_CNT 50U #define TCP_SCALABLE_MD_SCALE 3 -struct scalable { - u32 loss_cwnd; -}; - static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked) { struct tcp_sock *tp = tcp_sk(sk); @@ -36,23 +32,13 @@ static void tcp_scalable_cong_avoid(struct sock *sk, u32 ack, u32 acked) static u32 tcp_scalable_ssthresh(struct sock *sk) { const struct tcp_sock *tp = tcp_sk(sk); - struct scalable *ca = inet_csk_ca(sk); - - ca->loss_cwnd = tp->snd_cwnd; return max(tp->snd_cwnd - (tp->snd_cwnd>>TCP_SCALABLE_MD_SCALE), 2U); } -static u32 tcp_scalable_cwnd_undo(struct sock *sk) -{ - const struct scalable *ca = inet_csk_ca(sk); - - return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd); -} - static struct tcp_congestion_ops tcp_scalable __read_mostly = { .ssthresh = tcp_scalable_ssthresh, - .undo_cwnd = tcp_scalable_cwnd_undo, + .undo_cwnd = tcp_reno_undo_cwnd, .cong_avoid = tcp_scalable_cong_avoid, .owner = THIS_MODULE, diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index e906014890b6..16df6dd44b98 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -22,8 +22,6 @@ #include <linux/gfp.h> #include <net/tcp.h> -int sysctl_tcp_thin_linear_timeouts __read_mostly; - /** * tcp_write_err() - close socket and save error info * @sk: The socket the error has appeared on. @@ -109,26 +107,23 @@ static int tcp_orphan_retries(struct sock *sk, bool alive) static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) { - struct net *net = sock_net(sk); + const struct net *net = sock_net(sk); + int mss; /* Black hole detection */ - if (net->ipv4.sysctl_tcp_mtu_probing) { - if (!icsk->icsk_mtup.enabled) { - icsk->icsk_mtup.enabled = 1; - icsk->icsk_mtup.probe_timestamp = tcp_jiffies32; - tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); - } else { - struct net *net = sock_net(sk); - struct tcp_sock *tp = tcp_sk(sk); - int mss; - - mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1; - mss = min(net->ipv4.sysctl_tcp_base_mss, mss); - mss = max(mss, 68 - tp->tcp_header_len); - icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss); - tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); - } + if (!net->ipv4.sysctl_tcp_mtu_probing) + return; + + if (!icsk->icsk_mtup.enabled) { + icsk->icsk_mtup.enabled = 1; + icsk->icsk_mtup.probe_timestamp = tcp_jiffies32; + } else { + mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1; + mss = min(net->ipv4.sysctl_tcp_base_mss, mss); + mss = max(mss, 68 - tcp_sk(sk)->tcp_header_len); + icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss); } + tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); } @@ -156,8 +151,13 @@ static bool retransmits_timed_out(struct sock *sk, return false; start_ts = tcp_sk(sk)->retrans_stamp; - if (unlikely(!start_ts)) - start_ts = tcp_skb_timestamp(tcp_write_queue_head(sk)); + if (unlikely(!start_ts)) { + struct sk_buff *head = tcp_rtx_queue_head(sk); + + if (!head) + return false; + start_ts = tcp_skb_timestamp(head); + } if (likely(timeout == 0)) { linear_backoff_thresh = ilog2(TCP_RTO_MAX/rto_base); @@ -239,7 +239,6 @@ static int tcp_write_timeout(struct sock *sk) /* Called with BH disabled */ void tcp_delack_timer_handler(struct sock *sk) { - struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); sk_mem_reclaim_partial(sk); @@ -254,17 +253,6 @@ void tcp_delack_timer_handler(struct sock *sk) } icsk->icsk_ack.pending &= ~ICSK_ACK_TIMER; - if (!skb_queue_empty(&tp->ucopy.prequeue)) { - struct sk_buff *skb; - - __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSCHEDULERFAILED); - - while ((skb = __skb_dequeue(&tp->ucopy.prequeue)) != NULL) - sk_backlog_rcv(sk, skb); - - tp->ucopy.memory = 0; - } - if (inet_csk_ack_scheduled(sk)) { if (!icsk->icsk_ack.pingpong) { /* Delayed ACK missed: inflate ATO. */ @@ -295,15 +283,17 @@ out: * * Returns: Nothing (void) */ -static void tcp_delack_timer(unsigned long data) +static void tcp_delack_timer(struct timer_list *t) { - struct sock *sk = (struct sock *)data; + struct inet_connection_sock *icsk = + from_timer(icsk, t, icsk_delack_timer); + struct sock *sk = &icsk->icsk_inet.sk; bh_lock_sock(sk); if (!sock_owned_by_user(sk)) { tcp_delack_timer_handler(sk); } else { - inet_csk(sk)->icsk_ack.blocked = 1; + icsk->icsk_ack.blocked = 1; __NET_INC_STATS(sock_net(sk), LINUX_MIB_DELAYEDACKLOCKED); /* deleguate our work to tcp_release_cb() */ if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED, &sk->sk_tsq_flags)) @@ -316,11 +306,12 @@ static void tcp_delack_timer(unsigned long data) static void tcp_probe_timer(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); + struct sk_buff *skb = tcp_send_head(sk); struct tcp_sock *tp = tcp_sk(sk); int max_probes; u32 start_ts; - if (tp->packets_out || !tcp_send_head(sk)) { + if (tp->packets_out || !skb) { icsk->icsk_probes_out = 0; return; } @@ -333,9 +324,9 @@ static void tcp_probe_timer(struct sock *sk) * corresponding system limit. We also implement similar policy when * we use RTO to probe window in tcp_retransmit_timer(). */ - start_ts = tcp_skb_timestamp(tcp_send_head(sk)); + start_ts = tcp_skb_timestamp(skb); if (!start_ts) - tcp_send_head(sk)->skb_mstamp = tp->tcp_mstamp; + skb->skb_mstamp = tp->tcp_mstamp; else if (icsk->icsk_user_timeout && (s32)(tcp_time_stamp(tp) - start_ts) > jiffies_to_msecs(icsk->icsk_user_timeout)) @@ -420,7 +411,7 @@ void tcp_retransmit_timer(struct sock *sk) if (!tp->packets_out) goto out; - WARN_ON(tcp_write_queue_empty(sk)); + WARN_ON(tcp_rtx_queue_empty(sk)); tp->tlp_high_seq = 0; @@ -453,7 +444,7 @@ void tcp_retransmit_timer(struct sock *sk) goto out; } tcp_enter_loss(sk); - tcp_retransmit_skb(sk, tcp_write_queue_head(sk), 1); + tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1); __sk_dst_reset(sk); goto out_reset_timer; } @@ -485,7 +476,7 @@ void tcp_retransmit_timer(struct sock *sk) tcp_enter_loss(sk); - if (tcp_retransmit_skb(sk, tcp_write_queue_head(sk), 1) > 0) { + if (tcp_retransmit_skb(sk, tcp_rtx_queue_head(sk), 1) > 0) { /* Retransmission failed because of local congestion, * do not backoff. */ @@ -526,7 +517,7 @@ out_reset_timer: * linear-timeout retransmissions into a black hole */ if (sk->sk_state == TCP_ESTABLISHED && - (tp->thin_lto || sysctl_tcp_thin_linear_timeouts) && + (tp->thin_lto || net->ipv4.sysctl_tcp_thin_linear_timeouts) && tcp_stream_is_thin(tp) && icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) { icsk->icsk_backoff = 0; @@ -582,9 +573,11 @@ out: sk_mem_reclaim(sk); } -static void tcp_write_timer(unsigned long data) +static void tcp_write_timer(struct timer_list *t) { - struct sock *sk = (struct sock *)data; + struct inet_connection_sock *icsk = + from_timer(icsk, t, icsk_retransmit_timer); + struct sock *sk = &icsk->icsk_inet.sk; bh_lock_sock(sk); if (!sock_owned_by_user(sk)) { @@ -619,9 +612,9 @@ void tcp_set_keepalive(struct sock *sk, int val) EXPORT_SYMBOL_GPL(tcp_set_keepalive); -static void tcp_keepalive_timer (unsigned long data) +static void tcp_keepalive_timer (struct timer_list *t) { - struct sock *sk = (struct sock *) data; + struct sock *sk = from_timer(sk, t, sk_timer); struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); u32 elapsed; @@ -659,7 +652,7 @@ static void tcp_keepalive_timer (unsigned long data) elapsed = keepalive_time_when(tp); /* It is alive without keepalive 8) */ - if (tp->packets_out || tcp_send_head(sk)) + if (tp->packets_out || !tcp_write_queue_empty(sk)) goto resched; elapsed = keepalive_time_elapsed(tp); diff --git a/net/ipv4/tcp_vegas.c b/net/ipv4/tcp_vegas.c index 218cfcc77650..ee113ff15fd0 100644 --- a/net/ipv4/tcp_vegas.c +++ b/net/ipv4/tcp_vegas.c @@ -158,7 +158,7 @@ EXPORT_SYMBOL_GPL(tcp_vegas_cwnd_event); static inline u32 tcp_vegas_ssthresh(struct tcp_sock *tp) { - return min(tp->snd_ssthresh, tp->snd_cwnd-1); + return min(tp->snd_ssthresh, tp->snd_cwnd); } static void tcp_vegas_cong_avoid(struct sock *sk, u32 ack, u32 acked) diff --git a/net/ipv4/tcp_vegas.h b/net/ipv4/tcp_vegas.h index 248cfc0ff9ae..4f24d0e37d9c 100644 --- a/net/ipv4/tcp_vegas.h +++ b/net/ipv4/tcp_vegas.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * TCP Vegas congestion control interface */ diff --git a/net/ipv4/tcp_veno.c b/net/ipv4/tcp_veno.c index 76005d4b8dfc..6fcf482d611b 100644 --- a/net/ipv4/tcp_veno.c +++ b/net/ipv4/tcp_veno.c @@ -30,7 +30,6 @@ struct veno { u32 basertt; /* the min of all Veno rtt measurements seen (in usec) */ u32 inc; /* decide whether to increase cwnd */ u32 diff; /* calculate the diff rate */ - u32 loss_cwnd; /* cwnd when loss occured */ }; /* There are several situations when we must "re-start" Veno: @@ -194,7 +193,6 @@ static u32 tcp_veno_ssthresh(struct sock *sk) const struct tcp_sock *tp = tcp_sk(sk); struct veno *veno = inet_csk_ca(sk); - veno->loss_cwnd = tp->snd_cwnd; if (veno->diff < beta) /* in "non-congestive state", cut cwnd by 1/5 */ return max(tp->snd_cwnd * 4 / 5, 2U); @@ -203,17 +201,10 @@ static u32 tcp_veno_ssthresh(struct sock *sk) return max(tp->snd_cwnd >> 1U, 2U); } -static u32 tcp_veno_cwnd_undo(struct sock *sk) -{ - const struct veno *veno = inet_csk_ca(sk); - - return max(tcp_sk(sk)->snd_cwnd, veno->loss_cwnd); -} - static struct tcp_congestion_ops tcp_veno __read_mostly = { .init = tcp_veno_init, .ssthresh = tcp_veno_ssthresh, - .undo_cwnd = tcp_veno_cwnd_undo, + .undo_cwnd = tcp_reno_undo_cwnd, .cong_avoid = tcp_veno_cong_avoid, .pkts_acked = tcp_veno_pkts_acked, .set_state = tcp_veno_state, diff --git a/net/ipv4/tcp_yeah.c b/net/ipv4/tcp_yeah.c index e6ff99c4bd3b..96e829b2e2fc 100644 --- a/net/ipv4/tcp_yeah.c +++ b/net/ipv4/tcp_yeah.c @@ -37,7 +37,6 @@ struct yeah { u32 fast_count; u32 pkts_acked; - u32 loss_cwnd; }; static void tcp_yeah_init(struct sock *sk) @@ -220,22 +219,14 @@ static u32 tcp_yeah_ssthresh(struct sock *sk) yeah->fast_count = 0; yeah->reno_count = max(yeah->reno_count>>1, 2U); - yeah->loss_cwnd = tp->snd_cwnd; return max_t(int, tp->snd_cwnd - reduction, 2); } -static u32 tcp_yeah_cwnd_undo(struct sock *sk) -{ - const struct yeah *yeah = inet_csk_ca(sk); - - return max(tcp_sk(sk)->snd_cwnd, yeah->loss_cwnd); -} - static struct tcp_congestion_ops tcp_yeah __read_mostly = { .init = tcp_yeah_init, .ssthresh = tcp_yeah_ssthresh, - .undo_cwnd = tcp_yeah_cwnd_undo, + .undo_cwnd = tcp_reno_undo_cwnd, .cong_avoid = tcp_yeah_cong_avoid, .set_state = tcp_vegas_state, .cwnd_event = tcp_vegas_cwnd_event, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index a7c804f73990..e4ff25c947c5 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -231,10 +231,7 @@ static int udp_reuseport_add_sock(struct sock *sk, struct udp_hslot *hslot) } } - /* Initial allocation may have already happened via setsockopt */ - if (!rcu_access_pointer(sk->sk_reuseport_cb)) - return reuseport_alloc(sk); - return 0; + return reuseport_alloc(sk); } /** @@ -380,8 +377,8 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum) static int compute_score(struct sock *sk, struct net *net, __be32 saddr, __be16 sport, - __be32 daddr, unsigned short hnum, int dif, - bool exact_dif) + __be32 daddr, unsigned short hnum, + int dif, int sdif, bool exact_dif) { int score; struct inet_sock *inet; @@ -413,10 +410,15 @@ static int compute_score(struct sock *sk, struct net *net, } if (sk->sk_bound_dev_if || exact_dif) { - if (sk->sk_bound_dev_if != dif) + bool dev_match = (sk->sk_bound_dev_if == dif || + sk->sk_bound_dev_if == sdif); + + if (exact_dif && !dev_match) return -1; - score += 4; + if (sk->sk_bound_dev_if && dev_match) + score += 4; } + if (sk->sk_incoming_cpu == raw_smp_processor_id()) score++; return score; @@ -436,10 +438,11 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr, /* called with rcu_read_lock() */ static struct sock *udp4_lib_lookup2(struct net *net, - __be32 saddr, __be16 sport, - __be32 daddr, unsigned int hnum, int dif, bool exact_dif, - struct udp_hslot *hslot2, - struct sk_buff *skb) + __be32 saddr, __be16 sport, + __be32 daddr, unsigned int hnum, + int dif, int sdif, bool exact_dif, + struct udp_hslot *hslot2, + struct sk_buff *skb) { struct sock *sk, *result; int score, badness, matches = 0, reuseport = 0; @@ -449,7 +452,7 @@ static struct sock *udp4_lib_lookup2(struct net *net, badness = 0; udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { score = compute_score(sk, net, saddr, sport, - daddr, hnum, dif, exact_dif); + daddr, hnum, dif, sdif, exact_dif); if (score > badness) { reuseport = sk->sk_reuseport; if (reuseport) { @@ -477,8 +480,8 @@ static struct sock *udp4_lib_lookup2(struct net *net, * harder than this. -DaveM */ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, - __be16 sport, __be32 daddr, __be16 dport, - int dif, struct udp_table *udptable, struct sk_buff *skb) + __be16 sport, __be32 daddr, __be16 dport, int dif, + int sdif, struct udp_table *udptable, struct sk_buff *skb) { struct sock *sk, *result; unsigned short hnum = ntohs(dport); @@ -496,7 +499,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, goto begin; result = udp4_lib_lookup2(net, saddr, sport, - daddr, hnum, dif, + daddr, hnum, dif, sdif, exact_dif, hslot2, skb); if (!result) { unsigned int old_slot2 = slot2; @@ -511,7 +514,7 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, goto begin; result = udp4_lib_lookup2(net, saddr, sport, - daddr, hnum, dif, + daddr, hnum, dif, sdif, exact_dif, hslot2, skb); } return result; @@ -521,7 +524,7 @@ begin: badness = 0; sk_for_each_rcu(sk, &hslot->head) { score = compute_score(sk, net, saddr, sport, - daddr, hnum, dif, exact_dif); + daddr, hnum, dif, sdif, exact_dif); if (score > badness) { reuseport = sk->sk_reuseport; if (reuseport) { @@ -554,7 +557,7 @@ static inline struct sock *__udp4_lib_lookup_skb(struct sk_buff *skb, return __udp4_lib_lookup(dev_net(skb->dev), iph->saddr, sport, iph->daddr, dport, inet_iif(skb), - udptable, skb); + inet_sdif(skb), udptable, skb); } struct sock *udp4_lib_lookup_skb(struct sk_buff *skb, @@ -576,7 +579,7 @@ struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, struct sock *sk; sk = __udp4_lib_lookup(net, saddr, sport, daddr, dport, - dif, &udp_table, NULL); + dif, 0, &udp_table, NULL); if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; return sk; @@ -587,7 +590,7 @@ EXPORT_SYMBOL_GPL(udp4_lib_lookup); static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk, __be16 loc_port, __be32 loc_addr, __be16 rmt_port, __be32 rmt_addr, - int dif, unsigned short hnum) + int dif, int sdif, unsigned short hnum) { struct inet_sock *inet = inet_sk(sk); @@ -597,9 +600,10 @@ static inline bool __udp_is_mcast_sock(struct net *net, struct sock *sk, (inet->inet_dport != rmt_port && inet->inet_dport) || (inet->inet_rcv_saddr && inet->inet_rcv_saddr != loc_addr) || ipv6_only_sock(sk) || - (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) + (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif && + sk->sk_bound_dev_if != sdif)) return false; - if (!ip_mc_sf_allow(sk, loc_addr, rmt_addr, dif)) + if (!ip_mc_sf_allow(sk, loc_addr, rmt_addr, dif, sdif)) return false; return true; } @@ -628,8 +632,8 @@ void __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) struct net *net = dev_net(skb->dev); sk = __udp4_lib_lookup(net, iph->daddr, uh->dest, - iph->saddr, uh->source, skb->dev->ifindex, udptable, - NULL); + iph->saddr, uh->source, skb->dev->ifindex, 0, + udptable, NULL); if (!sk) { __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); return; /* No socket for error */ @@ -802,7 +806,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) if (is_udplite) /* UDP-Lite */ csum = udplite_csum(skb); - else if (sk->sk_no_check_tx && !skb_is_gso(skb)) { /* UDP csum off */ + else if (sk->sk_no_check_tx) { /* UDP csum off */ skb->ip_summed = CHECKSUM_NONE; goto send; @@ -1054,7 +1058,7 @@ back_from_confirm: /* ... which is an evident application bug. --ANK */ release_sock(sk); - net_dbg_ratelimited("cork app bug 2\n"); + net_dbg_ratelimited("socket already corked\n"); err = -EINVAL; goto out; } @@ -1137,7 +1141,7 @@ int udp_sendpage(struct sock *sk, struct page *page, int offset, if (unlikely(!up->pending)) { release_sock(sk); - net_dbg_ratelimited("udp cork app bug 3\n"); + net_dbg_ratelimited("cork failed\n"); return -EINVAL; } @@ -1176,7 +1180,11 @@ static void udp_set_dev_scratch(struct sk_buff *skb) scratch->csum_unnecessary = !!skb_csum_unnecessary(skb); scratch->is_linear = !skb_is_nonlinear(skb); #endif - if (likely(!skb->_skb_refdst)) + /* all head states execept sp (dst, sk, nf) are always cleared by + * udp_rcv() and we need to preserve secpath, if present, to eventually + * process IP_CMSG_PASSSEC at recvmsg() time + */ + if (likely(!skb_sec_path(skb))) scratch->_tsize_state |= UDP_SKB_IS_STATELESS; } @@ -1201,8 +1209,7 @@ static void udp_rmem_release(struct sock *sk, int size, int partial, if (likely(partial)) { up->forward_deficit += size; size = up->forward_deficit; - if (size < (sk->sk_rcvbuf >> 2) && - !skb_queue_empty(&up->reader_queue)) + if (size < (sk->sk_rcvbuf >> 2)) return; } else { size += up->forward_deficit; @@ -1386,12 +1393,15 @@ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len) unlock_sock_fast(sk, slow); } + if (!skb_unref(skb)) + return; + /* In the more common cases we cleared the head states previously, * see __udp_queue_rcv_skb(). */ if (unlikely(udp_skb_has_head_state(skb))) skb_release_head_state(skb); - consume_stateless_skb(skb); + __consume_stateless_skb(skb); } EXPORT_SYMBOL_GPL(skb_consume_udp); @@ -1574,7 +1584,8 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, return ip_recv_error(sk, msg, len, addr_len); try_again: - peeking = off = sk_peek_offset(sk, flags); + peeking = flags & MSG_PEEK; + off = sk_peek_offset(sk, flags); skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err); if (!skb) return err; @@ -1782,13 +1793,6 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) sk_mark_napi_id_once(sk, skb); } - /* At recvmsg() time we may access skb->dst or skb->sp depending on - * the IP options and the cmsg flags, elsewhere can we clear all - * pending head states while they are hot in the cache - */ - if (likely(IPCB(skb)->opt.optlen == 0 && !skb_sec_path(skb))) - skb_release_head_state(skb); - rc = __udp_enqueue_schedule_skb(sk, skb); if (rc < 0) { int is_udplite = IS_UDPLITE(sk); @@ -1809,8 +1813,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) static struct static_key udp_encap_needed __read_mostly; void udp_encap_enable(void) { - if (!static_key_enabled(&udp_encap_needed)) - static_key_slow_inc(&udp_encap_needed); + static_key_enable(&udp_encap_needed); } EXPORT_SYMBOL(udp_encap_enable); @@ -1849,7 +1852,7 @@ static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) */ /* if we're overly short, let UDP handle it */ - encap_rcv = ACCESS_ONCE(up->encap_rcv); + encap_rcv = READ_ONCE(up->encap_rcv); if (encap_rcv) { int ret; @@ -1928,14 +1931,16 @@ drop: /* For TCP sockets, sk_rx_dst is protected by socket lock * For UDP, we use xchg() to guard against concurrent changes. */ -void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) +bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) { struct dst_entry *old; if (dst_hold_safe(dst)) { old = xchg(&sk->sk_rx_dst, dst); dst_release(old); + return old != dst; } + return false; } EXPORT_SYMBOL(udp_sk_rx_dst_set); @@ -1956,6 +1961,7 @@ static int __udp4_lib_mcast_deliver(struct net *net, struct sk_buff *skb, unsigned int hash2 = 0, hash2_any = 0, use_hash2 = (hslot->count > 10); unsigned int offset = offsetof(typeof(*sk), sk_node); int dif = skb->dev->ifindex; + int sdif = inet_sdif(skb); struct hlist_node *node; struct sk_buff *nskb; @@ -1970,7 +1976,7 @@ start_lookup: sk_for_each_entry_offset_rcu(sk, node, &hslot->head, offset) { if (!__udp_is_mcast_sock(net, sk, uh->dest, daddr, - uh->source, saddr, dif, hnum)) + uh->source, saddr, dif, sdif, hnum)) continue; if (!first) { @@ -2160,7 +2166,7 @@ drop: static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net, __be16 loc_port, __be32 loc_addr, __be16 rmt_port, __be32 rmt_addr, - int dif) + int dif, int sdif) { struct sock *sk, *result; unsigned short hnum = ntohs(loc_port); @@ -2174,7 +2180,7 @@ static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net, result = NULL; sk_for_each_rcu(sk, &hslot->head) { if (__udp_is_mcast_sock(net, sk, loc_port, loc_addr, - rmt_port, rmt_addr, dif, hnum)) { + rmt_port, rmt_addr, dif, sdif, hnum)) { if (result) return NULL; result = sk; @@ -2191,7 +2197,7 @@ static struct sock *__udp4_lib_mcast_demux_lookup(struct net *net, static struct sock *__udp4_lib_demux_lookup(struct net *net, __be16 loc_port, __be32 loc_addr, __be16 rmt_port, __be32 rmt_addr, - int dif) + int dif, int sdif) { unsigned short hnum = ntohs(loc_port); unsigned int hash2 = udp4_portaddr_hash(net, loc_addr, hnum); @@ -2203,7 +2209,7 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net, udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { if (INET_MATCH(sk, net, acookie, rmt_addr, - loc_addr, ports, dif)) + loc_addr, ports, dif, sdif)) return sk; /* Only check first socket in chain */ break; @@ -2211,47 +2217,46 @@ static struct sock *__udp4_lib_demux_lookup(struct net *net, return NULL; } -void udp_v4_early_demux(struct sk_buff *skb) +int udp_v4_early_demux(struct sk_buff *skb) { struct net *net = dev_net(skb->dev); + struct in_device *in_dev = NULL; const struct iphdr *iph; const struct udphdr *uh; struct sock *sk = NULL; struct dst_entry *dst; int dif = skb->dev->ifindex; + int sdif = inet_sdif(skb); int ours; /* validate the packet */ if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr))) - return; + return 0; iph = ip_hdr(skb); uh = udp_hdr(skb); - if (skb->pkt_type == PACKET_BROADCAST || - skb->pkt_type == PACKET_MULTICAST) { - struct in_device *in_dev = __in_dev_get_rcu(skb->dev); + if (skb->pkt_type == PACKET_MULTICAST) { + in_dev = __in_dev_get_rcu(skb->dev); if (!in_dev) - return; + return 0; - /* we are supposed to accept bcast packets */ - if (skb->pkt_type == PACKET_MULTICAST) { - ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr, - iph->protocol); - if (!ours) - return; - } + ours = ip_check_mc_rcu(in_dev, iph->daddr, iph->saddr, + iph->protocol); + if (!ours) + return 0; sk = __udp4_lib_mcast_demux_lookup(net, uh->dest, iph->daddr, - uh->source, iph->saddr, dif); + uh->source, iph->saddr, + dif, sdif); } else if (skb->pkt_type == PACKET_HOST) { sk = __udp4_lib_demux_lookup(net, uh->dest, iph->daddr, - uh->source, iph->saddr, dif); + uh->source, iph->saddr, dif, sdif); } if (!sk || !refcount_inc_not_zero(&sk->sk_refcnt)) - return; + return 0; skb->sk = sk; skb->destructor = sock_efree; @@ -2260,12 +2265,23 @@ void udp_v4_early_demux(struct sk_buff *skb) if (dst) dst = dst_check(dst, 0); if (dst) { + u32 itag = 0; + /* set noref for now. * any place which wants to hold dst has to call * dst_hold_safe() */ skb_dst_set_noref(skb, dst); + + /* for unconnected multicast sockets we need to validate + * the source on each packet + */ + if (!inet_sk(sk)->inet_daddr && in_dev) + return ip_mc_validate_source(skb, iph->daddr, + iph->saddr, iph->tos, + skb->dev, in_dev, &itag); } + return 0; } int udp_rcv(struct sk_buff *skb) @@ -2281,7 +2297,7 @@ void udp_destroy_sock(struct sock *sk) unlock_sock_fast(sk, slow); if (static_key_false(&udp_encap_needed) && up->encap_type) { void (*encap_destroy)(struct sock *sk); - encap_destroy = ACCESS_ONCE(up->encap_destroy); + encap_destroy = READ_ONCE(up->encap_destroy); if (encap_destroy) encap_destroy(sk); } diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index 4515836d2a3a..d0390d844ac8 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -45,7 +45,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, sk = __udp4_lib_lookup(net, req->id.idiag_src[0], req->id.idiag_sport, req->id.idiag_dst[0], req->id.idiag_dport, - req->id.idiag_if, tbl, NULL); + req->id.idiag_if, 0, tbl, NULL); #if IS_ENABLED(CONFIG_IPV6) else if (req->sdiag_family == AF_INET6) sk = __udp6_lib_lookup(net, @@ -53,7 +53,7 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, req->id.idiag_sport, (struct in6_addr *)req->id.idiag_dst, req->id.idiag_dport, - req->id.idiag_if, tbl, NULL); + req->id.idiag_if, 0, tbl, NULL); #endif if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; @@ -182,7 +182,7 @@ static int __udp_diag_destroy(struct sk_buff *in_skb, sk = __udp4_lib_lookup(net, req->id.idiag_dst[0], req->id.idiag_dport, req->id.idiag_src[0], req->id.idiag_sport, - req->id.idiag_if, tbl, NULL); + req->id.idiag_if, 0, tbl, NULL); #if IS_ENABLED(CONFIG_IPV6) else if (req->sdiag_family == AF_INET6) { if (ipv6_addr_v4mapped((struct in6_addr *)req->id.idiag_dst) && @@ -190,7 +190,7 @@ static int __udp_diag_destroy(struct sk_buff *in_skb, sk = __udp4_lib_lookup(net, req->id.idiag_dst[3], req->id.idiag_dport, req->id.idiag_src[3], req->id.idiag_sport, - req->id.idiag_if, tbl, NULL); + req->id.idiag_if, 0, tbl, NULL); else sk = __udp6_lib_lookup(net, @@ -198,7 +198,7 @@ static int __udp_diag_destroy(struct sk_buff *in_skb, req->id.idiag_dport, (struct in6_addr *)req->id.idiag_src, req->id.idiag_sport, - req->id.idiag_if, tbl, NULL); + req->id.idiag_if, 0, tbl, NULL); } #endif else { diff --git a/net/ipv4/udp_impl.h b/net/ipv4/udp_impl.h index a8cf8c6fb60c..e7d18b140287 100644 --- a/net/ipv4/udp_impl.h +++ b/net/ipv4/udp_impl.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _UDP4_IMPL_H #define _UDP4_IMPL_H #include <net/udp.h> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 0932c85b42af..e360d55be555 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -21,7 +21,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, __be16 new_protocol, bool is_ipv6) { int tnl_hlen = skb_inner_mac_header(skb) - skb_transport_header(skb); - bool remcsum, need_csum, offload_csum, ufo, gso_partial; + bool remcsum, need_csum, offload_csum, gso_partial; struct sk_buff *segs = ERR_PTR(-EINVAL); struct udphdr *uh = udp_hdr(skb); u16 mac_offset = skb->mac_header; @@ -61,8 +61,6 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, remcsum = !!(skb_shinfo(skb)->gso_type & SKB_GSO_TUNNEL_REMCSUM); skb->remcsum_offload = remcsum; - ufo = !!(skb_shinfo(skb)->gso_type & SKB_GSO_UDP); - need_ipsec = skb_dst(skb) && dst_xfrm(skb_dst(skb)); /* Try to offload checksum if possible */ offload_csum = !!(need_csum && @@ -77,7 +75,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, * outer one so strip the existing checksum feature flags and * instead set the flag based on our outer checksum offload value. */ - if (remcsum || ufo) { + if (remcsum) { features &= ~NETIF_F_CSUM_MASK; if (!need_csum || offload_csum) features |= NETIF_F_HW_CSUM; @@ -122,7 +120,7 @@ static struct sk_buff *__skb_udp_tunnel_segment(struct sk_buff *skb, * will be using a length value equal to only one MSS sized * segment instead of the entire frame. */ - if (gso_partial) { + if (gso_partial && skb_is_gso(skb)) { uh->len = htons(skb_shinfo(skb)->gso_size + SKB_GSO_CB(skb)->data_offset + skb->head - (unsigned char *)uh); @@ -189,66 +187,16 @@ out_unlock: } EXPORT_SYMBOL(skb_udp_tunnel_segment); -static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, - netdev_features_t features) +static struct sk_buff *udp4_tunnel_segment(struct sk_buff *skb, + netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); - unsigned int mss; - __wsum csum; - struct udphdr *uh; - struct iphdr *iph; if (skb->encapsulation && (skb_shinfo(skb)->gso_type & - (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) { + (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM))) segs = skb_udp_tunnel_segment(skb, features, false); - goto out; - } - if (!pskb_may_pull(skb, sizeof(struct udphdr))) - goto out; - - mss = skb_shinfo(skb)->gso_size; - if (unlikely(skb->len <= mss)) - goto out; - - if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { - /* Packet is from an untrusted source, reset gso_segs. */ - - skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); - - segs = NULL; - goto out; - } - - /* Do software UFO. Complete and fill in the UDP checksum as - * HW cannot do checksum of UDP packets sent as multiple - * IP fragments. - */ - - uh = udp_hdr(skb); - iph = ip_hdr(skb); - - uh->check = 0; - csum = skb_checksum(skb, 0, skb->len, 0); - uh->check = udp_v4_check(skb->len, iph->saddr, iph->daddr, csum); - if (uh->check == 0) - uh->check = CSUM_MANGLED_0; - - skb->ip_summed = CHECKSUM_UNNECESSARY; - - /* If there is no outer header we can fake a checksum offload - * due to the fact that we have already done the checksum in - * software prior to segmenting the frame. - */ - if (!skb->encap_hdr_csum) - features |= NETIF_F_HW_CSUM; - - /* Fragment the skb. IP headers of the fragments are updated in - * inet_gso_segment() - */ - segs = skb_segment(skb, features); -out: return segs; } @@ -382,7 +330,7 @@ static int udp4_gro_complete(struct sk_buff *skb, int nhoff) static const struct net_offload udpv4_offload = { .callbacks = { - .gso_segment = udp4_ufo_fragment, + .gso_segment = udp4_tunnel_segment, .gro_receive = udp4_gro_receive, .gro_complete = udp4_gro_complete, }, diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c index 58bd39fb14b4..6539ff15e9a3 100644 --- a/net/ipv4/udp_tunnel.c +++ b/net/ipv4/udp_tunnel.c @@ -82,7 +82,8 @@ void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock, struct sock *sk = sock->sk; struct udp_tunnel_info ti; - if (!dev->netdev_ops->ndo_udp_tunnel_add) + if (!dev->netdev_ops->ndo_udp_tunnel_add || + !(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) return; ti.type = type; @@ -93,6 +94,24 @@ void udp_tunnel_push_rx_port(struct net_device *dev, struct socket *sock, } EXPORT_SYMBOL_GPL(udp_tunnel_push_rx_port); +void udp_tunnel_drop_rx_port(struct net_device *dev, struct socket *sock, + unsigned short type) +{ + struct sock *sk = sock->sk; + struct udp_tunnel_info ti; + + if (!dev->netdev_ops->ndo_udp_tunnel_del || + !(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) + return; + + ti.type = type; + ti.sa_family = sk->sk_family; + ti.port = inet_sk(sk)->inet_sport; + + dev->netdev_ops->ndo_udp_tunnel_del(dev, &ti); +} +EXPORT_SYMBOL_GPL(udp_tunnel_drop_rx_port); + /* Notify netdevs that UDP port started listening */ void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type) { @@ -109,6 +128,8 @@ void udp_tunnel_notify_add_rx_port(struct socket *sock, unsigned short type) for_each_netdev_rcu(net, dev) { if (!dev->netdev_ops->ndo_udp_tunnel_add) continue; + if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) + continue; dev->netdev_ops->ndo_udp_tunnel_add(dev, &ti); } rcu_read_unlock(); @@ -131,6 +152,8 @@ void udp_tunnel_notify_del_rx_port(struct socket *sock, unsigned short type) for_each_netdev_rcu(net, dev) { if (!dev->netdev_ops->ndo_udp_tunnel_del) continue; + if (!(dev->features & NETIF_F_RX_UDP_TUNNEL_PORT)) + continue; dev->netdev_ops->ndo_udp_tunnel_del(dev, &ti); } rcu_read_unlock(); diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 1fc684111ce6..e50b7fea57ee 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * xfrm4_input.c * diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 71b4ecc195c7..05017e2c849c 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * xfrm4_policy.c * @@ -20,7 +21,8 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4, int tos, int oif, const xfrm_address_t *saddr, - const xfrm_address_t *daddr) + const xfrm_address_t *daddr, + u32 mark) { struct rtable *rt; @@ -28,6 +30,7 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4, fl4->daddr = daddr->a4; fl4->flowi4_tos = tos; fl4->flowi4_oif = l3mdev_master_ifindex_by_index(net, oif); + fl4->flowi4_mark = mark; if (saddr) fl4->saddr = saddr->a4; @@ -42,20 +45,22 @@ static struct dst_entry *__xfrm4_dst_lookup(struct net *net, struct flowi4 *fl4, static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, int oif, const xfrm_address_t *saddr, - const xfrm_address_t *daddr) + const xfrm_address_t *daddr, + u32 mark) { struct flowi4 fl4; - return __xfrm4_dst_lookup(net, &fl4, tos, oif, saddr, daddr); + return __xfrm4_dst_lookup(net, &fl4, tos, oif, saddr, daddr, mark); } static int xfrm4_get_saddr(struct net *net, int oif, - xfrm_address_t *saddr, xfrm_address_t *daddr) + xfrm_address_t *saddr, xfrm_address_t *daddr, + u32 mark) { struct dst_entry *dst; struct flowi4 fl4; - dst = __xfrm4_dst_lookup(net, &fl4, 0, oif, NULL, daddr); + dst = __xfrm4_dst_lookup(net, &fl4, 0, oif, NULL, daddr, mark); if (IS_ERR(dst)) return -EHOSTUNREACH; @@ -213,14 +218,6 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) fl4->flowi4_tos = iph->tos; } -static inline int xfrm4_garbage_collect(struct dst_ops *ops) -{ - struct net *net = container_of(ops, struct net, xfrm.xfrm4_dst_ops); - - xfrm_garbage_collect_deferred(net); - return (dst_entries_get_slow(ops) > ops->gc_thresh * 2); -} - static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu) { @@ -259,14 +256,13 @@ static void xfrm4_dst_ifdown(struct dst_entry *dst, struct net_device *dev, static struct dst_ops xfrm4_dst_ops_template = { .family = AF_INET, - .gc = xfrm4_garbage_collect, .update_pmtu = xfrm4_update_pmtu, .redirect = xfrm4_redirect, .cow_metrics = dst_cow_metrics_generic, .destroy = xfrm4_dst_destroy, .ifdown = xfrm4_dst_ifdown, .local_out = __ip_local_out, - .gc_thresh = INT_MAX, + .gc_thresh = 32768, }; static const struct xfrm_policy_afinfo xfrm4_policy_afinfo = { diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index d6660a8c0ea5..80c40b4981bb 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * xfrm4_state.c * diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 48c452959d2c..ea71e4b0ab7a 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -308,22 +308,12 @@ config IPV6_SEG6_LWTUNNEL depends on IPV6 select LWTUNNEL select DST_CACHE + select IPV6_MULTIPLE_TABLES ---help--- Support for encapsulation of packets within an outer IPv6 header and a Segment Routing Header using the lightweight - tunnels mechanism. - - If unsure, say N. - -config IPV6_SEG6_INLINE - bool "IPv6: direct Segment Routing Header insertion " - depends on IPV6_SEG6_LWTUNNEL - ---help--- - Support for direct insertion of the Segment Routing Header, - also known as inline mode. Be aware that direct insertion of - extension headers (as opposed to encapsulation) may break - multiple mechanisms such as PMTUD or IPSec AH. Use this feature - only if you know exactly what you are doing. + tunnels mechanism. Also enable support for advanced local + processing of SRv6 packets based on their active segment. If unsure, say N. diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 217e9ff0e24b..e0026fa1261b 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the Linux TCP/IP (INET6) layer. # @@ -9,7 +10,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \ route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \ raw.o icmp.o mcast.o reassembly.o tcp_ipv6.o ping.o \ exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o \ - udp_offload.o seg6.o + udp_offload.o seg6.o fib6_notifier.o ipv6-offload := ip6_offload.o tcpv6_offload.o exthdrs_offload.o @@ -23,7 +24,7 @@ ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o ipv6-$(CONFIG_PROC_FS) += proc.o ipv6-$(CONFIG_SYN_COOKIES) += syncookies.o ipv6-$(CONFIG_NETLABEL) += calipso.o -ipv6-$(CONFIG_IPV6_SEG6_LWTUNNEL) += seg6_iptunnel.o +ipv6-$(CONFIG_IPV6_SEG6_LWTUNNEL) += seg6_iptunnel.o seg6_local.o ipv6-$(CONFIG_IPV6_SEG6_HMAC) += seg6_hmac.o ipv6-objs += $(ipv6-y) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3c46e9513a31..a0ae1c9d37df 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -152,11 +152,13 @@ static void ipv6_regen_rndid(struct inet6_dev *idev); static void ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr); static int ipv6_generate_eui64(u8 *eui, struct net_device *dev); -static int ipv6_count_addresses(struct inet6_dev *idev); +static int ipv6_count_addresses(const struct inet6_dev *idev); static int ipv6_generate_stable_address(struct in6_addr *addr, u8 dad_count, const struct inet6_dev *idev); +#define IN6_ADDR_HSIZE_SHIFT 8 +#define IN6_ADDR_HSIZE (1 << IN6_ADDR_HSIZE_SHIFT) /* * Configured unicast address hash table */ @@ -192,8 +194,6 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifa); static void inet6_prefix_notify(int event, struct inet6_dev *idev, struct prefix_info *pinfo); -static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, - struct net_device *dev); static struct ipv6_devconf ipv6_devconf __read_mostly = { .forwarding = 0, @@ -231,7 +231,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .proxy_ndp = 0, .accept_source_route = 0, /* we do not accept RH0 by default. */ .disable_ipv6 = 0, - .accept_dad = 1, + .accept_dad = 0, .suppress_frag_ndisc = 1, .accept_ra_mtu = 1, .stable_secret = { @@ -303,10 +303,10 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .disable_policy = 0, }; -/* Check if a valid qdisc is available */ -static inline bool addrconf_qdisc_ok(const struct net_device *dev) +/* Check if link is ready: is it up and is a valid qdisc available */ +static inline bool addrconf_link_ready(const struct net_device *dev) { - return !qdisc_tx_is_noop(dev); + return netif_oper_up(dev) && !qdisc_tx_is_noop(dev); } static void addrconf_del_rs_timer(struct inet6_dev *idev) @@ -451,7 +451,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) ndev->token = in6addr_any; - if (netif_running(dev) && addrconf_qdisc_ok(dev)) + if (netif_running(dev) && addrconf_link_ready(dev)) ndev->if_flags |= IF_READY; ipv6_mc_init_dev(ndev); @@ -616,23 +616,23 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb, { struct net *net = sock_net(in_skb->sk); struct nlattr *tb[NETCONFA_MAX+1]; + struct inet6_dev *in6_dev = NULL; + struct net_device *dev = NULL; struct netconfmsg *ncm; struct sk_buff *skb; struct ipv6_devconf *devconf; - struct inet6_dev *in6_dev; - struct net_device *dev; int ifindex; int err; err = nlmsg_parse(nlh, sizeof(*ncm), tb, NETCONFA_MAX, devconf_ipv6_policy, extack); if (err < 0) - goto errout; + return err; - err = -EINVAL; if (!tb[NETCONFA_IFINDEX]) - goto errout; + return -EINVAL; + err = -EINVAL; ifindex = nla_get_s32(tb[NETCONFA_IFINDEX]); switch (ifindex) { case NETCONFA_IFINDEX_ALL: @@ -642,10 +642,10 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb, devconf = net->ipv6.devconf_dflt; break; default: - dev = __dev_get_by_index(net, ifindex); + dev = dev_get_by_index(net, ifindex); if (!dev) - goto errout; - in6_dev = __in6_dev_get(dev); + return -EINVAL; + in6_dev = in6_dev_get(dev); if (!in6_dev) goto errout; devconf = &in6_dev->cnf; @@ -653,7 +653,7 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb, } err = -ENOBUFS; - skb = nlmsg_new(inet6_netconf_msgsize_devconf(NETCONFA_ALL), GFP_ATOMIC); + skb = nlmsg_new(inet6_netconf_msgsize_devconf(NETCONFA_ALL), GFP_KERNEL); if (!skb) goto errout; @@ -669,6 +669,10 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb, } err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); errout: + if (in6_dev) + in6_dev_put(in6_dev); + if (dev) + dev_put(dev); return err; } @@ -945,12 +949,50 @@ ipv6_link_dev_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp) break; } - list_add_tail(&ifp->if_list, p); + list_add_tail_rcu(&ifp->if_list, p); +} + +static u32 inet6_addr_hash(const struct net *net, const struct in6_addr *addr) +{ + u32 val = ipv6_addr_hash(addr) ^ net_hash_mix(net); + + return hash_32(val, IN6_ADDR_HSIZE_SHIFT); +} + +static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, + struct net_device *dev, unsigned int hash) +{ + struct inet6_ifaddr *ifp; + + hlist_for_each_entry(ifp, &inet6_addr_lst[hash], addr_lst) { + if (!net_eq(dev_net(ifp->idev->dev), net)) + continue; + if (ipv6_addr_equal(&ifp->addr, addr)) { + if (!dev || ifp->idev->dev == dev) + return true; + } + } + return false; } -static u32 inet6_addr_hash(const struct in6_addr *addr) +static int ipv6_add_addr_hash(struct net_device *dev, struct inet6_ifaddr *ifa) { - return hash_32(ipv6_addr_hash(addr), IN6_ADDR_HSIZE_SHIFT); + unsigned int hash = inet6_addr_hash(dev_net(dev), &ifa->addr); + int err = 0; + + spin_lock(&addrconf_hash_lock); + + /* Ignore adding duplicate addresses on an interface */ + if (ipv6_chk_same_addr(dev_net(dev), &ifa->addr, dev, hash)) { + ADBG("ipv6_add_addr: already assigned\n"); + err = -EEXIST; + } else { + hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]); + } + + spin_unlock(&addrconf_hash_lock); + + return err; } /* On success it returns ifp with increased reference count */ @@ -958,13 +1000,13 @@ static u32 inet6_addr_hash(const struct in6_addr *addr) static struct inet6_ifaddr * ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, const struct in6_addr *peer_addr, int pfxlen, - int scope, u32 flags, u32 valid_lft, u32 prefered_lft) + int scope, u32 flags, u32 valid_lft, u32 prefered_lft, + bool can_block, struct netlink_ext_ack *extack) { + gfp_t gfp_flags = can_block ? GFP_KERNEL : GFP_ATOMIC; struct net *net = dev_net(idev->dev); struct inet6_ifaddr *ifa = NULL; - struct rt6_info *rt; - struct in6_validator_info i6vi; - unsigned int hash; + struct rt6_info *rt = NULL; int err = 0; int addr_type = ipv6_addr_type(addr); @@ -974,42 +1016,33 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, addr_type & IPV6_ADDR_LOOPBACK)) return ERR_PTR(-EADDRNOTAVAIL); - rcu_read_lock_bh(); - - in6_dev_hold(idev); - if (idev->dead) { err = -ENODEV; /*XXX*/ - goto out2; + goto out; } if (idev->cnf.disable_ipv6) { err = -EACCES; - goto out2; - } - - i6vi.i6vi_addr = *addr; - i6vi.i6vi_dev = idev; - rcu_read_unlock_bh(); - - err = inet6addr_validator_notifier_call_chain(NETDEV_UP, &i6vi); - - rcu_read_lock_bh(); - err = notifier_to_errno(err); - if (err) - goto out2; - - spin_lock(&addrconf_hash_lock); - - /* Ignore adding duplicate addresses on an interface */ - if (ipv6_chk_same_addr(dev_net(idev->dev), addr, idev->dev)) { - ADBG("ipv6_add_addr: already assigned\n"); - err = -EEXIST; goto out; } - ifa = kzalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC); + /* validator notifier needs to be blocking; + * do not call in atomic context + */ + if (can_block) { + struct in6_validator_info i6vi = { + .i6vi_addr = *addr, + .i6vi_dev = idev, + .extack = extack, + }; + + err = inet6addr_validator_notifier_call_chain(NETDEV_UP, &i6vi); + err = notifier_to_errno(err); + if (err < 0) + goto out; + } + ifa = kzalloc(sizeof(*ifa), gfp_flags); if (!ifa) { ADBG("ipv6_add_addr: malloc failed\n"); err = -ENOBUFS; @@ -1019,6 +1052,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, rt = addrconf_dst_alloc(idev, addr, false); if (IS_ERR(rt)) { err = PTR_ERR(rt); + rt = NULL; goto out; } @@ -1049,16 +1083,21 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, ifa->rt = rt; ifa->idev = idev; + in6_dev_hold(idev); + /* For caller */ refcount_set(&ifa->refcnt, 1); - /* Add to big hash table */ - hash = inet6_addr_hash(addr); + rcu_read_lock_bh(); - hlist_add_head_rcu(&ifa->addr_lst, &inet6_addr_lst[hash]); - spin_unlock(&addrconf_hash_lock); + err = ipv6_add_addr_hash(idev->dev, ifa); + if (err < 0) { + rcu_read_unlock_bh(); + goto out; + } write_lock(&idev->lock); + /* Add to inet6_dev unicast addr list. */ ipv6_link_dev_addr(idev, ifa); @@ -1069,21 +1108,23 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, in6_ifa_hold(ifa); write_unlock(&idev->lock); -out2: + rcu_read_unlock_bh(); - if (likely(err == 0)) - inet6addr_notifier_call_chain(NETDEV_UP, ifa); - else { - kfree(ifa); - in6_dev_put(idev); + inet6addr_notifier_call_chain(NETDEV_UP, ifa); +out: + if (unlikely(err < 0)) { + if (rt) + ip6_rt_put(rt); + if (ifa) { + if (ifa->idev) + in6_dev_put(ifa->idev); + kfree(ifa); + } ifa = ERR_PTR(err); } return ifa; -out: - spin_unlock(&addrconf_hash_lock); - goto out2; } enum cleanup_prefix_rt_t { @@ -1204,7 +1245,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) if (ifp->flags & IFA_F_PERMANENT && !(ifp->flags & IFA_F_NOPREFIXROUTE)) action = check_cleanup_prefix_route(ifp, &expires); - list_del_init(&ifp->if_list); + list_del_rcu(&ifp->if_list); __in6_ifa_put(ifp); write_unlock_bh(&ifp->idev->lock); @@ -1226,7 +1267,9 @@ out: in6_ifa_put(ifp); } -static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, struct inet6_ifaddr *ift) +static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, + struct inet6_ifaddr *ift, + bool block) { struct inet6_dev *idev = ifp->idev; struct in6_addr addr, *tmpaddr; @@ -1330,7 +1373,7 @@ retry: ift = ipv6_add_addr(idev, &addr, NULL, tmp_plen, ipv6_addr_scope(&addr), addr_flags, - tmp_valid_lft, tmp_prefered_lft); + tmp_valid_lft, tmp_prefered_lft, block, NULL); if (IS_ERR(ift)) { in6_ifa_put(ifp); in6_dev_put(idev); @@ -1399,10 +1442,18 @@ static inline int ipv6_saddr_preferred(int type) return 0; } -static inline bool ipv6_use_optimistic_addr(struct inet6_dev *idev) +static bool ipv6_use_optimistic_addr(struct net *net, + struct inet6_dev *idev) { #ifdef CONFIG_IPV6_OPTIMISTIC_DAD - return idev && idev->cnf.optimistic_dad && idev->cnf.use_optimistic; + if (!idev) + return false; + if (!net->ipv6.devconf_all->optimistic_dad && !idev->cnf.optimistic_dad) + return false; + if (!net->ipv6.devconf_all->use_optimistic && !idev->cnf.use_optimistic) + return false; + + return true; #else return false; #endif @@ -1472,7 +1523,7 @@ static int ipv6_get_saddr_eval(struct net *net, /* Rule 3: Avoid deprecated and optimistic addresses */ u8 avoid = IFA_F_DEPRECATED; - if (!ipv6_use_optimistic_addr(score->ifa->idev)) + if (!ipv6_use_optimistic_addr(net, score->ifa->idev)) avoid |= IFA_F_OPTIMISTIC; ret = ipv6_saddr_preferred(score->addr_type) || !(score->ifa->flags & avoid); @@ -1550,8 +1601,7 @@ static int __ipv6_dev_get_saddr(struct net *net, { struct ipv6_saddr_score *score = &scores[1 - hiscore_idx], *hiscore = &scores[hiscore_idx]; - read_lock_bh(&idev->lock); - list_for_each_entry(score->ifa, &idev->addr_list, if_list) { + list_for_each_entry_rcu(score->ifa, &idev->addr_list, if_list) { int i; /* @@ -1601,11 +1651,6 @@ static int __ipv6_dev_get_saddr(struct net *net, } break; } else if (minihiscore < miniscore) { - if (hiscore->ifa) - in6_ifa_put(hiscore->ifa); - - in6_ifa_hold(score->ifa); - swap(hiscore, score); hiscore_idx = 1 - hiscore_idx; @@ -1617,7 +1662,6 @@ static int __ipv6_dev_get_saddr(struct net *net, } } out: - read_unlock_bh(&idev->lock); return hiscore_idx; } @@ -1654,6 +1698,7 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev, int dst_type; bool use_oif_addr = false; int hiscore_idx = 0; + int ret = 0; dst_type = __ipv6_addr_type(daddr); dst.addr = daddr; @@ -1729,15 +1774,14 @@ int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev, } out: - rcu_read_unlock(); - hiscore = &scores[hiscore_idx]; if (!hiscore->ifa) - return -EADDRNOTAVAIL; + ret = -EADDRNOTAVAIL; + else + *saddr = hiscore->ifa->addr; - *saddr = hiscore->ifa->addr; - in6_ifa_put(hiscore->ifa); - return 0; + rcu_read_unlock(); + return ret; } EXPORT_SYMBOL(ipv6_dev_get_saddr); @@ -1777,15 +1821,15 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr, return err; } -static int ipv6_count_addresses(struct inet6_dev *idev) +static int ipv6_count_addresses(const struct inet6_dev *idev) { + const struct inet6_ifaddr *ifp; int cnt = 0; - struct inet6_ifaddr *ifp; - read_lock_bh(&idev->lock); - list_for_each_entry(ifp, &idev->addr_list, if_list) + rcu_read_lock(); + list_for_each_entry_rcu(ifp, &idev->addr_list, if_list) cnt++; - read_unlock_bh(&idev->lock); + rcu_read_unlock(); return cnt; } @@ -1800,11 +1844,11 @@ int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr, const struct net_device *dev, int strict, u32 banned_flags) { + unsigned int hash = inet6_addr_hash(net, addr); struct inet6_ifaddr *ifp; - unsigned int hash = inet6_addr_hash(addr); u32 ifp_flags; - rcu_read_lock_bh(); + rcu_read_lock(); hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; @@ -1818,32 +1862,16 @@ int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr, !(ifp_flags&banned_flags) && (!dev || ifp->idev->dev == dev || !(ifp->scope&(IFA_LINK|IFA_HOST) || strict))) { - rcu_read_unlock_bh(); + rcu_read_unlock(); return 1; } } - rcu_read_unlock_bh(); + rcu_read_unlock(); return 0; } EXPORT_SYMBOL(ipv6_chk_addr_and_flags); -static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, - struct net_device *dev) -{ - unsigned int hash = inet6_addr_hash(addr); - struct inet6_ifaddr *ifp; - - hlist_for_each_entry(ifp, &inet6_addr_lst[hash], addr_lst) { - if (!net_eq(dev_net(ifp->idev->dev), net)) - continue; - if (ipv6_addr_equal(&ifp->addr, addr)) { - if (!dev || ifp->idev->dev == dev) - return true; - } - } - return false; -} /* Compares an address/prefix_len with addresses on device @dev. * If one is found it returns true. @@ -1851,20 +1879,18 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, bool ipv6_chk_custom_prefix(const struct in6_addr *addr, const unsigned int prefix_len, struct net_device *dev) { - struct inet6_dev *idev; - struct inet6_ifaddr *ifa; + const struct inet6_ifaddr *ifa; + const struct inet6_dev *idev; bool ret = false; rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) { - read_lock_bh(&idev->lock); - list_for_each_entry(ifa, &idev->addr_list, if_list) { + list_for_each_entry_rcu(ifa, &idev->addr_list, if_list) { ret = ipv6_prefix_equal(addr, &ifa->addr, prefix_len); if (ret) break; } - read_unlock_bh(&idev->lock); } rcu_read_unlock(); @@ -1874,22 +1900,20 @@ EXPORT_SYMBOL(ipv6_chk_custom_prefix); int ipv6_chk_prefix(const struct in6_addr *addr, struct net_device *dev) { - struct inet6_dev *idev; - struct inet6_ifaddr *ifa; + const struct inet6_ifaddr *ifa; + const struct inet6_dev *idev; int onlink; onlink = 0; rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) { - read_lock_bh(&idev->lock); - list_for_each_entry(ifa, &idev->addr_list, if_list) { + list_for_each_entry_rcu(ifa, &idev->addr_list, if_list) { onlink = ipv6_prefix_equal(addr, &ifa->addr, ifa->prefix_len); if (onlink) break; } - read_unlock_bh(&idev->lock); } rcu_read_unlock(); return onlink; @@ -1899,11 +1923,11 @@ EXPORT_SYMBOL(ipv6_chk_prefix); struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *addr, struct net_device *dev, int strict) { + unsigned int hash = inet6_addr_hash(net, addr); struct inet6_ifaddr *ifp, *result = NULL; - unsigned int hash = inet6_addr_hash(addr); - rcu_read_lock_bh(); - hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[hash], addr_lst) { + rcu_read_lock(); + hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr)) { @@ -1915,7 +1939,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add } } } - rcu_read_unlock_bh(); + rcu_read_unlock(); return result; } @@ -1934,7 +1958,7 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed) if (ifpub) { in6_ifa_hold(ifpub); spin_unlock_bh(&ifp->lock); - ipv6_create_tempaddr(ifpub, ifp); + ipv6_create_tempaddr(ifpub, ifp, true); in6_ifa_put(ifpub); } else { spin_unlock_bh(&ifp->lock); @@ -1967,7 +1991,7 @@ static int addrconf_dad_end(struct inet6_ifaddr *ifp) return err; } -void addrconf_dad_failure(struct inet6_ifaddr *ifp) +void addrconf_dad_failure(struct sk_buff *skb, struct inet6_ifaddr *ifp) { struct inet6_dev *idev = ifp->idev; struct net *net = dev_net(ifp->idev->dev); @@ -1977,8 +2001,8 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp) return; } - net_info_ratelimited("%s: IPv6 duplicate address %pI6c detected!\n", - ifp->idev->dev->name, &ifp->addr); + net_info_ratelimited("%s: IPv6 duplicate address %pI6c used by %pM detected!\n", + ifp->idev->dev->name, &ifp->addr, eth_hdr(skb)->h_source); spin_lock_bh(&ifp->lock); @@ -2017,7 +2041,7 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp) ifp2 = ipv6_add_addr(idev, &new_addr, NULL, pfxlen, scope, flags, valid_lft, - preferred_lft); + preferred_lft, false, NULL); if (IS_ERR(ifp2)) goto lock_errdad; @@ -2313,24 +2337,24 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, if (!table) return NULL; - read_lock_bh(&table->tb6_lock); - fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0); + rcu_read_lock(); + fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0, true); if (!fn) goto out; - noflags |= RTF_CACHE; - for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { + for_each_fib6_node_rt_rcu(fn) { if (rt->dst.dev->ifindex != dev->ifindex) continue; if ((rt->rt6i_flags & flags) != flags) continue; if ((rt->rt6i_flags & noflags) != 0) continue; - dst_hold(&rt->dst); + if (!dst_hold_safe(&rt->dst)) + rt = NULL; break; } out: - read_unlock_bh(&table->tb6_lock); + rcu_read_unlock(); return rt; } @@ -2434,7 +2458,7 @@ static void manage_tempaddrs(struct inet6_dev *idev, * no temporary address currently exists. */ read_unlock_bh(&idev->lock); - ipv6_create_tempaddr(ifp, NULL); + ipv6_create_tempaddr(ifp, NULL, false); } else { read_unlock_bh(&idev->lock); } @@ -2460,7 +2484,8 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, int max_addresses = in6_dev->cnf.max_addresses; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD - if (in6_dev->cnf.optimistic_dad && + if ((net->ipv6.devconf_all->optimistic_dad || + in6_dev->cnf.optimistic_dad) && !net->ipv6.devconf_all->forwarding && sllao) addr_flags |= IFA_F_OPTIMISTIC; #endif @@ -2474,7 +2499,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev, pinfo->prefix_len, addr_type&IPV6_ADDR_SCOPE_MASK, addr_flags, valid_lft, - prefered_lft); + prefered_lft, false, NULL); if (IS_ERR_OR_NULL(ifp)) return -1; @@ -2784,7 +2809,8 @@ static int inet6_addr_add(struct net *net, int ifindex, const struct in6_addr *pfx, const struct in6_addr *peer_pfx, unsigned int plen, __u32 ifa_flags, - __u32 prefered_lft, __u32 valid_lft) + __u32 prefered_lft, __u32 valid_lft, + struct netlink_ext_ack *extack) { struct inet6_ifaddr *ifp; struct inet6_dev *idev; @@ -2843,7 +2869,7 @@ static int inet6_addr_add(struct net *net, int ifindex, } ifp = ipv6_add_addr(idev, pfx, peer_pfx, plen, scope, ifa_flags, - valid_lft, prefered_lft); + valid_lft, prefered_lft, true, extack); if (!IS_ERR(ifp)) { if (!(ifa_flags & IFA_F_NOPREFIXROUTE)) { @@ -2928,7 +2954,7 @@ int addrconf_add_ifaddr(struct net *net, void __user *arg) rtnl_lock(); err = inet6_addr_add(net, ireq.ifr6_ifindex, &ireq.ifr6_addr, NULL, ireq.ifr6_prefixlen, IFA_F_PERMANENT, - INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); + INFINITY_LIFE_TIME, INFINITY_LIFE_TIME, NULL); rtnl_unlock(); return err; } @@ -2958,7 +2984,8 @@ static void add_addr(struct inet6_dev *idev, const struct in6_addr *addr, ifp = ipv6_add_addr(idev, addr, NULL, plen, scope, IFA_F_PERMANENT, - INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); + INFINITY_LIFE_TIME, INFINITY_LIFE_TIME, + true, NULL); if (!IS_ERR(ifp)) { spin_lock_bh(&ifp->lock); ifp->flags &= ~IFA_F_TENTATIVE; @@ -3030,9 +3057,6 @@ static void sit_add_v4_addrs(struct inet6_dev *idev) static void init_loopback(struct net_device *dev) { struct inet6_dev *idev; - struct net_device *sp_dev; - struct inet6_ifaddr *sp_ifa; - struct rt6_info *sp_rt; /* ::1 */ @@ -3045,45 +3069,6 @@ static void init_loopback(struct net_device *dev) } add_addr(idev, &in6addr_loopback, 128, IFA_HOST); - - /* Add routes to other interface's IPv6 addresses */ - for_each_netdev(dev_net(dev), sp_dev) { - if (!strcmp(sp_dev->name, dev->name)) - continue; - - idev = __in6_dev_get(sp_dev); - if (!idev) - continue; - - read_lock_bh(&idev->lock); - list_for_each_entry(sp_ifa, &idev->addr_list, if_list) { - - if (sp_ifa->flags & (IFA_F_DADFAILED | IFA_F_TENTATIVE)) - continue; - - if (sp_ifa->rt) { - /* This dst has been added to garbage list when - * lo device down, release this obsolete dst and - * reallocate a new router for ifa. - */ - if (!atomic_read(&sp_ifa->rt->rt6i_ref)) { - ip6_rt_put(sp_ifa->rt); - sp_ifa->rt = NULL; - } else { - continue; - } - } - - sp_rt = addrconf_dst_alloc(idev, &sp_ifa->addr, false); - - /* Failure cases are ignored */ - if (!IS_ERR(sp_rt)) { - sp_ifa->rt = sp_rt; - ip6_ins_rt(sp_rt); - } - } - read_unlock_bh(&idev->lock); - } } void addrconf_add_linklocal(struct inet6_dev *idev, @@ -3093,13 +3078,14 @@ void addrconf_add_linklocal(struct inet6_dev *idev, u32 addr_flags = flags | IFA_F_PERMANENT; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD - if (idev->cnf.optimistic_dad && + if ((dev_net(idev->dev)->ipv6.devconf_all->optimistic_dad || + idev->cnf.optimistic_dad) && !dev_net(idev->dev)->ipv6.devconf_all->forwarding) addr_flags |= IFA_F_OPTIMISTIC; #endif ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags, - INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); + INFINITY_LIFE_TIME, INFINITY_LIFE_TIME, true, NULL); if (!IS_ERR(ifp)) { addrconf_prefix_route(&ifp->addr, ifp->prefix_len, idev->dev, 0, 0); addrconf_dad_start(ifp); @@ -3321,15 +3307,15 @@ static void addrconf_gre_config(struct net_device *dev) static int fixup_permanent_addr(struct inet6_dev *idev, struct inet6_ifaddr *ifp) { - /* rt6i_ref == 0 means the host route was removed from the + /* !rt6i_node means the host route was removed from the * FIB, for example, if 'lo' device is taken down. In that * case regenerate the host route. */ - if (!ifp->rt || !atomic_read(&ifp->rt->rt6i_ref)) { + if (!ifp->rt || !ifp->rt->rt6i_node) { struct rt6_info *rt, *prev; rt = addrconf_dst_alloc(idev, &ifp->addr, false); - if (unlikely(IS_ERR(rt))) + if (IS_ERR(rt)) return PTR_ERR(rt); /* ifp->rt can be accessed outside of rtnl */ @@ -3367,6 +3353,7 @@ static void addrconf_permanent_addr(struct net_device *dev) if ((ifp->flags & IFA_F_PERMANENT) && fixup_permanent_addr(idev, ifp) < 0) { write_unlock_bh(&idev->lock); + in6_ifa_hold(ifp); ipv6_del_addr(ifp); write_lock_bh(&idev->lock); @@ -3435,7 +3422,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, /* restore routes for permanent addresses */ addrconf_permanent_addr(dev); - if (!addrconf_qdisc_ok(dev)) { + if (!addrconf_link_ready(dev)) { /* device is not ready yet. */ pr_info("ADDRCONF(NETDEV_UP): %s: link is not ready\n", dev->name); @@ -3450,7 +3437,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, run_pending = 1; } } else if (event == NETDEV_CHANGE) { - if (!addrconf_qdisc_ok(dev)) { + if (!addrconf_link_ready(dev)) { /* device is still not ready. */ break; } @@ -3594,7 +3581,6 @@ static int addrconf_ifdown(struct net_device *dev, int how) struct net *net = dev_net(dev); struct inet6_dev *idev; struct inet6_ifaddr *ifa, *tmp; - struct list_head del_list; int _keep_addr; bool keep_addr; int state, i; @@ -3686,7 +3672,6 @@ restart: */ keep_addr = (!how && _keep_addr > 0 && !idev->cnf.disable_ipv6); - INIT_LIST_HEAD(&del_list); list_for_each_entry_safe(ifa, tmp, &idev->addr_list, if_list) { struct rt6_info *rt = NULL; bool keep; @@ -3695,8 +3680,6 @@ restart: keep = keep_addr && (ifa->flags & IFA_F_PERMANENT) && !addr_is_local(&ifa->addr); - if (!keep) - list_move(&ifa->if_list, &del_list); write_unlock_bh(&idev->lock); spin_lock_bh(&ifa->lock); @@ -3730,19 +3713,14 @@ restart: } write_lock_bh(&idev->lock); + if (!keep) { + list_del_rcu(&ifa->if_list); + in6_ifa_put(ifa); + } } write_unlock_bh(&idev->lock); - /* now clean up addresses to be removed */ - while (!list_empty(&del_list)) { - ifa = list_first_entry(&del_list, - struct inet6_ifaddr, if_list); - list_del(&ifa->if_list); - - in6_ifa_put(ifa); - } - /* Step 5: Discard anycast and multicast list */ if (how) { ipv6_ac_destroy_dev(idev); @@ -3852,7 +3830,8 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) goto out; if (dev->flags&(IFF_NOARP|IFF_LOOPBACK) || - idev->cnf.accept_dad < 1 || + (dev_net(dev)->ipv6.devconf_all->accept_dad < 1 && + idev->cnf.accept_dad < 1) || !(ifp->flags&IFA_F_TENTATIVE) || ifp->flags & IFA_F_NODAD) { bump_id = ifp->flags & IFA_F_TENTATIVE; @@ -3883,7 +3862,7 @@ static void addrconf_dad_begin(struct inet6_ifaddr *ifp) */ if (ifp->flags & IFA_F_OPTIMISTIC) { ip6_ins_rt(ifp->rt); - if (ipv6_use_optimistic_addr(idev)) { + if (ipv6_use_optimistic_addr(dev_net(dev), idev)) { /* Because optimistic nodes can use this address, * notify listeners. If DAD fails, RTM_DELADDR is sent. */ @@ -3939,7 +3918,9 @@ static void addrconf_dad_work(struct work_struct *w) action = DAD_ABORT; ifp->state = INET6_IFADDR_STATE_POSTDAD; - if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6 && + if ((dev_net(idev->dev)->ipv6.devconf_all->accept_dad > 1 || + idev->cnf.accept_dad > 1) && + !idev->cnf.disable_ipv6 && !(ifp->flags & IFA_F_STABLE_PRIVACY)) { struct in6_addr addr; @@ -4121,9 +4102,9 @@ struct if6_iter_state { static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos) { - struct inet6_ifaddr *ifa = NULL; struct if6_iter_state *state = seq->private; struct net *net = seq_file_net(seq); + struct inet6_ifaddr *ifa = NULL; int p = 0; /* initial bucket if pos is 0 */ @@ -4133,7 +4114,7 @@ static struct inet6_ifaddr *if6_get_first(struct seq_file *seq, loff_t pos) } for (; state->bucket < IN6_ADDR_HSIZE; ++state->bucket) { - hlist_for_each_entry_rcu_bh(ifa, &inet6_addr_lst[state->bucket], + hlist_for_each_entry_rcu(ifa, &inet6_addr_lst[state->bucket], addr_lst) { if (!net_eq(dev_net(ifa->idev->dev), net)) continue; @@ -4159,7 +4140,7 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, struct if6_iter_state *state = seq->private; struct net *net = seq_file_net(seq); - hlist_for_each_entry_continue_rcu_bh(ifa, addr_lst) { + hlist_for_each_entry_continue_rcu(ifa, addr_lst) { if (!net_eq(dev_net(ifa->idev->dev), net)) continue; state->offset++; @@ -4168,7 +4149,7 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, while (++state->bucket < IN6_ADDR_HSIZE) { state->offset = 0; - hlist_for_each_entry_rcu_bh(ifa, + hlist_for_each_entry_rcu(ifa, &inet6_addr_lst[state->bucket], addr_lst) { if (!net_eq(dev_net(ifa->idev->dev), net)) continue; @@ -4181,9 +4162,9 @@ static struct inet6_ifaddr *if6_get_next(struct seq_file *seq, } static void *if6_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(rcu_bh) + __acquires(rcu) { - rcu_read_lock_bh(); + rcu_read_lock(); return if6_get_first(seq, *pos); } @@ -4197,9 +4178,9 @@ static void *if6_seq_next(struct seq_file *seq, void *v, loff_t *pos) } static void if6_seq_stop(struct seq_file *seq, void *v) - __releases(rcu_bh) + __releases(rcu) { - rcu_read_unlock_bh(); + rcu_read_unlock(); } static int if6_seq_show(struct seq_file *seq, void *v) @@ -4268,12 +4249,12 @@ void if6_proc_exit(void) /* Check if address is a home address configured on any interface. */ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr) { - int ret = 0; + unsigned int hash = inet6_addr_hash(net, addr); struct inet6_ifaddr *ifp = NULL; - unsigned int hash = inet6_addr_hash(addr); + int ret = 0; - rcu_read_lock_bh(); - hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[hash], addr_lst) { + rcu_read_lock(); + hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr) && @@ -4282,7 +4263,7 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr) break; } } - rcu_read_unlock_bh(); + rcu_read_unlock(); return ret; } #endif @@ -4372,7 +4353,7 @@ restart: spin_lock(&ifpub->lock); ifpub->regen_count = 0; spin_unlock(&ifpub->lock); - ipv6_create_tempaddr(ifpub, ifp); + ipv6_create_tempaddr(ifpub, ifp, true); in6_ifa_put(ifpub); in6_ifa_put(ifp); goto restart; @@ -4608,7 +4589,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, */ return inet6_addr_add(net, ifm->ifa_index, pfx, peer_pfx, ifm->ifa_prefixlen, ifa_flags, - preferred_lft, valid_lft); + preferred_lft, valid_lft, extack); } if (nlh->nlmsg_flags & NLM_F_EXCL || @@ -4935,17 +4916,15 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv6_policy, extack); if (err < 0) - goto errout; + return err; addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer); - if (!addr) { - err = -EINVAL; - goto errout; - } + if (!addr) + return -EINVAL; ifm = nlmsg_data(nlh); if (ifm->ifa_index) - dev = __dev_get_by_index(net, ifm->ifa_index); + dev = dev_get_by_index(net, ifm->ifa_index); ifa = ipv6_get_ifaddr(net, addr, dev, 1); if (!ifa) { @@ -4971,6 +4950,8 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, errout_ifa: in6_ifa_put(ifa); errout: + if (dev) + dev_put(dev); return err; } @@ -4982,9 +4963,10 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) /* Don't send DELADDR notification for TENTATIVE address, * since NEWADDR notification is sent only after removing - * TENTATIVE flag. + * TENTATIVE flag, if DAD has not failed. */ - if (ifa->flags & IFA_F_TENTATIVE && event == RTM_DELADDR) + if (ifa->flags & IFA_F_TENTATIVE && !(ifa->flags & IFA_F_DADFAILED) && + event == RTM_DELADDR) return; skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC); @@ -5077,6 +5059,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_ENHANCED_DAD] = cnf->enhanced_dad; array[DEVCONF_ADDR_GEN_MODE] = cnf->addr_gen_mode; array[DEVCONF_DISABLE_POLICY] = cnf->disable_policy; + array[DEVCONF_NDISC_TCLASS] = cnf->ndisc_tclass; } static inline size_t inet6_ifla6_size(void) @@ -5556,7 +5539,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) * our DAD process, so we don't need * to do it again */ - if (!(ifp->rt->rt6i_node)) + if (!rcu_access_pointer(ifp->rt->rt6i_node)) ip6_ins_rt(ifp->rt); if (ifp->idev->cnf.forwarding) addrconf_join_anycast(ifp); @@ -5926,10 +5909,9 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val) spin_lock(&ifa->lock); if (ifa->rt) { struct rt6_info *rt = ifa->rt; - struct fib6_table *table = rt->rt6i_table; int cpu; - read_lock(&table->tb6_lock); + rcu_read_lock(); addrconf_set_nopolicy(ifa->rt, val); if (rt->rt6i_pcpu) { for_each_possible_cpu(cpu) { @@ -5939,7 +5921,7 @@ void addrconf_disable_policy_idev(struct inet6_dev *idev, int val) addrconf_set_nopolicy(*rtp, val); } } - read_unlock(&table->tb6_lock); + rcu_read_unlock(); } spin_unlock(&ifa->lock); } @@ -6005,6 +5987,7 @@ int addrconf_sysctl_disable_policy(struct ctl_table *ctl, int write, } static int minus_one = -1; +static const int zero = 0; static const int one = 1; static const int two_five_five = 255; @@ -6376,6 +6359,15 @@ static const struct ctl_table addrconf_sysctl[] = { .proc_handler = addrconf_sysctl_disable_policy, }, { + .procname = "ndisc_tclass", + .data = &ipv6_devconf.ndisc_tclass, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = (void *)&zero, + .extra2 = (void *)&two_five_five, + }, + { /* sentinel */ } }; @@ -6605,21 +6597,21 @@ int __init addrconf_init(void) rtnl_af_register(&inet6_ops); err = __rtnl_register(PF_INET6, RTM_GETLINK, NULL, inet6_dump_ifinfo, - NULL); + 0); if (err < 0) goto errout; /* Only the first call to __rtnl_register can fail */ - __rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL, NULL); - __rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL, NULL); + __rtnl_register(PF_INET6, RTM_NEWADDR, inet6_rtm_newaddr, NULL, 0); + __rtnl_register(PF_INET6, RTM_DELADDR, inet6_rtm_deladdr, NULL, 0); __rtnl_register(PF_INET6, RTM_GETADDR, inet6_rtm_getaddr, - inet6_dump_ifaddr, NULL); + inet6_dump_ifaddr, RTNL_FLAG_DOIT_UNLOCKED); __rtnl_register(PF_INET6, RTM_GETMULTICAST, NULL, - inet6_dump_ifmcaddr, NULL); + inet6_dump_ifmcaddr, 0); __rtnl_register(PF_INET6, RTM_GETANYCAST, NULL, - inet6_dump_ifacaddr, NULL); + inet6_dump_ifacaddr, 0); __rtnl_register(PF_INET6, RTM_GETNETCONF, inet6_netconf_get_devconf, - inet6_netconf_dump_devconf, NULL); + inet6_netconf_dump_devconf, RTNL_FLAG_DOIT_UNLOCKED); ipv6_addr_label_rtnl_register(); @@ -6646,9 +6638,9 @@ void addrconf_cleanup(void) unregister_pernet_subsys(&addrconf_ops); ipv6_addr_label_cleanup(); - rtnl_lock(); + rtnl_af_unregister(&inet6_ops); - __rtnl_af_unregister(&inet6_ops); + rtnl_lock(); /* clean dev list */ for_each_netdev(&init_net, dev) { diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index 9e3488d50b15..32b564dfd02a 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -88,7 +88,7 @@ int __ipv6_addr_type(const struct in6_addr *addr) EXPORT_SYMBOL(__ipv6_addr_type); static ATOMIC_NOTIFIER_HEAD(inet6addr_chain); -static ATOMIC_NOTIFIER_HEAD(inet6addr_validator_chain); +static BLOCKING_NOTIFIER_HEAD(inet6addr_validator_chain); int register_inet6addr_notifier(struct notifier_block *nb) { @@ -110,19 +110,20 @@ EXPORT_SYMBOL(inet6addr_notifier_call_chain); int register_inet6addr_validator_notifier(struct notifier_block *nb) { - return atomic_notifier_chain_register(&inet6addr_validator_chain, nb); + return blocking_notifier_chain_register(&inet6addr_validator_chain, nb); } EXPORT_SYMBOL(register_inet6addr_validator_notifier); int unregister_inet6addr_validator_notifier(struct notifier_block *nb) { - return atomic_notifier_chain_unregister(&inet6addr_validator_chain, nb); + return blocking_notifier_chain_unregister(&inet6addr_validator_chain, + nb); } EXPORT_SYMBOL(unregister_inet6addr_validator_notifier); int inet6addr_validator_notifier_call_chain(unsigned long val, void *v) { - return atomic_notifier_call_chain(&inet6addr_validator_chain, val, v); + return blocking_notifier_call_chain(&inet6addr_validator_chain, val, v); } EXPORT_SYMBOL(inet6addr_validator_notifier_call_chain); diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 7a428f65c7ec..00e1f8ee08f8 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * IPv6 Address Label subsystem * for the IPv6 "Default" Source Address Selection @@ -18,7 +19,6 @@ #include <linux/if_addrlabel.h> #include <linux/netlink.h> #include <linux/rtnetlink.h> -#include <linux/refcount.h> #if 0 #define ADDRLABEL(x...) printk(x) @@ -30,30 +30,15 @@ * Policy Table */ struct ip6addrlbl_entry { - possible_net_t lbl_net; struct in6_addr prefix; int prefixlen; int ifindex; int addrtype; u32 label; struct hlist_node list; - refcount_t refcnt; struct rcu_head rcu; }; -static struct ip6addrlbl_table -{ - struct hlist_head head; - spinlock_t lock; - u32 seq; -} ip6addrlbl_table; - -static inline -struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl) -{ - return read_pnet(&lbl->lbl_net); -} - /* * Default policy table (RFC6724 + extensions) * @@ -125,36 +110,11 @@ static const __net_initconst struct ip6addrlbl_init_table } }; -/* Object management */ -static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p) -{ - kfree(p); -} - -static void ip6addrlbl_free_rcu(struct rcu_head *h) -{ - ip6addrlbl_free(container_of(h, struct ip6addrlbl_entry, rcu)); -} - -static bool ip6addrlbl_hold(struct ip6addrlbl_entry *p) -{ - return refcount_inc_not_zero(&p->refcnt); -} - -static inline void ip6addrlbl_put(struct ip6addrlbl_entry *p) -{ - if (refcount_dec_and_test(&p->refcnt)) - call_rcu(&p->rcu, ip6addrlbl_free_rcu); -} - /* Find label */ -static bool __ip6addrlbl_match(struct net *net, - const struct ip6addrlbl_entry *p, +static bool __ip6addrlbl_match(const struct ip6addrlbl_entry *p, const struct in6_addr *addr, int addrtype, int ifindex) { - if (!net_eq(ip6addrlbl_net(p), net)) - return false; if (p->ifindex && p->ifindex != ifindex) return false; if (p->addrtype && p->addrtype != addrtype) @@ -169,8 +129,9 @@ static struct ip6addrlbl_entry *__ipv6_addr_label(struct net *net, int type, int ifindex) { struct ip6addrlbl_entry *p; - hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) { - if (__ip6addrlbl_match(net, p, addr, type, ifindex)) + + hlist_for_each_entry_rcu(p, &net->ipv6.ip6addrlbl_table.head, list) { + if (__ip6addrlbl_match(p, addr, type, ifindex)) return p; } return NULL; @@ -196,8 +157,7 @@ u32 ipv6_addr_label(struct net *net, } /* allocate one entry */ -static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net, - const struct in6_addr *prefix, +static struct ip6addrlbl_entry *ip6addrlbl_alloc(const struct in6_addr *prefix, int prefixlen, int ifindex, u32 label) { @@ -236,24 +196,22 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net, newp->addrtype = addrtype; newp->label = label; INIT_HLIST_NODE(&newp->list); - write_pnet(&newp->lbl_net, net); - refcount_set(&newp->refcnt, 1); return newp; } /* add a label */ -static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace) +static int __ip6addrlbl_add(struct net *net, struct ip6addrlbl_entry *newp, + int replace) { - struct hlist_node *n; struct ip6addrlbl_entry *last = NULL, *p = NULL; + struct hlist_node *n; int ret = 0; ADDRLABEL(KERN_DEBUG "%s(newp=%p, replace=%d)\n", __func__, newp, replace); - hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { + hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) { if (p->prefixlen == newp->prefixlen && - net_eq(ip6addrlbl_net(p), ip6addrlbl_net(newp)) && p->ifindex == newp->ifindex && ipv6_addr_equal(&p->prefix, &newp->prefix)) { if (!replace) { @@ -261,7 +219,7 @@ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace) goto out; } hlist_replace_rcu(&p->list, &newp->list); - ip6addrlbl_put(p); + kfree_rcu(p, rcu); goto out; } else if ((p->prefixlen == newp->prefixlen && !p->ifindex) || (p->prefixlen < newp->prefixlen)) { @@ -273,10 +231,10 @@ static int __ip6addrlbl_add(struct ip6addrlbl_entry *newp, int replace) if (last) hlist_add_behind_rcu(&newp->list, &last->list); else - hlist_add_head_rcu(&newp->list, &ip6addrlbl_table.head); + hlist_add_head_rcu(&newp->list, &net->ipv6.ip6addrlbl_table.head); out: if (!ret) - ip6addrlbl_table.seq++; + net->ipv6.ip6addrlbl_table.seq++; return ret; } @@ -292,14 +250,14 @@ static int ip6addrlbl_add(struct net *net, __func__, prefix, prefixlen, ifindex, (unsigned int)label, replace); - newp = ip6addrlbl_alloc(net, prefix, prefixlen, ifindex, label); + newp = ip6addrlbl_alloc(prefix, prefixlen, ifindex, label); if (IS_ERR(newp)) return PTR_ERR(newp); - spin_lock(&ip6addrlbl_table.lock); - ret = __ip6addrlbl_add(newp, replace); - spin_unlock(&ip6addrlbl_table.lock); + spin_lock(&net->ipv6.ip6addrlbl_table.lock); + ret = __ip6addrlbl_add(net, newp, replace); + spin_unlock(&net->ipv6.ip6addrlbl_table.lock); if (ret) - ip6addrlbl_free(newp); + kfree(newp); return ret; } @@ -315,13 +273,12 @@ static int __ip6addrlbl_del(struct net *net, ADDRLABEL(KERN_DEBUG "%s(prefix=%pI6, prefixlen=%d, ifindex=%d)\n", __func__, prefix, prefixlen, ifindex); - hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { + hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) { if (p->prefixlen == prefixlen && - net_eq(ip6addrlbl_net(p), net) && p->ifindex == ifindex && ipv6_addr_equal(&p->prefix, prefix)) { hlist_del_rcu(&p->list); - ip6addrlbl_put(p); + kfree_rcu(p, rcu); ret = 0; break; } @@ -340,9 +297,9 @@ static int ip6addrlbl_del(struct net *net, __func__, prefix, prefixlen, ifindex); ipv6_addr_prefix(&prefix_buf, prefix, prefixlen); - spin_lock(&ip6addrlbl_table.lock); + spin_lock(&net->ipv6.ip6addrlbl_table.lock); ret = __ip6addrlbl_del(net, &prefix_buf, prefixlen, ifindex); - spin_unlock(&ip6addrlbl_table.lock); + spin_unlock(&net->ipv6.ip6addrlbl_table.lock); return ret; } @@ -354,6 +311,9 @@ static int __net_init ip6addrlbl_net_init(struct net *net) ADDRLABEL(KERN_DEBUG "%s\n", __func__); + spin_lock_init(&net->ipv6.ip6addrlbl_table.lock); + INIT_HLIST_HEAD(&net->ipv6.ip6addrlbl_table.head); + for (i = 0; i < ARRAY_SIZE(ip6addrlbl_init_table); i++) { int ret = ip6addrlbl_add(net, ip6addrlbl_init_table[i].prefix, @@ -373,14 +333,12 @@ static void __net_exit ip6addrlbl_net_exit(struct net *net) struct hlist_node *n; /* Remove all labels belonging to the exiting net */ - spin_lock(&ip6addrlbl_table.lock); - hlist_for_each_entry_safe(p, n, &ip6addrlbl_table.head, list) { - if (net_eq(ip6addrlbl_net(p), net)) { - hlist_del_rcu(&p->list); - ip6addrlbl_put(p); - } + spin_lock(&net->ipv6.ip6addrlbl_table.lock); + hlist_for_each_entry_safe(p, n, &net->ipv6.ip6addrlbl_table.head, list) { + hlist_del_rcu(&p->list); + kfree_rcu(p, rcu); } - spin_unlock(&ip6addrlbl_table.lock); + spin_unlock(&net->ipv6.ip6addrlbl_table.lock); } static struct pernet_operations ipv6_addr_label_ops = { @@ -390,8 +348,6 @@ static struct pernet_operations ipv6_addr_label_ops = { int __init ipv6_addr_label_init(void) { - spin_lock_init(&ip6addrlbl_table.lock); - return register_pernet_subsys(&ipv6_addr_label_ops); } @@ -405,6 +361,18 @@ static const struct nla_policy ifal_policy[IFAL_MAX+1] = { [IFAL_LABEL] = { .len = sizeof(u32), }, }; +static bool addrlbl_ifindex_exists(struct net *net, int ifindex) +{ + + struct net_device *dev; + + rcu_read_lock(); + dev = dev_get_by_index_rcu(net, ifindex); + rcu_read_unlock(); + + return dev != NULL; +} + static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh, struct netlink_ext_ack *extack) { @@ -439,7 +407,7 @@ static int ip6addrlbl_newdel(struct sk_buff *skb, struct nlmsghdr *nlh, switch (nlh->nlmsg_type) { case RTM_NEWADDRLABEL: if (ifal->ifal_index && - !__dev_get_by_index(net, ifal->ifal_index)) + !addrlbl_ifindex_exists(net, ifal->ifal_index)) return -EINVAL; err = ip6addrlbl_add(net, pfx, ifal->ifal_prefixlen, @@ -498,11 +466,10 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) int err; rcu_read_lock(); - hlist_for_each_entry_rcu(p, &ip6addrlbl_table.head, list) { - if (idx >= s_idx && - net_eq(ip6addrlbl_net(p), net)) { + hlist_for_each_entry_rcu(p, &net->ipv6.ip6addrlbl_table.head, list) { + if (idx >= s_idx) { err = ip6addrlbl_fill(skb, p, - ip6addrlbl_table.seq, + net->ipv6.ip6addrlbl_table.seq, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWADDRLABEL, @@ -548,55 +515,45 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr *nlh, return -EINVAL; if (ifal->ifal_index && - !__dev_get_by_index(net, ifal->ifal_index)) + !addrlbl_ifindex_exists(net, ifal->ifal_index)) return -EINVAL; if (!tb[IFAL_ADDRESS]) return -EINVAL; addr = nla_data(tb[IFAL_ADDRESS]); - rcu_read_lock(); - p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index); - if (p && !ip6addrlbl_hold(p)) - p = NULL; - lseq = ip6addrlbl_table.seq; - rcu_read_unlock(); - - if (!p) { - err = -ESRCH; - goto out; - } - skb = nlmsg_new(ip6addrlbl_msgsize(), GFP_KERNEL); - if (!skb) { - ip6addrlbl_put(p); + if (!skb) return -ENOBUFS; - } - err = ip6addrlbl_fill(skb, p, lseq, - NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, - RTM_NEWADDRLABEL, 0); + err = -ESRCH; - ip6addrlbl_put(p); + rcu_read_lock(); + p = __ipv6_addr_label(net, addr, ipv6_addr_type(addr), ifal->ifal_index); + lseq = net->ipv6.ip6addrlbl_table.seq; + if (p) + err = ip6addrlbl_fill(skb, p, lseq, + NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, + RTM_NEWADDRLABEL, 0); + rcu_read_unlock(); if (err < 0) { WARN_ON(err == -EMSGSIZE); kfree_skb(skb); - goto out; + } else { + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); } - - err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); -out: return err; } void __init ipv6_addr_label_rtnl_register(void) { __rtnl_register(PF_INET6, RTM_NEWADDRLABEL, ip6addrlbl_newdel, - NULL, NULL); + NULL, RTNL_FLAG_DOIT_UNLOCKED); __rtnl_register(PF_INET6, RTM_DELADDRLABEL, ip6addrlbl_newdel, - NULL, NULL); + NULL, RTNL_FLAG_DOIT_UNLOCKED); __rtnl_register(PF_INET6, RTM_GETADDRLABEL, ip6addrlbl_get, - ip6addrlbl_dump, NULL); + ip6addrlbl_dump, RTNL_FLAG_DOIT_UNLOCKED); } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index a88b5b5b7955..c26f71234b9c 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -210,7 +210,8 @@ lookup_protocol: np->mcast_hops = IPV6_DEFAULT_MCASTHOPS; np->mc_loop = 1; np->pmtudisc = IPV6_PMTUDISC_WANT; - np->autoflowlabel = ip6_default_np_autolabel(sock_net(sk)); + np->autoflowlabel = ip6_default_np_autolabel(net); + np->repflow = net->ipv6.sysctl.flowlabel_reflect; sk->sk_ipv6only = net->ipv6.sysctl.bindv6only; /* Init the ipv4 part of the socket since we can have sockets @@ -554,6 +555,8 @@ const struct proto_ops inet6_stream_ops = { .recvmsg = inet_recvmsg, /* ok */ .mmap = sock_no_mmap, .sendpage = inet_sendpage, + .sendmsg_locked = tcp_sendmsg_locked, + .sendpage_locked = tcp_sendpage_locked, .splice_read = tcp_splice_read, .read_sock = tcp_read_sock, .peek_len = tcp_peek_len, @@ -807,6 +810,10 @@ static int __net_init inet6_net_init(struct net *net) net->ipv6.sysctl.idgen_retries = 3; net->ipv6.sysctl.idgen_delay = 1 * HZ; net->ipv6.sysctl.flowlabel_state_ranges = 0; + net->ipv6.sysctl.max_dst_opts_cnt = IP6_DEFAULT_MAX_DST_OPTS_CNT; + net->ipv6.sysctl.max_hbh_opts_cnt = IP6_DEFAULT_MAX_HBH_OPTS_CNT; + net->ipv6.sysctl.max_dst_opts_len = IP6_DEFAULT_MAX_DST_OPTS_LEN; + net->ipv6.sysctl.max_hbh_opts_len = IP6_DEFAULT_MAX_HBH_OPTS_LEN; atomic_set(&net->ipv6.fib6_sernum, 1); err = ipv6_init_mibs(net); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 7802b72196f3..78c974391567 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -271,6 +271,7 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir) case NEXTHDR_DEST: if (dir == XFRM_POLICY_OUT) ipv6_rearrange_destopt(iph, exthdr.opth); + /* fall through */ case NEXTHDR_HOP: if (!zero_out_mutable_opts(exthdr.opth)) { net_dbg_ratelimited("overrun %sopts\n", @@ -443,7 +444,7 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) if (err == -EINPROGRESS) goto out; - if (err == -EBUSY) + if (err == -ENOSPC) err = NET_XMIT_DROP; goto out_free; } diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 9ed35473dcb5..a902ff8f59be 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -226,7 +226,7 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info int tailen = esp->tailen; if (!skb_cloned(skb)) { - if (tailen <= skb_availroom(skb)) { + if (tailen <= skb_tailroom(skb)) { nfrags = 1; trailer = skb; tail = skb_tail_pointer(trailer); @@ -260,8 +260,6 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info kunmap_atomic(vaddr); - spin_unlock_bh(&x->lock); - nfrags = skb_shinfo(skb)->nr_frags; __skb_fill_page_desc(skb, nfrags, page, pfrag->offset, @@ -269,6 +267,9 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info skb_shinfo(skb)->nr_frags = ++nfrags; pfrag->offset = pfrag->offset + allocsize; + + spin_unlock_bh(&x->lock); + nfrags++; skb->len += tailen; @@ -345,7 +346,7 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info (unsigned char *)esph - skb->data, assoclen + ivlen + esp->clen + alen); if (unlikely(err < 0)) - goto error; + goto error_free; if (!esp->inplace) { int allocsize; @@ -356,7 +357,7 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info spin_lock_bh(&x->lock); if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) { spin_unlock_bh(&x->lock); - goto error; + goto error_free; } skb_shinfo(skb)->nr_frags = 1; @@ -373,7 +374,7 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info (unsigned char *)esph - skb->data, assoclen + ivlen + esp->clen + alen); if (unlikely(err < 0)) - goto error; + goto error_free; } if ((x->props.flags & XFRM_STATE_ESN)) @@ -395,7 +396,7 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info case -EINPROGRESS: goto error; - case -EBUSY: + case -ENOSPC: err = NET_XMIT_DROP; break; @@ -406,8 +407,9 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info if (sg != dsg) esp_ssg_unref(x, tmp); - kfree(tmp); +error_free: + kfree(tmp); error: return err; } @@ -461,28 +463,30 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) return esp6_output_tail(x, skb, &esp); } -int esp6_input_done2(struct sk_buff *skb, int err) +static inline int esp_remove_trailer(struct sk_buff *skb) { struct xfrm_state *x = xfrm_input_state(skb); struct xfrm_offload *xo = xfrm_offload(skb); struct crypto_aead *aead = x->data; - int alen = crypto_aead_authsize(aead); - int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead); - int elen = skb->len - hlen; - int hdr_len = skb_network_header_len(skb); - int padlen; + int alen, hlen, elen; + int padlen, trimlen; + __wsum csumdiff; u8 nexthdr[2]; + int ret; - if (!xo || (xo && !(xo->flags & CRYPTO_DONE))) - kfree(ESP_SKB_CB(skb)->tmp); + alen = crypto_aead_authsize(aead); + hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead); + elen = skb->len - hlen; - if (unlikely(err)) + if (xo && (xo->flags & XFRM_ESP_NO_TRAILER)) { + ret = xo->proto; goto out; + } - if (skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2)) - BUG(); + ret = skb_copy_bits(skb, skb->len - alen - 2, nexthdr, 2); + BUG_ON(ret); - err = -EINVAL; + ret = -EINVAL; padlen = nexthdr[0]; if (padlen + 2 + alen >= elen) { net_dbg_ratelimited("ipsec esp packet is garbage padlen=%d, elen=%d\n", @@ -490,17 +494,46 @@ int esp6_input_done2(struct sk_buff *skb, int err) goto out; } - /* ... check padding bits here. Silly. :-) */ + trimlen = alen + padlen + 2; + if (skb->ip_summed == CHECKSUM_COMPLETE) { + csumdiff = skb_checksum(skb, skb->len - trimlen, trimlen, 0); + skb->csum = csum_block_sub(skb->csum, csumdiff, + skb->len - trimlen); + } + pskb_trim(skb, skb->len - trimlen); + + ret = nexthdr[1]; + +out: + return ret; +} + +int esp6_input_done2(struct sk_buff *skb, int err) +{ + struct xfrm_state *x = xfrm_input_state(skb); + struct xfrm_offload *xo = xfrm_offload(skb); + struct crypto_aead *aead = x->data; + int hlen = sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead); + int hdr_len = skb_network_header_len(skb); - pskb_trim(skb, skb->len - alen - padlen - 2); - __skb_pull(skb, hlen); + if (!xo || (xo && !(xo->flags & CRYPTO_DONE))) + kfree(ESP_SKB_CB(skb)->tmp); + + if (unlikely(err)) + goto out; + + err = esp_remove_trailer(skb); + if (unlikely(err < 0)) + goto out; + + skb_postpull_rcsum(skb, skb_network_header(skb), + skb_network_header_len(skb)); + skb_pull_rcsum(skb, hlen); if (x->props.mode == XFRM_MODE_TUNNEL) skb_reset_transport_header(skb); else skb_set_transport_header(skb, -hdr_len); - err = nexthdr[1]; - /* RFC4303: Drop dummy packets without any error */ if (err == IPPROTO_NONE) err = -EINVAL; @@ -526,14 +559,14 @@ static void esp_input_restore_header(struct sk_buff *skb) static void esp_input_set_header(struct sk_buff *skb, __be32 *seqhi) { struct xfrm_state *x = xfrm_input_state(skb); - struct ip_esp_hdr *esph = (struct ip_esp_hdr *)skb->data; /* For ESN we move the header forward by 4 bytes to * accomodate the high bits. We will move it back after * decryption. */ if ((x->props.flags & XFRM_STATE_ESN)) { - esph = skb_push(skb, 4); + struct ip_esp_hdr *esph = skb_push(skb, 4); + *seqhi = esph->spi; esph->spi = esph->seq_no; esph->seq_no = XFRM_SKB_CB(skb)->seq.input.hi; diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index f02f131f6435..333a478aa161 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -209,11 +209,13 @@ out: static int esp6_input_tail(struct xfrm_state *x, struct sk_buff *skb) { struct crypto_aead *aead = x->data; + struct xfrm_offload *xo = xfrm_offload(skb); if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr) + crypto_aead_ivsize(aead))) return -EINVAL; - skb->ip_summed = CHECKSUM_NONE; + if (!(xo->flags & CRYPTO_DONE)) + skb->ip_summed = CHECKSUM_NONE; return esp6_input_done2(skb, 0); } @@ -286,7 +288,7 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32)); err = esp6_output_tail(x, skb, &esp); - if (err < 0) + if (err) return err; secpath_reset(skb); @@ -332,3 +334,4 @@ module_init(esp6_offload_init); module_exit(esp6_offload_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Steffen Klassert <steffen.klassert@secunet.com>"); +MODULE_ALIAS_XFRM_OFFLOAD_TYPE(AF_INET6, XFRM_PROTO_ESP); diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 3cec529c6113..83bd75713535 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -74,8 +74,20 @@ struct tlvtype_proc { /* An unknown option is detected, decide what to do */ -static bool ip6_tlvopt_unknown(struct sk_buff *skb, int optoff) +static bool ip6_tlvopt_unknown(struct sk_buff *skb, int optoff, + bool disallow_unknowns) { + if (disallow_unknowns) { + /* If unknown TLVs are disallowed by configuration + * then always silently drop packet. Note this also + * means no ICMP parameter problem is sent which + * could be a good property to mitigate a reflection DOS + * attack. + */ + + goto drop; + } + switch ((skb_network_header(skb)[optoff] & 0xC0) >> 6) { case 0: /* ignore */ return true; @@ -89,25 +101,36 @@ static bool ip6_tlvopt_unknown(struct sk_buff *skb, int optoff) */ if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) break; + /* fall through */ case 2: /* send ICMP PARM PROB regardless and drop packet */ icmpv6_param_prob(skb, ICMPV6_UNK_OPTION, optoff); return false; } +drop: kfree_skb(skb); return false; } /* Parse tlv encoded option header (hop-by-hop or destination) */ -static bool ip6_parse_tlv(const struct tlvtype_proc *procs, struct sk_buff *skb) +static bool ip6_parse_tlv(const struct tlvtype_proc *procs, + struct sk_buff *skb, + int max_count) { - const struct tlvtype_proc *curr; + int len = (skb_transport_header(skb)[1] + 1) << 3; const unsigned char *nh = skb_network_header(skb); int off = skb_network_header_len(skb); - int len = (skb_transport_header(skb)[1] + 1) << 3; + const struct tlvtype_proc *curr; + bool disallow_unknowns = false; + int tlv_count = 0; int padlen = 0; + if (unlikely(max_count < 0)) { + disallow_unknowns = true; + max_count = -max_count; + } + if (skb_transport_offset(skb) + len > skb_headlen(skb)) goto bad; @@ -148,6 +171,11 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs, struct sk_buff *skb) default: /* Other TLV code so scan list */ if (optlen > len) goto bad; + + tlv_count++; + if (tlv_count > max_count) + goto bad; + for (curr = procs; curr->type >= 0; curr++) { if (curr->type == nh[off]) { /* type specific length/alignment @@ -158,10 +186,10 @@ static bool ip6_parse_tlv(const struct tlvtype_proc *procs, struct sk_buff *skb) break; } } - if (curr->type < 0) { - if (ip6_tlvopt_unknown(skb, off) == 0) - return false; - } + if (curr->type < 0 && + !ip6_tlvopt_unknown(skb, off, disallow_unknowns)) + return false; + padlen = 0; break; } @@ -186,7 +214,6 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff) struct ipv6_destopt_hao *hao; struct inet6_skb_parm *opt = IP6CB(skb); struct ipv6hdr *ipv6h = ipv6_hdr(skb); - struct in6_addr tmp_addr; int ret; if (opt->dsthao) { @@ -228,9 +255,7 @@ static bool ipv6_dest_hao(struct sk_buff *skb, int optoff) if (skb->ip_summed == CHECKSUM_COMPLETE) skb->ip_summed = CHECKSUM_NONE; - tmp_addr = ipv6h->saddr; - ipv6h->saddr = hao->addr; - hao->addr = tmp_addr; + swap(ipv6h->saddr, hao->addr); if (skb->tstamp == 0) __net_timestamp(skb); @@ -260,23 +285,31 @@ static int ipv6_destopt_rcv(struct sk_buff *skb) __u16 dstbuf; #endif struct dst_entry *dst = skb_dst(skb); + struct net *net = dev_net(skb->dev); + int extlen; if (!pskb_may_pull(skb, skb_transport_offset(skb) + 8) || !pskb_may_pull(skb, (skb_transport_offset(skb) + ((skb_transport_header(skb)[1] + 1) << 3)))) { __IP6_INC_STATS(dev_net(dst->dev), ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); +fail_and_free: kfree_skb(skb); return -1; } + extlen = (skb_transport_header(skb)[1] + 1) << 3; + if (extlen > net->ipv6.sysctl.max_dst_opts_len) + goto fail_and_free; + opt->lastopt = opt->dst1 = skb_network_header_len(skb); #if IS_ENABLED(CONFIG_IPV6_MIP6) dstbuf = opt->dst1; #endif - if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) { - skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3; + if (ip6_parse_tlv(tlvprocdestopt_lst, skb, + init_net.ipv6.sysctl.max_dst_opts_cnt)) { + skb->transport_header += extlen; opt = IP6CB(skb); #if IS_ENABLED(CONFIG_IPV6_MIP6) opt->nhoff = dstbuf; @@ -805,6 +838,8 @@ static const struct tlvtype_proc tlvprochopopt_lst[] = { int ipv6_parse_hopopts(struct sk_buff *skb) { struct inet6_skb_parm *opt = IP6CB(skb); + struct net *net = dev_net(skb->dev); + int extlen; /* * skb_network_header(skb) is equal to skb->data, and @@ -815,13 +850,19 @@ int ipv6_parse_hopopts(struct sk_buff *skb) if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + 8) || !pskb_may_pull(skb, (sizeof(struct ipv6hdr) + ((skb_transport_header(skb)[1] + 1) << 3)))) { +fail_and_free: kfree_skb(skb); return -1; } + extlen = (skb_transport_header(skb)[1] + 1) << 3; + if (extlen > net->ipv6.sysctl.max_hbh_opts_len) + goto fail_and_free; + opt->flags |= IP6SKB_HOPBYHOP; - if (ip6_parse_tlv(tlvprochopopt_lst, skb)) { - skb->transport_header += (skb_transport_header(skb)[1] + 1) << 3; + if (ip6_parse_tlv(tlvprochopopt_lst, skb, + init_net.ipv6.sysctl.max_hbh_opts_cnt)) { + skb->transport_header += extlen; opt = IP6CB(skb); opt->nhoff = sizeof(struct ipv6hdr); return 1; @@ -882,7 +923,7 @@ static void ipv6_push_rthdr4(struct sk_buff *skb, u8 *proto, (hops - 1) * sizeof(struct in6_addr)); sr_phdr->segments[0] = **addr_p; - *addr_p = &sr_ihdr->segments[hops - 1]; + *addr_p = &sr_ihdr->segments[sr_ihdr->segments_left]; #ifdef CONFIG_IPV6_SEG6_HMAC if (sr_has_hmac(sr_phdr)) { @@ -1174,7 +1215,7 @@ struct in6_addr *fl6_update_dst(struct flowi6 *fl6, { struct ipv6_sr_hdr *srh = (struct ipv6_sr_hdr *)opt->srcrt; - fl6->daddr = srh->segments[srh->first_segment]; + fl6->daddr = srh->segments[srh->segments_left]; break; } default: diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index 305e2ed730bf..11025f8d124b 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -99,7 +99,7 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, break; hdrlen = 8; } else if (nexthdr == NEXTHDR_AUTH) - hdrlen = (hp->hdrlen+2)<<2; + hdrlen = ipv6_authlen(hp); else hdrlen = ipv6_optlen(hp); @@ -187,7 +187,6 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, { unsigned int start = skb_network_offset(skb) + sizeof(struct ipv6hdr); u8 nexthdr = ipv6_hdr(skb)->nexthdr; - unsigned int len; bool found; if (fragoff) @@ -204,7 +203,6 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, start = *offset + sizeof(struct ipv6hdr); nexthdr = ip6->nexthdr; } - len = skb->len - start; do { struct ipv6_opt_hdr _hdr, *hp; @@ -273,7 +271,6 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, if (!found) { nexthdr = hp->nexthdr; - len -= hdrlen; start += hdrlen; } } while (!found); diff --git a/net/ipv6/fib6_notifier.c b/net/ipv6/fib6_notifier.c new file mode 100644 index 000000000000..05f82baaa99e --- /dev/null +++ b/net/ipv6/fib6_notifier.c @@ -0,0 +1,63 @@ +#include <linux/notifier.h> +#include <linux/socket.h> +#include <linux/kernel.h> +#include <linux/export.h> +#include <net/net_namespace.h> +#include <net/fib_notifier.h> +#include <net/netns/ipv6.h> +#include <net/ip6_fib.h> + +int call_fib6_notifier(struct notifier_block *nb, struct net *net, + enum fib_event_type event_type, + struct fib_notifier_info *info) +{ + info->family = AF_INET6; + return call_fib_notifier(nb, net, event_type, info); +} + +int call_fib6_notifiers(struct net *net, enum fib_event_type event_type, + struct fib_notifier_info *info) +{ + info->family = AF_INET6; + return call_fib_notifiers(net, event_type, info); +} + +static unsigned int fib6_seq_read(struct net *net) +{ + return fib6_tables_seq_read(net) + fib6_rules_seq_read(net); +} + +static int fib6_dump(struct net *net, struct notifier_block *nb) +{ + int err; + + err = fib6_rules_dump(net, nb); + if (err) + return err; + + return fib6_tables_dump(net, nb); +} + +static const struct fib_notifier_ops fib6_notifier_ops_template = { + .family = AF_INET6, + .fib_seq_read = fib6_seq_read, + .fib_dump = fib6_dump, + .owner = THIS_MODULE, +}; + +int __net_init fib6_notifier_init(struct net *net) +{ + struct fib_notifier_ops *ops; + + ops = fib_notifier_ops_register(&fib6_notifier_ops_template, net); + if (IS_ERR(ops)) + return PTR_ERR(ops); + net->ipv6.notifier_ops = ops; + + return 0; +} + +void __net_exit fib6_notifier_exit(struct net *net) +{ + fib_notifier_ops_unregister(net->ipv6.notifier_ops); +} diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index ec849d88a662..b240f24a6e52 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -14,6 +14,7 @@ */ #include <linux/netdevice.h> +#include <linux/notifier.h> #include <linux/export.h> #include <net/fib_rules.h> @@ -29,22 +30,65 @@ struct fib6_rule { u8 tclass; }; -struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, - int flags, pol_lookup_t lookup) +static bool fib6_rule_matchall(const struct fib_rule *rule) +{ + struct fib6_rule *r = container_of(rule, struct fib6_rule, common); + + if (r->dst.plen || r->src.plen || r->tclass) + return false; + return fib_rule_matchall(rule); +} + +bool fib6_rule_default(const struct fib_rule *rule) { - struct fib_lookup_arg arg = { - .lookup_ptr = lookup, - .flags = FIB_LOOKUP_NOREF, - }; + if (!fib6_rule_matchall(rule) || rule->action != FR_ACT_TO_TBL || + rule->l3mdev) + return false; + if (rule->table != RT6_TABLE_LOCAL && rule->table != RT6_TABLE_MAIN) + return false; + return true; +} +EXPORT_SYMBOL_GPL(fib6_rule_default); - /* update flow if oif or iif point to device enslaved to l3mdev */ - l3mdev_update_flow(net, flowi6_to_flowi(fl6)); +int fib6_rules_dump(struct net *net, struct notifier_block *nb) +{ + return fib_rules_dump(net, nb, AF_INET6); +} - fib_rules_lookup(net->ipv6.fib6_rules_ops, - flowi6_to_flowi(fl6), flags, &arg); +unsigned int fib6_rules_seq_read(struct net *net) +{ + return fib_rules_seq_read(net, AF_INET6); +} - if (arg.result) - return arg.result; +struct dst_entry *fib6_rule_lookup(struct net *net, struct flowi6 *fl6, + int flags, pol_lookup_t lookup) +{ + if (net->ipv6.fib6_has_custom_rules) { + struct fib_lookup_arg arg = { + .lookup_ptr = lookup, + .flags = FIB_LOOKUP_NOREF, + }; + + /* update flow if oif or iif point to device enslaved to l3mdev */ + l3mdev_update_flow(net, flowi6_to_flowi(fl6)); + + fib_rules_lookup(net->ipv6.fib6_rules_ops, + flowi6_to_flowi(fl6), flags, &arg); + + if (arg.result) + return arg.result; + } else { + struct rt6_info *rt; + + rt = lookup(net, net->ipv6.fib6_local_tbl, fl6, flags); + if (rt != net->ipv6.ip6_null_entry && rt->dst.error != -EAGAIN) + return &rt->dst; + ip6_rt_put(rt); + rt = lookup(net, net->ipv6.fib6_main_tbl, fl6, flags); + if (rt->dst.error != -EAGAIN) + return &rt->dst; + ip6_rt_put(rt); + } dst_hold(&net->ipv6.ip6_null_entry->dst); return &net->ipv6.ip6_null_entry->dst; @@ -214,6 +258,7 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, rule6->dst.plen = frh->dst_len; rule6->tclass = frh->tos; + net->ipv6.fib6_has_custom_rules = true; err = 0; errout: return err; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 8d7b113958b1..6ae5dd3f4d0d 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -250,16 +250,15 @@ static bool opt_unrec(struct sk_buff *skb, __u32 offset) return (*op & 0xC0) == 0x80; } -int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, - struct icmp6hdr *thdr, int len) +void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, + struct icmp6hdr *thdr, int len) { struct sk_buff *skb; struct icmp6hdr *icmp6h; - int err = 0; skb = skb_peek(&sk->sk_write_queue); if (!skb) - goto out; + return; icmp6h = icmp6_hdr(skb); memcpy(icmp6h, thdr, sizeof(struct icmp6hdr)); @@ -287,8 +286,6 @@ int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, tmp_csum); } ip6_push_pending_frames(sk); -out: - return err; } struct icmpv6_msg { @@ -399,6 +396,24 @@ relookup_failed: return ERR_PTR(err); } +static int icmp6_iif(const struct sk_buff *skb) +{ + int iif = skb->dev->ifindex; + + /* for local traffic to local address, skb dev is the loopback + * device. Check if there is a dst attached to the skb and if so + * get the real device index. + */ + if (unlikely(iif == LOOPBACK_IFINDEX)) { + const struct rt6_info *rt6 = skb_rt6_info(skb); + + if (rt6) + iif = rt6->rt6i_idev->dev->ifindex; + } + + return iif; +} + /* * Send an ICMP message in response to a packet in error */ @@ -420,7 +435,6 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, int iif = 0; int addr_type = 0; int len; - int err = 0; u32 mark = IP6_REPLY_MARK(net, skb->mark); if ((u8 *)hdr < skb->head || @@ -459,9 +473,9 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, * Source addr check */ - if (__ipv6_addr_needs_scope_id(addr_type)) - iif = skb->dev->ifindex; - else { + if (__ipv6_addr_needs_scope_id(addr_type)) { + iif = icmp6_iif(skb); + } else { dst = skb_dst(skb); iif = l3mdev_master_ifindex(dst ? dst->dev : skb->dev); } @@ -508,6 +522,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, fl6.fl6_icmp_type = type; fl6.fl6_icmp_code = code; fl6.flowi6_uid = sock_net_uid(net, NULL); + fl6.mp_hash = rt6_multipath_hash(&fl6, skb); security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); @@ -556,17 +571,16 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, rcu_read_lock(); idev = __in6_dev_get(skb->dev); - err = ip6_append_data(sk, icmpv6_getfrag, &msg, - len + sizeof(struct icmp6hdr), - sizeof(struct icmp6hdr), - &ipc6, &fl6, (struct rt6_info *)dst, - MSG_DONTWAIT, &sockc_unused); - if (err) { + if (ip6_append_data(sk, icmpv6_getfrag, &msg, + len + sizeof(struct icmp6hdr), + sizeof(struct icmp6hdr), + &ipc6, &fl6, (struct rt6_info *)dst, + MSG_DONTWAIT, &sockc_unused)) { ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); } else { - err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, - len + sizeof(struct icmp6hdr)); + icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, + len + sizeof(struct icmp6hdr)); } rcu_read_unlock(); out_dst_release: @@ -663,7 +677,6 @@ static void icmpv6_echo_reply(struct sk_buff *skb) struct icmpv6_msg msg; struct dst_entry *dst; struct ipcm6_cookie ipc6; - int err = 0; u32 mark = IP6_REPLY_MARK(net, skb->mark); struct sockcm_cookie sockc_unused = {0}; @@ -682,7 +695,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) fl6.daddr = ipv6_hdr(skb)->saddr; if (saddr) fl6.saddr = *saddr; - fl6.flowi6_oif = skb->dev->ifindex; + fl6.flowi6_oif = icmp6_iif(skb); fl6.fl6_icmp_type = ICMPV6_ECHO_REPLY; fl6.flowi6_mark = mark; fl6.flowi6_uid = sock_net_uid(net, NULL); @@ -700,8 +713,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) else if (!fl6.flowi6_oif) fl6.flowi6_oif = np->ucast_oif; - err = ip6_dst_lookup(net, sk, &dst, &fl6); - if (err) + if (ip6_dst_lookup(net, sk, &dst, &fl6)) goto out; dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0); if (IS_ERR(dst)) @@ -718,17 +730,16 @@ static void icmpv6_echo_reply(struct sk_buff *skb) ipc6.dontfrag = np->dontfrag; ipc6.opt = NULL; - err = ip6_append_data(sk, icmpv6_getfrag, &msg, skb->len + sizeof(struct icmp6hdr), - sizeof(struct icmp6hdr), &ipc6, &fl6, - (struct rt6_info *)dst, MSG_DONTWAIT, - &sockc_unused); - - if (err) { + if (ip6_append_data(sk, icmpv6_getfrag, &msg, + skb->len + sizeof(struct icmp6hdr), + sizeof(struct icmp6hdr), &ipc6, &fl6, + (struct rt6_info *)dst, MSG_DONTWAIT, + &sockc_unused)) { __ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); } else { - err = icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, - skb->len + sizeof(struct icmp6hdr)); + icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr, + skb->len + sizeof(struct icmp6hdr)); } dst_release(dst); out: @@ -853,10 +864,8 @@ static int icmpv6_rcv(struct sk_buff *skb) goto discard_it; hdr = icmp6_hdr(skb); - /* - * Drop through to notify - */ - + /* to notify */ + /* fall through */ case ICMPV6_DEST_UNREACH: case ICMPV6_TIME_EXCEED: case ICMPV6_PARAMPROB: diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h index e0170f62bc39..3c7a11b62334 100644 --- a/net/ipv6/ila/ila.h +++ b/net/ipv6/ila/ila.h @@ -55,17 +55,6 @@ struct ila_identifier { }; }; -enum { - ILA_ATYPE_IID = 0, - ILA_ATYPE_LUID, - ILA_ATYPE_VIRT_V4, - ILA_ATYPE_VIRT_UNI_V6, - ILA_ATYPE_VIRT_MULTI_V6, - ILA_ATYPE_RSVD_1, - ILA_ATYPE_RSVD_2, - ILA_ATYPE_RSVD_3, -}; - #define CSUM_NEUTRAL_FLAG htonl(0x10000000) struct ila_addr { @@ -93,6 +82,7 @@ struct ila_params { struct ila_locator locator_match; __wsum csum_diff; u8 csum_mode; + u8 ident_type; }; static inline __wsum compute_csum_diff8(const __be32 *from, const __be32 *to) diff --git a/net/ipv6/ila/ila_common.c b/net/ipv6/ila/ila_common.c index aba0998ddbfb..8c88ecf29b93 100644 --- a/net/ipv6/ila/ila_common.c +++ b/net/ipv6/ila/ila_common.c @@ -13,30 +13,37 @@ #include <uapi/linux/ila.h> #include "ila.h" -static __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p) +void ila_init_saved_csum(struct ila_params *p) { - struct ila_addr *iaddr = ila_a2i(&ip6h->daddr); + if (!p->locator_match.v64) + return; + p->csum_diff = compute_csum_diff8( + (__be32 *)&p->locator, + (__be32 *)&p->locator_match); +} + +static __wsum get_csum_diff_iaddr(struct ila_addr *iaddr, struct ila_params *p) +{ if (p->locator_match.v64) return p->csum_diff; else - return compute_csum_diff8((__be32 *)&iaddr->loc, - (__be32 *)&p->locator); + return compute_csum_diff8((__be32 *)&p->locator, + (__be32 *)&iaddr->loc); } -static void ila_csum_do_neutral(struct ila_addr *iaddr, - struct ila_params *p) +static __wsum get_csum_diff(struct ipv6hdr *ip6h, struct ila_params *p) +{ + return get_csum_diff_iaddr(ila_a2i(&ip6h->daddr), p); +} + +static void ila_csum_do_neutral_fmt(struct ila_addr *iaddr, + struct ila_params *p) { __sum16 *adjust = (__force __sum16 *)&iaddr->ident.v16[3]; __wsum diff, fval; - /* Check if checksum adjust value has been cached */ - if (p->locator_match.v64) { - diff = p->csum_diff; - } else { - diff = compute_csum_diff8((__be32 *)&p->locator, - (__be32 *)iaddr); - } + diff = get_csum_diff_iaddr(iaddr, p); fval = (__force __wsum)(ila_csum_neutral_set(iaddr->ident) ? CSUM_NEUTRAL_FLAG : ~CSUM_NEUTRAL_FLAG); @@ -53,13 +60,23 @@ static void ila_csum_do_neutral(struct ila_addr *iaddr, iaddr->ident.csum_neutral ^= 1; } -static void ila_csum_adjust_transport(struct sk_buff *skb, +static void ila_csum_do_neutral_nofmt(struct ila_addr *iaddr, struct ila_params *p) { + __sum16 *adjust = (__force __sum16 *)&iaddr->ident.v16[3]; __wsum diff; - struct ipv6hdr *ip6h = ipv6_hdr(skb); - struct ila_addr *iaddr = ila_a2i(&ip6h->daddr); + + diff = get_csum_diff_iaddr(iaddr, p); + + *adjust = ~csum_fold(csum_add(diff, csum_unfold(*adjust))); +} + +static void ila_csum_adjust_transport(struct sk_buff *skb, + struct ila_params *p) +{ size_t nhoff = sizeof(struct ipv6hdr); + struct ipv6hdr *ip6h = ipv6_hdr(skb); + __wsum diff; switch (ip6h->nexthdr) { case NEXTHDR_TCP: @@ -98,52 +115,45 @@ static void ila_csum_adjust_transport(struct sk_buff *skb, } break; } - - /* Now change destination address */ - iaddr->loc = p->locator; } void ila_update_ipv6_locator(struct sk_buff *skb, struct ila_params *p, - bool set_csum_neutral) + bool sir2ila) { struct ipv6hdr *ip6h = ipv6_hdr(skb); struct ila_addr *iaddr = ila_a2i(&ip6h->daddr); - /* First deal with the transport checksum */ - if (ila_csum_neutral_set(iaddr->ident)) { - /* C-bit is set in the locator indicating that this - * is a locator being translated to a SIR address. - * Perform (receiver) checksum-neutral translation. - */ - if (!set_csum_neutral) - ila_csum_do_neutral(iaddr, p); - } else { - switch (p->csum_mode) { - case ILA_CSUM_ADJUST_TRANSPORT: - ila_csum_adjust_transport(skb, p); - break; - case ILA_CSUM_NEUTRAL_MAP: - ila_csum_do_neutral(iaddr, p); - break; - case ILA_CSUM_NO_ACTION: + switch (p->csum_mode) { + case ILA_CSUM_ADJUST_TRANSPORT: + ila_csum_adjust_transport(skb, p); + break; + case ILA_CSUM_NEUTRAL_MAP: + if (sir2ila) { + if (WARN_ON(ila_csum_neutral_set(iaddr->ident))) { + /* Checksum flag should never be + * set in a formatted SIR address. + */ + break; + } + } else if (!ila_csum_neutral_set(iaddr->ident)) { + /* ILA to SIR translation and C-bit isn't + * set so we're good. + */ break; } + ila_csum_do_neutral_fmt(iaddr, p); + break; + case ILA_CSUM_NEUTRAL_MAP_AUTO: + ila_csum_do_neutral_nofmt(iaddr, p); + break; + case ILA_CSUM_NO_ACTION: + break; } /* Now change destination address */ iaddr->loc = p->locator; } -void ila_init_saved_csum(struct ila_params *p) -{ - if (!p->locator_match.v64) - return; - - p->csum_diff = compute_csum_diff8( - (__be32 *)&p->locator, - (__be32 *)&p->locator_match); -} - static int __init ila_init(void) { int ret; diff --git a/net/ipv6/ila/ila_lwt.c b/net/ipv6/ila/ila_lwt.c index 0c02a09bc351..3d56a2fb6f86 100644 --- a/net/ipv6/ila/ila_lwt.c +++ b/net/ipv6/ila/ila_lwt.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/errno.h> #include <linux/ip.h> #include <linux/kernel.h> @@ -19,6 +20,7 @@ struct ila_lwt { struct ila_params p; struct dst_cache dst_cache; u32 connected : 1; + u32 lwt_output : 1; }; static inline struct ila_lwt *ila_lwt_lwtunnel( @@ -44,8 +46,10 @@ static int ila_output(struct net *net, struct sock *sk, struct sk_buff *skb) if (skb->protocol != htons(ETH_P_IPV6)) goto drop; - ila_update_ipv6_locator(skb, ila_params_lwtunnel(orig_dst->lwtstate), - true); + if (ilwt->lwt_output) + ila_update_ipv6_locator(skb, + ila_params_lwtunnel(orig_dst->lwtstate), + true); if (rt->rt6i_flags & (RTF_GATEWAY | RTF_CACHE)) { /* Already have a next hop address in route, no need for @@ -97,11 +101,15 @@ drop: static int ila_input(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); + struct ila_lwt *ilwt = ila_lwt_lwtunnel(dst->lwtstate); if (skb->protocol != htons(ETH_P_IPV6)) goto drop; - ila_update_ipv6_locator(skb, ila_params_lwtunnel(dst->lwtstate), false); + if (!ilwt->lwt_output) + ila_update_ipv6_locator(skb, + ila_params_lwtunnel(dst->lwtstate), + false); return dst->lwtstate->orig_input(skb); @@ -113,6 +121,8 @@ drop: static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { [ILA_ATTR_LOCATOR] = { .type = NLA_U64, }, [ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, }, + [ILA_ATTR_IDENT_TYPE] = { .type = NLA_U8, }, + [ILA_ATTR_HOOK_TYPE] = { .type = NLA_U8, }, }; static int ila_build_state(struct nlattr *nla, @@ -126,33 +136,84 @@ static int ila_build_state(struct nlattr *nla, struct lwtunnel_state *newts; const struct fib6_config *cfg6 = cfg; struct ila_addr *iaddr; + u8 ident_type = ILA_ATYPE_USE_FORMAT; + u8 hook_type = ILA_HOOK_ROUTE_OUTPUT; + u8 csum_mode = ILA_CSUM_NO_ACTION; + bool lwt_output = true; + u8 eff_ident_type; int ret; if (family != AF_INET6) return -EINVAL; - if (cfg6->fc_dst_len < 8 * sizeof(struct ila_locator) + 3) { - /* Need to have full locator and at least type field - * included in destination - */ + ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, ila_nl_policy, extack); + if (ret < 0) + return ret; + + if (!tb[ILA_ATTR_LOCATOR]) return -EINVAL; - } iaddr = (struct ila_addr *)&cfg6->fc_dst; - if (!ila_addr_is_ila(iaddr) || ila_csum_neutral_set(iaddr->ident)) { - /* Don't allow translation for a non-ILA address or checksum - * neutral flag to be set. + if (tb[ILA_ATTR_IDENT_TYPE]) + ident_type = nla_get_u8(tb[ILA_ATTR_IDENT_TYPE]); + + if (ident_type == ILA_ATYPE_USE_FORMAT) { + /* Infer identifier type from type field in formatted + * identifier. */ + + if (cfg6->fc_dst_len < 8 * sizeof(struct ila_locator) + 3) { + /* Need to have full locator and at least type field + * included in destination + */ + return -EINVAL; + } + + eff_ident_type = iaddr->ident.type; + } else { + eff_ident_type = ident_type; + } + + switch (eff_ident_type) { + case ILA_ATYPE_IID: + /* Don't allow ILA for IID type */ + return -EINVAL; + case ILA_ATYPE_LUID: + break; + case ILA_ATYPE_VIRT_V4: + case ILA_ATYPE_VIRT_UNI_V6: + case ILA_ATYPE_VIRT_MULTI_V6: + case ILA_ATYPE_NONLOCAL_ADDR: + /* These ILA formats are not supported yet. */ + default: return -EINVAL; } - ret = nla_parse_nested(tb, ILA_ATTR_MAX, nla, ila_nl_policy, extack); - if (ret < 0) - return ret; + if (tb[ILA_ATTR_HOOK_TYPE]) + hook_type = nla_get_u8(tb[ILA_ATTR_HOOK_TYPE]); + + switch (hook_type) { + case ILA_HOOK_ROUTE_OUTPUT: + lwt_output = true; + break; + case ILA_HOOK_ROUTE_INPUT: + lwt_output = false; + break; + default: + return -EINVAL; + } - if (!tb[ILA_ATTR_LOCATOR]) + if (tb[ILA_ATTR_CSUM_MODE]) + csum_mode = nla_get_u8(tb[ILA_ATTR_CSUM_MODE]); + + if (csum_mode == ILA_CSUM_NEUTRAL_MAP && + ila_csum_neutral_set(iaddr->ident)) { + /* Don't allow translation if checksum neutral bit is + * configured and it's set in the SIR address. + */ return -EINVAL; + } newts = lwtunnel_state_alloc(sizeof(*ilwt)); if (!newts) @@ -165,19 +226,18 @@ static int ila_build_state(struct nlattr *nla, return ret; } + ilwt->lwt_output = !!lwt_output; + p = ila_params_lwtunnel(newts); + p->csum_mode = csum_mode; + p->ident_type = ident_type; p->locator.v64 = (__force __be64)nla_get_u64(tb[ILA_ATTR_LOCATOR]); /* Precompute checksum difference for translation since we * know both the old locator and the new one. */ p->locator_match = iaddr->loc; - p->csum_diff = compute_csum_diff8( - (__be32 *)&p->locator_match, (__be32 *)&p->locator); - - if (tb[ILA_ATTR_CSUM_MODE]) - p->csum_mode = nla_get_u8(tb[ILA_ATTR_CSUM_MODE]); ila_init_saved_csum(p); @@ -202,13 +262,23 @@ static int ila_fill_encap_info(struct sk_buff *skb, struct lwtunnel_state *lwtstate) { struct ila_params *p = ila_params_lwtunnel(lwtstate); + struct ila_lwt *ilwt = ila_lwt_lwtunnel(lwtstate); if (nla_put_u64_64bit(skb, ILA_ATTR_LOCATOR, (__force u64)p->locator.v64, ILA_ATTR_PAD)) goto nla_put_failure; + if (nla_put_u8(skb, ILA_ATTR_CSUM_MODE, (__force u8)p->csum_mode)) goto nla_put_failure; + if (nla_put_u8(skb, ILA_ATTR_IDENT_TYPE, (__force u8)p->ident_type)) + goto nla_put_failure; + + if (nla_put_u8(skb, ILA_ATTR_HOOK_TYPE, + ilwt->lwt_output ? ILA_HOOK_ROUTE_OUTPUT : + ILA_HOOK_ROUTE_INPUT)) + goto nla_put_failure; + return 0; nla_put_failure: @@ -219,6 +289,8 @@ static int ila_encap_nlsize(struct lwtunnel_state *lwtstate) { return nla_total_size_64bit(sizeof(u64)) + /* ILA_ATTR_LOCATOR */ nla_total_size(sizeof(u8)) + /* ILA_ATTR_CSUM_MODE */ + nla_total_size(sizeof(u8)) + /* ILA_ATTR_IDENT_TYPE */ + nla_total_size(sizeof(u8)) + /* ILA_ATTR_HOOK_TYPE */ 0; } diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c index 77f7f8c7d93d..6eb5e68f112a 100644 --- a/net/ipv6/ila/ila_xlat.c +++ b/net/ipv6/ila/ila_xlat.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/jhash.h> #include <linux/netfilter.h> #include <linux/rcupdate.h> @@ -120,6 +121,7 @@ static const struct nla_policy ila_nl_policy[ILA_ATTR_MAX + 1] = { [ILA_ATTR_LOCATOR_MATCH] = { .type = NLA_U64, }, [ILA_ATTR_IFINDEX] = { .type = NLA_U32, }, [ILA_ATTR_CSUM_MODE] = { .type = NLA_U8, }, + [ILA_ATTR_IDENT_TYPE] = { .type = NLA_U8, }, }; static int parse_nl_config(struct genl_info *info, @@ -137,6 +139,14 @@ static int parse_nl_config(struct genl_info *info, if (info->attrs[ILA_ATTR_CSUM_MODE]) xp->ip.csum_mode = nla_get_u8(info->attrs[ILA_ATTR_CSUM_MODE]); + else + xp->ip.csum_mode = ILA_CSUM_NO_ACTION; + + if (info->attrs[ILA_ATTR_IDENT_TYPE]) + xp->ip.ident_type = nla_get_u8( + info->attrs[ILA_ATTR_IDENT_TYPE]); + else + xp->ip.ident_type = ILA_ATYPE_USE_FORMAT; if (info->attrs[ILA_ATTR_IFINDEX]) xp->ifindex = nla_get_s32(info->attrs[ILA_ATTR_IFINDEX]); @@ -197,7 +207,7 @@ static void ila_free_cb(void *ptr, void *arg) } } -static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral); +static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila); static unsigned int ila_nf_input(void *priv, @@ -208,7 +218,7 @@ ila_nf_input(void *priv, return NF_ACCEPT; } -static struct nf_hook_ops ila_nf_hook_ops[] __read_mostly = { +static const struct nf_hook_ops ila_nf_hook_ops[] = { { .hook = ila_nf_input, .pf = NFPROTO_IPV6, @@ -395,7 +405,8 @@ static int ila_fill_info(struct ila_map *ila, struct sk_buff *msg) (__force u64)ila->xp.ip.locator_match.v64, ILA_ATTR_PAD) || nla_put_s32(msg, ILA_ATTR_IFINDEX, ila->xp.ifindex) || - nla_put_u32(msg, ILA_ATTR_CSUM_MODE, ila->xp.ip.csum_mode)) + nla_put_u8(msg, ILA_ATTR_CSUM_MODE, ila->xp.ip.csum_mode) || + nla_put_u8(msg, ILA_ATTR_IDENT_TYPE, ila->xp.ip.ident_type)) return -1; return 0; @@ -606,7 +617,7 @@ static struct pernet_operations ila_net_ops = { .size = sizeof(struct ila_net), }; -static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral) +static int ila_xlat_addr(struct sk_buff *skb, bool sir2ila) { struct ila_map *ila; struct ipv6hdr *ip6h = ipv6_hdr(skb); @@ -616,16 +627,16 @@ static int ila_xlat_addr(struct sk_buff *skb, bool set_csum_neutral) /* Assumes skb contains a valid IPv6 header that is pulled */ - if (!ila_addr_is_ila(iaddr)) { - /* Type indicates this is not an ILA address */ - return 0; - } + /* No check here that ILA type in the mapping matches what is in the + * address. We assume that whatever sender gaves us can be translated. + * The checksum mode however is relevant. + */ rcu_read_lock(); ila = ila_lookup_wildcards(iaddr, skb->dev->ifindex, ilan); if (ila) - ila_update_ipv6_locator(skb, &ila->xp.ip, set_csum_neutral); + ila_update_ipv6_locator(skb, &ila->xp.ip, sir2ila); rcu_read_unlock(); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index b13b8f93079d..b01858f5deb1 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -56,7 +56,7 @@ struct sock *__inet6_lookup_established(struct net *net, const __be16 sport, const struct in6_addr *daddr, const u16 hnum, - const int dif) + const int dif, const int sdif) { struct sock *sk; const struct hlist_nulls_node *node; @@ -73,12 +73,12 @@ begin: sk_nulls_for_each_rcu(sk, node, &head->chain) { if (sk->sk_hash != hash) continue; - if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif)) + if (!INET6_MATCH(sk, net, saddr, daddr, ports, dif, sdif)) continue; if (unlikely(!refcount_inc_not_zero(&sk->sk_refcnt))) goto out; - if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif))) { + if (unlikely(!INET6_MATCH(sk, net, saddr, daddr, ports, dif, sdif))) { sock_gen_put(sk); goto begin; } @@ -96,7 +96,7 @@ EXPORT_SYMBOL(__inet6_lookup_established); static inline int compute_score(struct sock *sk, struct net *net, const unsigned short hnum, const struct in6_addr *daddr, - const int dif, bool exact_dif) + const int dif, const int sdif, bool exact_dif) { int score = -1; @@ -110,9 +110,13 @@ static inline int compute_score(struct sock *sk, struct net *net, score++; } if (sk->sk_bound_dev_if || exact_dif) { - if (sk->sk_bound_dev_if != dif) + bool dev_match = (sk->sk_bound_dev_if == dif || + sk->sk_bound_dev_if == sdif); + + if (exact_dif && !dev_match) return -1; - score++; + if (sk->sk_bound_dev_if && dev_match) + score++; } if (sk->sk_incoming_cpu == raw_smp_processor_id()) score++; @@ -126,7 +130,7 @@ struct sock *inet6_lookup_listener(struct net *net, struct sk_buff *skb, int doff, const struct in6_addr *saddr, const __be16 sport, const struct in6_addr *daddr, - const unsigned short hnum, const int dif) + const unsigned short hnum, const int dif, const int sdif) { unsigned int hash = inet_lhashfn(net, hnum); struct inet_listen_hashbucket *ilb = &hashinfo->listening_hash[hash]; @@ -136,7 +140,7 @@ struct sock *inet6_lookup_listener(struct net *net, u32 phash = 0; sk_for_each(sk, &ilb->head) { - score = compute_score(sk, net, hnum, daddr, dif, exact_dif); + score = compute_score(sk, net, hnum, daddr, dif, sdif, exact_dif); if (score > hiscore) { reuseport = sk->sk_reuseport; if (reuseport) { @@ -171,7 +175,7 @@ struct sock *inet6_lookup(struct net *net, struct inet_hashinfo *hashinfo, bool refcounted; sk = __inet6_lookup(net, hashinfo, skb, doff, saddr, sport, daddr, - ntohs(dport), dif, &refcounted); + ntohs(dport), dif, 0, &refcounted); if (sk && !refcounted && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; return sk; @@ -187,8 +191,9 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, const struct in6_addr *daddr = &sk->sk_v6_rcv_saddr; const struct in6_addr *saddr = &sk->sk_v6_daddr; const int dif = sk->sk_bound_dev_if; - const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport); struct net *net = sock_net(sk); + const int sdif = l3mdev_master_ifindex_by_index(net, dif); + const __portpair ports = INET_COMBINED_PORTS(inet->inet_dport, lport); const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr, inet->inet_dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); @@ -203,7 +208,8 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, if (sk2->sk_hash != hash) continue; - if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, dif))) { + if (likely(INET6_MATCH(sk2, net, saddr, daddr, ports, + dif, sdif))) { if (sk2->sk_state == TCP_TIME_WAIT) { tw = inet_twsk(sk2); if (twsk_unique(sk, sk2, twp)) diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c index c0cbcb259f5a..ec43d18b5ff9 100644 --- a/net/ipv6/ip6_checksum.c +++ b/net/ipv6/ip6_checksum.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <net/ip.h> #include <net/udp.h> #include <net/udplite.h> diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index ebb299cf72b7..2e2804f5823e 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -33,18 +33,11 @@ #include <net/ndisc.h> #include <net/addrconf.h> #include <net/lwtunnel.h> +#include <net/fib_notifier.h> #include <net/ip6_fib.h> #include <net/ip6_route.h> -#define RT6_DEBUG 2 - -#if RT6_DEBUG >= 3 -#define RT6_TRACE(x...) pr_debug(x) -#else -#define RT6_TRACE(x...) do { ; } while (0) -#endif - static struct kmem_cache *fib6_node_kmem __read_mostly; struct fib6_cleaner { @@ -61,9 +54,12 @@ struct fib6_cleaner { #define FWS_INIT FWS_L #endif -static void fib6_prune_clones(struct net *net, struct fib6_node *fn); -static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn); -static struct fib6_node *fib6_repair_tree(struct net *net, struct fib6_node *fn); +static struct rt6_info *fib6_find_prefix(struct net *net, + struct fib6_table *table, + struct fib6_node *fn); +static struct fib6_node *fib6_repair_tree(struct net *net, + struct fib6_table *table, + struct fib6_node *fn); static int fib6_walk(struct net *net, struct fib6_walker *w); static int fib6_walk_continue(struct fib6_walker *w); @@ -109,6 +105,20 @@ enum { FIB6_NO_SERNUM_CHANGE = 0, }; +void fib6_update_sernum(struct rt6_info *rt) +{ + struct fib6_table *table = rt->rt6i_table; + struct net *net = dev_net(rt->dst.dev); + struct fib6_node *fn; + + spin_lock_bh(&table->tb6_lock); + fn = rcu_dereference_protected(rt->rt6i_node, + lockdep_is_held(&table->tb6_lock)); + if (fn) + fn->fn_sernum = fib6_new_sernum(net); + spin_unlock_bh(&table->tb6_lock); +} + /* * Auxiliary address test functions for the radix tree. * @@ -139,21 +149,37 @@ static __be32 addr_bit_set(const void *token, int fn_bit) addr[fn_bit >> 5]; } -static struct fib6_node *node_alloc(void) +static struct fib6_node *node_alloc(struct net *net) { struct fib6_node *fn; fn = kmem_cache_zalloc(fib6_node_kmem, GFP_ATOMIC); + if (fn) + net->ipv6.rt6_stats->fib_nodes++; return fn; } -static void node_free(struct fib6_node *fn) +static void node_free_immediate(struct net *net, struct fib6_node *fn) +{ + kmem_cache_free(fib6_node_kmem, fn); + net->ipv6.rt6_stats->fib_nodes--; +} + +static void node_free_rcu(struct rcu_head *head) { + struct fib6_node *fn = container_of(head, struct fib6_node, rcu); + kmem_cache_free(fib6_node_kmem, fn); } -static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) +static void node_free(struct net *net, struct fib6_node *fn) +{ + call_rcu(&fn->rcu, node_free_rcu); + net->ipv6.rt6_stats->fib_nodes--; +} + +void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) { int cpu; @@ -172,18 +198,13 @@ static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) *ppcpu_rt = NULL; } } - - free_percpu(non_pcpu_rt->rt6i_pcpu); - non_pcpu_rt->rt6i_pcpu = NULL; } +EXPORT_SYMBOL_GPL(rt6_free_pcpu); -static void rt6_release(struct rt6_info *rt) +static void fib6_free_table(struct fib6_table *table) { - if (atomic_dec_and_test(&rt->rt6i_ref)) { - rt6_free_pcpu(rt); - dst_dev_put(&rt->dst); - dst_release(&rt->dst); - } + inetpeer_invalidate_tree(&table->tb6_peers); + kfree(table); } static void fib6_link_table(struct net *net, struct fib6_table *tb) @@ -194,8 +215,7 @@ static void fib6_link_table(struct net *net, struct fib6_table *tb) * Initialize table lock at a single place to give lockdep a key, * tables aren't visible prior to being linked to the list. */ - rwlock_init(&tb->tb6_lock); - + spin_lock_init(&tb->tb6_lock); h = tb->tb6_id & (FIB6_TABLE_HASHSZ - 1); /* @@ -214,7 +234,8 @@ static struct fib6_table *fib6_alloc_table(struct net *net, u32 id) table = kzalloc(sizeof(*table), GFP_ATOMIC); if (table) { table->tb6_id = id; - table->tb6_root.leaf = net->ipv6.ip6_null_entry; + rcu_assign_pointer(table->tb6_root.leaf, + net->ipv6.ip6_null_entry); table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; inet_peer_base_init(&table->tb6_peers); } @@ -302,12 +323,114 @@ static void __net_init fib6_tables_init(struct net *net) #endif +unsigned int fib6_tables_seq_read(struct net *net) +{ + unsigned int h, fib_seq = 0; + + rcu_read_lock(); + for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { + struct hlist_head *head = &net->ipv6.fib_table_hash[h]; + struct fib6_table *tb; + + hlist_for_each_entry_rcu(tb, head, tb6_hlist) + fib_seq += tb->fib_seq; + } + rcu_read_unlock(); + + return fib_seq; +} + +static int call_fib6_entry_notifier(struct notifier_block *nb, struct net *net, + enum fib_event_type event_type, + struct rt6_info *rt) +{ + struct fib6_entry_notifier_info info = { + .rt = rt, + }; + + return call_fib6_notifier(nb, net, event_type, &info.info); +} + +static int call_fib6_entry_notifiers(struct net *net, + enum fib_event_type event_type, + struct rt6_info *rt, + struct netlink_ext_ack *extack) +{ + struct fib6_entry_notifier_info info = { + .info.extack = extack, + .rt = rt, + }; + + rt->rt6i_table->fib_seq++; + return call_fib6_notifiers(net, event_type, &info.info); +} + +struct fib6_dump_arg { + struct net *net; + struct notifier_block *nb; +}; + +static void fib6_rt_dump(struct rt6_info *rt, struct fib6_dump_arg *arg) +{ + if (rt == arg->net->ipv6.ip6_null_entry) + return; + call_fib6_entry_notifier(arg->nb, arg->net, FIB_EVENT_ENTRY_ADD, rt); +} + +static int fib6_node_dump(struct fib6_walker *w) +{ + struct rt6_info *rt; + + for_each_fib6_walker_rt(w) + fib6_rt_dump(rt, w->args); + w->leaf = NULL; + return 0; +} + +static void fib6_table_dump(struct net *net, struct fib6_table *tb, + struct fib6_walker *w) +{ + w->root = &tb->tb6_root; + spin_lock_bh(&tb->tb6_lock); + fib6_walk(net, w); + spin_unlock_bh(&tb->tb6_lock); +} + +/* Called with rcu_read_lock() */ +int fib6_tables_dump(struct net *net, struct notifier_block *nb) +{ + struct fib6_dump_arg arg; + struct fib6_walker *w; + unsigned int h; + + w = kzalloc(sizeof(*w), GFP_ATOMIC); + if (!w) + return -ENOMEM; + + w->func = fib6_node_dump; + arg.net = net; + arg.nb = nb; + w->args = &arg; + + for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { + struct hlist_head *head = &net->ipv6.fib_table_hash[h]; + struct fib6_table *tb; + + hlist_for_each_entry_rcu(tb, head, tb6_hlist) + fib6_table_dump(net, tb, w); + } + + kfree(w); + + return 0; +} + static int fib6_dump_node(struct fib6_walker *w) { int res; struct rt6_info *rt; - for (rt = w->leaf; rt; rt = rt->dst.rt6_next) { + for_each_fib6_walker_rt(w) { res = rt6_dump_route(rt, w->args); if (res < 0) { /* Frame is full, suspend walking */ @@ -366,9 +489,9 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb, w->count = 0; w->skip = 0; - read_lock_bh(&table->tb6_lock); + spin_lock_bh(&table->tb6_lock); res = fib6_walk(net, w); - read_unlock_bh(&table->tb6_lock); + spin_unlock_bh(&table->tb6_lock); if (res > 0) { cb->args[4] = 1; cb->args[5] = w->root->fn_sernum; @@ -383,9 +506,9 @@ static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb, } else w->skip = 0; - read_lock_bh(&table->tb6_lock); + spin_lock_bh(&table->tb6_lock); res = fib6_walk_continue(w); - read_unlock_bh(&table->tb6_lock); + spin_unlock_bh(&table->tb6_lock); if (res <= 0) { fib6_walker_unlink(net, w); cb->args[4] = 0; @@ -466,11 +589,13 @@ out: * node. */ -static struct fib6_node *fib6_add_1(struct fib6_node *root, - struct in6_addr *addr, int plen, - int offset, int allow_create, - int replace_required, int sernum, - struct netlink_ext_ack *extack) +static struct fib6_node *fib6_add_1(struct net *net, + struct fib6_table *table, + struct fib6_node *root, + struct in6_addr *addr, int plen, + int offset, int allow_create, + int replace_required, + struct netlink_ext_ack *extack) { struct fib6_node *fn, *in, *ln; struct fib6_node *pn = NULL; @@ -485,7 +610,9 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root, fn = root; do { - key = (struct rt6key *)((u8 *)fn->leaf + offset); + struct rt6_info *leaf = rcu_dereference_protected(fn->leaf, + lockdep_is_held(&table->tb6_lock)); + key = (struct rt6key *)((u8 *)leaf + offset); /* * Prefix match @@ -511,12 +638,10 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root, if (plen == fn->fn_bit) { /* clean up an intermediate node */ if (!(fn->fn_flags & RTN_RTINFO)) { - rt6_release(fn->leaf); - fn->leaf = NULL; + RCU_INIT_POINTER(fn->leaf, NULL); + rt6_release(leaf); } - fn->fn_sernum = sernum; - return fn; } @@ -525,10 +650,13 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root, */ /* Try to walk down on tree. */ - fn->fn_sernum = sernum; dir = addr_bit_set(addr, fn->fn_bit); pn = fn; - fn = dir ? fn->right : fn->left; + fn = dir ? + rcu_dereference_protected(fn->right, + lockdep_is_held(&table->tb6_lock)) : + rcu_dereference_protected(fn->left, + lockdep_is_held(&table->tb6_lock)); } while (fn); if (!allow_create) { @@ -554,19 +682,17 @@ static struct fib6_node *fib6_add_1(struct fib6_node *root, * Create new leaf node without children. */ - ln = node_alloc(); + ln = node_alloc(net); if (!ln) return ERR_PTR(-ENOMEM); ln->fn_bit = plen; - - ln->parent = pn; - ln->fn_sernum = sernum; + RCU_INIT_POINTER(ln->parent, pn); if (dir) - pn->right = ln; + rcu_assign_pointer(pn->right, ln); else - pn->left = ln; + rcu_assign_pointer(pn->left, ln); return ln; @@ -580,7 +706,8 @@ insert_above: * and the current */ - pn = fn->parent; + pn = rcu_dereference_protected(fn->parent, + lockdep_is_held(&table->tb6_lock)); /* find 1st bit in difference between the 2 addrs. @@ -596,14 +723,14 @@ insert_above: * (new leaf node)[ln] (old node)[fn] */ if (plen > bit) { - in = node_alloc(); - ln = node_alloc(); + in = node_alloc(net); + ln = node_alloc(net); if (!in || !ln) { if (in) - node_free(in); + node_free_immediate(net, in); if (ln) - node_free(ln); + node_free_immediate(net, ln); return ERR_PTR(-ENOMEM); } @@ -617,31 +744,28 @@ insert_above: in->fn_bit = bit; - in->parent = pn; + RCU_INIT_POINTER(in->parent, pn); in->leaf = fn->leaf; - atomic_inc(&in->leaf->rt6i_ref); - - in->fn_sernum = sernum; + atomic_inc(&rcu_dereference_protected(in->leaf, + lockdep_is_held(&table->tb6_lock))->rt6i_ref); /* update parent pointer */ if (dir) - pn->right = in; + rcu_assign_pointer(pn->right, in); else - pn->left = in; + rcu_assign_pointer(pn->left, in); ln->fn_bit = plen; - ln->parent = in; - fn->parent = in; - - ln->fn_sernum = sernum; + RCU_INIT_POINTER(ln->parent, in); + rcu_assign_pointer(fn->parent, in); if (addr_bit_set(addr, bit)) { - in->right = ln; - in->left = fn; + rcu_assign_pointer(in->right, ln); + rcu_assign_pointer(in->left, fn); } else { - in->left = ln; - in->right = fn; + rcu_assign_pointer(in->left, ln); + rcu_assign_pointer(in->right, fn); } } else { /* plen <= bit */ @@ -651,28 +775,26 @@ insert_above: * (old node)[fn] NULL */ - ln = node_alloc(); + ln = node_alloc(net); if (!ln) return ERR_PTR(-ENOMEM); ln->fn_bit = plen; - ln->parent = pn; - - ln->fn_sernum = sernum; - - if (dir) - pn->right = ln; - else - pn->left = ln; + RCU_INIT_POINTER(ln->parent, pn); if (addr_bit_set(&key->addr, plen)) - ln->right = fn; + RCU_INIT_POINTER(ln->right, fn); else - ln->left = fn; + RCU_INIT_POINTER(ln->left, fn); + + rcu_assign_pointer(fn->parent, ln); - fn->parent = ln; + if (dir) + rcu_assign_pointer(pn->right, ln); + else + rcu_assign_pointer(pn->left, ln); } return ln; } @@ -718,6 +840,8 @@ static int fib6_commit_metrics(struct dst_entry *dst, struct mx6_config *mxc) static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn, struct net *net) { + struct fib6_table *table = rt->rt6i_table; + if (atomic_read(&rt->rt6i_ref) != 1) { /* This route is used as dummy address holder in some split * nodes. It is not leaked, but it still holds other resources, @@ -726,15 +850,18 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn, * to still alive ones. */ while (fn) { - if (!(fn->fn_flags & RTN_RTINFO) && fn->leaf == rt) { - fn->leaf = fib6_find_prefix(net, fn); - atomic_inc(&fn->leaf->rt6i_ref); + struct rt6_info *leaf = rcu_dereference_protected(fn->leaf, + lockdep_is_held(&table->tb6_lock)); + struct rt6_info *new_leaf; + if (!(fn->fn_flags & RTN_RTINFO) && leaf == rt) { + new_leaf = fib6_find_prefix(net, table, fn); + atomic_inc(&new_leaf->rt6i_ref); + rcu_assign_pointer(fn->leaf, new_leaf); rt6_release(rt); } - fn = fn->parent; + fn = rcu_dereference_protected(fn->parent, + lockdep_is_held(&table->tb6_lock)); } - /* No more references are possible at this point. */ - BUG_ON(atomic_read(&rt->rt6i_ref) != 1); } } @@ -743,11 +870,14 @@ static void fib6_purge_rt(struct rt6_info *rt, struct fib6_node *fn, */ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, - struct nl_info *info, struct mx6_config *mxc) + struct nl_info *info, struct mx6_config *mxc, + struct netlink_ext_ack *extack) { + struct rt6_info *leaf = rcu_dereference_protected(fn->leaf, + lockdep_is_held(&rt->rt6i_table->tb6_lock)); struct rt6_info *iter = NULL; - struct rt6_info **ins; - struct rt6_info **fallback_ins = NULL; + struct rt6_info __rcu **ins; + struct rt6_info __rcu **fallback_ins = NULL; int replace = (info->nlh && (info->nlh->nlmsg_flags & NLM_F_REPLACE)); int add = (!info->nlh || @@ -762,7 +892,9 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, ins = &fn->leaf; - for (iter = fn->leaf; iter; iter = iter->dst.rt6_next) { + for (iter = leaf; iter; + iter = rcu_dereference_protected(iter->dst.rt6_next, + lockdep_is_held(&rt->rt6i_table->tb6_lock))) { /* * Search for duplicates */ @@ -824,7 +956,8 @@ next_iter: if (fallback_ins && !found) { /* No ECMP-able route found, replace first non-ECMP one */ ins = fallback_ins; - iter = *ins; + iter = rcu_dereference_protected(*ins, + lockdep_is_held(&rt->rt6i_table->tb6_lock)); found++; } @@ -838,7 +971,7 @@ next_iter: struct rt6_info *sibling, *temp_sibling; /* Find the first route that have the same metric */ - sibling = fn->leaf; + sibling = leaf; while (sibling) { if (sibling->rt6i_metric == rt->rt6i_metric && rt6_qualify_for_ecmp(sibling)) { @@ -846,7 +979,8 @@ next_iter: &sibling->rt6i_siblings); break; } - sibling = sibling->dst.rt6_next; + sibling = rcu_dereference_protected(sibling->dst.rt6_next, + lockdep_is_held(&rt->rt6i_table->tb6_lock)); } /* For each sibling in the list, increment the counter of * siblings. BUG() if counters does not match, list of siblings @@ -875,10 +1009,12 @@ add: if (err) return err; - rt->dst.rt6_next = iter; - *ins = rt; - rt->rt6i_node = fn; + rcu_assign_pointer(rt->dst.rt6_next, iter); atomic_inc(&rt->rt6i_ref); + rcu_assign_pointer(rt->rt6i_node, fn); + rcu_assign_pointer(*ins, rt); + call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_ADD, + rt, extack); if (!info->skip_notify) inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); info->nl_net->ipv6.rt6_stats->fib_rt_entries++; @@ -902,10 +1038,12 @@ add: if (err) return err; - *ins = rt; - rt->rt6i_node = fn; - rt->dst.rt6_next = iter->dst.rt6_next; atomic_inc(&rt->rt6i_ref); + rcu_assign_pointer(rt->rt6i_node, fn); + rt->dst.rt6_next = iter->dst.rt6_next; + rcu_assign_pointer(*ins, rt); + call_fib6_entry_notifiers(info->nl_net, FIB_EVENT_ENTRY_REPLACE, + rt, extack); if (!info->skip_notify) inet6_rt_notify(RTM_NEWROUTE, rt, info, NLM_F_REPLACE); if (!(fn->fn_flags & RTN_RTINFO)) { @@ -913,25 +1051,34 @@ add: fn->fn_flags |= RTN_RTINFO; } nsiblings = iter->rt6i_nsiblings; + iter->rt6i_node = NULL; fib6_purge_rt(iter, fn, info->nl_net); + if (rcu_access_pointer(fn->rr_ptr) == iter) + fn->rr_ptr = NULL; rt6_release(iter); if (nsiblings) { /* Replacing an ECMP route, remove all siblings */ ins = &rt->dst.rt6_next; - iter = *ins; + iter = rcu_dereference_protected(*ins, + lockdep_is_held(&rt->rt6i_table->tb6_lock)); while (iter) { if (iter->rt6i_metric > rt->rt6i_metric) break; if (rt6_qualify_for_ecmp(iter)) { *ins = iter->dst.rt6_next; + iter->rt6i_node = NULL; fib6_purge_rt(iter, fn, info->nl_net); + if (rcu_access_pointer(fn->rr_ptr) == iter) + fn->rr_ptr = NULL; rt6_release(iter); nsiblings--; + info->nl_net->ipv6.rt6_stats->fib_rt_entries--; } else { ins = &iter->dst.rt6_next; } - iter = *ins; + iter = rcu_dereference_protected(*ins, + lockdep_is_held(&rt->rt6i_table->tb6_lock)); } WARN_ON(nsiblings != 0); } @@ -955,16 +1102,33 @@ void fib6_force_start_gc(struct net *net) jiffies + net->ipv6.sysctl.ip6_rt_gc_interval); } +static void fib6_update_sernum_upto_root(struct rt6_info *rt, + int sernum) +{ + struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node, + lockdep_is_held(&rt->rt6i_table->tb6_lock)); + + /* paired with smp_rmb() in rt6_get_cookie_safe() */ + smp_wmb(); + while (fn) { + fn->fn_sernum = sernum; + fn = rcu_dereference_protected(fn->parent, + lockdep_is_held(&rt->rt6i_table->tb6_lock)); + } +} + /* * Add routing information to the routing tree. * <destination addr>/<source addr> * with source addr info in sub-trees + * Need to own table->tb6_lock */ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info, struct mx6_config *mxc, struct netlink_ext_ack *extack) { + struct fib6_table *table = rt->rt6i_table; struct fib6_node *fn, *pn = NULL; int err = -ENOMEM; int allow_create = 1; @@ -973,6 +1137,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, if (WARN_ON_ONCE(!atomic_read(&rt->dst.__refcnt))) return -EINVAL; + if (WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE)) + return -EINVAL; if (info->nlh) { if (!(info->nlh->nlmsg_flags & NLM_F_CREATE)) @@ -983,9 +1149,10 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, if (!allow_create && !replace_required) pr_warn("RTM_NEWROUTE with no NLM_F_CREATE or NLM_F_REPLACE\n"); - fn = fib6_add_1(root, &rt->rt6i_dst.addr, rt->rt6i_dst.plen, + fn = fib6_add_1(info->nl_net, table, root, + &rt->rt6i_dst.addr, rt->rt6i_dst.plen, offsetof(struct rt6_info, rt6i_dst), allow_create, - replace_required, sernum, extack); + replace_required, extack); if (IS_ERR(fn)) { err = PTR_ERR(fn); fn = NULL; @@ -998,7 +1165,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, if (rt->rt6i_src.plen) { struct fib6_node *sn; - if (!fn->subtree) { + if (!rcu_access_pointer(fn->subtree)) { struct fib6_node *sfn; /* @@ -1012,62 +1179,59 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, */ /* Create subtree root node */ - sfn = node_alloc(); + sfn = node_alloc(info->nl_net); if (!sfn) - goto st_failure; + goto failure; - sfn->leaf = info->nl_net->ipv6.ip6_null_entry; atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref); + rcu_assign_pointer(sfn->leaf, + info->nl_net->ipv6.ip6_null_entry); sfn->fn_flags = RTN_ROOT; - sfn->fn_sernum = sernum; /* Now add the first leaf node to new subtree */ - sn = fib6_add_1(sfn, &rt->rt6i_src.addr, - rt->rt6i_src.plen, + sn = fib6_add_1(info->nl_net, table, sfn, + &rt->rt6i_src.addr, rt->rt6i_src.plen, offsetof(struct rt6_info, rt6i_src), - allow_create, replace_required, sernum, - extack); + allow_create, replace_required, extack); if (IS_ERR(sn)) { /* If it is failed, discard just allocated - root, and then (in st_failure) stale node + root, and then (in failure) stale node in main tree. */ - node_free(sfn); + node_free_immediate(info->nl_net, sfn); err = PTR_ERR(sn); - goto st_failure; + goto failure; } /* Now link new subtree to main tree */ - sfn->parent = fn; - fn->subtree = sfn; + rcu_assign_pointer(sfn->parent, fn); + rcu_assign_pointer(fn->subtree, sfn); } else { - sn = fib6_add_1(fn->subtree, &rt->rt6i_src.addr, - rt->rt6i_src.plen, + sn = fib6_add_1(info->nl_net, table, FIB6_SUBTREE(fn), + &rt->rt6i_src.addr, rt->rt6i_src.plen, offsetof(struct rt6_info, rt6i_src), - allow_create, replace_required, sernum, - extack); + allow_create, replace_required, extack); if (IS_ERR(sn)) { err = PTR_ERR(sn); - goto st_failure; + goto failure; } } - if (!fn->leaf) { - fn->leaf = rt; + if (!rcu_access_pointer(fn->leaf)) { atomic_inc(&rt->rt6i_ref); + rcu_assign_pointer(fn->leaf, rt); } fn = sn; } #endif - err = fib6_add_rt2node(fn, rt, info, mxc); + err = fib6_add_rt2node(fn, rt, info, mxc, extack); if (!err) { + fib6_update_sernum_upto_root(rt, sernum); fib6_start_gc(info->nl_net, rt); - if (!(rt->rt6i_flags & RTF_CACHE)) - fib6_prune_clones(info->nl_net, pn); } out: @@ -1077,41 +1241,43 @@ out: * If fib6_add_1 has cleared the old leaf pointer in the * super-tree leaf node we have to find a new one for it. */ - if (pn != fn && pn->leaf == rt) { - pn->leaf = NULL; + struct rt6_info *pn_leaf = rcu_dereference_protected(pn->leaf, + lockdep_is_held(&table->tb6_lock)); + if (pn != fn && pn_leaf == rt) { + pn_leaf = NULL; + RCU_INIT_POINTER(pn->leaf, NULL); atomic_dec(&rt->rt6i_ref); } - if (pn != fn && !pn->leaf && !(pn->fn_flags & RTN_RTINFO)) { - pn->leaf = fib6_find_prefix(info->nl_net, pn); + if (pn != fn && !pn_leaf && !(pn->fn_flags & RTN_RTINFO)) { + pn_leaf = fib6_find_prefix(info->nl_net, table, pn); #if RT6_DEBUG >= 2 - if (!pn->leaf) { - WARN_ON(pn->leaf == NULL); - pn->leaf = info->nl_net->ipv6.ip6_null_entry; + if (!pn_leaf) { + WARN_ON(!pn_leaf); + pn_leaf = info->nl_net->ipv6.ip6_null_entry; } #endif - atomic_inc(&pn->leaf->rt6i_ref); + atomic_inc(&pn_leaf->rt6i_ref); + rcu_assign_pointer(pn->leaf, pn_leaf); } #endif - /* Always release dst as dst->__refcnt is guaranteed - * to be taken before entering this function - */ - dst_release_immediate(&rt->dst); + goto failure; } return err; -#ifdef CONFIG_IPV6_SUBTREES - /* Subtree creation failed, probably main tree node - is orphan. If it is, shoot it. +failure: + /* fn->leaf could be NULL if fn is an intermediate node and we + * failed to add the new route to it in both subtree creation + * failure and fib6_add_rt2node() failure case. + * In both cases, fib6_repair_tree() should be called to fix + * fn->leaf. */ -st_failure: if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT))) - fib6_repair_tree(info->nl_net, fn); + fib6_repair_tree(info->nl_net, table, fn); /* Always release dst as dst->__refcnt is guaranteed * to be taken before entering this function */ dst_release_immediate(&rt->dst); return err; -#endif } /* @@ -1144,7 +1310,8 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root, dir = addr_bit_set(args->addr, fn->fn_bit); - next = dir ? fn->right : fn->left; + next = dir ? rcu_dereference(fn->right) : + rcu_dereference(fn->left); if (next) { fn = next; @@ -1154,18 +1321,22 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root, } while (fn) { - if (FIB6_SUBTREE(fn) || fn->fn_flags & RTN_RTINFO) { + struct fib6_node *subtree = FIB6_SUBTREE(fn); + + if (subtree || fn->fn_flags & RTN_RTINFO) { + struct rt6_info *leaf = rcu_dereference(fn->leaf); struct rt6key *key; - key = (struct rt6key *) ((u8 *) fn->leaf + - args->offset); + if (!leaf) + goto backtrack; + + key = (struct rt6key *) ((u8 *)leaf + args->offset); if (ipv6_prefix_equal(&key->addr, args->addr, key->plen)) { #ifdef CONFIG_IPV6_SUBTREES - if (fn->subtree) { + if (subtree) { struct fib6_node *sfn; - sfn = fib6_lookup_1(fn->subtree, - args + 1); + sfn = fib6_lookup_1(subtree, args + 1); if (!sfn) goto backtrack; fn = sfn; @@ -1175,18 +1346,18 @@ static struct fib6_node *fib6_lookup_1(struct fib6_node *root, return fn; } } -#ifdef CONFIG_IPV6_SUBTREES backtrack: -#endif if (fn->fn_flags & RTN_ROOT) break; - fn = fn->parent; + fn = rcu_dereference(fn->parent); } return NULL; } +/* called with rcu_read_lock() held + */ struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *daddr, const struct in6_addr *saddr) { @@ -1217,54 +1388,87 @@ struct fib6_node *fib6_lookup(struct fib6_node *root, const struct in6_addr *dad /* * Get node with specified destination prefix (and source prefix, * if subtrees are used) + * exact_match == true means we try to find fn with exact match of + * the passed in prefix addr + * exact_match == false means we try to find fn with longest prefix + * match of the passed in prefix addr. This is useful for finding fn + * for cached route as it will be stored in the exception table under + * the node with longest prefix length. */ static struct fib6_node *fib6_locate_1(struct fib6_node *root, const struct in6_addr *addr, - int plen, int offset) + int plen, int offset, + bool exact_match) { - struct fib6_node *fn; + struct fib6_node *fn, *prev = NULL; for (fn = root; fn ; ) { - struct rt6key *key = (struct rt6key *)((u8 *)fn->leaf + offset); + struct rt6_info *leaf = rcu_dereference(fn->leaf); + struct rt6key *key; + + /* This node is being deleted */ + if (!leaf) { + if (plen <= fn->fn_bit) + goto out; + else + goto next; + } + + key = (struct rt6key *)((u8 *)leaf + offset); /* * Prefix match */ if (plen < fn->fn_bit || !ipv6_prefix_equal(&key->addr, addr, fn->fn_bit)) - return NULL; + goto out; if (plen == fn->fn_bit) return fn; + prev = fn; + +next: /* * We have more bits to go */ if (addr_bit_set(addr, fn->fn_bit)) - fn = fn->right; + fn = rcu_dereference(fn->right); else - fn = fn->left; + fn = rcu_dereference(fn->left); } - return NULL; +out: + if (exact_match) + return NULL; + else + return prev; } struct fib6_node *fib6_locate(struct fib6_node *root, const struct in6_addr *daddr, int dst_len, - const struct in6_addr *saddr, int src_len) + const struct in6_addr *saddr, int src_len, + bool exact_match) { struct fib6_node *fn; fn = fib6_locate_1(root, daddr, dst_len, - offsetof(struct rt6_info, rt6i_dst)); + offsetof(struct rt6_info, rt6i_dst), + exact_match); #ifdef CONFIG_IPV6_SUBTREES if (src_len) { WARN_ON(saddr == NULL); - if (fn && fn->subtree) - fn = fib6_locate_1(fn->subtree, saddr, src_len, - offsetof(struct rt6_info, rt6i_src)); + if (fn) { + struct fib6_node *subtree = FIB6_SUBTREE(fn); + + if (subtree) { + fn = fib6_locate_1(subtree, saddr, src_len, + offsetof(struct rt6_info, rt6i_src), + exact_match); + } + } } #endif @@ -1280,16 +1484,26 @@ struct fib6_node *fib6_locate(struct fib6_node *root, * */ -static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn) +static struct rt6_info *fib6_find_prefix(struct net *net, + struct fib6_table *table, + struct fib6_node *fn) { + struct fib6_node *child_left, *child_right; + if (fn->fn_flags & RTN_ROOT) return net->ipv6.ip6_null_entry; while (fn) { - if (fn->left) - return fn->left->leaf; - if (fn->right) - return fn->right->leaf; + child_left = rcu_dereference_protected(fn->left, + lockdep_is_held(&table->tb6_lock)); + child_right = rcu_dereference_protected(fn->right, + lockdep_is_held(&table->tb6_lock)); + if (child_left) + return rcu_dereference_protected(child_left->leaf, + lockdep_is_held(&table->tb6_lock)); + if (child_right) + return rcu_dereference_protected(child_right->leaf, + lockdep_is_held(&table->tb6_lock)); fn = FIB6_SUBTREE(fn); } @@ -1299,31 +1513,49 @@ static struct rt6_info *fib6_find_prefix(struct net *net, struct fib6_node *fn) /* * Called to trim the tree of intermediate nodes when possible. "fn" * is the node we want to try and remove. + * Need to own table->tb6_lock */ static struct fib6_node *fib6_repair_tree(struct net *net, - struct fib6_node *fn) + struct fib6_table *table, + struct fib6_node *fn) { int children; int nstate; - struct fib6_node *child, *pn; + struct fib6_node *child; struct fib6_walker *w; int iter = 0; for (;;) { + struct fib6_node *fn_r = rcu_dereference_protected(fn->right, + lockdep_is_held(&table->tb6_lock)); + struct fib6_node *fn_l = rcu_dereference_protected(fn->left, + lockdep_is_held(&table->tb6_lock)); + struct fib6_node *pn = rcu_dereference_protected(fn->parent, + lockdep_is_held(&table->tb6_lock)); + struct fib6_node *pn_r = rcu_dereference_protected(pn->right, + lockdep_is_held(&table->tb6_lock)); + struct fib6_node *pn_l = rcu_dereference_protected(pn->left, + lockdep_is_held(&table->tb6_lock)); + struct rt6_info *fn_leaf = rcu_dereference_protected(fn->leaf, + lockdep_is_held(&table->tb6_lock)); + struct rt6_info *pn_leaf = rcu_dereference_protected(pn->leaf, + lockdep_is_held(&table->tb6_lock)); + struct rt6_info *new_fn_leaf; + RT6_TRACE("fixing tree: plen=%d iter=%d\n", fn->fn_bit, iter); iter++; WARN_ON(fn->fn_flags & RTN_RTINFO); WARN_ON(fn->fn_flags & RTN_TL_ROOT); - WARN_ON(fn->leaf); + WARN_ON(fn_leaf); children = 0; child = NULL; - if (fn->right) - child = fn->right, children |= 1; - if (fn->left) - child = fn->left, children |= 2; + if (fn_r) + child = fn_r, children |= 1; + if (fn_l) + child = fn_l, children |= 2; if (children == 3 || FIB6_SUBTREE(fn) #ifdef CONFIG_IPV6_SUBTREES @@ -1331,36 +1563,36 @@ static struct fib6_node *fib6_repair_tree(struct net *net, || (children && fn->fn_flags & RTN_ROOT) #endif ) { - fn->leaf = fib6_find_prefix(net, fn); + new_fn_leaf = fib6_find_prefix(net, table, fn); #if RT6_DEBUG >= 2 - if (!fn->leaf) { - WARN_ON(!fn->leaf); - fn->leaf = net->ipv6.ip6_null_entry; + if (!new_fn_leaf) { + WARN_ON(!new_fn_leaf); + new_fn_leaf = net->ipv6.ip6_null_entry; } #endif - atomic_inc(&fn->leaf->rt6i_ref); - return fn->parent; + atomic_inc(&new_fn_leaf->rt6i_ref); + rcu_assign_pointer(fn->leaf, new_fn_leaf); + return pn; } - pn = fn->parent; #ifdef CONFIG_IPV6_SUBTREES if (FIB6_SUBTREE(pn) == fn) { WARN_ON(!(fn->fn_flags & RTN_ROOT)); - FIB6_SUBTREE(pn) = NULL; + RCU_INIT_POINTER(pn->subtree, NULL); nstate = FWS_L; } else { WARN_ON(fn->fn_flags & RTN_ROOT); #endif - if (pn->right == fn) - pn->right = child; - else if (pn->left == fn) - pn->left = child; + if (pn_r == fn) + rcu_assign_pointer(pn->right, child); + else if (pn_l == fn) + rcu_assign_pointer(pn->left, child); #if RT6_DEBUG >= 2 else WARN_ON(1); #endif if (child) - child->parent = pn; + rcu_assign_pointer(child->parent, pn); nstate = FWS_R; #ifdef CONFIG_IPV6_SUBTREES } @@ -1369,19 +1601,12 @@ static struct fib6_node *fib6_repair_tree(struct net *net, read_lock(&net->ipv6.fib6_walker_lock); FOR_WALKERS(net, w) { if (!child) { - if (w->root == fn) { - w->root = w->node = NULL; - RT6_TRACE("W %p adjusted by delroot 1\n", w); - } else if (w->node == fn) { + if (w->node == fn) { RT6_TRACE("W %p adjusted by delnode 1, s=%d/%d\n", w, w->state, nstate); w->node = pn; w->state = nstate; } } else { - if (w->root == fn) { - w->root = child; - RT6_TRACE("W %p adjusted by delroot 2\n", w); - } if (w->node == fn) { w->node = child; if (children&2) { @@ -1396,33 +1621,39 @@ static struct fib6_node *fib6_repair_tree(struct net *net, } read_unlock(&net->ipv6.fib6_walker_lock); - node_free(fn); + node_free(net, fn); if (pn->fn_flags & RTN_RTINFO || FIB6_SUBTREE(pn)) return pn; - rt6_release(pn->leaf); - pn->leaf = NULL; + RCU_INIT_POINTER(pn->leaf, NULL); + rt6_release(pn_leaf); fn = pn; } } -static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, - struct nl_info *info) +static void fib6_del_route(struct fib6_table *table, struct fib6_node *fn, + struct rt6_info __rcu **rtp, struct nl_info *info) { struct fib6_walker *w; - struct rt6_info *rt = *rtp; + struct rt6_info *rt = rcu_dereference_protected(*rtp, + lockdep_is_held(&table->tb6_lock)); struct net *net = info->nl_net; RT6_TRACE("fib6_del_route\n"); + WARN_ON_ONCE(rt->rt6i_flags & RTF_CACHE); + /* Unlink it */ *rtp = rt->dst.rt6_next; rt->rt6i_node = NULL; net->ipv6.rt6_stats->fib_rt_entries--; net->ipv6.rt6_stats->fib_discarded_routes++; + /* Flush all cached dst in exception table */ + rt6_flush_exceptions(rt); + /* Reset round-robin state, if necessary */ - if (fn->rr_ptr == rt) + if (rcu_access_pointer(fn->rr_ptr) == rt) fn->rr_ptr = NULL; /* Remove this entry from other siblings */ @@ -1441,34 +1672,38 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, FOR_WALKERS(net, w) { if (w->state == FWS_C && w->leaf == rt) { RT6_TRACE("walker %p adjusted by delroute\n", w); - w->leaf = rt->dst.rt6_next; + w->leaf = rcu_dereference_protected(rt->dst.rt6_next, + lockdep_is_held(&table->tb6_lock)); if (!w->leaf) w->state = FWS_U; } } read_unlock(&net->ipv6.fib6_walker_lock); - rt->dst.rt6_next = NULL; - /* If it was last route, expunge its radix tree node */ - if (!fn->leaf) { + if (!rcu_access_pointer(fn->leaf)) { fn->fn_flags &= ~RTN_RTINFO; net->ipv6.rt6_stats->fib_route_nodes--; - fn = fib6_repair_tree(net, fn); + fn = fib6_repair_tree(net, table, fn); } fib6_purge_rt(rt, fn, net); + call_fib6_entry_notifiers(net, FIB_EVENT_ENTRY_DEL, rt, NULL); if (!info->skip_notify) inet6_rt_notify(RTM_DELROUTE, rt, info, 0); rt6_release(rt); } +/* Need to own table->tb6_lock */ int fib6_del(struct rt6_info *rt, struct nl_info *info) { + struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node, + lockdep_is_held(&rt->rt6i_table->tb6_lock)); + struct fib6_table *table = rt->rt6i_table; struct net *net = info->nl_net; - struct fib6_node *fn = rt->rt6i_node; - struct rt6_info **rtp; + struct rt6_info __rcu **rtp; + struct rt6_info __rcu **rtp_next; #if RT6_DEBUG >= 2 if (rt->dst.obsolete > 0) { @@ -1481,28 +1716,22 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info) WARN_ON(!(fn->fn_flags & RTN_RTINFO)); - if (!(rt->rt6i_flags & RTF_CACHE)) { - struct fib6_node *pn = fn; -#ifdef CONFIG_IPV6_SUBTREES - /* clones of this route might be in another subtree */ - if (rt->rt6i_src.plen) { - while (!(pn->fn_flags & RTN_ROOT)) - pn = pn->parent; - pn = pn->parent; - } -#endif - fib6_prune_clones(info->nl_net, pn); - } + /* remove cached dst from exception table */ + if (rt->rt6i_flags & RTF_CACHE) + return rt6_remove_exception_rt(rt); /* * Walk the leaf entries looking for ourself */ - for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->dst.rt6_next) { - if (*rtp == rt) { - fib6_del_route(fn, rtp, info); + for (rtp = &fn->leaf; *rtp; rtp = rtp_next) { + struct rt6_info *cur = rcu_dereference_protected(*rtp, + lockdep_is_held(&table->tb6_lock)); + if (rt == cur) { + fib6_del_route(table, fn, rtp, info); return 0; } + rtp_next = &cur->dst.rt6_next; } return -ENOENT; } @@ -1529,22 +1758,22 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info) * 0 -> walk is complete. * >0 -> walk is incomplete (i.e. suspended) * <0 -> walk is terminated by an error. + * + * This function is called with tb6_lock held. */ static int fib6_walk_continue(struct fib6_walker *w) { - struct fib6_node *fn, *pn; + struct fib6_node *fn, *pn, *left, *right; + + /* w->root should always be table->tb6_root */ + WARN_ON_ONCE(!(w->root->fn_flags & RTN_TL_ROOT)); for (;;) { fn = w->node; if (!fn) return 0; - if (w->prune && fn != w->root && - fn->fn_flags & RTN_RTINFO && w->state < FWS_C) { - w->state = FWS_C; - w->leaf = fn->leaf; - } switch (w->state) { #ifdef CONFIG_IPV6_SUBTREES case FWS_S: @@ -1554,21 +1783,26 @@ static int fib6_walk_continue(struct fib6_walker *w) } w->state = FWS_L; #endif + /* fall through */ case FWS_L: - if (fn->left) { - w->node = fn->left; + left = rcu_dereference_protected(fn->left, 1); + if (left) { + w->node = left; w->state = FWS_INIT; continue; } w->state = FWS_R; + /* fall through */ case FWS_R: - if (fn->right) { - w->node = fn->right; + right = rcu_dereference_protected(fn->right, 1); + if (right) { + w->node = right; w->state = FWS_INIT; continue; } w->state = FWS_C; - w->leaf = fn->leaf; + w->leaf = rcu_dereference_protected(fn->leaf, 1); + /* fall through */ case FWS_C: if (w->leaf && fn->fn_flags & RTN_RTINFO) { int err; @@ -1587,10 +1821,13 @@ static int fib6_walk_continue(struct fib6_walker *w) } skip: w->state = FWS_U; + /* fall through */ case FWS_U: if (fn == w->root) return 0; - pn = fn->parent; + pn = rcu_dereference_protected(fn->parent, 1); + left = rcu_dereference_protected(pn->left, 1); + right = rcu_dereference_protected(pn->right, 1); w->node = pn; #ifdef CONFIG_IPV6_SUBTREES if (FIB6_SUBTREE(pn) == fn) { @@ -1599,13 +1836,13 @@ skip: continue; } #endif - if (pn->left == fn) { + if (left == fn) { w->state = FWS_R; continue; } - if (pn->right == fn) { + if (right == fn) { w->state = FWS_C; - w->leaf = w->node->leaf; + w->leaf = rcu_dereference_protected(w->node->leaf, 1); continue; } #if RT6_DEBUG >= 2 @@ -1648,7 +1885,7 @@ static int fib6_clean_node(struct fib6_walker *w) return 0; } - for (rt = w->leaf; rt; rt = rt->dst.rt6_next) { + for_each_fib6_walker_rt(w) { res = c->func(rt, c->arg); if (res < 0) { w->leaf = rt; @@ -1656,7 +1893,9 @@ static int fib6_clean_node(struct fib6_walker *w) if (res) { #if RT6_DEBUG >= 2 pr_debug("%s: del failed: rt=%p@%p err=%d\n", - __func__, rt, rt->rt6i_node, res); + __func__, rt, + rcu_access_pointer(rt->rt6i_node), + res); #endif continue; } @@ -1674,20 +1913,16 @@ static int fib6_clean_node(struct fib6_walker *w) * func is called on each route. * It may return -1 -> delete this route. * 0 -> continue walking - * - * prune==1 -> only immediate children of node (certainly, - * ignoring pure split nodes) will be scanned. */ static void fib6_clean_tree(struct net *net, struct fib6_node *root, int (*func)(struct rt6_info *, void *arg), - bool prune, int sernum, void *arg) + int sernum, void *arg) { struct fib6_cleaner c; c.w.root = root; c.w.func = fib6_clean_node; - c.w.prune = prune; c.w.count = 0; c.w.skip = 0; c.func = func; @@ -1710,10 +1945,10 @@ static void __fib6_clean_all(struct net *net, for (h = 0; h < FIB6_TABLE_HASHSZ; h++) { head = &net->ipv6.fib_table_hash[h]; hlist_for_each_entry_rcu(table, head, tb6_hlist) { - write_lock_bh(&table->tb6_lock); + spin_lock_bh(&table->tb6_lock); fib6_clean_tree(net, &table->tb6_root, - func, false, sernum, arg); - write_unlock_bh(&table->tb6_lock); + func, sernum, arg); + spin_unlock_bh(&table->tb6_lock); } } rcu_read_unlock(); @@ -1725,22 +1960,6 @@ void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *), __fib6_clean_all(net, func, FIB6_NO_SERNUM_CHANGE, arg); } -static int fib6_prune_clone(struct rt6_info *rt, void *arg) -{ - if (rt->rt6i_flags & RTF_CACHE) { - RT6_TRACE("pruning clone %p\n", rt); - return -1; - } - - return 0; -} - -static void fib6_prune_clones(struct net *net, struct fib6_node *fn) -{ - fib6_clean_tree(net, fn, fib6_prune_clone, true, - FIB6_NO_SERNUM_CHANGE, NULL); -} - static void fib6_flush_trees(struct net *net) { int new_sernum = fib6_new_sernum(net); @@ -1752,12 +1971,6 @@ static void fib6_flush_trees(struct net *net) * Garbage collection */ -struct fib6_gc_args -{ - int timeout; - int more; -}; - static int fib6_age(struct rt6_info *rt, void *arg) { struct fib6_gc_args *gc_args = arg; @@ -1766,9 +1979,6 @@ static int fib6_age(struct rt6_info *rt, void *arg) /* * check addrconf expiration here. * Routes are expired even if they are in use. - * - * Also age clones. Note, that clones are aged out - * only if they are not in use now. */ if (rt->rt6i_flags & RTF_EXPIRES && rt->dst.expires) { @@ -1777,29 +1987,14 @@ static int fib6_age(struct rt6_info *rt, void *arg) return -1; } gc_args->more++; - } else if (rt->rt6i_flags & RTF_CACHE) { - if (atomic_read(&rt->dst.__refcnt) == 1 && - time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) { - RT6_TRACE("aging clone %p\n", rt); - return -1; - } else if (rt->rt6i_flags & RTF_GATEWAY) { - struct neighbour *neigh; - __u8 neigh_flags = 0; - - neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway); - if (neigh) { - neigh_flags = neigh->flags; - neigh_release(neigh); - } - if (!(neigh_flags & NTF_ROUTER)) { - RT6_TRACE("purging route %p via non-router but gateway\n", - rt); - return -1; - } - } - gc_args->more++; } + /* Also age clones in the exception table. + * Note, that clones are aged out + * only if they are not in use now. + */ + rt6_age_exceptions(rt, gc_args, now); + return 0; } @@ -1839,6 +2034,11 @@ static void fib6_gc_timer_cb(unsigned long arg) static int __net_init fib6_net_init(struct net *net) { size_t size = sizeof(struct hlist_head) * FIB6_TABLE_HASHSZ; + int err; + + err = fib6_notifier_init(net); + if (err) + return err; spin_lock_init(&net->ipv6.fib6_gc_lock); rwlock_init(&net->ipv6.fib6_walker_lock); @@ -1862,7 +2062,8 @@ static int __net_init fib6_net_init(struct net *net) goto out_fib_table_hash; net->ipv6.fib6_main_tbl->tb6_id = RT6_TABLE_MAIN; - net->ipv6.fib6_main_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry; + rcu_assign_pointer(net->ipv6.fib6_main_tbl->tb6_root.leaf, + net->ipv6.ip6_null_entry); net->ipv6.fib6_main_tbl->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; inet_peer_base_init(&net->ipv6.fib6_main_tbl->tb6_peers); @@ -1873,7 +2074,8 @@ static int __net_init fib6_net_init(struct net *net) if (!net->ipv6.fib6_local_tbl) goto out_fib6_main_tbl; net->ipv6.fib6_local_tbl->tb6_id = RT6_TABLE_LOCAL; - net->ipv6.fib6_local_tbl->tb6_root.leaf = net->ipv6.ip6_null_entry; + rcu_assign_pointer(net->ipv6.fib6_local_tbl->tb6_root.leaf, + net->ipv6.ip6_null_entry); net->ipv6.fib6_local_tbl->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; inet_peer_base_init(&net->ipv6.fib6_local_tbl->tb6_peers); @@ -1891,22 +2093,31 @@ out_fib_table_hash: out_rt6_stats: kfree(net->ipv6.rt6_stats); out_timer: + fib6_notifier_exit(net); return -ENOMEM; } static void fib6_net_exit(struct net *net) { + unsigned int i; + rt6_ifdown(net, NULL); del_timer_sync(&net->ipv6.ip6_fib_timer); -#ifdef CONFIG_IPV6_MULTIPLE_TABLES - inetpeer_invalidate_tree(&net->ipv6.fib6_local_tbl->tb6_peers); - kfree(net->ipv6.fib6_local_tbl); -#endif - inetpeer_invalidate_tree(&net->ipv6.fib6_main_tbl->tb6_peers); - kfree(net->ipv6.fib6_main_tbl); + for (i = 0; i < FIB6_TABLE_HASHSZ; i++) { + struct hlist_head *head = &net->ipv6.fib_table_hash[i]; + struct hlist_node *tmp; + struct fib6_table *tb; + + hlist_for_each_entry_safe(tb, tmp, head, tb6_hlist) { + hlist_del(&tb->tb6_hlist); + fib6_free_table(tb); + } + } + kfree(net->ipv6.fib_table_hash); kfree(net->ipv6.rt6_stats); + fib6_notifier_exit(net); } static struct pernet_operations fib6_net_ops = { @@ -1930,7 +2141,7 @@ int __init fib6_init(void) goto out_kmem_cache_create; ret = __rtnl_register(PF_INET6, RTM_GETROUTE, NULL, inet6_dump_fib, - NULL); + 0); if (ret) goto out_unregister_subsys; @@ -1994,7 +2205,9 @@ static int ipv6_route_yield(struct fib6_walker *w) return 1; do { - iter->w.leaf = iter->w.leaf->dst.rt6_next; + iter->w.leaf = rcu_dereference_protected( + iter->w.leaf->dst.rt6_next, + lockdep_is_held(&iter->tbl->tb6_lock)); iter->skip--; if (!iter->skip && iter->w.leaf) return 1; @@ -2059,7 +2272,7 @@ static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) if (!v) goto iter_table; - n = ((struct rt6_info *)v)->dst.rt6_next; + n = rcu_dereference_bh(((struct rt6_info *)v)->dst.rt6_next); if (n) { ++*pos; return n; @@ -2067,9 +2280,9 @@ static void *ipv6_route_seq_next(struct seq_file *seq, void *v, loff_t *pos) iter_table: ipv6_route_check_sernum(iter); - read_lock(&iter->tbl->tb6_lock); + spin_lock_bh(&iter->tbl->tb6_lock); r = fib6_walk_continue(&iter->w); - read_unlock(&iter->tbl->tb6_lock); + spin_unlock_bh(&iter->tbl->tb6_lock); if (r > 0) { if (v) ++*pos; diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 8081bafe441b..9f2e73c71768 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -47,7 +47,7 @@ static atomic_t fl_size = ATOMIC_INIT(0); static struct ip6_flowlabel __rcu *fl_ht[FL_HASH_MASK+1]; static void ip6_fl_gc(unsigned long dummy); -static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc, 0, 0); +static DEFINE_TIMER(ip6_fl_gc_timer, ip6_fl_gc); /* FL hash table lock: it protects only of GC */ @@ -315,6 +315,7 @@ struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space, } opt_space->dst1opt = fopt->dst1opt; opt_space->opt_flen = fopt->opt_flen; + opt_space->tot_len = fopt->tot_len; return opt_space; } EXPORT_SYMBOL_GPL(fl6_merge_options); diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 67ff2aaf5dcb..b90bad7a4e56 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -369,6 +369,7 @@ static void ip6gre_tunnel_uninit(struct net_device *dev) static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { + struct net *net = dev_net(skb->dev); const struct gre_base_hdr *greh; const struct ipv6hdr *ipv6h; int grehlen = sizeof(*greh); @@ -402,19 +403,21 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return; switch (type) { - __u32 teli; struct ipv6_tlv_tnl_enc_lim *tel; - __u32 mtu; + __u32 teli; case ICMPV6_DEST_UNREACH: net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n", t->parms.name); - break; + if (code != ICMPV6_PORT_UNREACH) + break; + return; case ICMPV6_TIME_EXCEED: if (code == ICMPV6_EXC_HOPLIMIT) { net_dbg_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n", t->parms.name); + break; } - break; + return; case ICMPV6_PARAMPROB: teli = 0; if (code == ICMPV6_HDR_FIELD) @@ -430,13 +433,14 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, net_dbg_ratelimited("%s: Recipient unable to parse tunneled packet!\n", t->parms.name); } - break; + return; case ICMPV6_PKT_TOOBIG: - mtu = be32_to_cpu(info) - offset; - if (mtu < IPV6_MIN_MTU) - mtu = IPV6_MIN_MTU; - t->dev->mtu = mtu; - break; + ip6_update_pmtu(skb, net, info, 0, 0, sock_net_uid(net, NULL)); + return; + case NDISC_REDIRECT: + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); + return; } if (time_before(jiffies, t->err_time + IP6TUNNEL_ERR_TIMEO)) @@ -498,8 +502,7 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, __u32 *pmtu, __be16 proto) { struct ip6_tnl *tunnel = netdev_priv(dev); - __be16 protocol = (dev->type == ARPHRD_ETHER) ? - htons(ETH_P_TEB) : proto; + __be16 protocol; if (dev->type == ARPHRD_ETHER) IPCB(skb)->flags = 0; @@ -513,6 +516,7 @@ static netdev_tx_t __gre6_xmit(struct sk_buff *skb, tunnel->o_seqno++; /* Push GRE header. */ + protocol = (dev->type == ARPHRD_ETHER) ? htons(ETH_P_TEB) : proto; gre_build_header(skb, tunnel->tun_hlen, tunnel->parms.o_flags, protocol, tunnel->parms.o_key, htonl(tunnel->o_seqno)); @@ -938,24 +942,25 @@ done: } static int ip6gre_header(struct sk_buff *skb, struct net_device *dev, - unsigned short type, - const void *daddr, const void *saddr, unsigned int len) + unsigned short type, const void *daddr, + const void *saddr, unsigned int len) { struct ip6_tnl *t = netdev_priv(dev); - struct ipv6hdr *ipv6h = skb_push(skb, t->hlen); - __be16 *p = (__be16 *)(ipv6h+1); + struct ipv6hdr *ipv6h; + __be16 *p; - ip6_flow_hdr(ipv6h, 0, - ip6_make_flowlabel(dev_net(dev), skb, - t->fl.u.ip6.flowlabel, true, - &t->fl.u.ip6)); + ipv6h = skb_push(skb, t->hlen + sizeof(*ipv6h)); + ip6_flow_hdr(ipv6h, 0, ip6_make_flowlabel(dev_net(dev), skb, + t->fl.u.ip6.flowlabel, + true, &t->fl.u.ip6)); ipv6h->hop_limit = t->parms.hop_limit; ipv6h->nexthdr = NEXTHDR_GRE; ipv6h->saddr = t->parms.laddr; ipv6h->daddr = t->parms.raddr; - p[0] = t->parms.o_flags; - p[1] = htons(type); + p = (__be16 *)(ipv6h + 1); + p[0] = t->parms.o_flags; + p[1] = htons(type); /* * Set the source hardware address. @@ -1153,19 +1158,21 @@ err_alloc_dev: return err; } -static void __net_exit ip6gre_exit_net(struct net *net) +static void __net_exit ip6gre_exit_batch_net(struct list_head *net_list) { + struct net *net; LIST_HEAD(list); rtnl_lock(); - ip6gre_destroy_tunnels(net, &list); + list_for_each_entry(net, net_list, exit_list) + ip6gre_destroy_tunnels(net, &list); unregister_netdevice_many(&list); rtnl_unlock(); } static struct pernet_operations ip6gre_net_ops = { .init = ip6gre_init_net, - .exit = ip6gre_exit_net, + .exit_batch = ip6gre_exit_batch_net, .id = &ip6gre_net_id, .size = sizeof(struct ip6gre_net), }; @@ -1308,6 +1315,7 @@ static void ip6gre_tap_setup(struct net_device *dev) dev->features |= NETIF_F_NETNS_LOCAL; dev->priv_flags &= ~IFF_TX_SKB_SHARING; dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + netif_keep_dst(dev); } static bool ip6gre_netlink_encap_parms(struct nlattr *data[], diff --git a/net/ipv6/ip6_icmp.c b/net/ipv6/ip6_icmp.c index 713676f14a0e..02045494c24c 100644 --- a/net/ipv6/ip6_icmp.c +++ b/net/ipv6/ip6_icmp.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/export.h> #include <linux/icmpv6.h> #include <linux/mutex.h> diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index cdb3728faca7..4a87f9428ca5 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -105,7 +105,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, for (skb = segs; skb; skb = skb->next) { ipv6h = (struct ipv6hdr *)(skb_mac_header(skb) + nhoff); - if (gso_partial) + if (gso_partial && skb_is_gso(skb)) payload_len = skb_shinfo(skb)->gso_size + SKB_GSO_CB(skb)->data_offset + skb->head - (unsigned char *)(ipv6h + 1); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 2dfe50d8d609..5110a418cc4d 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1110,69 +1110,6 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, } EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); -static inline int ip6_ufo_append_data(struct sock *sk, - struct sk_buff_head *queue, - int getfrag(void *from, char *to, int offset, int len, - int odd, struct sk_buff *skb), - void *from, int length, int hh_len, int fragheaderlen, - int exthdrlen, int transhdrlen, int mtu, - unsigned int flags, const struct flowi6 *fl6) - -{ - struct sk_buff *skb; - int err; - - /* There is support for UDP large send offload by network - * device, so create one single skb packet containing complete - * udp datagram - */ - skb = skb_peek_tail(queue); - if (!skb) { - skb = sock_alloc_send_skb(sk, - hh_len + fragheaderlen + transhdrlen + 20, - (flags & MSG_DONTWAIT), &err); - if (!skb) - return err; - - /* reserve space for Hardware header */ - skb_reserve(skb, hh_len); - - /* create space for UDP/IP header */ - skb_put(skb, fragheaderlen + transhdrlen); - - /* initialize network header pointer */ - skb_set_network_header(skb, exthdrlen); - - /* initialize protocol header pointer */ - skb->transport_header = skb->network_header + fragheaderlen; - - skb->protocol = htons(ETH_P_IPV6); - skb->csum = 0; - - if (flags & MSG_CONFIRM) - skb_set_dst_pending_confirm(skb, 1); - - __skb_queue_tail(queue, skb); - } else if (skb_is_gso(skb)) { - goto append; - } - - skb->ip_summed = CHECKSUM_PARTIAL; - /* Specify the length of each IPv6 datagram fragment. - * It has to be a multiple of 8. - */ - skb_shinfo(skb)->gso_size = (mtu - fragheaderlen - - sizeof(struct frag_hdr)) & ~7; - skb_shinfo(skb)->gso_type = SKB_GSO_UDP; - skb_shinfo(skb)->ip6_frag_id = ipv6_select_ident(sock_net(sk), - &fl6->daddr, - &fl6->saddr); - -append: - return skb_append_datato_frags(sk, skb, getfrag, from, - (length - transhdrlen)); -} - static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src, gfp_t gfp) { @@ -1224,11 +1161,11 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, if (WARN_ON(v6_cork->opt)) return -EINVAL; - v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation); + v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation); if (unlikely(!v6_cork->opt)) return -ENOBUFS; - v6_cork->opt->tot_len = opt->tot_len; + v6_cork->opt->tot_len = sizeof(*opt); v6_cork->opt->opt_flen = opt->opt_flen; v6_cork->opt->opt_nflen = opt->opt_nflen; @@ -1381,20 +1318,6 @@ emsgsize: */ cork->length += length; - if ((skb && skb_is_gso(skb)) || - (((length + (skb ? skb->len : headersize)) > mtu) && - (skb_queue_len(queue) <= 1) && - (sk->sk_protocol == IPPROTO_UDP) && - (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) && - (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk))) { - err = ip6_ufo_append_data(sk, queue, getfrag, from, length, - hh_len, fragheaderlen, exthdrlen, - transhdrlen, mtu, flags, fl6); - if (err) - goto error; - return 0; - } - if (!skb) goto alloc_new_skb; diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 3a0ba2ae4b0f..3d3092adf1d2 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -171,7 +171,7 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_ } t = rcu_dereference(ip6n->collect_md_tun); - if (t) + if (t && t->dev->flags & IFF_UP) return t; t = rcu_dereference(ip6n->tnls_wc[0]); @@ -471,15 +471,16 @@ static int ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, u8 *type, u8 *code, int *msg, __u32 *info, int offset) { - const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) skb->data; - struct ip6_tnl *t; - int rel_msg = 0; + const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data; + struct net *net = dev_net(skb->dev); u8 rel_type = ICMPV6_DEST_UNREACH; u8 rel_code = ICMPV6_ADDR_UNREACH; - u8 tproto; __u32 rel_info = 0; - __u16 len; + struct ip6_tnl *t; int err = -ENOENT; + int rel_msg = 0; + u8 tproto; + __u16 len; /* If the packet doesn't contain the original IPv6 header we are in trouble since we might need the source address for further @@ -490,16 +491,15 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, if (!t) goto out; - tproto = ACCESS_ONCE(t->parms.proto); + tproto = READ_ONCE(t->parms.proto); if (tproto != ipproto && tproto != 0) goto out; err = 0; switch (*type) { - __u32 teli; struct ipv6_tlv_tnl_enc_lim *tel; - __u32 mtu; + __u32 mtu, teli; case ICMPV6_DEST_UNREACH: net_dbg_ratelimited("%s: Path to destination invalid or inactive!\n", t->parms.name); @@ -530,11 +530,11 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, } break; case ICMPV6_PKT_TOOBIG: + ip6_update_pmtu(skb, net, htonl(*info), 0, 0, + sock_net_uid(net, NULL)); mtu = *info - offset; if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - t->dev->mtu = mtu; - len = sizeof(*ipv6h) + ntohs(ipv6h->payload_len); if (len > mtu) { rel_type = ICMPV6_PKT_TOOBIG; @@ -543,6 +543,10 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, rel_msg = 1; } break; + case NDISC_REDIRECT: + ip6_redirect(skb, net, skb->dev->ifindex, 0, + sock_net_uid(net, NULL)); + break; } *type = rel_type; @@ -559,13 +563,12 @@ static int ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { - int rel_msg = 0; - u8 rel_type = type; - u8 rel_code = code; __u32 rel_info = ntohl(info); - int err; - struct sk_buff *skb2; const struct iphdr *eiph; + struct sk_buff *skb2; + int err, rel_msg = 0; + u8 rel_type = type; + u8 rel_code = code; struct rtable *rt; struct flowi4 fl4; @@ -590,9 +593,6 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, rel_type = ICMP_DEST_UNREACH; rel_code = ICMP_FRAG_NEEDED; break; - case NDISC_REDIRECT: - rel_type = ICMP_REDIRECT; - rel_code = ICMP_REDIR_HOST; default: return 0; } @@ -611,33 +611,26 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, eiph = ip_hdr(skb2); /* Try to guess incoming interface */ - rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, - eiph->saddr, 0, - 0, 0, - IPPROTO_IPIP, RT_TOS(eiph->tos), 0); + rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, eiph->saddr, + 0, 0, 0, IPPROTO_IPIP, RT_TOS(eiph->tos), 0); if (IS_ERR(rt)) goto out; skb2->dev = rt->dst.dev; + ip_rt_put(rt); /* route "incoming" packet */ if (rt->rt_flags & RTCF_LOCAL) { - ip_rt_put(rt); - rt = NULL; rt = ip_route_output_ports(dev_net(skb->dev), &fl4, NULL, - eiph->daddr, eiph->saddr, - 0, 0, - IPPROTO_IPIP, - RT_TOS(eiph->tos), 0); - if (IS_ERR(rt) || - rt->dst.dev->type != ARPHRD_TUNNEL) { + eiph->daddr, eiph->saddr, 0, 0, + IPPROTO_IPIP, RT_TOS(eiph->tos), 0); + if (IS_ERR(rt) || rt->dst.dev->type != ARPHRD_TUNNEL) { if (!IS_ERR(rt)) ip_rt_put(rt); goto out; } skb_dst_set(skb2, &rt->dst); } else { - ip_rt_put(rt); if (ip_route_input(skb2, eiph->daddr, eiph->saddr, eiph->tos, skb2->dev) || skb_dst(skb2)->dev->type != ARPHRD_TUNNEL) @@ -649,10 +642,9 @@ ip4ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (rel_info > dst_mtu(skb_dst(skb2))) goto out; - skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, rel_info); + skb_dst(skb2)->ops->update_pmtu(skb_dst(skb2), NULL, skb2, + rel_info); } - if (rel_type == ICMP_REDIRECT) - skb_dst(skb2)->ops->redirect(skb_dst(skb2), NULL, skb2); icmp_send(skb2, rel_type, rel_code, htonl(rel_info)); @@ -665,11 +657,10 @@ static int ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { - int rel_msg = 0; + __u32 rel_info = ntohl(info); + int err, rel_msg = 0; u8 rel_type = type; u8 rel_code = code; - __u32 rel_info = ntohl(info); - int err; err = ip6_tnl_err(skb, IPPROTO_IPV6, opt, &rel_type, &rel_code, &rel_msg, &rel_info, offset); @@ -769,7 +760,8 @@ int ip6_tnl_rcv_ctl(struct ip6_tnl *t, if ((ipv6_addr_is_multicast(laddr) || likely(ipv6_chk_addr(net, laddr, ldev, 0))) && - likely(!ipv6_chk_addr(net, raddr, NULL, 0))) + ((p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) || + likely(!ipv6_chk_addr(net, raddr, NULL, 0)))) ret = 1; } return ret; @@ -899,7 +891,7 @@ static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto, t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr); if (t) { - u8 tproto = ACCESS_ONCE(t->parms.proto); + u8 tproto = READ_ONCE(t->parms.proto); if (tproto != ipproto && tproto != 0) goto drop; @@ -999,7 +991,8 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t, if (unlikely(!ipv6_chk_addr(net, laddr, ldev, 0))) pr_warn("%s xmit: Local address not yet configured!\n", p->name); - else if (!ipv6_addr_is_multicast(raddr) && + else if (!(p->flags & IP6_TNL_F_ALLOW_LOCAL_REMOTE) && + !ipv6_addr_is_multicast(raddr) && unlikely(ipv6_chk_addr(net, raddr, NULL, 0))) pr_warn("%s xmit: Routing loop! Remote address found on this node!\n", p->name); @@ -1043,6 +1036,7 @@ int ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev, __u8 dsfield, struct dst_entry *dst = NULL, *ndst = NULL; struct net_device *tdev; int mtu; + unsigned int eth_hlen = t->dev->type == ARPHRD_ETHER ? ETH_HLEN : 0; unsigned int psh_hlen = sizeof(struct ipv6hdr) + t->encap_hlen; unsigned int max_headroom = psh_hlen; bool use_cache = false; @@ -1124,7 +1118,7 @@ route_lookup: t->parms.name); goto tx_err_dst_release; } - mtu = dst_mtu(dst) - psh_hlen - t->tun_hlen; + mtu = dst_mtu(dst) - eth_hlen - psh_hlen - t->tun_hlen; if (encap_limit >= 0) { max_headroom += 8; mtu -= 8; @@ -1133,7 +1127,7 @@ route_lookup: mtu = IPV6_MIN_MTU; if (skb_dst(skb) && !t->parms.collect_md) skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); - if (skb->len - t->tun_hlen > mtu && !skb_is_gso(skb)) { + if (skb->len - t->tun_hlen - eth_hlen > mtu && !skb_is_gso(skb)) { *pmtu = mtu; err = -EMSGSIZE; goto tx_err_dst_release; @@ -1184,6 +1178,7 @@ route_lookup: init_tel_txopt(&opt, encap_limit); ipv6_push_frag_opts(skb, &opt.ops, &proto); } + hop_limit = hop_limit ? : ip6_dst_hoplimit(dst); /* Calculate max headroom for all the headers and adjust * needed_headroom if necessary. @@ -1231,7 +1226,7 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); - tproto = ACCESS_ONCE(t->parms.proto); + tproto = READ_ONCE(t->parms.proto); if (tproto != IPPROTO_IPIP && tproto != 0) return -1; @@ -1301,7 +1296,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) u8 tproto; int err; - tproto = ACCESS_ONCE(t->parms.proto); + tproto = READ_ONCE(t->parms.proto); if ((tproto != IPPROTO_IPV6 && tproto != 0) || ip6_tnl_addr_conflict(t, ipv6h)) return -1; @@ -2166,17 +2161,16 @@ static struct xfrm6_tunnel ip6ip6_handler __read_mostly = { .priority = 1, }; -static void __net_exit ip6_tnl_destroy_tunnels(struct net *net) +static void __net_exit ip6_tnl_destroy_tunnels(struct net *net, struct list_head *list) { struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); struct net_device *dev, *aux; int h; struct ip6_tnl *t; - LIST_HEAD(list); for_each_netdev_safe(net, dev, aux) if (dev->rtnl_link_ops == &ip6_link_ops) - unregister_netdevice_queue(dev, &list); + unregister_netdevice_queue(dev, list); for (h = 0; h < IP6_TUNNEL_HASH_SIZE; h++) { t = rtnl_dereference(ip6n->tnls_r_l[h]); @@ -2185,12 +2179,10 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct net *net) * been added to the list by the previous loop. */ if (!net_eq(dev_net(t->dev), net)) - unregister_netdevice_queue(t->dev, &list); + unregister_netdevice_queue(t->dev, list); t = rtnl_dereference(t->next); } } - - unregister_netdevice_many(&list); } static int __net_init ip6_tnl_init_net(struct net *net) @@ -2234,16 +2226,21 @@ err_alloc_dev: return err; } -static void __net_exit ip6_tnl_exit_net(struct net *net) +static void __net_exit ip6_tnl_exit_batch_net(struct list_head *net_list) { + struct net *net; + LIST_HEAD(list); + rtnl_lock(); - ip6_tnl_destroy_tunnels(net); + list_for_each_entry(net, net_list, exit_list) + ip6_tnl_destroy_tunnels(net, &list); + unregister_netdevice_many(&list); rtnl_unlock(); } static struct pernet_operations ip6_tnl_net_ops = { .init = ip6_tnl_init_net, - .exit = ip6_tnl_exit_net, + .exit_batch = ip6_tnl_exit_batch_net, .id = &ip6_tnl_net_id, .size = sizeof(struct ip6_tnl_net), }; @@ -2258,6 +2255,9 @@ static int __init ip6_tunnel_init(void) { int err; + if (!ipv6_mod_enabled()) + return -EOPNOTSUPP; + err = register_pernet_device(&ip6_tnl_net_ops); if (err < 0) goto out_pernet; diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 486c2305f53c..dbb74f3c57a7 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -445,6 +445,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) struct dst_entry *dst = skb_dst(skb); struct net_device *tdev; struct xfrm_state *x; + int pkt_len = skb->len; int err = -1; int mtu; @@ -502,7 +503,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); u64_stats_update_begin(&tstats->syncp); - tstats->tx_bytes += skb->len; + tstats->tx_bytes += pkt_len; tstats->tx_packets++; u64_stats_update_end(&tstats->syncp); } else { @@ -1052,23 +1053,22 @@ static struct rtnl_link_ops vti6_link_ops __read_mostly = { .get_link_net = ip6_tnl_get_link_net, }; -static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n) +static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n, + struct list_head *list) { int h; struct ip6_tnl *t; - LIST_HEAD(list); for (h = 0; h < IP6_VTI_HASH_SIZE; h++) { t = rtnl_dereference(ip6n->tnls_r_l[h]); while (t) { - unregister_netdevice_queue(t->dev, &list); + unregister_netdevice_queue(t->dev, list); t = rtnl_dereference(t->next); } } t = rtnl_dereference(ip6n->tnls_wc[0]); - unregister_netdevice_queue(t->dev, &list); - unregister_netdevice_many(&list); + unregister_netdevice_queue(t->dev, list); } static int __net_init vti6_init_net(struct net *net) @@ -1108,18 +1108,24 @@ err_alloc_dev: return err; } -static void __net_exit vti6_exit_net(struct net *net) +static void __net_exit vti6_exit_batch_net(struct list_head *net_list) { - struct vti6_net *ip6n = net_generic(net, vti6_net_id); + struct vti6_net *ip6n; + struct net *net; + LIST_HEAD(list); rtnl_lock(); - vti6_destroy_tunnels(ip6n); + list_for_each_entry(net, net_list, exit_list) { + ip6n = net_generic(net, vti6_net_id); + vti6_destroy_tunnels(ip6n, &list); + } + unregister_netdevice_many(&list); rtnl_unlock(); } static struct pernet_operations vti6_net_ops = { .init = vti6_init_net, - .exit = vti6_exit_net, + .exit_batch = vti6_exit_batch_net, .id = &vti6_net_id, .size = sizeof(struct vti6_net), }; @@ -1145,33 +1151,6 @@ static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = { .priority = 100, }; -static bool is_vti6_tunnel(const struct net_device *dev) -{ - return dev->netdev_ops == &vti6_netdev_ops; -} - -static int vti6_device_event(struct notifier_block *unused, - unsigned long event, void *ptr) -{ - struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct ip6_tnl *t = netdev_priv(dev); - - if (!is_vti6_tunnel(dev)) - return NOTIFY_DONE; - - switch (event) { - case NETDEV_DOWN: - if (!net_eq(t->net, dev_net(dev))) - xfrm_garbage_collect(t->net); - break; - } - return NOTIFY_DONE; -} - -static struct notifier_block vti6_notifier_block __read_mostly = { - .notifier_call = vti6_device_event, -}; - /** * vti6_tunnel_init - register protocol and reserve needed resources * @@ -1182,8 +1161,6 @@ static int __init vti6_tunnel_init(void) const char *msg; int err; - register_netdevice_notifier(&vti6_notifier_block); - msg = "tunnel device"; err = register_pernet_device(&vti6_net_ops); if (err < 0) @@ -1216,7 +1193,6 @@ xfrm_proto_ah_failed: xfrm_proto_esp_failed: unregister_pernet_device(&vti6_net_ops); pernet_dev_failed: - unregister_netdevice_notifier(&vti6_notifier_block); pr_err("vti6 init: failed to register %s\n", msg); return err; } @@ -1231,7 +1207,6 @@ static void __exit vti6_tunnel_cleanup(void) xfrm6_protocol_deregister(&vti_ah6_protocol, IPPROTO_AH); xfrm6_protocol_deregister(&vti_esp6_protocol, IPPROTO_ESP); unregister_pernet_device(&vti6_net_ops); - unregister_netdevice_notifier(&vti6_notifier_block); } module_init(vti6_tunnel_init); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 7454850f2098..9c24b85949c1 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1427,7 +1427,7 @@ int __init ip6_mr_init(void) } #endif rtnl_register(RTNL_FAMILY_IP6MR, RTM_GETROUTE, NULL, - ip6mr_rtm_dumproute, NULL); + ip6mr_rtm_dumproute, 0); return 0; #ifdef CONFIG_IPV6_PIMSM_V2 add_proto_fail: @@ -1617,6 +1617,10 @@ int ip6mr_sk_done(struct sock *sk) struct net *net = sock_net(sk); struct mr6_table *mrt; + if (sk->sk_type != SOCK_RAW || + inet_sk(sk)->inet_num != IPPROTO_ICMPV6) + return err; + rtnl_lock(); ip6mr_for_each_table(mrt, net) { if (sk == mrt->mroute6_sk) { @@ -1722,6 +1726,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns case MRT6_ADD_MFC: case MRT6_DEL_MFC: parent = -1; + /* fall through */ case MRT6_ADD_MFC_PROXY: case MRT6_DEL_MFC_PROXY: if (optlen < sizeof(mfc)) diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 02d795fe3d7f..b9404feabd78 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -242,7 +242,6 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, pktopt = xchg(&np->pktoptions, NULL); kfree_skb(pktopt); - sk->sk_destruct = inet_sock_destruct; /* * ... and add it to the refcnt debug socks count * in the new family. -acme @@ -378,6 +377,14 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, retv = 0; break; + case IPV6_FREEBIND: + if (optlen < sizeof(int)) + goto e_inval; + /* we also don't have a separate freebind bit for IPV6 */ + inet_sk(sk)->freebind = valbool; + retv = 0; + break; + case IPV6_RECVORIGDSTADDR: if (optlen < sizeof(int)) goto e_inval; @@ -1215,6 +1222,10 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname, val = inet_sk(sk)->transparent; break; + case IPV6_FREEBIND: + val = inet_sk(sk)->freebind; + break; + case IPV6_RECVORIGDSTADDR: val = np->rxopt.bits.rxorigdstaddr; break; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 0327c1f2e6fc..b3cea200c85e 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -46,6 +46,7 @@ #endif #include <linux/if_addr.h> +#include <linux/if_ether.h> #include <linux/if_arp.h> #include <linux/ipv6.h> #include <linux/icmpv6.h> @@ -127,7 +128,7 @@ struct neigh_table nd_tbl = { [NEIGH_VAR_BASE_REACHABLE_TIME] = ND_REACHABLE_TIME, [NEIGH_VAR_DELAY_PROBE_TIME] = 5 * HZ, [NEIGH_VAR_GC_STALETIME] = 60 * HZ, - [NEIGH_VAR_QUEUE_LEN_BYTES] = 64 * 1024, + [NEIGH_VAR_QUEUE_LEN_BYTES] = SK_WMEM_MAX, [NEIGH_VAR_PROXY_QLEN] = 64, [NEIGH_VAR_ANYCAST_DELAY] = 1 * HZ, [NEIGH_VAR_PROXY_DELAY] = (8 * HZ) / 10, @@ -426,12 +427,19 @@ static void ip6_nd_hdr(struct sk_buff *skb, int hop_limit, int len) { struct ipv6hdr *hdr; + struct inet6_dev *idev; + unsigned tclass; + + rcu_read_lock(); + idev = __in6_dev_get(skb->dev); + tclass = idev ? idev->cnf.ndisc_tclass : 0; + rcu_read_unlock(); skb_push(skb, sizeof(*hdr)); skb_reset_network_header(skb); hdr = ipv6_hdr(skb); - ip6_flow_hdr(hdr, 0, 0); + ip6_flow_hdr(hdr, tclass, 0); hdr->payload_len = htons(len); hdr->nexthdr = IPPROTO_ICMPV6; @@ -822,7 +830,7 @@ have_ifp: * who is doing DAD * so fail our DAD process */ - addrconf_dad_failure(ifp); + addrconf_dad_failure(skb, ifp); return; } else { /* @@ -975,7 +983,7 @@ static void ndisc_recv_na(struct sk_buff *skb) if (ifp) { if (skb->pkt_type != PACKET_LOOPBACK && (ifp->flags & IFA_F_TENTATIVE)) { - addrconf_dad_failure(ifp); + addrconf_dad_failure(skb, ifp); return; } /* What should we make now? The advertisement @@ -989,8 +997,8 @@ static void ndisc_recv_na(struct sk_buff *skb) */ if (skb->pkt_type != PACKET_LOOPBACK) ND_PRINTK(1, warn, - "NA: someone advertises our address %pI6 on %s!\n", - &ifp->addr, ifp->idev->dev->name); + "NA: %pM advertised our address %pI6c on %s!\n", + eth_hdr(skb)->h_source, &ifp->addr, ifp->idev->dev->name); in6_ifa_put(ifp); return; } @@ -1779,6 +1787,7 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, static struct notifier_block ndisc_netdev_notifier = { .notifier_call = ndisc_netdev_event, + .priority = ADDRCONF_NOTIFY_PRIORITY - 5, }; #ifdef CONFIG_SYSCTL diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index fe180c96040e..c6ee0cdd0ba9 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the netfilter modules on top of IPv6. # diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 1f90644056ac..f06e25065a34 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -39,12 +39,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Netfilter Core Team <coreteam@netfilter.org>"); MODULE_DESCRIPTION("IPv6 packet filter"); -#ifdef CONFIG_NETFILTER_DEBUG -#define IP_NF_ASSERT(x) WARN_ON(!(x)) -#else -#define IP_NF_ASSERT(x) -#endif - void *ip6t_alloc_initial_table(const struct xt_table *info) { return xt_alloc_initial_table(ip6t, IP6T); @@ -176,7 +170,7 @@ static const char *const comments[] = { [NF_IP6_TRACE_COMMENT_POLICY] = "policy", }; -static struct nf_loginfo trace_loginfo = { +static const struct nf_loginfo trace_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { .log = { @@ -284,7 +278,7 @@ ip6t_do_table(struct sk_buff *skb, acpar.hotdrop = false; acpar.state = state; - IP_NF_ASSERT(table->valid_hooks & (1 << hook)); + WARN_ON(!(table->valid_hooks & (1 << hook))); local_bh_disable(); addend = xt_write_recseq_begin(); @@ -315,7 +309,7 @@ ip6t_do_table(struct sk_buff *skb, const struct xt_entry_match *ematch; struct xt_counters *counter; - IP_NF_ASSERT(e); + WARN_ON(!e); acpar.thoff = 0; if (!ip6_packet_match(skb, indev, outdev, &e->ipv6, &acpar.thoff, &acpar.fragoff, &acpar.hotdrop)) { @@ -335,7 +329,7 @@ ip6t_do_table(struct sk_buff *skb, ADD_COUNTER(*counter, skb->len, 1); t = ip6t_get_target_c(e); - IP_NF_ASSERT(t->u.kernel.target); + WARN_ON(!t->u.kernel.target); #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) /* The packet is traced: log it */ @@ -801,7 +795,27 @@ get_counters(const struct xt_table_info *t, ADD_COUNTER(counters[i], bcnt, pcnt); ++i; + cond_resched(); + } + } +} + +static void get_old_counters(const struct xt_table_info *t, + struct xt_counters counters[]) +{ + struct ip6t_entry *iter; + unsigned int cpu, i; + + for_each_possible_cpu(cpu) { + i = 0; + xt_entry_foreach(iter, t->entries, t->size) { + const struct xt_counters *tmp; + + tmp = xt_get_per_cpu_counter(&iter->counters, cpu); + ADD_COUNTER(counters[i], tmp->bcnt, tmp->pcnt); + ++i; } + cond_resched(); } } @@ -1095,8 +1109,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, (newinfo->number <= oldinfo->initial_entries)) module_put(t->me); - /* Get the old counters, and synchronize with replace */ - get_counters(oldinfo, counters); + get_old_counters(oldinfo, counters); /* Decrease module usage counts and free resource */ xt_entry_foreach(iter, oldinfo->entries, oldinfo->size) diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index ce203dd729e0..437af8c95277 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -353,7 +353,7 @@ static unsigned int ipv6_synproxy_hook(void *priv, nexthdr = ipv6_hdr(skb)->nexthdr; thoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, &frag_off); - if (thoff < 0) + if (thoff < 0 || nexthdr != IPPROTO_TCP) return NF_ACCEPT; th = skb_header_pointer(skb, thoff, sizeof(_th), &_th); @@ -438,7 +438,7 @@ static unsigned int ipv6_synproxy_hook(void *priv, return NF_ACCEPT; } -static struct nf_hook_ops ipv6_synproxy_ops[] __read_mostly = { +static const struct nf_hook_ops ipv6_synproxy_ops[] = { { .hook = ipv6_synproxy_hook, .pf = NFPROTO_IPV6, diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index 7d2bd940291f..991512576c8c 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -69,7 +69,7 @@ static unsigned int ip6table_nat_local_fn(void *priv, return nf_nat_ipv6_local_fn(priv, skb, state, ip6table_nat_do_chain); } -static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = { +static const struct nf_hook_ops nf_nat_ipv6_ops[] = { /* Before packet filtering, change destination */ { .hook = ip6table_nat_in, diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 4e3402486833..3b80a38f62b8 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -67,13 +67,6 @@ static bool ipv6_invert_tuple(struct nf_conntrack_tuple *tuple, return true; } -static void ipv6_print_tuple(struct seq_file *s, - const struct nf_conntrack_tuple *tuple) -{ - seq_printf(s, "src=%pI6 dst=%pI6 ", - tuple->src.u3.ip6, tuple->dst.u3.ip6); -} - static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, unsigned int *dataoff, u_int8_t *protonum) { @@ -191,7 +184,7 @@ static unsigned int ipv6_conntrack_local(void *priv, return nf_conntrack_in(state->net, PF_INET6, state->hook, skb); } -static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { +static const struct nf_hook_ops ipv6_conntrack_ops[] = { { .hook = ipv6_conntrack_in, .pf = NFPROTO_IPV6, @@ -308,11 +301,6 @@ static int ipv6_nlattr_to_tuple(struct nlattr *tb[], return 0; } - -static int ipv6_nlattr_tuple_size(void) -{ - return nla_policy_len(ipv6_nla_policy, CTA_IP_MAX + 1); -} #endif static int ipv6_hooks_register(struct net *net) @@ -351,18 +339,17 @@ static void ipv6_hooks_unregister(struct net *net) mutex_unlock(®ister_ipv6_hooks); } -struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = { +const struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 = { .l3proto = PF_INET6, - .name = "ipv6", .pkt_to_tuple = ipv6_pkt_to_tuple, .invert_tuple = ipv6_invert_tuple, - .print_tuple = ipv6_print_tuple, .get_l4proto = ipv6_get_l4proto, #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .tuple_to_nlattr = ipv6_tuple_to_nlattr, - .nlattr_tuple_size = ipv6_nlattr_tuple_size, .nlattr_to_tuple = ipv6_nlattr_to_tuple, .nla_policy = ipv6_nla_policy, + .nla_size = NLA_ALIGN(NLA_HDRLEN + sizeof(u32[4])) + + NLA_ALIGN(NLA_HDRLEN + sizeof(u32[4])), #endif .net_ns_get = ipv6_hooks_register, .net_ns_put = ipv6_hooks_unregister, @@ -398,25 +385,12 @@ static struct nf_conntrack_l4proto *builtin_l4proto6[] = { static int ipv6_net_init(struct net *net) { - int ret = 0; - - ret = nf_ct_l4proto_pernet_register(net, builtin_l4proto6, - ARRAY_SIZE(builtin_l4proto6)); - if (ret < 0) - return ret; - - ret = nf_ct_l3proto_pernet_register(net, &nf_conntrack_l3proto_ipv6); - if (ret < 0) { - pr_err("nf_conntrack_ipv6: pernet registration failed.\n"); - nf_ct_l4proto_pernet_unregister(net, builtin_l4proto6, - ARRAY_SIZE(builtin_l4proto6)); - } - return ret; + return nf_ct_l4proto_pernet_register(net, builtin_l4proto6, + ARRAY_SIZE(builtin_l4proto6)); } static void ipv6_net_exit(struct net *net) { - nf_ct_l3proto_pernet_unregister(net, &nf_conntrack_l3proto_ipv6); nf_ct_l4proto_pernet_unregister(net, builtin_l4proto6, ARRAY_SIZE(builtin_l4proto6)); } @@ -434,6 +408,12 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) need_conntrack(); +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) + if (WARN_ON(nla_policy_len(ipv6_nla_policy, CTA_IP_MAX + 1) != + nf_conntrack_l3proto_ipv6.nla_size)) + return -EINVAL; +#endif + ret = nf_register_sockopt(&so_getorigdst6); if (ret < 0) { pr_err("Unable to register netfilter socket option\n"); diff --git a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c index d5f028e33f65..3ac0d826afc4 100644 --- a/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c +++ b/net/ipv6/netfilter/nf_conntrack_proto_icmpv6.c @@ -84,16 +84,6 @@ static bool icmpv6_invert_tuple(struct nf_conntrack_tuple *tuple, return true; } -/* Print out the per-protocol part of the tuple. */ -static void icmpv6_print_tuple(struct seq_file *s, - const struct nf_conntrack_tuple *tuple) -{ - seq_printf(s, "type=%u code=%u id=%u ", - tuple->dst.u.icmp.type, - tuple->dst.u.icmp.code, - ntohs(tuple->src.u.icmp.id)); -} - static unsigned int *icmpv6_get_timeouts(struct net *net) { return &icmpv6_pernet(net)->timeout; @@ -104,8 +94,6 @@ static int icmpv6_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - u_int8_t pf, - unsigned int hooknum, unsigned int *timeout) { /* Do not immediately delete the connection after the first @@ -131,11 +119,6 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb, pr_debug("icmpv6: can't create new conn with type %u\n", type + 128); nf_ct_dump_tuple_ipv6(&ct->tuplehash[0].tuple); - if (LOG_INVALID(nf_ct_net(ct), IPPROTO_ICMPV6)) - nf_log_packet(nf_ct_net(ct), PF_INET6, 0, skb, NULL, - NULL, NULL, - "nf_ct_icmpv6: invalid new with type %d ", - type + 128); return false; } return true; @@ -144,8 +127,7 @@ static bool icmpv6_new(struct nf_conn *ct, const struct sk_buff *skb, static int icmpv6_error_message(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, - unsigned int icmp6off, - unsigned int hooknum) + unsigned int icmp6off) { struct nf_conntrack_tuple intuple, origtuple; const struct nf_conntrack_tuple_hash *h; @@ -153,7 +135,7 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl, enum ip_conntrack_info ctinfo; struct nf_conntrack_zone tmp; - NF_CT_ASSERT(!skb_nfct(skb)); + WARN_ON(skb_nfct(skb)); /* Are they talking about one of our connections? */ if (!nf_ct_get_tuplepr(skb, @@ -193,6 +175,12 @@ icmpv6_error_message(struct net *net, struct nf_conn *tmpl, return NF_ACCEPT; } +static void icmpv6_error_log(const struct sk_buff *skb, struct net *net, + u8 pf, const char *msg) +{ + nf_l4proto_log_invalid(skb, net, pf, IPPROTO_ICMPV6, "%s", msg); +} + static int icmpv6_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, @@ -204,17 +192,13 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl, icmp6h = skb_header_pointer(skb, dataoff, sizeof(_ih), &_ih); if (icmp6h == NULL) { - if (LOG_INVALID(net, IPPROTO_ICMPV6)) - nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL, - "nf_ct_icmpv6: short packet "); + icmpv6_error_log(skb, net, pf, "short packet"); return -NF_ACCEPT; } if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_ip6_checksum(skb, hooknum, dataoff, IPPROTO_ICMPV6)) { - if (LOG_INVALID(net, IPPROTO_ICMPV6)) - nf_log_packet(net, PF_INET6, 0, skb, NULL, NULL, NULL, - "nf_ct_icmpv6: ICMPv6 checksum failed "); + icmpv6_error_log(skb, net, pf, "ICMPv6 checksum failed"); return -NF_ACCEPT; } @@ -229,7 +213,7 @@ icmpv6_error(struct net *net, struct nf_conn *tmpl, if (icmp6h->icmp6_type >= 128) return NF_ACCEPT; - return icmpv6_error_message(net, tmpl, skb, dataoff, hooknum); + return icmpv6_error_message(net, tmpl, skb, dataoff); } #if IS_ENABLED(CONFIG_NF_CT_NETLINK) @@ -275,9 +259,14 @@ static int icmpv6_nlattr_to_tuple(struct nlattr *tb[], return 0; } -static int icmpv6_nlattr_tuple_size(void) +static unsigned int icmpv6_nlattr_tuple_size(void) { - return nla_policy_len(icmpv6_nla_policy, CTA_PROTO_MAX + 1); + static unsigned int size __read_mostly; + + if (!size) + size = nla_policy_len(icmpv6_nla_policy, CTA_PROTO_MAX + 1); + + return size; } #endif @@ -367,10 +356,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_icmpv6 __read_mostly = { .l3proto = PF_INET6, .l4proto = IPPROTO_ICMPV6, - .name = "icmpv6", .pkt_to_tuple = icmpv6_pkt_to_tuple, .invert_tuple = icmpv6_invert_tuple, - .print_tuple = icmpv6_print_tuple, .packet = icmpv6_packet, .get_timeouts = icmpv6_get_timeouts, .new = icmpv6_new, diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 986d4ca38832..977d8900cfd1 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -169,12 +169,13 @@ static unsigned int nf_hashfn(const struct inet_frag_queue *q) return nf_hash_frag(nq->id, &nq->saddr, &nq->daddr); } -static void nf_ct_frag6_expire(unsigned long data) +static void nf_ct_frag6_expire(struct timer_list *t) { + struct inet_frag_queue *frag = from_timer(frag, t, timer); struct frag_queue *fq; struct net *net; - fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); + fq = container_of(frag, struct frag_queue, q); net = container_of(fq->q.net, struct net, nf_frag.frags); ip6_expire_frag_queue(net, fq, &nf_frags); @@ -622,18 +623,12 @@ EXPORT_SYMBOL_GPL(nf_ct_frag6_gather); static int nf_ct_net_init(struct net *net) { - int res; - net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH; net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH; net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT; - res = inet_frags_init_net(&net->nf_frag.frags); - if (res) - return res; - res = nf_ct_frag6_sysctl_register(net); - if (res) - inet_frags_uninit_net(&net->nf_frag.frags); - return res; + inet_frags_init_net(&net->nf_frag.frags); + + return nf_ct_frag6_sysctl_register(net); } static void nf_ct_net_exit(struct net *net) diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index ada60d1a991b..b326da59257f 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -74,7 +74,7 @@ static unsigned int ipv6_defrag(void *priv, return err == 0 ? NF_ACCEPT : NF_DROP; } -static struct nf_hook_ops ipv6_defrag_ops[] = { +static const struct nf_hook_ops ipv6_defrag_ops[] = { { .hook = ipv6_defrag, .pf = NFPROTO_IPV6, diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c index 97c724224da7..b397a8fe88b9 100644 --- a/net/ipv6/netfilter/nf_log_ipv6.c +++ b/net/ipv6/netfilter/nf_log_ipv6.c @@ -25,7 +25,7 @@ #include <linux/netfilter/xt_LOG.h> #include <net/netfilter/nf_log.h> -static struct nf_loginfo default_loginfo = { +static const struct nf_loginfo default_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { .log = { diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index b2b4f031b3a1..1d2fb9267d6f 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -196,7 +196,7 @@ int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, struct nf_conntrack_tuple target; unsigned long statusbit; - NF_CT_ASSERT(ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY); + WARN_ON(ctinfo != IP_CT_RELATED && ctinfo != IP_CT_RELATED_REPLY); if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) return 0; @@ -290,7 +290,8 @@ nf_nat_ipv6_fn(void *priv, struct sk_buff *skb, else return NF_ACCEPT; } - /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ + /* Only ICMPs can be IP_CT_IS_REPLY: */ + /* fall through */ case IP_CT_NEW: /* Seen it before? This can happen for loopback, retrans, * or local packets. @@ -319,8 +320,8 @@ nf_nat_ipv6_fn(void *priv, struct sk_buff *skb, default: /* ESTABLISHED */ - NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || - ctinfo == IP_CT_ESTABLISHED_REPLY); + WARN_ON(ctinfo != IP_CT_ESTABLISHED && + ctinfo != IP_CT_ESTABLISHED_REPLY); if (nf_nat_oif_changed(state->hook, ctinfo, nat, state->out)) goto oif_changed; } diff --git a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c index d7b679037bae..98f61fcb9108 100644 --- a/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_masquerade_ipv6.c @@ -36,8 +36,8 @@ nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range, struct nf_nat_range newrange; ct = nf_ct_get(skb, &ctinfo); - NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || - ctinfo == IP_CT_RELATED_REPLY)); + WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || + ctinfo == IP_CT_RELATED_REPLY))); if (ipv6_dev_get_saddr(nf_ct_net(ct), out, &ipv6_hdr(skb)->daddr, 0, &src) < 0) diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index 43f91d9b086c..54b5899543ef 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -25,9 +25,9 @@ static int get_ifindex(const struct net_device *dev) static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv, const struct nft_pktinfo *pkt, - const struct net_device *dev) + const struct net_device *dev, + struct ipv6hdr *iph) { - const struct ipv6hdr *iph = ipv6_hdr(pkt->skb); int lookup_flags = 0; if (priv->flags & NFTA_FIB_F_DADDR) { @@ -55,7 +55,8 @@ static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv, } static u32 __nft_fib6_eval_type(const struct nft_fib *priv, - const struct nft_pktinfo *pkt) + const struct nft_pktinfo *pkt, + struct ipv6hdr *iph) { const struct net_device *dev = NULL; const struct nf_ipv6_ops *v6ops; @@ -77,7 +78,7 @@ static u32 __nft_fib6_eval_type(const struct nft_fib *priv, else if (priv->flags & NFTA_FIB_F_OIF) dev = nft_out(pkt); - nft_fib6_flowi_init(&fl6, priv, pkt, dev); + nft_fib6_flowi_init(&fl6, priv, pkt, dev, iph); v6ops = nf_get_ipv6_ops(); if (dev && v6ops && v6ops->chk_addr(nft_net(pkt), &fl6.daddr, dev, true)) @@ -131,9 +132,17 @@ void nft_fib6_eval_type(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_fib *priv = nft_expr_priv(expr); + int noff = skb_network_offset(pkt->skb); u32 *dest = ®s->data[priv->dreg]; + struct ipv6hdr *iph, _iph; - *dest = __nft_fib6_eval_type(priv, pkt); + iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph); + if (!iph) { + regs->verdict.code = NFT_BREAK; + return; + } + + *dest = __nft_fib6_eval_type(priv, pkt, iph); } EXPORT_SYMBOL_GPL(nft_fib6_eval_type); @@ -141,8 +150,10 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) { const struct nft_fib *priv = nft_expr_priv(expr); + int noff = skb_network_offset(pkt->skb); const struct net_device *oif = NULL; u32 *dest = ®s->data[priv->dreg]; + struct ipv6hdr *iph, _iph; struct flowi6 fl6 = { .flowi6_iif = LOOPBACK_IFINDEX, .flowi6_proto = pkt->tprot, @@ -155,7 +166,13 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, else if (priv->flags & NFTA_FIB_F_OIF) oif = nft_out(pkt); - lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif); + iph = skb_header_pointer(pkt->skb, noff, sizeof(_iph), &_iph); + if (!iph) { + regs->verdict.code = NFT_BREAK; + return; + } + + lookup_flags = nft_fib6_flowi_init(&fl6, priv, pkt, oif, iph); if (nft_hook(pkt) == NF_INET_PRE_ROUTING && nft_fib_is_loopback(pkt->skb, nft_in(pkt))) { diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index abb2c307fbe8..4a7e5ffa5108 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -31,37 +31,6 @@ static u32 __ipv6_select_ident(struct net *net, u32 hashrnd, return id; } -/* This function exists only for tap drivers that must support broken - * clients requesting UFO without specifying an IPv6 fragment ID. - * - * This is similar to ipv6_select_ident() but we use an independent hash - * seed to limit information leakage. - * - * The network header must be set before calling this. - */ -void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb) -{ - static u32 ip6_proxy_idents_hashrnd __read_mostly; - struct in6_addr buf[2]; - struct in6_addr *addrs; - u32 id; - - addrs = skb_header_pointer(skb, - skb_network_offset(skb) + - offsetof(struct ipv6hdr, saddr), - sizeof(buf), buf); - if (!addrs) - return; - - net_get_random_once(&ip6_proxy_idents_hashrnd, - sizeof(ip6_proxy_idents_hashrnd)); - - id = __ipv6_select_ident(net, ip6_proxy_idents_hashrnd, - &addrs[1], &addrs[0]); - skb_shinfo(skb)->ip6_frag_id = htonl(id); -} -EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident); - __be32 ipv6_select_ident(struct net *net, const struct in6_addr *daddr, const struct in6_addr *saddr) @@ -86,7 +55,6 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) while (offset <= packet_len) { struct ipv6_opt_hdr *exthdr; - unsigned int len; switch (**nexthdr) { @@ -112,10 +80,9 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + offset); - len = ipv6_optlen(exthdr); - if (len + offset >= IPV6_MAXPLEN) + offset += ipv6_optlen(exthdr); + if (offset > IPV6_MAXPLEN) return -EINVAL; - offset += len; *nexthdr = &exthdr->nexthdr; } diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index ac826dd338ff..d12c55dad7d1 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -154,9 +154,8 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) ICMP6_MIB_OUTERRORS); ip6_flush_pending_frames(sk); } else { - err = icmpv6_push_pending_frames(sk, &fl6, - (struct icmp6hdr *) &pfh.icmph, - len); + icmpv6_push_pending_frames(sk, &fl6, + (struct icmp6hdr *)&pfh.icmph, len); } release_sock(sk); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 60be012fe708..761a473a07c5 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -72,7 +72,7 @@ EXPORT_SYMBOL_GPL(raw_v6_hashinfo); struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, unsigned short num, const struct in6_addr *loc_addr, - const struct in6_addr *rmt_addr, int dif) + const struct in6_addr *rmt_addr, int dif, int sdif) { bool is_multicast = ipv6_addr_is_multicast(loc_addr); @@ -86,7 +86,9 @@ struct sock *__raw_v6_lookup(struct net *net, struct sock *sk, !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) continue; - if (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) + if (sk->sk_bound_dev_if && + sk->sk_bound_dev_if != dif && + sk->sk_bound_dev_if != sdif) continue; if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr)) { @@ -178,7 +180,8 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) goto out; net = dev_net(skb->dev); - sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr, inet6_iif(skb)); + sk = __raw_v6_lookup(net, sk, nexthdr, daddr, saddr, + inet6_iif(skb), inet6_sdif(skb)); while (sk) { int filtered; @@ -222,7 +225,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) } } sk = __raw_v6_lookup(net, sk_next(sk), nexthdr, daddr, saddr, - inet6_iif(skb)); + inet6_iif(skb), inet6_sdif(skb)); } out: read_unlock(&raw_v6_hashinfo.lock); @@ -378,7 +381,7 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr, net = dev_net(skb->dev); while ((sk = __raw_v6_lookup(net, sk, nexthdr, saddr, daddr, - inet6_iif(skb)))) { + inet6_iif(skb), inet6_iif(skb)))) { rawv6_err(sk, skb, NULL, type, code, inner_offset, info); sk = sk_next(sk); @@ -1052,6 +1055,7 @@ static int rawv6_setsockopt(struct sock *sk, int level, int optname, if (optname == IPV6_CHECKSUM || optname == IPV6_HDRINCL) break; + /* fall through */ default: return ipv6_setsockopt(sk, level, optname, optval, optlen); } @@ -1074,6 +1078,7 @@ static int compat_rawv6_setsockopt(struct sock *sk, int level, int optname, if (optname == IPV6_CHECKSUM || optname == IPV6_HDRINCL) break; + /* fall through */ default: return compat_ipv6_setsockopt(sk, level, optname, optval, optlen); @@ -1135,6 +1140,7 @@ static int rawv6_getsockopt(struct sock *sk, int level, int optname, if (optname == IPV6_CHECKSUM || optname == IPV6_HDRINCL) break; + /* fall through */ default: return ipv6_getsockopt(sk, level, optname, optval, optlen); } @@ -1157,6 +1163,7 @@ static int compat_rawv6_getsockopt(struct sock *sk, int level, int optname, if (optname == IPV6_CHECKSUM || optname == IPV6_HDRINCL) break; + /* fall through */ default: return compat_ipv6_getsockopt(sk, level, optname, optval, optlen); diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index e1da5b888cc4..afbc000ad4f2 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -170,12 +170,13 @@ out: } EXPORT_SYMBOL(ip6_expire_frag_queue); -static void ip6_frag_expire(unsigned long data) +static void ip6_frag_expire(struct timer_list *t) { + struct inet_frag_queue *frag = from_timer(frag, t, timer); struct frag_queue *fq; struct net *net; - fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); + fq = container_of(frag, struct frag_queue, q); net = container_of(fq->q.net, struct net, ipv6.frags); ip6_expire_frag_queue(net, fq, &ip6_frags); @@ -714,19 +715,13 @@ static void ip6_frags_sysctl_unregister(void) static int __net_init ipv6_frags_init_net(struct net *net) { - int res; - net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH; net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH; net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT; - res = inet_frags_init_net(&net->ipv6.frags); - if (res) - return res; - res = ip6_frags_ns_sysctl_register(net); - if (res) - inet_frags_uninit_net(&net->ipv6.frags); - return res; + inet_frags_init_net(&net->ipv6.frags); + + return ip6_frags_ns_sysctl_register(net); } static void __net_exit ipv6_frags_exit_net(struct net *net) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 94d6a13d47f0..05eb7bc36156 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -44,6 +44,7 @@ #include <linux/seq_file.h> #include <linux/nsproxy.h> #include <linux/slab.h> +#include <linux/jhash.h> #include <net/net_namespace.h> #include <net/snmp.h> #include <net/ipv6.h> @@ -104,6 +105,9 @@ static int rt6_fill_node(struct net *net, struct in6_addr *dst, struct in6_addr *src, int iif, int type, u32 portid, u32 seq, unsigned int flags); +static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt, + struct in6_addr *daddr, + struct in6_addr *saddr); #ifdef CONFIG_IPV6_ROUTE_INFO static struct rt6_info *rt6_add_route_info(struct net *net, @@ -139,9 +143,11 @@ static void rt6_uncached_list_del(struct rt6_info *rt) { if (!list_empty(&rt->rt6i_uncached)) { struct uncached_list *ul = rt->rt6i_uncached_list; + struct net *net = dev_net(rt->dst.dev); spin_lock_bh(&ul->lock); list_del(&rt->rt6i_uncached); + atomic_dec(&net->ipv6.rt6_stats->fib_rt_uncache); spin_unlock_bh(&ul->lock); } } @@ -355,8 +361,10 @@ static struct rt6_info *__ip6_dst_alloc(struct net *net, struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, 1, DST_OBSOLETE_FORCE_CHK, flags); - if (rt) + if (rt) { rt6_info_init(rt); + atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc); + } return rt; } @@ -369,17 +377,7 @@ struct rt6_info *ip6_dst_alloc(struct net *net, if (rt) { rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC); - if (rt->rt6i_pcpu) { - int cpu; - - for_each_possible_cpu(cpu) { - struct rt6_info **p; - - p = per_cpu_ptr(rt->rt6i_pcpu, cpu); - /* no one shares rt */ - *p = NULL; - } - } else { + if (!rt->rt6i_pcpu) { dst_release_immediate(&rt->dst); return NULL; } @@ -392,6 +390,7 @@ EXPORT_SYMBOL(ip6_dst_alloc); static void ip6_dst_destroy(struct dst_entry *dst) { struct rt6_info *rt = (struct rt6_info *)dst; + struct rt6_exception_bucket *bucket; struct dst_entry *from = dst->from; struct inet6_dev *idev; @@ -404,6 +403,11 @@ static void ip6_dst_destroy(struct dst_entry *dst) rt->rt6i_idev = NULL; in6_dev_put(idev); } + bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, 1); + if (bucket) { + rt->rt6i_exception_bucket = NULL; + kfree(bucket); + } dst->from = NULL; dst_release(from); @@ -440,21 +444,12 @@ static bool rt6_check_expired(const struct rt6_info *rt) if (time_after(jiffies, rt->dst.expires)) return true; } else if (rt->dst.from) { - return rt6_check_expired((struct rt6_info *) rt->dst.from); + return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK || + rt6_check_expired((struct rt6_info *)rt->dst.from); } return false; } -/* Multipath route selection: - * Hash based function using packet header and flowlabel. - * Adapted from fib_info_hashfn() - */ -static int rt6_info_hash_nhsfn(unsigned int candidate_count, - const struct flowi6 *fl6) -{ - return get_hash_from_flowi6(fl6) % candidate_count; -} - static struct rt6_info *rt6_multipath_select(struct rt6_info *match, struct flowi6 *fl6, int oif, int strict) @@ -462,7 +457,13 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match, struct rt6_info *sibling, *next_sibling; int route_choosen; - route_choosen = rt6_info_hash_nhsfn(match->rt6i_nsiblings + 1, fl6); + /* We might have already computed the hash for ICMPv6 errors. In such + * case it will always be non-zero. Otherwise now is the time to do it. + */ + if (!fl6->mp_hash) + fl6->mp_hash = rt6_multipath_hash(fl6, NULL); + + route_choosen = fl6->mp_hash % (match->rt6i_nsiblings + 1); /* Don't change the route, if route_choosen == 0 * (siblings does not include ourself) */ @@ -481,7 +482,7 @@ static struct rt6_info *rt6_multipath_select(struct rt6_info *match, } /* - * Route lookup. Any table->tb6_lock is implied. + * Route lookup. rcu_read_lock() should be held. */ static inline struct rt6_info *rt6_device_match(struct net *net, @@ -496,7 +497,7 @@ static inline struct rt6_info *rt6_device_match(struct net *net, if (!oif && ipv6_addr_any(saddr)) goto out; - for (sprt = rt; sprt; sprt = sprt->dst.rt6_next) { + for (sprt = rt; sprt; sprt = rcu_dereference(sprt->dst.rt6_next)) { struct net_device *dev = sprt->dst.dev; if (oif) { @@ -705,6 +706,7 @@ out: } static struct rt6_info *find_rr_leaf(struct fib6_node *fn, + struct rt6_info *leaf, struct rt6_info *rr_head, u32 metric, int oif, int strict, bool *do_rr) @@ -714,7 +716,7 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn, match = NULL; cont = NULL; - for (rt = rr_head; rt; rt = rt->dst.rt6_next) { + for (rt = rr_head; rt; rt = rcu_dereference(rt->dst.rt6_next)) { if (rt->rt6i_metric != metric) { cont = rt; break; @@ -723,7 +725,8 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn, match = find_match(rt, oif, strict, &mpri, match, do_rr); } - for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) { + for (rt = leaf; rt && rt != rr_head; + rt = rcu_dereference(rt->dst.rt6_next)) { if (rt->rt6i_metric != metric) { cont = rt; break; @@ -735,37 +738,59 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn, if (match || !cont) return match; - for (rt = cont; rt; rt = rt->dst.rt6_next) + for (rt = cont; rt; rt = rcu_dereference(rt->dst.rt6_next)) match = find_match(rt, oif, strict, &mpri, match, do_rr); return match; } -static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) +static struct rt6_info *rt6_select(struct net *net, struct fib6_node *fn, + int oif, int strict) { + struct rt6_info *leaf = rcu_dereference(fn->leaf); struct rt6_info *match, *rt0; - struct net *net; bool do_rr = false; + int key_plen; - rt0 = fn->rr_ptr; + if (!leaf || leaf == net->ipv6.ip6_null_entry) + return net->ipv6.ip6_null_entry; + + rt0 = rcu_dereference(fn->rr_ptr); if (!rt0) - fn->rr_ptr = rt0 = fn->leaf; + rt0 = leaf; + + /* Double check to make sure fn is not an intermediate node + * and fn->leaf does not points to its child's leaf + * (This might happen if all routes under fn are deleted from + * the tree and fib6_repair_tree() is called on the node.) + */ + key_plen = rt0->rt6i_dst.plen; +#ifdef CONFIG_IPV6_SUBTREES + if (rt0->rt6i_src.plen) + key_plen = rt0->rt6i_src.plen; +#endif + if (fn->fn_bit != key_plen) + return net->ipv6.ip6_null_entry; - match = find_rr_leaf(fn, rt0, rt0->rt6i_metric, oif, strict, + match = find_rr_leaf(fn, leaf, rt0, rt0->rt6i_metric, oif, strict, &do_rr); if (do_rr) { - struct rt6_info *next = rt0->dst.rt6_next; + struct rt6_info *next = rcu_dereference(rt0->dst.rt6_next); /* no entries matched; do round-robin */ if (!next || next->rt6i_metric != rt0->rt6i_metric) - next = fn->leaf; - - if (next != rt0) - fn->rr_ptr = next; + next = leaf; + + if (next != rt0) { + spin_lock_bh(&leaf->rt6i_table->tb6_lock); + /* make sure next is not being deleted from the tree */ + if (next->rt6i_node) + rcu_assign_pointer(fn->rr_ptr, next); + spin_unlock_bh(&leaf->rt6i_table->tb6_lock); + } } - net = dev_net(rt0->dst.dev); return match ? match : net->ipv6.ip6_null_entry; } @@ -853,13 +878,14 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, static struct fib6_node* fib6_backtrack(struct fib6_node *fn, struct in6_addr *saddr) { - struct fib6_node *pn; + struct fib6_node *pn, *sn; while (1) { if (fn->fn_flags & RTN_TL_ROOT) return NULL; - pn = fn->parent; - if (FIB6_SUBTREE(pn) && FIB6_SUBTREE(pn) != fn) - fn = fib6_lookup(FIB6_SUBTREE(pn), NULL, saddr); + pn = rcu_dereference(fn->parent); + sn = FIB6_SUBTREE(pn); + if (sn && sn != fn) + fn = fib6_lookup(sn, NULL, saddr); else fn = pn; if (fn->fn_flags & RTN_RTINFO) @@ -867,29 +893,59 @@ static struct fib6_node* fib6_backtrack(struct fib6_node *fn, } } +static bool ip6_hold_safe(struct net *net, struct rt6_info **prt, + bool null_fallback) +{ + struct rt6_info *rt = *prt; + + if (dst_hold_safe(&rt->dst)) + return true; + if (null_fallback) { + rt = net->ipv6.ip6_null_entry; + dst_hold(&rt->dst); + } else { + rt = NULL; + } + *prt = rt; + return false; +} + static struct rt6_info *ip6_pol_route_lookup(struct net *net, struct fib6_table *table, struct flowi6 *fl6, int flags) { + struct rt6_info *rt, *rt_cache; struct fib6_node *fn; - struct rt6_info *rt; - read_lock_bh(&table->tb6_lock); + rcu_read_lock(); fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); restart: - rt = fn->leaf; - rt = rt6_device_match(net, rt, &fl6->saddr, fl6->flowi6_oif, flags); - if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0) - rt = rt6_multipath_select(rt, fl6, fl6->flowi6_oif, flags); + rt = rcu_dereference(fn->leaf); + if (!rt) { + rt = net->ipv6.ip6_null_entry; + } else { + rt = rt6_device_match(net, rt, &fl6->saddr, + fl6->flowi6_oif, flags); + if (rt->rt6i_nsiblings && fl6->flowi6_oif == 0) + rt = rt6_multipath_select(rt, fl6, + fl6->flowi6_oif, flags); + } if (rt == net->ipv6.ip6_null_entry) { fn = fib6_backtrack(fn, &fl6->saddr); if (fn) goto restart; } - dst_use(&rt->dst, jiffies); - read_unlock_bh(&table->tb6_lock); + /* Search through exception table */ + rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr); + if (rt_cache) + rt = rt_cache; + + if (ip6_hold_safe(net, &rt, true)) + dst_use_noref(&rt->dst, jiffies); + + rcu_read_unlock(); - trace_fib6_table_lookup(net, rt, table->tb6_id, fl6); + trace_fib6_table_lookup(net, rt, table, fl6); return rt; @@ -941,9 +997,9 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info, struct fib6_table *table; table = rt->rt6i_table; - write_lock_bh(&table->tb6_lock); + spin_lock_bh(&table->tb6_lock); err = fib6_add(&table->tb6_root, rt, info, mxc, extack); - write_unlock_bh(&table->tb6_lock); + spin_unlock_bh(&table->tb6_lock); return err; } @@ -958,10 +1014,34 @@ int ip6_ins_rt(struct rt6_info *rt) return __ip6_ins_rt(rt, &info, &mxc, NULL); } +/* called with rcu_lock held */ +static struct net_device *ip6_rt_get_dev_rcu(struct rt6_info *rt) +{ + struct net_device *dev = rt->dst.dev; + + if (rt->rt6i_flags & RTF_LOCAL) { + /* for copies of local routes, dst->dev needs to be the + * device if it is a master device, the master device if + * device is enslaved, and the loopback as the default + */ + if (netif_is_l3_slave(dev) && + !rt6_need_strict(&rt->rt6i_dst.addr)) + dev = l3mdev_master_dev_rcu(dev); + else if (!netif_is_l3_master(dev)) + dev = dev_net(dev)->loopback_dev; + /* last case is netif_is_l3_master(dev) is true in which + * case we want dev returned to be dev + */ + } + + return dev; +} + static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort, const struct in6_addr *daddr, const struct in6_addr *saddr) { + struct net_device *dev; struct rt6_info *rt; /* @@ -971,8 +1051,10 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort, if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)) ort = (struct rt6_info *)ort->dst.from; - rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, 0); - + rcu_read_lock(); + dev = ip6_rt_get_dev_rcu(ort); + rt = __ip6_dst_alloc(dev_net(dev), dev, 0); + rcu_read_unlock(); if (!rt) return NULL; @@ -1000,11 +1082,13 @@ static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort, static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt) { + struct net_device *dev; struct rt6_info *pcpu_rt; - pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev), - rt->dst.dev, rt->dst.flags); - + rcu_read_lock(); + dev = ip6_rt_get_dev_rcu(rt); + pcpu_rt = __ip6_dst_alloc(dev_net(dev), dev, rt->dst.flags); + rcu_read_unlock(); if (!pcpu_rt) return NULL; ip6_rt_copy_init(pcpu_rt, rt); @@ -1013,7 +1097,7 @@ static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt) return pcpu_rt; } -/* It should be called with read_lock_bh(&tb6_lock) acquired */ +/* It should be called with rcu_read_lock() acquired */ static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt) { struct rt6_info *pcpu_rt, **p; @@ -1021,16 +1105,14 @@ static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt) p = this_cpu_ptr(rt->rt6i_pcpu); pcpu_rt = *p; - if (pcpu_rt) { - dst_hold(&pcpu_rt->dst); + if (pcpu_rt && ip6_hold_safe(NULL, &pcpu_rt, false)) rt6_dst_from_metrics_check(pcpu_rt); - } + return pcpu_rt; } static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt) { - struct fib6_table *table = rt->rt6i_table; struct rt6_info *pcpu_rt, *prev, **p; pcpu_rt = ip6_rt_pcpu_alloc(rt); @@ -1041,36 +1123,526 @@ static struct rt6_info *rt6_make_pcpu_route(struct rt6_info *rt) return net->ipv6.ip6_null_entry; } - read_lock_bh(&table->tb6_lock); - if (rt->rt6i_pcpu) { - p = this_cpu_ptr(rt->rt6i_pcpu); - prev = cmpxchg(p, NULL, pcpu_rt); - if (prev) { - /* If someone did it before us, return prev instead */ - dst_release_immediate(&pcpu_rt->dst); - pcpu_rt = prev; - } - } else { - /* rt has been removed from the fib6 tree - * before we have a chance to acquire the read_lock. - * In this case, don't brother to create a pcpu rt - * since rt is going away anyway. The next - * dst_check() will trigger a re-lookup. - */ - dst_release_immediate(&pcpu_rt->dst); - pcpu_rt = rt; - } dst_hold(&pcpu_rt->dst); + p = this_cpu_ptr(rt->rt6i_pcpu); + prev = cmpxchg(p, NULL, pcpu_rt); + BUG_ON(prev); + rt6_dst_from_metrics_check(pcpu_rt); - read_unlock_bh(&table->tb6_lock); return pcpu_rt; } +/* exception hash table implementation + */ +static DEFINE_SPINLOCK(rt6_exception_lock); + +/* Remove rt6_ex from hash table and free the memory + * Caller must hold rt6_exception_lock + */ +static void rt6_remove_exception(struct rt6_exception_bucket *bucket, + struct rt6_exception *rt6_ex) +{ + struct net *net; + + if (!bucket || !rt6_ex) + return; + + net = dev_net(rt6_ex->rt6i->dst.dev); + rt6_ex->rt6i->rt6i_node = NULL; + hlist_del_rcu(&rt6_ex->hlist); + rt6_release(rt6_ex->rt6i); + kfree_rcu(rt6_ex, rcu); + WARN_ON_ONCE(!bucket->depth); + bucket->depth--; + net->ipv6.rt6_stats->fib_rt_cache--; +} + +/* Remove oldest rt6_ex in bucket and free the memory + * Caller must hold rt6_exception_lock + */ +static void rt6_exception_remove_oldest(struct rt6_exception_bucket *bucket) +{ + struct rt6_exception *rt6_ex, *oldest = NULL; + + if (!bucket) + return; + + hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) { + if (!oldest || time_before(rt6_ex->stamp, oldest->stamp)) + oldest = rt6_ex; + } + rt6_remove_exception(bucket, oldest); +} + +static u32 rt6_exception_hash(const struct in6_addr *dst, + const struct in6_addr *src) +{ + static u32 seed __read_mostly; + u32 val; + + net_get_random_once(&seed, sizeof(seed)); + val = jhash(dst, sizeof(*dst), seed); + +#ifdef CONFIG_IPV6_SUBTREES + if (src) + val = jhash(src, sizeof(*src), val); +#endif + return hash_32(val, FIB6_EXCEPTION_BUCKET_SIZE_SHIFT); +} + +/* Helper function to find the cached rt in the hash table + * and update bucket pointer to point to the bucket for this + * (daddr, saddr) pair + * Caller must hold rt6_exception_lock + */ +static struct rt6_exception * +__rt6_find_exception_spinlock(struct rt6_exception_bucket **bucket, + const struct in6_addr *daddr, + const struct in6_addr *saddr) +{ + struct rt6_exception *rt6_ex; + u32 hval; + + if (!(*bucket) || !daddr) + return NULL; + + hval = rt6_exception_hash(daddr, saddr); + *bucket += hval; + + hlist_for_each_entry(rt6_ex, &(*bucket)->chain, hlist) { + struct rt6_info *rt6 = rt6_ex->rt6i; + bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr); + +#ifdef CONFIG_IPV6_SUBTREES + if (matched && saddr) + matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr); +#endif + if (matched) + return rt6_ex; + } + return NULL; +} + +/* Helper function to find the cached rt in the hash table + * and update bucket pointer to point to the bucket for this + * (daddr, saddr) pair + * Caller must hold rcu_read_lock() + */ +static struct rt6_exception * +__rt6_find_exception_rcu(struct rt6_exception_bucket **bucket, + const struct in6_addr *daddr, + const struct in6_addr *saddr) +{ + struct rt6_exception *rt6_ex; + u32 hval; + + WARN_ON_ONCE(!rcu_read_lock_held()); + + if (!(*bucket) || !daddr) + return NULL; + + hval = rt6_exception_hash(daddr, saddr); + *bucket += hval; + + hlist_for_each_entry_rcu(rt6_ex, &(*bucket)->chain, hlist) { + struct rt6_info *rt6 = rt6_ex->rt6i; + bool matched = ipv6_addr_equal(daddr, &rt6->rt6i_dst.addr); + +#ifdef CONFIG_IPV6_SUBTREES + if (matched && saddr) + matched = ipv6_addr_equal(saddr, &rt6->rt6i_src.addr); +#endif + if (matched) + return rt6_ex; + } + return NULL; +} + +static int rt6_insert_exception(struct rt6_info *nrt, + struct rt6_info *ort) +{ + struct net *net = dev_net(ort->dst.dev); + struct rt6_exception_bucket *bucket; + struct in6_addr *src_key = NULL; + struct rt6_exception *rt6_ex; + int err = 0; + + /* ort can't be a cache or pcpu route */ + if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)) + ort = (struct rt6_info *)ort->dst.from; + WARN_ON_ONCE(ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)); + + spin_lock_bh(&rt6_exception_lock); + + if (ort->exception_bucket_flushed) { + err = -EINVAL; + goto out; + } + + bucket = rcu_dereference_protected(ort->rt6i_exception_bucket, + lockdep_is_held(&rt6_exception_lock)); + if (!bucket) { + bucket = kcalloc(FIB6_EXCEPTION_BUCKET_SIZE, sizeof(*bucket), + GFP_ATOMIC); + if (!bucket) { + err = -ENOMEM; + goto out; + } + rcu_assign_pointer(ort->rt6i_exception_bucket, bucket); + } + +#ifdef CONFIG_IPV6_SUBTREES + /* rt6i_src.plen != 0 indicates ort is in subtree + * and exception table is indexed by a hash of + * both rt6i_dst and rt6i_src. + * Otherwise, the exception table is indexed by + * a hash of only rt6i_dst. + */ + if (ort->rt6i_src.plen) + src_key = &nrt->rt6i_src.addr; +#endif + + /* Update rt6i_prefsrc as it could be changed + * in rt6_remove_prefsrc() + */ + nrt->rt6i_prefsrc = ort->rt6i_prefsrc; + /* rt6_mtu_change() might lower mtu on ort. + * Only insert this exception route if its mtu + * is less than ort's mtu value. + */ + if (nrt->rt6i_pmtu >= dst_mtu(&ort->dst)) { + err = -EINVAL; + goto out; + } + + rt6_ex = __rt6_find_exception_spinlock(&bucket, &nrt->rt6i_dst.addr, + src_key); + if (rt6_ex) + rt6_remove_exception(bucket, rt6_ex); + + rt6_ex = kzalloc(sizeof(*rt6_ex), GFP_ATOMIC); + if (!rt6_ex) { + err = -ENOMEM; + goto out; + } + rt6_ex->rt6i = nrt; + rt6_ex->stamp = jiffies; + atomic_inc(&nrt->rt6i_ref); + nrt->rt6i_node = ort->rt6i_node; + hlist_add_head_rcu(&rt6_ex->hlist, &bucket->chain); + bucket->depth++; + net->ipv6.rt6_stats->fib_rt_cache++; + + if (bucket->depth > FIB6_MAX_DEPTH) + rt6_exception_remove_oldest(bucket); + +out: + spin_unlock_bh(&rt6_exception_lock); + + /* Update fn->fn_sernum to invalidate all cached dst */ + if (!err) { + fib6_update_sernum(ort); + fib6_force_start_gc(net); + } + + return err; +} + +void rt6_flush_exceptions(struct rt6_info *rt) +{ + struct rt6_exception_bucket *bucket; + struct rt6_exception *rt6_ex; + struct hlist_node *tmp; + int i; + + spin_lock_bh(&rt6_exception_lock); + /* Prevent rt6_insert_exception() to recreate the bucket list */ + rt->exception_bucket_flushed = 1; + + bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, + lockdep_is_held(&rt6_exception_lock)); + if (!bucket) + goto out; + + for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) { + hlist_for_each_entry_safe(rt6_ex, tmp, &bucket->chain, hlist) + rt6_remove_exception(bucket, rt6_ex); + WARN_ON_ONCE(bucket->depth); + bucket++; + } + +out: + spin_unlock_bh(&rt6_exception_lock); +} + +/* Find cached rt in the hash table inside passed in rt + * Caller has to hold rcu_read_lock() + */ +static struct rt6_info *rt6_find_cached_rt(struct rt6_info *rt, + struct in6_addr *daddr, + struct in6_addr *saddr) +{ + struct rt6_exception_bucket *bucket; + struct in6_addr *src_key = NULL; + struct rt6_exception *rt6_ex; + struct rt6_info *res = NULL; + + bucket = rcu_dereference(rt->rt6i_exception_bucket); + +#ifdef CONFIG_IPV6_SUBTREES + /* rt6i_src.plen != 0 indicates rt is in subtree + * and exception table is indexed by a hash of + * both rt6i_dst and rt6i_src. + * Otherwise, the exception table is indexed by + * a hash of only rt6i_dst. + */ + if (rt->rt6i_src.plen) + src_key = saddr; +#endif + rt6_ex = __rt6_find_exception_rcu(&bucket, daddr, src_key); + + if (rt6_ex && !rt6_check_expired(rt6_ex->rt6i)) + res = rt6_ex->rt6i; + + return res; +} + +/* Remove the passed in cached rt from the hash table that contains it */ +int rt6_remove_exception_rt(struct rt6_info *rt) +{ + struct rt6_info *from = (struct rt6_info *)rt->dst.from; + struct rt6_exception_bucket *bucket; + struct in6_addr *src_key = NULL; + struct rt6_exception *rt6_ex; + int err; + + if (!from || + !(rt->rt6i_flags & RTF_CACHE)) + return -EINVAL; + + if (!rcu_access_pointer(from->rt6i_exception_bucket)) + return -ENOENT; + + spin_lock_bh(&rt6_exception_lock); + bucket = rcu_dereference_protected(from->rt6i_exception_bucket, + lockdep_is_held(&rt6_exception_lock)); +#ifdef CONFIG_IPV6_SUBTREES + /* rt6i_src.plen != 0 indicates 'from' is in subtree + * and exception table is indexed by a hash of + * both rt6i_dst and rt6i_src. + * Otherwise, the exception table is indexed by + * a hash of only rt6i_dst. + */ + if (from->rt6i_src.plen) + src_key = &rt->rt6i_src.addr; +#endif + rt6_ex = __rt6_find_exception_spinlock(&bucket, + &rt->rt6i_dst.addr, + src_key); + if (rt6_ex) { + rt6_remove_exception(bucket, rt6_ex); + err = 0; + } else { + err = -ENOENT; + } + + spin_unlock_bh(&rt6_exception_lock); + return err; +} + +/* Find rt6_ex which contains the passed in rt cache and + * refresh its stamp + */ +static void rt6_update_exception_stamp_rt(struct rt6_info *rt) +{ + struct rt6_info *from = (struct rt6_info *)rt->dst.from; + struct rt6_exception_bucket *bucket; + struct in6_addr *src_key = NULL; + struct rt6_exception *rt6_ex; + + if (!from || + !(rt->rt6i_flags & RTF_CACHE)) + return; + + rcu_read_lock(); + bucket = rcu_dereference(from->rt6i_exception_bucket); + +#ifdef CONFIG_IPV6_SUBTREES + /* rt6i_src.plen != 0 indicates 'from' is in subtree + * and exception table is indexed by a hash of + * both rt6i_dst and rt6i_src. + * Otherwise, the exception table is indexed by + * a hash of only rt6i_dst. + */ + if (from->rt6i_src.plen) + src_key = &rt->rt6i_src.addr; +#endif + rt6_ex = __rt6_find_exception_rcu(&bucket, + &rt->rt6i_dst.addr, + src_key); + if (rt6_ex) + rt6_ex->stamp = jiffies; + + rcu_read_unlock(); +} + +static void rt6_exceptions_remove_prefsrc(struct rt6_info *rt) +{ + struct rt6_exception_bucket *bucket; + struct rt6_exception *rt6_ex; + int i; + + bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, + lockdep_is_held(&rt6_exception_lock)); + + if (bucket) { + for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) { + hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) { + rt6_ex->rt6i->rt6i_prefsrc.plen = 0; + } + bucket++; + } + } +} + +static void rt6_exceptions_update_pmtu(struct rt6_info *rt, int mtu) +{ + struct rt6_exception_bucket *bucket; + struct rt6_exception *rt6_ex; + int i; + + bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, + lockdep_is_held(&rt6_exception_lock)); + + if (bucket) { + for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) { + hlist_for_each_entry(rt6_ex, &bucket->chain, hlist) { + struct rt6_info *entry = rt6_ex->rt6i; + /* For RTF_CACHE with rt6i_pmtu == 0 + * (i.e. a redirected route), + * the metrics of its rt->dst.from has already + * been updated. + */ + if (entry->rt6i_pmtu && entry->rt6i_pmtu > mtu) + entry->rt6i_pmtu = mtu; + } + bucket++; + } + } +} + +#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE) + +static void rt6_exceptions_clean_tohost(struct rt6_info *rt, + struct in6_addr *gateway) +{ + struct rt6_exception_bucket *bucket; + struct rt6_exception *rt6_ex; + struct hlist_node *tmp; + int i; + + if (!rcu_access_pointer(rt->rt6i_exception_bucket)) + return; + + spin_lock_bh(&rt6_exception_lock); + bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, + lockdep_is_held(&rt6_exception_lock)); + + if (bucket) { + for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) { + hlist_for_each_entry_safe(rt6_ex, tmp, + &bucket->chain, hlist) { + struct rt6_info *entry = rt6_ex->rt6i; + + if ((entry->rt6i_flags & RTF_CACHE_GATEWAY) == + RTF_CACHE_GATEWAY && + ipv6_addr_equal(gateway, + &entry->rt6i_gateway)) { + rt6_remove_exception(bucket, rt6_ex); + } + } + bucket++; + } + } + + spin_unlock_bh(&rt6_exception_lock); +} + +static void rt6_age_examine_exception(struct rt6_exception_bucket *bucket, + struct rt6_exception *rt6_ex, + struct fib6_gc_args *gc_args, + unsigned long now) +{ + struct rt6_info *rt = rt6_ex->rt6i; + + /* we are pruning and obsoleting aged-out and non gateway exceptions + * even if others have still references to them, so that on next + * dst_check() such references can be dropped. + * EXPIRES exceptions - e.g. pmtu-generated ones are pruned when + * expired, independently from their aging, as per RFC 8201 section 4 + */ + if (!(rt->rt6i_flags & RTF_EXPIRES) && + time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) { + RT6_TRACE("aging clone %p\n", rt); + rt6_remove_exception(bucket, rt6_ex); + return; + } else if (rt->rt6i_flags & RTF_GATEWAY) { + struct neighbour *neigh; + __u8 neigh_flags = 0; + + neigh = dst_neigh_lookup(&rt->dst, &rt->rt6i_gateway); + if (neigh) { + neigh_flags = neigh->flags; + neigh_release(neigh); + } + if (!(neigh_flags & NTF_ROUTER)) { + RT6_TRACE("purging route %p via non-router but gateway\n", + rt); + rt6_remove_exception(bucket, rt6_ex); + return; + } + } else if (__rt6_check_expired(rt)) { + RT6_TRACE("purging expired route %p\n", rt); + rt6_remove_exception(bucket, rt6_ex); + return; + } + gc_args->more++; +} + +void rt6_age_exceptions(struct rt6_info *rt, + struct fib6_gc_args *gc_args, + unsigned long now) +{ + struct rt6_exception_bucket *bucket; + struct rt6_exception *rt6_ex; + struct hlist_node *tmp; + int i; + + if (!rcu_access_pointer(rt->rt6i_exception_bucket)) + return; + + spin_lock_bh(&rt6_exception_lock); + bucket = rcu_dereference_protected(rt->rt6i_exception_bucket, + lockdep_is_held(&rt6_exception_lock)); + + if (bucket) { + for (i = 0; i < FIB6_EXCEPTION_BUCKET_SIZE; i++) { + hlist_for_each_entry_safe(rt6_ex, tmp, + &bucket->chain, hlist) { + rt6_age_examine_exception(bucket, rt6_ex, + gc_args, now); + } + bucket++; + } + } + spin_unlock_bh(&rt6_exception_lock); +} + struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif, struct flowi6 *fl6, int flags) { struct fib6_node *fn, *saved_fn; - struct rt6_info *rt; + struct rt6_info *rt, *rt_cache; int strict = 0; strict |= flags & RT6_LOOKUP_F_IFACE; @@ -1078,7 +1650,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, if (net->ipv6.devconf_all->forwarding == 0) strict |= RT6_LOOKUP_F_REACHABLE; - read_lock_bh(&table->tb6_lock); + rcu_read_lock(); fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); saved_fn = fn; @@ -1087,7 +1659,7 @@ struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, oif = 0; redo_rt6_select: - rt = rt6_select(fn, oif, strict); + rt = rt6_select(net, fn, oif, strict); if (rt->rt6i_nsiblings) rt = rt6_multipath_select(rt, fl6, oif, strict); if (rt == net->ipv6.ip6_null_entry) { @@ -1102,14 +1674,23 @@ redo_rt6_select: } } + /*Search through exception table */ + rt_cache = rt6_find_cached_rt(rt, &fl6->daddr, &fl6->saddr); + if (rt_cache) + rt = rt_cache; - if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) { - dst_use(&rt->dst, jiffies); - read_unlock_bh(&table->tb6_lock); - - rt6_dst_from_metrics_check(rt); - - trace_fib6_table_lookup(net, rt, table->tb6_id, fl6); + if (rt == net->ipv6.ip6_null_entry) { + rcu_read_unlock(); + dst_hold(&rt->dst); + trace_fib6_table_lookup(net, rt, table, fl6); + return rt; + } else if (rt->rt6i_flags & RTF_CACHE) { + if (ip6_hold_safe(net, &rt, true)) { + dst_use_noref(&rt->dst, jiffies); + rt6_dst_from_metrics_check(rt); + } + rcu_read_unlock(); + trace_fib6_table_lookup(net, rt, table, fl6); return rt; } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) && !(rt->rt6i_flags & RTF_GATEWAY))) { @@ -1121,8 +1702,14 @@ redo_rt6_select: struct rt6_info *uncached_rt; - dst_use(&rt->dst, jiffies); - read_unlock_bh(&table->tb6_lock); + if (ip6_hold_safe(net, &rt, true)) { + dst_use_noref(&rt->dst, jiffies); + } else { + rcu_read_unlock(); + uncached_rt = rt; + goto uncached_rt_out; + } + rcu_read_unlock(); uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL); dst_release(&rt->dst); @@ -1132,12 +1719,14 @@ redo_rt6_select: * No need for another dst_hold() */ rt6_uncached_list_add(uncached_rt); + atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache); } else { uncached_rt = net->ipv6.ip6_null_entry; dst_hold(&uncached_rt->dst); } - trace_fib6_table_lookup(net, uncached_rt, table->tb6_id, fl6); +uncached_rt_out: + trace_fib6_table_lookup(net, uncached_rt, table, fl6); return uncached_rt; } else { @@ -1145,26 +1734,28 @@ redo_rt6_select: struct rt6_info *pcpu_rt; - rt->dst.lastuse = jiffies; - rt->dst.__use++; + dst_use_noref(&rt->dst, jiffies); + local_bh_disable(); pcpu_rt = rt6_get_pcpu_route(rt); - if (pcpu_rt) { - read_unlock_bh(&table->tb6_lock); - } else { - /* We have to do the read_unlock first - * because rt6_make_pcpu_route() may trigger - * ip6_dst_gc() which will take the write_lock. - */ - dst_hold(&rt->dst); - read_unlock_bh(&table->tb6_lock); - pcpu_rt = rt6_make_pcpu_route(rt); - dst_release(&rt->dst); + if (!pcpu_rt) { + /* atomic_inc_not_zero() is needed when using rcu */ + if (atomic_inc_not_zero(&rt->rt6i_ref)) { + /* No dst_hold() on rt is needed because grabbing + * rt->rt6i_ref makes sure rt can't be released. + */ + pcpu_rt = rt6_make_pcpu_route(rt); + rt6_release(rt); + } else { + /* rt is already removed from tree */ + pcpu_rt = net->ipv6.ip6_null_entry; + dst_hold(&pcpu_rt->dst); + } } - - trace_fib6_table_lookup(net, pcpu_rt, table->tb6_id, fl6); + local_bh_enable(); + rcu_read_unlock(); + trace_fib6_table_lookup(net, pcpu_rt, table, fl6); return pcpu_rt; - } } EXPORT_SYMBOL_GPL(ip6_pol_route); @@ -1186,6 +1777,54 @@ struct dst_entry *ip6_route_input_lookup(struct net *net, } EXPORT_SYMBOL_GPL(ip6_route_input_lookup); +static void ip6_multipath_l3_keys(const struct sk_buff *skb, + struct flow_keys *keys) +{ + const struct ipv6hdr *outer_iph = ipv6_hdr(skb); + const struct ipv6hdr *key_iph = outer_iph; + const struct ipv6hdr *inner_iph; + const struct icmp6hdr *icmph; + struct ipv6hdr _inner_iph; + + if (likely(outer_iph->nexthdr != IPPROTO_ICMPV6)) + goto out; + + icmph = icmp6_hdr(skb); + if (icmph->icmp6_type != ICMPV6_DEST_UNREACH && + icmph->icmp6_type != ICMPV6_PKT_TOOBIG && + icmph->icmp6_type != ICMPV6_TIME_EXCEED && + icmph->icmp6_type != ICMPV6_PARAMPROB) + goto out; + + inner_iph = skb_header_pointer(skb, + skb_transport_offset(skb) + sizeof(*icmph), + sizeof(_inner_iph), &_inner_iph); + if (!inner_iph) + goto out; + + key_iph = inner_iph; +out: + memset(keys, 0, sizeof(*keys)); + keys->control.addr_type = FLOW_DISSECTOR_KEY_IPV6_ADDRS; + keys->addrs.v6addrs.src = key_iph->saddr; + keys->addrs.v6addrs.dst = key_iph->daddr; + keys->tags.flow_label = ip6_flowinfo(key_iph); + keys->basic.ip_proto = key_iph->nexthdr; +} + +/* if skb is set it will be used and fl6 can be NULL */ +u32 rt6_multipath_hash(const struct flowi6 *fl6, const struct sk_buff *skb) +{ + struct flow_keys hash_keys; + + if (skb) { + ip6_multipath_l3_keys(skb, &hash_keys); + return flow_hash_from_keys(&hash_keys); + } + + return get_hash_from_flowi6(fl6); +} + void ip6_route_input(struct sk_buff *skb) { const struct ipv6hdr *iph = ipv6_hdr(skb); @@ -1204,6 +1843,8 @@ void ip6_route_input(struct sk_buff *skb) tun_info = skb_tunnel_info(skb); if (tun_info && !(tun_info->mode & IP_TUNNEL_INFO_TX)) fl6.flowi6_tun_key.tun_id = tun_info->key.tun_id; + if (unlikely(fl6.flowi6_proto == IPPROTO_ICMPV6)) + fl6.mp_hash = rt6_multipath_hash(&fl6, skb); skb_dst_drop(skb); skb_dst_set(skb, ip6_route_input_lookup(net, skb->dev, &fl6, flags)); } @@ -1250,9 +1891,10 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori struct dst_entry *new = NULL; rt = dst_alloc(&ip6_dst_blackhole_ops, loopback_dev, 1, - DST_OBSOLETE_NONE, 0); + DST_OBSOLETE_DEAD, 0); if (rt) { rt6_info_init(rt); + atomic_inc(&net->ipv6.rt6_stats->fib_rt_alloc); new = &rt->dst; new->__use = 1; @@ -1289,7 +1931,9 @@ static void rt6_dst_from_metrics_check(struct rt6_info *rt) static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie) { - if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie)) + u32 rt_cookie = 0; + + if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie) return NULL; if (rt6_check_expired(rt)) @@ -1357,8 +2001,14 @@ static void ip6_link_failure(struct sk_buff *skb) if (rt->rt6i_flags & RTF_CACHE) { if (dst_hold_safe(&rt->dst)) ip6_del_rt(rt); - } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) { - rt->rt6i_node->fn_sernum = -1; + } else { + struct fib6_node *fn; + + rcu_read_lock(); + fn = rcu_dereference(rt->rt6i_node); + if (fn && (rt->rt6i_flags & RTF_DEFAULT)) + fn->fn_sernum = -1; + rcu_read_unlock(); } } } @@ -1375,7 +2025,8 @@ static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu) static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt) { return !(rt->rt6i_flags & RTF_CACHE) && - (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node); + (rt->rt6i_flags & RTF_PCPU || + rcu_access_pointer(rt->rt6i_node)); } static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, @@ -1407,23 +2058,17 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, if (!rt6_cache_allowed_for_pmtu(rt6)) { rt6_do_update_pmtu(rt6, mtu); + /* update rt6_ex->stamp for cache */ + if (rt6->rt6i_flags & RTF_CACHE) + rt6_update_exception_stamp_rt(rt6); } else if (daddr) { struct rt6_info *nrt6; nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr); if (nrt6) { rt6_do_update_pmtu(nrt6, mtu); - - /* ip6_ins_rt(nrt6) will bump the - * rt6->rt6i_node->fn_sernum - * which will fail the next rt6_check() and - * invalidate the sk->sk_dst_cache. - */ - ip6_ins_rt(nrt6); - /* Release the reference taken in - * ip6_rt_cache_alloc() - */ - dst_release(&nrt6->dst); + if (rt6_insert_exception(nrt6, rt6)) + dst_release_immediate(&nrt6->dst); } } } @@ -1487,7 +2132,7 @@ static struct rt6_info *__ip6_route_redirect(struct net *net, int flags) { struct ip6rd_flowi *rdfl = (struct ip6rd_flowi *)fl6; - struct rt6_info *rt; + struct rt6_info *rt, *rt_cache; struct fib6_node *fn; /* Get the "current" route for this destination and @@ -1500,10 +2145,10 @@ static struct rt6_info *__ip6_route_redirect(struct net *net, * routes. */ - read_lock_bh(&table->tb6_lock); + rcu_read_lock(); fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); restart: - for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { + for_each_fib6_node_rt_rcu(fn) { if (rt6_check_expired(rt)) continue; if (rt->dst.error) @@ -1512,8 +2157,23 @@ restart: continue; if (fl6->flowi6_oif != rt->dst.dev->ifindex) continue; - if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) + /* rt_cache's gateway might be different from its 'parent' + * in the case of an ip redirect. + * So we keep searching in the exception table if the gateway + * is different. + */ + if (!ipv6_addr_equal(&rdfl->gateway, &rt->rt6i_gateway)) { + rt_cache = rt6_find_cached_rt(rt, + &fl6->daddr, + &fl6->saddr); + if (rt_cache && + ipv6_addr_equal(&rdfl->gateway, + &rt_cache->rt6i_gateway)) { + rt = rt_cache; + break; + } continue; + } break; } @@ -1531,11 +2191,11 @@ restart: } out: - dst_hold(&rt->dst); + ip6_hold_safe(net, &rt, true); - read_unlock_bh(&table->tb6_lock); + rcu_read_unlock(); - trace_fib6_table_lookup(net, rt, table->tb6_id, fl6); + trace_fib6_table_lookup(net, rt, table, fl6); return rt; }; @@ -1682,6 +2342,7 @@ struct dst_entry *icmp6_dst_alloc(struct net_device *dev, * do proper release of the net_device */ rt6_uncached_list_add(rt); + atomic_inc(&net->ipv6.rt6_stats->fib_rt_uncache); dst = xfrm_lookup(net, &rt->dst, flowi6_to_flowi(fl6), NULL, 0); @@ -1717,6 +2378,7 @@ out: static int ip6_convert_metrics(struct mx6_config *mxc, const struct fib6_config *cfg) { + struct net *net = cfg->fc_nlinfo.nl_net; bool ecn_ca = false; struct nlattr *nla; int remaining; @@ -1742,7 +2404,7 @@ static int ip6_convert_metrics(struct mx6_config *mxc, char tmp[TCP_CA_NAME_MAX]; nla_strlcpy(tmp, nla, sizeof(tmp)); - val = tcp_ca_get_key_by_name(tmp, &ecn_ca); + val = tcp_ca_get_key_by_name(net, tmp, &ecn_ca); if (val == TCP_CA_UNSPEC) goto err; } else { @@ -1817,6 +2479,12 @@ static struct rt6_info *ip6_route_info_create(struct fib6_config *cfg, goto out; } + /* RTF_CACHE is an internal flag; can not be set by userspace */ + if (cfg->fc_flags & RTF_CACHE) { + NL_SET_ERR_MSG(extack, "Userspace can not set RTF_CACHE"); + goto out; + } + if (cfg->fc_dst_len > 128) { NL_SET_ERR_MSG(extack, "Invalid prefix length"); goto out; @@ -2132,9 +2800,9 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) } table = rt->rt6i_table; - write_lock_bh(&table->tb6_lock); + spin_lock_bh(&table->tb6_lock); err = fib6_del(rt, info); - write_unlock_bh(&table->tb6_lock); + spin_unlock_bh(&table->tb6_lock); out: ip6_rt_put(rt); @@ -2160,7 +2828,7 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg) if (rt == net->ipv6.ip6_null_entry) goto out_put; table = rt->rt6i_table; - write_lock_bh(&table->tb6_lock); + spin_lock_bh(&table->tb6_lock); if (rt->rt6i_nsiblings && cfg->fc_delete_all_nh) { struct rt6_info *sibling, *next_sibling; @@ -2190,7 +2858,7 @@ static int __ip6_del_rt_siblings(struct rt6_info *rt, struct fib6_config *cfg) err = fib6_del(rt, info); out_unlock: - write_unlock_bh(&table->tb6_lock); + spin_unlock_bh(&table->tb6_lock); out_put: ip6_rt_put(rt); @@ -2204,9 +2872,9 @@ out_put: static int ip6_route_del(struct fib6_config *cfg, struct netlink_ext_ack *extack) { + struct rt6_info *rt, *rt_cache; struct fib6_table *table; struct fib6_node *fn; - struct rt6_info *rt; int err = -ESRCH; table = fib6_get_table(cfg->fc_nlinfo.nl_net, cfg->fc_table); @@ -2215,17 +2883,22 @@ static int ip6_route_del(struct fib6_config *cfg, return err; } - read_lock_bh(&table->tb6_lock); + rcu_read_lock(); fn = fib6_locate(&table->tb6_root, &cfg->fc_dst, cfg->fc_dst_len, - &cfg->fc_src, cfg->fc_src_len); + &cfg->fc_src, cfg->fc_src_len, + !(cfg->fc_flags & RTF_CACHE)); if (fn) { - for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { - if ((rt->rt6i_flags & RTF_CACHE) && - !(cfg->fc_flags & RTF_CACHE)) - continue; + for_each_fib6_node_rt_rcu(fn) { + if (cfg->fc_flags & RTF_CACHE) { + rt_cache = rt6_find_cached_rt(rt, &cfg->fc_dst, + &cfg->fc_src); + if (!rt_cache) + continue; + rt = rt_cache; + } if (cfg->fc_ifindex && (!rt->dst.dev || rt->dst.dev->ifindex != cfg->fc_ifindex)) @@ -2237,8 +2910,9 @@ static int ip6_route_del(struct fib6_config *cfg, continue; if (cfg->fc_protocol && cfg->fc_protocol != rt->rt6i_protocol) continue; - dst_hold(&rt->dst); - read_unlock_bh(&table->tb6_lock); + if (!dst_hold_safe(&rt->dst)) + break; + rcu_read_unlock(); /* if gateway was specified only delete the one hop */ if (cfg->fc_flags & RTF_GATEWAY) @@ -2247,7 +2921,7 @@ static int ip6_route_del(struct fib6_config *cfg, return __ip6_del_rt_siblings(rt, cfg); } } - read_unlock_bh(&table->tb6_lock); + rcu_read_unlock(); return err; } @@ -2351,8 +3025,14 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu nrt->rt6i_protocol = RTPROT_REDIRECT; nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key; - if (ip6_ins_rt(nrt)) - goto out_release; + /* No need to remove rt from the exception table if rt is + * a cached route because rt6_insert_exception() will + * takes care of it + */ + if (rt6_insert_exception(nrt, rt)) { + dst_release_immediate(&nrt->dst); + goto out; + } netevent.old = &rt->dst; netevent.new = &nrt->dst; @@ -2360,17 +3040,6 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu netevent.neigh = neigh; call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); - if (rt->rt6i_flags & RTF_CACHE) { - rt = (struct rt6_info *) dst_clone(&rt->dst); - ip6_del_rt(rt); - } - -out_release: - /* Release the reference taken in - * ip6_rt_cache_alloc() - */ - dst_release(&nrt->dst); - out: neigh_release(neigh); } @@ -2427,23 +3096,23 @@ static struct rt6_info *rt6_get_route_info(struct net *net, if (!table) return NULL; - read_lock_bh(&table->tb6_lock); - fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0); + rcu_read_lock(); + fn = fib6_locate(&table->tb6_root, prefix, prefixlen, NULL, 0, true); if (!fn) goto out; - for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { + for_each_fib6_node_rt_rcu(fn) { if (rt->dst.dev->ifindex != ifindex) continue; if ((rt->rt6i_flags & (RTF_ROUTEINFO|RTF_GATEWAY)) != (RTF_ROUTEINFO|RTF_GATEWAY)) continue; if (!ipv6_addr_equal(&rt->rt6i_gateway, gwaddr)) continue; - dst_hold(&rt->dst); + ip6_hold_safe(NULL, &rt, false); break; } out: - read_unlock_bh(&table->tb6_lock); + rcu_read_unlock(); return rt; } @@ -2489,16 +3158,16 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev if (!table) return NULL; - read_lock_bh(&table->tb6_lock); - for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) { + rcu_read_lock(); + for_each_fib6_node_rt_rcu(&table->tb6_root) { if (dev == rt->dst.dev && ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && ipv6_addr_equal(&rt->rt6i_gateway, addr)) break; } if (rt) - dst_hold(&rt->dst); - read_unlock_bh(&table->tb6_lock); + ip6_hold_safe(NULL, &rt, false); + rcu_read_unlock(); return rt; } @@ -2536,17 +3205,20 @@ static void __rt6_purge_dflt_routers(struct fib6_table *table) struct rt6_info *rt; restart: - read_lock_bh(&table->tb6_lock); - for (rt = table->tb6_root.leaf; rt; rt = rt->dst.rt6_next) { + rcu_read_lock(); + for_each_fib6_node_rt_rcu(&table->tb6_root) { if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF) && (!rt->rt6i_idev || rt->rt6i_idev->cnf.accept_ra != 2)) { - dst_hold(&rt->dst); - read_unlock_bh(&table->tb6_lock); - ip6_del_rt(rt); + if (dst_hold_safe(&rt->dst)) { + rcu_read_unlock(); + ip6_del_rt(rt); + } else { + rcu_read_unlock(); + } goto restart; } } - read_unlock_bh(&table->tb6_lock); + rcu_read_unlock(); table->flags &= ~RT6_TABLE_HAS_DFLT_ROUTER; } @@ -2688,15 +3360,9 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, { u32 tb_id; struct net *net = dev_net(idev->dev); - struct net_device *dev = net->loopback_dev; + struct net_device *dev = idev->dev; struct rt6_info *rt; - /* use L3 Master device as loopback for host routes if device - * is enslaved and address is not link local or multicast - */ - if (!rt6_need_strict(addr)) - dev = l3mdev_master_dev_rcu(idev->dev) ? : dev; - rt = ip6_dst_alloc(net, dev, DST_NOCOUNT); if (!rt) return ERR_PTR(-ENOMEM); @@ -2740,8 +3406,12 @@ static int fib6_remove_prefsrc(struct rt6_info *rt, void *arg) if (((void *)rt->dst.dev == dev || !dev) && rt != net->ipv6.ip6_null_entry && ipv6_addr_equal(addr, &rt->rt6i_prefsrc.addr)) { + spin_lock_bh(&rt6_exception_lock); /* remove prefsrc entry */ rt->rt6i_prefsrc.plen = 0; + /* need to update cache as well */ + rt6_exceptions_remove_prefsrc(rt); + spin_unlock_bh(&rt6_exception_lock); } return 0; } @@ -2758,18 +3428,23 @@ void rt6_remove_prefsrc(struct inet6_ifaddr *ifp) } #define RTF_RA_ROUTER (RTF_ADDRCONF | RTF_DEFAULT | RTF_GATEWAY) -#define RTF_CACHE_GATEWAY (RTF_GATEWAY | RTF_CACHE) /* Remove routers and update dst entries when gateway turn into host. */ static int fib6_clean_tohost(struct rt6_info *rt, void *arg) { struct in6_addr *gateway = (struct in6_addr *)arg; - if ((((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) || - ((rt->rt6i_flags & RTF_CACHE_GATEWAY) == RTF_CACHE_GATEWAY)) && - ipv6_addr_equal(gateway, &rt->rt6i_gateway)) { + if (((rt->rt6i_flags & RTF_RA_ROUTER) == RTF_RA_ROUTER) && + ipv6_addr_equal(gateway, &rt->rt6i_gateway)) { return -1; } + + /* Further clean up cached routes in exception table. + * This is needed because cached route may have a different + * gateway than its 'parent' in the case of an ip redirect. + */ + rt6_exceptions_clean_tohost(rt, gateway); + return 0; } @@ -2848,19 +3523,14 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) if (rt->dst.dev == arg->dev && dst_metric_raw(&rt->dst, RTAX_MTU) && !dst_metric_locked(&rt->dst, RTAX_MTU)) { - if (rt->rt6i_flags & RTF_CACHE) { - /* For RTF_CACHE with rt6i_pmtu == 0 - * (i.e. a redirected route), - * the metrics of its rt->dst.from has already - * been updated. - */ - if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu) - rt->rt6i_pmtu = arg->mtu; - } else if (dst_mtu(&rt->dst) >= arg->mtu || - (dst_mtu(&rt->dst) < arg->mtu && - dst_mtu(&rt->dst) == idev->cnf.mtu6)) { + spin_lock_bh(&rt6_exception_lock); + if (dst_mtu(&rt->dst) >= arg->mtu || + (dst_mtu(&rt->dst) < arg->mtu && + dst_mtu(&rt->dst) == idev->cnf.mtu6)) { dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); } + rt6_exceptions_update_pmtu(rt, arg->mtu); + spin_unlock_bh(&rt6_exception_lock); } return 0; } @@ -3327,6 +3997,9 @@ static int rt6_nexthop_info(struct sk_buff *skb, struct rt6_info *rt, goto nla_put_failure; } + if (rt->rt6i_nh_flags & RTNH_F_OFFLOAD) + *flags |= RTNH_F_OFFLOAD; + /* not needed for multipath encoding b/c it has a rtnexthop struct */ if (!skip_oif && rt->dst.dev && nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex)) @@ -3605,8 +4278,11 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, struct net_device *dev; int flags = 0; - dev = __dev_get_by_index(net, iif); + rcu_read_lock(); + + dev = dev_get_by_index_rcu(net, iif); if (!dev) { + rcu_read_unlock(); err = -ENODEV; goto errout; } @@ -3618,15 +4294,19 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, if (!fibmatch) dst = ip6_route_input_lookup(net, dev, &fl6, flags); + else + dst = ip6_route_lookup(net, &fl6, 0); + + rcu_read_unlock(); } else { fl6.flowi6_oif = oif; if (!fibmatch) dst = ip6_route_output(net, NULL, &fl6); + else + dst = ip6_route_lookup(net, &fl6, 0); } - if (fibmatch) - dst = ip6_route_lookup(net, &fl6, 0); rt = container_of(dst, struct rt6_info, dst); if (rt->dst.error) { @@ -3751,7 +4431,7 @@ static int rt6_stats_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%04x %04x %04x %04x %04x %04x %04x\n", net->ipv6.rt6_stats->fib_nodes, net->ipv6.rt6_stats->fib_route_nodes, - net->ipv6.rt6_stats->fib_rt_alloc, + atomic_read(&net->ipv6.rt6_stats->fib_rt_alloc), net->ipv6.rt6_stats->fib_rt_entries, net->ipv6.rt6_stats->fib_rt_cache, dst_entries_get_slow(&net->ipv6.ip6_dst_ops), @@ -3918,6 +4598,7 @@ static int __net_init ip6_route_net_init(struct net *net) ip6_template_metrics, true); #ifdef CONFIG_IPV6_MULTIPLE_TABLES + net->ipv6.fib6_has_custom_rules = false; net->ipv6.ip6_prohibit_entry = kmemdup(&ip6_prohibit_entry_template, sizeof(*net->ipv6.ip6_prohibit_entry), GFP_KERNEL); @@ -4093,9 +4774,10 @@ int __init ip6_route_init(void) goto fib6_rules_init; ret = -ENOBUFS; - if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, NULL) || - __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, NULL) || - __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, NULL)) + if (__rtnl_register(PF_INET6, RTM_NEWROUTE, inet6_rtm_newroute, NULL, 0) || + __rtnl_register(PF_INET6, RTM_DELROUTE, inet6_rtm_delroute, NULL, 0) || + __rtnl_register(PF_INET6, RTM_GETROUTE, inet6_rtm_getroute, NULL, + RTNL_FLAG_DOIT_UNLOCKED)) goto out_register_late_subsys; ret = register_netdevice_notifier(&ip6_route_dev_notifier); diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index 15fba55e3da8..c81407770956 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -40,7 +40,7 @@ bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len) if (((srh->hdrlen + 1) << 3) != len) return false; - if (srh->segments_left != srh->first_segment) + if (srh->segments_left > srh->first_segment) return false; tlv_offset = sizeof(*srh) + ((srh->first_segment + 1) << 4); @@ -456,6 +456,10 @@ int __init seg6_init(void) err = seg6_iptunnel_init(); if (err) goto out_unregister_pernet; + + err = seg6_local_init(); + if (err) + goto out_unregister_pernet; #endif #ifdef CONFIG_IPV6_SEG6_HMAC @@ -471,6 +475,7 @@ out: #ifdef CONFIG_IPV6_SEG6_HMAC out_unregister_iptun: #ifdef CONFIG_IPV6_SEG6_LWTUNNEL + seg6_local_exit(); seg6_iptunnel_exit(); #endif #endif diff --git a/net/ipv6/seg6_hmac.c b/net/ipv6/seg6_hmac.c index f950cb53d5e3..33fb35cbfac1 100644 --- a/net/ipv6/seg6_hmac.c +++ b/net/ipv6/seg6_hmac.c @@ -12,6 +12,7 @@ */ #include <linux/errno.h> +#include <linux/kernel.h> #include <linux/types.h> #include <linux/socket.h> #include <linux/sockios.h> @@ -110,7 +111,7 @@ static struct seg6_hmac_algo *__hmac_get_algo(u8 alg_id) struct seg6_hmac_algo *algo; int i, alg_count; - alg_count = sizeof(hmac_algos) / sizeof(struct seg6_hmac_algo); + alg_count = ARRAY_SIZE(hmac_algos); for (i = 0; i < alg_count; i++) { algo = &hmac_algos[i]; if (algo->alg_id == alg_id) @@ -360,7 +361,7 @@ static int seg6_hmac_init_algo(void) struct shash_desc *shash; int i, alg_count, cpu; - alg_count = sizeof(hmac_algos) / sizeof(struct seg6_hmac_algo); + alg_count = ARRAY_SIZE(hmac_algos); for (i = 0; i < alg_count; i++) { struct crypto_shash **p_tfm; @@ -421,7 +422,7 @@ void seg6_hmac_exit(void) struct seg6_hmac_algo *algo = NULL; int i, alg_count, cpu; - alg_count = sizeof(hmac_algos) / sizeof(struct seg6_hmac_algo); + alg_count = ARRAY_SIZE(hmac_algos); for (i = 0; i < alg_count; i++) { algo = &hmac_algos[i]; for_each_possible_cpu(cpu) { diff --git a/net/ipv6/seg6_iptunnel.c b/net/ipv6/seg6_iptunnel.c index 264d772d3c7d..bd6cc688bd19 100644 --- a/net/ipv6/seg6_iptunnel.c +++ b/net/ipv6/seg6_iptunnel.c @@ -91,7 +91,7 @@ static void set_tun_src(struct net *net, struct net_device *dev, } /* encapsulate an IPv6 packet within an outer IPv6 header with a given SRH */ -static int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) +int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh, int proto) { struct net *net = dev_net(skb_dst(skb)->dev); struct ipv6hdr *hdr, *inner_hdr; @@ -116,15 +116,22 @@ static int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) * hlim will be decremented in ip6_forward() afterwards and * decapsulation will overwrite inner hlim with outer hlim */ - ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)), - ip6_flowlabel(inner_hdr)); - hdr->hop_limit = inner_hdr->hop_limit; + + if (skb->protocol == htons(ETH_P_IPV6)) { + ip6_flow_hdr(hdr, ip6_tclass(ip6_flowinfo(inner_hdr)), + ip6_flowlabel(inner_hdr)); + hdr->hop_limit = inner_hdr->hop_limit; + } else { + ip6_flow_hdr(hdr, 0, 0); + hdr->hop_limit = ip6_dst_hoplimit(skb_dst(skb)); + } + hdr->nexthdr = NEXTHDR_ROUTING; isrh = (void *)hdr + sizeof(*hdr); memcpy(isrh, osrh, hdrlen); - isrh->nexthdr = NEXTHDR_IPV6; + isrh->nexthdr = proto; hdr->daddr = isrh->segments[isrh->first_segment]; set_tun_src(net, skb->dev, &hdr->daddr, &hdr->saddr); @@ -141,10 +148,10 @@ static int seg6_do_srh_encap(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) return 0; } +EXPORT_SYMBOL_GPL(seg6_do_srh_encap); /* insert an SRH within an IPv6 packet, just after the IPv6 header */ -#ifdef CONFIG_IPV6_SEG6_INLINE -static int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) +int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) { struct ipv6hdr *hdr, *oldhdr; struct ipv6_sr_hdr *isrh; @@ -193,13 +200,13 @@ static int seg6_do_srh_inline(struct sk_buff *skb, struct ipv6_sr_hdr *osrh) return 0; } -#endif +EXPORT_SYMBOL_GPL(seg6_do_srh_inline); static int seg6_do_srh(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct seg6_iptunnel_encap *tinfo; - int err = 0; + int proto, err = 0; tinfo = seg6_encap_lwtunnel(dst->lwtstate); @@ -209,19 +216,47 @@ static int seg6_do_srh(struct sk_buff *skb) } switch (tinfo->mode) { -#ifdef CONFIG_IPV6_SEG6_INLINE case SEG6_IPTUN_MODE_INLINE: + if (skb->protocol != htons(ETH_P_IPV6)) + return -EINVAL; + err = seg6_do_srh_inline(skb, tinfo->srh); + if (err) + return err; + skb_reset_inner_headers(skb); break; -#endif case SEG6_IPTUN_MODE_ENCAP: - err = seg6_do_srh_encap(skb, tinfo->srh); + if (skb->protocol == htons(ETH_P_IPV6)) + proto = IPPROTO_IPV6; + else if (skb->protocol == htons(ETH_P_IP)) + proto = IPPROTO_IPIP; + else + return -EINVAL; + + err = seg6_do_srh_encap(skb, tinfo->srh, proto); + if (err) + return err; + + skb->protocol = htons(ETH_P_IPV6); break; - } + case SEG6_IPTUN_MODE_L2ENCAP: + if (!skb_mac_header_was_set(skb)) + return -EINVAL; - if (err) - return err; + if (pskb_expand_head(skb, skb->mac_len, 0, GFP_ATOMIC) < 0) + return -ENOMEM; + + skb_mac_header_rebuild(skb); + skb_push(skb, skb->mac_len); + + err = seg6_do_srh_encap(skb, tinfo->srh, NEXTHDR_NONE); + if (err) + return err; + + skb->protocol = htons(ETH_P_IPV6); + break; + } ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); skb_set_transport_header(skb, sizeof(struct ipv6hdr)); @@ -336,6 +371,9 @@ static int seg6_build_state(struct nlattr *nla, struct seg6_lwt *slwt; int err; + if (family != AF_INET && family != AF_INET6) + return -EINVAL; + err = nla_parse_nested(tb, SEG6_IPTUNNEL_MAX, nla, seg6_iptunnel_policy, extack); @@ -357,12 +395,15 @@ static int seg6_build_state(struct nlattr *nla, return -EINVAL; switch (tuninfo->mode) { -#ifdef CONFIG_IPV6_SEG6_INLINE case SEG6_IPTUN_MODE_INLINE: + if (family != AF_INET6) + return -EINVAL; + break; -#endif case SEG6_IPTUN_MODE_ENCAP: break; + case SEG6_IPTUN_MODE_L2ENCAP: + break; default: return -EINVAL; } @@ -386,8 +427,11 @@ static int seg6_build_state(struct nlattr *nla, memcpy(&slwt->tuninfo, tuninfo, tuninfo_len); newts->type = LWTUNNEL_ENCAP_SEG6; - newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT | - LWTUNNEL_STATE_INPUT_REDIRECT; + newts->flags |= LWTUNNEL_STATE_INPUT_REDIRECT; + + if (tuninfo->mode != SEG6_IPTUN_MODE_L2ENCAP) + newts->flags |= LWTUNNEL_STATE_OUTPUT_REDIRECT; + newts->headroom = seg6_lwt_headroom(tuninfo); *ts = newts; diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c new file mode 100644 index 000000000000..825b8e01f947 --- /dev/null +++ b/net/ipv6/seg6_local.c @@ -0,0 +1,934 @@ +/* + * SR-IPv6 implementation + * + * Author: + * David Lebrun <david.lebrun@uclouvain.be> + * + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/types.h> +#include <linux/skbuff.h> +#include <linux/net.h> +#include <linux/module.h> +#include <net/ip.h> +#include <net/lwtunnel.h> +#include <net/netevent.h> +#include <net/netns/generic.h> +#include <net/ip6_fib.h> +#include <net/route.h> +#include <net/seg6.h> +#include <linux/seg6.h> +#include <linux/seg6_local.h> +#include <net/addrconf.h> +#include <net/ip6_route.h> +#include <net/dst_cache.h> +#ifdef CONFIG_IPV6_SEG6_HMAC +#include <net/seg6_hmac.h> +#endif +#include <linux/etherdevice.h> + +struct seg6_local_lwt; + +struct seg6_action_desc { + int action; + unsigned long attrs; + int (*input)(struct sk_buff *skb, struct seg6_local_lwt *slwt); + int static_headroom; +}; + +struct seg6_local_lwt { + int action; + struct ipv6_sr_hdr *srh; + int table; + struct in_addr nh4; + struct in6_addr nh6; + int iif; + int oif; + + int headroom; + struct seg6_action_desc *desc; +}; + +static struct seg6_local_lwt *seg6_local_lwtunnel(struct lwtunnel_state *lwt) +{ + return (struct seg6_local_lwt *)lwt->data; +} + +static struct ipv6_sr_hdr *get_srh(struct sk_buff *skb) +{ + struct ipv6_sr_hdr *srh; + int len, srhoff = 0; + + if (ipv6_find_hdr(skb, &srhoff, IPPROTO_ROUTING, NULL, NULL) < 0) + return NULL; + + if (!pskb_may_pull(skb, srhoff + sizeof(*srh))) + return NULL; + + srh = (struct ipv6_sr_hdr *)(skb->data + srhoff); + + len = (srh->hdrlen + 1) << 3; + + if (!pskb_may_pull(skb, srhoff + len)) + return NULL; + + if (!seg6_validate_srh(srh, len)) + return NULL; + + return srh; +} + +static struct ipv6_sr_hdr *get_and_validate_srh(struct sk_buff *skb) +{ + struct ipv6_sr_hdr *srh; + + srh = get_srh(skb); + if (!srh) + return NULL; + + if (srh->segments_left == 0) + return NULL; + +#ifdef CONFIG_IPV6_SEG6_HMAC + if (!seg6_hmac_validate_skb(skb)) + return NULL; +#endif + + return srh; +} + +static bool decap_and_validate(struct sk_buff *skb, int proto) +{ + struct ipv6_sr_hdr *srh; + unsigned int off = 0; + + srh = get_srh(skb); + if (srh && srh->segments_left > 0) + return false; + +#ifdef CONFIG_IPV6_SEG6_HMAC + if (srh && !seg6_hmac_validate_skb(skb)) + return false; +#endif + + if (ipv6_find_hdr(skb, &off, proto, NULL, NULL) < 0) + return false; + + if (!pskb_pull(skb, off)) + return false; + + skb_postpull_rcsum(skb, skb_network_header(skb), off); + + skb_reset_network_header(skb); + skb_reset_transport_header(skb); + skb->encapsulation = 0; + + return true; +} + +static void advance_nextseg(struct ipv6_sr_hdr *srh, struct in6_addr *daddr) +{ + struct in6_addr *addr; + + srh->segments_left--; + addr = srh->segments + srh->segments_left; + *daddr = *addr; +} + +static void lookup_nexthop(struct sk_buff *skb, struct in6_addr *nhaddr, + u32 tbl_id) +{ + struct net *net = dev_net(skb->dev); + struct ipv6hdr *hdr = ipv6_hdr(skb); + int flags = RT6_LOOKUP_F_HAS_SADDR; + struct dst_entry *dst = NULL; + struct rt6_info *rt; + struct flowi6 fl6; + + fl6.flowi6_iif = skb->dev->ifindex; + fl6.daddr = nhaddr ? *nhaddr : hdr->daddr; + fl6.saddr = hdr->saddr; + fl6.flowlabel = ip6_flowinfo(hdr); + fl6.flowi6_mark = skb->mark; + fl6.flowi6_proto = hdr->nexthdr; + + if (nhaddr) + fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH; + + if (!tbl_id) { + dst = ip6_route_input_lookup(net, skb->dev, &fl6, flags); + } else { + struct fib6_table *table; + + table = fib6_get_table(net, tbl_id); + if (!table) + goto out; + + rt = ip6_pol_route(net, table, 0, &fl6, flags); + dst = &rt->dst; + } + + if (dst && dst->dev->flags & IFF_LOOPBACK && !dst->error) { + dst_release(dst); + dst = NULL; + } + +out: + if (!dst) { + rt = net->ipv6.ip6_blk_hole_entry; + dst = &rt->dst; + dst_hold(dst); + } + + skb_dst_drop(skb); + skb_dst_set(skb, dst); +} + +/* regular endpoint function */ +static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt) +{ + struct ipv6_sr_hdr *srh; + + srh = get_and_validate_srh(skb); + if (!srh) + goto drop; + + advance_nextseg(srh, &ipv6_hdr(skb)->daddr); + + lookup_nexthop(skb, NULL, 0); + + return dst_input(skb); + +drop: + kfree_skb(skb); + return -EINVAL; +} + +/* regular endpoint, and forward to specified nexthop */ +static int input_action_end_x(struct sk_buff *skb, struct seg6_local_lwt *slwt) +{ + struct ipv6_sr_hdr *srh; + + srh = get_and_validate_srh(skb); + if (!srh) + goto drop; + + advance_nextseg(srh, &ipv6_hdr(skb)->daddr); + + lookup_nexthop(skb, &slwt->nh6, 0); + + return dst_input(skb); + +drop: + kfree_skb(skb); + return -EINVAL; +} + +static int input_action_end_t(struct sk_buff *skb, struct seg6_local_lwt *slwt) +{ + struct ipv6_sr_hdr *srh; + + srh = get_and_validate_srh(skb); + if (!srh) + goto drop; + + advance_nextseg(srh, &ipv6_hdr(skb)->daddr); + + lookup_nexthop(skb, NULL, slwt->table); + + return dst_input(skb); + +drop: + kfree_skb(skb); + return -EINVAL; +} + +/* decapsulate and forward inner L2 frame on specified interface */ +static int input_action_end_dx2(struct sk_buff *skb, + struct seg6_local_lwt *slwt) +{ + struct net *net = dev_net(skb->dev); + struct net_device *odev; + struct ethhdr *eth; + + if (!decap_and_validate(skb, NEXTHDR_NONE)) + goto drop; + + if (!pskb_may_pull(skb, ETH_HLEN)) + goto drop; + + skb_reset_mac_header(skb); + eth = (struct ethhdr *)skb->data; + + /* To determine the frame's protocol, we assume it is 802.3. This avoids + * a call to eth_type_trans(), which is not really relevant for our + * use case. + */ + if (!eth_proto_is_802_3(eth->h_proto)) + goto drop; + + odev = dev_get_by_index_rcu(net, slwt->oif); + if (!odev) + goto drop; + + /* As we accept Ethernet frames, make sure the egress device is of + * the correct type. + */ + if (odev->type != ARPHRD_ETHER) + goto drop; + + if (!(odev->flags & IFF_UP) || !netif_carrier_ok(odev)) + goto drop; + + skb_orphan(skb); + + if (skb_warn_if_lro(skb)) + goto drop; + + skb_forward_csum(skb); + + if (skb->len - ETH_HLEN > odev->mtu) + goto drop; + + skb->dev = odev; + skb->protocol = eth->h_proto; + + return dev_queue_xmit(skb); + +drop: + kfree_skb(skb); + return -EINVAL; +} + +/* decapsulate and forward to specified nexthop */ +static int input_action_end_dx6(struct sk_buff *skb, + struct seg6_local_lwt *slwt) +{ + struct in6_addr *nhaddr = NULL; + + /* this function accepts IPv6 encapsulated packets, with either + * an SRH with SL=0, or no SRH. + */ + + if (!decap_and_validate(skb, IPPROTO_IPV6)) + goto drop; + + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) + goto drop; + + /* The inner packet is not associated to any local interface, + * so we do not call netif_rx(). + * + * If slwt->nh6 is set to ::, then lookup the nexthop for the + * inner packet's DA. Otherwise, use the specified nexthop. + */ + + if (!ipv6_addr_any(&slwt->nh6)) + nhaddr = &slwt->nh6; + + lookup_nexthop(skb, nhaddr, 0); + + return dst_input(skb); +drop: + kfree_skb(skb); + return -EINVAL; +} + +static int input_action_end_dx4(struct sk_buff *skb, + struct seg6_local_lwt *slwt) +{ + struct iphdr *iph; + __be32 nhaddr; + int err; + + if (!decap_and_validate(skb, IPPROTO_IPIP)) + goto drop; + + if (!pskb_may_pull(skb, sizeof(struct iphdr))) + goto drop; + + skb->protocol = htons(ETH_P_IP); + + iph = ip_hdr(skb); + + nhaddr = slwt->nh4.s_addr ?: iph->daddr; + + skb_dst_drop(skb); + + err = ip_route_input(skb, nhaddr, iph->saddr, 0, skb->dev); + if (err) + goto drop; + + return dst_input(skb); + +drop: + kfree_skb(skb); + return -EINVAL; +} + +static int input_action_end_dt6(struct sk_buff *skb, + struct seg6_local_lwt *slwt) +{ + if (!decap_and_validate(skb, IPPROTO_IPV6)) + goto drop; + + if (!pskb_may_pull(skb, sizeof(struct ipv6hdr))) + goto drop; + + lookup_nexthop(skb, NULL, slwt->table); + + return dst_input(skb); + +drop: + kfree_skb(skb); + return -EINVAL; +} + +/* push an SRH on top of the current one */ +static int input_action_end_b6(struct sk_buff *skb, struct seg6_local_lwt *slwt) +{ + struct ipv6_sr_hdr *srh; + int err = -EINVAL; + + srh = get_and_validate_srh(skb); + if (!srh) + goto drop; + + err = seg6_do_srh_inline(skb, slwt->srh); + if (err) + goto drop; + + ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); + skb_set_transport_header(skb, sizeof(struct ipv6hdr)); + + lookup_nexthop(skb, NULL, 0); + + return dst_input(skb); + +drop: + kfree_skb(skb); + return err; +} + +/* encapsulate within an outer IPv6 header and a specified SRH */ +static int input_action_end_b6_encap(struct sk_buff *skb, + struct seg6_local_lwt *slwt) +{ + struct ipv6_sr_hdr *srh; + int err = -EINVAL; + + srh = get_and_validate_srh(skb); + if (!srh) + goto drop; + + advance_nextseg(srh, &ipv6_hdr(skb)->daddr); + + skb_reset_inner_headers(skb); + skb->encapsulation = 1; + + err = seg6_do_srh_encap(skb, slwt->srh, IPPROTO_IPV6); + if (err) + goto drop; + + ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); + skb_set_transport_header(skb, sizeof(struct ipv6hdr)); + + lookup_nexthop(skb, NULL, 0); + + return dst_input(skb); + +drop: + kfree_skb(skb); + return err; +} + +static struct seg6_action_desc seg6_action_table[] = { + { + .action = SEG6_LOCAL_ACTION_END, + .attrs = 0, + .input = input_action_end, + }, + { + .action = SEG6_LOCAL_ACTION_END_X, + .attrs = (1 << SEG6_LOCAL_NH6), + .input = input_action_end_x, + }, + { + .action = SEG6_LOCAL_ACTION_END_T, + .attrs = (1 << SEG6_LOCAL_TABLE), + .input = input_action_end_t, + }, + { + .action = SEG6_LOCAL_ACTION_END_DX2, + .attrs = (1 << SEG6_LOCAL_OIF), + .input = input_action_end_dx2, + }, + { + .action = SEG6_LOCAL_ACTION_END_DX6, + .attrs = (1 << SEG6_LOCAL_NH6), + .input = input_action_end_dx6, + }, + { + .action = SEG6_LOCAL_ACTION_END_DX4, + .attrs = (1 << SEG6_LOCAL_NH4), + .input = input_action_end_dx4, + }, + { + .action = SEG6_LOCAL_ACTION_END_DT6, + .attrs = (1 << SEG6_LOCAL_TABLE), + .input = input_action_end_dt6, + }, + { + .action = SEG6_LOCAL_ACTION_END_B6, + .attrs = (1 << SEG6_LOCAL_SRH), + .input = input_action_end_b6, + }, + { + .action = SEG6_LOCAL_ACTION_END_B6_ENCAP, + .attrs = (1 << SEG6_LOCAL_SRH), + .input = input_action_end_b6_encap, + .static_headroom = sizeof(struct ipv6hdr), + } +}; + +static struct seg6_action_desc *__get_action_desc(int action) +{ + struct seg6_action_desc *desc; + int i, count; + + count = sizeof(seg6_action_table) / sizeof(struct seg6_action_desc); + for (i = 0; i < count; i++) { + desc = &seg6_action_table[i]; + if (desc->action == action) + return desc; + } + + return NULL; +} + +static int seg6_local_input(struct sk_buff *skb) +{ + struct dst_entry *orig_dst = skb_dst(skb); + struct seg6_action_desc *desc; + struct seg6_local_lwt *slwt; + + if (skb->protocol != htons(ETH_P_IPV6)) { + kfree_skb(skb); + return -EINVAL; + } + + slwt = seg6_local_lwtunnel(orig_dst->lwtstate); + desc = slwt->desc; + + return desc->input(skb, slwt); +} + +static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = { + [SEG6_LOCAL_ACTION] = { .type = NLA_U32 }, + [SEG6_LOCAL_SRH] = { .type = NLA_BINARY }, + [SEG6_LOCAL_TABLE] = { .type = NLA_U32 }, + [SEG6_LOCAL_NH4] = { .type = NLA_BINARY, + .len = sizeof(struct in_addr) }, + [SEG6_LOCAL_NH6] = { .type = NLA_BINARY, + .len = sizeof(struct in6_addr) }, + [SEG6_LOCAL_IIF] = { .type = NLA_U32 }, + [SEG6_LOCAL_OIF] = { .type = NLA_U32 }, +}; + +static int parse_nla_srh(struct nlattr **attrs, struct seg6_local_lwt *slwt) +{ + struct ipv6_sr_hdr *srh; + int len; + + srh = nla_data(attrs[SEG6_LOCAL_SRH]); + len = nla_len(attrs[SEG6_LOCAL_SRH]); + + /* SRH must contain at least one segment */ + if (len < sizeof(*srh) + sizeof(struct in6_addr)) + return -EINVAL; + + if (!seg6_validate_srh(srh, len)) + return -EINVAL; + + slwt->srh = kmalloc(len, GFP_KERNEL); + if (!slwt->srh) + return -ENOMEM; + + memcpy(slwt->srh, srh, len); + + slwt->headroom += len; + + return 0; +} + +static int put_nla_srh(struct sk_buff *skb, struct seg6_local_lwt *slwt) +{ + struct ipv6_sr_hdr *srh; + struct nlattr *nla; + int len; + + srh = slwt->srh; + len = (srh->hdrlen + 1) << 3; + + nla = nla_reserve(skb, SEG6_LOCAL_SRH, len); + if (!nla) + return -EMSGSIZE; + + memcpy(nla_data(nla), srh, len); + + return 0; +} + +static int cmp_nla_srh(struct seg6_local_lwt *a, struct seg6_local_lwt *b) +{ + int len = (a->srh->hdrlen + 1) << 3; + + if (len != ((b->srh->hdrlen + 1) << 3)) + return 1; + + return memcmp(a->srh, b->srh, len); +} + +static int parse_nla_table(struct nlattr **attrs, struct seg6_local_lwt *slwt) +{ + slwt->table = nla_get_u32(attrs[SEG6_LOCAL_TABLE]); + + return 0; +} + +static int put_nla_table(struct sk_buff *skb, struct seg6_local_lwt *slwt) +{ + if (nla_put_u32(skb, SEG6_LOCAL_TABLE, slwt->table)) + return -EMSGSIZE; + + return 0; +} + +static int cmp_nla_table(struct seg6_local_lwt *a, struct seg6_local_lwt *b) +{ + if (a->table != b->table) + return 1; + + return 0; +} + +static int parse_nla_nh4(struct nlattr **attrs, struct seg6_local_lwt *slwt) +{ + memcpy(&slwt->nh4, nla_data(attrs[SEG6_LOCAL_NH4]), + sizeof(struct in_addr)); + + return 0; +} + +static int put_nla_nh4(struct sk_buff *skb, struct seg6_local_lwt *slwt) +{ + struct nlattr *nla; + + nla = nla_reserve(skb, SEG6_LOCAL_NH4, sizeof(struct in_addr)); + if (!nla) + return -EMSGSIZE; + + memcpy(nla_data(nla), &slwt->nh4, sizeof(struct in_addr)); + + return 0; +} + +static int cmp_nla_nh4(struct seg6_local_lwt *a, struct seg6_local_lwt *b) +{ + return memcmp(&a->nh4, &b->nh4, sizeof(struct in_addr)); +} + +static int parse_nla_nh6(struct nlattr **attrs, struct seg6_local_lwt *slwt) +{ + memcpy(&slwt->nh6, nla_data(attrs[SEG6_LOCAL_NH6]), + sizeof(struct in6_addr)); + + return 0; +} + +static int put_nla_nh6(struct sk_buff *skb, struct seg6_local_lwt *slwt) +{ + struct nlattr *nla; + + nla = nla_reserve(skb, SEG6_LOCAL_NH6, sizeof(struct in6_addr)); + if (!nla) + return -EMSGSIZE; + + memcpy(nla_data(nla), &slwt->nh6, sizeof(struct in6_addr)); + + return 0; +} + +static int cmp_nla_nh6(struct seg6_local_lwt *a, struct seg6_local_lwt *b) +{ + return memcmp(&a->nh6, &b->nh6, sizeof(struct in6_addr)); +} + +static int parse_nla_iif(struct nlattr **attrs, struct seg6_local_lwt *slwt) +{ + slwt->iif = nla_get_u32(attrs[SEG6_LOCAL_IIF]); + + return 0; +} + +static int put_nla_iif(struct sk_buff *skb, struct seg6_local_lwt *slwt) +{ + if (nla_put_u32(skb, SEG6_LOCAL_IIF, slwt->iif)) + return -EMSGSIZE; + + return 0; +} + +static int cmp_nla_iif(struct seg6_local_lwt *a, struct seg6_local_lwt *b) +{ + if (a->iif != b->iif) + return 1; + + return 0; +} + +static int parse_nla_oif(struct nlattr **attrs, struct seg6_local_lwt *slwt) +{ + slwt->oif = nla_get_u32(attrs[SEG6_LOCAL_OIF]); + + return 0; +} + +static int put_nla_oif(struct sk_buff *skb, struct seg6_local_lwt *slwt) +{ + if (nla_put_u32(skb, SEG6_LOCAL_OIF, slwt->oif)) + return -EMSGSIZE; + + return 0; +} + +static int cmp_nla_oif(struct seg6_local_lwt *a, struct seg6_local_lwt *b) +{ + if (a->oif != b->oif) + return 1; + + return 0; +} + +struct seg6_action_param { + int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt); + int (*put)(struct sk_buff *skb, struct seg6_local_lwt *slwt); + int (*cmp)(struct seg6_local_lwt *a, struct seg6_local_lwt *b); +}; + +static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = { + [SEG6_LOCAL_SRH] = { .parse = parse_nla_srh, + .put = put_nla_srh, + .cmp = cmp_nla_srh }, + + [SEG6_LOCAL_TABLE] = { .parse = parse_nla_table, + .put = put_nla_table, + .cmp = cmp_nla_table }, + + [SEG6_LOCAL_NH4] = { .parse = parse_nla_nh4, + .put = put_nla_nh4, + .cmp = cmp_nla_nh4 }, + + [SEG6_LOCAL_NH6] = { .parse = parse_nla_nh6, + .put = put_nla_nh6, + .cmp = cmp_nla_nh6 }, + + [SEG6_LOCAL_IIF] = { .parse = parse_nla_iif, + .put = put_nla_iif, + .cmp = cmp_nla_iif }, + + [SEG6_LOCAL_OIF] = { .parse = parse_nla_oif, + .put = put_nla_oif, + .cmp = cmp_nla_oif }, +}; + +static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt) +{ + struct seg6_action_param *param; + struct seg6_action_desc *desc; + int i, err; + + desc = __get_action_desc(slwt->action); + if (!desc) + return -EINVAL; + + if (!desc->input) + return -EOPNOTSUPP; + + slwt->desc = desc; + slwt->headroom += desc->static_headroom; + + for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) { + if (desc->attrs & (1 << i)) { + if (!attrs[i]) + return -EINVAL; + + param = &seg6_action_params[i]; + + err = param->parse(attrs, slwt); + if (err < 0) + return err; + } + } + + return 0; +} + +static int seg6_local_build_state(struct nlattr *nla, unsigned int family, + const void *cfg, struct lwtunnel_state **ts, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[SEG6_LOCAL_MAX + 1]; + struct lwtunnel_state *newts; + struct seg6_local_lwt *slwt; + int err; + + if (family != AF_INET6) + return -EINVAL; + + err = nla_parse_nested(tb, SEG6_LOCAL_MAX, nla, seg6_local_policy, + extack); + + if (err < 0) + return err; + + if (!tb[SEG6_LOCAL_ACTION]) + return -EINVAL; + + newts = lwtunnel_state_alloc(sizeof(*slwt)); + if (!newts) + return -ENOMEM; + + slwt = seg6_local_lwtunnel(newts); + slwt->action = nla_get_u32(tb[SEG6_LOCAL_ACTION]); + + err = parse_nla_action(tb, slwt); + if (err < 0) + goto out_free; + + newts->type = LWTUNNEL_ENCAP_SEG6_LOCAL; + newts->flags = LWTUNNEL_STATE_INPUT_REDIRECT; + newts->headroom = slwt->headroom; + + *ts = newts; + + return 0; + +out_free: + kfree(slwt->srh); + kfree(newts); + return err; +} + +static void seg6_local_destroy_state(struct lwtunnel_state *lwt) +{ + struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt); + + kfree(slwt->srh); +} + +static int seg6_local_fill_encap(struct sk_buff *skb, + struct lwtunnel_state *lwt) +{ + struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt); + struct seg6_action_param *param; + int i, err; + + if (nla_put_u32(skb, SEG6_LOCAL_ACTION, slwt->action)) + return -EMSGSIZE; + + for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) { + if (slwt->desc->attrs & (1 << i)) { + param = &seg6_action_params[i]; + err = param->put(skb, slwt); + if (err < 0) + return err; + } + } + + return 0; +} + +static int seg6_local_get_encap_size(struct lwtunnel_state *lwt) +{ + struct seg6_local_lwt *slwt = seg6_local_lwtunnel(lwt); + unsigned long attrs; + int nlsize; + + nlsize = nla_total_size(4); /* action */ + + attrs = slwt->desc->attrs; + + if (attrs & (1 << SEG6_LOCAL_SRH)) + nlsize += nla_total_size((slwt->srh->hdrlen + 1) << 3); + + if (attrs & (1 << SEG6_LOCAL_TABLE)) + nlsize += nla_total_size(4); + + if (attrs & (1 << SEG6_LOCAL_NH4)) + nlsize += nla_total_size(4); + + if (attrs & (1 << SEG6_LOCAL_NH6)) + nlsize += nla_total_size(16); + + if (attrs & (1 << SEG6_LOCAL_IIF)) + nlsize += nla_total_size(4); + + if (attrs & (1 << SEG6_LOCAL_OIF)) + nlsize += nla_total_size(4); + + return nlsize; +} + +static int seg6_local_cmp_encap(struct lwtunnel_state *a, + struct lwtunnel_state *b) +{ + struct seg6_local_lwt *slwt_a, *slwt_b; + struct seg6_action_param *param; + int i; + + slwt_a = seg6_local_lwtunnel(a); + slwt_b = seg6_local_lwtunnel(b); + + if (slwt_a->action != slwt_b->action) + return 1; + + if (slwt_a->desc->attrs != slwt_b->desc->attrs) + return 1; + + for (i = 0; i < SEG6_LOCAL_MAX + 1; i++) { + if (slwt_a->desc->attrs & (1 << i)) { + param = &seg6_action_params[i]; + if (param->cmp(slwt_a, slwt_b)) + return 1; + } + } + + return 0; +} + +static const struct lwtunnel_encap_ops seg6_local_ops = { + .build_state = seg6_local_build_state, + .destroy_state = seg6_local_destroy_state, + .input = seg6_local_input, + .fill_encap = seg6_local_fill_encap, + .get_encap_size = seg6_local_get_encap_size, + .cmp_encap = seg6_local_cmp_encap, + .owner = THIS_MODULE, +}; + +int __init seg6_local_init(void) +{ + return lwtunnel_encap_add_ops(&seg6_local_ops, + LWTUNNEL_ENCAP_SEG6_LOCAL); +} + +void seg6_local_exit(void) +{ + lwtunnel_encap_del_ops(&seg6_local_ops, LWTUNNEL_ENCAP_SEG6_LOCAL); +} diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index ac912bb21747..d60ddcb0bfe2 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -91,29 +91,35 @@ struct sit_net { * Must be invoked with rcu_read_lock */ static struct ip_tunnel *ipip6_tunnel_lookup(struct net *net, - struct net_device *dev, __be32 remote, __be32 local) + struct net_device *dev, + __be32 remote, __be32 local, + int sifindex) { unsigned int h0 = HASH(remote); unsigned int h1 = HASH(local); struct ip_tunnel *t; struct sit_net *sitn = net_generic(net, sit_net_id); + int ifindex = dev ? dev->ifindex : 0; for_each_ip_tunnel_rcu(t, sitn->tunnels_r_l[h0 ^ h1]) { if (local == t->parms.iph.saddr && remote == t->parms.iph.daddr && - (!dev || !t->parms.link || dev->ifindex == t->parms.link) && + (!dev || !t->parms.link || ifindex == t->parms.link || + sifindex == t->parms.link) && (t->dev->flags & IFF_UP)) return t; } for_each_ip_tunnel_rcu(t, sitn->tunnels_r[h0]) { if (remote == t->parms.iph.daddr && - (!dev || !t->parms.link || dev->ifindex == t->parms.link) && + (!dev || !t->parms.link || ifindex == t->parms.link || + sifindex == t->parms.link) && (t->dev->flags & IFF_UP)) return t; } for_each_ip_tunnel_rcu(t, sitn->tunnels_l[h1]) { if (local == t->parms.iph.saddr && - (!dev || !t->parms.link || dev->ifindex == t->parms.link) && + (!dev || !t->parms.link || ifindex == t->parms.link || + sifindex == t->parms.link) && (t->dev->flags & IFF_UP)) return t; } @@ -486,6 +492,7 @@ static int ipip6_err(struct sk_buff *skb, u32 info) const int code = icmp_hdr(skb)->code; unsigned int data_len = 0; struct ip_tunnel *t; + int sifindex; int err; switch (type) { @@ -517,10 +524,9 @@ static int ipip6_err(struct sk_buff *skb, u32 info) err = -ENOENT; - t = ipip6_tunnel_lookup(dev_net(skb->dev), - skb->dev, - iph->daddr, - iph->saddr); + sifindex = netif_is_l3_master(skb->dev) ? IPCB(skb)->iif : 0; + t = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, + iph->daddr, iph->saddr, sifindex); if (!t) goto out; @@ -633,10 +639,12 @@ static int ipip6_rcv(struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); struct ip_tunnel *tunnel; + int sifindex; int err; + sifindex = netif_is_l3_master(skb->dev) ? IPCB(skb)->iif : 0; tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, - iph->saddr, iph->daddr); + iph->saddr, iph->daddr, sifindex); if (tunnel) { struct pcpu_sw_netstats *tstats; @@ -704,10 +712,13 @@ static int sit_tunnel_rcv(struct sk_buff *skb, u8 ipproto) { const struct iphdr *iph; struct ip_tunnel *tunnel; + int sifindex; + + sifindex = netif_is_l3_master(skb->dev) ? IPCB(skb)->iif : 0; iph = ip_hdr(skb); tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, - iph->saddr, iph->daddr); + iph->saddr, iph->daddr, sifindex); if (tunnel) { const struct tnl_ptk_info *tpi; @@ -1848,19 +1859,22 @@ err_alloc_dev: return err; } -static void __net_exit sit_exit_net(struct net *net) +static void __net_exit sit_exit_batch_net(struct list_head *net_list) { LIST_HEAD(list); + struct net *net; rtnl_lock(); - sit_destroy_tunnels(net, &list); + list_for_each_entry(net, net_list, exit_list) + sit_destroy_tunnels(net, &list); + unregister_netdevice_many(&list); rtnl_unlock(); } static struct pernet_operations sit_net_ops = { .init = sit_init_net, - .exit = sit_exit_net, + .exit_batch = sit_exit_batch_net, .id = &sit_net_id, .size = sizeof(struct sit_net), }; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 4e7817abc0b9..e7a3a6b6cf56 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -244,7 +244,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) } req->rsk_window_clamp = tp->window_clamp ? :dst_metric(dst, RTAX_WINDOW); - tcp_select_initial_window(tcp_full_space(sk), req->mss, + tcp_select_initial_window(sk, tcp_full_space(sk), req->mss, &req->rsk_rcv_wnd, &req->rsk_window_clamp, ireq->wscale_ok, &rcv_wscale, dst_metric(dst, RTAX_INITRWND)); diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 69c50e737c54..a789a8ac6a64 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * sysctl_net_ipv6.c: sysctl interface to net IPV6 subsystem. * @@ -90,6 +91,41 @@ static struct ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "flowlabel_reflect", + .data = &init_net.ipv6.sysctl.flowlabel_reflect, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "max_dst_opts_number", + .data = &init_net.ipv6.sysctl.max_dst_opts_cnt, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "max_hbh_opts_number", + .data = &init_net.ipv6.sysctl.max_hbh_opts_cnt, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "max_dst_opts_length", + .data = &init_net.ipv6.sysctl.max_dst_opts_len, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "max_hbh_length", + .data = &init_net.ipv6.sysctl.max_hbh_opts_len, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; @@ -149,6 +185,11 @@ static int __net_init ipv6_sysctl_net_init(struct net *net) ipv6_table[6].data = &net->ipv6.sysctl.idgen_delay; ipv6_table[7].data = &net->ipv6.sysctl.flowlabel_state_ranges; ipv6_table[8].data = &net->ipv6.sysctl.ip_nonlocal_bind; + ipv6_table[9].data = &net->ipv6.sysctl.flowlabel_reflect; + ipv6_table[10].data = &net->ipv6.sysctl.max_dst_opts_cnt; + ipv6_table[11].data = &net->ipv6.sysctl.max_hbh_opts_cnt; + ipv6_table[12].data = &net->ipv6.sysctl.max_dst_opts_len; + ipv6_table[13].data = &net->ipv6.sysctl.max_hbh_opts_len; ipv6_route_table = ipv6_route_sysctl_init(net); if (!ipv6_route_table) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 206210125fd7..6bb98c93edfe 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -69,6 +69,8 @@ #include <crypto/hash.h> #include <linux/scatterlist.h> +#include <trace/events/tcp.h> + static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb); static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb, struct request_sock *req); @@ -350,7 +352,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, sk = __inet6_lookup_established(net, &tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr, ntohs(th->source), - skb->dev->ifindex); + skb->dev->ifindex, inet6_sdif(skb)); if (!sk) { __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), @@ -890,7 +892,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) int genhash; struct sock *sk1 = NULL; #endif - int oif; + int oif = 0; if (th->rst) return; @@ -918,7 +920,8 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) &tcp_hashinfo, NULL, 0, &ipv6h->saddr, th->source, &ipv6h->daddr, - ntohs(th->source), tcp_v6_iif(skb)); + ntohs(th->source), tcp_v6_iif(skb), + tcp_v6_sdif(skb)); if (!sk1) goto out; @@ -938,7 +941,11 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len - (th->doff << 2); - oif = sk ? sk->sk_bound_dev_if : 0; + if (sk) { + oif = sk->sk_bound_dev_if; + trace_tcp_send_reset(sk, skb); + } + tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0); #ifdef CONFIG_TCP_MD5SIG @@ -1296,7 +1303,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) } } - tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len); + tcp_rcv_established(sk, skb, tcp_hdr(skb)); if (opt_skb) goto ipv6_pktoptions; return 0; @@ -1393,10 +1400,13 @@ static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr, TCP_SKB_CB(skb)->tcp_tw_isn = 0; TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr); TCP_SKB_CB(skb)->sacked = 0; + TCP_SKB_CB(skb)->has_rxtstamp = + skb->tstamp || skb_hwtstamps(skb)->hwtstamp; } static int tcp_v6_rcv(struct sk_buff *skb) { + int sdif = inet6_sdif(skb); const struct tcphdr *th; const struct ipv6hdr *hdr; bool refcounted; @@ -1430,7 +1440,7 @@ static int tcp_v6_rcv(struct sk_buff *skb) lookup: sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), - th->source, th->dest, inet6_iif(skb), + th->source, th->dest, inet6_iif(skb), sdif, &refcounted); if (!sk) goto no_tcp_socket; @@ -1456,9 +1466,9 @@ process: } sock_hold(sk); refcounted = true; - if (tcp_filter(sk, skb)) - goto discard_and_relse; - nsk = tcp_check_req(sk, skb, req, false); + nsk = NULL; + if (!tcp_filter(sk, skb)) + nsk = tcp_check_req(sk, skb, req, false); if (!nsk) { reqsk_put(req); goto discard_and_relse; @@ -1505,8 +1515,7 @@ process: tcp_segs_in(tcp_sk(sk), skb); ret = 0; if (!sock_owned_by_user(sk)) { - if (!tcp_prequeue(sk, skb)) - ret = tcp_v6_do_rcv(sk, skb); + ret = tcp_v6_do_rcv(sk, skb); } else if (tcp_add_backlog(sk, skb)) { goto discard_and_relse; } @@ -1564,7 +1573,8 @@ do_time_wait: skb, __tcp_hdrlen(th), &ipv6_hdr(skb)->saddr, th->source, &ipv6_hdr(skb)->daddr, - ntohs(th->dest), tcp_v6_iif(skb)); + ntohs(th->dest), tcp_v6_iif(skb), + sdif); if (sk2) { struct inet_timewait_sock *tw = inet_twsk(sk); inet_twsk_deschedule_put(tw); @@ -1573,8 +1583,9 @@ do_time_wait: refcounted = false; goto process; } - /* Fall through to ACK */ } + /* to ACK */ + /* fall through */ case TCP_TW_ACK: tcp_v6_timewait_ack(sk, skb); break; @@ -1611,7 +1622,7 @@ static void tcp_v6_early_demux(struct sk_buff *skb) sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo, &hdr->saddr, th->source, &hdr->daddr, ntohs(th->dest), - inet6_iif(skb)); + inet6_iif(skb), inet6_sdif(skb)); if (sk) { skb->sk = sk; skb->destructor = sock_edemux; @@ -1929,8 +1940,8 @@ struct proto tcpv6_prot = { .memory_pressure = &tcp_memory_pressure, .orphan_count = &tcp_orphan_count, .sysctl_mem = sysctl_tcp_mem, - .sysctl_wmem = sysctl_tcp_wmem, - .sysctl_rmem = sysctl_tcp_rmem, + .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem), + .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem), .max_header = MAX_TCP_HEADER, .obj_size = sizeof(struct tcp6_sock), .slab_flags = SLAB_TYPESAFE_BY_RCU, @@ -1945,6 +1956,9 @@ struct proto tcpv6_prot = { .diag_destroy = tcp_abort, }; +/* thinking of making this const? Don't. + * early_demux can change based on sysctl. + */ static struct inet6_protocol tcpv6_protocol = { .early_demux = tcp_v6_early_demux, .early_demux_handler = tcp_v6_early_demux, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 578142b7ca3e..3f30fa313bf2 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -129,7 +129,7 @@ static void udp_v6_rehash(struct sock *sk) static int compute_score(struct sock *sk, struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, unsigned short hnum, - int dif, bool exact_dif) + int dif, int sdif, bool exact_dif) { int score; struct inet_sock *inet; @@ -161,9 +161,13 @@ static int compute_score(struct sock *sk, struct net *net, } if (sk->sk_bound_dev_if || exact_dif) { - if (sk->sk_bound_dev_if != dif) + bool dev_match = (sk->sk_bound_dev_if == dif || + sk->sk_bound_dev_if == sdif); + + if (exact_dif && !dev_match) return -1; - score++; + if (sk->sk_bound_dev_if && dev_match) + score++; } if (sk->sk_incoming_cpu == raw_smp_processor_id()) @@ -175,9 +179,9 @@ static int compute_score(struct sock *sk, struct net *net, /* called with rcu_read_lock() */ static struct sock *udp6_lib_lookup2(struct net *net, const struct in6_addr *saddr, __be16 sport, - const struct in6_addr *daddr, unsigned int hnum, int dif, - bool exact_dif, struct udp_hslot *hslot2, - struct sk_buff *skb) + const struct in6_addr *daddr, unsigned int hnum, + int dif, int sdif, bool exact_dif, + struct udp_hslot *hslot2, struct sk_buff *skb) { struct sock *sk, *result; int score, badness, matches = 0, reuseport = 0; @@ -187,7 +191,7 @@ static struct sock *udp6_lib_lookup2(struct net *net, badness = -1; udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { score = compute_score(sk, net, saddr, sport, - daddr, hnum, dif, exact_dif); + daddr, hnum, dif, sdif, exact_dif); if (score > badness) { reuseport = sk->sk_reuseport; if (reuseport) { @@ -214,10 +218,10 @@ static struct sock *udp6_lib_lookup2(struct net *net, /* rcu_read_lock() must be held */ struct sock *__udp6_lib_lookup(struct net *net, - const struct in6_addr *saddr, __be16 sport, - const struct in6_addr *daddr, __be16 dport, - int dif, struct udp_table *udptable, - struct sk_buff *skb) + const struct in6_addr *saddr, __be16 sport, + const struct in6_addr *daddr, __be16 dport, + int dif, int sdif, struct udp_table *udptable, + struct sk_buff *skb) { struct sock *sk, *result; unsigned short hnum = ntohs(dport); @@ -235,7 +239,7 @@ struct sock *__udp6_lib_lookup(struct net *net, goto begin; result = udp6_lib_lookup2(net, saddr, sport, - daddr, hnum, dif, exact_dif, + daddr, hnum, dif, sdif, exact_dif, hslot2, skb); if (!result) { unsigned int old_slot2 = slot2; @@ -250,7 +254,7 @@ struct sock *__udp6_lib_lookup(struct net *net, goto begin; result = udp6_lib_lookup2(net, saddr, sport, - daddr, hnum, dif, + daddr, hnum, dif, sdif, exact_dif, hslot2, skb); } @@ -261,7 +265,7 @@ begin: badness = -1; sk_for_each_rcu(sk, &hslot->head) { score = compute_score(sk, net, saddr, sport, daddr, hnum, dif, - exact_dif); + sdif, exact_dif); if (score > badness) { reuseport = sk->sk_reuseport; if (reuseport) { @@ -294,7 +298,7 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport, &iph->daddr, dport, inet6_iif(skb), - udptable, skb); + inet6_sdif(skb), udptable, skb); } struct sock *udp6_lib_lookup_skb(struct sk_buff *skb, @@ -304,7 +308,7 @@ struct sock *udp6_lib_lookup_skb(struct sk_buff *skb, return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport, &iph->daddr, dport, inet6_iif(skb), - &udp_table, skb); + inet6_sdif(skb), &udp_table, skb); } EXPORT_SYMBOL_GPL(udp6_lib_lookup_skb); @@ -320,7 +324,7 @@ struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be struct sock *sk; sk = __udp6_lib_lookup(net, saddr, sport, daddr, dport, - dif, &udp_table, NULL); + dif, 0, &udp_table, NULL); if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; return sk; @@ -362,7 +366,8 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, return ipv6_recv_rxpmtu(sk, msg, len, addr_len); try_again: - peeking = off = sk_peek_offset(sk, flags); + peeking = flags & MSG_PEEK; + off = sk_peek_offset(sk, flags); skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err); if (!skb) return err; @@ -501,7 +506,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, struct net *net = dev_net(skb->dev); sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, - inet6_iif(skb), udptable, skb); + inet6_iif(skb), 0, udptable, skb); if (!sk) { __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); @@ -574,8 +579,7 @@ static __inline__ void udpv6_err(struct sk_buff *skb, static struct static_key udpv6_encap_needed __read_mostly; void udpv6_encap_enable(void) { - if (!static_key_enabled(&udpv6_encap_needed)) - static_key_slow_inc(&udpv6_encap_needed); + static_key_enable(&udpv6_encap_needed); } EXPORT_SYMBOL(udpv6_encap_enable); @@ -602,7 +606,7 @@ static int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) */ /* if we're overly short, let UDP handle it */ - encap_rcv = ACCESS_ONCE(up->encap_rcv); + encap_rcv = READ_ONCE(up->encap_rcv); if (encap_rcv) { int ret; @@ -767,6 +771,15 @@ start_lookup: return 0; } +static void udp6_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) +{ + if (udp_sk_rx_dst_set(sk, dst)) { + const struct rt6_info *rt = (const struct rt6_info *)dst; + + inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt); + } +} + int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, int proto) { @@ -816,7 +829,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, int ret; if (unlikely(sk->sk_rx_dst != dst)) - udp_sk_rx_dst_set(sk, dst); + udp6_sk_rx_dst_set(sk, dst); ret = udpv6_queue_rcv_skb(sk, skb); sock_put(sk); @@ -893,7 +906,7 @@ discard: static struct sock *__udp6_lib_demux_lookup(struct net *net, __be16 loc_port, const struct in6_addr *loc_addr, __be16 rmt_port, const struct in6_addr *rmt_addr, - int dif) + int dif, int sdif) { unsigned short hnum = ntohs(loc_port); unsigned int hash2 = udp6_portaddr_hash(net, loc_addr, hnum); @@ -904,7 +917,7 @@ static struct sock *__udp6_lib_demux_lookup(struct net *net, udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { if (sk->sk_state == TCP_ESTABLISHED && - INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif)) + INET6_MATCH(sk, net, rmt_addr, loc_addr, ports, dif, sdif)) return sk; /* Only check first socket in chain */ break; @@ -919,6 +932,7 @@ static void udp_v6_early_demux(struct sk_buff *skb) struct sock *sk; struct dst_entry *dst; int dif = skb->dev->ifindex; + int sdif = inet6_sdif(skb); if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct udphdr))) @@ -930,7 +944,7 @@ static void udp_v6_early_demux(struct sk_buff *skb) sk = __udp6_lib_demux_lookup(net, uh->dest, &ipv6_hdr(skb)->daddr, uh->source, &ipv6_hdr(skb)->saddr, - dif); + dif, sdif); else return; @@ -1001,6 +1015,7 @@ static void udp6_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb, */ offset = skb_transport_offset(skb); skb->csum = skb_checksum(skb, offset, skb->len - offset, 0); + csum = skb->csum; skb->ip_summed = CHECKSUM_NONE; @@ -1417,7 +1432,7 @@ void udpv6_destroy_sock(struct sock *sk) if (static_key_false(&udpv6_encap_needed) && up->encap_type) { void (*encap_destroy)(struct sock *sk); - encap_destroy = ACCESS_ONCE(up->encap_destroy); + encap_destroy = READ_ONCE(up->encap_destroy); if (encap_destroy) encap_destroy(sk); } @@ -1466,6 +1481,9 @@ int compat_udpv6_getsockopt(struct sock *sk, int level, int optname, } #endif +/* thinking of making this const? Don't. + * early_demux can change based on sysctl. + */ static struct inet6_protocol udpv6_protocol = { .early_demux = udp_v6_early_demux, .early_demux_handler = udp_v6_early_demux, diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index f180b3d85e31..7903e21c178b 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _UDP6_IMPL_H #define _UDP6_IMPL_H #include <net/udp.h> diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index e7d378c032cb..455fd4e39333 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -17,109 +17,15 @@ #include <net/ip6_checksum.h> #include "ip6_offload.h" -static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, - netdev_features_t features) +static struct sk_buff *udp6_tunnel_segment(struct sk_buff *skb, + netdev_features_t features) { struct sk_buff *segs = ERR_PTR(-EINVAL); - unsigned int mss; - unsigned int unfrag_ip6hlen, unfrag_len; - struct frag_hdr *fptr; - u8 *packet_start, *prevhdr; - u8 nexthdr; - u8 frag_hdr_sz = sizeof(struct frag_hdr); - __wsum csum; - int tnl_hlen; - int err; - - mss = skb_shinfo(skb)->gso_size; - if (unlikely(skb->len <= mss)) - goto out; - - if (skb_gso_ok(skb, features | NETIF_F_GSO_ROBUST)) { - /* Packet is from an untrusted source, reset gso_segs. */ - - skb_shinfo(skb)->gso_segs = DIV_ROUND_UP(skb->len, mss); - - /* Set the IPv6 fragment id if not set yet */ - if (!skb_shinfo(skb)->ip6_frag_id) - ipv6_proxy_select_ident(dev_net(skb->dev), skb); - - segs = NULL; - goto out; - } if (skb->encapsulation && skb_shinfo(skb)->gso_type & (SKB_GSO_UDP_TUNNEL|SKB_GSO_UDP_TUNNEL_CSUM)) segs = skb_udp_tunnel_segment(skb, features, true); - else { - const struct ipv6hdr *ipv6h; - struct udphdr *uh; - - if (!pskb_may_pull(skb, sizeof(struct udphdr))) - goto out; - - /* Do software UFO. Complete and fill in the UDP checksum as HW cannot - * do checksum of UDP packets sent as multiple IP fragments. - */ - - uh = udp_hdr(skb); - ipv6h = ipv6_hdr(skb); - - uh->check = 0; - csum = skb_checksum(skb, 0, skb->len, 0); - uh->check = udp_v6_check(skb->len, &ipv6h->saddr, - &ipv6h->daddr, csum); - if (uh->check == 0) - uh->check = CSUM_MANGLED_0; - - skb->ip_summed = CHECKSUM_UNNECESSARY; - - /* If there is no outer header we can fake a checksum offload - * due to the fact that we have already done the checksum in - * software prior to segmenting the frame. - */ - if (!skb->encap_hdr_csum) - features |= NETIF_F_HW_CSUM; - - /* Check if there is enough headroom to insert fragment header. */ - tnl_hlen = skb_tnl_header_len(skb); - if (skb->mac_header < (tnl_hlen + frag_hdr_sz)) { - if (gso_pskb_expand_head(skb, tnl_hlen + frag_hdr_sz)) - goto out; - } - - /* Find the unfragmentable header and shift it left by frag_hdr_sz - * bytes to insert fragment header. - */ - err = ip6_find_1stfragopt(skb, &prevhdr); - if (err < 0) - return ERR_PTR(err); - unfrag_ip6hlen = err; - nexthdr = *prevhdr; - *prevhdr = NEXTHDR_FRAGMENT; - unfrag_len = (skb_network_header(skb) - skb_mac_header(skb)) + - unfrag_ip6hlen + tnl_hlen; - packet_start = (u8 *) skb->head + SKB_GSO_CB(skb)->mac_offset; - memmove(packet_start-frag_hdr_sz, packet_start, unfrag_len); - - SKB_GSO_CB(skb)->mac_offset -= frag_hdr_sz; - skb->mac_header -= frag_hdr_sz; - skb->network_header -= frag_hdr_sz; - - fptr = (struct frag_hdr *)(skb_network_header(skb) + unfrag_ip6hlen); - fptr->nexthdr = nexthdr; - fptr->reserved = 0; - if (!skb_shinfo(skb)->ip6_frag_id) - ipv6_proxy_select_ident(dev_net(skb->dev), skb); - fptr->identification = skb_shinfo(skb)->ip6_frag_id; - - /* Fragment the skb. ipv6 header and the remaining fields of the - * fragment header are updated in ipv6_gso_segment() - */ - segs = skb_segment(skb, features); - } -out: return segs; } @@ -169,7 +75,7 @@ static int udp6_gro_complete(struct sk_buff *skb, int nhoff) static const struct net_offload udpv6_offload = { .callbacks = { - .gso_segment = udp6_ufo_fragment, + .gso_segment = udp6_tunnel_segment, .gro_receive = udp6_gro_receive, .gro_complete = udp6_gro_complete, }, diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 3ef5d913e7a3..fe04e23af986 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * xfrm6_input.c: based on net/ipv4/xfrm4_input.c * @@ -34,6 +35,7 @@ EXPORT_SYMBOL(xfrm6_rcv_spi); int xfrm6_transport_finish(struct sk_buff *skb, int async) { struct xfrm_offload *xo = xfrm_offload(skb); + int nhlen = skb->data - skb_network_header(skb); skb_network_header(skb)[IP6CB(skb)->nhoff] = XFRM_MODE_SKB_CB(skb)->protocol; @@ -43,8 +45,9 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) return 1; #endif - __skb_push(skb, skb->data - skb_network_header(skb)); + __skb_push(skb, nhlen); ipv6_hdr(skb)->payload_len = htons(skb->len - sizeof(struct ipv6hdr)); + skb_postpush_rcsum(skb, skb_network_header(skb), nhlen); if (xo && (xo->flags & XFRM_GRO)) { skb_mac_header_rebuild(skb); diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 79651bc71bf0..885ade234a49 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * xfrm6_policy.c: based on xfrm4_policy.c * @@ -27,7 +28,8 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif, const xfrm_address_t *saddr, - const xfrm_address_t *daddr) + const xfrm_address_t *daddr, + u32 mark) { struct flowi6 fl6; struct dst_entry *dst; @@ -36,6 +38,7 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif, memset(&fl6, 0, sizeof(fl6)); fl6.flowi6_oif = l3mdev_master_ifindex_by_index(net, oif); fl6.flowi6_flags = FLOWI_FLAG_SKIP_NH_OIF; + fl6.flowi6_mark = mark; memcpy(&fl6.daddr, daddr, sizeof(fl6.daddr)); if (saddr) memcpy(&fl6.saddr, saddr, sizeof(fl6.saddr)); @@ -52,12 +55,13 @@ static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, int oif, } static int xfrm6_get_saddr(struct net *net, int oif, - xfrm_address_t *saddr, xfrm_address_t *daddr) + xfrm_address_t *saddr, xfrm_address_t *daddr, + u32 mark) { struct dst_entry *dst; struct net_device *dev; - dst = xfrm6_dst_lookup(net, 0, oif, NULL, daddr); + dst = xfrm6_dst_lookup(net, 0, oif, NULL, daddr, mark); if (IS_ERR(dst)) return -EHOSTUNREACH; @@ -149,6 +153,7 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) switch (nexthdr) { case NEXTHDR_FRAGMENT: onlyproto = 1; + /* fall through */ case NEXTHDR_ROUTING: case NEXTHDR_HOP: case NEXTHDR_DEST: @@ -214,14 +219,6 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) } } -static inline int xfrm6_garbage_collect(struct dst_ops *ops) -{ - struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops); - - xfrm_garbage_collect_deferred(net); - return dst_entries_get_fast(ops) > ops->gc_thresh * 2; -} - static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu) { @@ -279,14 +276,13 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, static struct dst_ops xfrm6_dst_ops_template = { .family = AF_INET6, - .gc = xfrm6_garbage_collect, .update_pmtu = xfrm6_update_pmtu, .redirect = xfrm6_redirect, .cow_metrics = dst_cow_metrics_generic, .destroy = xfrm6_dst_destroy, .ifdown = xfrm6_dst_ifdown, .local_out = __ip6_local_out, - .gc_thresh = INT_MAX, + .gc_thresh = 32768, }; static const struct xfrm_policy_afinfo xfrm6_policy_afinfo = { diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 8a1f9c0d2a13..b15075a5c227 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * xfrm6_state.c: based on xfrm4_state.c * diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 4e438bc7ee87..f85f0d7480ac 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -338,6 +338,14 @@ static int __net_init xfrm6_tunnel_net_init(struct net *net) static void __net_exit xfrm6_tunnel_net_exit(struct net *net) { + struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); + unsigned int i; + + for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++) + WARN_ON_ONCE(!hlist_empty(&xfrm6_tn->spi_byaddr[i])); + + for (i = 0; i < XFRM6_TUNNEL_SPI_BYSPI_HSIZE; i++) + WARN_ON_ONCE(!hlist_empty(&xfrm6_tn->spi_byspi[i])); } static struct pernet_operations xfrm6_tunnel_net_ops = { diff --git a/net/ipx/af_ipx.c b/net/ipx/af_ipx.c index ac598ec90589..d21a9d128d3e 100644 --- a/net/ipx/af_ipx.c +++ b/net/ipx/af_ipx.c @@ -1867,6 +1867,7 @@ static int ipx_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) rc = -EPERM; if (!capable(CAP_NET_ADMIN)) break; + /* fall through */ case SIOCGIFADDR: rc = ipxitf_ioctl(cmd, argp); break; diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c index 7d75e4c5c75d..38a3d51d9ead 100644 --- a/net/ipx/ipx_proc.c +++ b/net/ipx/ipx_proc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * IPX proc routines * diff --git a/net/ipx/ipx_route.c b/net/ipx/ipx_route.c index b5d91447f3dc..3cf93aa9f284 100644 --- a/net/ipx/ipx_route.c +++ b/net/ipx/ipx_route.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Implements the IPX routing routines. * Code moved from af_ipx.c. diff --git a/net/ipx/pe2.c b/net/ipx/pe2.c index 32dcd601ab32..ba7d4214bbff 100644 --- a/net/ipx/pe2.c +++ b/net/ipx/pe2.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/in.h> #include <linux/mm.h> #include <linux/module.h> diff --git a/net/ipx/sysctl_net_ipx.c b/net/ipx/sysctl_net_ipx.c index 0dafcc561ed6..c3eef457db88 100644 --- a/net/ipx/sysctl_net_ipx.c +++ b/net/ipx/sysctl_net_ipx.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* -*- linux-c -*- * sysctl_net_ipx.c: sysctl interface to net IPX subsystem. * diff --git a/net/irda/Kconfig b/net/irda/Kconfig deleted file mode 100644 index c8671a7ffb3c..000000000000 --- a/net/irda/Kconfig +++ /dev/null @@ -1,96 +0,0 @@ -# -# IrDA protocol configuration -# - -menuconfig IRDA - depends on NET && !S390 - tristate "IrDA (infrared) subsystem support" - select CRC_CCITT - ---help--- - Say Y here if you want to build support for the IrDA (TM) protocols. - The Infrared Data Associations (tm) specifies standards for wireless - infrared communication and is supported by most laptops and PDA's. - - To use Linux support for the IrDA (tm) protocols, you will also need - some user-space utilities like irattach. For more information, see - the file <file:Documentation/networking/irda.txt>. You also want to - read the IR-HOWTO, available at - <http://www.tldp.org/docs.html#howto>. - - If you want to exchange bits of data (vCal, vCard) with a PDA, you - will need to install some OBEX application, such as OpenObex : - <http://sourceforge.net/projects/openobex/> - - To compile this support as a module, choose M here: the module will - be called irda. - -comment "IrDA protocols" - depends on IRDA - -source "net/irda/irlan/Kconfig" - -source "net/irda/irnet/Kconfig" - -source "net/irda/ircomm/Kconfig" - -config IRDA_ULTRA - bool "Ultra (connectionless) protocol" - depends on IRDA - help - Say Y here to support the connectionless Ultra IRDA protocol. - Ultra allows to exchange data over IrDA with really simple devices - (watch, beacon) without the overhead of the IrDA protocol (no handshaking, - no management frames, simple fixed header). - Ultra is available as a special socket : socket(AF_IRDA, SOCK_DGRAM, 1); - -comment "IrDA options" - depends on IRDA - -config IRDA_CACHE_LAST_LSAP - bool "Cache last LSAP" - depends on IRDA - help - Say Y here if you want IrLMP to cache the last LSAP used. This - makes sense since most frames will be sent/received on the same - connection. Enabling this option will save a hash-lookup per frame. - - If unsure, say Y. - -config IRDA_FAST_RR - bool "Fast RRs (low latency)" - depends on IRDA - ---help--- - Say Y here is you want IrLAP to send fast RR (Receive Ready) frames - when acting as a primary station. - Disabling this option will make latency over IrDA very bad. Enabling - this option will make the IrDA stack send more packet than strictly - necessary, thus reduce your battery life (but not that much). - - Fast RR will make IrLAP send out a RR frame immediately when - receiving a frame if its own transmit queue is currently empty. This - will give a lot of speed improvement when receiving much data since - the secondary station will not have to wait the max. turn around - time (usually 500ms) before it is allowed to transmit the next time. - If the transmit queue of the secondary is also empty, the primary will - start backing-off before sending another RR frame, waiting longer - each time until the back-off reaches the max. turn around time. - This back-off increase in controlled via - /proc/sys/net/irda/fast_poll_increase - - If unsure, say Y. - -config IRDA_DEBUG - bool "Debug information" - depends on IRDA - help - Say Y here if you want the IrDA subsystem to write debug information - to your syslog. You can change the debug level in - /proc/sys/net/irda/debug . - When this option is enabled, the IrDA also perform many extra internal - verifications which will usually prevent the kernel to crash in case of - bugs. - - If unsure, say Y (since it makes it easier to find the bugs). - -source "drivers/net/irda/Kconfig" - diff --git a/net/irda/Makefile b/net/irda/Makefile deleted file mode 100644 index 187f6c563a4b..000000000000 --- a/net/irda/Makefile +++ /dev/null @@ -1,15 +0,0 @@ -# -# Makefile for the Linux IrDA protocol layer. -# - -obj-$(CONFIG_IRDA) += irda.o -obj-$(CONFIG_IRLAN) += irlan/ -obj-$(CONFIG_IRNET) += irnet/ -obj-$(CONFIG_IRCOMM) += ircomm/ - -irda-y := iriap.o iriap_event.o irlmp.o irlmp_event.o irlmp_frame.o \ - irlap.o irlap_event.o irlap_frame.o timer.o qos.o irqueue.o \ - irttp.o irda_device.o irias_object.o wrapper.o af_irda.o \ - discovery.o parameters.o irnetlink.o irmod.o -irda-$(CONFIG_PROC_FS) += irproc.o -irda-$(CONFIG_SYSCTL) += irsysctl.o diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c deleted file mode 100644 index 2e6990f8b80b..000000000000 --- a/net/irda/af_irda.c +++ /dev/null @@ -1,2695 +0,0 @@ -/********************************************************************* - * - * Filename: af_irda.c - * Version: 0.9 - * Description: IrDA sockets implementation - * Status: Stable - * Author: Dag Brattli <dagb@cs.uit.no> - * Created at: Sun May 31 10:12:43 1998 - * Modified at: Sat Dec 25 21:10:23 1999 - * Modified by: Dag Brattli <dag@brattli.net> - * Sources: af_netroom.c, af_ax25.c, af_rose.c, af_x25.c etc. - * - * Copyright (c) 1999 Dag Brattli <dagb@cs.uit.no> - * Copyright (c) 1999-2003 Jean Tourrilhes <jt@hpl.hp.com> - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. - * - * Linux-IrDA now supports four different types of IrDA sockets: - * - * o SOCK_STREAM: TinyTP connections with SAR disabled. The - * max SDU size is 0 for conn. of this type - * o SOCK_SEQPACKET: TinyTP connections with SAR enabled. TTP may - * fragment the messages, but will preserve - * the message boundaries - * o SOCK_DGRAM: IRDAPROTO_UNITDATA: TinyTP connections with Unitdata - * (unreliable) transfers - * IRDAPROTO_ULTRA: Connectionless and unreliable data - * - ********************************************************************/ - -#include <linux/capability.h> -#include <linux/module.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/sockios.h> -#include <linux/slab.h> -#include <linux/sched/signal.h> -#include <linux/init.h> -#include <linux/net.h> -#include <linux/irda.h> -#include <linux/poll.h> - -#include <asm/ioctls.h> /* TIOCOUTQ, TIOCINQ */ -#include <linux/uaccess.h> - -#include <net/sock.h> -#include <net/tcp_states.h> - -#include <net/irda/af_irda.h> - -static int irda_create(struct net *net, struct socket *sock, int protocol, int kern); - -static const struct proto_ops irda_stream_ops; -static const struct proto_ops irda_seqpacket_ops; -static const struct proto_ops irda_dgram_ops; - -#ifdef CONFIG_IRDA_ULTRA -static const struct proto_ops irda_ultra_ops; -#define ULTRA_MAX_DATA 382 -#endif /* CONFIG_IRDA_ULTRA */ - -#define IRDA_MAX_HEADER (TTP_MAX_HEADER) - -/* - * Function irda_data_indication (instance, sap, skb) - * - * Received some data from TinyTP. Just queue it on the receive queue - * - */ -static int irda_data_indication(void *instance, void *sap, struct sk_buff *skb) -{ - struct irda_sock *self; - struct sock *sk; - int err; - - self = instance; - sk = instance; - - err = sock_queue_rcv_skb(sk, skb); - if (err) { - pr_debug("%s(), error: no more mem!\n", __func__); - self->rx_flow = FLOW_STOP; - - /* When we return error, TTP will need to requeue the skb */ - return err; - } - - return 0; -} - -/* - * Function irda_disconnect_indication (instance, sap, reason, skb) - * - * Connection has been closed. Check reason to find out why - * - */ -static void irda_disconnect_indication(void *instance, void *sap, - LM_REASON reason, struct sk_buff *skb) -{ - struct irda_sock *self; - struct sock *sk; - - self = instance; - - pr_debug("%s(%p)\n", __func__, self); - - /* Don't care about it, but let's not leak it */ - if(skb) - dev_kfree_skb(skb); - - sk = instance; - if (sk == NULL) { - pr_debug("%s(%p) : BUG : sk is NULL\n", - __func__, self); - return; - } - - /* Prevent race conditions with irda_release() and irda_shutdown() */ - bh_lock_sock(sk); - if (!sock_flag(sk, SOCK_DEAD) && sk->sk_state != TCP_CLOSE) { - sk->sk_state = TCP_CLOSE; - sk->sk_shutdown |= SEND_SHUTDOWN; - - sk->sk_state_change(sk); - - /* Close our TSAP. - * If we leave it open, IrLMP put it back into the list of - * unconnected LSAPs. The problem is that any incoming request - * can then be matched to this socket (and it will be, because - * it is at the head of the list). This would prevent any - * listening socket waiting on the same TSAP to get those - * requests. Some apps forget to close sockets, or hang to it - * a bit too long, so we may stay in this dead state long - * enough to be noticed... - * Note : all socket function do check sk->sk_state, so we are - * safe... - * Jean II - */ - if (self->tsap) { - irttp_close_tsap(self->tsap); - self->tsap = NULL; - } - } - bh_unlock_sock(sk); - - /* Note : once we are there, there is not much you want to do - * with the socket anymore, apart from closing it. - * For example, bind() and connect() won't reset sk->sk_err, - * sk->sk_shutdown and sk->sk_flags to valid values... - * Jean II - */ -} - -/* - * Function irda_connect_confirm (instance, sap, qos, max_sdu_size, skb) - * - * Connections has been confirmed by the remote device - * - */ -static void irda_connect_confirm(void *instance, void *sap, - struct qos_info *qos, - __u32 max_sdu_size, __u8 max_header_size, - struct sk_buff *skb) -{ - struct irda_sock *self; - struct sock *sk; - - self = instance; - - pr_debug("%s(%p)\n", __func__, self); - - sk = instance; - if (sk == NULL) { - dev_kfree_skb(skb); - return; - } - - dev_kfree_skb(skb); - // Should be ??? skb_queue_tail(&sk->sk_receive_queue, skb); - - /* How much header space do we need to reserve */ - self->max_header_size = max_header_size; - - /* IrTTP max SDU size in transmit direction */ - self->max_sdu_size_tx = max_sdu_size; - - /* Find out what the largest chunk of data that we can transmit is */ - switch (sk->sk_type) { - case SOCK_STREAM: - if (max_sdu_size != 0) { - net_err_ratelimited("%s: max_sdu_size must be 0\n", - __func__); - return; - } - self->max_data_size = irttp_get_max_seg_size(self->tsap); - break; - case SOCK_SEQPACKET: - if (max_sdu_size == 0) { - net_err_ratelimited("%s: max_sdu_size cannot be 0\n", - __func__); - return; - } - self->max_data_size = max_sdu_size; - break; - default: - self->max_data_size = irttp_get_max_seg_size(self->tsap); - } - - pr_debug("%s(), max_data_size=%d\n", __func__, - self->max_data_size); - - memcpy(&self->qos_tx, qos, sizeof(struct qos_info)); - - /* We are now connected! */ - sk->sk_state = TCP_ESTABLISHED; - sk->sk_state_change(sk); -} - -/* - * Function irda_connect_indication(instance, sap, qos, max_sdu_size, userdata) - * - * Incoming connection - * - */ -static void irda_connect_indication(void *instance, void *sap, - struct qos_info *qos, __u32 max_sdu_size, - __u8 max_header_size, struct sk_buff *skb) -{ - struct irda_sock *self; - struct sock *sk; - - self = instance; - - pr_debug("%s(%p)\n", __func__, self); - - sk = instance; - if (sk == NULL) { - dev_kfree_skb(skb); - return; - } - - /* How much header space do we need to reserve */ - self->max_header_size = max_header_size; - - /* IrTTP max SDU size in transmit direction */ - self->max_sdu_size_tx = max_sdu_size; - - /* Find out what the largest chunk of data that we can transmit is */ - switch (sk->sk_type) { - case SOCK_STREAM: - if (max_sdu_size != 0) { - net_err_ratelimited("%s: max_sdu_size must be 0\n", - __func__); - kfree_skb(skb); - return; - } - self->max_data_size = irttp_get_max_seg_size(self->tsap); - break; - case SOCK_SEQPACKET: - if (max_sdu_size == 0) { - net_err_ratelimited("%s: max_sdu_size cannot be 0\n", - __func__); - kfree_skb(skb); - return; - } - self->max_data_size = max_sdu_size; - break; - default: - self->max_data_size = irttp_get_max_seg_size(self->tsap); - } - - pr_debug("%s(), max_data_size=%d\n", __func__, - self->max_data_size); - - memcpy(&self->qos_tx, qos, sizeof(struct qos_info)); - - skb_queue_tail(&sk->sk_receive_queue, skb); - sk->sk_state_change(sk); -} - -/* - * Function irda_connect_response (handle) - * - * Accept incoming connection - * - */ -static void irda_connect_response(struct irda_sock *self) -{ - struct sk_buff *skb; - - skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER, GFP_KERNEL); - if (skb == NULL) { - pr_debug("%s() Unable to allocate sk_buff!\n", - __func__); - return; - } - - /* Reserve space for MUX_CONTROL and LAP header */ - skb_reserve(skb, IRDA_MAX_HEADER); - - irttp_connect_response(self->tsap, self->max_sdu_size_rx, skb); -} - -/* - * Function irda_flow_indication (instance, sap, flow) - * - * Used by TinyTP to tell us if it can accept more data or not - * - */ -static void irda_flow_indication(void *instance, void *sap, LOCAL_FLOW flow) -{ - struct irda_sock *self; - struct sock *sk; - - self = instance; - sk = instance; - BUG_ON(sk == NULL); - - switch (flow) { - case FLOW_STOP: - pr_debug("%s(), IrTTP wants us to slow down\n", - __func__); - self->tx_flow = flow; - break; - case FLOW_START: - self->tx_flow = flow; - pr_debug("%s(), IrTTP wants us to start again\n", - __func__); - wake_up_interruptible(sk_sleep(sk)); - break; - default: - pr_debug("%s(), Unknown flow command!\n", __func__); - /* Unknown flow command, better stop */ - self->tx_flow = flow; - break; - } -} - -/* - * Function irda_getvalue_confirm (obj_id, value, priv) - * - * Got answer from remote LM-IAS, just pass object to requester... - * - * Note : duplicate from above, but we need our own version that - * doesn't touch the dtsap_sel and save the full value structure... - */ -static void irda_getvalue_confirm(int result, __u16 obj_id, - struct ias_value *value, void *priv) -{ - struct irda_sock *self; - - self = priv; - if (!self) { - net_warn_ratelimited("%s: lost myself!\n", __func__); - return; - } - - pr_debug("%s(%p)\n", __func__, self); - - /* We probably don't need to make any more queries */ - iriap_close(self->iriap); - self->iriap = NULL; - - /* Check if request succeeded */ - if (result != IAS_SUCCESS) { - pr_debug("%s(), IAS query failed! (%d)\n", __func__, - result); - - self->errno = result; /* We really need it later */ - - /* Wake up any processes waiting for result */ - wake_up_interruptible(&self->query_wait); - - return; - } - - /* Pass the object to the caller (so the caller must delete it) */ - self->ias_result = value; - self->errno = 0; - - /* Wake up any processes waiting for result */ - wake_up_interruptible(&self->query_wait); -} - -/* - * Function irda_selective_discovery_indication (discovery) - * - * Got a selective discovery indication from IrLMP. - * - * IrLMP is telling us that this node is new and matching our hint bit - * filter. Wake up any process waiting for answer... - */ -static void irda_selective_discovery_indication(discinfo_t *discovery, - DISCOVERY_MODE mode, - void *priv) -{ - struct irda_sock *self; - - self = priv; - if (!self) { - net_warn_ratelimited("%s: lost myself!\n", __func__); - return; - } - - /* Pass parameter to the caller */ - self->cachedaddr = discovery->daddr; - - /* Wake up process if its waiting for device to be discovered */ - wake_up_interruptible(&self->query_wait); -} - -/* - * Function irda_discovery_timeout (priv) - * - * Timeout in the selective discovery process - * - * We were waiting for a node to be discovered, but nothing has come up - * so far. Wake up the user and tell him that we failed... - */ -static void irda_discovery_timeout(u_long priv) -{ - struct irda_sock *self; - - self = (struct irda_sock *) priv; - BUG_ON(self == NULL); - - /* Nothing for the caller */ - self->cachelog = NULL; - self->cachedaddr = 0; - self->errno = -ETIME; - - /* Wake up process if its still waiting... */ - wake_up_interruptible(&self->query_wait); -} - -/* - * Function irda_open_tsap (self) - * - * Open local Transport Service Access Point (TSAP) - * - */ -static int irda_open_tsap(struct irda_sock *self, __u8 tsap_sel, char *name) -{ - notify_t notify; - - if (self->tsap) { - pr_debug("%s: busy!\n", __func__); - return -EBUSY; - } - - /* Initialize callbacks to be used by the IrDA stack */ - irda_notify_init(¬ify); - notify.connect_confirm = irda_connect_confirm; - notify.connect_indication = irda_connect_indication; - notify.disconnect_indication = irda_disconnect_indication; - notify.data_indication = irda_data_indication; - notify.udata_indication = irda_data_indication; - notify.flow_indication = irda_flow_indication; - notify.instance = self; - strncpy(notify.name, name, NOTIFY_MAX_NAME); - - self->tsap = irttp_open_tsap(tsap_sel, DEFAULT_INITIAL_CREDIT, - ¬ify); - if (self->tsap == NULL) { - pr_debug("%s(), Unable to allocate TSAP!\n", - __func__); - return -ENOMEM; - } - /* Remember which TSAP selector we actually got */ - self->stsap_sel = self->tsap->stsap_sel; - - return 0; -} - -/* - * Function irda_open_lsap (self) - * - * Open local Link Service Access Point (LSAP). Used for opening Ultra - * sockets - */ -#ifdef CONFIG_IRDA_ULTRA -static int irda_open_lsap(struct irda_sock *self, int pid) -{ - notify_t notify; - - if (self->lsap) { - net_warn_ratelimited("%s(), busy!\n", __func__); - return -EBUSY; - } - - /* Initialize callbacks to be used by the IrDA stack */ - irda_notify_init(¬ify); - notify.udata_indication = irda_data_indication; - notify.instance = self; - strncpy(notify.name, "Ultra", NOTIFY_MAX_NAME); - - self->lsap = irlmp_open_lsap(LSAP_CONNLESS, ¬ify, pid); - if (self->lsap == NULL) { - pr_debug("%s(), Unable to allocate LSAP!\n", __func__); - return -ENOMEM; - } - - return 0; -} -#endif /* CONFIG_IRDA_ULTRA */ - -/* - * Function irda_find_lsap_sel (self, name) - * - * Try to lookup LSAP selector in remote LM-IAS - * - * Basically, we start a IAP query, and then go to sleep. When the query - * return, irda_getvalue_confirm will wake us up, and we can examine the - * result of the query... - * Note that in some case, the query fail even before we go to sleep, - * creating some races... - */ -static int irda_find_lsap_sel(struct irda_sock *self, char *name) -{ - pr_debug("%s(%p, %s)\n", __func__, self, name); - - if (self->iriap) { - net_warn_ratelimited("%s(): busy with a previous query\n", - __func__); - return -EBUSY; - } - - self->iriap = iriap_open(LSAP_ANY, IAS_CLIENT, self, - irda_getvalue_confirm); - if(self->iriap == NULL) - return -ENOMEM; - - /* Treat unexpected wakeup as disconnect */ - self->errno = -EHOSTUNREACH; - - /* Query remote LM-IAS */ - iriap_getvaluebyclass_request(self->iriap, self->saddr, self->daddr, - name, "IrDA:TinyTP:LsapSel"); - - /* Wait for answer, if not yet finished (or failed) */ - if (wait_event_interruptible(self->query_wait, (self->iriap==NULL))) - /* Treat signals as disconnect */ - return -EHOSTUNREACH; - - /* Check what happened */ - if (self->errno) - { - /* Requested object/attribute doesn't exist */ - if((self->errno == IAS_CLASS_UNKNOWN) || - (self->errno == IAS_ATTRIB_UNKNOWN)) - return -EADDRNOTAVAIL; - else - return -EHOSTUNREACH; - } - - /* Get the remote TSAP selector */ - switch (self->ias_result->type) { - case IAS_INTEGER: - pr_debug("%s() int=%d\n", - __func__, self->ias_result->t.integer); - - if (self->ias_result->t.integer != -1) - self->dtsap_sel = self->ias_result->t.integer; - else - self->dtsap_sel = 0; - break; - default: - self->dtsap_sel = 0; - pr_debug("%s(), bad type!\n", __func__); - break; - } - if (self->ias_result) - irias_delete_value(self->ias_result); - - if (self->dtsap_sel) - return 0; - - return -EADDRNOTAVAIL; -} - -/* - * Function irda_discover_daddr_and_lsap_sel (self, name) - * - * This try to find a device with the requested service. - * - * It basically look into the discovery log. For each address in the list, - * it queries the LM-IAS of the device to find if this device offer - * the requested service. - * If there is more than one node supporting the service, we complain - * to the user (it should move devices around). - * The, we set both the destination address and the lsap selector to point - * on the service on the unique device we have found. - * - * Note : this function fails if there is more than one device in range, - * because IrLMP doesn't disconnect the LAP when the last LSAP is closed. - * Moreover, we would need to wait the LAP disconnection... - */ -static int irda_discover_daddr_and_lsap_sel(struct irda_sock *self, char *name) -{ - discinfo_t *discoveries; /* Copy of the discovery log */ - int number; /* Number of nodes in the log */ - int i; - int err = -ENETUNREACH; - __u32 daddr = DEV_ADDR_ANY; /* Address we found the service on */ - __u8 dtsap_sel = 0x0; /* TSAP associated with it */ - - pr_debug("%s(), name=%s\n", __func__, name); - - /* Ask lmp for the current discovery log - * Note : we have to use irlmp_get_discoveries(), as opposed - * to play with the cachelog directly, because while we are - * making our ias query, le log might change... */ - discoveries = irlmp_get_discoveries(&number, self->mask.word, - self->nslots); - /* Check if the we got some results */ - if (discoveries == NULL) - return -ENETUNREACH; /* No nodes discovered */ - - /* - * Now, check all discovered devices (if any), and connect - * client only about the services that the client is - * interested in... - */ - for(i = 0; i < number; i++) { - /* Try the address in the log */ - self->daddr = discoveries[i].daddr; - self->saddr = 0x0; - pr_debug("%s(), trying daddr = %08x\n", - __func__, self->daddr); - - /* Query remote LM-IAS for this service */ - err = irda_find_lsap_sel(self, name); - switch (err) { - case 0: - /* We found the requested service */ - if(daddr != DEV_ADDR_ANY) { - pr_debug("%s(), discovered service ''%s'' in two different devices !!!\n", - __func__, name); - self->daddr = DEV_ADDR_ANY; - kfree(discoveries); - return -ENOTUNIQ; - } - /* First time we found that one, save it ! */ - daddr = self->daddr; - dtsap_sel = self->dtsap_sel; - break; - case -EADDRNOTAVAIL: - /* Requested service simply doesn't exist on this node */ - break; - default: - /* Something bad did happen :-( */ - pr_debug("%s(), unexpected IAS query failure\n", - __func__); - self->daddr = DEV_ADDR_ANY; - kfree(discoveries); - return -EHOSTUNREACH; - } - } - /* Cleanup our copy of the discovery log */ - kfree(discoveries); - - /* Check out what we found */ - if(daddr == DEV_ADDR_ANY) { - pr_debug("%s(), cannot discover service ''%s'' in any device !!!\n", - __func__, name); - self->daddr = DEV_ADDR_ANY; - return -EADDRNOTAVAIL; - } - - /* Revert back to discovered device & service */ - self->daddr = daddr; - self->saddr = 0x0; - self->dtsap_sel = dtsap_sel; - - pr_debug("%s(), discovered requested service ''%s'' at address %08x\n", - __func__, name, self->daddr); - - return 0; -} - -/* - * Function irda_getname (sock, uaddr, uaddr_len, peer) - * - * Return the our own, or peers socket address (sockaddr_irda) - * - */ -static int irda_getname(struct socket *sock, struct sockaddr *uaddr, - int *uaddr_len, int peer) -{ - struct sockaddr_irda saddr; - struct sock *sk = sock->sk; - struct irda_sock *self = irda_sk(sk); - - memset(&saddr, 0, sizeof(saddr)); - if (peer) { - if (sk->sk_state != TCP_ESTABLISHED) - return -ENOTCONN; - - saddr.sir_family = AF_IRDA; - saddr.sir_lsap_sel = self->dtsap_sel; - saddr.sir_addr = self->daddr; - } else { - saddr.sir_family = AF_IRDA; - saddr.sir_lsap_sel = self->stsap_sel; - saddr.sir_addr = self->saddr; - } - - pr_debug("%s(), tsap_sel = %#x\n", __func__, saddr.sir_lsap_sel); - pr_debug("%s(), addr = %08x\n", __func__, saddr.sir_addr); - - /* uaddr_len come to us uninitialised */ - *uaddr_len = sizeof (struct sockaddr_irda); - memcpy(uaddr, &saddr, *uaddr_len); - - return 0; -} - -/* - * Function irda_listen (sock, backlog) - * - * Just move to the listen state - * - */ -static int irda_listen(struct socket *sock, int backlog) -{ - struct sock *sk = sock->sk; - int err = -EOPNOTSUPP; - - lock_sock(sk); - - if ((sk->sk_type != SOCK_STREAM) && (sk->sk_type != SOCK_SEQPACKET) && - (sk->sk_type != SOCK_DGRAM)) - goto out; - - if (sk->sk_state != TCP_LISTEN) { - sk->sk_max_ack_backlog = backlog; - sk->sk_state = TCP_LISTEN; - - err = 0; - } -out: - release_sock(sk); - - return err; -} - -/* - * Function irda_bind (sock, uaddr, addr_len) - * - * Used by servers to register their well known TSAP - * - */ -static int irda_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) -{ - struct sock *sk = sock->sk; - struct sockaddr_irda *addr = (struct sockaddr_irda *) uaddr; - struct irda_sock *self = irda_sk(sk); - int err; - - pr_debug("%s(%p)\n", __func__, self); - - if (addr_len != sizeof(struct sockaddr_irda)) - return -EINVAL; - - lock_sock(sk); -#ifdef CONFIG_IRDA_ULTRA - /* Special care for Ultra sockets */ - if ((sk->sk_type == SOCK_DGRAM) && - (sk->sk_protocol == IRDAPROTO_ULTRA)) { - self->pid = addr->sir_lsap_sel; - err = -EOPNOTSUPP; - if (self->pid & 0x80) { - pr_debug("%s(), extension in PID not supp!\n", - __func__); - goto out; - } - err = irda_open_lsap(self, self->pid); - if (err < 0) - goto out; - - /* Pretend we are connected */ - sock->state = SS_CONNECTED; - sk->sk_state = TCP_ESTABLISHED; - err = 0; - - goto out; - } -#endif /* CONFIG_IRDA_ULTRA */ - - self->ias_obj = irias_new_object(addr->sir_name, jiffies); - err = -ENOMEM; - if (self->ias_obj == NULL) - goto out; - - err = irda_open_tsap(self, addr->sir_lsap_sel, addr->sir_name); - if (err < 0) { - irias_delete_object(self->ias_obj); - self->ias_obj = NULL; - goto out; - } - - /* Register with LM-IAS */ - irias_add_integer_attrib(self->ias_obj, "IrDA:TinyTP:LsapSel", - self->stsap_sel, IAS_KERNEL_ATTR); - irias_insert_object(self->ias_obj); - - err = 0; -out: - release_sock(sk); - return err; -} - -/* - * Function irda_accept (sock, newsock, flags) - * - * Wait for incoming connection - * - */ -static int irda_accept(struct socket *sock, struct socket *newsock, int flags, - bool kern) -{ - struct sock *sk = sock->sk; - struct irda_sock *new, *self = irda_sk(sk); - struct sock *newsk; - struct sk_buff *skb = NULL; - int err; - - err = irda_create(sock_net(sk), newsock, sk->sk_protocol, kern); - if (err) - return err; - - err = -EINVAL; - - lock_sock(sk); - if (sock->state != SS_UNCONNECTED) - goto out; - - err = -EOPNOTSUPP; - if ((sk->sk_type != SOCK_STREAM) && (sk->sk_type != SOCK_SEQPACKET) && - (sk->sk_type != SOCK_DGRAM)) - goto out; - - err = -EINVAL; - if (sk->sk_state != TCP_LISTEN) - goto out; - - /* - * The read queue this time is holding sockets ready to use - * hooked into the SABM we saved - */ - - /* - * We can perform the accept only if there is incoming data - * on the listening socket. - * So, we will block the caller until we receive any data. - * If the caller was waiting on select() or poll() before - * calling us, the data is waiting for us ;-) - * Jean II - */ - while (1) { - skb = skb_dequeue(&sk->sk_receive_queue); - if (skb) - break; - - /* Non blocking operation */ - err = -EWOULDBLOCK; - if (flags & O_NONBLOCK) - goto out; - - err = wait_event_interruptible(*(sk_sleep(sk)), - skb_peek(&sk->sk_receive_queue)); - if (err) - goto out; - } - - newsk = newsock->sk; - err = -EIO; - if (newsk == NULL) - goto out; - - newsk->sk_state = TCP_ESTABLISHED; - - new = irda_sk(newsk); - - /* Now attach up the new socket */ - new->tsap = irttp_dup(self->tsap, new); - err = -EPERM; /* value does not seem to make sense. -arnd */ - if (!new->tsap) { - pr_debug("%s(), dup failed!\n", __func__); - goto out; - } - - new->stsap_sel = new->tsap->stsap_sel; - new->dtsap_sel = new->tsap->dtsap_sel; - new->saddr = irttp_get_saddr(new->tsap); - new->daddr = irttp_get_daddr(new->tsap); - - new->max_sdu_size_tx = self->max_sdu_size_tx; - new->max_sdu_size_rx = self->max_sdu_size_rx; - new->max_data_size = self->max_data_size; - new->max_header_size = self->max_header_size; - - memcpy(&new->qos_tx, &self->qos_tx, sizeof(struct qos_info)); - - /* Clean up the original one to keep it in listen state */ - irttp_listen(self->tsap); - - sk->sk_ack_backlog--; - - newsock->state = SS_CONNECTED; - - irda_connect_response(new); - err = 0; -out: - kfree_skb(skb); - release_sock(sk); - return err; -} - -/* - * Function irda_connect (sock, uaddr, addr_len, flags) - * - * Connect to a IrDA device - * - * The main difference with a "standard" connect is that with IrDA we need - * to resolve the service name into a TSAP selector (in TCP, port number - * doesn't have to be resolved). - * Because of this service name resolution, we can offer "auto-connect", - * where we connect to a service without specifying a destination address. - * - * Note : by consulting "errno", the user space caller may learn the cause - * of the failure. Most of them are visible in the function, others may come - * from subroutines called and are listed here : - * o EBUSY : already processing a connect - * o EHOSTUNREACH : bad addr->sir_addr argument - * o EADDRNOTAVAIL : bad addr->sir_name argument - * o ENOTUNIQ : more than one node has addr->sir_name (auto-connect) - * o ENETUNREACH : no node found on the network (auto-connect) - */ -static int irda_connect(struct socket *sock, struct sockaddr *uaddr, - int addr_len, int flags) -{ - struct sock *sk = sock->sk; - struct sockaddr_irda *addr = (struct sockaddr_irda *) uaddr; - struct irda_sock *self = irda_sk(sk); - int err; - - pr_debug("%s(%p)\n", __func__, self); - - lock_sock(sk); - /* Don't allow connect for Ultra sockets */ - err = -ESOCKTNOSUPPORT; - if ((sk->sk_type == SOCK_DGRAM) && (sk->sk_protocol == IRDAPROTO_ULTRA)) - goto out; - - if (sk->sk_state == TCP_ESTABLISHED && sock->state == SS_CONNECTING) { - sock->state = SS_CONNECTED; - err = 0; - goto out; /* Connect completed during a ERESTARTSYS event */ - } - - if (sk->sk_state == TCP_CLOSE && sock->state == SS_CONNECTING) { - sock->state = SS_UNCONNECTED; - err = -ECONNREFUSED; - goto out; - } - - err = -EISCONN; /* No reconnect on a seqpacket socket */ - if (sk->sk_state == TCP_ESTABLISHED) - goto out; - - sk->sk_state = TCP_CLOSE; - sock->state = SS_UNCONNECTED; - - err = -EINVAL; - if (addr_len != sizeof(struct sockaddr_irda)) - goto out; - - /* Check if user supplied any destination device address */ - if ((!addr->sir_addr) || (addr->sir_addr == DEV_ADDR_ANY)) { - /* Try to find one suitable */ - err = irda_discover_daddr_and_lsap_sel(self, addr->sir_name); - if (err) { - pr_debug("%s(), auto-connect failed!\n", __func__); - goto out; - } - } else { - /* Use the one provided by the user */ - self->daddr = addr->sir_addr; - pr_debug("%s(), daddr = %08x\n", __func__, self->daddr); - - /* If we don't have a valid service name, we assume the - * user want to connect on a specific LSAP. Prevent - * the use of invalid LSAPs (IrLMP 1.1 p10). Jean II */ - if((addr->sir_name[0] != '\0') || - (addr->sir_lsap_sel >= 0x70)) { - /* Query remote LM-IAS using service name */ - err = irda_find_lsap_sel(self, addr->sir_name); - if (err) { - pr_debug("%s(), connect failed!\n", __func__); - goto out; - } - } else { - /* Directly connect to the remote LSAP - * specified by the sir_lsap field. - * Please use with caution, in IrDA LSAPs are - * dynamic and there is no "well-known" LSAP. */ - self->dtsap_sel = addr->sir_lsap_sel; - } - } - - /* Check if we have opened a local TSAP */ - if (!self->tsap) { - err = irda_open_tsap(self, LSAP_ANY, addr->sir_name); - if (err) - goto out; - } - - /* Move to connecting socket, start sending Connect Requests */ - sock->state = SS_CONNECTING; - sk->sk_state = TCP_SYN_SENT; - - /* Connect to remote device */ - err = irttp_connect_request(self->tsap, self->dtsap_sel, - self->saddr, self->daddr, NULL, - self->max_sdu_size_rx, NULL); - if (err) { - pr_debug("%s(), connect failed!\n", __func__); - goto out; - } - - /* Now the loop */ - err = -EINPROGRESS; - if (sk->sk_state != TCP_ESTABLISHED && (flags & O_NONBLOCK)) - goto out; - - err = -ERESTARTSYS; - if (wait_event_interruptible(*(sk_sleep(sk)), - (sk->sk_state != TCP_SYN_SENT))) - goto out; - - if (sk->sk_state != TCP_ESTABLISHED) { - sock->state = SS_UNCONNECTED; - err = sock_error(sk); - if (!err) - err = -ECONNRESET; - goto out; - } - - sock->state = SS_CONNECTED; - - /* At this point, IrLMP has assigned our source address */ - self->saddr = irttp_get_saddr(self->tsap); - err = 0; -out: - release_sock(sk); - return err; -} - -static struct proto irda_proto = { - .name = "IRDA", - .owner = THIS_MODULE, - .obj_size = sizeof(struct irda_sock), -}; - -/* - * Function irda_create (sock, protocol) - * - * Create IrDA socket - * - */ -static int irda_create(struct net *net, struct socket *sock, int protocol, - int kern) -{ - struct sock *sk; - struct irda_sock *self; - - if (protocol < 0 || protocol > SK_PROTOCOL_MAX) - return -EINVAL; - - if (net != &init_net) - return -EAFNOSUPPORT; - - /* Check for valid socket type */ - switch (sock->type) { - case SOCK_STREAM: /* For TTP connections with SAR disabled */ - case SOCK_SEQPACKET: /* For TTP connections with SAR enabled */ - case SOCK_DGRAM: /* For TTP Unitdata or LMP Ultra transfers */ - break; - default: - return -ESOCKTNOSUPPORT; - } - - /* Allocate networking socket */ - sk = sk_alloc(net, PF_IRDA, GFP_KERNEL, &irda_proto, kern); - if (sk == NULL) - return -ENOMEM; - - self = irda_sk(sk); - pr_debug("%s() : self is %p\n", __func__, self); - - init_waitqueue_head(&self->query_wait); - - switch (sock->type) { - case SOCK_STREAM: - sock->ops = &irda_stream_ops; - self->max_sdu_size_rx = TTP_SAR_DISABLE; - break; - case SOCK_SEQPACKET: - sock->ops = &irda_seqpacket_ops; - self->max_sdu_size_rx = TTP_SAR_UNBOUND; - break; - case SOCK_DGRAM: - switch (protocol) { -#ifdef CONFIG_IRDA_ULTRA - case IRDAPROTO_ULTRA: - sock->ops = &irda_ultra_ops; - /* Initialise now, because we may send on unbound - * sockets. Jean II */ - self->max_data_size = ULTRA_MAX_DATA - LMP_PID_HEADER; - self->max_header_size = IRDA_MAX_HEADER + LMP_PID_HEADER; - break; -#endif /* CONFIG_IRDA_ULTRA */ - case IRDAPROTO_UNITDATA: - sock->ops = &irda_dgram_ops; - /* We let Unitdata conn. be like seqpack conn. */ - self->max_sdu_size_rx = TTP_SAR_UNBOUND; - break; - default: - sk_free(sk); - return -ESOCKTNOSUPPORT; - } - break; - default: - sk_free(sk); - return -ESOCKTNOSUPPORT; - } - - /* Initialise networking socket struct */ - sock_init_data(sock, sk); /* Note : set sk->sk_refcnt to 1 */ - sk->sk_family = PF_IRDA; - sk->sk_protocol = protocol; - - /* Register as a client with IrLMP */ - self->ckey = irlmp_register_client(0, NULL, NULL, NULL); - self->mask.word = 0xffff; - self->rx_flow = self->tx_flow = FLOW_START; - self->nslots = DISCOVERY_DEFAULT_SLOTS; - self->daddr = DEV_ADDR_ANY; /* Until we get connected */ - self->saddr = 0x0; /* so IrLMP assign us any link */ - return 0; -} - -/* - * Function irda_destroy_socket (self) - * - * Destroy socket - * - */ -static void irda_destroy_socket(struct irda_sock *self) -{ - pr_debug("%s(%p)\n", __func__, self); - - /* Unregister with IrLMP */ - irlmp_unregister_client(self->ckey); - irlmp_unregister_service(self->skey); - - /* Unregister with LM-IAS */ - if (self->ias_obj) { - irias_delete_object(self->ias_obj); - self->ias_obj = NULL; - } - - if (self->iriap) { - iriap_close(self->iriap); - self->iriap = NULL; - } - - if (self->tsap) { - irttp_disconnect_request(self->tsap, NULL, P_NORMAL); - irttp_close_tsap(self->tsap); - self->tsap = NULL; - } -#ifdef CONFIG_IRDA_ULTRA - if (self->lsap) { - irlmp_close_lsap(self->lsap); - self->lsap = NULL; - } -#endif /* CONFIG_IRDA_ULTRA */ -} - -/* - * Function irda_release (sock) - */ -static int irda_release(struct socket *sock) -{ - struct sock *sk = sock->sk; - - if (sk == NULL) - return 0; - - lock_sock(sk); - sk->sk_state = TCP_CLOSE; - sk->sk_shutdown |= SEND_SHUTDOWN; - sk->sk_state_change(sk); - - /* Destroy IrDA socket */ - irda_destroy_socket(irda_sk(sk)); - - sock_orphan(sk); - sock->sk = NULL; - release_sock(sk); - - /* Purge queues (see sock_init_data()) */ - skb_queue_purge(&sk->sk_receive_queue); - - /* Destroy networking socket if we are the last reference on it, - * i.e. if(sk->sk_refcnt == 0) -> sk_free(sk) */ - sock_put(sk); - - /* Notes on socket locking and deallocation... - Jean II - * In theory we should put pairs of sock_hold() / sock_put() to - * prevent the socket to be destroyed whenever there is an - * outstanding request or outstanding incoming packet or event. - * - * 1) This may include IAS request, both in connect and getsockopt. - * Unfortunately, the situation is a bit more messy than it looks, - * because we close iriap and kfree(self) above. - * - * 2) This may include selective discovery in getsockopt. - * Same stuff as above, irlmp registration and self are gone. - * - * Probably 1 and 2 may not matter, because it's all triggered - * by a process and the socket layer already prevent the - * socket to go away while a process is holding it, through - * sockfd_put() and fput()... - * - * 3) This may include deferred TSAP closure. In particular, - * we may receive a late irda_disconnect_indication() - * Fortunately, (tsap_cb *)->close_pend should protect us - * from that. - * - * I did some testing on SMP, and it looks solid. And the socket - * memory leak is now gone... - Jean II - */ - - return 0; -} - -/* - * Function irda_sendmsg (sock, msg, len) - * - * Send message down to TinyTP. This function is used for both STREAM and - * SEQPACK services. This is possible since it forces the client to - * fragment the message if necessary - */ -static int irda_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) -{ - struct sock *sk = sock->sk; - struct irda_sock *self; - struct sk_buff *skb; - int err = -EPIPE; - - pr_debug("%s(), len=%zd\n", __func__, len); - - /* Note : socket.c set MSG_EOR on SEQPACKET sockets */ - if (msg->msg_flags & ~(MSG_DONTWAIT | MSG_EOR | MSG_CMSG_COMPAT | - MSG_NOSIGNAL)) { - return -EINVAL; - } - - lock_sock(sk); - - if (sk->sk_shutdown & SEND_SHUTDOWN) - goto out_err; - - if (sk->sk_state != TCP_ESTABLISHED) { - err = -ENOTCONN; - goto out; - } - - self = irda_sk(sk); - - /* Check if IrTTP is wants us to slow down */ - - if (wait_event_interruptible(*(sk_sleep(sk)), - (self->tx_flow != FLOW_STOP || sk->sk_state != TCP_ESTABLISHED))) { - err = -ERESTARTSYS; - goto out; - } - - /* Check if we are still connected */ - if (sk->sk_state != TCP_ESTABLISHED) { - err = -ENOTCONN; - goto out; - } - - /* Check that we don't send out too big frames */ - if (len > self->max_data_size) { - pr_debug("%s(), Chopping frame from %zd to %d bytes!\n", - __func__, len, self->max_data_size); - len = self->max_data_size; - } - - skb = sock_alloc_send_skb(sk, len + self->max_header_size + 16, - msg->msg_flags & MSG_DONTWAIT, &err); - if (!skb) - goto out_err; - - skb_reserve(skb, self->max_header_size + 16); - skb_reset_transport_header(skb); - skb_put(skb, len); - err = memcpy_from_msg(skb_transport_header(skb), msg, len); - if (err) { - kfree_skb(skb); - goto out_err; - } - - /* - * Just send the message to TinyTP, and let it deal with possible - * errors. No need to duplicate all that here - */ - err = irttp_data_request(self->tsap, skb); - if (err) { - pr_debug("%s(), err=%d\n", __func__, err); - goto out_err; - } - - release_sock(sk); - /* Tell client how much data we actually sent */ - return len; - -out_err: - err = sk_stream_error(sk, msg->msg_flags, err); -out: - release_sock(sk); - return err; - -} - -/* - * Function irda_recvmsg_dgram (sock, msg, size, flags) - * - * Try to receive message and copy it to user. The frame is discarded - * after being read, regardless of how much the user actually read - */ -static int irda_recvmsg_dgram(struct socket *sock, struct msghdr *msg, - size_t size, int flags) -{ - struct sock *sk = sock->sk; - struct irda_sock *self = irda_sk(sk); - struct sk_buff *skb; - size_t copied; - int err; - - skb = skb_recv_datagram(sk, flags & ~MSG_DONTWAIT, - flags & MSG_DONTWAIT, &err); - if (!skb) - return err; - - skb_reset_transport_header(skb); - copied = skb->len; - - if (copied > size) { - pr_debug("%s(), Received truncated frame (%zd < %zd)!\n", - __func__, copied, size); - copied = size; - msg->msg_flags |= MSG_TRUNC; - } - skb_copy_datagram_msg(skb, 0, msg, copied); - - skb_free_datagram(sk, skb); - - /* - * Check if we have previously stopped IrTTP and we know - * have more free space in our rx_queue. If so tell IrTTP - * to start delivering frames again before our rx_queue gets - * empty - */ - if (self->rx_flow == FLOW_STOP) { - if ((atomic_read(&sk->sk_rmem_alloc) << 2) <= sk->sk_rcvbuf) { - pr_debug("%s(), Starting IrTTP\n", __func__); - self->rx_flow = FLOW_START; - irttp_flow_request(self->tsap, FLOW_START); - } - } - - return copied; -} - -/* - * Function irda_recvmsg_stream (sock, msg, size, flags) - */ -static int irda_recvmsg_stream(struct socket *sock, struct msghdr *msg, - size_t size, int flags) -{ - struct sock *sk = sock->sk; - struct irda_sock *self = irda_sk(sk); - int noblock = flags & MSG_DONTWAIT; - size_t copied = 0; - int target, err; - long timeo; - - if ((err = sock_error(sk)) < 0) - return err; - - if (sock->flags & __SO_ACCEPTCON) - return -EINVAL; - - err =-EOPNOTSUPP; - if (flags & MSG_OOB) - return -EOPNOTSUPP; - - err = 0; - target = sock_rcvlowat(sk, flags & MSG_WAITALL, size); - timeo = sock_rcvtimeo(sk, noblock); - - do { - int chunk; - struct sk_buff *skb = skb_dequeue(&sk->sk_receive_queue); - - if (skb == NULL) { - DEFINE_WAIT(wait); - err = 0; - - if (copied >= target) - break; - - prepare_to_wait_exclusive(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - - /* - * POSIX 1003.1g mandates this order. - */ - err = sock_error(sk); - if (err) - ; - else if (sk->sk_shutdown & RCV_SHUTDOWN) - ; - else if (noblock) - err = -EAGAIN; - else if (signal_pending(current)) - err = sock_intr_errno(timeo); - else if (sk->sk_state != TCP_ESTABLISHED) - err = -ENOTCONN; - else if (skb_peek(&sk->sk_receive_queue) == NULL) - /* Wait process until data arrives */ - schedule(); - - finish_wait(sk_sleep(sk), &wait); - - if (err) - return err; - if (sk->sk_shutdown & RCV_SHUTDOWN) - break; - - continue; - } - - chunk = min_t(unsigned int, skb->len, size); - if (memcpy_to_msg(msg, skb->data, chunk)) { - skb_queue_head(&sk->sk_receive_queue, skb); - if (copied == 0) - copied = -EFAULT; - break; - } - copied += chunk; - size -= chunk; - - /* Mark read part of skb as used */ - if (!(flags & MSG_PEEK)) { - skb_pull(skb, chunk); - - /* put the skb back if we didn't use it up.. */ - if (skb->len) { - pr_debug("%s(), back on q!\n", - __func__); - skb_queue_head(&sk->sk_receive_queue, skb); - break; - } - - kfree_skb(skb); - } else { - pr_debug("%s() questionable!?\n", __func__); - - /* put message back and return */ - skb_queue_head(&sk->sk_receive_queue, skb); - break; - } - } while (size); - - /* - * Check if we have previously stopped IrTTP and we know - * have more free space in our rx_queue. If so tell IrTTP - * to start delivering frames again before our rx_queue gets - * empty - */ - if (self->rx_flow == FLOW_STOP) { - if ((atomic_read(&sk->sk_rmem_alloc) << 2) <= sk->sk_rcvbuf) { - pr_debug("%s(), Starting IrTTP\n", __func__); - self->rx_flow = FLOW_START; - irttp_flow_request(self->tsap, FLOW_START); - } - } - - return copied; -} - -/* - * Function irda_sendmsg_dgram (sock, msg, len) - * - * Send message down to TinyTP for the unreliable sequenced - * packet service... - * - */ -static int irda_sendmsg_dgram(struct socket *sock, struct msghdr *msg, - size_t len) -{ - struct sock *sk = sock->sk; - struct irda_sock *self; - struct sk_buff *skb; - int err; - - pr_debug("%s(), len=%zd\n", __func__, len); - - if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_CMSG_COMPAT)) - return -EINVAL; - - lock_sock(sk); - - if (sk->sk_shutdown & SEND_SHUTDOWN) { - send_sig(SIGPIPE, current, 0); - err = -EPIPE; - goto out; - } - - err = -ENOTCONN; - if (sk->sk_state != TCP_ESTABLISHED) - goto out; - - self = irda_sk(sk); - - /* - * Check that we don't send out too big frames. This is an unreliable - * service, so we have no fragmentation and no coalescence - */ - if (len > self->max_data_size) { - pr_debug("%s(), Warning too much data! Chopping frame from %zd to %d bytes!\n", - __func__, len, self->max_data_size); - len = self->max_data_size; - } - - skb = sock_alloc_send_skb(sk, len + self->max_header_size, - msg->msg_flags & MSG_DONTWAIT, &err); - err = -ENOBUFS; - if (!skb) - goto out; - - skb_reserve(skb, self->max_header_size); - skb_reset_transport_header(skb); - - pr_debug("%s(), appending user data\n", __func__); - skb_put(skb, len); - err = memcpy_from_msg(skb_transport_header(skb), msg, len); - if (err) { - kfree_skb(skb); - goto out; - } - - /* - * Just send the message to TinyTP, and let it deal with possible - * errors. No need to duplicate all that here - */ - err = irttp_udata_request(self->tsap, skb); - if (err) { - pr_debug("%s(), err=%d\n", __func__, err); - goto out; - } - - release_sock(sk); - return len; - -out: - release_sock(sk); - return err; -} - -/* - * Function irda_sendmsg_ultra (sock, msg, len) - * - * Send message down to IrLMP for the unreliable Ultra - * packet service... - */ -#ifdef CONFIG_IRDA_ULTRA -static int irda_sendmsg_ultra(struct socket *sock, struct msghdr *msg, - size_t len) -{ - struct sock *sk = sock->sk; - struct irda_sock *self; - __u8 pid = 0; - int bound = 0; - struct sk_buff *skb; - int err; - - pr_debug("%s(), len=%zd\n", __func__, len); - - err = -EINVAL; - if (msg->msg_flags & ~(MSG_DONTWAIT|MSG_CMSG_COMPAT)) - return -EINVAL; - - lock_sock(sk); - - err = -EPIPE; - if (sk->sk_shutdown & SEND_SHUTDOWN) { - send_sig(SIGPIPE, current, 0); - goto out; - } - - self = irda_sk(sk); - - /* Check if an address was specified with sendto. Jean II */ - if (msg->msg_name) { - DECLARE_SOCKADDR(struct sockaddr_irda *, addr, msg->msg_name); - err = -EINVAL; - /* Check address, extract pid. Jean II */ - if (msg->msg_namelen < sizeof(*addr)) - goto out; - if (addr->sir_family != AF_IRDA) - goto out; - - pid = addr->sir_lsap_sel; - if (pid & 0x80) { - pr_debug("%s(), extension in PID not supp!\n", - __func__); - err = -EOPNOTSUPP; - goto out; - } - } else { - /* Check that the socket is properly bound to an Ultra - * port. Jean II */ - if ((self->lsap == NULL) || - (sk->sk_state != TCP_ESTABLISHED)) { - pr_debug("%s(), socket not bound to Ultra PID.\n", - __func__); - err = -ENOTCONN; - goto out; - } - /* Use PID from socket */ - bound = 1; - } - - /* - * Check that we don't send out too big frames. This is an unreliable - * service, so we have no fragmentation and no coalescence - */ - if (len > self->max_data_size) { - pr_debug("%s(), Warning too much data! Chopping frame from %zd to %d bytes!\n", - __func__, len, self->max_data_size); - len = self->max_data_size; - } - - skb = sock_alloc_send_skb(sk, len + self->max_header_size, - msg->msg_flags & MSG_DONTWAIT, &err); - err = -ENOBUFS; - if (!skb) - goto out; - - skb_reserve(skb, self->max_header_size); - skb_reset_transport_header(skb); - - pr_debug("%s(), appending user data\n", __func__); - skb_put(skb, len); - err = memcpy_from_msg(skb_transport_header(skb), msg, len); - if (err) { - kfree_skb(skb); - goto out; - } - - err = irlmp_connless_data_request((bound ? self->lsap : NULL), - skb, pid); - if (err) - pr_debug("%s(), err=%d\n", __func__, err); -out: - release_sock(sk); - return err ? : len; -} -#endif /* CONFIG_IRDA_ULTRA */ - -/* - * Function irda_shutdown (sk, how) - */ -static int irda_shutdown(struct socket *sock, int how) -{ - struct sock *sk = sock->sk; - struct irda_sock *self = irda_sk(sk); - - pr_debug("%s(%p)\n", __func__, self); - - lock_sock(sk); - - sk->sk_state = TCP_CLOSE; - sk->sk_shutdown |= SEND_SHUTDOWN; - sk->sk_state_change(sk); - - if (self->iriap) { - iriap_close(self->iriap); - self->iriap = NULL; - } - - if (self->tsap) { - irttp_disconnect_request(self->tsap, NULL, P_NORMAL); - irttp_close_tsap(self->tsap); - self->tsap = NULL; - } - - /* A few cleanup so the socket look as good as new... */ - self->rx_flow = self->tx_flow = FLOW_START; /* needed ??? */ - self->daddr = DEV_ADDR_ANY; /* Until we get re-connected */ - self->saddr = 0x0; /* so IrLMP assign us any link */ - - release_sock(sk); - - return 0; -} - -/* - * Function irda_poll (file, sock, wait) - */ -static unsigned int irda_poll(struct file * file, struct socket *sock, - poll_table *wait) -{ - struct sock *sk = sock->sk; - struct irda_sock *self = irda_sk(sk); - unsigned int mask; - - poll_wait(file, sk_sleep(sk), wait); - mask = 0; - - /* Exceptional events? */ - if (sk->sk_err) - mask |= POLLERR; - if (sk->sk_shutdown & RCV_SHUTDOWN) { - pr_debug("%s(), POLLHUP\n", __func__); - mask |= POLLHUP; - } - - /* Readable? */ - if (!skb_queue_empty(&sk->sk_receive_queue)) { - pr_debug("Socket is readable\n"); - mask |= POLLIN | POLLRDNORM; - } - - /* Connection-based need to check for termination and startup */ - switch (sk->sk_type) { - case SOCK_STREAM: - if (sk->sk_state == TCP_CLOSE) { - pr_debug("%s(), POLLHUP\n", __func__); - mask |= POLLHUP; - } - - if (sk->sk_state == TCP_ESTABLISHED) { - if ((self->tx_flow == FLOW_START) && - sock_writeable(sk)) - { - mask |= POLLOUT | POLLWRNORM | POLLWRBAND; - } - } - break; - case SOCK_SEQPACKET: - if ((self->tx_flow == FLOW_START) && - sock_writeable(sk)) - { - mask |= POLLOUT | POLLWRNORM | POLLWRBAND; - } - break; - case SOCK_DGRAM: - if (sock_writeable(sk)) - mask |= POLLOUT | POLLWRNORM | POLLWRBAND; - break; - default: - break; - } - - return mask; -} - -/* - * Function irda_ioctl (sock, cmd, arg) - */ -static int irda_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) -{ - struct sock *sk = sock->sk; - int err; - - pr_debug("%s(), cmd=%#x\n", __func__, cmd); - - err = -EINVAL; - switch (cmd) { - case TIOCOUTQ: { - long amount; - - amount = sk->sk_sndbuf - sk_wmem_alloc_get(sk); - if (amount < 0) - amount = 0; - err = put_user(amount, (unsigned int __user *)arg); - break; - } - - case TIOCINQ: { - struct sk_buff *skb; - long amount = 0L; - /* These two are safe on a single CPU system as only user tasks fiddle here */ - if ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) - amount = skb->len; - err = put_user(amount, (unsigned int __user *)arg); - break; - } - - case SIOCGSTAMP: - if (sk != NULL) - err = sock_get_timestamp(sk, (struct timeval __user *)arg); - break; - - case SIOCGIFADDR: - case SIOCSIFADDR: - case SIOCGIFDSTADDR: - case SIOCSIFDSTADDR: - case SIOCGIFBRDADDR: - case SIOCSIFBRDADDR: - case SIOCGIFNETMASK: - case SIOCSIFNETMASK: - case SIOCGIFMETRIC: - case SIOCSIFMETRIC: - break; - default: - pr_debug("%s(), doing device ioctl!\n", __func__); - err = -ENOIOCTLCMD; - } - - return err; -} - -#ifdef CONFIG_COMPAT -/* - * Function irda_ioctl (sock, cmd, arg) - */ -static int irda_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) -{ - /* - * All IRDA's ioctl are standard ones. - */ - return -ENOIOCTLCMD; -} -#endif - -/* - * Function irda_setsockopt (sock, level, optname, optval, optlen) - * - * Set some options for the socket - * - */ -static int irda_setsockopt(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen) -{ - struct sock *sk = sock->sk; - struct irda_sock *self = irda_sk(sk); - struct irda_ias_set *ias_opt; - struct ias_object *ias_obj; - struct ias_attrib * ias_attr; /* Attribute in IAS object */ - int opt, free_ias = 0, err = 0; - - pr_debug("%s(%p)\n", __func__, self); - - if (level != SOL_IRLMP) - return -ENOPROTOOPT; - - lock_sock(sk); - - switch (optname) { - case IRLMP_IAS_SET: - /* The user want to add an attribute to an existing IAS object - * (in the IAS database) or to create a new object with this - * attribute. - * We first query IAS to know if the object exist, and then - * create the right attribute... - */ - - if (optlen != sizeof(struct irda_ias_set)) { - err = -EINVAL; - goto out; - } - - /* Copy query to the driver. */ - ias_opt = memdup_user(optval, optlen); - if (IS_ERR(ias_opt)) { - err = PTR_ERR(ias_opt); - goto out; - } - - /* Find the object we target. - * If the user gives us an empty string, we use the object - * associated with this socket. This will workaround - * duplicated class name - Jean II */ - if(ias_opt->irda_class_name[0] == '\0') { - if(self->ias_obj == NULL) { - kfree(ias_opt); - err = -EINVAL; - goto out; - } - ias_obj = self->ias_obj; - } else - ias_obj = irias_find_object(ias_opt->irda_class_name); - - /* Only ROOT can mess with the global IAS database. - * Users can only add attributes to the object associated - * with the socket they own - Jean II */ - if((!capable(CAP_NET_ADMIN)) && - ((ias_obj == NULL) || (ias_obj != self->ias_obj))) { - kfree(ias_opt); - err = -EPERM; - goto out; - } - - /* If the object doesn't exist, create it */ - if(ias_obj == (struct ias_object *) NULL) { - /* Create a new object */ - ias_obj = irias_new_object(ias_opt->irda_class_name, - jiffies); - if (ias_obj == NULL) { - kfree(ias_opt); - err = -ENOMEM; - goto out; - } - free_ias = 1; - } - - /* Do we have the attribute already ? */ - if(irias_find_attrib(ias_obj, ias_opt->irda_attrib_name)) { - kfree(ias_opt); - if (free_ias) { - kfree(ias_obj->name); - kfree(ias_obj); - } - err = -EINVAL; - goto out; - } - - /* Look at the type */ - switch(ias_opt->irda_attrib_type) { - case IAS_INTEGER: - /* Add an integer attribute */ - irias_add_integer_attrib( - ias_obj, - ias_opt->irda_attrib_name, - ias_opt->attribute.irda_attrib_int, - IAS_USER_ATTR); - break; - case IAS_OCT_SEQ: - /* Check length */ - if(ias_opt->attribute.irda_attrib_octet_seq.len > - IAS_MAX_OCTET_STRING) { - kfree(ias_opt); - if (free_ias) { - kfree(ias_obj->name); - kfree(ias_obj); - } - - err = -EINVAL; - goto out; - } - /* Add an octet sequence attribute */ - irias_add_octseq_attrib( - ias_obj, - ias_opt->irda_attrib_name, - ias_opt->attribute.irda_attrib_octet_seq.octet_seq, - ias_opt->attribute.irda_attrib_octet_seq.len, - IAS_USER_ATTR); - break; - case IAS_STRING: - /* Should check charset & co */ - /* Check length */ - /* The length is encoded in a __u8, and - * IAS_MAX_STRING == 256, so there is no way - * userspace can pass us a string too large. - * Jean II */ - /* NULL terminate the string (avoid troubles) */ - ias_opt->attribute.irda_attrib_string.string[ias_opt->attribute.irda_attrib_string.len] = '\0'; - /* Add a string attribute */ - irias_add_string_attrib( - ias_obj, - ias_opt->irda_attrib_name, - ias_opt->attribute.irda_attrib_string.string, - IAS_USER_ATTR); - break; - default : - kfree(ias_opt); - if (free_ias) { - kfree(ias_obj->name); - kfree(ias_obj); - } - err = -EINVAL; - goto out; - } - irias_insert_object(ias_obj); - kfree(ias_opt); - break; - case IRLMP_IAS_DEL: - /* The user want to delete an object from our local IAS - * database. We just need to query the IAS, check is the - * object is not owned by the kernel and delete it. - */ - - if (optlen != sizeof(struct irda_ias_set)) { - err = -EINVAL; - goto out; - } - - /* Copy query to the driver. */ - ias_opt = memdup_user(optval, optlen); - if (IS_ERR(ias_opt)) { - err = PTR_ERR(ias_opt); - goto out; - } - - /* Find the object we target. - * If the user gives us an empty string, we use the object - * associated with this socket. This will workaround - * duplicated class name - Jean II */ - if(ias_opt->irda_class_name[0] == '\0') - ias_obj = self->ias_obj; - else - ias_obj = irias_find_object(ias_opt->irda_class_name); - if(ias_obj == (struct ias_object *) NULL) { - kfree(ias_opt); - err = -EINVAL; - goto out; - } - - /* Only ROOT can mess with the global IAS database. - * Users can only del attributes from the object associated - * with the socket they own - Jean II */ - if((!capable(CAP_NET_ADMIN)) && - ((ias_obj == NULL) || (ias_obj != self->ias_obj))) { - kfree(ias_opt); - err = -EPERM; - goto out; - } - - /* Find the attribute (in the object) we target */ - ias_attr = irias_find_attrib(ias_obj, - ias_opt->irda_attrib_name); - if(ias_attr == (struct ias_attrib *) NULL) { - kfree(ias_opt); - err = -EINVAL; - goto out; - } - - /* Check is the user space own the object */ - if(ias_attr->value->owner != IAS_USER_ATTR) { - pr_debug("%s(), attempting to delete a kernel attribute\n", - __func__); - kfree(ias_opt); - err = -EPERM; - goto out; - } - - /* Remove the attribute (and maybe the object) */ - irias_delete_attrib(ias_obj, ias_attr, 1); - kfree(ias_opt); - break; - case IRLMP_MAX_SDU_SIZE: - if (optlen < sizeof(int)) { - err = -EINVAL; - goto out; - } - - if (get_user(opt, (int __user *)optval)) { - err = -EFAULT; - goto out; - } - - /* Only possible for a seqpacket service (TTP with SAR) */ - if (sk->sk_type != SOCK_SEQPACKET) { - pr_debug("%s(), setting max_sdu_size = %d\n", - __func__, opt); - self->max_sdu_size_rx = opt; - } else { - net_warn_ratelimited("%s: not allowed to set MAXSDUSIZE for this socket type!\n", - __func__); - err = -ENOPROTOOPT; - goto out; - } - break; - case IRLMP_HINTS_SET: - if (optlen < sizeof(int)) { - err = -EINVAL; - goto out; - } - - /* The input is really a (__u8 hints[2]), easier as an int */ - if (get_user(opt, (int __user *)optval)) { - err = -EFAULT; - goto out; - } - - /* Unregister any old registration */ - irlmp_unregister_service(self->skey); - - self->skey = irlmp_register_service((__u16) opt); - break; - case IRLMP_HINT_MASK_SET: - /* As opposed to the previous case which set the hint bits - * that we advertise, this one set the filter we use when - * making a discovery (nodes which don't match any hint - * bit in the mask are not reported). - */ - if (optlen < sizeof(int)) { - err = -EINVAL; - goto out; - } - - /* The input is really a (__u8 hints[2]), easier as an int */ - if (get_user(opt, (int __user *)optval)) { - err = -EFAULT; - goto out; - } - - /* Set the new hint mask */ - self->mask.word = (__u16) opt; - /* Mask out extension bits */ - self->mask.word &= 0x7f7f; - /* Check if no bits */ - if(!self->mask.word) - self->mask.word = 0xFFFF; - - break; - default: - err = -ENOPROTOOPT; - break; - } - -out: - release_sock(sk); - - return err; -} - -/* - * Function irda_extract_ias_value(ias_opt, ias_value) - * - * Translate internal IAS value structure to the user space representation - * - * The external representation of IAS values, as we exchange them with - * user space program is quite different from the internal representation, - * as stored in the IAS database (because we need a flat structure for - * crossing kernel boundary). - * This function transform the former in the latter. We also check - * that the value type is valid. - */ -static int irda_extract_ias_value(struct irda_ias_set *ias_opt, - struct ias_value *ias_value) -{ - /* Look at the type */ - switch (ias_value->type) { - case IAS_INTEGER: - /* Copy the integer */ - ias_opt->attribute.irda_attrib_int = ias_value->t.integer; - break; - case IAS_OCT_SEQ: - /* Set length */ - ias_opt->attribute.irda_attrib_octet_seq.len = ias_value->len; - /* Copy over */ - memcpy(ias_opt->attribute.irda_attrib_octet_seq.octet_seq, - ias_value->t.oct_seq, ias_value->len); - break; - case IAS_STRING: - /* Set length */ - ias_opt->attribute.irda_attrib_string.len = ias_value->len; - ias_opt->attribute.irda_attrib_string.charset = ias_value->charset; - /* Copy over */ - memcpy(ias_opt->attribute.irda_attrib_string.string, - ias_value->t.string, ias_value->len); - /* NULL terminate the string (avoid troubles) */ - ias_opt->attribute.irda_attrib_string.string[ias_value->len] = '\0'; - break; - case IAS_MISSING: - default : - return -EINVAL; - } - - /* Copy type over */ - ias_opt->irda_attrib_type = ias_value->type; - - return 0; -} - -/* - * Function irda_getsockopt (sock, level, optname, optval, optlen) - */ -static int irda_getsockopt(struct socket *sock, int level, int optname, - char __user *optval, int __user *optlen) -{ - struct sock *sk = sock->sk; - struct irda_sock *self = irda_sk(sk); - struct irda_device_list list; - struct irda_device_info *discoveries; - struct irda_ias_set * ias_opt; /* IAS get/query params */ - struct ias_object * ias_obj; /* Object in IAS */ - struct ias_attrib * ias_attr; /* Attribute in IAS object */ - int daddr = DEV_ADDR_ANY; /* Dest address for IAS queries */ - int val = 0; - int len = 0; - int err = 0; - int offset, total; - - pr_debug("%s(%p)\n", __func__, self); - - if (level != SOL_IRLMP) - return -ENOPROTOOPT; - - if (get_user(len, optlen)) - return -EFAULT; - - if(len < 0) - return -EINVAL; - - lock_sock(sk); - - switch (optname) { - case IRLMP_ENUMDEVICES: - - /* Offset to first device entry */ - offset = sizeof(struct irda_device_list) - - sizeof(struct irda_device_info); - - if (len < offset) { - err = -EINVAL; - goto out; - } - - /* Ask lmp for the current discovery log */ - discoveries = irlmp_get_discoveries(&list.len, self->mask.word, - self->nslots); - /* Check if the we got some results */ - if (discoveries == NULL) { - err = -EAGAIN; - goto out; /* Didn't find any devices */ - } - - /* Write total list length back to client */ - if (copy_to_user(optval, &list, offset)) - err = -EFAULT; - - /* Copy the list itself - watch for overflow */ - if (list.len > 2048) { - err = -EINVAL; - goto bed; - } - total = offset + (list.len * sizeof(struct irda_device_info)); - if (total > len) - total = len; - if (copy_to_user(optval+offset, discoveries, total - offset)) - err = -EFAULT; - - /* Write total number of bytes used back to client */ - if (put_user(total, optlen)) - err = -EFAULT; -bed: - /* Free up our buffer */ - kfree(discoveries); - break; - case IRLMP_MAX_SDU_SIZE: - val = self->max_data_size; - len = sizeof(int); - if (put_user(len, optlen)) { - err = -EFAULT; - goto out; - } - - if (copy_to_user(optval, &val, len)) { - err = -EFAULT; - goto out; - } - - break; - case IRLMP_IAS_GET: - /* The user want an object from our local IAS database. - * We just need to query the IAS and return the value - * that we found */ - - /* Check that the user has allocated the right space for us */ - if (len != sizeof(struct irda_ias_set)) { - err = -EINVAL; - goto out; - } - - /* Copy query to the driver. */ - ias_opt = memdup_user(optval, len); - if (IS_ERR(ias_opt)) { - err = PTR_ERR(ias_opt); - goto out; - } - - /* Find the object we target. - * If the user gives us an empty string, we use the object - * associated with this socket. This will workaround - * duplicated class name - Jean II */ - if(ias_opt->irda_class_name[0] == '\0') - ias_obj = self->ias_obj; - else - ias_obj = irias_find_object(ias_opt->irda_class_name); - if(ias_obj == (struct ias_object *) NULL) { - kfree(ias_opt); - err = -EINVAL; - goto out; - } - - /* Find the attribute (in the object) we target */ - ias_attr = irias_find_attrib(ias_obj, - ias_opt->irda_attrib_name); - if(ias_attr == (struct ias_attrib *) NULL) { - kfree(ias_opt); - err = -EINVAL; - goto out; - } - - /* Translate from internal to user structure */ - err = irda_extract_ias_value(ias_opt, ias_attr->value); - if(err) { - kfree(ias_opt); - goto out; - } - - /* Copy reply to the user */ - if (copy_to_user(optval, ias_opt, - sizeof(struct irda_ias_set))) { - kfree(ias_opt); - err = -EFAULT; - goto out; - } - /* Note : don't need to put optlen, we checked it */ - kfree(ias_opt); - break; - case IRLMP_IAS_QUERY: - /* The user want an object from a remote IAS database. - * We need to use IAP to query the remote database and - * then wait for the answer to come back. */ - - /* Check that the user has allocated the right space for us */ - if (len != sizeof(struct irda_ias_set)) { - err = -EINVAL; - goto out; - } - - /* Copy query to the driver. */ - ias_opt = memdup_user(optval, len); - if (IS_ERR(ias_opt)) { - err = PTR_ERR(ias_opt); - goto out; - } - - /* At this point, there are two cases... - * 1) the socket is connected - that's the easy case, we - * just query the device we are connected to... - * 2) the socket is not connected - the user doesn't want - * to connect and/or may not have a valid service name - * (so can't create a fake connection). In this case, - * we assume that the user pass us a valid destination - * address in the requesting structure... - */ - if(self->daddr != DEV_ADDR_ANY) { - /* We are connected - reuse known daddr */ - daddr = self->daddr; - } else { - /* We are not connected, we must specify a valid - * destination address */ - daddr = ias_opt->daddr; - if((!daddr) || (daddr == DEV_ADDR_ANY)) { - kfree(ias_opt); - err = -EINVAL; - goto out; - } - } - - /* Check that we can proceed with IAP */ - if (self->iriap) { - net_warn_ratelimited("%s: busy with a previous query\n", - __func__); - kfree(ias_opt); - err = -EBUSY; - goto out; - } - - self->iriap = iriap_open(LSAP_ANY, IAS_CLIENT, self, - irda_getvalue_confirm); - - if (self->iriap == NULL) { - kfree(ias_opt); - err = -ENOMEM; - goto out; - } - - /* Treat unexpected wakeup as disconnect */ - self->errno = -EHOSTUNREACH; - - /* Query remote LM-IAS */ - iriap_getvaluebyclass_request(self->iriap, - self->saddr, daddr, - ias_opt->irda_class_name, - ias_opt->irda_attrib_name); - - /* Wait for answer, if not yet finished (or failed) */ - if (wait_event_interruptible(self->query_wait, - (self->iriap == NULL))) { - /* pending request uses copy of ias_opt-content - * we can free it regardless! */ - kfree(ias_opt); - /* Treat signals as disconnect */ - err = -EHOSTUNREACH; - goto out; - } - - /* Check what happened */ - if (self->errno) - { - kfree(ias_opt); - /* Requested object/attribute doesn't exist */ - if((self->errno == IAS_CLASS_UNKNOWN) || - (self->errno == IAS_ATTRIB_UNKNOWN)) - err = -EADDRNOTAVAIL; - else - err = -EHOSTUNREACH; - - goto out; - } - - /* Translate from internal to user structure */ - err = irda_extract_ias_value(ias_opt, self->ias_result); - if (self->ias_result) - irias_delete_value(self->ias_result); - if (err) { - kfree(ias_opt); - goto out; - } - - /* Copy reply to the user */ - if (copy_to_user(optval, ias_opt, - sizeof(struct irda_ias_set))) { - kfree(ias_opt); - err = -EFAULT; - goto out; - } - /* Note : don't need to put optlen, we checked it */ - kfree(ias_opt); - break; - case IRLMP_WAITDEVICE: - /* This function is just another way of seeing life ;-) - * IRLMP_ENUMDEVICES assumes that you have a static network, - * and that you just want to pick one of the devices present. - * On the other hand, in here we assume that no device is - * present and that at some point in the future a device will - * come into range. When this device arrive, we just wake - * up the caller, so that he has time to connect to it before - * the device goes away... - * Note : once the node has been discovered for more than a - * few second, it won't trigger this function, unless it - * goes away and come back changes its hint bits (so we - * might call it IRLMP_WAITNEWDEVICE). - */ - - /* Check that the user is passing us an int */ - if (len != sizeof(int)) { - err = -EINVAL; - goto out; - } - /* Get timeout in ms (max time we block the caller) */ - if (get_user(val, (int __user *)optval)) { - err = -EFAULT; - goto out; - } - - /* Tell IrLMP we want to be notified */ - irlmp_update_client(self->ckey, self->mask.word, - irda_selective_discovery_indication, - NULL, (void *) self); - - /* Do some discovery (and also return cached results) */ - irlmp_discovery_request(self->nslots); - - /* Wait until a node is discovered */ - if (!self->cachedaddr) { - pr_debug("%s(), nothing discovered yet, going to sleep...\n", - __func__); - - /* Set watchdog timer to expire in <val> ms. */ - self->errno = 0; - setup_timer(&self->watchdog, irda_discovery_timeout, - (unsigned long)self); - mod_timer(&self->watchdog, - jiffies + msecs_to_jiffies(val)); - - /* Wait for IR-LMP to call us back */ - err = __wait_event_interruptible(self->query_wait, - (self->cachedaddr != 0 || self->errno == -ETIME)); - - /* If watchdog is still activated, kill it! */ - del_timer(&(self->watchdog)); - - pr_debug("%s(), ...waking up !\n", __func__); - - if (err != 0) - goto out; - } - else - pr_debug("%s(), found immediately !\n", - __func__); - - /* Tell IrLMP that we have been notified */ - irlmp_update_client(self->ckey, self->mask.word, - NULL, NULL, NULL); - - /* Check if the we got some results */ - if (!self->cachedaddr) { - err = -EAGAIN; /* Didn't find any devices */ - goto out; - } - daddr = self->cachedaddr; - /* Cleanup */ - self->cachedaddr = 0; - - /* We return the daddr of the device that trigger the - * wakeup. As irlmp pass us only the new devices, we - * are sure that it's not an old device. - * If the user want more details, he should query - * the whole discovery log and pick one device... - */ - if (put_user(daddr, (int __user *)optval)) { - err = -EFAULT; - goto out; - } - - break; - default: - err = -ENOPROTOOPT; - } - -out: - - release_sock(sk); - - return err; -} - -static const struct net_proto_family irda_family_ops = { - .family = PF_IRDA, - .create = irda_create, - .owner = THIS_MODULE, -}; - -static const struct proto_ops irda_stream_ops = { - .family = PF_IRDA, - .owner = THIS_MODULE, - .release = irda_release, - .bind = irda_bind, - .connect = irda_connect, - .socketpair = sock_no_socketpair, - .accept = irda_accept, - .getname = irda_getname, - .poll = irda_poll, - .ioctl = irda_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = irda_compat_ioctl, -#endif - .listen = irda_listen, - .shutdown = irda_shutdown, - .setsockopt = irda_setsockopt, - .getsockopt = irda_getsockopt, - .sendmsg = irda_sendmsg, - .recvmsg = irda_recvmsg_stream, - .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, -}; - -static const struct proto_ops irda_seqpacket_ops = { - .family = PF_IRDA, - .owner = THIS_MODULE, - .release = irda_release, - .bind = irda_bind, - .connect = irda_connect, - .socketpair = sock_no_socketpair, - .accept = irda_accept, - .getname = irda_getname, - .poll = datagram_poll, - .ioctl = irda_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = irda_compat_ioctl, -#endif - .listen = irda_listen, - .shutdown = irda_shutdown, - .setsockopt = irda_setsockopt, - .getsockopt = irda_getsockopt, - .sendmsg = irda_sendmsg, - .recvmsg = irda_recvmsg_dgram, - .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, -}; - -static const struct proto_ops irda_dgram_ops = { - .family = PF_IRDA, - .owner = THIS_MODULE, - .release = irda_release, - .bind = irda_bind, - .connect = irda_connect, - .socketpair = sock_no_socketpair, - .accept = irda_accept, - .getname = irda_getname, - .poll = datagram_poll, - .ioctl = irda_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = irda_compat_ioctl, -#endif - .listen = irda_listen, - .shutdown = irda_shutdown, - .setsockopt = irda_setsockopt, - .getsockopt = irda_getsockopt, - .sendmsg = irda_sendmsg_dgram, - .recvmsg = irda_recvmsg_dgram, - .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, -}; - -#ifdef CONFIG_IRDA_ULTRA -static const struct proto_ops irda_ultra_ops = { - .family = PF_IRDA, - .owner = THIS_MODULE, - .release = irda_release, - .bind = irda_bind, - .connect = sock_no_connect, - .socketpair = sock_no_socketpair, - .accept = sock_no_accept, - .getname = irda_getname, - .poll = datagram_poll, - .ioctl = irda_ioctl, -#ifdef CONFIG_COMPAT - .compat_ioctl = irda_compat_ioctl, -#endif - .listen = sock_no_listen, - .shutdown = irda_shutdown, - .setsockopt = irda_setsockopt, - .getsockopt = irda_getsockopt, - .sendmsg = irda_sendmsg_ultra, - .recvmsg = irda_recvmsg_dgram, - .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, -}; -#endif /* CONFIG_IRDA_ULTRA */ - -/* - * Function irsock_init (pro) - * - * Initialize IrDA protocol - * - */ -int __init irsock_init(void) -{ - int rc = proto_register(&irda_proto, 0); - - if (rc == 0) - rc = sock_register(&irda_family_ops); - - return rc; -} - -/* - * Function irsock_cleanup (void) - * - * Remove IrDA protocol - * - */ -void irsock_cleanup(void) -{ - sock_unregister(PF_IRDA); - proto_unregister(&irda_proto); -} diff --git a/net/irda/discovery.c b/net/irda/discovery.c deleted file mode 100644 index 364d70aed068..000000000000 --- a/net/irda/discovery.c +++ /dev/null @@ -1,417 +0,0 @@ -/********************************************************************* - * - * Filename: discovery.c - * Version: 0.1 - * Description: Routines for handling discoveries at the IrLMP layer - * Status: Experimental. - * Author: Dag Brattli <dagb@cs.uit.no> - * Created at: Tue Apr 6 15:33:50 1999 - * Modified at: Sat Oct 9 17:11:31 1999 - * Modified by: Dag Brattli <dagb@cs.uit.no> - * Modified at: Fri May 28 3:11 CST 1999 - * Modified by: Horst von Brand <vonbrand@sleipnir.valparaiso.cl> - * - * Copyright (c) 1999 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. - * - ********************************************************************/ - -#include <linux/string.h> -#include <linux/socket.h> -#include <linux/fs.h> -#include <linux/seq_file.h> -#include <linux/slab.h> -#include <linux/export.h> - -#include <net/irda/irda.h> -#include <net/irda/irlmp.h> - -#include <net/irda/discovery.h> - -#include <asm/unaligned.h> - -/* - * Function irlmp_add_discovery (cachelog, discovery) - * - * Add a new discovery to the cachelog, and remove any old discoveries - * from the same device - * - * Note : we try to preserve the time this device was *first* discovered - * (as opposed to the time of last discovery used for cleanup). This is - * used by clients waiting for discovery events to tell if the device - * discovered is "new" or just the same old one. They can't rely there - * on a binary flag (new/old), because not all discovery events are - * propagated to them, and they might not always listen, so they would - * miss some new devices popping up... - * Jean II - */ -void irlmp_add_discovery(hashbin_t *cachelog, discovery_t *new) -{ - discovery_t *discovery, *node; - unsigned long flags; - - /* Set time of first discovery if node is new (see below) */ - new->firststamp = new->timestamp; - - spin_lock_irqsave(&cachelog->hb_spinlock, flags); - - /* - * Remove all discoveries of devices that has previously been - * discovered on the same link with the same name (info), or the - * same daddr. We do this since some devices (mostly PDAs) change - * their device address between every discovery. - */ - discovery = (discovery_t *) hashbin_get_first(cachelog); - while (discovery != NULL ) { - node = discovery; - - /* Be sure to stay one item ahead */ - discovery = (discovery_t *) hashbin_get_next(cachelog); - - if ((node->data.saddr == new->data.saddr) && - ((node->data.daddr == new->data.daddr) || - (strcmp(node->data.info, new->data.info) == 0))) - { - /* This discovery is a previous discovery - * from the same device, so just remove it - */ - hashbin_remove_this(cachelog, (irda_queue_t *) node); - /* Check if hints bits are unchanged */ - if (get_unaligned((__u16 *)node->data.hints) == get_unaligned((__u16 *)new->data.hints)) - /* Set time of first discovery for this node */ - new->firststamp = node->firststamp; - kfree(node); - } - } - - /* Insert the new and updated version */ - hashbin_insert(cachelog, (irda_queue_t *) new, new->data.daddr, NULL); - - spin_unlock_irqrestore(&cachelog->hb_spinlock, flags); -} - -/* - * Function irlmp_add_discovery_log (cachelog, log) - * - * Merge a disovery log into the cachelog. - * - */ -void irlmp_add_discovery_log(hashbin_t *cachelog, hashbin_t *log) -{ - discovery_t *discovery; - - /* - * If log is missing this means that IrLAP was unable to perform the - * discovery, so restart discovery again with just the half timeout - * of the normal one. - */ - /* Well... It means that there was nobody out there - Jean II */ - if (log == NULL) { - /* irlmp_start_discovery_timer(irlmp, 150); */ - return; - } - - /* - * Locking : we are the only owner of this discovery log, so - * no need to lock it. - * We just need to lock the global log in irlmp_add_discovery(). - */ - discovery = (discovery_t *) hashbin_remove_first(log); - while (discovery != NULL) { - irlmp_add_discovery(cachelog, discovery); - - discovery = (discovery_t *) hashbin_remove_first(log); - } - - /* Delete the now empty log */ - hashbin_delete(log, (FREE_FUNC) kfree); -} - -/* - * Function irlmp_expire_discoveries (log, saddr, force) - * - * Go through all discoveries and expire all that has stayed too long - * - * Note : this assume that IrLAP won't change its saddr, which - * currently is a valid assumption... - */ -void irlmp_expire_discoveries(hashbin_t *log, __u32 saddr, int force) -{ - discovery_t * discovery; - discovery_t * curr; - unsigned long flags; - discinfo_t * buffer = NULL; - int n; /* Size of the full log */ - int i = 0; /* How many we expired */ - - IRDA_ASSERT(log != NULL, return;); - spin_lock_irqsave(&log->hb_spinlock, flags); - - discovery = (discovery_t *) hashbin_get_first(log); - while (discovery != NULL) { - /* Be sure to be one item ahead */ - curr = discovery; - discovery = (discovery_t *) hashbin_get_next(log); - - /* Test if it's time to expire this discovery */ - if ((curr->data.saddr == saddr) && - (force || - ((jiffies - curr->timestamp) > DISCOVERY_EXPIRE_TIMEOUT))) - { - /* Create buffer as needed. - * As this function get called a lot and most time - * we don't have anything to put in the log (we are - * quite picky), we can save a lot of overhead - * by not calling kmalloc. Jean II */ - if(buffer == NULL) { - /* Create the client specific buffer */ - n = HASHBIN_GET_SIZE(log); - buffer = kmalloc(n * sizeof(struct irda_device_info), GFP_ATOMIC); - if (buffer == NULL) { - spin_unlock_irqrestore(&log->hb_spinlock, flags); - return; - } - - } - - /* Copy discovery information */ - memcpy(&(buffer[i]), &(curr->data), - sizeof(discinfo_t)); - i++; - - /* Remove it from the log */ - curr = hashbin_remove_this(log, (irda_queue_t *) curr); - kfree(curr); - } - } - - /* Drop the spinlock before calling the higher layers, as - * we can't guarantee they won't call us back and create a - * deadlock. We will work on our own private data, so we - * don't care to be interrupted. - Jean II */ - spin_unlock_irqrestore(&log->hb_spinlock, flags); - - if(buffer == NULL) - return; - - /* Tell IrLMP and registered clients about it */ - irlmp_discovery_expiry(buffer, i); - - /* Free up our buffer */ - kfree(buffer); -} - -#if 0 -/* - * Function irlmp_dump_discoveries (log) - * - * Print out all discoveries in log - * - */ -void irlmp_dump_discoveries(hashbin_t *log) -{ - discovery_t *discovery; - - IRDA_ASSERT(log != NULL, return;); - - discovery = (discovery_t *) hashbin_get_first(log); - while (discovery != NULL) { - pr_debug("Discovery:\n"); - pr_debug(" daddr=%08x\n", discovery->data.daddr); - pr_debug(" saddr=%08x\n", discovery->data.saddr); - pr_debug(" nickname=%s\n", discovery->data.info); - - discovery = (discovery_t *) hashbin_get_next(log); - } -} -#endif - -/* - * Function irlmp_copy_discoveries (log, pn, mask) - * - * Copy all discoveries in a buffer - * - * This function implement a safe way for lmp clients to access the - * discovery log. The basic problem is that we don't want the log - * to change (add/remove) while the client is reading it. If the - * lmp client manipulate directly the hashbin, he is sure to get - * into troubles... - * The idea is that we copy all the current discovery log in a buffer - * which is specific to the client and pass this copy to him. As we - * do this operation with the spinlock grabbed, we are safe... - * Note : we don't want those clients to grab the spinlock, because - * we have no control on how long they will hold it... - * Note : we choose to copy the log in "struct irda_device_info" to - * save space... - * Note : the client must kfree himself() the log... - * Jean II - */ -struct irda_device_info *irlmp_copy_discoveries(hashbin_t *log, int *pn, - __u16 mask, int old_entries) -{ - discovery_t * discovery; - unsigned long flags; - discinfo_t * buffer = NULL; - int j_timeout = (sysctl_discovery_timeout * HZ); - int n; /* Size of the full log */ - int i = 0; /* How many we picked */ - - IRDA_ASSERT(pn != NULL, return NULL;); - IRDA_ASSERT(log != NULL, return NULL;); - - /* Save spin lock */ - spin_lock_irqsave(&log->hb_spinlock, flags); - - discovery = (discovery_t *) hashbin_get_first(log); - while (discovery != NULL) { - /* Mask out the ones we don't want : - * We want to match the discovery mask, and to get only - * the most recent one (unless we want old ones) */ - if ((get_unaligned((__u16 *)discovery->data.hints) & mask) && - ((old_entries) || - ((jiffies - discovery->firststamp) < j_timeout))) { - /* Create buffer as needed. - * As this function get called a lot and most time - * we don't have anything to put in the log (we are - * quite picky), we can save a lot of overhead - * by not calling kmalloc. Jean II */ - if(buffer == NULL) { - /* Create the client specific buffer */ - n = HASHBIN_GET_SIZE(log); - buffer = kmalloc(n * sizeof(struct irda_device_info), GFP_ATOMIC); - if (buffer == NULL) { - spin_unlock_irqrestore(&log->hb_spinlock, flags); - return NULL; - } - - } - - /* Copy discovery information */ - memcpy(&(buffer[i]), &(discovery->data), - sizeof(discinfo_t)); - i++; - } - discovery = (discovery_t *) hashbin_get_next(log); - } - - spin_unlock_irqrestore(&log->hb_spinlock, flags); - - /* Get the actual number of device in the buffer and return */ - *pn = i; - return buffer; -} - -#ifdef CONFIG_PROC_FS -static inline discovery_t *discovery_seq_idx(loff_t pos) - -{ - discovery_t *discovery; - - for (discovery = (discovery_t *) hashbin_get_first(irlmp->cachelog); - discovery != NULL; - discovery = (discovery_t *) hashbin_get_next(irlmp->cachelog)) { - if (pos-- == 0) - break; - } - - return discovery; -} - -static void *discovery_seq_start(struct seq_file *seq, loff_t *pos) -{ - spin_lock_irq(&irlmp->cachelog->hb_spinlock); - return *pos ? discovery_seq_idx(*pos - 1) : SEQ_START_TOKEN; -} - -static void *discovery_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - ++*pos; - return (v == SEQ_START_TOKEN) - ? (void *) hashbin_get_first(irlmp->cachelog) - : (void *) hashbin_get_next(irlmp->cachelog); -} - -static void discovery_seq_stop(struct seq_file *seq, void *v) -{ - spin_unlock_irq(&irlmp->cachelog->hb_spinlock); -} - -static int discovery_seq_show(struct seq_file *seq, void *v) -{ - if (v == SEQ_START_TOKEN) - seq_puts(seq, "IrLMP: Discovery log:\n\n"); - else { - const discovery_t *discovery = v; - - seq_printf(seq, "nickname: %s, hint: 0x%02x%02x", - discovery->data.info, - discovery->data.hints[0], - discovery->data.hints[1]); -#if 0 - if ( discovery->data.hints[0] & HINT_PNP) - seq_puts(seq, "PnP Compatible "); - if ( discovery->data.hints[0] & HINT_PDA) - seq_puts(seq, "PDA/Palmtop "); - if ( discovery->data.hints[0] & HINT_COMPUTER) - seq_puts(seq, "Computer "); - if ( discovery->data.hints[0] & HINT_PRINTER) - seq_puts(seq, "Printer "); - if ( discovery->data.hints[0] & HINT_MODEM) - seq_puts(seq, "Modem "); - if ( discovery->data.hints[0] & HINT_FAX) - seq_puts(seq, "Fax "); - if ( discovery->data.hints[0] & HINT_LAN) - seq_puts(seq, "LAN Access "); - - if ( discovery->data.hints[1] & HINT_TELEPHONY) - seq_puts(seq, "Telephony "); - if ( discovery->data.hints[1] & HINT_FILE_SERVER) - seq_puts(seq, "File Server "); - if ( discovery->data.hints[1] & HINT_COMM) - seq_puts(seq, "IrCOMM "); - if ( discovery->data.hints[1] & HINT_OBEX) - seq_puts(seq, "IrOBEX "); -#endif - seq_printf(seq,", saddr: 0x%08x, daddr: 0x%08x\n\n", - discovery->data.saddr, - discovery->data.daddr); - - seq_putc(seq, '\n'); - } - return 0; -} - -static const struct seq_operations discovery_seq_ops = { - .start = discovery_seq_start, - .next = discovery_seq_next, - .stop = discovery_seq_stop, - .show = discovery_seq_show, -}; - -static int discovery_seq_open(struct inode *inode, struct file *file) -{ - IRDA_ASSERT(irlmp != NULL, return -EINVAL;); - - return seq_open(file, &discovery_seq_ops); -} - -const struct file_operations discovery_seq_fops = { - .owner = THIS_MODULE, - .open = discovery_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; -#endif diff --git a/net/irda/ircomm/Kconfig b/net/irda/ircomm/Kconfig deleted file mode 100644 index 19492c1707b7..000000000000 --- a/net/irda/ircomm/Kconfig +++ /dev/null @@ -1,12 +0,0 @@ -config IRCOMM - tristate "IrCOMM protocol" - depends on IRDA && TTY - help - Say Y here if you want to build support for the IrCOMM protocol. - To compile it as modules, choose M here: the modules will be - called ircomm and ircomm_tty. - IrCOMM implements serial port emulation, and makes it possible to - use all existing applications that understands TTY's with an - infrared link. Thus you should be able to use application like PPP, - minicom and others. - diff --git a/net/irda/ircomm/Makefile b/net/irda/ircomm/Makefile deleted file mode 100644 index ab23b5ba7e33..000000000000 --- a/net/irda/ircomm/Makefile +++ /dev/null @@ -1,8 +0,0 @@ -# -# Makefile for the Linux IrDA IrCOMM protocol layer. -# - -obj-$(CONFIG_IRCOMM) += ircomm.o ircomm-tty.o - -ircomm-y := ircomm_core.o ircomm_event.o ircomm_lmp.o ircomm_ttp.o -ircomm-tty-y := ircomm_tty.o ircomm_tty_attach.o ircomm_tty_ioctl.o ircomm_param.o diff --git a/net/irda/ircomm/ircomm_core.c b/net/irda/ircomm/ircomm_core.c deleted file mode 100644 index 3af219545f6d..000000000000 --- a/net/irda/ircomm/ircomm_core.c +++ /dev/null @@ -1,563 +0,0 @@ -/********************************************************************* - * - * Filename: ircomm_core.c - * Version: 1.0 - * Description: IrCOMM service interface - * Status: Experimental. - * Author: Dag Brattli <dagb@cs.uit.no> - * Created at: Sun Jun 6 20:37:34 1999 - * Modified at: Tue Dec 21 13:26:41 1999 - * Modified by: Dag Brattli <dagb@cs.uit.no> - * - * Copyright (c) 1999 Dag Brattli, All Rights Reserved. - * Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. - * - ********************************************************************/ - -#include <linux/module.h> -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <linux/init.h> -#include <linux/slab.h> - -#include <net/irda/irda.h> -#include <net/irda/irmod.h> -#include <net/irda/irlmp.h> -#include <net/irda/iriap.h> -#include <net/irda/irttp.h> -#include <net/irda/irias_object.h> - -#include <net/irda/ircomm_event.h> -#include <net/irda/ircomm_lmp.h> -#include <net/irda/ircomm_ttp.h> -#include <net/irda/ircomm_param.h> -#include <net/irda/ircomm_core.h> - -static int __ircomm_close(struct ircomm_cb *self); -static void ircomm_control_indication(struct ircomm_cb *self, - struct sk_buff *skb, int clen); - -#ifdef CONFIG_PROC_FS -extern struct proc_dir_entry *proc_irda; -static int ircomm_seq_open(struct inode *, struct file *); - -static const struct file_operations ircomm_proc_fops = { - .owner = THIS_MODULE, - .open = ircomm_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; -#endif /* CONFIG_PROC_FS */ - -hashbin_t *ircomm = NULL; - -static int __init ircomm_init(void) -{ - ircomm = hashbin_new(HB_LOCK); - if (ircomm == NULL) { - net_err_ratelimited("%s(), can't allocate hashbin!\n", - __func__); - return -ENOMEM; - } - -#ifdef CONFIG_PROC_FS - { struct proc_dir_entry *ent; - ent = proc_create("ircomm", 0, proc_irda, &ircomm_proc_fops); - if (!ent) { - printk(KERN_ERR "ircomm_init: can't create /proc entry!\n"); - return -ENODEV; - } - } -#endif /* CONFIG_PROC_FS */ - - net_info_ratelimited("IrCOMM protocol (Dag Brattli)\n"); - - return 0; -} - -static void __exit ircomm_cleanup(void) -{ - hashbin_delete(ircomm, (FREE_FUNC) __ircomm_close); - -#ifdef CONFIG_PROC_FS - remove_proc_entry("ircomm", proc_irda); -#endif /* CONFIG_PROC_FS */ -} - -/* - * Function ircomm_open (client_notify) - * - * Start a new IrCOMM instance - * - */ -struct ircomm_cb *ircomm_open(notify_t *notify, __u8 service_type, int line) -{ - struct ircomm_cb *self = NULL; - int ret; - - pr_debug("%s(), service_type=0x%02x\n", __func__ , - service_type); - - IRDA_ASSERT(ircomm != NULL, return NULL;); - - self = kzalloc(sizeof(struct ircomm_cb), GFP_KERNEL); - if (self == NULL) - return NULL; - - self->notify = *notify; - self->magic = IRCOMM_MAGIC; - - /* Check if we should use IrLMP or IrTTP */ - if (service_type & IRCOMM_3_WIRE_RAW) { - self->flow_status = FLOW_START; - ret = ircomm_open_lsap(self); - } else - ret = ircomm_open_tsap(self); - - if (ret < 0) { - kfree(self); - return NULL; - } - - self->service_type = service_type; - self->line = line; - - hashbin_insert(ircomm, (irda_queue_t *) self, line, NULL); - - ircomm_next_state(self, IRCOMM_IDLE); - - return self; -} - -EXPORT_SYMBOL(ircomm_open); - -/* - * Function ircomm_close_instance (self) - * - * Remove IrCOMM instance - * - */ -static int __ircomm_close(struct ircomm_cb *self) -{ - /* Disconnect link if any */ - ircomm_do_event(self, IRCOMM_DISCONNECT_REQUEST, NULL, NULL); - - /* Remove TSAP */ - if (self->tsap) { - irttp_close_tsap(self->tsap); - self->tsap = NULL; - } - - /* Remove LSAP */ - if (self->lsap) { - irlmp_close_lsap(self->lsap); - self->lsap = NULL; - } - self->magic = 0; - - kfree(self); - - return 0; -} - -/* - * Function ircomm_close (self) - * - * Closes and removes the specified IrCOMM instance - * - */ -int ircomm_close(struct ircomm_cb *self) -{ - struct ircomm_cb *entry; - - IRDA_ASSERT(self != NULL, return -EIO;); - IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return -EIO;); - - entry = hashbin_remove(ircomm, self->line, NULL); - - IRDA_ASSERT(entry == self, return -1;); - - return __ircomm_close(self); -} - -EXPORT_SYMBOL(ircomm_close); - -/* - * Function ircomm_connect_request (self, service_type) - * - * Impl. of this function is differ from one of the reference. This - * function does discovery as well as sending connect request - * - */ -int ircomm_connect_request(struct ircomm_cb *self, __u8 dlsap_sel, - __u32 saddr, __u32 daddr, struct sk_buff *skb, - __u8 service_type) -{ - struct ircomm_info info; - int ret; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return -1;); - - self->service_type= service_type; - - info.dlsap_sel = dlsap_sel; - info.saddr = saddr; - info.daddr = daddr; - - ret = ircomm_do_event(self, IRCOMM_CONNECT_REQUEST, skb, &info); - - return ret; -} - -EXPORT_SYMBOL(ircomm_connect_request); - -/* - * Function ircomm_connect_indication (self, qos, skb) - * - * Notify user layer about the incoming connection - * - */ -void ircomm_connect_indication(struct ircomm_cb *self, struct sk_buff *skb, - struct ircomm_info *info) -{ - /* - * If there are any data hiding in the control channel, we must - * deliver it first. The side effect is that the control channel - * will be removed from the skb - */ - if (self->notify.connect_indication) - self->notify.connect_indication(self->notify.instance, self, - info->qos, info->max_data_size, - info->max_header_size, skb); - else { - pr_debug("%s(), missing handler\n", __func__); - } -} - -/* - * Function ircomm_connect_response (self, userdata, max_sdu_size) - * - * User accepts connection - * - */ -int ircomm_connect_response(struct ircomm_cb *self, struct sk_buff *userdata) -{ - int ret; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return -1;); - - ret = ircomm_do_event(self, IRCOMM_CONNECT_RESPONSE, userdata, NULL); - - return ret; -} - -EXPORT_SYMBOL(ircomm_connect_response); - -/* - * Function connect_confirm (self, skb) - * - * Notify user layer that the link is now connected - * - */ -void ircomm_connect_confirm(struct ircomm_cb *self, struct sk_buff *skb, - struct ircomm_info *info) -{ - if (self->notify.connect_confirm ) - self->notify.connect_confirm(self->notify.instance, - self, info->qos, - info->max_data_size, - info->max_header_size, skb); - else { - pr_debug("%s(), missing handler\n", __func__); - } -} - -/* - * Function ircomm_data_request (self, userdata) - * - * Send IrCOMM data to peer device - * - */ -int ircomm_data_request(struct ircomm_cb *self, struct sk_buff *skb) -{ - int ret; - - IRDA_ASSERT(self != NULL, return -EFAULT;); - IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return -EFAULT;); - IRDA_ASSERT(skb != NULL, return -EFAULT;); - - ret = ircomm_do_event(self, IRCOMM_DATA_REQUEST, skb, NULL); - - return ret; -} - -EXPORT_SYMBOL(ircomm_data_request); - -/* - * Function ircomm_data_indication (self, skb) - * - * Data arrived, so deliver it to user - * - */ -void ircomm_data_indication(struct ircomm_cb *self, struct sk_buff *skb) -{ - IRDA_ASSERT(skb->len > 0, return;); - - if (self->notify.data_indication) - self->notify.data_indication(self->notify.instance, self, skb); - else { - pr_debug("%s(), missing handler\n", __func__); - } -} - -/* - * Function ircomm_process_data (self, skb) - * - * Data arrived which may contain control channel data - * - */ -void ircomm_process_data(struct ircomm_cb *self, struct sk_buff *skb) -{ - int clen; - - IRDA_ASSERT(skb->len > 0, return;); - - clen = skb->data[0]; - - /* - * Input validation check: a stir4200/mcp2150 combinations sometimes - * results in frames with clen > remaining packet size. These are - * illegal; if we throw away just this frame then it seems to carry on - * fine - */ - if (unlikely(skb->len < (clen + 1))) { - pr_debug("%s() throwing away illegal frame\n", - __func__); - return; - } - - /* - * If there are any data hiding in the control channel, we must - * deliver it first. The side effect is that the control channel - * will be removed from the skb - */ - if (clen > 0) - ircomm_control_indication(self, skb, clen); - - /* Remove control channel from data channel */ - skb_pull(skb, clen+1); - - if (skb->len) - ircomm_data_indication(self, skb); - else { - pr_debug("%s(), data was control info only!\n", - __func__); - } -} - -/* - * Function ircomm_control_request (self, params) - * - * Send control data to peer device - * - */ -int ircomm_control_request(struct ircomm_cb *self, struct sk_buff *skb) -{ - int ret; - - IRDA_ASSERT(self != NULL, return -EFAULT;); - IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return -EFAULT;); - IRDA_ASSERT(skb != NULL, return -EFAULT;); - - ret = ircomm_do_event(self, IRCOMM_CONTROL_REQUEST, skb, NULL); - - return ret; -} - -EXPORT_SYMBOL(ircomm_control_request); - -/* - * Function ircomm_control_indication (self, skb) - * - * Data has arrived on the control channel - * - */ -static void ircomm_control_indication(struct ircomm_cb *self, - struct sk_buff *skb, int clen) -{ - /* Use udata for delivering data on the control channel */ - if (self->notify.udata_indication) { - struct sk_buff *ctrl_skb; - - /* We don't own the skb, so clone it */ - ctrl_skb = skb_clone(skb, GFP_ATOMIC); - if (!ctrl_skb) - return; - - /* Remove data channel from control channel */ - skb_trim(ctrl_skb, clen+1); - - self->notify.udata_indication(self->notify.instance, self, - ctrl_skb); - - /* Drop reference count - - * see ircomm_tty_control_indication(). */ - dev_kfree_skb(ctrl_skb); - } else { - pr_debug("%s(), missing handler\n", __func__); - } -} - -/* - * Function ircomm_disconnect_request (self, userdata, priority) - * - * User layer wants to disconnect the IrCOMM connection - * - */ -int ircomm_disconnect_request(struct ircomm_cb *self, struct sk_buff *userdata) -{ - struct ircomm_info info; - int ret; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return -1;); - - ret = ircomm_do_event(self, IRCOMM_DISCONNECT_REQUEST, userdata, - &info); - return ret; -} - -EXPORT_SYMBOL(ircomm_disconnect_request); - -/* - * Function disconnect_indication (self, skb) - * - * Tell user that the link has been disconnected - * - */ -void ircomm_disconnect_indication(struct ircomm_cb *self, struct sk_buff *skb, - struct ircomm_info *info) -{ - IRDA_ASSERT(info != NULL, return;); - - if (self->notify.disconnect_indication) { - self->notify.disconnect_indication(self->notify.instance, self, - info->reason, skb); - } else { - pr_debug("%s(), missing handler\n", __func__); - } -} - -/* - * Function ircomm_flow_request (self, flow) - * - * - * - */ -void ircomm_flow_request(struct ircomm_cb *self, LOCAL_FLOW flow) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return;); - - if (self->service_type == IRCOMM_3_WIRE_RAW) - return; - - irttp_flow_request(self->tsap, flow); -} - -EXPORT_SYMBOL(ircomm_flow_request); - -#ifdef CONFIG_PROC_FS -static void *ircomm_seq_start(struct seq_file *seq, loff_t *pos) -{ - struct ircomm_cb *self; - loff_t off = 0; - - spin_lock_irq(&ircomm->hb_spinlock); - - for (self = (struct ircomm_cb *) hashbin_get_first(ircomm); - self != NULL; - self = (struct ircomm_cb *) hashbin_get_next(ircomm)) { - if (off++ == *pos) - break; - - } - return self; -} - -static void *ircomm_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - ++*pos; - - return (void *) hashbin_get_next(ircomm); -} - -static void ircomm_seq_stop(struct seq_file *seq, void *v) -{ - spin_unlock_irq(&ircomm->hb_spinlock); -} - -static int ircomm_seq_show(struct seq_file *seq, void *v) -{ - const struct ircomm_cb *self = v; - - IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return -EINVAL; ); - - if(self->line < 0x10) - seq_printf(seq, "ircomm%d", self->line); - else - seq_printf(seq, "irlpt%d", self->line - 0x10); - - seq_printf(seq, - " state: %s, slsap_sel: %#02x, dlsap_sel: %#02x, mode:", - ircomm_state[ self->state], - self->slsap_sel, self->dlsap_sel); - - if(self->service_type & IRCOMM_3_WIRE_RAW) - seq_printf(seq, " 3-wire-raw"); - if(self->service_type & IRCOMM_3_WIRE) - seq_printf(seq, " 3-wire"); - if(self->service_type & IRCOMM_9_WIRE) - seq_printf(seq, " 9-wire"); - if(self->service_type & IRCOMM_CENTRONICS) - seq_printf(seq, " Centronics"); - seq_putc(seq, '\n'); - - return 0; -} - -static const struct seq_operations ircomm_seq_ops = { - .start = ircomm_seq_start, - .next = ircomm_seq_next, - .stop = ircomm_seq_stop, - .show = ircomm_seq_show, -}; - -static int ircomm_seq_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &ircomm_seq_ops); -} -#endif /* CONFIG_PROC_FS */ - -MODULE_AUTHOR("Dag Brattli <dag@brattli.net>"); -MODULE_DESCRIPTION("IrCOMM protocol"); -MODULE_LICENSE("GPL"); - -module_init(ircomm_init); -module_exit(ircomm_cleanup); diff --git a/net/irda/ircomm/ircomm_event.c b/net/irda/ircomm/ircomm_event.c deleted file mode 100644 index b0730ac9f388..000000000000 --- a/net/irda/ircomm/ircomm_event.c +++ /dev/null @@ -1,246 +0,0 @@ -/********************************************************************* - * - * Filename: ircomm_event.c - * Version: 1.0 - * Description: IrCOMM layer state machine - * Status: Stable - * Author: Dag Brattli <dagb@cs.uit.no> - * Created at: Sun Jun 6 20:33:11 1999 - * Modified at: Sun Dec 12 13:44:32 1999 - * Modified by: Dag Brattli <dagb@cs.uit.no> - * - * Copyright (c) 1999 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. - * - ********************************************************************/ - -#include <linux/proc_fs.h> -#include <linux/init.h> - -#include <net/irda/irda.h> -#include <net/irda/irlmp.h> -#include <net/irda/iriap.h> -#include <net/irda/irttp.h> -#include <net/irda/irias_object.h> - -#include <net/irda/ircomm_core.h> -#include <net/irda/ircomm_event.h> - -static int ircomm_state_idle(struct ircomm_cb *self, IRCOMM_EVENT event, - struct sk_buff *skb, struct ircomm_info *info); -static int ircomm_state_waiti(struct ircomm_cb *self, IRCOMM_EVENT event, - struct sk_buff *skb, struct ircomm_info *info); -static int ircomm_state_waitr(struct ircomm_cb *self, IRCOMM_EVENT event, - struct sk_buff *skb, struct ircomm_info *info); -static int ircomm_state_conn(struct ircomm_cb *self, IRCOMM_EVENT event, - struct sk_buff *skb, struct ircomm_info *info); - -const char *const ircomm_state[] = { - "IRCOMM_IDLE", - "IRCOMM_WAITI", - "IRCOMM_WAITR", - "IRCOMM_CONN", -}; - -static const char *const ircomm_event[] __maybe_unused = { - "IRCOMM_CONNECT_REQUEST", - "IRCOMM_CONNECT_RESPONSE", - "IRCOMM_TTP_CONNECT_INDICATION", - "IRCOMM_LMP_CONNECT_INDICATION", - "IRCOMM_TTP_CONNECT_CONFIRM", - "IRCOMM_LMP_CONNECT_CONFIRM", - - "IRCOMM_LMP_DISCONNECT_INDICATION", - "IRCOMM_TTP_DISCONNECT_INDICATION", - "IRCOMM_DISCONNECT_REQUEST", - - "IRCOMM_TTP_DATA_INDICATION", - "IRCOMM_LMP_DATA_INDICATION", - "IRCOMM_DATA_REQUEST", - "IRCOMM_CONTROL_REQUEST", - "IRCOMM_CONTROL_INDICATION", -}; - -static int (*state[])(struct ircomm_cb *self, IRCOMM_EVENT event, - struct sk_buff *skb, struct ircomm_info *info) = -{ - ircomm_state_idle, - ircomm_state_waiti, - ircomm_state_waitr, - ircomm_state_conn, -}; - -/* - * Function ircomm_state_idle (self, event, skb) - * - * IrCOMM is currently idle - * - */ -static int ircomm_state_idle(struct ircomm_cb *self, IRCOMM_EVENT event, - struct sk_buff *skb, struct ircomm_info *info) -{ - int ret = 0; - - switch (event) { - case IRCOMM_CONNECT_REQUEST: - ircomm_next_state(self, IRCOMM_WAITI); - ret = self->issue.connect_request(self, skb, info); - break; - case IRCOMM_TTP_CONNECT_INDICATION: - case IRCOMM_LMP_CONNECT_INDICATION: - ircomm_next_state(self, IRCOMM_WAITR); - ircomm_connect_indication(self, skb, info); - break; - default: - pr_debug("%s(), unknown event: %s\n", __func__ , - ircomm_event[event]); - ret = -EINVAL; - } - return ret; -} - -/* - * Function ircomm_state_waiti (self, event, skb) - * - * The IrCOMM user has requested an IrCOMM connection to the remote - * device and is awaiting confirmation - */ -static int ircomm_state_waiti(struct ircomm_cb *self, IRCOMM_EVENT event, - struct sk_buff *skb, struct ircomm_info *info) -{ - int ret = 0; - - switch (event) { - case IRCOMM_TTP_CONNECT_CONFIRM: - case IRCOMM_LMP_CONNECT_CONFIRM: - ircomm_next_state(self, IRCOMM_CONN); - ircomm_connect_confirm(self, skb, info); - break; - case IRCOMM_TTP_DISCONNECT_INDICATION: - case IRCOMM_LMP_DISCONNECT_INDICATION: - ircomm_next_state(self, IRCOMM_IDLE); - ircomm_disconnect_indication(self, skb, info); - break; - default: - pr_debug("%s(), unknown event: %s\n", __func__ , - ircomm_event[event]); - ret = -EINVAL; - } - return ret; -} - -/* - * Function ircomm_state_waitr (self, event, skb) - * - * IrCOMM has received an incoming connection request and is awaiting - * response from the user - */ -static int ircomm_state_waitr(struct ircomm_cb *self, IRCOMM_EVENT event, - struct sk_buff *skb, struct ircomm_info *info) -{ - int ret = 0; - - switch (event) { - case IRCOMM_CONNECT_RESPONSE: - ircomm_next_state(self, IRCOMM_CONN); - ret = self->issue.connect_response(self, skb); - break; - case IRCOMM_DISCONNECT_REQUEST: - ircomm_next_state(self, IRCOMM_IDLE); - ret = self->issue.disconnect_request(self, skb, info); - break; - case IRCOMM_TTP_DISCONNECT_INDICATION: - case IRCOMM_LMP_DISCONNECT_INDICATION: - ircomm_next_state(self, IRCOMM_IDLE); - ircomm_disconnect_indication(self, skb, info); - break; - default: - pr_debug("%s(), unknown event = %s\n", __func__ , - ircomm_event[event]); - ret = -EINVAL; - } - return ret; -} - -/* - * Function ircomm_state_conn (self, event, skb) - * - * IrCOMM is connected to the peer IrCOMM device - * - */ -static int ircomm_state_conn(struct ircomm_cb *self, IRCOMM_EVENT event, - struct sk_buff *skb, struct ircomm_info *info) -{ - int ret = 0; - - switch (event) { - case IRCOMM_DATA_REQUEST: - ret = self->issue.data_request(self, skb, 0); - break; - case IRCOMM_TTP_DATA_INDICATION: - ircomm_process_data(self, skb); - break; - case IRCOMM_LMP_DATA_INDICATION: - ircomm_data_indication(self, skb); - break; - case IRCOMM_CONTROL_REQUEST: - /* Just send a separate frame for now */ - ret = self->issue.data_request(self, skb, skb->len); - break; - case IRCOMM_TTP_DISCONNECT_INDICATION: - case IRCOMM_LMP_DISCONNECT_INDICATION: - ircomm_next_state(self, IRCOMM_IDLE); - ircomm_disconnect_indication(self, skb, info); - break; - case IRCOMM_DISCONNECT_REQUEST: - ircomm_next_state(self, IRCOMM_IDLE); - ret = self->issue.disconnect_request(self, skb, info); - break; - default: - pr_debug("%s(), unknown event = %s\n", __func__ , - ircomm_event[event]); - ret = -EINVAL; - } - return ret; -} - -/* - * Function ircomm_do_event (self, event, skb) - * - * Process event - * - */ -int ircomm_do_event(struct ircomm_cb *self, IRCOMM_EVENT event, - struct sk_buff *skb, struct ircomm_info *info) -{ - pr_debug("%s: state=%s, event=%s\n", __func__ , - ircomm_state[self->state], ircomm_event[event]); - - return (*state[self->state])(self, event, skb, info); -} - -/* - * Function ircomm_next_state (self, state) - * - * Switch state - * - */ -void ircomm_next_state(struct ircomm_cb *self, IRCOMM_STATE state) -{ - self->state = state; - - pr_debug("%s: next state=%s, service type=%d\n", __func__ , - ircomm_state[self->state], self->service_type); -} diff --git a/net/irda/ircomm/ircomm_lmp.c b/net/irda/ircomm/ircomm_lmp.c deleted file mode 100644 index e4cc847bb933..000000000000 --- a/net/irda/ircomm/ircomm_lmp.c +++ /dev/null @@ -1,350 +0,0 @@ -/********************************************************************* - * - * Filename: ircomm_lmp.c - * Version: 1.0 - * Description: Interface between IrCOMM and IrLMP - * Status: Stable - * Author: Dag Brattli <dagb@cs.uit.no> - * Created at: Sun Jun 6 20:48:27 1999 - * Modified at: Sun Dec 12 13:44:17 1999 - * Modified by: Dag Brattli <dagb@cs.uit.no> - * Sources: Previous IrLPT work by Thomas Davis - * - * Copyright (c) 1999 Dag Brattli, All Rights Reserved. - * Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. - * - ********************************************************************/ - -#include <linux/init.h> -#include <linux/gfp.h> - -#include <net/irda/irda.h> -#include <net/irda/irlmp.h> -#include <net/irda/iriap.h> -#include <net/irda/irda_device.h> /* struct irda_skb_cb */ - -#include <net/irda/ircomm_event.h> -#include <net/irda/ircomm_lmp.h> - - -/* - * Function ircomm_lmp_connect_request (self, userdata) - * - * - * - */ -static int ircomm_lmp_connect_request(struct ircomm_cb *self, - struct sk_buff *userdata, - struct ircomm_info *info) -{ - int ret = 0; - - /* Don't forget to refcount it - should be NULL anyway */ - if(userdata) - skb_get(userdata); - - ret = irlmp_connect_request(self->lsap, info->dlsap_sel, - info->saddr, info->daddr, NULL, userdata); - return ret; -} - -/* - * Function ircomm_lmp_connect_response (self, skb) - * - * - * - */ -static int ircomm_lmp_connect_response(struct ircomm_cb *self, - struct sk_buff *userdata) -{ - struct sk_buff *tx_skb; - - /* Any userdata supplied? */ - if (userdata == NULL) { - tx_skb = alloc_skb(LMP_MAX_HEADER, GFP_ATOMIC); - if (!tx_skb) - return -ENOMEM; - - /* Reserve space for MUX and LAP header */ - skb_reserve(tx_skb, LMP_MAX_HEADER); - } else { - /* - * Check that the client has reserved enough space for - * headers - */ - IRDA_ASSERT(skb_headroom(userdata) >= LMP_MAX_HEADER, - return -1;); - - /* Don't forget to refcount it - should be NULL anyway */ - skb_get(userdata); - tx_skb = userdata; - } - - return irlmp_connect_response(self->lsap, tx_skb); -} - -static int ircomm_lmp_disconnect_request(struct ircomm_cb *self, - struct sk_buff *userdata, - struct ircomm_info *info) -{ - struct sk_buff *tx_skb; - int ret; - - if (!userdata) { - tx_skb = alloc_skb(LMP_MAX_HEADER, GFP_ATOMIC); - if (!tx_skb) - return -ENOMEM; - - /* Reserve space for MUX and LAP header */ - skb_reserve(tx_skb, LMP_MAX_HEADER); - userdata = tx_skb; - } else { - /* Don't forget to refcount it - should be NULL anyway */ - skb_get(userdata); - } - - ret = irlmp_disconnect_request(self->lsap, userdata); - - return ret; -} - -/* - * Function ircomm_lmp_flow_control (skb) - * - * This function is called when a data frame we have sent to IrLAP has - * been deallocated. We do this to make sure we don't flood IrLAP with - * frames, since we are not using the IrTTP flow control mechanism - */ -static void ircomm_lmp_flow_control(struct sk_buff *skb) -{ - struct irda_skb_cb *cb; - struct ircomm_cb *self; - int line; - - IRDA_ASSERT(skb != NULL, return;); - - cb = (struct irda_skb_cb *) skb->cb; - - line = cb->line; - - self = (struct ircomm_cb *) hashbin_lock_find(ircomm, line, NULL); - if (!self) { - pr_debug("%s(), didn't find myself\n", __func__); - return; - } - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return;); - - self->pkt_count--; - - if ((self->pkt_count < 2) && (self->flow_status == FLOW_STOP)) { - pr_debug("%s(), asking TTY to start again!\n", __func__); - self->flow_status = FLOW_START; - if (self->notify.flow_indication) - self->notify.flow_indication(self->notify.instance, - self, FLOW_START); - } -} - -/* - * Function ircomm_lmp_data_request (self, userdata) - * - * Send data frame to peer device - * - */ -static int ircomm_lmp_data_request(struct ircomm_cb *self, - struct sk_buff *skb, - int not_used) -{ - struct irda_skb_cb *cb; - int ret; - - IRDA_ASSERT(skb != NULL, return -1;); - - cb = (struct irda_skb_cb *) skb->cb; - - cb->line = self->line; - - pr_debug("%s(), sending frame\n", __func__); - - /* Don't forget to refcount it - see ircomm_tty_do_softint() */ - skb_get(skb); - - skb_orphan(skb); - skb->destructor = ircomm_lmp_flow_control; - - if ((self->pkt_count++ > 7) && (self->flow_status == FLOW_START)) { - pr_debug("%s(), asking TTY to slow down!\n", __func__); - self->flow_status = FLOW_STOP; - if (self->notify.flow_indication) - self->notify.flow_indication(self->notify.instance, - self, FLOW_STOP); - } - ret = irlmp_data_request(self->lsap, skb); - if (ret) { - net_err_ratelimited("%s(), failed\n", __func__); - /* irlmp_data_request already free the packet */ - } - - return ret; -} - -/* - * Function ircomm_lmp_data_indication (instance, sap, skb) - * - * Incoming data which we must deliver to the state machine, to check - * we are still connected. - */ -static int ircomm_lmp_data_indication(void *instance, void *sap, - struct sk_buff *skb) -{ - struct ircomm_cb *self = (struct ircomm_cb *) instance; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return -1;); - IRDA_ASSERT(skb != NULL, return -1;); - - ircomm_do_event(self, IRCOMM_LMP_DATA_INDICATION, skb, NULL); - - /* Drop reference count - see ircomm_tty_data_indication(). */ - dev_kfree_skb(skb); - - return 0; -} - -/* - * Function ircomm_lmp_connect_confirm (instance, sap, qos, max_sdu_size, - * max_header_size, skb) - * - * Connection has been confirmed by peer device - * - */ -static void ircomm_lmp_connect_confirm(void *instance, void *sap, - struct qos_info *qos, - __u32 max_seg_size, - __u8 max_header_size, - struct sk_buff *skb) -{ - struct ircomm_cb *self = (struct ircomm_cb *) instance; - struct ircomm_info info; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return;); - IRDA_ASSERT(skb != NULL, return;); - IRDA_ASSERT(qos != NULL, return;); - - info.max_data_size = max_seg_size; - info.max_header_size = max_header_size; - info.qos = qos; - - ircomm_do_event(self, IRCOMM_LMP_CONNECT_CONFIRM, skb, &info); - - /* Drop reference count - see ircomm_tty_connect_confirm(). */ - dev_kfree_skb(skb); -} - -/* - * Function ircomm_lmp_connect_indication (instance, sap, qos, max_sdu_size, - * max_header_size, skb) - * - * Peer device wants to make a connection with us - * - */ -static void ircomm_lmp_connect_indication(void *instance, void *sap, - struct qos_info *qos, - __u32 max_seg_size, - __u8 max_header_size, - struct sk_buff *skb) -{ - struct ircomm_cb *self = (struct ircomm_cb *)instance; - struct ircomm_info info; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return;); - IRDA_ASSERT(skb != NULL, return;); - IRDA_ASSERT(qos != NULL, return;); - - info.max_data_size = max_seg_size; - info.max_header_size = max_header_size; - info.qos = qos; - - ircomm_do_event(self, IRCOMM_LMP_CONNECT_INDICATION, skb, &info); - - /* Drop reference count - see ircomm_tty_connect_indication(). */ - dev_kfree_skb(skb); -} - -/* - * Function ircomm_lmp_disconnect_indication (instance, sap, reason, skb) - * - * Peer device has closed the connection, or the link went down for some - * other reason - */ -static void ircomm_lmp_disconnect_indication(void *instance, void *sap, - LM_REASON reason, - struct sk_buff *skb) -{ - struct ircomm_cb *self = (struct ircomm_cb *) instance; - struct ircomm_info info; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return;); - - info.reason = reason; - - ircomm_do_event(self, IRCOMM_LMP_DISCONNECT_INDICATION, skb, &info); - - /* Drop reference count - see ircomm_tty_disconnect_indication(). */ - if(skb) - dev_kfree_skb(skb); -} -/* - * Function ircomm_open_lsap (self) - * - * Open LSAP. This function will only be used when using "raw" services - * - */ -int ircomm_open_lsap(struct ircomm_cb *self) -{ - notify_t notify; - - /* Register callbacks */ - irda_notify_init(¬ify); - notify.data_indication = ircomm_lmp_data_indication; - notify.connect_confirm = ircomm_lmp_connect_confirm; - notify.connect_indication = ircomm_lmp_connect_indication; - notify.disconnect_indication = ircomm_lmp_disconnect_indication; - notify.instance = self; - strlcpy(notify.name, "IrCOMM", sizeof(notify.name)); - - self->lsap = irlmp_open_lsap(LSAP_ANY, ¬ify, 0); - if (!self->lsap) { - pr_debug("%sfailed to allocate tsap\n", __func__); - return -1; - } - self->slsap_sel = self->lsap->slsap_sel; - - /* - * Initialize the call-table for issuing commands - */ - self->issue.data_request = ircomm_lmp_data_request; - self->issue.connect_request = ircomm_lmp_connect_request; - self->issue.connect_response = ircomm_lmp_connect_response; - self->issue.disconnect_request = ircomm_lmp_disconnect_request; - - return 0; -} diff --git a/net/irda/ircomm/ircomm_param.c b/net/irda/ircomm/ircomm_param.c deleted file mode 100644 index 5728e76ca6d5..000000000000 --- a/net/irda/ircomm/ircomm_param.c +++ /dev/null @@ -1,501 +0,0 @@ -/********************************************************************* - * - * Filename: ircomm_param.c - * Version: 1.0 - * Description: Parameter handling for the IrCOMM protocol - * Status: Experimental. - * Author: Dag Brattli <dagb@cs.uit.no> - * Created at: Mon Jun 7 10:25:11 1999 - * Modified at: Sun Jan 30 14:32:03 2000 - * Modified by: Dag Brattli <dagb@cs.uit.no> - * - * Copyright (c) 1999-2000 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. - * - ********************************************************************/ - -#include <linux/gfp.h> -#include <linux/workqueue.h> -#include <linux/interrupt.h> - -#include <net/irda/irda.h> -#include <net/irda/parameters.h> - -#include <net/irda/ircomm_core.h> -#include <net/irda/ircomm_tty_attach.h> -#include <net/irda/ircomm_tty.h> - -#include <net/irda/ircomm_param.h> - -static int ircomm_param_service_type(void *instance, irda_param_t *param, - int get); -static int ircomm_param_port_type(void *instance, irda_param_t *param, - int get); -static int ircomm_param_port_name(void *instance, irda_param_t *param, - int get); -static int ircomm_param_service_type(void *instance, irda_param_t *param, - int get); -static int ircomm_param_data_rate(void *instance, irda_param_t *param, - int get); -static int ircomm_param_data_format(void *instance, irda_param_t *param, - int get); -static int ircomm_param_flow_control(void *instance, irda_param_t *param, - int get); -static int ircomm_param_xon_xoff(void *instance, irda_param_t *param, int get); -static int ircomm_param_enq_ack(void *instance, irda_param_t *param, int get); -static int ircomm_param_line_status(void *instance, irda_param_t *param, - int get); -static int ircomm_param_dte(void *instance, irda_param_t *param, int get); -static int ircomm_param_dce(void *instance, irda_param_t *param, int get); -static int ircomm_param_poll(void *instance, irda_param_t *param, int get); - -static const pi_minor_info_t pi_minor_call_table_common[] = { - { ircomm_param_service_type, PV_INT_8_BITS }, - { ircomm_param_port_type, PV_INT_8_BITS }, - { ircomm_param_port_name, PV_STRING } -}; -static const pi_minor_info_t pi_minor_call_table_non_raw[] = { - { ircomm_param_data_rate, PV_INT_32_BITS | PV_BIG_ENDIAN }, - { ircomm_param_data_format, PV_INT_8_BITS }, - { ircomm_param_flow_control, PV_INT_8_BITS }, - { ircomm_param_xon_xoff, PV_INT_16_BITS }, - { ircomm_param_enq_ack, PV_INT_16_BITS }, - { ircomm_param_line_status, PV_INT_8_BITS } -}; -static const pi_minor_info_t pi_minor_call_table_9_wire[] = { - { ircomm_param_dte, PV_INT_8_BITS }, - { ircomm_param_dce, PV_INT_8_BITS }, - { ircomm_param_poll, PV_NO_VALUE }, -}; - -static const pi_major_info_t pi_major_call_table[] = { - { pi_minor_call_table_common, 3 }, - { pi_minor_call_table_non_raw, 6 }, - { pi_minor_call_table_9_wire, 3 } -/* { pi_minor_call_table_centronics } */ -}; - -pi_param_info_t ircomm_param_info = { pi_major_call_table, 3, 0x0f, 4 }; - -/* - * Function ircomm_param_request (self, pi, flush) - * - * Queue a parameter for the control channel - * - */ -int ircomm_param_request(struct ircomm_tty_cb *self, __u8 pi, int flush) -{ - unsigned long flags; - struct sk_buff *skb; - int count; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;); - - /* Make sure we don't send parameters for raw mode */ - if (self->service_type == IRCOMM_3_WIRE_RAW) - return 0; - - spin_lock_irqsave(&self->spinlock, flags); - - skb = self->ctrl_skb; - if (!skb) { - skb = alloc_skb(256, GFP_ATOMIC); - if (!skb) { - spin_unlock_irqrestore(&self->spinlock, flags); - return -ENOMEM; - } - - skb_reserve(skb, self->max_header_size); - self->ctrl_skb = skb; - } - /* - * Inserting is a little bit tricky since we don't know how much - * room we will need. But this should hopefully work OK - */ - count = irda_param_insert(self, pi, skb_tail_pointer(skb), - skb_tailroom(skb), &ircomm_param_info); - if (count < 0) { - net_warn_ratelimited("%s(), no room for parameter!\n", - __func__); - spin_unlock_irqrestore(&self->spinlock, flags); - return -1; - } - skb_put(skb, count); - pr_debug("%s(), skb->len=%d\n", __func__, skb->len); - - spin_unlock_irqrestore(&self->spinlock, flags); - - if (flush) { - /* ircomm_tty_do_softint will take care of the rest */ - schedule_work(&self->tqueue); - } - - return count; -} - -/* - * Function ircomm_param_service_type (self, buf, len) - * - * Handle service type, this function will both be called after the LM-IAS - * query and then the remote device sends its initial parameters - * - */ -static int ircomm_param_service_type(void *instance, irda_param_t *param, - int get) -{ - struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance; - __u8 service_type = (__u8) param->pv.i; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;); - - if (get) { - param->pv.i = self->settings.service_type; - return 0; - } - - /* Find all common service types */ - service_type &= self->service_type; - if (!service_type) { - pr_debug("%s(), No common service type to use!\n", __func__); - return -1; - } - pr_debug("%s(), services in common=%02x\n", __func__ , - service_type); - - /* - * Now choose a preferred service type of those available - */ - if (service_type & IRCOMM_CENTRONICS) - self->settings.service_type = IRCOMM_CENTRONICS; - else if (service_type & IRCOMM_9_WIRE) - self->settings.service_type = IRCOMM_9_WIRE; - else if (service_type & IRCOMM_3_WIRE) - self->settings.service_type = IRCOMM_3_WIRE; - else if (service_type & IRCOMM_3_WIRE_RAW) - self->settings.service_type = IRCOMM_3_WIRE_RAW; - - pr_debug("%s(), resulting service type=0x%02x\n", __func__ , - self->settings.service_type); - - /* - * Now the line is ready for some communication. Check if we are a - * server, and send over some initial parameters. - * Client do it in ircomm_tty_state_setup(). - * Note : we may get called from ircomm_tty_getvalue_confirm(), - * therefore before we even have open any socket. And self->client - * is initialised to TRUE only later. So, we check if the link is - * really initialised. - Jean II - */ - if ((self->max_header_size != IRCOMM_TTY_HDR_UNINITIALISED) && - (!self->client) && - (self->settings.service_type != IRCOMM_3_WIRE_RAW)) - { - /* Init connection */ - ircomm_tty_send_initial_parameters(self); - ircomm_tty_link_established(self); - } - - return 0; -} - -/* - * Function ircomm_param_port_type (self, param) - * - * The port type parameter tells if the devices are serial or parallel. - * Since we only advertise serial service, this parameter should only - * be equal to IRCOMM_SERIAL. - */ -static int ircomm_param_port_type(void *instance, irda_param_t *param, int get) -{ - struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;); - - if (get) - param->pv.i = IRCOMM_SERIAL; - else { - self->settings.port_type = (__u8) param->pv.i; - - pr_debug("%s(), port type=%d\n", __func__ , - self->settings.port_type); - } - return 0; -} - -/* - * Function ircomm_param_port_name (self, param) - * - * Exchange port name - * - */ -static int ircomm_param_port_name(void *instance, irda_param_t *param, int get) -{ - struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;); - - if (get) { - pr_debug("%s(), not imp!\n", __func__); - } else { - pr_debug("%s(), port-name=%s\n", __func__ , param->pv.c); - strncpy(self->settings.port_name, param->pv.c, 32); - } - - return 0; -} - -/* - * Function ircomm_param_data_rate (self, param) - * - * Exchange data rate to be used in this settings - * - */ -static int ircomm_param_data_rate(void *instance, irda_param_t *param, int get) -{ - struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;); - - if (get) - param->pv.i = self->settings.data_rate; - else - self->settings.data_rate = param->pv.i; - - pr_debug("%s(), data rate = %d\n", __func__ , param->pv.i); - - return 0; -} - -/* - * Function ircomm_param_data_format (self, param) - * - * Exchange data format to be used in this settings - * - */ -static int ircomm_param_data_format(void *instance, irda_param_t *param, - int get) -{ - struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;); - - if (get) - param->pv.i = self->settings.data_format; - else - self->settings.data_format = (__u8) param->pv.i; - - return 0; -} - -/* - * Function ircomm_param_flow_control (self, param) - * - * Exchange flow control settings to be used in this settings - * - */ -static int ircomm_param_flow_control(void *instance, irda_param_t *param, - int get) -{ - struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;); - - if (get) - param->pv.i = self->settings.flow_control; - else - self->settings.flow_control = (__u8) param->pv.i; - - pr_debug("%s(), flow control = 0x%02x\n", __func__ , (__u8)param->pv.i); - - return 0; -} - -/* - * Function ircomm_param_xon_xoff (self, param) - * - * Exchange XON/XOFF characters - * - */ -static int ircomm_param_xon_xoff(void *instance, irda_param_t *param, int get) -{ - struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;); - - if (get) { - param->pv.i = self->settings.xonxoff[0]; - param->pv.i |= self->settings.xonxoff[1] << 8; - } else { - self->settings.xonxoff[0] = (__u16) param->pv.i & 0xff; - self->settings.xonxoff[1] = (__u16) param->pv.i >> 8; - } - - pr_debug("%s(), XON/XOFF = 0x%02x,0x%02x\n", __func__ , - param->pv.i & 0xff, param->pv.i >> 8); - - return 0; -} - -/* - * Function ircomm_param_enq_ack (self, param) - * - * Exchange ENQ/ACK characters - * - */ -static int ircomm_param_enq_ack(void *instance, irda_param_t *param, int get) -{ - struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;); - - if (get) { - param->pv.i = self->settings.enqack[0]; - param->pv.i |= self->settings.enqack[1] << 8; - } else { - self->settings.enqack[0] = (__u16) param->pv.i & 0xff; - self->settings.enqack[1] = (__u16) param->pv.i >> 8; - } - - pr_debug("%s(), ENQ/ACK = 0x%02x,0x%02x\n", __func__ , - param->pv.i & 0xff, param->pv.i >> 8); - - return 0; -} - -/* - * Function ircomm_param_line_status (self, param) - * - * - * - */ -static int ircomm_param_line_status(void *instance, irda_param_t *param, - int get) -{ - pr_debug("%s(), not impl.\n", __func__); - - return 0; -} - -/* - * Function ircomm_param_dte (instance, param) - * - * If we get here, there must be some sort of null-modem connection, and - * we are probably working in server mode as well. - */ -static int ircomm_param_dte(void *instance, irda_param_t *param, int get) -{ - struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance; - __u8 dte; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;); - - if (get) - param->pv.i = self->settings.dte; - else { - dte = (__u8) param->pv.i; - - self->settings.dce = 0; - - if (dte & IRCOMM_DELTA_DTR) - self->settings.dce |= (IRCOMM_DELTA_DSR| - IRCOMM_DELTA_RI | - IRCOMM_DELTA_CD); - if (dte & IRCOMM_DTR) - self->settings.dce |= (IRCOMM_DSR| - IRCOMM_RI | - IRCOMM_CD); - - if (dte & IRCOMM_DELTA_RTS) - self->settings.dce |= IRCOMM_DELTA_CTS; - if (dte & IRCOMM_RTS) - self->settings.dce |= IRCOMM_CTS; - - /* Take appropriate actions */ - ircomm_tty_check_modem_status(self); - - /* Null modem cable emulator */ - self->settings.null_modem = TRUE; - } - - return 0; -} - -/* - * Function ircomm_param_dce (instance, param) - * - * - * - */ -static int ircomm_param_dce(void *instance, irda_param_t *param, int get) -{ - struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance; - __u8 dce; - - pr_debug("%s(), dce = 0x%02x\n", __func__ , (__u8)param->pv.i); - - dce = (__u8) param->pv.i; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;); - - self->settings.dce = dce; - - /* Check if any of the settings have changed */ - if (dce & 0x0f) { - if (dce & IRCOMM_DELTA_CTS) { - pr_debug("%s(), CTS\n", __func__); - } - } - - ircomm_tty_check_modem_status(self); - - return 0; -} - -/* - * Function ircomm_param_poll (instance, param) - * - * Called when the peer device is polling for the line settings - * - */ -static int ircomm_param_poll(void *instance, irda_param_t *param, int get) -{ - struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;); - - /* Poll parameters are always of length 0 (just a signal) */ - if (!get) { - /* Respond with DTE line settings */ - ircomm_param_request(self, IRCOMM_DTE, TRUE); - } - return 0; -} - - - - - diff --git a/net/irda/ircomm/ircomm_ttp.c b/net/irda/ircomm/ircomm_ttp.c deleted file mode 100644 index 4b81e0934770..000000000000 --- a/net/irda/ircomm/ircomm_ttp.c +++ /dev/null @@ -1,350 +0,0 @@ -/********************************************************************* - * - * Filename: ircomm_ttp.c - * Version: 1.0 - * Description: Interface between IrCOMM and IrTTP - * Status: Stable - * Author: Dag Brattli <dagb@cs.uit.no> - * Created at: Sun Jun 6 20:48:27 1999 - * Modified at: Mon Dec 13 11:35:13 1999 - * Modified by: Dag Brattli <dagb@cs.uit.no> - * - * Copyright (c) 1999 Dag Brattli, All Rights Reserved. - * Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. - * - ********************************************************************/ - -#include <linux/init.h> - -#include <net/irda/irda.h> -#include <net/irda/irlmp.h> -#include <net/irda/iriap.h> -#include <net/irda/irttp.h> - -#include <net/irda/ircomm_event.h> -#include <net/irda/ircomm_ttp.h> - -static int ircomm_ttp_data_indication(void *instance, void *sap, - struct sk_buff *skb); -static void ircomm_ttp_connect_confirm(void *instance, void *sap, - struct qos_info *qos, - __u32 max_sdu_size, - __u8 max_header_size, - struct sk_buff *skb); -static void ircomm_ttp_connect_indication(void *instance, void *sap, - struct qos_info *qos, - __u32 max_sdu_size, - __u8 max_header_size, - struct sk_buff *skb); -static void ircomm_ttp_flow_indication(void *instance, void *sap, - LOCAL_FLOW cmd); -static void ircomm_ttp_disconnect_indication(void *instance, void *sap, - LM_REASON reason, - struct sk_buff *skb); -static int ircomm_ttp_data_request(struct ircomm_cb *self, - struct sk_buff *skb, - int clen); -static int ircomm_ttp_connect_request(struct ircomm_cb *self, - struct sk_buff *userdata, - struct ircomm_info *info); -static int ircomm_ttp_connect_response(struct ircomm_cb *self, - struct sk_buff *userdata); -static int ircomm_ttp_disconnect_request(struct ircomm_cb *self, - struct sk_buff *userdata, - struct ircomm_info *info); - -/* - * Function ircomm_open_tsap (self) - * - * - * - */ -int ircomm_open_tsap(struct ircomm_cb *self) -{ - notify_t notify; - - /* Register callbacks */ - irda_notify_init(¬ify); - notify.data_indication = ircomm_ttp_data_indication; - notify.connect_confirm = ircomm_ttp_connect_confirm; - notify.connect_indication = ircomm_ttp_connect_indication; - notify.flow_indication = ircomm_ttp_flow_indication; - notify.disconnect_indication = ircomm_ttp_disconnect_indication; - notify.instance = self; - strlcpy(notify.name, "IrCOMM", sizeof(notify.name)); - - self->tsap = irttp_open_tsap(LSAP_ANY, DEFAULT_INITIAL_CREDIT, - ¬ify); - if (!self->tsap) { - pr_debug("%sfailed to allocate tsap\n", __func__); - return -1; - } - self->slsap_sel = self->tsap->stsap_sel; - - /* - * Initialize the call-table for issuing commands - */ - self->issue.data_request = ircomm_ttp_data_request; - self->issue.connect_request = ircomm_ttp_connect_request; - self->issue.connect_response = ircomm_ttp_connect_response; - self->issue.disconnect_request = ircomm_ttp_disconnect_request; - - return 0; -} - -/* - * Function ircomm_ttp_connect_request (self, userdata) - * - * - * - */ -static int ircomm_ttp_connect_request(struct ircomm_cb *self, - struct sk_buff *userdata, - struct ircomm_info *info) -{ - int ret = 0; - - /* Don't forget to refcount it - should be NULL anyway */ - if(userdata) - skb_get(userdata); - - ret = irttp_connect_request(self->tsap, info->dlsap_sel, - info->saddr, info->daddr, NULL, - TTP_SAR_DISABLE, userdata); - - return ret; -} - -/* - * Function ircomm_ttp_connect_response (self, skb) - * - * - * - */ -static int ircomm_ttp_connect_response(struct ircomm_cb *self, - struct sk_buff *userdata) -{ - int ret; - - /* Don't forget to refcount it - should be NULL anyway */ - if(userdata) - skb_get(userdata); - - ret = irttp_connect_response(self->tsap, TTP_SAR_DISABLE, userdata); - - return ret; -} - -/* - * Function ircomm_ttp_data_request (self, userdata) - * - * Send IrCOMM data to IrTTP layer. Currently we do not try to combine - * control data with pure data, so they will be sent as separate frames. - * Should not be a big problem though, since control frames are rare. But - * some of them are sent after connection establishment, so this can - * increase the latency a bit. - */ -static int ircomm_ttp_data_request(struct ircomm_cb *self, - struct sk_buff *skb, - int clen) -{ - int ret; - - IRDA_ASSERT(skb != NULL, return -1;); - - pr_debug("%s(), clen=%d\n", __func__ , clen); - - /* - * Insert clen field, currently we either send data only, or control - * only frames, to make things easier and avoid queueing - */ - IRDA_ASSERT(skb_headroom(skb) >= IRCOMM_HEADER_SIZE, return -1;); - - /* Don't forget to refcount it - see ircomm_tty_do_softint() */ - skb_get(skb); - - skb_push(skb, IRCOMM_HEADER_SIZE); - - skb->data[0] = clen; - - ret = irttp_data_request(self->tsap, skb); - if (ret) { - net_err_ratelimited("%s(), failed\n", __func__); - /* irttp_data_request already free the packet */ - } - - return ret; -} - -/* - * Function ircomm_ttp_data_indication (instance, sap, skb) - * - * Incoming data - * - */ -static int ircomm_ttp_data_indication(void *instance, void *sap, - struct sk_buff *skb) -{ - struct ircomm_cb *self = (struct ircomm_cb *) instance; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return -1;); - IRDA_ASSERT(skb != NULL, return -1;); - - ircomm_do_event(self, IRCOMM_TTP_DATA_INDICATION, skb, NULL); - - /* Drop reference count - see ircomm_tty_data_indication(). */ - dev_kfree_skb(skb); - - return 0; -} - -static void ircomm_ttp_connect_confirm(void *instance, void *sap, - struct qos_info *qos, - __u32 max_sdu_size, - __u8 max_header_size, - struct sk_buff *skb) -{ - struct ircomm_cb *self = (struct ircomm_cb *) instance; - struct ircomm_info info; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return;); - IRDA_ASSERT(skb != NULL, return;); - IRDA_ASSERT(qos != NULL, goto out;); - - if (max_sdu_size != TTP_SAR_DISABLE) { - net_err_ratelimited("%s(), SAR not allowed for IrCOMM!\n", - __func__); - goto out; - } - - info.max_data_size = irttp_get_max_seg_size(self->tsap) - - IRCOMM_HEADER_SIZE; - info.max_header_size = max_header_size + IRCOMM_HEADER_SIZE; - info.qos = qos; - - ircomm_do_event(self, IRCOMM_TTP_CONNECT_CONFIRM, skb, &info); - -out: - /* Drop reference count - see ircomm_tty_connect_confirm(). */ - dev_kfree_skb(skb); -} - -/* - * Function ircomm_ttp_connect_indication (instance, sap, qos, max_sdu_size, - * max_header_size, skb) - * - * - * - */ -static void ircomm_ttp_connect_indication(void *instance, void *sap, - struct qos_info *qos, - __u32 max_sdu_size, - __u8 max_header_size, - struct sk_buff *skb) -{ - struct ircomm_cb *self = (struct ircomm_cb *)instance; - struct ircomm_info info; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return;); - IRDA_ASSERT(skb != NULL, return;); - IRDA_ASSERT(qos != NULL, goto out;); - - if (max_sdu_size != TTP_SAR_DISABLE) { - net_err_ratelimited("%s(), SAR not allowed for IrCOMM!\n", - __func__); - goto out; - } - - info.max_data_size = irttp_get_max_seg_size(self->tsap) - - IRCOMM_HEADER_SIZE; - info.max_header_size = max_header_size + IRCOMM_HEADER_SIZE; - info.qos = qos; - - ircomm_do_event(self, IRCOMM_TTP_CONNECT_INDICATION, skb, &info); - -out: - /* Drop reference count - see ircomm_tty_connect_indication(). */ - dev_kfree_skb(skb); -} - -/* - * Function ircomm_ttp_disconnect_request (self, userdata, info) - * - * - * - */ -static int ircomm_ttp_disconnect_request(struct ircomm_cb *self, - struct sk_buff *userdata, - struct ircomm_info *info) -{ - int ret; - - /* Don't forget to refcount it - should be NULL anyway */ - if(userdata) - skb_get(userdata); - - ret = irttp_disconnect_request(self->tsap, userdata, P_NORMAL); - - return ret; -} - -/* - * Function ircomm_ttp_disconnect_indication (instance, sap, reason, skb) - * - * - * - */ -static void ircomm_ttp_disconnect_indication(void *instance, void *sap, - LM_REASON reason, - struct sk_buff *skb) -{ - struct ircomm_cb *self = (struct ircomm_cb *) instance; - struct ircomm_info info; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return;); - - info.reason = reason; - - ircomm_do_event(self, IRCOMM_TTP_DISCONNECT_INDICATION, skb, &info); - - /* Drop reference count - see ircomm_tty_disconnect_indication(). */ - if(skb) - dev_kfree_skb(skb); -} - -/* - * Function ircomm_ttp_flow_indication (instance, sap, cmd) - * - * Layer below is telling us to start or stop the flow of data - * - */ -static void ircomm_ttp_flow_indication(void *instance, void *sap, - LOCAL_FLOW cmd) -{ - struct ircomm_cb *self = (struct ircomm_cb *) instance; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRCOMM_MAGIC, return;); - - if (self->notify.flow_indication) - self->notify.flow_indication(self->notify.instance, self, cmd); -} - - diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c deleted file mode 100644 index ec157c3419b5..000000000000 --- a/net/irda/ircomm/ircomm_tty.c +++ /dev/null @@ -1,1329 +0,0 @@ -/********************************************************************* - * - * Filename: ircomm_tty.c - * Version: 1.0 - * Description: IrCOMM serial TTY driver - * Status: Experimental. - * Author: Dag Brattli <dagb@cs.uit.no> - * Created at: Sun Jun 6 21:00:56 1999 - * Modified at: Wed Feb 23 00:09:02 2000 - * Modified by: Dag Brattli <dagb@cs.uit.no> - * Sources: serial.c and previous IrCOMM work by Takahide Higuchi - * - * Copyright (c) 1999-2000 Dag Brattli, All Rights Reserved. - * Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. - * - ********************************************************************/ - -#include <linux/init.h> -#include <linux/module.h> -#include <linux/fs.h> -#include <linux/slab.h> -#include <linux/sched/signal.h> -#include <linux/seq_file.h> -#include <linux/termios.h> -#include <linux/tty.h> -#include <linux/tty_flip.h> -#include <linux/interrupt.h> -#include <linux/device.h> /* for MODULE_ALIAS_CHARDEV_MAJOR */ - -#include <linux/uaccess.h> - -#include <net/irda/irda.h> -#include <net/irda/irmod.h> - -#include <net/irda/ircomm_core.h> -#include <net/irda/ircomm_param.h> -#include <net/irda/ircomm_tty_attach.h> -#include <net/irda/ircomm_tty.h> - -static int ircomm_tty_install(struct tty_driver *driver, - struct tty_struct *tty); -static int ircomm_tty_open(struct tty_struct *tty, struct file *filp); -static void ircomm_tty_close(struct tty_struct * tty, struct file *filp); -static int ircomm_tty_write(struct tty_struct * tty, - const unsigned char *buf, int count); -static int ircomm_tty_write_room(struct tty_struct *tty); -static void ircomm_tty_throttle(struct tty_struct *tty); -static void ircomm_tty_unthrottle(struct tty_struct *tty); -static int ircomm_tty_chars_in_buffer(struct tty_struct *tty); -static void ircomm_tty_flush_buffer(struct tty_struct *tty); -static void ircomm_tty_send_xchar(struct tty_struct *tty, char ch); -static void ircomm_tty_wait_until_sent(struct tty_struct *tty, int timeout); -static void ircomm_tty_hangup(struct tty_struct *tty); -static void ircomm_tty_do_softint(struct work_struct *work); -static void ircomm_tty_shutdown(struct ircomm_tty_cb *self); -static void ircomm_tty_stop(struct tty_struct *tty); - -static int ircomm_tty_data_indication(void *instance, void *sap, - struct sk_buff *skb); -static int ircomm_tty_control_indication(void *instance, void *sap, - struct sk_buff *skb); -static void ircomm_tty_flow_indication(void *instance, void *sap, - LOCAL_FLOW cmd); -#ifdef CONFIG_PROC_FS -static const struct file_operations ircomm_tty_proc_fops; -#endif /* CONFIG_PROC_FS */ -static struct tty_driver *driver; - -static hashbin_t *ircomm_tty = NULL; - -static const struct tty_operations ops = { - .install = ircomm_tty_install, - .open = ircomm_tty_open, - .close = ircomm_tty_close, - .write = ircomm_tty_write, - .write_room = ircomm_tty_write_room, - .chars_in_buffer = ircomm_tty_chars_in_buffer, - .flush_buffer = ircomm_tty_flush_buffer, - .ioctl = ircomm_tty_ioctl, /* ircomm_tty_ioctl.c */ - .tiocmget = ircomm_tty_tiocmget, /* ircomm_tty_ioctl.c */ - .tiocmset = ircomm_tty_tiocmset, /* ircomm_tty_ioctl.c */ - .throttle = ircomm_tty_throttle, - .unthrottle = ircomm_tty_unthrottle, - .send_xchar = ircomm_tty_send_xchar, - .set_termios = ircomm_tty_set_termios, - .stop = ircomm_tty_stop, - .start = ircomm_tty_start, - .hangup = ircomm_tty_hangup, - .wait_until_sent = ircomm_tty_wait_until_sent, -#ifdef CONFIG_PROC_FS - .proc_fops = &ircomm_tty_proc_fops, -#endif /* CONFIG_PROC_FS */ -}; - -static void ircomm_port_raise_dtr_rts(struct tty_port *port, int raise) -{ - struct ircomm_tty_cb *self = container_of(port, struct ircomm_tty_cb, - port); - /* - * Here, we use to lock those two guys, but as ircomm_param_request() - * does it itself, I don't see the point (and I see the deadlock). - * Jean II - */ - if (raise) - self->settings.dte |= IRCOMM_RTS | IRCOMM_DTR; - else - self->settings.dte &= ~(IRCOMM_RTS | IRCOMM_DTR); - - ircomm_param_request(self, IRCOMM_DTE, TRUE); -} - -static int ircomm_port_carrier_raised(struct tty_port *port) -{ - struct ircomm_tty_cb *self = container_of(port, struct ircomm_tty_cb, - port); - return self->settings.dce & IRCOMM_CD; -} - -static const struct tty_port_operations ircomm_port_ops = { - .dtr_rts = ircomm_port_raise_dtr_rts, - .carrier_raised = ircomm_port_carrier_raised, -}; - -/* - * Function ircomm_tty_init() - * - * Init IrCOMM TTY layer/driver - * - */ -static int __init ircomm_tty_init(void) -{ - driver = alloc_tty_driver(IRCOMM_TTY_PORTS); - if (!driver) - return -ENOMEM; - ircomm_tty = hashbin_new(HB_LOCK); - if (ircomm_tty == NULL) { - net_err_ratelimited("%s(), can't allocate hashbin!\n", - __func__); - put_tty_driver(driver); - return -ENOMEM; - } - - driver->driver_name = "ircomm"; - driver->name = "ircomm"; - driver->major = IRCOMM_TTY_MAJOR; - driver->minor_start = IRCOMM_TTY_MINOR; - driver->type = TTY_DRIVER_TYPE_SERIAL; - driver->subtype = SERIAL_TYPE_NORMAL; - driver->init_termios = tty_std_termios; - driver->init_termios.c_cflag = B9600 | CS8 | CREAD | HUPCL | CLOCAL; - driver->flags = TTY_DRIVER_REAL_RAW; - tty_set_operations(driver, &ops); - if (tty_register_driver(driver)) { - net_err_ratelimited("%s(): Couldn't register serial driver\n", - __func__); - put_tty_driver(driver); - return -1; - } - return 0; -} - -static void __exit __ircomm_tty_cleanup(struct ircomm_tty_cb *self) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;); - - ircomm_tty_shutdown(self); - - self->magic = 0; - tty_port_destroy(&self->port); - kfree(self); -} - -/* - * Function ircomm_tty_cleanup () - * - * Remove IrCOMM TTY layer/driver - * - */ -static void __exit ircomm_tty_cleanup(void) -{ - int ret; - - ret = tty_unregister_driver(driver); - if (ret) { - net_err_ratelimited("%s(), failed to unregister driver\n", - __func__); - return; - } - - hashbin_delete(ircomm_tty, (FREE_FUNC) __ircomm_tty_cleanup); - put_tty_driver(driver); -} - -/* - * Function ircomm_startup (self) - * - * - * - */ -static int ircomm_tty_startup(struct ircomm_tty_cb *self) -{ - notify_t notify; - int ret = -ENODEV; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;); - - /* Check if already open */ - if (tty_port_initialized(&self->port)) { - pr_debug("%s(), already open so break out!\n", __func__); - return 0; - } - tty_port_set_initialized(&self->port, 1); - - /* Register with IrCOMM */ - irda_notify_init(¬ify); - /* These callbacks we must handle ourselves */ - notify.data_indication = ircomm_tty_data_indication; - notify.udata_indication = ircomm_tty_control_indication; - notify.flow_indication = ircomm_tty_flow_indication; - - /* Use the ircomm_tty interface for these ones */ - notify.disconnect_indication = ircomm_tty_disconnect_indication; - notify.connect_confirm = ircomm_tty_connect_confirm; - notify.connect_indication = ircomm_tty_connect_indication; - strlcpy(notify.name, "ircomm_tty", sizeof(notify.name)); - notify.instance = self; - - if (!self->ircomm) { - self->ircomm = ircomm_open(¬ify, self->service_type, - self->line); - } - if (!self->ircomm) - goto err; - - self->slsap_sel = self->ircomm->slsap_sel; - - /* Connect IrCOMM link with remote device */ - ret = ircomm_tty_attach_cable(self); - if (ret < 0) { - net_err_ratelimited("%s(), error attaching cable!\n", __func__); - goto err; - } - - return 0; -err: - tty_port_set_initialized(&self->port, 0); - return ret; -} - -/* - * Function ircomm_block_til_ready (self, filp) - * - * - * - */ -static int ircomm_tty_block_til_ready(struct ircomm_tty_cb *self, - struct tty_struct *tty, struct file *filp) -{ - struct tty_port *port = &self->port; - DECLARE_WAITQUEUE(wait, current); - int retval; - int do_clocal = 0; - unsigned long flags; - - /* - * If non-blocking mode is set, or the port is not enabled, - * then make the check up front and then exit. - */ - if (tty_io_error(tty)) { - tty_port_set_active(port, 1); - return 0; - } - - if (filp->f_flags & O_NONBLOCK) { - /* nonblock mode is set */ - if (C_BAUD(tty)) - tty_port_raise_dtr_rts(port); - tty_port_set_active(port, 1); - pr_debug("%s(), O_NONBLOCK requested!\n", __func__); - return 0; - } - - if (C_CLOCAL(tty)) { - pr_debug("%s(), doing CLOCAL!\n", __func__); - do_clocal = 1; - } - - /* Wait for carrier detect and the line to become - * free (i.e., not in use by the callout). While we are in - * this loop, port->count is dropped by one, so that - * mgsl_close() knows when to free things. We restore it upon - * exit, either normal or abnormal. - */ - - retval = 0; - add_wait_queue(&port->open_wait, &wait); - - pr_debug("%s(%d):block_til_ready before block on %s open_count=%d\n", - __FILE__, __LINE__, tty->driver->name, port->count); - - spin_lock_irqsave(&port->lock, flags); - port->count--; - port->blocked_open++; - spin_unlock_irqrestore(&port->lock, flags); - - while (1) { - if (C_BAUD(tty) && tty_port_initialized(port)) - tty_port_raise_dtr_rts(port); - - set_current_state(TASK_INTERRUPTIBLE); - - if (tty_hung_up_p(filp) || !tty_port_initialized(port)) { - retval = (port->flags & ASYNC_HUP_NOTIFY) ? - -EAGAIN : -ERESTARTSYS; - break; - } - - /* - * Check if link is ready now. Even if CLOCAL is - * specified, we cannot return before the IrCOMM link is - * ready - */ - if ((do_clocal || tty_port_carrier_raised(port)) && - self->state == IRCOMM_TTY_READY) - { - break; - } - - if (signal_pending(current)) { - retval = -ERESTARTSYS; - break; - } - - pr_debug("%s(%d):block_til_ready blocking on %s open_count=%d\n", - __FILE__, __LINE__, tty->driver->name, port->count); - - schedule(); - } - - __set_current_state(TASK_RUNNING); - remove_wait_queue(&port->open_wait, &wait); - - spin_lock_irqsave(&port->lock, flags); - if (!tty_hung_up_p(filp)) - port->count++; - port->blocked_open--; - spin_unlock_irqrestore(&port->lock, flags); - - pr_debug("%s(%d):block_til_ready after blocking on %s open_count=%d\n", - __FILE__, __LINE__, tty->driver->name, port->count); - - if (!retval) - tty_port_set_active(port, 1); - - return retval; -} - - -static int ircomm_tty_install(struct tty_driver *driver, struct tty_struct *tty) -{ - struct ircomm_tty_cb *self; - unsigned int line = tty->index; - - /* Check if instance already exists */ - self = hashbin_lock_find(ircomm_tty, line, NULL); - if (!self) { - /* No, so make new instance */ - self = kzalloc(sizeof(struct ircomm_tty_cb), GFP_KERNEL); - if (self == NULL) - return -ENOMEM; - - tty_port_init(&self->port); - self->port.ops = &ircomm_port_ops; - self->magic = IRCOMM_TTY_MAGIC; - self->flow = FLOW_STOP; - - self->line = line; - INIT_WORK(&self->tqueue, ircomm_tty_do_softint); - self->max_header_size = IRCOMM_TTY_HDR_UNINITIALISED; - self->max_data_size = IRCOMM_TTY_DATA_UNINITIALISED; - - /* Init some important stuff */ - init_timer(&self->watchdog_timer); - spin_lock_init(&self->spinlock); - - /* - * Force TTY into raw mode by default which is usually what - * we want for IrCOMM and IrLPT. This way applications will - * not have to twiddle with printcap etc. - * - * Note this is completely usafe and doesn't work properly - */ - tty->termios.c_iflag = 0; - tty->termios.c_oflag = 0; - - /* Insert into hash */ - hashbin_insert(ircomm_tty, (irda_queue_t *) self, line, NULL); - } - - tty->driver_data = self; - - return tty_port_install(&self->port, driver, tty); -} - -/* - * Function ircomm_tty_open (tty, filp) - * - * This routine is called when a particular tty device is opened. This - * routine is mandatory; if this routine is not filled in, the attempted - * open will fail with ENODEV. - */ -static int ircomm_tty_open(struct tty_struct *tty, struct file *filp) -{ - struct ircomm_tty_cb *self = tty->driver_data; - unsigned long flags; - int ret; - - /* ++ is not atomic, so this should be protected - Jean II */ - spin_lock_irqsave(&self->port.lock, flags); - self->port.count++; - spin_unlock_irqrestore(&self->port.lock, flags); - tty_port_tty_set(&self->port, tty); - - pr_debug("%s(), %s%d, count = %d\n", __func__ , tty->driver->name, - self->line, self->port.count); - - /* Not really used by us, but lets do it anyway */ - self->port.low_latency = (self->port.flags & ASYNC_LOW_LATENCY) ? 1 : 0; - - /* Check if this is a "normal" ircomm device, or an irlpt device */ - if (self->line < 0x10) { - self->service_type = IRCOMM_3_WIRE | IRCOMM_9_WIRE; - self->settings.service_type = IRCOMM_9_WIRE; /* 9 wire as default */ - /* Jan Kiszka -> add DSR/RI -> Conform to IrCOMM spec */ - self->settings.dce = IRCOMM_CTS | IRCOMM_CD | IRCOMM_DSR | IRCOMM_RI; /* Default line settings */ - pr_debug("%s(), IrCOMM device\n", __func__); - } else { - pr_debug("%s(), IrLPT device\n", __func__); - self->service_type = IRCOMM_3_WIRE_RAW; - self->settings.service_type = IRCOMM_3_WIRE_RAW; /* Default */ - } - - ret = ircomm_tty_startup(self); - if (ret) - return ret; - - ret = ircomm_tty_block_til_ready(self, tty, filp); - if (ret) { - pr_debug("%s(), returning after block_til_ready with %d\n", - __func__, ret); - - return ret; - } - return 0; -} - -/* - * Function ircomm_tty_close (tty, filp) - * - * This routine is called when a particular tty device is closed. - * - */ -static void ircomm_tty_close(struct tty_struct *tty, struct file *filp) -{ - struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data; - struct tty_port *port = &self->port; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;); - - if (tty_port_close_start(port, tty, filp) == 0) - return; - - ircomm_tty_shutdown(self); - - tty_driver_flush_buffer(tty); - - tty_port_close_end(port, tty); - tty_port_tty_set(port, NULL); -} - -/* - * Function ircomm_tty_flush_buffer (tty) - * - * - * - */ -static void ircomm_tty_flush_buffer(struct tty_struct *tty) -{ - struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;); - - /* - * Let do_softint() do this to avoid race condition with - * do_softint() ;-) - */ - schedule_work(&self->tqueue); -} - -/* - * Function ircomm_tty_do_softint (work) - * - * We use this routine to give the write wakeup to the user at at a - * safe time (as fast as possible after write have completed). This - * can be compared to the Tx interrupt. - */ -static void ircomm_tty_do_softint(struct work_struct *work) -{ - struct ircomm_tty_cb *self = - container_of(work, struct ircomm_tty_cb, tqueue); - struct tty_struct *tty; - unsigned long flags; - struct sk_buff *skb, *ctrl_skb; - - if (!self || self->magic != IRCOMM_TTY_MAGIC) - return; - - tty = tty_port_tty_get(&self->port); - if (!tty) - return; - - /* Unlink control buffer */ - spin_lock_irqsave(&self->spinlock, flags); - - ctrl_skb = self->ctrl_skb; - self->ctrl_skb = NULL; - - spin_unlock_irqrestore(&self->spinlock, flags); - - /* Flush control buffer if any */ - if(ctrl_skb) { - if(self->flow == FLOW_START) - ircomm_control_request(self->ircomm, ctrl_skb); - /* Drop reference count - see ircomm_ttp_data_request(). */ - dev_kfree_skb(ctrl_skb); - } - - if (tty->hw_stopped) - goto put; - - /* Unlink transmit buffer */ - spin_lock_irqsave(&self->spinlock, flags); - - skb = self->tx_skb; - self->tx_skb = NULL; - - spin_unlock_irqrestore(&self->spinlock, flags); - - /* Flush transmit buffer if any */ - if (skb) { - ircomm_tty_do_event(self, IRCOMM_TTY_DATA_REQUEST, skb, NULL); - /* Drop reference count - see ircomm_ttp_data_request(). */ - dev_kfree_skb(skb); - } - - /* Check if user (still) wants to be waken up */ - tty_wakeup(tty); -put: - tty_kref_put(tty); -} - -/* - * Function ircomm_tty_write (tty, buf, count) - * - * This routine is called by the kernel to write a series of characters - * to the tty device. The characters may come from user space or kernel - * space. This routine will return the number of characters actually - * accepted for writing. This routine is mandatory. - */ -static int ircomm_tty_write(struct tty_struct *tty, - const unsigned char *buf, int count) -{ - struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data; - unsigned long flags; - struct sk_buff *skb; - int tailroom = 0; - int len = 0; - int size; - - pr_debug("%s(), count=%d, hw_stopped=%d\n", __func__ , count, - tty->hw_stopped); - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;); - - /* We may receive packets from the TTY even before we have finished - * our setup. Not cool. - * The problem is that we don't know the final header and data size - * to create the proper skb, so any skb we would create would have - * bogus header and data size, so need care. - * We use a bogus header size to safely detect this condition. - * Another problem is that hw_stopped was set to 0 way before it - * should be, so we would drop this skb. It should now be fixed. - * One option is to not accept data until we are properly setup. - * But, I suspect that when it happens, the ppp line discipline - * just "drops" the data, which might screw up connect scripts. - * The second option is to create a "safe skb", with large header - * and small size (see ircomm_tty_open() for values). - * We just need to make sure that when the real values get filled, - * we don't mess up the original "safe skb" (see tx_data_size). - * Jean II */ - if (self->max_header_size == IRCOMM_TTY_HDR_UNINITIALISED) { - pr_debug("%s() : not initialised\n", __func__); -#ifdef IRCOMM_NO_TX_BEFORE_INIT - /* We didn't consume anything, TTY will retry */ - return 0; -#endif - } - - if (count < 1) - return 0; - - /* Protect our manipulation of self->tx_skb and related */ - spin_lock_irqsave(&self->spinlock, flags); - - /* Fetch current transmit buffer */ - skb = self->tx_skb; - - /* - * Send out all the data we get, possibly as multiple fragmented - * frames, but this will only happen if the data is larger than the - * max data size. The normal case however is just the opposite, and - * this function may be called multiple times, and will then actually - * defragment the data and send it out as one packet as soon as - * possible, but at a safer point in time - */ - while (count) { - size = count; - - /* Adjust data size to the max data size */ - if (size > self->max_data_size) - size = self->max_data_size; - - /* - * Do we already have a buffer ready for transmit, or do - * we need to allocate a new frame - */ - if (skb) { - /* - * Any room for more data at the end of the current - * transmit buffer? Cannot use skb_tailroom, since - * dev_alloc_skb gives us a larger skb than we - * requested - * Note : use tx_data_size, because max_data_size - * may have changed and we don't want to overwrite - * the skb. - Jean II - */ - if ((tailroom = (self->tx_data_size - skb->len)) > 0) { - /* Adjust data to tailroom */ - if (size > tailroom) - size = tailroom; - } else { - /* - * Current transmit frame is full, so break - * out, so we can send it as soon as possible - */ - break; - } - } else { - /* Prepare a full sized frame */ - skb = alloc_skb(self->max_data_size+ - self->max_header_size, - GFP_ATOMIC); - if (!skb) { - spin_unlock_irqrestore(&self->spinlock, flags); - return -ENOBUFS; - } - skb_reserve(skb, self->max_header_size); - self->tx_skb = skb; - /* Remember skb size because max_data_size may - * change later on - Jean II */ - self->tx_data_size = self->max_data_size; - } - - /* Copy data */ - skb_put_data(skb, buf + len, size); - - count -= size; - len += size; - } - - spin_unlock_irqrestore(&self->spinlock, flags); - - /* - * Schedule a new thread which will transmit the frame as soon - * as possible, but at a safe point in time. We do this so the - * "user" can give us data multiple times, as PPP does (because of - * its 256 byte tx buffer). We will then defragment and send out - * all this data as one single packet. - */ - schedule_work(&self->tqueue); - - return len; -} - -/* - * Function ircomm_tty_write_room (tty) - * - * This routine returns the numbers of characters the tty driver will - * accept for queuing to be written. This number is subject to change as - * output buffers get emptied, or if the output flow control is acted. - */ -static int ircomm_tty_write_room(struct tty_struct *tty) -{ - struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data; - unsigned long flags; - int ret; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;); - -#ifdef IRCOMM_NO_TX_BEFORE_INIT - /* max_header_size tells us if the channel is initialised or not. */ - if (self->max_header_size == IRCOMM_TTY_HDR_UNINITIALISED) - /* Don't bother us yet */ - return 0; -#endif - - /* Check if we are allowed to transmit any data. - * hw_stopped is the regular flow control. - * Jean II */ - if (tty->hw_stopped) - ret = 0; - else { - spin_lock_irqsave(&self->spinlock, flags); - if (self->tx_skb) - ret = self->tx_data_size - self->tx_skb->len; - else - ret = self->max_data_size; - spin_unlock_irqrestore(&self->spinlock, flags); - } - pr_debug("%s(), ret=%d\n", __func__ , ret); - - return ret; -} - -/* - * Function ircomm_tty_wait_until_sent (tty, timeout) - * - * This routine waits until the device has written out all of the - * characters in its transmitter FIFO. - */ -static void ircomm_tty_wait_until_sent(struct tty_struct *tty, int timeout) -{ - struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data; - unsigned long orig_jiffies, poll_time; - unsigned long flags; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;); - - orig_jiffies = jiffies; - - /* Set poll time to 200 ms */ - poll_time = msecs_to_jiffies(200); - if (timeout) - poll_time = min_t(unsigned long, timeout, poll_time); - - spin_lock_irqsave(&self->spinlock, flags); - while (self->tx_skb && self->tx_skb->len) { - spin_unlock_irqrestore(&self->spinlock, flags); - schedule_timeout_interruptible(poll_time); - spin_lock_irqsave(&self->spinlock, flags); - if (signal_pending(current)) - break; - if (timeout && time_after(jiffies, orig_jiffies + timeout)) - break; - } - spin_unlock_irqrestore(&self->spinlock, flags); - __set_current_state(TASK_RUNNING); -} - -/* - * Function ircomm_tty_throttle (tty) - * - * This routine notifies the tty driver that input buffers for the line - * discipline are close to full, and it should somehow signal that no - * more characters should be sent to the tty. - */ -static void ircomm_tty_throttle(struct tty_struct *tty) -{ - struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;); - - /* Software flow control? */ - if (I_IXOFF(tty)) - ircomm_tty_send_xchar(tty, STOP_CHAR(tty)); - - /* Hardware flow control? */ - if (C_CRTSCTS(tty)) { - self->settings.dte &= ~IRCOMM_RTS; - self->settings.dte |= IRCOMM_DELTA_RTS; - - ircomm_param_request(self, IRCOMM_DTE, TRUE); - } - - ircomm_flow_request(self->ircomm, FLOW_STOP); -} - -/* - * Function ircomm_tty_unthrottle (tty) - * - * This routine notifies the tty drivers that it should signals that - * characters can now be sent to the tty without fear of overrunning the - * input buffers of the line disciplines. - */ -static void ircomm_tty_unthrottle(struct tty_struct *tty) -{ - struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;); - - /* Using software flow control? */ - if (I_IXOFF(tty)) - ircomm_tty_send_xchar(tty, START_CHAR(tty)); - - /* Using hardware flow control? */ - if (C_CRTSCTS(tty)) { - self->settings.dte |= (IRCOMM_RTS|IRCOMM_DELTA_RTS); - - ircomm_param_request(self, IRCOMM_DTE, TRUE); - pr_debug("%s(), FLOW_START\n", __func__); - } - ircomm_flow_request(self->ircomm, FLOW_START); -} - -/* - * Function ircomm_tty_chars_in_buffer (tty) - * - * Indicates if there are any data in the buffer - * - */ -static int ircomm_tty_chars_in_buffer(struct tty_struct *tty) -{ - struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data; - unsigned long flags; - int len = 0; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;); - - spin_lock_irqsave(&self->spinlock, flags); - - if (self->tx_skb) - len = self->tx_skb->len; - - spin_unlock_irqrestore(&self->spinlock, flags); - - return len; -} - -static void ircomm_tty_shutdown(struct ircomm_tty_cb *self) -{ - unsigned long flags; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;); - - if (!tty_port_initialized(&self->port)) - return; - tty_port_set_initialized(&self->port, 0); - - ircomm_tty_detach_cable(self); - - spin_lock_irqsave(&self->spinlock, flags); - - del_timer(&self->watchdog_timer); - - /* Free parameter buffer */ - if (self->ctrl_skb) { - dev_kfree_skb(self->ctrl_skb); - self->ctrl_skb = NULL; - } - - /* Free transmit buffer */ - if (self->tx_skb) { - dev_kfree_skb(self->tx_skb); - self->tx_skb = NULL; - } - - if (self->ircomm) { - ircomm_close(self->ircomm); - self->ircomm = NULL; - } - - spin_unlock_irqrestore(&self->spinlock, flags); -} - -/* - * Function ircomm_tty_hangup (tty) - * - * This routine notifies the tty driver that it should hangup the tty - * device. - * - */ -static void ircomm_tty_hangup(struct tty_struct *tty) -{ - struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data; - struct tty_port *port = &self->port; - unsigned long flags; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;); - - /* ircomm_tty_flush_buffer(tty); */ - ircomm_tty_shutdown(self); - - spin_lock_irqsave(&port->lock, flags); - if (port->tty) { - set_bit(TTY_IO_ERROR, &port->tty->flags); - tty_kref_put(port->tty); - } - port->tty = NULL; - port->count = 0; - spin_unlock_irqrestore(&port->lock, flags); - tty_port_set_active(port, 0); - - wake_up_interruptible(&port->open_wait); -} - -/* - * Function ircomm_tty_send_xchar (tty, ch) - * - * This routine is used to send a high-priority XON/XOFF character to - * the device. - */ -static void ircomm_tty_send_xchar(struct tty_struct *tty, char ch) -{ - pr_debug("%s(), not impl\n", __func__); -} - -/* - * Function ircomm_tty_start (tty) - * - * This routine notifies the tty driver that it resume sending - * characters to the tty device. - */ -void ircomm_tty_start(struct tty_struct *tty) -{ - struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data; - - ircomm_flow_request(self->ircomm, FLOW_START); -} - -/* - * Function ircomm_tty_stop (tty) - * - * This routine notifies the tty driver that it should stop outputting - * characters to the tty device. - */ -static void ircomm_tty_stop(struct tty_struct *tty) -{ - struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;); - - ircomm_flow_request(self->ircomm, FLOW_STOP); -} - -/* - * Function ircomm_check_modem_status (self) - * - * Check for any changes in the DCE's line settings. This function should - * be called whenever the dce parameter settings changes, to update the - * flow control settings and other things - */ -void ircomm_tty_check_modem_status(struct ircomm_tty_cb *self) -{ - struct tty_struct *tty; - int status; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;); - - tty = tty_port_tty_get(&self->port); - - status = self->settings.dce; - - if (status & IRCOMM_DCE_DELTA_ANY) { - /*wake_up_interruptible(&self->delta_msr_wait);*/ - } - if (tty_port_check_carrier(&self->port) && (status & IRCOMM_DELTA_CD)) { - pr_debug("%s(), ircomm%d CD now %s...\n", __func__ , self->line, - (status & IRCOMM_CD) ? "on" : "off"); - - if (status & IRCOMM_CD) { - wake_up_interruptible(&self->port.open_wait); - } else { - pr_debug("%s(), Doing serial hangup..\n", __func__); - if (tty) - tty_hangup(tty); - - /* Hangup will remote the tty, so better break out */ - goto put; - } - } - if (tty && tty_port_cts_enabled(&self->port)) { - if (tty->hw_stopped) { - if (status & IRCOMM_CTS) { - pr_debug("%s(), CTS tx start...\n", __func__); - tty->hw_stopped = 0; - - /* Wake up processes blocked on open */ - wake_up_interruptible(&self->port.open_wait); - - schedule_work(&self->tqueue); - goto put; - } - } else { - if (!(status & IRCOMM_CTS)) { - pr_debug("%s(), CTS tx stop...\n", __func__); - tty->hw_stopped = 1; - } - } - } -put: - tty_kref_put(tty); -} - -/* - * Function ircomm_tty_data_indication (instance, sap, skb) - * - * Handle incoming data, and deliver it to the line discipline - * - */ -static int ircomm_tty_data_indication(void *instance, void *sap, - struct sk_buff *skb) -{ - struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance; - struct tty_struct *tty; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;); - IRDA_ASSERT(skb != NULL, return -1;); - - tty = tty_port_tty_get(&self->port); - if (!tty) { - pr_debug("%s(), no tty!\n", __func__); - return 0; - } - - /* - * If we receive data when hardware is stopped then something is wrong. - * We try to poll the peers line settings to check if we are up todate. - * Devices like WinCE can do this, and since they don't send any - * params, we can just as well declare the hardware for running. - */ - if (tty->hw_stopped && (self->flow == FLOW_START)) { - pr_debug("%s(), polling for line settings!\n", __func__); - ircomm_param_request(self, IRCOMM_POLL, TRUE); - - /* We can just as well declare the hardware for running */ - ircomm_tty_send_initial_parameters(self); - ircomm_tty_link_established(self); - } - tty_kref_put(tty); - - /* - * Use flip buffer functions since the code may be called from interrupt - * context - */ - tty_insert_flip_string(&self->port, skb->data, skb->len); - tty_flip_buffer_push(&self->port); - - /* No need to kfree_skb - see ircomm_ttp_data_indication() */ - - return 0; -} - -/* - * Function ircomm_tty_control_indication (instance, sap, skb) - * - * Parse all incoming parameters (easy!) - * - */ -static int ircomm_tty_control_indication(void *instance, void *sap, - struct sk_buff *skb) -{ - struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance; - int clen; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;); - IRDA_ASSERT(skb != NULL, return -1;); - - clen = skb->data[0]; - - irda_param_extract_all(self, skb->data+1, IRDA_MIN(skb->len-1, clen), - &ircomm_param_info); - - /* No need to kfree_skb - see ircomm_control_indication() */ - - return 0; -} - -/* - * Function ircomm_tty_flow_indication (instance, sap, cmd) - * - * This function is called by IrTTP when it wants us to slow down the - * transmission of data. We just mark the hardware as stopped, and wait - * for IrTTP to notify us that things are OK again. - */ -static void ircomm_tty_flow_indication(void *instance, void *sap, - LOCAL_FLOW cmd) -{ - struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance; - struct tty_struct *tty; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;); - - tty = tty_port_tty_get(&self->port); - - switch (cmd) { - case FLOW_START: - pr_debug("%s(), hw start!\n", __func__); - if (tty) - tty->hw_stopped = 0; - - /* ircomm_tty_do_softint will take care of the rest */ - schedule_work(&self->tqueue); - break; - default: /* If we get here, something is very wrong, better stop */ - case FLOW_STOP: - pr_debug("%s(), hw stopped!\n", __func__); - if (tty) - tty->hw_stopped = 1; - break; - } - - tty_kref_put(tty); - self->flow = cmd; -} - -#ifdef CONFIG_PROC_FS -static void ircomm_tty_line_info(struct ircomm_tty_cb *self, struct seq_file *m) -{ - struct tty_struct *tty; - char sep; - - seq_printf(m, "State: %s\n", ircomm_tty_state[self->state]); - - seq_puts(m, "Service type: "); - if (self->service_type & IRCOMM_9_WIRE) - seq_puts(m, "9_WIRE"); - else if (self->service_type & IRCOMM_3_WIRE) - seq_puts(m, "3_WIRE"); - else if (self->service_type & IRCOMM_3_WIRE_RAW) - seq_puts(m, "3_WIRE_RAW"); - else - seq_puts(m, "No common service type!\n"); - seq_putc(m, '\n'); - - seq_printf(m, "Port name: %s\n", self->settings.port_name); - - seq_printf(m, "DTE status:"); - sep = ' '; - if (self->settings.dte & IRCOMM_RTS) { - seq_printf(m, "%cRTS", sep); - sep = '|'; - } - if (self->settings.dte & IRCOMM_DTR) { - seq_printf(m, "%cDTR", sep); - sep = '|'; - } - seq_putc(m, '\n'); - - seq_puts(m, "DCE status:"); - sep = ' '; - if (self->settings.dce & IRCOMM_CTS) { - seq_printf(m, "%cCTS", sep); - sep = '|'; - } - if (self->settings.dce & IRCOMM_DSR) { - seq_printf(m, "%cDSR", sep); - sep = '|'; - } - if (self->settings.dce & IRCOMM_CD) { - seq_printf(m, "%cCD", sep); - sep = '|'; - } - if (self->settings.dce & IRCOMM_RI) { - seq_printf(m, "%cRI", sep); - sep = '|'; - } - seq_putc(m, '\n'); - - seq_puts(m, "Configuration: "); - if (!self->settings.null_modem) - seq_puts(m, "DTE <-> DCE\n"); - else - seq_puts(m, "DTE <-> DTE (null modem emulation)\n"); - - seq_printf(m, "Data rate: %d\n", self->settings.data_rate); - - seq_puts(m, "Flow control:"); - sep = ' '; - if (self->settings.flow_control & IRCOMM_XON_XOFF_IN) { - seq_printf(m, "%cXON_XOFF_IN", sep); - sep = '|'; - } - if (self->settings.flow_control & IRCOMM_XON_XOFF_OUT) { - seq_printf(m, "%cXON_XOFF_OUT", sep); - sep = '|'; - } - if (self->settings.flow_control & IRCOMM_RTS_CTS_IN) { - seq_printf(m, "%cRTS_CTS_IN", sep); - sep = '|'; - } - if (self->settings.flow_control & IRCOMM_RTS_CTS_OUT) { - seq_printf(m, "%cRTS_CTS_OUT", sep); - sep = '|'; - } - if (self->settings.flow_control & IRCOMM_DSR_DTR_IN) { - seq_printf(m, "%cDSR_DTR_IN", sep); - sep = '|'; - } - if (self->settings.flow_control & IRCOMM_DSR_DTR_OUT) { - seq_printf(m, "%cDSR_DTR_OUT", sep); - sep = '|'; - } - if (self->settings.flow_control & IRCOMM_ENQ_ACK_IN) { - seq_printf(m, "%cENQ_ACK_IN", sep); - sep = '|'; - } - if (self->settings.flow_control & IRCOMM_ENQ_ACK_OUT) { - seq_printf(m, "%cENQ_ACK_OUT", sep); - sep = '|'; - } - seq_putc(m, '\n'); - - seq_puts(m, "Flags:"); - sep = ' '; - if (tty_port_cts_enabled(&self->port)) { - seq_printf(m, "%cASYNC_CTS_FLOW", sep); - sep = '|'; - } - if (tty_port_check_carrier(&self->port)) { - seq_printf(m, "%cASYNC_CHECK_CD", sep); - sep = '|'; - } - if (tty_port_initialized(&self->port)) { - seq_printf(m, "%cASYNC_INITIALIZED", sep); - sep = '|'; - } - if (self->port.flags & ASYNC_LOW_LATENCY) { - seq_printf(m, "%cASYNC_LOW_LATENCY", sep); - sep = '|'; - } - if (tty_port_active(&self->port)) { - seq_printf(m, "%cASYNC_NORMAL_ACTIVE", sep); - sep = '|'; - } - seq_putc(m, '\n'); - - seq_printf(m, "Role: %s\n", self->client ? "client" : "server"); - seq_printf(m, "Open count: %d\n", self->port.count); - seq_printf(m, "Max data size: %d\n", self->max_data_size); - seq_printf(m, "Max header size: %d\n", self->max_header_size); - - tty = tty_port_tty_get(&self->port); - if (tty) { - seq_printf(m, "Hardware: %s\n", - tty->hw_stopped ? "Stopped" : "Running"); - tty_kref_put(tty); - } -} - -static int ircomm_tty_proc_show(struct seq_file *m, void *v) -{ - struct ircomm_tty_cb *self; - unsigned long flags; - - spin_lock_irqsave(&ircomm_tty->hb_spinlock, flags); - - self = (struct ircomm_tty_cb *) hashbin_get_first(ircomm_tty); - while (self != NULL) { - if (self->magic != IRCOMM_TTY_MAGIC) - break; - - ircomm_tty_line_info(self, m); - self = (struct ircomm_tty_cb *) hashbin_get_next(ircomm_tty); - } - spin_unlock_irqrestore(&ircomm_tty->hb_spinlock, flags); - return 0; -} - -static int ircomm_tty_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, ircomm_tty_proc_show, NULL); -} - -static const struct file_operations ircomm_tty_proc_fops = { - .owner = THIS_MODULE, - .open = ircomm_tty_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; -#endif /* CONFIG_PROC_FS */ - -MODULE_AUTHOR("Dag Brattli <dagb@cs.uit.no>"); -MODULE_DESCRIPTION("IrCOMM serial TTY driver"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_CHARDEV_MAJOR(IRCOMM_TTY_MAJOR); - -module_init(ircomm_tty_init); -module_exit(ircomm_tty_cleanup); diff --git a/net/irda/ircomm/ircomm_tty_attach.c b/net/irda/ircomm/ircomm_tty_attach.c deleted file mode 100644 index 0a411019c098..000000000000 --- a/net/irda/ircomm/ircomm_tty_attach.c +++ /dev/null @@ -1,987 +0,0 @@ -/********************************************************************* - * - * Filename: ircomm_tty_attach.c - * Version: - * Description: Code for attaching the serial driver to IrCOMM - * Status: Experimental. - * Author: Dag Brattli <dagb@cs.uit.no> - * Created at: Sat Jun 5 17:42:00 1999 - * Modified at: Tue Jan 4 14:20:49 2000 - * Modified by: Dag Brattli <dagb@cs.uit.no> - * - * Copyright (c) 1999-2000 Dag Brattli, All Rights Reserved. - * Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. - * - ********************************************************************/ - -#include <linux/init.h> -#include <linux/sched.h> - -#include <net/irda/irda.h> -#include <net/irda/irlmp.h> -#include <net/irda/iriap.h> -#include <net/irda/irttp.h> -#include <net/irda/irias_object.h> -#include <net/irda/parameters.h> - -#include <net/irda/ircomm_core.h> -#include <net/irda/ircomm_param.h> -#include <net/irda/ircomm_event.h> - -#include <net/irda/ircomm_tty.h> -#include <net/irda/ircomm_tty_attach.h> - -static void ircomm_tty_ias_register(struct ircomm_tty_cb *self); -static void ircomm_tty_discovery_indication(discinfo_t *discovery, - DISCOVERY_MODE mode, - void *priv); -static void ircomm_tty_getvalue_confirm(int result, __u16 obj_id, - struct ias_value *value, void *priv); -static void ircomm_tty_start_watchdog_timer(struct ircomm_tty_cb *self, - int timeout); -static void ircomm_tty_watchdog_timer_expired(void *data); - -static int ircomm_tty_state_idle(struct ircomm_tty_cb *self, - IRCOMM_TTY_EVENT event, - struct sk_buff *skb, - struct ircomm_tty_info *info); -static int ircomm_tty_state_search(struct ircomm_tty_cb *self, - IRCOMM_TTY_EVENT event, - struct sk_buff *skb, - struct ircomm_tty_info *info); -static int ircomm_tty_state_query_parameters(struct ircomm_tty_cb *self, - IRCOMM_TTY_EVENT event, - struct sk_buff *skb, - struct ircomm_tty_info *info); -static int ircomm_tty_state_query_lsap_sel(struct ircomm_tty_cb *self, - IRCOMM_TTY_EVENT event, - struct sk_buff *skb, - struct ircomm_tty_info *info); -static int ircomm_tty_state_setup(struct ircomm_tty_cb *self, - IRCOMM_TTY_EVENT event, - struct sk_buff *skb, - struct ircomm_tty_info *info); -static int ircomm_tty_state_ready(struct ircomm_tty_cb *self, - IRCOMM_TTY_EVENT event, - struct sk_buff *skb, - struct ircomm_tty_info *info); - -const char *const ircomm_tty_state[] = { - "IRCOMM_TTY_IDLE", - "IRCOMM_TTY_SEARCH", - "IRCOMM_TTY_QUERY_PARAMETERS", - "IRCOMM_TTY_QUERY_LSAP_SEL", - "IRCOMM_TTY_SETUP", - "IRCOMM_TTY_READY", - "*** ERROR *** ", -}; - -static const char *const ircomm_tty_event[] __maybe_unused = { - "IRCOMM_TTY_ATTACH_CABLE", - "IRCOMM_TTY_DETACH_CABLE", - "IRCOMM_TTY_DATA_REQUEST", - "IRCOMM_TTY_DATA_INDICATION", - "IRCOMM_TTY_DISCOVERY_REQUEST", - "IRCOMM_TTY_DISCOVERY_INDICATION", - "IRCOMM_TTY_CONNECT_CONFIRM", - "IRCOMM_TTY_CONNECT_INDICATION", - "IRCOMM_TTY_DISCONNECT_REQUEST", - "IRCOMM_TTY_DISCONNECT_INDICATION", - "IRCOMM_TTY_WD_TIMER_EXPIRED", - "IRCOMM_TTY_GOT_PARAMETERS", - "IRCOMM_TTY_GOT_LSAPSEL", - "*** ERROR ****", -}; - -static int (*state[])(struct ircomm_tty_cb *self, IRCOMM_TTY_EVENT event, - struct sk_buff *skb, struct ircomm_tty_info *info) = -{ - ircomm_tty_state_idle, - ircomm_tty_state_search, - ircomm_tty_state_query_parameters, - ircomm_tty_state_query_lsap_sel, - ircomm_tty_state_setup, - ircomm_tty_state_ready, -}; - -/* - * Function ircomm_tty_attach_cable (driver) - * - * Try to attach cable (IrCOMM link). This function will only return - * when the link has been connected, or if an error condition occurs. - * If success, the return value is the resulting service type. - */ -int ircomm_tty_attach_cable(struct ircomm_tty_cb *self) -{ - struct tty_struct *tty; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;); - - /* Check if somebody has already connected to us */ - if (ircomm_is_connected(self->ircomm)) { - pr_debug("%s(), already connected!\n", __func__); - return 0; - } - - /* Make sure nobody tries to write before the link is up */ - tty = tty_port_tty_get(&self->port); - if (tty) { - tty->hw_stopped = 1; - tty_kref_put(tty); - } - - ircomm_tty_ias_register(self); - - ircomm_tty_do_event(self, IRCOMM_TTY_ATTACH_CABLE, NULL, NULL); - - return 0; -} - -/* - * Function ircomm_detach_cable (driver) - * - * Detach cable, or cable has been detached by peer - * - */ -void ircomm_tty_detach_cable(struct ircomm_tty_cb *self) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;); - - del_timer(&self->watchdog_timer); - - /* Remove discovery handler */ - if (self->ckey) { - irlmp_unregister_client(self->ckey); - self->ckey = NULL; - } - /* Remove IrCOMM hint bits */ - if (self->skey) { - irlmp_unregister_service(self->skey); - self->skey = NULL; - } - - if (self->iriap) { - iriap_close(self->iriap); - self->iriap = NULL; - } - - /* Remove LM-IAS object */ - if (self->obj) { - irias_delete_object(self->obj); - self->obj = NULL; - } - - ircomm_tty_do_event(self, IRCOMM_TTY_DETACH_CABLE, NULL, NULL); - - /* Reset some values */ - self->daddr = self->saddr = 0; - self->dlsap_sel = self->slsap_sel = 0; - - memset(&self->settings, 0, sizeof(struct ircomm_params)); -} - -/* - * Function ircomm_tty_ias_register (self) - * - * Register with LM-IAS depending on which service type we are - * - */ -static void ircomm_tty_ias_register(struct ircomm_tty_cb *self) -{ - __u8 oct_seq[6]; - __u16 hints; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;); - - /* Compute hint bits based on service */ - hints = irlmp_service_to_hint(S_COMM); - if (self->service_type & IRCOMM_3_WIRE_RAW) - hints |= irlmp_service_to_hint(S_PRINTER); - - /* Advertise IrCOMM hint bit in discovery */ - if (!self->skey) - self->skey = irlmp_register_service(hints); - /* Set up a discovery handler */ - if (!self->ckey) - self->ckey = irlmp_register_client(hints, - ircomm_tty_discovery_indication, - NULL, (void *) self); - - /* If already done, no need to do it again */ - if (self->obj) - return; - - if (self->service_type & IRCOMM_3_WIRE_RAW) { - /* Register IrLPT with LM-IAS */ - self->obj = irias_new_object("IrLPT", IAS_IRLPT_ID); - irias_add_integer_attrib(self->obj, "IrDA:IrLMP:LsapSel", - self->slsap_sel, IAS_KERNEL_ATTR); - } else { - /* Register IrCOMM with LM-IAS */ - self->obj = irias_new_object("IrDA:IrCOMM", IAS_IRCOMM_ID); - irias_add_integer_attrib(self->obj, "IrDA:TinyTP:LsapSel", - self->slsap_sel, IAS_KERNEL_ATTR); - - /* Code the parameters into the buffer */ - irda_param_pack(oct_seq, "bbbbbb", - IRCOMM_SERVICE_TYPE, 1, self->service_type, - IRCOMM_PORT_TYPE, 1, IRCOMM_SERIAL); - - /* Register parameters with LM-IAS */ - irias_add_octseq_attrib(self->obj, "Parameters", oct_seq, 6, - IAS_KERNEL_ATTR); - } - irias_insert_object(self->obj); -} - -/* - * Function ircomm_tty_ias_unregister (self) - * - * Remove our IAS object and client hook while connected. - * - */ -static void ircomm_tty_ias_unregister(struct ircomm_tty_cb *self) -{ - /* Remove LM-IAS object now so it is not reused. - * IrCOMM deals very poorly with multiple incoming connections. - * It should looks a lot more like IrNET, and "dup" a server TSAP - * to the application TSAP (based on various rules). - * This is a cheap workaround allowing multiple clients to - * connect to us. It will not always work. - * Each IrCOMM socket has an IAS entry. Incoming connection will - * pick the first one found. So, when we are fully connected, - * we remove our IAS entries so that the next IAS entry is used. - * We do that for *both* client and server, because a server - * can also create client instances. - * Jean II */ - if (self->obj) { - irias_delete_object(self->obj); - self->obj = NULL; - } - -#if 0 - /* Remove discovery handler. - * While we are connected, we no longer need to receive - * discovery events. This would be the case if there is - * multiple IrLAP interfaces. Jean II */ - if (self->ckey) { - irlmp_unregister_client(self->ckey); - self->ckey = NULL; - } -#endif -} - -/* - * Function ircomm_send_initial_parameters (self) - * - * Send initial parameters to the remote IrCOMM device. These parameters - * must be sent before any data. - */ -int ircomm_tty_send_initial_parameters(struct ircomm_tty_cb *self) -{ - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;); - - if (self->service_type & IRCOMM_3_WIRE_RAW) - return 0; - - /* - * Set default values, but only if the application for some reason - * haven't set them already - */ - pr_debug("%s(), data-rate = %d\n", __func__ , - self->settings.data_rate); - if (!self->settings.data_rate) - self->settings.data_rate = 9600; - pr_debug("%s(), data-format = %d\n", __func__ , - self->settings.data_format); - if (!self->settings.data_format) - self->settings.data_format = IRCOMM_WSIZE_8; /* 8N1 */ - - pr_debug("%s(), flow-control = %d\n", __func__ , - self->settings.flow_control); - /*self->settings.flow_control = IRCOMM_RTS_CTS_IN|IRCOMM_RTS_CTS_OUT;*/ - - /* Do not set delta values for the initial parameters */ - self->settings.dte = IRCOMM_DTR | IRCOMM_RTS; - - /* Only send service type parameter when we are the client */ - if (self->client) - ircomm_param_request(self, IRCOMM_SERVICE_TYPE, FALSE); - ircomm_param_request(self, IRCOMM_DATA_RATE, FALSE); - ircomm_param_request(self, IRCOMM_DATA_FORMAT, FALSE); - - /* For a 3 wire service, we just flush the last parameter and return */ - if (self->settings.service_type == IRCOMM_3_WIRE) { - ircomm_param_request(self, IRCOMM_FLOW_CONTROL, TRUE); - return 0; - } - - /* Only 9-wire service types continue here */ - ircomm_param_request(self, IRCOMM_FLOW_CONTROL, FALSE); -#if 0 - ircomm_param_request(self, IRCOMM_XON_XOFF, FALSE); - ircomm_param_request(self, IRCOMM_ENQ_ACK, FALSE); -#endif - /* Notify peer that we are ready to receive data */ - ircomm_param_request(self, IRCOMM_DTE, TRUE); - - return 0; -} - -/* - * Function ircomm_tty_discovery_indication (discovery) - * - * Remote device is discovered, try query the remote IAS to see which - * device it is, and which services it has. - * - */ -static void ircomm_tty_discovery_indication(discinfo_t *discovery, - DISCOVERY_MODE mode, - void *priv) -{ - struct ircomm_tty_cb *self; - struct ircomm_tty_info info; - - /* Important note : - * We need to drop all passive discoveries. - * The LSAP management of IrComm is deficient and doesn't deal - * with the case of two instance connecting to each other - * simultaneously (it will deadlock in LMP). - * The proper fix would be to use the same technique as in IrNET, - * to have one server socket and separate instances for the - * connecting/connected socket. - * The workaround is to drop passive discovery, which drastically - * reduce the probability of this happening. - * Jean II */ - if(mode == DISCOVERY_PASSIVE) - return; - - info.daddr = discovery->daddr; - info.saddr = discovery->saddr; - - self = priv; - ircomm_tty_do_event(self, IRCOMM_TTY_DISCOVERY_INDICATION, - NULL, &info); -} - -/* - * Function ircomm_tty_disconnect_indication (instance, sap, reason, skb) - * - * Link disconnected - * - */ -void ircomm_tty_disconnect_indication(void *instance, void *sap, - LM_REASON reason, - struct sk_buff *skb) -{ - struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance; - struct tty_struct *tty; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;); - - tty = tty_port_tty_get(&self->port); - if (!tty) - return; - - /* This will stop control data transfers */ - self->flow = FLOW_STOP; - - /* Stop data transfers */ - tty->hw_stopped = 1; - - ircomm_tty_do_event(self, IRCOMM_TTY_DISCONNECT_INDICATION, NULL, - NULL); - tty_kref_put(tty); -} - -/* - * Function ircomm_tty_getvalue_confirm (result, obj_id, value, priv) - * - * Got result from the IAS query we make - * - */ -static void ircomm_tty_getvalue_confirm(int result, __u16 obj_id, - struct ias_value *value, - void *priv) -{ - struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) priv; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;); - - /* We probably don't need to make any more queries */ - iriap_close(self->iriap); - self->iriap = NULL; - - /* Check if request succeeded */ - if (result != IAS_SUCCESS) { - pr_debug("%s(), got NULL value!\n", __func__); - return; - } - - switch (value->type) { - case IAS_OCT_SEQ: - pr_debug("%s(), got octet sequence\n", __func__); - - irda_param_extract_all(self, value->t.oct_seq, value->len, - &ircomm_param_info); - - ircomm_tty_do_event(self, IRCOMM_TTY_GOT_PARAMETERS, NULL, - NULL); - break; - case IAS_INTEGER: - /* Got LSAP selector */ - pr_debug("%s(), got lsapsel = %d\n", __func__ , - value->t.integer); - - if (value->t.integer == -1) { - pr_debug("%s(), invalid value!\n", __func__); - } else - self->dlsap_sel = value->t.integer; - - ircomm_tty_do_event(self, IRCOMM_TTY_GOT_LSAPSEL, NULL, NULL); - break; - case IAS_MISSING: - pr_debug("%s(), got IAS_MISSING\n", __func__); - break; - default: - pr_debug("%s(), got unknown type!\n", __func__); - break; - } - irias_delete_value(value); -} - -/* - * Function ircomm_tty_connect_confirm (instance, sap, qos, max_sdu_size, skb) - * - * Connection confirmed - * - */ -void ircomm_tty_connect_confirm(void *instance, void *sap, - struct qos_info *qos, - __u32 max_data_size, - __u8 max_header_size, - struct sk_buff *skb) -{ - struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;); - - self->client = TRUE; - self->max_data_size = max_data_size; - self->max_header_size = max_header_size; - self->flow = FLOW_START; - - ircomm_tty_do_event(self, IRCOMM_TTY_CONNECT_CONFIRM, NULL, NULL); - - /* No need to kfree_skb - see ircomm_ttp_connect_confirm() */ -} - -/* - * Function ircomm_tty_connect_indication (instance, sap, qos, max_sdu_size, - * skb) - * - * we are discovered and being requested to connect by remote device ! - * - */ -void ircomm_tty_connect_indication(void *instance, void *sap, - struct qos_info *qos, - __u32 max_data_size, - __u8 max_header_size, - struct sk_buff *skb) -{ - struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) instance; - int clen; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;); - - self->client = FALSE; - self->max_data_size = max_data_size; - self->max_header_size = max_header_size; - self->flow = FLOW_START; - - clen = skb->data[0]; - if (clen) - irda_param_extract_all(self, skb->data+1, - IRDA_MIN(skb->len, clen), - &ircomm_param_info); - - ircomm_tty_do_event(self, IRCOMM_TTY_CONNECT_INDICATION, NULL, NULL); - - /* No need to kfree_skb - see ircomm_ttp_connect_indication() */ -} - -/* - * Function ircomm_tty_link_established (self) - * - * Called when the IrCOMM link is established - * - */ -void ircomm_tty_link_established(struct ircomm_tty_cb *self) -{ - struct tty_struct *tty; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;); - - tty = tty_port_tty_get(&self->port); - if (!tty) - return; - - del_timer(&self->watchdog_timer); - - /* - * IrCOMM link is now up, and if we are not using hardware - * flow-control, then declare the hardware as running. Otherwise we - * will have to wait for the peer device (DCE) to raise the CTS - * line. - */ - if (tty_port_cts_enabled(&self->port) && - ((self->settings.dce & IRCOMM_CTS) == 0)) { - pr_debug("%s(), waiting for CTS ...\n", __func__); - goto put; - } else { - pr_debug("%s(), starting hardware!\n", __func__); - - tty->hw_stopped = 0; - - /* Wake up processes blocked on open */ - wake_up_interruptible(&self->port.open_wait); - } - - schedule_work(&self->tqueue); -put: - tty_kref_put(tty); -} - -/* - * Function ircomm_tty_start_watchdog_timer (self, timeout) - * - * Start the watchdog timer. This timer is used to make sure that any - * connection attempt is successful, and if not, we will retry after - * the timeout - */ -static void ircomm_tty_start_watchdog_timer(struct ircomm_tty_cb *self, - int timeout) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;); - - irda_start_timer(&self->watchdog_timer, timeout, (void *) self, - ircomm_tty_watchdog_timer_expired); -} - -/* - * Function ircomm_tty_watchdog_timer_expired (data) - * - * Called when the connect procedure have taken to much time. - * - */ -static void ircomm_tty_watchdog_timer_expired(void *data) -{ - struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) data; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;); - - ircomm_tty_do_event(self, IRCOMM_TTY_WD_TIMER_EXPIRED, NULL, NULL); -} - - -/* - * Function ircomm_tty_do_event (self, event, skb) - * - * Process event - * - */ -int ircomm_tty_do_event(struct ircomm_tty_cb *self, IRCOMM_TTY_EVENT event, - struct sk_buff *skb, struct ircomm_tty_info *info) -{ - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;); - - pr_debug("%s: state=%s, event=%s\n", __func__ , - ircomm_tty_state[self->state], ircomm_tty_event[event]); - - return (*state[self->state])(self, event, skb, info); -} - -/* - * Function ircomm_tty_next_state (self, state) - * - * Switch state - * - */ -static inline void ircomm_tty_next_state(struct ircomm_tty_cb *self, IRCOMM_TTY_STATE state) -{ - /* - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return;); - - pr_debug("%s: next state=%s, service type=%d\n", __func__ , - ircomm_tty_state[self->state], self->service_type); - */ - self->state = state; -} - -/* - * Function ircomm_tty_state_idle (self, event, skb, info) - * - * Just hanging around - * - */ -static int ircomm_tty_state_idle(struct ircomm_tty_cb *self, - IRCOMM_TTY_EVENT event, - struct sk_buff *skb, - struct ircomm_tty_info *info) -{ - int ret = 0; - - pr_debug("%s: state=%s, event=%s\n", __func__ , - ircomm_tty_state[self->state], ircomm_tty_event[event]); - switch (event) { - case IRCOMM_TTY_ATTACH_CABLE: - /* Try to discover any remote devices */ - ircomm_tty_start_watchdog_timer(self, 3*HZ); - ircomm_tty_next_state(self, IRCOMM_TTY_SEARCH); - - irlmp_discovery_request(DISCOVERY_DEFAULT_SLOTS); - break; - case IRCOMM_TTY_DISCOVERY_INDICATION: - self->daddr = info->daddr; - self->saddr = info->saddr; - - if (self->iriap) { - net_warn_ratelimited("%s(), busy with a previous query\n", - __func__); - return -EBUSY; - } - - self->iriap = iriap_open(LSAP_ANY, IAS_CLIENT, self, - ircomm_tty_getvalue_confirm); - - iriap_getvaluebyclass_request(self->iriap, - self->saddr, self->daddr, - "IrDA:IrCOMM", "Parameters"); - - ircomm_tty_start_watchdog_timer(self, 3*HZ); - ircomm_tty_next_state(self, IRCOMM_TTY_QUERY_PARAMETERS); - break; - case IRCOMM_TTY_CONNECT_INDICATION: - del_timer(&self->watchdog_timer); - - /* Accept connection */ - ircomm_connect_response(self->ircomm, NULL); - ircomm_tty_next_state(self, IRCOMM_TTY_READY); - break; - case IRCOMM_TTY_WD_TIMER_EXPIRED: - /* Just stay idle */ - break; - case IRCOMM_TTY_DETACH_CABLE: - ircomm_tty_next_state(self, IRCOMM_TTY_IDLE); - break; - default: - pr_debug("%s(), unknown event: %s\n", __func__ , - ircomm_tty_event[event]); - ret = -EINVAL; - } - return ret; -} - -/* - * Function ircomm_tty_state_search (self, event, skb, info) - * - * Trying to discover an IrCOMM device - * - */ -static int ircomm_tty_state_search(struct ircomm_tty_cb *self, - IRCOMM_TTY_EVENT event, - struct sk_buff *skb, - struct ircomm_tty_info *info) -{ - int ret = 0; - - pr_debug("%s: state=%s, event=%s\n", __func__ , - ircomm_tty_state[self->state], ircomm_tty_event[event]); - - switch (event) { - case IRCOMM_TTY_DISCOVERY_INDICATION: - self->daddr = info->daddr; - self->saddr = info->saddr; - - if (self->iriap) { - net_warn_ratelimited("%s(), busy with a previous query\n", - __func__); - return -EBUSY; - } - - self->iriap = iriap_open(LSAP_ANY, IAS_CLIENT, self, - ircomm_tty_getvalue_confirm); - - if (self->service_type == IRCOMM_3_WIRE_RAW) { - iriap_getvaluebyclass_request(self->iriap, self->saddr, - self->daddr, "IrLPT", - "IrDA:IrLMP:LsapSel"); - ircomm_tty_next_state(self, IRCOMM_TTY_QUERY_LSAP_SEL); - } else { - iriap_getvaluebyclass_request(self->iriap, self->saddr, - self->daddr, - "IrDA:IrCOMM", - "Parameters"); - - ircomm_tty_next_state(self, IRCOMM_TTY_QUERY_PARAMETERS); - } - ircomm_tty_start_watchdog_timer(self, 3*HZ); - break; - case IRCOMM_TTY_CONNECT_INDICATION: - del_timer(&self->watchdog_timer); - ircomm_tty_ias_unregister(self); - - /* Accept connection */ - ircomm_connect_response(self->ircomm, NULL); - ircomm_tty_next_state(self, IRCOMM_TTY_READY); - break; - case IRCOMM_TTY_WD_TIMER_EXPIRED: -#if 1 - /* Give up */ -#else - /* Try to discover any remote devices */ - ircomm_tty_start_watchdog_timer(self, 3*HZ); - irlmp_discovery_request(DISCOVERY_DEFAULT_SLOTS); -#endif - break; - case IRCOMM_TTY_DETACH_CABLE: - ircomm_tty_next_state(self, IRCOMM_TTY_IDLE); - break; - default: - pr_debug("%s(), unknown event: %s\n", __func__ , - ircomm_tty_event[event]); - ret = -EINVAL; - } - return ret; -} - -/* - * Function ircomm_tty_state_query (self, event, skb, info) - * - * Querying the remote LM-IAS for IrCOMM parameters - * - */ -static int ircomm_tty_state_query_parameters(struct ircomm_tty_cb *self, - IRCOMM_TTY_EVENT event, - struct sk_buff *skb, - struct ircomm_tty_info *info) -{ - int ret = 0; - - pr_debug("%s: state=%s, event=%s\n", __func__ , - ircomm_tty_state[self->state], ircomm_tty_event[event]); - - switch (event) { - case IRCOMM_TTY_GOT_PARAMETERS: - if (self->iriap) { - net_warn_ratelimited("%s(), busy with a previous query\n", - __func__); - return -EBUSY; - } - - self->iriap = iriap_open(LSAP_ANY, IAS_CLIENT, self, - ircomm_tty_getvalue_confirm); - - iriap_getvaluebyclass_request(self->iriap, self->saddr, - self->daddr, "IrDA:IrCOMM", - "IrDA:TinyTP:LsapSel"); - - ircomm_tty_start_watchdog_timer(self, 3*HZ); - ircomm_tty_next_state(self, IRCOMM_TTY_QUERY_LSAP_SEL); - break; - case IRCOMM_TTY_WD_TIMER_EXPIRED: - /* Go back to search mode */ - ircomm_tty_next_state(self, IRCOMM_TTY_SEARCH); - ircomm_tty_start_watchdog_timer(self, 3*HZ); - break; - case IRCOMM_TTY_CONNECT_INDICATION: - del_timer(&self->watchdog_timer); - ircomm_tty_ias_unregister(self); - - /* Accept connection */ - ircomm_connect_response(self->ircomm, NULL); - ircomm_tty_next_state(self, IRCOMM_TTY_READY); - break; - case IRCOMM_TTY_DETACH_CABLE: - ircomm_tty_next_state(self, IRCOMM_TTY_IDLE); - break; - default: - pr_debug("%s(), unknown event: %s\n", __func__ , - ircomm_tty_event[event]); - ret = -EINVAL; - } - return ret; -} - -/* - * Function ircomm_tty_state_query_lsap_sel (self, event, skb, info) - * - * Query remote LM-IAS for the LSAP selector which we can connect to - * - */ -static int ircomm_tty_state_query_lsap_sel(struct ircomm_tty_cb *self, - IRCOMM_TTY_EVENT event, - struct sk_buff *skb, - struct ircomm_tty_info *info) -{ - int ret = 0; - - pr_debug("%s: state=%s, event=%s\n", __func__ , - ircomm_tty_state[self->state], ircomm_tty_event[event]); - - switch (event) { - case IRCOMM_TTY_GOT_LSAPSEL: - /* Connect to remote device */ - ret = ircomm_connect_request(self->ircomm, self->dlsap_sel, - self->saddr, self->daddr, - NULL, self->service_type); - ircomm_tty_start_watchdog_timer(self, 3*HZ); - ircomm_tty_next_state(self, IRCOMM_TTY_SETUP); - break; - case IRCOMM_TTY_WD_TIMER_EXPIRED: - /* Go back to search mode */ - ircomm_tty_next_state(self, IRCOMM_TTY_SEARCH); - ircomm_tty_start_watchdog_timer(self, 3*HZ); - break; - case IRCOMM_TTY_CONNECT_INDICATION: - del_timer(&self->watchdog_timer); - ircomm_tty_ias_unregister(self); - - /* Accept connection */ - ircomm_connect_response(self->ircomm, NULL); - ircomm_tty_next_state(self, IRCOMM_TTY_READY); - break; - case IRCOMM_TTY_DETACH_CABLE: - ircomm_tty_next_state(self, IRCOMM_TTY_IDLE); - break; - default: - pr_debug("%s(), unknown event: %s\n", __func__ , - ircomm_tty_event[event]); - ret = -EINVAL; - } - return ret; -} - -/* - * Function ircomm_tty_state_setup (self, event, skb, info) - * - * Trying to connect - * - */ -static int ircomm_tty_state_setup(struct ircomm_tty_cb *self, - IRCOMM_TTY_EVENT event, - struct sk_buff *skb, - struct ircomm_tty_info *info) -{ - int ret = 0; - - pr_debug("%s: state=%s, event=%s\n", __func__ , - ircomm_tty_state[self->state], ircomm_tty_event[event]); - - switch (event) { - case IRCOMM_TTY_CONNECT_CONFIRM: - del_timer(&self->watchdog_timer); - ircomm_tty_ias_unregister(self); - - /* - * Send initial parameters. This will also send out queued - * parameters waiting for the connection to come up - */ - ircomm_tty_send_initial_parameters(self); - ircomm_tty_link_established(self); - ircomm_tty_next_state(self, IRCOMM_TTY_READY); - break; - case IRCOMM_TTY_CONNECT_INDICATION: - del_timer(&self->watchdog_timer); - ircomm_tty_ias_unregister(self); - - /* Accept connection */ - ircomm_connect_response(self->ircomm, NULL); - ircomm_tty_next_state(self, IRCOMM_TTY_READY); - break; - case IRCOMM_TTY_WD_TIMER_EXPIRED: - /* Go back to search mode */ - ircomm_tty_next_state(self, IRCOMM_TTY_SEARCH); - ircomm_tty_start_watchdog_timer(self, 3*HZ); - break; - case IRCOMM_TTY_DETACH_CABLE: - /* ircomm_disconnect_request(self->ircomm, NULL); */ - ircomm_tty_next_state(self, IRCOMM_TTY_IDLE); - break; - default: - pr_debug("%s(), unknown event: %s\n", __func__ , - ircomm_tty_event[event]); - ret = -EINVAL; - } - return ret; -} - -/* - * Function ircomm_tty_state_ready (self, event, skb, info) - * - * IrCOMM is now connected - * - */ -static int ircomm_tty_state_ready(struct ircomm_tty_cb *self, - IRCOMM_TTY_EVENT event, - struct sk_buff *skb, - struct ircomm_tty_info *info) -{ - int ret = 0; - - switch (event) { - case IRCOMM_TTY_DATA_REQUEST: - ret = ircomm_data_request(self->ircomm, skb); - break; - case IRCOMM_TTY_DETACH_CABLE: - ircomm_disconnect_request(self->ircomm, NULL); - ircomm_tty_next_state(self, IRCOMM_TTY_IDLE); - break; - case IRCOMM_TTY_DISCONNECT_INDICATION: - ircomm_tty_ias_register(self); - ircomm_tty_next_state(self, IRCOMM_TTY_SEARCH); - ircomm_tty_start_watchdog_timer(self, 3*HZ); - - if (tty_port_check_carrier(&self->port)) { - /* Drop carrier */ - self->settings.dce = IRCOMM_DELTA_CD; - ircomm_tty_check_modem_status(self); - } else { - pr_debug("%s(), hanging up!\n", __func__); - tty_port_tty_hangup(&self->port, false); - } - break; - default: - pr_debug("%s(), unknown event: %s\n", __func__ , - ircomm_tty_event[event]); - ret = -EINVAL; - } - return ret; -} - diff --git a/net/irda/ircomm/ircomm_tty_ioctl.c b/net/irda/ircomm/ircomm_tty_ioctl.c deleted file mode 100644 index 171c3dee760e..000000000000 --- a/net/irda/ircomm/ircomm_tty_ioctl.c +++ /dev/null @@ -1,291 +0,0 @@ -/********************************************************************* - * - * Filename: ircomm_tty_ioctl.c - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli <dagb@cs.uit.no> - * Created at: Thu Jun 10 14:39:09 1999 - * Modified at: Wed Jan 5 14:45:43 2000 - * Modified by: Dag Brattli <dagb@cs.uit.no> - * - * Copyright (c) 1999-2000 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. - * - ********************************************************************/ - -#include <linux/init.h> -#include <linux/fs.h> -#include <linux/termios.h> -#include <linux/tty.h> -#include <linux/serial.h> - -#include <linux/uaccess.h> - -#include <net/irda/irda.h> -#include <net/irda/irmod.h> - -#include <net/irda/ircomm_core.h> -#include <net/irda/ircomm_param.h> -#include <net/irda/ircomm_tty_attach.h> -#include <net/irda/ircomm_tty.h> - -#define RELEVANT_IFLAG(iflag) (iflag & (IGNBRK|BRKINT|IGNPAR|PARMRK|INPCK)) - -/* - * Function ircomm_tty_change_speed (driver) - * - * Change speed of the driver. If the remote device is a DCE, then this - * should make it change the speed of its serial port - */ -static void ircomm_tty_change_speed(struct ircomm_tty_cb *self, - struct tty_struct *tty) -{ - unsigned int cflag, cval; - int baud; - - if (!self->ircomm) - return; - - cflag = tty->termios.c_cflag; - - /* byte size and parity */ - switch (cflag & CSIZE) { - case CS5: cval = IRCOMM_WSIZE_5; break; - case CS6: cval = IRCOMM_WSIZE_6; break; - case CS7: cval = IRCOMM_WSIZE_7; break; - case CS8: cval = IRCOMM_WSIZE_8; break; - default: cval = IRCOMM_WSIZE_5; break; - } - if (cflag & CSTOPB) - cval |= IRCOMM_2_STOP_BIT; - - if (cflag & PARENB) - cval |= IRCOMM_PARITY_ENABLE; - if (!(cflag & PARODD)) - cval |= IRCOMM_PARITY_EVEN; - - /* Determine divisor based on baud rate */ - baud = tty_get_baud_rate(tty); - if (!baud) - baud = 9600; /* B0 transition handled in rs_set_termios */ - - self->settings.data_rate = baud; - ircomm_param_request(self, IRCOMM_DATA_RATE, FALSE); - - /* CTS flow control flag and modem status interrupts */ - tty_port_set_cts_flow(&self->port, cflag & CRTSCTS); - if (cflag & CRTSCTS) { - self->settings.flow_control |= IRCOMM_RTS_CTS_IN; - /* This got me. Bummer. Jean II */ - if (self->service_type == IRCOMM_3_WIRE_RAW) - net_warn_ratelimited("%s(), enabling RTS/CTS on link that doesn't support it (3-wire-raw)\n", - __func__); - } else { - self->settings.flow_control &= ~IRCOMM_RTS_CTS_IN; - } - tty_port_set_check_carrier(&self->port, ~cflag & CLOCAL); - - self->settings.data_format = cval; - - ircomm_param_request(self, IRCOMM_DATA_FORMAT, FALSE); - ircomm_param_request(self, IRCOMM_FLOW_CONTROL, TRUE); -} - -/* - * Function ircomm_tty_set_termios (tty, old_termios) - * - * This routine allows the tty driver to be notified when device's - * termios settings have changed. Note that a well-designed tty driver - * should be prepared to accept the case where old == NULL, and try to - * do something rational. - */ -void ircomm_tty_set_termios(struct tty_struct *tty, - struct ktermios *old_termios) -{ - struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data; - unsigned int cflag = tty->termios.c_cflag; - - if ((cflag == old_termios->c_cflag) && - (RELEVANT_IFLAG(tty->termios.c_iflag) == - RELEVANT_IFLAG(old_termios->c_iflag))) - { - return; - } - - ircomm_tty_change_speed(self, tty); - - /* Handle transition to B0 status */ - if ((old_termios->c_cflag & CBAUD) && !(cflag & CBAUD)) { - self->settings.dte &= ~(IRCOMM_DTR|IRCOMM_RTS); - ircomm_param_request(self, IRCOMM_DTE, TRUE); - } - - /* Handle transition away from B0 status */ - if (!(old_termios->c_cflag & CBAUD) && (cflag & CBAUD)) { - self->settings.dte |= IRCOMM_DTR; - if (!C_CRTSCTS(tty) || !tty_throttled(tty)) - self->settings.dte |= IRCOMM_RTS; - ircomm_param_request(self, IRCOMM_DTE, TRUE); - } - - /* Handle turning off CRTSCTS */ - if ((old_termios->c_cflag & CRTSCTS) && !C_CRTSCTS(tty)) - { - tty->hw_stopped = 0; - ircomm_tty_start(tty); - } -} - -/* - * Function ircomm_tty_tiocmget (tty) - * - * - * - */ -int ircomm_tty_tiocmget(struct tty_struct *tty) -{ - struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data; - unsigned int result; - - if (tty_io_error(tty)) - return -EIO; - - result = ((self->settings.dte & IRCOMM_RTS) ? TIOCM_RTS : 0) - | ((self->settings.dte & IRCOMM_DTR) ? TIOCM_DTR : 0) - | ((self->settings.dce & IRCOMM_CD) ? TIOCM_CAR : 0) - | ((self->settings.dce & IRCOMM_RI) ? TIOCM_RNG : 0) - | ((self->settings.dce & IRCOMM_DSR) ? TIOCM_DSR : 0) - | ((self->settings.dce & IRCOMM_CTS) ? TIOCM_CTS : 0); - return result; -} - -/* - * Function ircomm_tty_tiocmset (tty, set, clear) - * - * - * - */ -int ircomm_tty_tiocmset(struct tty_struct *tty, - unsigned int set, unsigned int clear) -{ - struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data; - - if (tty_io_error(tty)) - return -EIO; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == IRCOMM_TTY_MAGIC, return -1;); - - if (set & TIOCM_RTS) - self->settings.dte |= IRCOMM_RTS; - if (set & TIOCM_DTR) - self->settings.dte |= IRCOMM_DTR; - - if (clear & TIOCM_RTS) - self->settings.dte &= ~IRCOMM_RTS; - if (clear & TIOCM_DTR) - self->settings.dte &= ~IRCOMM_DTR; - - if ((set|clear) & TIOCM_RTS) - self->settings.dte |= IRCOMM_DELTA_RTS; - if ((set|clear) & TIOCM_DTR) - self->settings.dte |= IRCOMM_DELTA_DTR; - - ircomm_param_request(self, IRCOMM_DTE, TRUE); - - return 0; -} - -/* - * Function get_serial_info (driver, retinfo) - * - * - * - */ -static int ircomm_tty_get_serial_info(struct ircomm_tty_cb *self, - struct serial_struct __user *retinfo) -{ - struct serial_struct info; - - memset(&info, 0, sizeof(info)); - info.line = self->line; - info.flags = self->port.flags; - info.baud_base = self->settings.data_rate; - info.close_delay = self->port.close_delay; - info.closing_wait = self->port.closing_wait; - - /* For compatibility */ - info.type = PORT_16550A; - - if (copy_to_user(retinfo, &info, sizeof(*retinfo))) - return -EFAULT; - - return 0; -} - -/* - * Function set_serial_info (driver, new_info) - * - * - * - */ -static int ircomm_tty_set_serial_info(struct ircomm_tty_cb *self, - struct serial_struct __user *new_info) -{ - return 0; -} - -/* - * Function ircomm_tty_ioctl (tty, cmd, arg) - * - * - * - */ -int ircomm_tty_ioctl(struct tty_struct *tty, - unsigned int cmd, unsigned long arg) -{ - struct ircomm_tty_cb *self = (struct ircomm_tty_cb *) tty->driver_data; - int ret = 0; - - if ((cmd != TIOCGSERIAL) && (cmd != TIOCSSERIAL) && - (cmd != TIOCSERCONFIG) && (cmd != TIOCSERGSTRUCT) && - (cmd != TIOCMIWAIT) && (cmd != TIOCGICOUNT)) { - if (tty_io_error(tty)) - return -EIO; - } - - switch (cmd) { - case TIOCGSERIAL: - ret = ircomm_tty_get_serial_info(self, (struct serial_struct __user *) arg); - break; - case TIOCSSERIAL: - ret = ircomm_tty_set_serial_info(self, (struct serial_struct __user *) arg); - break; - case TIOCMIWAIT: - pr_debug("(), TIOCMIWAIT, not impl!\n"); - break; - - case TIOCGICOUNT: - pr_debug("%s(), TIOCGICOUNT not impl!\n", __func__); - return 0; - default: - ret = -ENOIOCTLCMD; /* ioctls which we must ignore */ - } - return ret; -} - - - diff --git a/net/irda/irda_device.c b/net/irda/irda_device.c deleted file mode 100644 index 890b90d055d5..000000000000 --- a/net/irda/irda_device.c +++ /dev/null @@ -1,316 +0,0 @@ -/********************************************************************* - * - * Filename: irda_device.c - * Version: 0.9 - * Description: Utility functions used by the device drivers - * Status: Experimental. - * Author: Dag Brattli <dagb@cs.uit.no> - * Created at: Sat Oct 9 09:22:27 1999 - * Modified at: Sun Jan 23 17:41:24 2000 - * Modified by: Dag Brattli <dagb@cs.uit.no> - * - * Copyright (c) 1999-2000 Dag Brattli, All Rights Reserved. - * Copyright (c) 2000-2001 Jean Tourrilhes <jt@hpl.hp.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. - * - ********************************************************************/ - -#include <linux/string.h> -#include <linux/proc_fs.h> -#include <linux/skbuff.h> -#include <linux/capability.h> -#include <linux/if.h> -#include <linux/if_ether.h> -#include <linux/if_arp.h> -#include <linux/netdevice.h> -#include <linux/init.h> -#include <linux/tty.h> -#include <linux/kmod.h> -#include <linux/spinlock.h> -#include <linux/slab.h> -#include <linux/export.h> - -#include <asm/ioctls.h> -#include <linux/uaccess.h> -#include <asm/dma.h> -#include <asm/io.h> - -#include <net/irda/irda_device.h> -#include <net/irda/irlap.h> -#include <net/irda/timer.h> -#include <net/irda/wrapper.h> - -static void __irda_task_delete(struct irda_task *task); - -static hashbin_t *dongles = NULL; -static hashbin_t *tasks = NULL; - -static void irda_task_timer_expired(void *data); - -int __init irda_device_init( void) -{ - dongles = hashbin_new(HB_NOLOCK); - if (dongles == NULL) { - net_warn_ratelimited("IrDA: Can't allocate dongles hashbin!\n"); - return -ENOMEM; - } - spin_lock_init(&dongles->hb_spinlock); - - tasks = hashbin_new(HB_LOCK); - if (tasks == NULL) { - net_warn_ratelimited("IrDA: Can't allocate tasks hashbin!\n"); - hashbin_delete(dongles, NULL); - return -ENOMEM; - } - - /* We no longer initialise the driver ourselves here, we let - * the system do it for us... - Jean II */ - - return 0; -} - -static void leftover_dongle(void *arg) -{ - struct dongle_reg *reg = arg; - net_warn_ratelimited("IrDA: Dongle type %x not unregistered\n", - reg->type); -} - -void irda_device_cleanup(void) -{ - hashbin_delete(tasks, (FREE_FUNC) __irda_task_delete); - - hashbin_delete(dongles, leftover_dongle); -} - -/* - * Function irda_device_set_media_busy (self, status) - * - * Called when we have detected that another station is transmitting - * in contention mode. - */ -void irda_device_set_media_busy(struct net_device *dev, int status) -{ - struct irlap_cb *self; - - pr_debug("%s(%s)\n", __func__, status ? "TRUE" : "FALSE"); - - self = (struct irlap_cb *) dev->atalk_ptr; - - /* Some drivers may enable the receive interrupt before calling - * irlap_open(), or they may disable the receive interrupt - * after calling irlap_close(). - * The IrDA stack is protected from this in irlap_driver_rcv(). - * However, the driver calls directly the wrapper, that calls - * us directly. Make sure we protect ourselves. - * Jean II */ - if (!self || self->magic != LAP_MAGIC) - return; - - if (status) { - self->media_busy = TRUE; - if (status == SMALL) - irlap_start_mbusy_timer(self, SMALLBUSY_TIMEOUT); - else - irlap_start_mbusy_timer(self, MEDIABUSY_TIMEOUT); - pr_debug("Media busy!\n"); - } else { - self->media_busy = FALSE; - irlap_stop_mbusy_timer(self); - } -} -EXPORT_SYMBOL(irda_device_set_media_busy); - - -/* - * Function irda_device_is_receiving (dev) - * - * Check if the device driver is currently receiving data - * - */ -int irda_device_is_receiving(struct net_device *dev) -{ - struct if_irda_req req; - int ret; - - if (!dev->netdev_ops->ndo_do_ioctl) { - net_err_ratelimited("%s: do_ioctl not impl. by device driver\n", - __func__); - return -1; - } - - ret = (dev->netdev_ops->ndo_do_ioctl)(dev, (struct ifreq *) &req, - SIOCGRECEIVING); - if (ret < 0) - return ret; - - return req.ifr_receiving; -} - -static void __irda_task_delete(struct irda_task *task) -{ - del_timer(&task->timer); - - kfree(task); -} - -static void irda_task_delete(struct irda_task *task) -{ - /* Unregister task */ - hashbin_remove(tasks, (long) task, NULL); - - __irda_task_delete(task); -} - -/* - * Function irda_task_kick (task) - * - * Tries to execute a task possible multiple times until the task is either - * finished, or askes for a timeout. When a task is finished, we do post - * processing, and notify the parent task, that is waiting for this task - * to complete. - */ -static int irda_task_kick(struct irda_task *task) -{ - int finished = TRUE; - int count = 0; - int timeout; - - IRDA_ASSERT(task != NULL, return -1;); - IRDA_ASSERT(task->magic == IRDA_TASK_MAGIC, return -1;); - - /* Execute task until it's finished, or askes for a timeout */ - do { - timeout = task->function(task); - if (count++ > 100) { - net_err_ratelimited("%s: error in task handler!\n", - __func__); - irda_task_delete(task); - return TRUE; - } - } while ((timeout == 0) && (task->state != IRDA_TASK_DONE)); - - if (timeout < 0) { - net_err_ratelimited("%s: Error executing task!\n", __func__); - irda_task_delete(task); - return TRUE; - } - - /* Check if we are finished */ - if (task->state == IRDA_TASK_DONE) { - del_timer(&task->timer); - - /* Do post processing */ - if (task->finished) - task->finished(task); - - /* Notify parent */ - if (task->parent) { - /* Check if parent is waiting for us to complete */ - if (task->parent->state == IRDA_TASK_CHILD_WAIT) { - task->parent->state = IRDA_TASK_CHILD_DONE; - - /* Stop timer now that we are here */ - del_timer(&task->parent->timer); - - /* Kick parent task */ - irda_task_kick(task->parent); - } - } - irda_task_delete(task); - } else if (timeout > 0) { - irda_start_timer(&task->timer, timeout, (void *) task, - irda_task_timer_expired); - finished = FALSE; - } else { - pr_debug("%s(), not finished, and no timeout!\n", - __func__); - finished = FALSE; - } - - return finished; -} - -/* - * Function irda_task_timer_expired (data) - * - * Task time has expired. We now try to execute task (again), and restart - * the timer if the task has not finished yet - */ -static void irda_task_timer_expired(void *data) -{ - struct irda_task *task; - - task = data; - - irda_task_kick(task); -} - -/* - * Function irda_device_setup (dev) - * - * This function should be used by low level device drivers in a similar way - * as ether_setup() is used by normal network device drivers - */ -static void irda_device_setup(struct net_device *dev) -{ - dev->hard_header_len = 0; - dev->addr_len = LAP_ALEN; - - dev->type = ARPHRD_IRDA; - dev->tx_queue_len = 8; /* Window size + 1 s-frame */ - - memset(dev->broadcast, 0xff, LAP_ALEN); - - dev->mtu = 2048; - dev->flags = IFF_NOARP; -} - -/* - * Funciton alloc_irdadev - * Allocates and sets up an IRDA device in a manner similar to - * alloc_etherdev. - */ -struct net_device *alloc_irdadev(int sizeof_priv) -{ - return alloc_netdev(sizeof_priv, "irda%d", NET_NAME_UNKNOWN, - irda_device_setup); -} -EXPORT_SYMBOL(alloc_irdadev); - -#ifdef CONFIG_ISA_DMA_API -/* - * Function setup_dma (idev, buffer, count, mode) - * - * Setup the DMA channel. Commonly used by LPC FIR drivers - * - */ -void irda_setup_dma(int channel, dma_addr_t buffer, int count, int mode) -{ - unsigned long flags; - - flags = claim_dma_lock(); - - disable_dma(channel); - clear_dma_ff(channel); - set_dma_mode(channel, mode); - set_dma_addr(channel, buffer); - set_dma_count(channel, count); - enable_dma(channel); - - release_dma_lock(flags); -} -EXPORT_SYMBOL(irda_setup_dma); -#endif diff --git a/net/irda/iriap.c b/net/irda/iriap.c deleted file mode 100644 index 1138eaf5c682..000000000000 --- a/net/irda/iriap.c +++ /dev/null @@ -1,1085 +0,0 @@ -/********************************************************************* - * - * Filename: iriap.c - * Version: 0.8 - * Description: Information Access Protocol (IAP) - * Status: Experimental. - * Author: Dag Brattli <dagb@cs.uit.no> - * Created at: Thu Aug 21 00:02:07 1997 - * Modified at: Sat Dec 25 16:42:42 1999 - * Modified by: Dag Brattli <dagb@cs.uit.no> - * - * Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no>, - * All Rights Reserved. - * Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#include <linux/module.h> -#include <linux/types.h> -#include <linux/skbuff.h> -#include <linux/fs.h> -#include <linux/string.h> -#include <linux/init.h> -#include <linux/seq_file.h> -#include <linux/slab.h> - -#include <asm/byteorder.h> -#include <asm/unaligned.h> - -#include <net/irda/irda.h> -#include <net/irda/irttp.h> -#include <net/irda/irlmp.h> -#include <net/irda/irias_object.h> -#include <net/irda/iriap_event.h> -#include <net/irda/iriap.h> - -/* FIXME: This one should go in irlmp.c */ -static const char *const ias_charset_types[] __maybe_unused = { - "CS_ASCII", - "CS_ISO_8859_1", - "CS_ISO_8859_2", - "CS_ISO_8859_3", - "CS_ISO_8859_4", - "CS_ISO_8859_5", - "CS_ISO_8859_6", - "CS_ISO_8859_7", - "CS_ISO_8859_8", - "CS_ISO_8859_9", - "CS_UNICODE" -}; - -static hashbin_t *iriap = NULL; -static void *service_handle; - -static void __iriap_close(struct iriap_cb *self); -static int iriap_register_lsap(struct iriap_cb *self, __u8 slsap_sel, int mode); -static void iriap_disconnect_indication(void *instance, void *sap, - LM_REASON reason, struct sk_buff *skb); -static void iriap_connect_indication(void *instance, void *sap, - struct qos_info *qos, __u32 max_sdu_size, - __u8 max_header_size, - struct sk_buff *skb); -static void iriap_connect_confirm(void *instance, void *sap, - struct qos_info *qos, - __u32 max_sdu_size, __u8 max_header_size, - struct sk_buff *skb); -static int iriap_data_indication(void *instance, void *sap, - struct sk_buff *skb); - -static void iriap_watchdog_timer_expired(void *data); - -static inline void iriap_start_watchdog_timer(struct iriap_cb *self, - int timeout) -{ - irda_start_timer(&self->watchdog_timer, timeout, self, - iriap_watchdog_timer_expired); -} - -static struct lock_class_key irias_objects_key; - -/* - * Function iriap_init (void) - * - * Initializes the IrIAP layer, called by the module initialization code - * in irmod.c - */ -int __init iriap_init(void) -{ - struct ias_object *obj; - struct iriap_cb *server; - __u8 oct_seq[6]; - __u16 hints; - - /* Allocate master array */ - iriap = hashbin_new(HB_LOCK); - if (!iriap) - return -ENOMEM; - - /* Object repository - defined in irias_object.c */ - irias_objects = hashbin_new(HB_LOCK); - if (!irias_objects) { - net_warn_ratelimited("%s: Can't allocate irias_objects hashbin!\n", - __func__); - hashbin_delete(iriap, NULL); - return -ENOMEM; - } - - lockdep_set_class_and_name(&irias_objects->hb_spinlock, &irias_objects_key, - "irias_objects"); - - /* - * Register some default services for IrLMP - */ - hints = irlmp_service_to_hint(S_COMPUTER); - service_handle = irlmp_register_service(hints); - - /* Register the Device object with LM-IAS */ - obj = irias_new_object("Device", IAS_DEVICE_ID); - irias_add_string_attrib(obj, "DeviceName", "Linux", IAS_KERNEL_ATTR); - - oct_seq[0] = 0x01; /* Version 1 */ - oct_seq[1] = 0x00; /* IAS support bits */ - oct_seq[2] = 0x00; /* LM-MUX support bits */ -#ifdef CONFIG_IRDA_ULTRA - oct_seq[2] |= 0x04; /* Connectionless Data support */ -#endif - irias_add_octseq_attrib(obj, "IrLMPSupport", oct_seq, 3, - IAS_KERNEL_ATTR); - irias_insert_object(obj); - - /* - * Register server support with IrLMP so we can accept incoming - * connections - */ - server = iriap_open(LSAP_IAS, IAS_SERVER, NULL, NULL); - if (!server) { - pr_debug("%s(), unable to open server\n", __func__); - return -1; - } - iriap_register_lsap(server, LSAP_IAS, IAS_SERVER); - - return 0; -} - -/* - * Function iriap_cleanup (void) - * - * Initializes the IrIAP layer, called by the module cleanup code in - * irmod.c - */ -void iriap_cleanup(void) -{ - irlmp_unregister_service(service_handle); - - hashbin_delete(iriap, (FREE_FUNC) __iriap_close); - hashbin_delete(irias_objects, (FREE_FUNC) __irias_delete_object); -} - -/* - * Function iriap_open (void) - * - * Opens an instance of the IrIAP layer, and registers with IrLMP - */ -struct iriap_cb *iriap_open(__u8 slsap_sel, int mode, void *priv, - CONFIRM_CALLBACK callback) -{ - struct iriap_cb *self; - - self = kzalloc(sizeof(*self), GFP_ATOMIC); - if (!self) - return NULL; - - /* - * Initialize instance - */ - - self->magic = IAS_MAGIC; - self->mode = mode; - if (mode == IAS_CLIENT) { - if (iriap_register_lsap(self, slsap_sel, mode)) { - kfree(self); - return NULL; - } - } - - self->confirm = callback; - self->priv = priv; - - /* iriap_getvaluebyclass_request() will construct packets before - * we connect, so this must have a sane value... Jean II */ - self->max_header_size = LMP_MAX_HEADER; - - init_timer(&self->watchdog_timer); - - hashbin_insert(iriap, (irda_queue_t *) self, (long) self, NULL); - - /* Initialize state machines */ - iriap_next_client_state(self, S_DISCONNECT); - iriap_next_call_state(self, S_MAKE_CALL); - iriap_next_server_state(self, R_DISCONNECT); - iriap_next_r_connect_state(self, R_WAITING); - - return self; -} -EXPORT_SYMBOL(iriap_open); - -/* - * Function __iriap_close (self) - * - * Removes (deallocates) the IrIAP instance - * - */ -static void __iriap_close(struct iriap_cb *self) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IAS_MAGIC, return;); - - del_timer(&self->watchdog_timer); - - if (self->request_skb) - dev_kfree_skb(self->request_skb); - - self->magic = 0; - - kfree(self); -} - -/* - * Function iriap_close (void) - * - * Closes IrIAP and deregisters with IrLMP - */ -void iriap_close(struct iriap_cb *self) -{ - struct iriap_cb *entry; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IAS_MAGIC, return;); - - if (self->lsap) { - irlmp_close_lsap(self->lsap); - self->lsap = NULL; - } - - entry = (struct iriap_cb *) hashbin_remove(iriap, (long) self, NULL); - IRDA_ASSERT(entry == self, return;); - - __iriap_close(self); -} -EXPORT_SYMBOL(iriap_close); - -static int iriap_register_lsap(struct iriap_cb *self, __u8 slsap_sel, int mode) -{ - notify_t notify; - - irda_notify_init(¬ify); - notify.connect_confirm = iriap_connect_confirm; - notify.connect_indication = iriap_connect_indication; - notify.disconnect_indication = iriap_disconnect_indication; - notify.data_indication = iriap_data_indication; - notify.instance = self; - if (mode == IAS_CLIENT) - strcpy(notify.name, "IrIAS cli"); - else - strcpy(notify.name, "IrIAS srv"); - - self->lsap = irlmp_open_lsap(slsap_sel, ¬ify, 0); - if (self->lsap == NULL) { - net_err_ratelimited("%s: Unable to allocated LSAP!\n", - __func__); - return -1; - } - self->slsap_sel = self->lsap->slsap_sel; - - return 0; -} - -/* - * Function iriap_disconnect_indication (handle, reason) - * - * Got disconnect, so clean up everything associated with this connection - * - */ -static void iriap_disconnect_indication(void *instance, void *sap, - LM_REASON reason, - struct sk_buff *skb) -{ - struct iriap_cb *self; - - pr_debug("%s(), reason=%s [%d]\n", __func__, - irlmp_reason_str(reason), reason); - - self = instance; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IAS_MAGIC, return;); - - IRDA_ASSERT(iriap != NULL, return;); - - del_timer(&self->watchdog_timer); - - /* Not needed */ - if (skb) - dev_kfree_skb(skb); - - if (self->mode == IAS_CLIENT) { - pr_debug("%s(), disconnect as client\n", __func__); - - - iriap_do_client_event(self, IAP_LM_DISCONNECT_INDICATION, - NULL); - /* - * Inform service user that the request failed by sending - * it a NULL value. Warning, the client might close us, so - * remember no to use self anymore after calling confirm - */ - if (self->confirm) - self->confirm(IAS_DISCONNECT, 0, NULL, self->priv); - } else { - pr_debug("%s(), disconnect as server\n", __func__); - iriap_do_server_event(self, IAP_LM_DISCONNECT_INDICATION, - NULL); - iriap_close(self); - } -} - -/* - * Function iriap_disconnect_request (handle) - */ -static void iriap_disconnect_request(struct iriap_cb *self) -{ - struct sk_buff *tx_skb; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IAS_MAGIC, return;); - - tx_skb = alloc_skb(LMP_MAX_HEADER, GFP_ATOMIC); - if (tx_skb == NULL) { - pr_debug("%s(), Could not allocate an sk_buff of length %d\n", - __func__, LMP_MAX_HEADER); - return; - } - - /* - * Reserve space for MUX control and LAP header - */ - skb_reserve(tx_skb, LMP_MAX_HEADER); - - irlmp_disconnect_request(self->lsap, tx_skb); -} - -/* - * Function iriap_getvaluebyclass (addr, name, attr) - * - * Retrieve all values from attribute in all objects with given class - * name - */ -int iriap_getvaluebyclass_request(struct iriap_cb *self, - __u32 saddr, __u32 daddr, - char *name, char *attr) -{ - struct sk_buff *tx_skb; - int name_len, attr_len, skb_len; - __u8 *frame; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == IAS_MAGIC, return -1;); - - /* Client must supply the destination device address */ - if (!daddr) - return -1; - - self->daddr = daddr; - self->saddr = saddr; - - /* - * Save operation, so we know what the later indication is about - */ - self->operation = GET_VALUE_BY_CLASS; - - /* Give ourselves 10 secs to finish this operation */ - iriap_start_watchdog_timer(self, 10*HZ); - - name_len = strlen(name); /* Up to IAS_MAX_CLASSNAME = 60 */ - attr_len = strlen(attr); /* Up to IAS_MAX_ATTRIBNAME = 60 */ - - skb_len = self->max_header_size+2+name_len+1+attr_len+4; - tx_skb = alloc_skb(skb_len, GFP_ATOMIC); - if (!tx_skb) - return -ENOMEM; - - /* Reserve space for MUX and LAP header */ - skb_reserve(tx_skb, self->max_header_size); - skb_put(tx_skb, 3+name_len+attr_len); - frame = tx_skb->data; - - /* Build frame */ - frame[0] = IAP_LST | GET_VALUE_BY_CLASS; - frame[1] = name_len; /* Insert length of name */ - memcpy(frame+2, name, name_len); /* Insert name */ - frame[2+name_len] = attr_len; /* Insert length of attr */ - memcpy(frame+3+name_len, attr, attr_len); /* Insert attr */ - - iriap_do_client_event(self, IAP_CALL_REQUEST_GVBC, tx_skb); - - /* Drop reference count - see state_s_disconnect(). */ - dev_kfree_skb(tx_skb); - - return 0; -} -EXPORT_SYMBOL(iriap_getvaluebyclass_request); - -/* - * Function iriap_getvaluebyclass_confirm (self, skb) - * - * Got result from GetValueByClass command. Parse it and return result - * to service user. - * - */ -static void iriap_getvaluebyclass_confirm(struct iriap_cb *self, - struct sk_buff *skb) -{ - struct ias_value *value; - int charset; - __u32 value_len; - __u32 tmp_cpu32; - __u16 obj_id; - __u16 len; - __u8 type; - __u8 *fp; - int n; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IAS_MAGIC, return;); - IRDA_ASSERT(skb != NULL, return;); - - /* Initialize variables */ - fp = skb->data; - n = 2; - - /* Get length, MSB first */ - len = get_unaligned_be16(fp + n); - n += 2; - - pr_debug("%s(), len=%d\n", __func__, len); - - /* Get object ID, MSB first */ - obj_id = get_unaligned_be16(fp + n); - n += 2; - - type = fp[n++]; - pr_debug("%s(), Value type = %d\n", __func__, type); - - switch (type) { - case IAS_INTEGER: - memcpy(&tmp_cpu32, fp+n, 4); n += 4; - be32_to_cpus(&tmp_cpu32); - value = irias_new_integer_value(tmp_cpu32); - - /* Legal values restricted to 0x01-0x6f, page 15 irttp */ - pr_debug("%s(), lsap=%d\n", __func__, value->t.integer); - break; - case IAS_STRING: - charset = fp[n++]; - - switch (charset) { - case CS_ASCII: - break; -/* case CS_ISO_8859_1: */ -/* case CS_ISO_8859_2: */ -/* case CS_ISO_8859_3: */ -/* case CS_ISO_8859_4: */ -/* case CS_ISO_8859_5: */ -/* case CS_ISO_8859_6: */ -/* case CS_ISO_8859_7: */ -/* case CS_ISO_8859_8: */ -/* case CS_ISO_8859_9: */ -/* case CS_UNICODE: */ - default: - pr_debug("%s(), charset [%d] %s, not supported\n", - __func__, charset, - charset < ARRAY_SIZE(ias_charset_types) ? - ias_charset_types[charset] : - "(unknown)"); - - /* Aborting, close connection! */ - iriap_disconnect_request(self); - return; - /* break; */ - } - value_len = fp[n++]; - pr_debug("%s(), strlen=%d\n", __func__, value_len); - - /* Make sure the string is null-terminated */ - if (n + value_len < skb->len) - fp[n + value_len] = 0x00; - pr_debug("Got string %s\n", fp+n); - - /* Will truncate to IAS_MAX_STRING bytes */ - value = irias_new_string_value(fp+n); - break; - case IAS_OCT_SEQ: - value_len = get_unaligned_be16(fp + n); - n += 2; - - /* Will truncate to IAS_MAX_OCTET_STRING bytes */ - value = irias_new_octseq_value(fp+n, value_len); - break; - default: - value = irias_new_missing_value(); - break; - } - - /* Finished, close connection! */ - iriap_disconnect_request(self); - - /* Warning, the client might close us, so remember no to use self - * anymore after calling confirm - */ - if (self->confirm) - self->confirm(IAS_SUCCESS, obj_id, value, self->priv); - else { - pr_debug("%s(), missing handler!\n", __func__); - irias_delete_value(value); - } -} - -/* - * Function iriap_getvaluebyclass_response () - * - * Send answer back to remote LM-IAS - * - */ -static void iriap_getvaluebyclass_response(struct iriap_cb *self, - __u16 obj_id, - __u8 ret_code, - struct ias_value *value) -{ - struct sk_buff *tx_skb; - int n; - __be32 tmp_be32; - __be16 tmp_be16; - __u8 *fp; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IAS_MAGIC, return;); - IRDA_ASSERT(value != NULL, return;); - IRDA_ASSERT(value->len <= 1024, return;); - - /* Initialize variables */ - n = 0; - - /* - * We must adjust the size of the response after the length of the - * value. We add 32 bytes because of the 6 bytes for the frame and - * max 5 bytes for the value coding. - */ - tx_skb = alloc_skb(value->len + self->max_header_size + 32, - GFP_ATOMIC); - if (!tx_skb) - return; - - /* Reserve space for MUX and LAP header */ - skb_reserve(tx_skb, self->max_header_size); - skb_put(tx_skb, 6); - - fp = tx_skb->data; - - /* Build frame */ - fp[n++] = GET_VALUE_BY_CLASS | IAP_LST; - fp[n++] = ret_code; - - /* Insert list length (MSB first) */ - tmp_be16 = htons(0x0001); - memcpy(fp+n, &tmp_be16, 2); n += 2; - - /* Insert object identifier ( MSB first) */ - tmp_be16 = cpu_to_be16(obj_id); - memcpy(fp+n, &tmp_be16, 2); n += 2; - - switch (value->type) { - case IAS_STRING: - skb_put(tx_skb, 3 + value->len); - fp[n++] = value->type; - fp[n++] = 0; /* ASCII */ - fp[n++] = (__u8) value->len; - memcpy(fp+n, value->t.string, value->len); n+=value->len; - break; - case IAS_INTEGER: - skb_put(tx_skb, 5); - fp[n++] = value->type; - - tmp_be32 = cpu_to_be32(value->t.integer); - memcpy(fp+n, &tmp_be32, 4); n += 4; - break; - case IAS_OCT_SEQ: - skb_put(tx_skb, 3 + value->len); - fp[n++] = value->type; - - tmp_be16 = cpu_to_be16(value->len); - memcpy(fp+n, &tmp_be16, 2); n += 2; - memcpy(fp+n, value->t.oct_seq, value->len); n+=value->len; - break; - case IAS_MISSING: - pr_debug("%s: sending IAS_MISSING\n", __func__); - skb_put(tx_skb, 1); - fp[n++] = value->type; - break; - default: - pr_debug("%s(), type not implemented!\n", __func__); - break; - } - iriap_do_r_connect_event(self, IAP_CALL_RESPONSE, tx_skb); - - /* Drop reference count - see state_r_execute(). */ - dev_kfree_skb(tx_skb); -} - -/* - * Function iriap_getvaluebyclass_indication (self, skb) - * - * getvaluebyclass is requested from peer LM-IAS - * - */ -static void iriap_getvaluebyclass_indication(struct iriap_cb *self, - struct sk_buff *skb) -{ - struct ias_object *obj; - struct ias_attrib *attrib; - int name_len; - int attr_len; - char name[IAS_MAX_CLASSNAME + 1]; /* 60 bytes */ - char attr[IAS_MAX_ATTRIBNAME + 1]; /* 60 bytes */ - __u8 *fp; - int n; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IAS_MAGIC, return;); - IRDA_ASSERT(skb != NULL, return;); - - fp = skb->data; - n = 1; - - name_len = fp[n++]; - - IRDA_ASSERT(name_len < IAS_MAX_CLASSNAME + 1, return;); - - memcpy(name, fp+n, name_len); n+=name_len; - name[name_len] = '\0'; - - attr_len = fp[n++]; - - IRDA_ASSERT(attr_len < IAS_MAX_ATTRIBNAME + 1, return;); - - memcpy(attr, fp+n, attr_len); n+=attr_len; - attr[attr_len] = '\0'; - - pr_debug("LM-IAS: Looking up %s: %s\n", name, attr); - obj = irias_find_object(name); - - if (obj == NULL) { - pr_debug("LM-IAS: Object %s not found\n", name); - iriap_getvaluebyclass_response(self, 0x1235, IAS_CLASS_UNKNOWN, - &irias_missing); - return; - } - pr_debug("LM-IAS: found %s, id=%d\n", obj->name, obj->id); - - attrib = irias_find_attrib(obj, attr); - if (attrib == NULL) { - pr_debug("LM-IAS: Attribute %s not found\n", attr); - iriap_getvaluebyclass_response(self, obj->id, - IAS_ATTRIB_UNKNOWN, - &irias_missing); - return; - } - - /* We have a match; send the value. */ - iriap_getvaluebyclass_response(self, obj->id, IAS_SUCCESS, - attrib->value); -} - -/* - * Function iriap_send_ack (void) - * - * Currently not used - * - */ -void iriap_send_ack(struct iriap_cb *self) -{ - struct sk_buff *tx_skb; - __u8 *frame; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IAS_MAGIC, return;); - - tx_skb = alloc_skb(LMP_MAX_HEADER + 1, GFP_ATOMIC); - if (!tx_skb) - return; - - /* Reserve space for MUX and LAP header */ - skb_reserve(tx_skb, self->max_header_size); - skb_put(tx_skb, 1); - frame = tx_skb->data; - - /* Build frame */ - frame[0] = IAP_LST | IAP_ACK | self->operation; - - irlmp_data_request(self->lsap, tx_skb); -} - -void iriap_connect_request(struct iriap_cb *self) -{ - int ret; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IAS_MAGIC, return;); - - ret = irlmp_connect_request(self->lsap, LSAP_IAS, - self->saddr, self->daddr, - NULL, NULL); - if (ret < 0) { - pr_debug("%s(), connect failed!\n", __func__); - self->confirm(IAS_DISCONNECT, 0, NULL, self->priv); - } -} - -/* - * Function iriap_connect_confirm (handle, skb) - * - * LSAP connection confirmed! - * - */ -static void iriap_connect_confirm(void *instance, void *sap, - struct qos_info *qos, __u32 max_seg_size, - __u8 max_header_size, - struct sk_buff *skb) -{ - struct iriap_cb *self; - - self = instance; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IAS_MAGIC, return;); - IRDA_ASSERT(skb != NULL, return;); - - self->max_data_size = max_seg_size; - self->max_header_size = max_header_size; - - del_timer(&self->watchdog_timer); - - iriap_do_client_event(self, IAP_LM_CONNECT_CONFIRM, skb); - - /* Drop reference count - see state_s_make_call(). */ - dev_kfree_skb(skb); -} - -/* - * Function iriap_connect_indication ( handle, skb) - * - * Remote LM-IAS is requesting connection - * - */ -static void iriap_connect_indication(void *instance, void *sap, - struct qos_info *qos, __u32 max_seg_size, - __u8 max_header_size, - struct sk_buff *skb) -{ - struct iriap_cb *self, *new; - - self = instance; - - IRDA_ASSERT(skb != NULL, return;); - IRDA_ASSERT(self != NULL, goto out;); - IRDA_ASSERT(self->magic == IAS_MAGIC, goto out;); - - /* Start new server */ - new = iriap_open(LSAP_IAS, IAS_SERVER, NULL, NULL); - if (!new) { - pr_debug("%s(), open failed\n", __func__); - goto out; - } - - /* Now attach up the new "socket" */ - new->lsap = irlmp_dup(self->lsap, new); - if (!new->lsap) { - pr_debug("%s(), dup failed!\n", __func__); - goto out; - } - - new->max_data_size = max_seg_size; - new->max_header_size = max_header_size; - - /* Clean up the original one to keep it in listen state */ - irlmp_listen(self->lsap); - - iriap_do_server_event(new, IAP_LM_CONNECT_INDICATION, skb); - -out: - /* Drop reference count - see state_r_disconnect(). */ - dev_kfree_skb(skb); -} - -/* - * Function iriap_data_indication (handle, skb) - * - * Receives data from connection identified by handle from IrLMP - * - */ -static int iriap_data_indication(void *instance, void *sap, - struct sk_buff *skb) -{ - struct iriap_cb *self; - __u8 *frame; - __u8 opcode; - - self = instance; - - IRDA_ASSERT(skb != NULL, return 0;); - IRDA_ASSERT(self != NULL, goto out;); - IRDA_ASSERT(self->magic == IAS_MAGIC, goto out;); - - frame = skb->data; - - if (self->mode == IAS_SERVER) { - /* Call server */ - pr_debug("%s(), Calling server!\n", __func__); - iriap_do_r_connect_event(self, IAP_RECV_F_LST, skb); - goto out; - } - opcode = frame[0]; - if (~opcode & IAP_LST) { - net_warn_ratelimited("%s:, IrIAS multiframe commands or results is not implemented yet!\n", - __func__); - goto out; - } - - /* Check for ack frames since they don't contain any data */ - if (opcode & IAP_ACK) { - pr_debug("%s() Got ack frame!\n", __func__); - goto out; - } - - opcode &= ~IAP_LST; /* Mask away LST bit */ - - switch (opcode) { - case GET_INFO_BASE: - pr_debug("IrLMP GetInfoBaseDetails not implemented!\n"); - break; - case GET_VALUE_BY_CLASS: - iriap_do_call_event(self, IAP_RECV_F_LST, NULL); - - switch (frame[1]) { - case IAS_SUCCESS: - iriap_getvaluebyclass_confirm(self, skb); - break; - case IAS_CLASS_UNKNOWN: - pr_debug("%s(), No such class!\n", __func__); - /* Finished, close connection! */ - iriap_disconnect_request(self); - - /* - * Warning, the client might close us, so remember - * no to use self anymore after calling confirm - */ - if (self->confirm) - self->confirm(IAS_CLASS_UNKNOWN, 0, NULL, - self->priv); - break; - case IAS_ATTRIB_UNKNOWN: - pr_debug("%s(), No such attribute!\n", __func__); - /* Finished, close connection! */ - iriap_disconnect_request(self); - - /* - * Warning, the client might close us, so remember - * no to use self anymore after calling confirm - */ - if (self->confirm) - self->confirm(IAS_ATTRIB_UNKNOWN, 0, NULL, - self->priv); - break; - } - break; - default: - pr_debug("%s(), Unknown op-code: %02x\n", __func__, - opcode); - break; - } - -out: - /* Cleanup - sub-calls will have done skb_get() as needed. */ - dev_kfree_skb(skb); - return 0; -} - -/* - * Function iriap_call_indication (self, skb) - * - * Received call to server from peer LM-IAS - * - */ -void iriap_call_indication(struct iriap_cb *self, struct sk_buff *skb) -{ - __u8 *fp; - __u8 opcode; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IAS_MAGIC, return;); - IRDA_ASSERT(skb != NULL, return;); - - fp = skb->data; - - opcode = fp[0]; - if (~opcode & 0x80) { - net_warn_ratelimited("%s: IrIAS multiframe commands or results is not implemented yet!\n", - __func__); - return; - } - opcode &= 0x7f; /* Mask away LST bit */ - - switch (opcode) { - case GET_INFO_BASE: - net_warn_ratelimited("%s: GetInfoBaseDetails not implemented yet!\n", - __func__); - break; - case GET_VALUE_BY_CLASS: - iriap_getvaluebyclass_indication(self, skb); - break; - } - /* skb will be cleaned up in iriap_data_indication */ -} - -/* - * Function iriap_watchdog_timer_expired (data) - * - * Query has taken too long time, so abort - * - */ -static void iriap_watchdog_timer_expired(void *data) -{ - struct iriap_cb *self = (struct iriap_cb *) data; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IAS_MAGIC, return;); - - /* iriap_close(self); */ -} - -#ifdef CONFIG_PROC_FS - -static const char *const ias_value_types[] = { - "IAS_MISSING", - "IAS_INTEGER", - "IAS_OCT_SEQ", - "IAS_STRING" -}; - -static inline struct ias_object *irias_seq_idx(loff_t pos) -{ - struct ias_object *obj; - - for (obj = (struct ias_object *) hashbin_get_first(irias_objects); - obj; obj = (struct ias_object *) hashbin_get_next(irias_objects)) { - if (pos-- == 0) - break; - } - - return obj; -} - -static void *irias_seq_start(struct seq_file *seq, loff_t *pos) -{ - spin_lock_irq(&irias_objects->hb_spinlock); - - return *pos ? irias_seq_idx(*pos - 1) : SEQ_START_TOKEN; -} - -static void *irias_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - ++*pos; - - return (v == SEQ_START_TOKEN) - ? (void *) hashbin_get_first(irias_objects) - : (void *) hashbin_get_next(irias_objects); -} - -static void irias_seq_stop(struct seq_file *seq, void *v) -{ - spin_unlock_irq(&irias_objects->hb_spinlock); -} - -static int irias_seq_show(struct seq_file *seq, void *v) -{ - if (v == SEQ_START_TOKEN) - seq_puts(seq, "LM-IAS Objects:\n"); - else { - struct ias_object *obj = v; - struct ias_attrib *attrib; - - IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return -EINVAL;); - - seq_printf(seq, "name: %s, id=%d\n", - obj->name, obj->id); - - /* Careful for priority inversions here ! - * All other uses of attrib spinlock are independent of - * the object spinlock, so we are safe. Jean II */ - spin_lock(&obj->attribs->hb_spinlock); - - /* List all attributes for this object */ - for (attrib = (struct ias_attrib *) hashbin_get_first(obj->attribs); - attrib != NULL; - attrib = (struct ias_attrib *) hashbin_get_next(obj->attribs)) { - - IRDA_ASSERT(attrib->magic == IAS_ATTRIB_MAGIC, - goto outloop; ); - - seq_printf(seq, " - Attribute name: \"%s\", ", - attrib->name); - seq_printf(seq, "value[%s]: ", - ias_value_types[attrib->value->type]); - - switch (attrib->value->type) { - case IAS_INTEGER: - seq_printf(seq, "%d\n", - attrib->value->t.integer); - break; - case IAS_STRING: - seq_printf(seq, "\"%s\"\n", - attrib->value->t.string); - break; - case IAS_OCT_SEQ: - seq_printf(seq, "octet sequence (%d bytes)\n", - attrib->value->len); - break; - case IAS_MISSING: - seq_puts(seq, "missing\n"); - break; - default: - seq_printf(seq, "type %d?\n", - attrib->value->type); - } - seq_putc(seq, '\n'); - - } - IRDA_ASSERT_LABEL(outloop:) - spin_unlock(&obj->attribs->hb_spinlock); - } - - return 0; -} - -static const struct seq_operations irias_seq_ops = { - .start = irias_seq_start, - .next = irias_seq_next, - .stop = irias_seq_stop, - .show = irias_seq_show, -}; - -static int irias_seq_open(struct inode *inode, struct file *file) -{ - IRDA_ASSERT( irias_objects != NULL, return -EINVAL;); - - return seq_open(file, &irias_seq_ops); -} - -const struct file_operations irias_seq_fops = { - .owner = THIS_MODULE, - .open = irias_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -#endif /* PROC_FS */ diff --git a/net/irda/iriap_event.c b/net/irda/iriap_event.c deleted file mode 100644 index e6098b2e048a..000000000000 --- a/net/irda/iriap_event.c +++ /dev/null @@ -1,496 +0,0 @@ -/********************************************************************* - * - * Filename: iriap_event.c - * Version: 0.1 - * Description: IAP Finite State Machine - * Status: Experimental. - * Author: Dag Brattli <dagb@cs.uit.no> - * Created at: Thu Aug 21 00:02:07 1997 - * Modified at: Wed Mar 1 11:28:34 2000 - * Modified by: Dag Brattli <dagb@cs.uit.no> - * - * Copyright (c) 1997, 1999-2000 Dag Brattli <dagb@cs.uit.no>, - * All Rights Reserved. - * Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#include <linux/slab.h> - -#include <net/irda/irda.h> -#include <net/irda/irlmp.h> -#include <net/irda/iriap.h> -#include <net/irda/iriap_event.h> - -static void state_s_disconnect (struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb); -static void state_s_connecting (struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb); -static void state_s_call (struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb); - -static void state_s_make_call (struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb); -static void state_s_calling (struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb); -static void state_s_outstanding (struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb); -static void state_s_replying (struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb); -static void state_s_wait_for_call(struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb); -static void state_s_wait_active (struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb); - -static void state_r_disconnect (struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb); -static void state_r_call (struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb); -static void state_r_waiting (struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb); -static void state_r_wait_active (struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb); -static void state_r_receiving (struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb); -static void state_r_execute (struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb); -static void state_r_returning (struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb); - -static void (*iriap_state[])(struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb) = { - /* Client FSM */ - state_s_disconnect, - state_s_connecting, - state_s_call, - - /* S-Call FSM */ - state_s_make_call, - state_s_calling, - state_s_outstanding, - state_s_replying, - state_s_wait_for_call, - state_s_wait_active, - - /* Server FSM */ - state_r_disconnect, - state_r_call, - - /* R-Connect FSM */ - state_r_waiting, - state_r_wait_active, - state_r_receiving, - state_r_execute, - state_r_returning, -}; - -void iriap_next_client_state(struct iriap_cb *self, IRIAP_STATE state) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IAS_MAGIC, return;); - - self->client_state = state; -} - -void iriap_next_call_state(struct iriap_cb *self, IRIAP_STATE state) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IAS_MAGIC, return;); - - self->call_state = state; -} - -void iriap_next_server_state(struct iriap_cb *self, IRIAP_STATE state) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IAS_MAGIC, return;); - - self->server_state = state; -} - -void iriap_next_r_connect_state(struct iriap_cb *self, IRIAP_STATE state) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IAS_MAGIC, return;); - - self->r_connect_state = state; -} - -void iriap_do_client_event(struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IAS_MAGIC, return;); - - (*iriap_state[ self->client_state]) (self, event, skb); -} - -void iriap_do_call_event(struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IAS_MAGIC, return;); - - (*iriap_state[ self->call_state]) (self, event, skb); -} - -void iriap_do_server_event(struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IAS_MAGIC, return;); - - (*iriap_state[ self->server_state]) (self, event, skb); -} - -void iriap_do_r_connect_event(struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IAS_MAGIC, return;); - - (*iriap_state[ self->r_connect_state]) (self, event, skb); -} - - -/* - * Function state_s_disconnect (event, skb) - * - * S-Disconnect, The device has no LSAP connection to a particular - * remote device. - */ -static void state_s_disconnect(struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IAS_MAGIC, return;); - - switch (event) { - case IAP_CALL_REQUEST_GVBC: - iriap_next_client_state(self, S_CONNECTING); - IRDA_ASSERT(self->request_skb == NULL, return;); - /* Don't forget to refcount it - - * see iriap_getvaluebyclass_request(). */ - skb_get(skb); - self->request_skb = skb; - iriap_connect_request(self); - break; - case IAP_LM_DISCONNECT_INDICATION: - break; - default: - pr_debug("%s(), Unknown event %d\n", __func__, event); - break; - } -} - -/* - * Function state_s_connecting (self, event, skb) - * - * S-Connecting - * - */ -static void state_s_connecting(struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IAS_MAGIC, return;); - - switch (event) { - case IAP_LM_CONNECT_CONFIRM: - /* - * Jump to S-Call FSM - */ - iriap_do_call_event(self, IAP_CALL_REQUEST, skb); - /* iriap_call_request(self, 0,0,0); */ - iriap_next_client_state(self, S_CALL); - break; - case IAP_LM_DISCONNECT_INDICATION: - /* Abort calls */ - iriap_next_call_state(self, S_MAKE_CALL); - iriap_next_client_state(self, S_DISCONNECT); - break; - default: - pr_debug("%s(), Unknown event %d\n", __func__, event); - break; - } -} - -/* - * Function state_s_call (self, event, skb) - * - * S-Call, The device can process calls to a specific remote - * device. Whenever the LSAP connection is disconnected, this state - * catches that event and clears up - */ -static void state_s_call(struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb) -{ - IRDA_ASSERT(self != NULL, return;); - - switch (event) { - case IAP_LM_DISCONNECT_INDICATION: - /* Abort calls */ - iriap_next_call_state(self, S_MAKE_CALL); - iriap_next_client_state(self, S_DISCONNECT); - break; - default: - pr_debug("state_s_call: Unknown event %d\n", event); - break; - } -} - -/* - * Function state_s_make_call (event, skb) - * - * S-Make-Call - * - */ -static void state_s_make_call(struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb) -{ - struct sk_buff *tx_skb; - - IRDA_ASSERT(self != NULL, return;); - - switch (event) { - case IAP_CALL_REQUEST: - /* Already refcounted - see state_s_disconnect() */ - tx_skb = self->request_skb; - self->request_skb = NULL; - - irlmp_data_request(self->lsap, tx_skb); - iriap_next_call_state(self, S_OUTSTANDING); - break; - default: - pr_debug("%s(), Unknown event %d\n", __func__, event); - break; - } -} - -/* - * Function state_s_calling (event, skb) - * - * S-Calling - * - */ -static void state_s_calling(struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb) -{ - pr_debug("%s(), Not implemented\n", __func__); -} - -/* - * Function state_s_outstanding (event, skb) - * - * S-Outstanding, The device is waiting for a response to a command - * - */ -static void state_s_outstanding(struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb) -{ - IRDA_ASSERT(self != NULL, return;); - - switch (event) { - case IAP_RECV_F_LST: - /*iriap_send_ack(self);*/ - /*LM_Idle_request(idle); */ - - iriap_next_call_state(self, S_WAIT_FOR_CALL); - break; - default: - pr_debug("%s(), Unknown event %d\n", __func__, event); - break; - } -} - -/* - * Function state_s_replying (event, skb) - * - * S-Replying, The device is collecting a multiple part response - */ -static void state_s_replying(struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb) -{ - pr_debug("%s(), Not implemented\n", __func__); -} - -/* - * Function state_s_wait_for_call (event, skb) - * - * S-Wait-for-Call - * - */ -static void state_s_wait_for_call(struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb) -{ - pr_debug("%s(), Not implemented\n", __func__); -} - - -/* - * Function state_s_wait_active (event, skb) - * - * S-Wait-Active - * - */ -static void state_s_wait_active(struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb) -{ - pr_debug("%s(), Not implemented\n", __func__); -} - -/************************************************************************** - * - * Server FSM - * - **************************************************************************/ - -/* - * Function state_r_disconnect (self, event, skb) - * - * LM-IAS server is disconnected (not processing any requests!) - * - */ -static void state_r_disconnect(struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb) -{ - struct sk_buff *tx_skb; - - switch (event) { - case IAP_LM_CONNECT_INDICATION: - tx_skb = alloc_skb(LMP_MAX_HEADER, GFP_ATOMIC); - if (tx_skb == NULL) - return; - - /* Reserve space for MUX_CONTROL and LAP header */ - skb_reserve(tx_skb, LMP_MAX_HEADER); - - irlmp_connect_response(self->lsap, tx_skb); - /*LM_Idle_request(idle); */ - - iriap_next_server_state(self, R_CALL); - - /* - * Jump to R-Connect FSM, we skip R-Waiting since we do not - * care about LM_Idle_request()! - */ - iriap_next_r_connect_state(self, R_RECEIVING); - break; - default: - pr_debug("%s(), unknown event %d\n", __func__, event); - break; - } -} - -/* - * Function state_r_call (self, event, skb) - */ -static void state_r_call(struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb) -{ - switch (event) { - case IAP_LM_DISCONNECT_INDICATION: - /* Abort call */ - iriap_next_server_state(self, R_DISCONNECT); - iriap_next_r_connect_state(self, R_WAITING); - break; - default: - pr_debug("%s(), unknown event!\n", __func__); - break; - } -} - -/* - * R-Connect FSM - */ - -/* - * Function state_r_waiting (self, event, skb) - */ -static void state_r_waiting(struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb) -{ - pr_debug("%s(), Not implemented\n", __func__); -} - -static void state_r_wait_active(struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb) -{ - pr_debug("%s(), Not implemented\n", __func__); -} - -/* - * Function state_r_receiving (self, event, skb) - * - * We are receiving a command - * - */ -static void state_r_receiving(struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb) -{ - switch (event) { - case IAP_RECV_F_LST: - iriap_next_r_connect_state(self, R_EXECUTE); - - iriap_call_indication(self, skb); - break; - default: - pr_debug("%s(), unknown event!\n", __func__); - break; - } -} - -/* - * Function state_r_execute (self, event, skb) - * - * The server is processing the request - * - */ -static void state_r_execute(struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb) -{ - IRDA_ASSERT(skb != NULL, return;); - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IAS_MAGIC, return;); - - switch (event) { - case IAP_CALL_RESPONSE: - /* - * Since we don't implement the Waiting state, we return - * to state Receiving instead, DB. - */ - iriap_next_r_connect_state(self, R_RECEIVING); - - /* Don't forget to refcount it - see - * iriap_getvaluebyclass_response(). */ - skb_get(skb); - - irlmp_data_request(self->lsap, skb); - break; - default: - pr_debug("%s(), unknown event!\n", __func__); - break; - } -} - -static void state_r_returning(struct iriap_cb *self, IRIAP_EVENT event, - struct sk_buff *skb) -{ - pr_debug("%s(), event=%d\n", __func__, event); - - switch (event) { - case IAP_RECV_F_LST: - break; - default: - break; - } -} diff --git a/net/irda/irias_object.c b/net/irda/irias_object.c deleted file mode 100644 index 53b86d0e1630..000000000000 --- a/net/irda/irias_object.c +++ /dev/null @@ -1,555 +0,0 @@ -/********************************************************************* - * - * Filename: irias_object.c - * Version: 0.3 - * Description: IAS object database and functions - * Status: Experimental. - * Author: Dag Brattli <dagb@cs.uit.no> - * Created at: Thu Oct 1 22:50:04 1998 - * Modified at: Wed Dec 15 11:23:16 1999 - * Modified by: Dag Brattli <dagb@cs.uit.no> - * - * Copyright (c) 1998-1999 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#include <linux/slab.h> -#include <linux/string.h> -#include <linux/socket.h> -#include <linux/module.h> - -#include <net/irda/irda.h> -#include <net/irda/irias_object.h> - -hashbin_t *irias_objects; - -/* - * Used when a missing value needs to be returned - */ -struct ias_value irias_missing = { IAS_MISSING, 0, 0, 0, {0}}; - - -/* - * Function ias_new_object (name, id) - * - * Create a new IAS object - * - */ -struct ias_object *irias_new_object( char *name, int id) -{ - struct ias_object *obj; - - obj = kzalloc(sizeof(struct ias_object), GFP_ATOMIC); - if (obj == NULL) { - net_warn_ratelimited("%s(), Unable to allocate object!\n", - __func__); - return NULL; - } - - obj->magic = IAS_OBJECT_MAGIC; - obj->name = kstrndup(name, IAS_MAX_CLASSNAME, GFP_ATOMIC); - if (!obj->name) { - net_warn_ratelimited("%s(), Unable to allocate name!\n", - __func__); - kfree(obj); - return NULL; - } - obj->id = id; - - /* Locking notes : the attrib spinlock has lower precendence - * than the objects spinlock. Never grap the objects spinlock - * while holding any attrib spinlock (risk of deadlock). Jean II */ - obj->attribs = hashbin_new(HB_LOCK); - - if (obj->attribs == NULL) { - net_warn_ratelimited("%s(), Unable to allocate attribs!\n", - __func__); - kfree(obj->name); - kfree(obj); - return NULL; - } - - return obj; -} -EXPORT_SYMBOL(irias_new_object); - -/* - * Function irias_delete_attrib (attrib) - * - * Delete given attribute and deallocate all its memory - * - */ -static void __irias_delete_attrib(struct ias_attrib *attrib) -{ - IRDA_ASSERT(attrib != NULL, return;); - IRDA_ASSERT(attrib->magic == IAS_ATTRIB_MAGIC, return;); - - kfree(attrib->name); - - irias_delete_value(attrib->value); - attrib->magic = ~IAS_ATTRIB_MAGIC; - - kfree(attrib); -} - -void __irias_delete_object(struct ias_object *obj) -{ - IRDA_ASSERT(obj != NULL, return;); - IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return;); - - kfree(obj->name); - - hashbin_delete(obj->attribs, (FREE_FUNC) __irias_delete_attrib); - - obj->magic = ~IAS_OBJECT_MAGIC; - - kfree(obj); -} - -/* - * Function irias_delete_object (obj) - * - * Remove object from hashbin and deallocate all attributes associated with - * with this object and the object itself - * - */ -int irias_delete_object(struct ias_object *obj) -{ - struct ias_object *node; - - IRDA_ASSERT(obj != NULL, return -1;); - IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return -1;); - - /* Remove from list */ - node = hashbin_remove_this(irias_objects, (irda_queue_t *) obj); - if (!node) - pr_debug("%s(), object already removed!\n", - __func__); - - /* Destroy */ - __irias_delete_object(obj); - - return 0; -} -EXPORT_SYMBOL(irias_delete_object); - -/* - * Function irias_delete_attrib (obj) - * - * Remove attribute from hashbin and, if it was the last attribute of - * the object, remove the object as well. - * - */ -int irias_delete_attrib(struct ias_object *obj, struct ias_attrib *attrib, - int cleanobject) -{ - struct ias_attrib *node; - - IRDA_ASSERT(obj != NULL, return -1;); - IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return -1;); - IRDA_ASSERT(attrib != NULL, return -1;); - - /* Remove attribute from object */ - node = hashbin_remove_this(obj->attribs, (irda_queue_t *) attrib); - if (!node) - return 0; /* Already removed or non-existent */ - - /* Deallocate attribute */ - __irias_delete_attrib(node); - - /* Check if object has still some attributes, destroy it if none. - * At first glance, this look dangerous, as the kernel reference - * various IAS objects. However, we only use this function on - * user attributes, not kernel attributes, so there is no risk - * of deleting a kernel object this way. Jean II */ - node = (struct ias_attrib *) hashbin_get_first(obj->attribs); - if (cleanobject && !node) - irias_delete_object(obj); - - return 0; -} - -/* - * Function irias_insert_object (obj) - * - * Insert an object into the LM-IAS database - * - */ -void irias_insert_object(struct ias_object *obj) -{ - IRDA_ASSERT(obj != NULL, return;); - IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return;); - - hashbin_insert(irias_objects, (irda_queue_t *) obj, 0, obj->name); -} -EXPORT_SYMBOL(irias_insert_object); - -/* - * Function irias_find_object (name) - * - * Find object with given name - * - */ -struct ias_object *irias_find_object(char *name) -{ - IRDA_ASSERT(name != NULL, return NULL;); - - /* Unsafe (locking), object might change */ - return hashbin_lock_find(irias_objects, 0, name); -} -EXPORT_SYMBOL(irias_find_object); - -/* - * Function irias_find_attrib (obj, name) - * - * Find named attribute in object - * - */ -struct ias_attrib *irias_find_attrib(struct ias_object *obj, char *name) -{ - struct ias_attrib *attrib; - - IRDA_ASSERT(obj != NULL, return NULL;); - IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return NULL;); - IRDA_ASSERT(name != NULL, return NULL;); - - attrib = hashbin_lock_find(obj->attribs, 0, name); - if (attrib == NULL) - return NULL; - - /* Unsafe (locking), attrib might change */ - return attrib; -} - -/* - * Function irias_add_attribute (obj, attrib) - * - * Add attribute to object - * - */ -static void irias_add_attrib(struct ias_object *obj, struct ias_attrib *attrib, - int owner) -{ - IRDA_ASSERT(obj != NULL, return;); - IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return;); - - IRDA_ASSERT(attrib != NULL, return;); - IRDA_ASSERT(attrib->magic == IAS_ATTRIB_MAGIC, return;); - - /* Set if attrib is owned by kernel or user space */ - attrib->value->owner = owner; - - hashbin_insert(obj->attribs, (irda_queue_t *) attrib, 0, attrib->name); -} - -/* - * Function irias_object_change_attribute (obj_name, attrib_name, new_value) - * - * Change the value of an objects attribute. - * - */ -int irias_object_change_attribute(char *obj_name, char *attrib_name, - struct ias_value *new_value) -{ - struct ias_object *obj; - struct ias_attrib *attrib; - unsigned long flags; - - /* Find object */ - obj = hashbin_lock_find(irias_objects, 0, obj_name); - if (obj == NULL) { - net_warn_ratelimited("%s: Unable to find object: %s\n", - __func__, obj_name); - return -1; - } - - /* Slightly unsafe (obj might get removed under us) */ - spin_lock_irqsave(&obj->attribs->hb_spinlock, flags); - - /* Find attribute */ - attrib = hashbin_find(obj->attribs, 0, attrib_name); - if (attrib == NULL) { - net_warn_ratelimited("%s: Unable to find attribute: %s\n", - __func__, attrib_name); - spin_unlock_irqrestore(&obj->attribs->hb_spinlock, flags); - return -1; - } - - if ( attrib->value->type != new_value->type) { - pr_debug("%s(), changing value type not allowed!\n", - __func__); - spin_unlock_irqrestore(&obj->attribs->hb_spinlock, flags); - return -1; - } - - /* Delete old value */ - irias_delete_value(attrib->value); - - /* Insert new value */ - attrib->value = new_value; - - /* Success */ - spin_unlock_irqrestore(&obj->attribs->hb_spinlock, flags); - return 0; -} -EXPORT_SYMBOL(irias_object_change_attribute); - -/* - * Function irias_object_add_integer_attrib (obj, name, value) - * - * Add an integer attribute to an LM-IAS object - * - */ -void irias_add_integer_attrib(struct ias_object *obj, char *name, int value, - int owner) -{ - struct ias_attrib *attrib; - - IRDA_ASSERT(obj != NULL, return;); - IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return;); - IRDA_ASSERT(name != NULL, return;); - - attrib = kzalloc(sizeof(struct ias_attrib), GFP_ATOMIC); - if (attrib == NULL) { - net_warn_ratelimited("%s: Unable to allocate attribute!\n", - __func__); - return; - } - - attrib->magic = IAS_ATTRIB_MAGIC; - attrib->name = kstrndup(name, IAS_MAX_ATTRIBNAME, GFP_ATOMIC); - - /* Insert value */ - attrib->value = irias_new_integer_value(value); - if (!attrib->name || !attrib->value) { - net_warn_ratelimited("%s: Unable to allocate attribute!\n", - __func__); - if (attrib->value) - irias_delete_value(attrib->value); - kfree(attrib->name); - kfree(attrib); - return; - } - - irias_add_attrib(obj, attrib, owner); -} -EXPORT_SYMBOL(irias_add_integer_attrib); - - /* - * Function irias_add_octseq_attrib (obj, name, octet_seq, len) - * - * Add a octet sequence attribute to an LM-IAS object - * - */ - -void irias_add_octseq_attrib(struct ias_object *obj, char *name, __u8 *octets, - int len, int owner) -{ - struct ias_attrib *attrib; - - IRDA_ASSERT(obj != NULL, return;); - IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return;); - - IRDA_ASSERT(name != NULL, return;); - IRDA_ASSERT(octets != NULL, return;); - - attrib = kzalloc(sizeof(struct ias_attrib), GFP_ATOMIC); - if (attrib == NULL) { - net_warn_ratelimited("%s: Unable to allocate attribute!\n", - __func__); - return; - } - - attrib->magic = IAS_ATTRIB_MAGIC; - attrib->name = kstrndup(name, IAS_MAX_ATTRIBNAME, GFP_ATOMIC); - - attrib->value = irias_new_octseq_value( octets, len); - if (!attrib->name || !attrib->value) { - net_warn_ratelimited("%s: Unable to allocate attribute!\n", - __func__); - if (attrib->value) - irias_delete_value(attrib->value); - kfree(attrib->name); - kfree(attrib); - return; - } - - irias_add_attrib(obj, attrib, owner); -} -EXPORT_SYMBOL(irias_add_octseq_attrib); - -/* - * Function irias_object_add_string_attrib (obj, string) - * - * Add a string attribute to an LM-IAS object - * - */ -void irias_add_string_attrib(struct ias_object *obj, char *name, char *value, - int owner) -{ - struct ias_attrib *attrib; - - IRDA_ASSERT(obj != NULL, return;); - IRDA_ASSERT(obj->magic == IAS_OBJECT_MAGIC, return;); - - IRDA_ASSERT(name != NULL, return;); - IRDA_ASSERT(value != NULL, return;); - - attrib = kzalloc(sizeof( struct ias_attrib), GFP_ATOMIC); - if (attrib == NULL) { - net_warn_ratelimited("%s: Unable to allocate attribute!\n", - __func__); - return; - } - - attrib->magic = IAS_ATTRIB_MAGIC; - attrib->name = kstrndup(name, IAS_MAX_ATTRIBNAME, GFP_ATOMIC); - - attrib->value = irias_new_string_value(value); - if (!attrib->name || !attrib->value) { - net_warn_ratelimited("%s: Unable to allocate attribute!\n", - __func__); - if (attrib->value) - irias_delete_value(attrib->value); - kfree(attrib->name); - kfree(attrib); - return; - } - - irias_add_attrib(obj, attrib, owner); -} -EXPORT_SYMBOL(irias_add_string_attrib); - -/* - * Function irias_new_integer_value (integer) - * - * Create new IAS integer value - * - */ -struct ias_value *irias_new_integer_value(int integer) -{ - struct ias_value *value; - - value = kzalloc(sizeof(struct ias_value), GFP_ATOMIC); - if (value == NULL) - return NULL; - - value->type = IAS_INTEGER; - value->len = 4; - value->t.integer = integer; - - return value; -} -EXPORT_SYMBOL(irias_new_integer_value); - -/* - * Function irias_new_string_value (string) - * - * Create new IAS string value - * - * Per IrLMP 1.1, 4.3.3.2, strings are up to 256 chars - Jean II - */ -struct ias_value *irias_new_string_value(char *string) -{ - struct ias_value *value; - - value = kzalloc(sizeof(struct ias_value), GFP_ATOMIC); - if (value == NULL) - return NULL; - - value->type = IAS_STRING; - value->charset = CS_ASCII; - value->t.string = kstrndup(string, IAS_MAX_STRING, GFP_ATOMIC); - if (!value->t.string) { - net_warn_ratelimited("%s: Unable to kmalloc!\n", __func__); - kfree(value); - return NULL; - } - - value->len = strlen(value->t.string); - - return value; -} - -/* - * Function irias_new_octseq_value (octets, len) - * - * Create new IAS octet-sequence value - * - * Per IrLMP 1.1, 4.3.3.2, octet-sequence are up to 1024 bytes - Jean II - */ -struct ias_value *irias_new_octseq_value(__u8 *octseq , int len) -{ - struct ias_value *value; - - value = kzalloc(sizeof(struct ias_value), GFP_ATOMIC); - if (value == NULL) - return NULL; - - value->type = IAS_OCT_SEQ; - /* Check length */ - if(len > IAS_MAX_OCTET_STRING) - len = IAS_MAX_OCTET_STRING; - value->len = len; - - value->t.oct_seq = kmemdup(octseq, len, GFP_ATOMIC); - if (value->t.oct_seq == NULL){ - net_warn_ratelimited("%s: Unable to kmalloc!\n", __func__); - kfree(value); - return NULL; - } - return value; -} - -struct ias_value *irias_new_missing_value(void) -{ - struct ias_value *value; - - value = kzalloc(sizeof(struct ias_value), GFP_ATOMIC); - if (value == NULL) - return NULL; - - value->type = IAS_MISSING; - - return value; -} - -/* - * Function irias_delete_value (value) - * - * Delete IAS value - * - */ -void irias_delete_value(struct ias_value *value) -{ - IRDA_ASSERT(value != NULL, return;); - - switch (value->type) { - case IAS_INTEGER: /* Fallthrough */ - case IAS_MISSING: - /* No need to deallocate */ - break; - case IAS_STRING: - /* Deallocate string */ - kfree(value->t.string); - break; - case IAS_OCT_SEQ: - /* Deallocate byte stream */ - kfree(value->t.oct_seq); - break; - default: - pr_debug("%s(), Unknown value type!\n", __func__); - break; - } - kfree(value); -} -EXPORT_SYMBOL(irias_delete_value); diff --git a/net/irda/irlan/Kconfig b/net/irda/irlan/Kconfig deleted file mode 100644 index 951abc2e3a7f..000000000000 --- a/net/irda/irlan/Kconfig +++ /dev/null @@ -1,14 +0,0 @@ -config IRLAN - tristate "IrLAN protocol" - depends on IRDA - help - Say Y here if you want to build support for the IrLAN protocol. - To compile it as a module, choose M here: the module will be called - irlan. IrLAN emulates an Ethernet and makes it possible to put up - a wireless LAN using infrared beams. - - The IrLAN protocol can be used to talk with infrared access points - like the HP NetbeamIR, or the ESI JetEye NET. You can also connect - to another Linux machine running the IrLAN protocol for ad-hoc - networking! - diff --git a/net/irda/irlan/Makefile b/net/irda/irlan/Makefile deleted file mode 100644 index 94eefbc8e6b9..000000000000 --- a/net/irda/irlan/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -# -# Makefile for the Linux IrDA IrLAN protocol layer. -# - -obj-$(CONFIG_IRLAN) += irlan.o - -irlan-y := irlan_common.o irlan_eth.o irlan_event.o irlan_client.o irlan_provider.o irlan_filter.o irlan_provider_event.o irlan_client_event.o diff --git a/net/irda/irlan/irlan_client.c b/net/irda/irlan/irlan_client.c deleted file mode 100644 index c5837a40c78e..000000000000 --- a/net/irda/irlan/irlan_client.c +++ /dev/null @@ -1,559 +0,0 @@ -/********************************************************************* - * - * Filename: irlan_client.c - * Version: 0.9 - * Description: IrDA LAN Access Protocol (IrLAN) Client - * Status: Experimental. - * Author: Dag Brattli <dagb@cs.uit.no> - * Created at: Sun Aug 31 20:14:37 1997 - * Modified at: Tue Dec 14 15:47:02 1999 - * Modified by: Dag Brattli <dagb@cs.uit.no> - * Sources: skeleton.c by Donald Becker <becker@CESDIS.gsfc.nasa.gov> - * slip.c by Laurence Culhane, <loz@holmes.demon.co.uk> - * Fred N. van Kempen, <waltje@uwalt.nl.mugnet.org> - * - * Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no>, - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/slab.h> -#include <linux/errno.h> -#include <linux/init.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/if_arp.h> -#include <linux/bitops.h> -#include <net/arp.h> - -#include <asm/byteorder.h> - -#include <net/irda/irda.h> -#include <net/irda/irttp.h> -#include <net/irda/irlmp.h> -#include <net/irda/irias_object.h> -#include <net/irda/iriap.h> -#include <net/irda/timer.h> - -#include <net/irda/irlan_common.h> -#include <net/irda/irlan_event.h> -#include <net/irda/irlan_eth.h> -#include <net/irda/irlan_provider.h> -#include <net/irda/irlan_client.h> - -#undef CONFIG_IRLAN_GRATUITOUS_ARP - -static void irlan_client_ctrl_disconnect_indication(void *instance, void *sap, - LM_REASON reason, - struct sk_buff *); -static int irlan_client_ctrl_data_indication(void *instance, void *sap, - struct sk_buff *skb); -static void irlan_client_ctrl_connect_confirm(void *instance, void *sap, - struct qos_info *qos, - __u32 max_sdu_size, - __u8 max_header_size, - struct sk_buff *); -static void irlan_check_response_param(struct irlan_cb *self, char *param, - char *value, int val_len); -static void irlan_client_open_ctrl_tsap(struct irlan_cb *self); - -static void irlan_client_kick_timer_expired(void *data) -{ - struct irlan_cb *self = (struct irlan_cb *) data; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - - /* - * If we are in peer mode, the client may not have got the discovery - * indication it needs to make progress. If the client is still in - * IDLE state, we must kick it to, but only if the provider is not IDLE - */ - if ((self->provider.access_type == ACCESS_PEER) && - (self->client.state == IRLAN_IDLE) && - (self->provider.state != IRLAN_IDLE)) { - irlan_client_wakeup(self, self->saddr, self->daddr); - } -} - -static void irlan_client_start_kick_timer(struct irlan_cb *self, int timeout) -{ - irda_start_timer(&self->client.kick_timer, timeout, (void *) self, - irlan_client_kick_timer_expired); -} - -/* - * Function irlan_client_wakeup (self, saddr, daddr) - * - * Wake up client - * - */ -void irlan_client_wakeup(struct irlan_cb *self, __u32 saddr, __u32 daddr) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - - /* - * Check if we are already awake, or if we are a provider in direct - * mode (in that case we must leave the client idle - */ - if ((self->client.state != IRLAN_IDLE) || - (self->provider.access_type == ACCESS_DIRECT)) - { - pr_debug("%s(), already awake!\n", __func__); - return; - } - - /* Addresses may have changed! */ - self->saddr = saddr; - self->daddr = daddr; - - if (self->disconnect_reason == LM_USER_REQUEST) { - pr_debug("%s(), still stopped by user\n", __func__); - return; - } - - /* Open TSAPs */ - irlan_client_open_ctrl_tsap(self); - irlan_open_data_tsap(self); - - irlan_do_client_event(self, IRLAN_DISCOVERY_INDICATION, NULL); - - /* Start kick timer */ - irlan_client_start_kick_timer(self, 2*HZ); -} - -/* - * Function irlan_discovery_indication (daddr) - * - * Remote device with IrLAN server support discovered - * - */ -void irlan_client_discovery_indication(discinfo_t *discovery, - DISCOVERY_MODE mode, - void *priv) -{ - struct irlan_cb *self; - __u32 saddr, daddr; - - IRDA_ASSERT(discovery != NULL, return;); - - /* - * I didn't check it, but I bet that IrLAN suffer from the same - * deficiency as IrComm and doesn't handle two instances - * simultaneously connecting to each other. - * Same workaround, drop passive discoveries. - * Jean II */ - if(mode == DISCOVERY_PASSIVE) - return; - - saddr = discovery->saddr; - daddr = discovery->daddr; - - /* Find instance */ - rcu_read_lock(); - self = irlan_get_any(); - if (self) { - IRDA_ASSERT(self->magic == IRLAN_MAGIC, goto out;); - - pr_debug("%s(), Found instance (%08x)!\n", __func__ , - daddr); - - irlan_client_wakeup(self, saddr, daddr); - } -IRDA_ASSERT_LABEL(out:) - rcu_read_unlock(); -} - -/* - * Function irlan_client_data_indication (handle, skb) - * - * This function gets the data that is received on the control channel - * - */ -static int irlan_client_ctrl_data_indication(void *instance, void *sap, - struct sk_buff *skb) -{ - struct irlan_cb *self; - - self = instance; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return -1;); - IRDA_ASSERT(skb != NULL, return -1;); - - irlan_do_client_event(self, IRLAN_DATA_INDICATION, skb); - - /* Ready for a new command */ - pr_debug("%s(), clearing tx_busy\n", __func__); - self->client.tx_busy = FALSE; - - /* Check if we have some queued commands waiting to be sent */ - irlan_run_ctrl_tx_queue(self); - - return 0; -} - -static void irlan_client_ctrl_disconnect_indication(void *instance, void *sap, - LM_REASON reason, - struct sk_buff *userdata) -{ - struct irlan_cb *self; - struct tsap_cb *tsap; - struct sk_buff *skb; - - pr_debug("%s(), reason=%d\n", __func__ , reason); - - self = instance; - tsap = sap; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - IRDA_ASSERT(tsap != NULL, return;); - IRDA_ASSERT(tsap->magic == TTP_TSAP_MAGIC, return;); - - IRDA_ASSERT(tsap == self->client.tsap_ctrl, return;); - - /* Remove frames queued on the control channel */ - while ((skb = skb_dequeue(&self->client.txq)) != NULL) { - dev_kfree_skb(skb); - } - self->client.tx_busy = FALSE; - - irlan_do_client_event(self, IRLAN_LMP_DISCONNECT, NULL); -} - -/* - * Function irlan_client_open_tsaps (self) - * - * Initialize callbacks and open IrTTP TSAPs - * - */ -static void irlan_client_open_ctrl_tsap(struct irlan_cb *self) -{ - struct tsap_cb *tsap; - notify_t notify; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - - /* Check if already open */ - if (self->client.tsap_ctrl) - return; - - irda_notify_init(¬ify); - - /* Set up callbacks */ - notify.data_indication = irlan_client_ctrl_data_indication; - notify.connect_confirm = irlan_client_ctrl_connect_confirm; - notify.disconnect_indication = irlan_client_ctrl_disconnect_indication; - notify.instance = self; - strlcpy(notify.name, "IrLAN ctrl (c)", sizeof(notify.name)); - - tsap = irttp_open_tsap(LSAP_ANY, DEFAULT_INITIAL_CREDIT, ¬ify); - if (!tsap) { - pr_debug("%s(), Got no tsap!\n", __func__); - return; - } - self->client.tsap_ctrl = tsap; -} - -/* - * Function irlan_client_connect_confirm (handle, skb) - * - * Connection to peer IrLAN laye confirmed - * - */ -static void irlan_client_ctrl_connect_confirm(void *instance, void *sap, - struct qos_info *qos, - __u32 max_sdu_size, - __u8 max_header_size, - struct sk_buff *skb) -{ - struct irlan_cb *self; - - self = instance; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - - self->client.max_sdu_size = max_sdu_size; - self->client.max_header_size = max_header_size; - - /* TODO: we could set the MTU depending on the max_sdu_size */ - - irlan_do_client_event(self, IRLAN_CONNECT_COMPLETE, NULL); -} - -/* - * Function print_ret_code (code) - * - * Print return code of request to peer IrLAN layer. - * - */ -static void print_ret_code(__u8 code) -{ - switch(code) { - case 0: - printk(KERN_INFO "Success\n"); - break; - case 1: - net_warn_ratelimited("IrLAN: Insufficient resources\n"); - break; - case 2: - net_warn_ratelimited("IrLAN: Invalid command format\n"); - break; - case 3: - net_warn_ratelimited("IrLAN: Command not supported\n"); - break; - case 4: - net_warn_ratelimited("IrLAN: Parameter not supported\n"); - break; - case 5: - net_warn_ratelimited("IrLAN: Value not supported\n"); - break; - case 6: - net_warn_ratelimited("IrLAN: Not open\n"); - break; - case 7: - net_warn_ratelimited("IrLAN: Authentication required\n"); - break; - case 8: - net_warn_ratelimited("IrLAN: Invalid password\n"); - break; - case 9: - net_warn_ratelimited("IrLAN: Protocol error\n"); - break; - case 255: - net_warn_ratelimited("IrLAN: Asynchronous status\n"); - break; - } -} - -/* - * Function irlan_client_parse_response (self, skb) - * - * Extract all parameters from received buffer, then feed them to - * check_params for parsing - */ -void irlan_client_parse_response(struct irlan_cb *self, struct sk_buff *skb) -{ - __u8 *frame; - __u8 *ptr; - int count; - int ret; - __u16 val_len; - int i; - char *name; - char *value; - - IRDA_ASSERT(skb != NULL, return;); - - pr_debug("%s() skb->len=%d\n", __func__ , (int)skb->len); - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - - if (!skb) { - net_err_ratelimited("%s(), Got NULL skb!\n", __func__); - return; - } - frame = skb->data; - - /* - * Check return code and print it if not success - */ - if (frame[0]) { - print_ret_code(frame[0]); - return; - } - - name = kmalloc(255, GFP_ATOMIC); - if (!name) - return; - value = kmalloc(1016, GFP_ATOMIC); - if (!value) { - kfree(name); - return; - } - - /* How many parameters? */ - count = frame[1]; - - pr_debug("%s(), got %d parameters\n", __func__ , count); - - ptr = frame+2; - - /* For all parameters */ - for (i=0; i<count;i++) { - ret = irlan_extract_param(ptr, name, value, &val_len); - if (ret < 0) { - pr_debug("%s(), IrLAN, Error!\n", __func__); - break; - } - ptr += ret; - irlan_check_response_param(self, name, value, val_len); - } - /* Cleanup */ - kfree(name); - kfree(value); -} - -/* - * Function irlan_check_response_param (self, param, value, val_len) - * - * Check which parameter is received and update local variables - * - */ -static void irlan_check_response_param(struct irlan_cb *self, char *param, - char *value, int val_len) -{ - __u16 tmp_cpu; /* Temporary value in host order */ - __u8 *bytes; - int i; - - pr_debug("%s(), parm=%s\n", __func__ , param); - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - - /* Media type */ - if (strcmp(param, "MEDIA") == 0) { - if (strcmp(value, "802.3") == 0) - self->media = MEDIA_802_3; - else - self->media = MEDIA_802_5; - return; - } - if (strcmp(param, "FILTER_TYPE") == 0) { - if (strcmp(value, "DIRECTED") == 0) - self->client.filter_type |= IRLAN_DIRECTED; - else if (strcmp(value, "FUNCTIONAL") == 0) - self->client.filter_type |= IRLAN_FUNCTIONAL; - else if (strcmp(value, "GROUP") == 0) - self->client.filter_type |= IRLAN_GROUP; - else if (strcmp(value, "MAC_FRAME") == 0) - self->client.filter_type |= IRLAN_MAC_FRAME; - else if (strcmp(value, "MULTICAST") == 0) - self->client.filter_type |= IRLAN_MULTICAST; - else if (strcmp(value, "BROADCAST") == 0) - self->client.filter_type |= IRLAN_BROADCAST; - else if (strcmp(value, "IPX_SOCKET") == 0) - self->client.filter_type |= IRLAN_IPX_SOCKET; - - } - if (strcmp(param, "ACCESS_TYPE") == 0) { - if (strcmp(value, "DIRECT") == 0) - self->client.access_type = ACCESS_DIRECT; - else if (strcmp(value, "PEER") == 0) - self->client.access_type = ACCESS_PEER; - else if (strcmp(value, "HOSTED") == 0) - self->client.access_type = ACCESS_HOSTED; - else { - pr_debug("%s(), unknown access type!\n", __func__); - } - } - /* IRLAN version */ - if (strcmp(param, "IRLAN_VER") == 0) { - pr_debug("IrLAN version %d.%d\n", (__u8)value[0], - (__u8)value[1]); - - self->version[0] = value[0]; - self->version[1] = value[1]; - return; - } - /* Which remote TSAP to use for data channel */ - if (strcmp(param, "DATA_CHAN") == 0) { - self->dtsap_sel_data = value[0]; - pr_debug("Data TSAP = %02x\n", self->dtsap_sel_data); - return; - } - if (strcmp(param, "CON_ARB") == 0) { - memcpy(&tmp_cpu, value, 2); /* Align value */ - le16_to_cpus(&tmp_cpu); /* Convert to host order */ - self->client.recv_arb_val = tmp_cpu; - pr_debug("%s(), receive arb val=%d\n", __func__ , - self->client.recv_arb_val); - } - if (strcmp(param, "MAX_FRAME") == 0) { - memcpy(&tmp_cpu, value, 2); /* Align value */ - le16_to_cpus(&tmp_cpu); /* Convert to host order */ - self->client.max_frame = tmp_cpu; - pr_debug("%s(), max frame=%d\n", __func__ , - self->client.max_frame); - } - - /* RECONNECT_KEY, in case the link goes down! */ - if (strcmp(param, "RECONNECT_KEY") == 0) { - pr_debug("Got reconnect key: "); - /* for (i = 0; i < val_len; i++) */ -/* printk("%02x", value[i]); */ - memcpy(self->client.reconnect_key, value, val_len); - self->client.key_len = val_len; - pr_debug("\n"); - } - /* FILTER_ENTRY, have we got an ethernet address? */ - if (strcmp(param, "FILTER_ENTRY") == 0) { - bytes = value; - pr_debug("Ethernet address = %pM\n", bytes); - for (i = 0; i < 6; i++) - self->dev->dev_addr[i] = bytes[i]; - } -} - -/* - * Function irlan_client_get_value_confirm (obj_id, value) - * - * Got results from remote LM-IAS - * - */ -void irlan_client_get_value_confirm(int result, __u16 obj_id, - struct ias_value *value, void *priv) -{ - struct irlan_cb *self; - - IRDA_ASSERT(priv != NULL, return;); - - self = priv; - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - - /* We probably don't need to make any more queries */ - iriap_close(self->client.iriap); - self->client.iriap = NULL; - - /* Check if request succeeded */ - if (result != IAS_SUCCESS) { - pr_debug("%s(), got NULL value!\n", __func__); - irlan_do_client_event(self, IRLAN_IAS_PROVIDER_NOT_AVAIL, - NULL); - return; - } - - switch (value->type) { - case IAS_INTEGER: - self->dtsap_sel_ctrl = value->t.integer; - - if (value->t.integer != -1) { - irlan_do_client_event(self, IRLAN_IAS_PROVIDER_AVAIL, - NULL); - return; - } - irias_delete_value(value); - break; - default: - pr_debug("%s(), unknown type!\n", __func__); - break; - } - irlan_do_client_event(self, IRLAN_IAS_PROVIDER_NOT_AVAIL, NULL); -} diff --git a/net/irda/irlan/irlan_client_event.c b/net/irda/irlan/irlan_client_event.c deleted file mode 100644 index cc93fabbbb19..000000000000 --- a/net/irda/irlan/irlan_client_event.c +++ /dev/null @@ -1,511 +0,0 @@ -/********************************************************************* - * - * Filename: irlan_client_event.c - * Version: 0.9 - * Description: IrLAN client state machine - * Status: Experimental. - * Author: Dag Brattli <dagb@cs.uit.no> - * Created at: Sun Aug 31 20:14:37 1997 - * Modified at: Sun Dec 26 21:52:24 1999 - * Modified by: Dag Brattli <dagb@cs.uit.no> - * - * Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no>, - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#include <linux/skbuff.h> - -#include <net/irda/irda.h> -#include <net/irda/timer.h> -#include <net/irda/irmod.h> -#include <net/irda/iriap.h> -#include <net/irda/irlmp.h> -#include <net/irda/irttp.h> - -#include <net/irda/irlan_common.h> -#include <net/irda/irlan_client.h> -#include <net/irda/irlan_event.h> - -static int irlan_client_state_idle (struct irlan_cb *self, IRLAN_EVENT event, - struct sk_buff *skb); -static int irlan_client_state_query(struct irlan_cb *self, IRLAN_EVENT event, - struct sk_buff *skb); -static int irlan_client_state_conn (struct irlan_cb *self, IRLAN_EVENT event, - struct sk_buff *skb); -static int irlan_client_state_info (struct irlan_cb *self, IRLAN_EVENT event, - struct sk_buff *skb); -static int irlan_client_state_media(struct irlan_cb *self, IRLAN_EVENT event, - struct sk_buff *skb); -static int irlan_client_state_open (struct irlan_cb *self, IRLAN_EVENT event, - struct sk_buff *skb); -static int irlan_client_state_wait (struct irlan_cb *self, IRLAN_EVENT event, - struct sk_buff *skb); -static int irlan_client_state_arb (struct irlan_cb *self, IRLAN_EVENT event, - struct sk_buff *skb); -static int irlan_client_state_data (struct irlan_cb *self, IRLAN_EVENT event, - struct sk_buff *skb); -static int irlan_client_state_close(struct irlan_cb *self, IRLAN_EVENT event, - struct sk_buff *skb); -static int irlan_client_state_sync (struct irlan_cb *self, IRLAN_EVENT event, - struct sk_buff *skb); - -static int (*state[])(struct irlan_cb *, IRLAN_EVENT event, struct sk_buff *) = -{ - irlan_client_state_idle, - irlan_client_state_query, - irlan_client_state_conn, - irlan_client_state_info, - irlan_client_state_media, - irlan_client_state_open, - irlan_client_state_wait, - irlan_client_state_arb, - irlan_client_state_data, - irlan_client_state_close, - irlan_client_state_sync -}; - -void irlan_do_client_event(struct irlan_cb *self, IRLAN_EVENT event, - struct sk_buff *skb) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - - (*state[ self->client.state]) (self, event, skb); -} - -/* - * Function irlan_client_state_idle (event, skb, info) - * - * IDLE, We are waiting for an indication that there is a provider - * available. - */ -static int irlan_client_state_idle(struct irlan_cb *self, IRLAN_EVENT event, - struct sk_buff *skb) -{ - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return -1;); - - switch (event) { - case IRLAN_DISCOVERY_INDICATION: - if (self->client.iriap) { - net_warn_ratelimited("%s(), busy with a previous query\n", - __func__); - return -EBUSY; - } - - self->client.iriap = iriap_open(LSAP_ANY, IAS_CLIENT, self, - irlan_client_get_value_confirm); - /* Get some values from peer IAS */ - irlan_next_client_state(self, IRLAN_QUERY); - iriap_getvaluebyclass_request(self->client.iriap, - self->saddr, self->daddr, - "IrLAN", "IrDA:TinyTP:LsapSel"); - break; - case IRLAN_WATCHDOG_TIMEOUT: - pr_debug("%s(), IRLAN_WATCHDOG_TIMEOUT\n", __func__); - break; - default: - pr_debug("%s(), Unknown event %d\n", __func__ , event); - break; - } - if (skb) - dev_kfree_skb(skb); - - return 0; -} - -/* - * Function irlan_client_state_query (event, skb, info) - * - * QUERY, We have queryed the remote IAS and is ready to connect - * to provider, just waiting for the confirm. - * - */ -static int irlan_client_state_query(struct irlan_cb *self, IRLAN_EVENT event, - struct sk_buff *skb) -{ - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return -1;); - - switch(event) { - case IRLAN_IAS_PROVIDER_AVAIL: - IRDA_ASSERT(self->dtsap_sel_ctrl != 0, return -1;); - - self->client.open_retries = 0; - - irttp_connect_request(self->client.tsap_ctrl, - self->dtsap_sel_ctrl, - self->saddr, self->daddr, NULL, - IRLAN_MTU, NULL); - irlan_next_client_state(self, IRLAN_CONN); - break; - case IRLAN_IAS_PROVIDER_NOT_AVAIL: - pr_debug("%s(), IAS_PROVIDER_NOT_AVAIL\n", __func__); - irlan_next_client_state(self, IRLAN_IDLE); - - /* Give the client a kick! */ - if ((self->provider.access_type == ACCESS_PEER) && - (self->provider.state != IRLAN_IDLE)) - irlan_client_wakeup(self, self->saddr, self->daddr); - break; - case IRLAN_LMP_DISCONNECT: - case IRLAN_LAP_DISCONNECT: - irlan_next_client_state(self, IRLAN_IDLE); - break; - case IRLAN_WATCHDOG_TIMEOUT: - pr_debug("%s(), IRLAN_WATCHDOG_TIMEOUT\n", __func__); - break; - default: - pr_debug("%s(), Unknown event %d\n", __func__ , event); - break; - } - if (skb) - dev_kfree_skb(skb); - - return 0; -} - -/* - * Function irlan_client_state_conn (event, skb, info) - * - * CONN, We have connected to a provider but has not issued any - * commands yet. - * - */ -static int irlan_client_state_conn(struct irlan_cb *self, IRLAN_EVENT event, - struct sk_buff *skb) -{ - IRDA_ASSERT(self != NULL, return -1;); - - switch (event) { - case IRLAN_CONNECT_COMPLETE: - /* Send getinfo cmd */ - irlan_get_provider_info(self); - irlan_next_client_state(self, IRLAN_INFO); - break; - case IRLAN_LMP_DISCONNECT: - case IRLAN_LAP_DISCONNECT: - irlan_next_client_state(self, IRLAN_IDLE); - break; - case IRLAN_WATCHDOG_TIMEOUT: - pr_debug("%s(), IRLAN_WATCHDOG_TIMEOUT\n", __func__); - break; - default: - pr_debug("%s(), Unknown event %d\n", __func__ , event); - break; - } - if (skb) - dev_kfree_skb(skb); - - return 0; -} - -/* - * Function irlan_client_state_info (self, event, skb, info) - * - * INFO, We have issued a GetInfo command and is awaiting a reply. - */ -static int irlan_client_state_info(struct irlan_cb *self, IRLAN_EVENT event, - struct sk_buff *skb) -{ - IRDA_ASSERT(self != NULL, return -1;); - - switch (event) { - case IRLAN_DATA_INDICATION: - IRDA_ASSERT(skb != NULL, return -1;); - - irlan_client_parse_response(self, skb); - - irlan_next_client_state(self, IRLAN_MEDIA); - - irlan_get_media_char(self); - break; - - case IRLAN_LMP_DISCONNECT: - case IRLAN_LAP_DISCONNECT: - irlan_next_client_state(self, IRLAN_IDLE); - break; - case IRLAN_WATCHDOG_TIMEOUT: - pr_debug("%s(), IRLAN_WATCHDOG_TIMEOUT\n", __func__); - break; - default: - pr_debug("%s(), Unknown event %d\n", __func__ , event); - break; - } - if (skb) - dev_kfree_skb(skb); - - return 0; -} - -/* - * Function irlan_client_state_media (self, event, skb, info) - * - * MEDIA, The irlan_client has issued a GetMedia command and is awaiting a - * reply. - * - */ -static int irlan_client_state_media(struct irlan_cb *self, IRLAN_EVENT event, - struct sk_buff *skb) -{ - IRDA_ASSERT(self != NULL, return -1;); - - switch(event) { - case IRLAN_DATA_INDICATION: - irlan_client_parse_response(self, skb); - irlan_open_data_channel(self); - irlan_next_client_state(self, IRLAN_OPEN); - break; - case IRLAN_LMP_DISCONNECT: - case IRLAN_LAP_DISCONNECT: - irlan_next_client_state(self, IRLAN_IDLE); - break; - case IRLAN_WATCHDOG_TIMEOUT: - pr_debug("%s(), IRLAN_WATCHDOG_TIMEOUT\n", __func__); - break; - default: - pr_debug("%s(), Unknown event %d\n", __func__ , event); - break; - } - if (skb) - dev_kfree_skb(skb); - - return 0; -} - -/* - * Function irlan_client_state_open (self, event, skb, info) - * - * OPEN, The irlan_client has issued a OpenData command and is awaiting a - * reply - * - */ -static int irlan_client_state_open(struct irlan_cb *self, IRLAN_EVENT event, - struct sk_buff *skb) -{ - struct qos_info qos; - - IRDA_ASSERT(self != NULL, return -1;); - - switch(event) { - case IRLAN_DATA_INDICATION: - irlan_client_parse_response(self, skb); - - /* - * Check if we have got the remote TSAP for data - * communications - */ - IRDA_ASSERT(self->dtsap_sel_data != 0, return -1;); - - /* Check which access type we are dealing with */ - switch (self->client.access_type) { - case ACCESS_PEER: - if (self->provider.state == IRLAN_OPEN) { - - irlan_next_client_state(self, IRLAN_ARB); - irlan_do_client_event(self, IRLAN_CHECK_CON_ARB, - NULL); - } else { - - irlan_next_client_state(self, IRLAN_WAIT); - } - break; - case ACCESS_DIRECT: - case ACCESS_HOSTED: - qos.link_disc_time.bits = 0x01; /* 3 secs */ - - irttp_connect_request(self->tsap_data, - self->dtsap_sel_data, - self->saddr, self->daddr, &qos, - IRLAN_MTU, NULL); - - irlan_next_client_state(self, IRLAN_DATA); - break; - default: - pr_debug("%s(), unknown access type!\n", __func__); - break; - } - break; - case IRLAN_LMP_DISCONNECT: - case IRLAN_LAP_DISCONNECT: - irlan_next_client_state(self, IRLAN_IDLE); - break; - case IRLAN_WATCHDOG_TIMEOUT: - pr_debug("%s(), IRLAN_WATCHDOG_TIMEOUT\n", __func__); - break; - default: - pr_debug("%s(), Unknown event %d\n", __func__ , event); - break; - } - - if (skb) - dev_kfree_skb(skb); - - return 0; -} - -/* - * Function irlan_client_state_wait (self, event, skb, info) - * - * WAIT, The irlan_client is waiting for the local provider to enter the - * provider OPEN state. - * - */ -static int irlan_client_state_wait(struct irlan_cb *self, IRLAN_EVENT event, - struct sk_buff *skb) -{ - IRDA_ASSERT(self != NULL, return -1;); - - switch(event) { - case IRLAN_PROVIDER_SIGNAL: - irlan_next_client_state(self, IRLAN_ARB); - irlan_do_client_event(self, IRLAN_CHECK_CON_ARB, NULL); - break; - case IRLAN_LMP_DISCONNECT: - case IRLAN_LAP_DISCONNECT: - irlan_next_client_state(self, IRLAN_IDLE); - break; - case IRLAN_WATCHDOG_TIMEOUT: - pr_debug("%s(), IRLAN_WATCHDOG_TIMEOUT\n", __func__); - break; - default: - pr_debug("%s(), Unknown event %d\n", __func__ , event); - break; - } - if (skb) - dev_kfree_skb(skb); - - return 0; -} - -static int irlan_client_state_arb(struct irlan_cb *self, IRLAN_EVENT event, - struct sk_buff *skb) -{ - struct qos_info qos; - - IRDA_ASSERT(self != NULL, return -1;); - - switch(event) { - case IRLAN_CHECK_CON_ARB: - if (self->client.recv_arb_val == self->provider.send_arb_val) { - irlan_next_client_state(self, IRLAN_CLOSE); - irlan_close_data_channel(self); - } else if (self->client.recv_arb_val < - self->provider.send_arb_val) - { - qos.link_disc_time.bits = 0x01; /* 3 secs */ - - irlan_next_client_state(self, IRLAN_DATA); - irttp_connect_request(self->tsap_data, - self->dtsap_sel_data, - self->saddr, self->daddr, &qos, - IRLAN_MTU, NULL); - } else if (self->client.recv_arb_val > - self->provider.send_arb_val) - { - pr_debug("%s(), lost the battle :-(\n", __func__); - } - break; - case IRLAN_DATA_CONNECT_INDICATION: - irlan_next_client_state(self, IRLAN_DATA); - break; - case IRLAN_LMP_DISCONNECT: - case IRLAN_LAP_DISCONNECT: - irlan_next_client_state(self, IRLAN_IDLE); - break; - case IRLAN_WATCHDOG_TIMEOUT: - pr_debug("%s(), IRLAN_WATCHDOG_TIMEOUT\n", __func__); - break; - default: - pr_debug("%s(), Unknown event %d\n", __func__ , event); - break; - } - if (skb) - dev_kfree_skb(skb); - - return 0; -} - -/* - * Function irlan_client_state_data (self, event, skb, info) - * - * DATA, The data channel is connected, allowing data transfers between - * the local and remote machines. - * - */ -static int irlan_client_state_data(struct irlan_cb *self, IRLAN_EVENT event, - struct sk_buff *skb) -{ - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return -1;); - - switch(event) { - case IRLAN_DATA_INDICATION: - irlan_client_parse_response(self, skb); - break; - case IRLAN_LMP_DISCONNECT: /* FALLTHROUGH */ - case IRLAN_LAP_DISCONNECT: - irlan_next_client_state(self, IRLAN_IDLE); - break; - default: - pr_debug("%s(), Unknown event %d\n", __func__ , event); - break; - } - if (skb) - dev_kfree_skb(skb); - - return 0; -} - -/* - * Function irlan_client_state_close (self, event, skb, info) - * - * - * - */ -static int irlan_client_state_close(struct irlan_cb *self, IRLAN_EVENT event, - struct sk_buff *skb) -{ - if (skb) - dev_kfree_skb(skb); - - return 0; -} - -/* - * Function irlan_client_state_sync (self, event, skb, info) - * - * - * - */ -static int irlan_client_state_sync(struct irlan_cb *self, IRLAN_EVENT event, - struct sk_buff *skb) -{ - if (skb) - dev_kfree_skb(skb); - - return 0; -} - - - - - - - - - - - - - diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c deleted file mode 100644 index 481bbc2a4349..000000000000 --- a/net/irda/irlan/irlan_common.c +++ /dev/null @@ -1,1176 +0,0 @@ -/********************************************************************* - * - * Filename: irlan_common.c - * Version: 0.9 - * Description: IrDA LAN Access Protocol Implementation - * Status: Experimental. - * Author: Dag Brattli <dagb@cs.uit.no> - * Created at: Sun Aug 31 20:14:37 1997 - * Modified at: Sun Dec 26 21:53:10 1999 - * Modified by: Dag Brattli <dagb@cs.uit.no> - * - * Copyright (c) 1997, 1999 Dag Brattli <dagb@cs.uit.no>, - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#include <linux/module.h> - -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/gfp.h> -#include <linux/init.h> -#include <linux/errno.h> -#include <linux/proc_fs.h> -#include <linux/sched.h> -#include <linux/seq_file.h> -#include <linux/random.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/rtnetlink.h> -#include <linux/moduleparam.h> -#include <linux/bitops.h> - -#include <asm/byteorder.h> - -#include <net/irda/irda.h> -#include <net/irda/irttp.h> -#include <net/irda/irlmp.h> -#include <net/irda/iriap.h> -#include <net/irda/timer.h> - -#include <net/irda/irlan_common.h> -#include <net/irda/irlan_client.h> -#include <net/irda/irlan_provider.h> -#include <net/irda/irlan_eth.h> -#include <net/irda/irlan_filter.h> - - -/* extern char sysctl_devname[]; */ - -/* - * Master structure - */ -static LIST_HEAD(irlans); - -static void *ckey; -static void *skey; - -/* Module parameters */ -static bool eth; /* Use "eth" or "irlan" name for devices */ -static int access = ACCESS_PEER; /* PEER, DIRECT or HOSTED */ - -#ifdef CONFIG_PROC_FS -static const char *const irlan_access[] = { - "UNKNOWN", - "DIRECT", - "PEER", - "HOSTED" -}; - -static const char *const irlan_media[] = { - "UNKNOWN", - "802.3", - "802.5" -}; - -extern struct proc_dir_entry *proc_irda; - -static int irlan_seq_open(struct inode *inode, struct file *file); - -static const struct file_operations irlan_fops = { - .owner = THIS_MODULE, - .open = irlan_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release, -}; - -extern struct proc_dir_entry *proc_irda; -#endif /* CONFIG_PROC_FS */ - -static struct irlan_cb __init *irlan_open(__u32 saddr, __u32 daddr); -static void __irlan_close(struct irlan_cb *self); -static int __irlan_insert_param(struct sk_buff *skb, char *param, int type, - __u8 value_byte, __u16 value_short, - __u8 *value_array, __u16 value_len); -static void irlan_open_unicast_addr(struct irlan_cb *self); -static void irlan_get_unicast_addr(struct irlan_cb *self); -void irlan_close_tsaps(struct irlan_cb *self); - -/* - * Function irlan_init (void) - * - * Initialize IrLAN layer - * - */ -static int __init irlan_init(void) -{ - struct irlan_cb *new; - __u16 hints; - -#ifdef CONFIG_PROC_FS - { struct proc_dir_entry *proc; - proc = proc_create("irlan", 0, proc_irda, &irlan_fops); - if (!proc) { - printk(KERN_ERR "irlan_init: can't create /proc entry!\n"); - return -ENODEV; - } - } -#endif /* CONFIG_PROC_FS */ - - hints = irlmp_service_to_hint(S_LAN); - - /* Register with IrLMP as a client */ - ckey = irlmp_register_client(hints, &irlan_client_discovery_indication, - NULL, NULL); - if (!ckey) - goto err_ckey; - - /* Register with IrLMP as a service */ - skey = irlmp_register_service(hints); - if (!skey) - goto err_skey; - - /* Start the master IrLAN instance (the only one for now) */ - new = irlan_open(DEV_ADDR_ANY, DEV_ADDR_ANY); - if (!new) - goto err_open; - - /* The master will only open its (listen) control TSAP */ - irlan_provider_open_ctrl_tsap(new); - - /* Do some fast discovery! */ - irlmp_discovery_request(DISCOVERY_DEFAULT_SLOTS); - - return 0; - -err_open: - irlmp_unregister_service(skey); -err_skey: - irlmp_unregister_client(ckey); -err_ckey: -#ifdef CONFIG_PROC_FS - remove_proc_entry("irlan", proc_irda); -#endif /* CONFIG_PROC_FS */ - - return -ENOMEM; -} - -static void __exit irlan_cleanup(void) -{ - struct irlan_cb *self, *next; - - irlmp_unregister_client(ckey); - irlmp_unregister_service(skey); - -#ifdef CONFIG_PROC_FS - remove_proc_entry("irlan", proc_irda); -#endif /* CONFIG_PROC_FS */ - - /* Cleanup any leftover network devices */ - rtnl_lock(); - list_for_each_entry_safe(self, next, &irlans, dev_list) { - __irlan_close(self); - } - rtnl_unlock(); -} - -/* - * Function irlan_open (void) - * - * Open new instance of a client/provider, we should only register the - * network device if this instance is ment for a particular client/provider - */ -static struct irlan_cb __init *irlan_open(__u32 saddr, __u32 daddr) -{ - struct net_device *dev; - struct irlan_cb *self; - - /* Create network device with irlan */ - dev = alloc_irlandev(eth ? "eth%d" : "irlan%d"); - if (!dev) - return NULL; - - self = netdev_priv(dev); - self->dev = dev; - - /* - * Initialize local device structure - */ - self->magic = IRLAN_MAGIC; - self->saddr = saddr; - self->daddr = daddr; - - /* Provider access can only be PEER, DIRECT, or HOSTED */ - self->provider.access_type = access; - if (access == ACCESS_DIRECT) { - /* - * Since we are emulating an IrLAN sever we will have to - * give ourself an ethernet address! - */ - dev->dev_addr[0] = 0x40; - dev->dev_addr[1] = 0x00; - dev->dev_addr[2] = 0x00; - dev->dev_addr[3] = 0x00; - get_random_bytes(dev->dev_addr+4, 1); - get_random_bytes(dev->dev_addr+5, 1); - } - - self->media = MEDIA_802_3; - self->disconnect_reason = LM_USER_REQUEST; - init_timer(&self->watchdog_timer); - init_timer(&self->client.kick_timer); - init_waitqueue_head(&self->open_wait); - - skb_queue_head_init(&self->client.txq); - - irlan_next_client_state(self, IRLAN_IDLE); - irlan_next_provider_state(self, IRLAN_IDLE); - - if (register_netdev(dev)) { - pr_debug("%s(), register_netdev() failed!\n", - __func__); - self = NULL; - free_netdev(dev); - } else { - rtnl_lock(); - list_add_rcu(&self->dev_list, &irlans); - rtnl_unlock(); - } - - return self; -} -/* - * Function __irlan_close (self) - * - * This function closes and deallocates the IrLAN client instances. Be - * aware that other functions which calls client_close() must - * remove self from irlans list first. - */ -static void __irlan_close(struct irlan_cb *self) -{ - ASSERT_RTNL(); - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - - del_timer_sync(&self->watchdog_timer); - del_timer_sync(&self->client.kick_timer); - - /* Close all open connections and remove TSAPs */ - irlan_close_tsaps(self); - - if (self->client.iriap) - iriap_close(self->client.iriap); - - /* Remove frames queued on the control channel */ - skb_queue_purge(&self->client.txq); - - /* Unregister and free self via destructor */ - unregister_netdevice(self->dev); -} - -/* Find any instance of irlan, used for client discovery wakeup */ -struct irlan_cb *irlan_get_any(void) -{ - struct irlan_cb *self; - - list_for_each_entry_rcu(self, &irlans, dev_list) { - return self; - } - return NULL; -} - -/* - * Function irlan_connect_indication (instance, sap, qos, max_sdu_size, skb) - * - * Here we receive the connect indication for the data channel - * - */ -static void irlan_connect_indication(void *instance, void *sap, - struct qos_info *qos, - __u32 max_sdu_size, - __u8 max_header_size, - struct sk_buff *skb) -{ - struct irlan_cb *self; - struct tsap_cb *tsap; - - self = instance; - tsap = sap; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - IRDA_ASSERT(tsap == self->tsap_data,return;); - - self->max_sdu_size = max_sdu_size; - self->max_header_size = max_header_size; - - pr_debug("%s: We are now connected!\n", __func__); - - del_timer(&self->watchdog_timer); - - /* If you want to pass the skb to *both* state machines, you will - * need to skb_clone() it, so that you don't free it twice. - * As the state machines don't need it, git rid of it here... - * Jean II */ - if (skb) - dev_kfree_skb(skb); - - irlan_do_provider_event(self, IRLAN_DATA_CONNECT_INDICATION, NULL); - irlan_do_client_event(self, IRLAN_DATA_CONNECT_INDICATION, NULL); - - if (self->provider.access_type == ACCESS_PEER) { - /* - * Data channel is open, so we are now allowed to - * configure the remote filter - */ - irlan_get_unicast_addr(self); - irlan_open_unicast_addr(self); - } - /* Ready to transfer Ethernet frames (at last) */ - netif_start_queue(self->dev); /* Clear reason */ -} - -static void irlan_connect_confirm(void *instance, void *sap, - struct qos_info *qos, - __u32 max_sdu_size, - __u8 max_header_size, - struct sk_buff *skb) -{ - struct irlan_cb *self; - - self = instance; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - - self->max_sdu_size = max_sdu_size; - self->max_header_size = max_header_size; - - /* TODO: we could set the MTU depending on the max_sdu_size */ - - pr_debug("%s: We are now connected!\n", __func__); - del_timer(&self->watchdog_timer); - - /* - * Data channel is open, so we are now allowed to configure the remote - * filter - */ - irlan_get_unicast_addr(self); - irlan_open_unicast_addr(self); - - /* Open broadcast and multicast filter by default */ - irlan_set_broadcast_filter(self, TRUE); - irlan_set_multicast_filter(self, TRUE); - - /* Ready to transfer Ethernet frames */ - netif_start_queue(self->dev); - self->disconnect_reason = 0; /* Clear reason */ - wake_up_interruptible(&self->open_wait); -} - -/* - * Function irlan_client_disconnect_indication (handle) - * - * Callback function for the IrTTP layer. Indicates a disconnection of - * the specified connection (handle) - */ -static void irlan_disconnect_indication(void *instance, - void *sap, LM_REASON reason, - struct sk_buff *userdata) -{ - struct irlan_cb *self; - struct tsap_cb *tsap; - - pr_debug("%s(), reason=%d\n", __func__ , reason); - - self = instance; - tsap = sap; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - IRDA_ASSERT(tsap != NULL, return;); - IRDA_ASSERT(tsap->magic == TTP_TSAP_MAGIC, return;); - - IRDA_ASSERT(tsap == self->tsap_data, return;); - - pr_debug("IrLAN, data channel disconnected by peer!\n"); - - /* Save reason so we know if we should try to reconnect or not */ - self->disconnect_reason = reason; - - switch (reason) { - case LM_USER_REQUEST: /* User request */ - pr_debug("%s(), User requested\n", __func__); - break; - case LM_LAP_DISCONNECT: /* Unexpected IrLAP disconnect */ - pr_debug("%s(), Unexpected IrLAP disconnect\n", __func__); - break; - case LM_CONNECT_FAILURE: /* Failed to establish IrLAP connection */ - pr_debug("%s(), IrLAP connect failed\n", __func__); - break; - case LM_LAP_RESET: /* IrLAP reset */ - pr_debug("%s(), IrLAP reset\n", __func__); - break; - case LM_INIT_DISCONNECT: - pr_debug("%s(), IrLMP connect failed\n", __func__); - break; - default: - net_err_ratelimited("%s(), Unknown disconnect reason\n", - __func__); - break; - } - - /* If you want to pass the skb to *both* state machines, you will - * need to skb_clone() it, so that you don't free it twice. - * As the state machines don't need it, git rid of it here... - * Jean II */ - if (userdata) - dev_kfree_skb(userdata); - - irlan_do_client_event(self, IRLAN_LMP_DISCONNECT, NULL); - irlan_do_provider_event(self, IRLAN_LMP_DISCONNECT, NULL); - - wake_up_interruptible(&self->open_wait); -} - -void irlan_open_data_tsap(struct irlan_cb *self) -{ - struct tsap_cb *tsap; - notify_t notify; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - - /* Check if already open */ - if (self->tsap_data) - return; - - irda_notify_init(¬ify); - - notify.data_indication = irlan_eth_receive; - notify.udata_indication = irlan_eth_receive; - notify.connect_indication = irlan_connect_indication; - notify.connect_confirm = irlan_connect_confirm; - notify.flow_indication = irlan_eth_flow_indication; - notify.disconnect_indication = irlan_disconnect_indication; - notify.instance = self; - strlcpy(notify.name, "IrLAN data", sizeof(notify.name)); - - tsap = irttp_open_tsap(LSAP_ANY, DEFAULT_INITIAL_CREDIT, ¬ify); - if (!tsap) { - pr_debug("%s(), Got no tsap!\n", __func__); - return; - } - self->tsap_data = tsap; - - /* - * This is the data TSAP selector which we will pass to the client - * when the client ask for it. - */ - self->stsap_sel_data = self->tsap_data->stsap_sel; -} - -void irlan_close_tsaps(struct irlan_cb *self) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - - /* Disconnect and close all open TSAP connections */ - if (self->tsap_data) { - irttp_disconnect_request(self->tsap_data, NULL, P_NORMAL); - irttp_close_tsap(self->tsap_data); - self->tsap_data = NULL; - } - if (self->client.tsap_ctrl) { - irttp_disconnect_request(self->client.tsap_ctrl, NULL, - P_NORMAL); - irttp_close_tsap(self->client.tsap_ctrl); - self->client.tsap_ctrl = NULL; - } - if (self->provider.tsap_ctrl) { - irttp_disconnect_request(self->provider.tsap_ctrl, NULL, - P_NORMAL); - irttp_close_tsap(self->provider.tsap_ctrl); - self->provider.tsap_ctrl = NULL; - } - self->disconnect_reason = LM_USER_REQUEST; -} - -/* - * Function irlan_ias_register (self, tsap_sel) - * - * Register with LM-IAS - * - */ -void irlan_ias_register(struct irlan_cb *self, __u8 tsap_sel) -{ - struct ias_object *obj; - struct ias_value *new_value; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - - /* - * Check if object has already been registered by a previous provider. - * If that is the case, we just change the value of the attribute - */ - if (!irias_find_object("IrLAN")) { - obj = irias_new_object("IrLAN", IAS_IRLAN_ID); - irias_add_integer_attrib(obj, "IrDA:TinyTP:LsapSel", tsap_sel, - IAS_KERNEL_ATTR); - irias_insert_object(obj); - } else { - new_value = irias_new_integer_value(tsap_sel); - irias_object_change_attribute("IrLAN", "IrDA:TinyTP:LsapSel", - new_value); - } - - /* Register PnP object only if not registered before */ - if (!irias_find_object("PnP")) { - obj = irias_new_object("PnP", IAS_PNP_ID); -#if 0 - irias_add_string_attrib(obj, "Name", sysctl_devname, - IAS_KERNEL_ATTR); -#else - irias_add_string_attrib(obj, "Name", "Linux", IAS_KERNEL_ATTR); -#endif - irias_add_string_attrib(obj, "DeviceID", "HWP19F0", - IAS_KERNEL_ATTR); - irias_add_integer_attrib(obj, "CompCnt", 1, IAS_KERNEL_ATTR); - if (self->provider.access_type == ACCESS_PEER) - irias_add_string_attrib(obj, "Comp#01", "PNP8389", - IAS_KERNEL_ATTR); - else - irias_add_string_attrib(obj, "Comp#01", "PNP8294", - IAS_KERNEL_ATTR); - - irias_add_string_attrib(obj, "Manufacturer", - "Linux-IrDA Project", IAS_KERNEL_ATTR); - irias_insert_object(obj); - } -} - -/* - * Function irlan_run_ctrl_tx_queue (self) - * - * Try to send the next command in the control transmit queue - * - */ -int irlan_run_ctrl_tx_queue(struct irlan_cb *self) -{ - struct sk_buff *skb; - - if (irda_lock(&self->client.tx_busy) == FALSE) - return -EBUSY; - - skb = skb_dequeue(&self->client.txq); - if (!skb) { - self->client.tx_busy = FALSE; - return 0; - } - - /* Check that it's really possible to send commands */ - if ((self->client.tsap_ctrl == NULL) || - (self->client.state == IRLAN_IDLE)) - { - self->client.tx_busy = FALSE; - dev_kfree_skb(skb); - return -1; - } - pr_debug("%s(), sending ...\n", __func__); - - return irttp_data_request(self->client.tsap_ctrl, skb); -} - -/* - * Function irlan_ctrl_data_request (self, skb) - * - * This function makes sure that commands on the control channel is being - * sent in a command/response fashion - */ -static void irlan_ctrl_data_request(struct irlan_cb *self, struct sk_buff *skb) -{ - /* Queue command */ - skb_queue_tail(&self->client.txq, skb); - - /* Try to send command */ - irlan_run_ctrl_tx_queue(self); -} - -/* - * Function irlan_get_provider_info (self) - * - * Send Get Provider Information command to peer IrLAN layer - * - */ -void irlan_get_provider_info(struct irlan_cb *self) -{ - struct sk_buff *skb; - __u8 *frame; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - - skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER, - GFP_ATOMIC); - if (!skb) - return; - - /* Reserve space for TTP, LMP, and LAP header */ - skb_reserve(skb, self->client.max_header_size); - skb_put(skb, 2); - - frame = skb->data; - - frame[0] = CMD_GET_PROVIDER_INFO; - frame[1] = 0x00; /* Zero parameters */ - - irlan_ctrl_data_request(self, skb); -} - -/* - * Function irlan_open_data_channel (self) - * - * Send an Open Data Command to provider - * - */ -void irlan_open_data_channel(struct irlan_cb *self) -{ - struct sk_buff *skb; - __u8 *frame; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - - skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER + - IRLAN_STRING_PARAMETER_LEN("MEDIA", "802.3") + - IRLAN_STRING_PARAMETER_LEN("ACCESS_TYPE", "DIRECT"), - GFP_ATOMIC); - if (!skb) - return; - - skb_reserve(skb, self->client.max_header_size); - skb_put(skb, 2); - - frame = skb->data; - - /* Build frame */ - frame[0] = CMD_OPEN_DATA_CHANNEL; - frame[1] = 0x02; /* Two parameters */ - - irlan_insert_string_param(skb, "MEDIA", "802.3"); - irlan_insert_string_param(skb, "ACCESS_TYPE", "DIRECT"); - /* irlan_insert_string_param(skb, "MODE", "UNRELIABLE"); */ - -/* self->use_udata = TRUE; */ - - irlan_ctrl_data_request(self, skb); -} - -void irlan_close_data_channel(struct irlan_cb *self) -{ - struct sk_buff *skb; - __u8 *frame; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - - /* Check if the TSAP is still there */ - if (self->client.tsap_ctrl == NULL) - return; - - skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER + - IRLAN_BYTE_PARAMETER_LEN("DATA_CHAN"), - GFP_ATOMIC); - if (!skb) - return; - - skb_reserve(skb, self->client.max_header_size); - skb_put(skb, 2); - - frame = skb->data; - - /* Build frame */ - frame[0] = CMD_CLOSE_DATA_CHAN; - frame[1] = 0x01; /* One parameter */ - - irlan_insert_byte_param(skb, "DATA_CHAN", self->dtsap_sel_data); - - irlan_ctrl_data_request(self, skb); -} - -/* - * Function irlan_open_unicast_addr (self) - * - * Make IrLAN provider accept ethernet frames addressed to the unicast - * address. - * - */ -static void irlan_open_unicast_addr(struct irlan_cb *self) -{ - struct sk_buff *skb; - __u8 *frame; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - - skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER + - IRLAN_BYTE_PARAMETER_LEN("DATA_CHAN") + - IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "DIRECTED") + - IRLAN_STRING_PARAMETER_LEN("FILTER_MODE", "FILTER"), - GFP_ATOMIC); - if (!skb) - return; - - /* Reserve space for TTP, LMP, and LAP header */ - skb_reserve(skb, self->max_header_size); - skb_put(skb, 2); - - frame = skb->data; - - frame[0] = CMD_FILTER_OPERATION; - frame[1] = 0x03; /* Three parameters */ - irlan_insert_byte_param(skb, "DATA_CHAN" , self->dtsap_sel_data); - irlan_insert_string_param(skb, "FILTER_TYPE", "DIRECTED"); - irlan_insert_string_param(skb, "FILTER_MODE", "FILTER"); - - irlan_ctrl_data_request(self, skb); -} - -/* - * Function irlan_set_broadcast_filter (self, status) - * - * Make IrLAN provider accept ethernet frames addressed to the broadcast - * address. Be careful with the use of this one, since there may be a lot - * of broadcast traffic out there. We can still function without this - * one but then _we_ have to initiate all communication with other - * hosts, since ARP request for this host will not be answered. - */ -void irlan_set_broadcast_filter(struct irlan_cb *self, int status) -{ - struct sk_buff *skb; - __u8 *frame; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - - skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER + - IRLAN_BYTE_PARAMETER_LEN("DATA_CHAN") + - IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "BROADCAST") + - /* We may waste one byte here...*/ - IRLAN_STRING_PARAMETER_LEN("FILTER_MODE", "FILTER"), - GFP_ATOMIC); - if (!skb) - return; - - /* Reserve space for TTP, LMP, and LAP header */ - skb_reserve(skb, self->client.max_header_size); - skb_put(skb, 2); - - frame = skb->data; - - frame[0] = CMD_FILTER_OPERATION; - frame[1] = 0x03; /* Three parameters */ - irlan_insert_byte_param(skb, "DATA_CHAN", self->dtsap_sel_data); - irlan_insert_string_param(skb, "FILTER_TYPE", "BROADCAST"); - if (status) - irlan_insert_string_param(skb, "FILTER_MODE", "FILTER"); - else - irlan_insert_string_param(skb, "FILTER_MODE", "NONE"); - - irlan_ctrl_data_request(self, skb); -} - -/* - * Function irlan_set_multicast_filter (self, status) - * - * Make IrLAN provider accept ethernet frames addressed to the multicast - * address. - * - */ -void irlan_set_multicast_filter(struct irlan_cb *self, int status) -{ - struct sk_buff *skb; - __u8 *frame; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - - skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER + - IRLAN_BYTE_PARAMETER_LEN("DATA_CHAN") + - IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "MULTICAST") + - /* We may waste one byte here...*/ - IRLAN_STRING_PARAMETER_LEN("FILTER_MODE", "NONE"), - GFP_ATOMIC); - if (!skb) - return; - - /* Reserve space for TTP, LMP, and LAP header */ - skb_reserve(skb, self->client.max_header_size); - skb_put(skb, 2); - - frame = skb->data; - - frame[0] = CMD_FILTER_OPERATION; - frame[1] = 0x03; /* Three parameters */ - irlan_insert_byte_param(skb, "DATA_CHAN", self->dtsap_sel_data); - irlan_insert_string_param(skb, "FILTER_TYPE", "MULTICAST"); - if (status) - irlan_insert_string_param(skb, "FILTER_MODE", "ALL"); - else - irlan_insert_string_param(skb, "FILTER_MODE", "NONE"); - - irlan_ctrl_data_request(self, skb); -} - -/* - * Function irlan_get_unicast_addr (self) - * - * Retrieves the unicast address from the IrLAN provider. This address - * will be inserted into the devices structure, so the ethernet layer - * can construct its packets. - * - */ -static void irlan_get_unicast_addr(struct irlan_cb *self) -{ - struct sk_buff *skb; - __u8 *frame; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - - skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER + - IRLAN_BYTE_PARAMETER_LEN("DATA_CHAN") + - IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "DIRECTED") + - IRLAN_STRING_PARAMETER_LEN("FILTER_OPERATION", - "DYNAMIC"), - GFP_ATOMIC); - if (!skb) - return; - - /* Reserve space for TTP, LMP, and LAP header */ - skb_reserve(skb, self->client.max_header_size); - skb_put(skb, 2); - - frame = skb->data; - - frame[0] = CMD_FILTER_OPERATION; - frame[1] = 0x03; /* Three parameters */ - irlan_insert_byte_param(skb, "DATA_CHAN", self->dtsap_sel_data); - irlan_insert_string_param(skb, "FILTER_TYPE", "DIRECTED"); - irlan_insert_string_param(skb, "FILTER_OPERATION", "DYNAMIC"); - - irlan_ctrl_data_request(self, skb); -} - -/* - * Function irlan_get_media_char (self) - * - * - * - */ -void irlan_get_media_char(struct irlan_cb *self) -{ - struct sk_buff *skb; - __u8 *frame; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - - skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER + - IRLAN_STRING_PARAMETER_LEN("MEDIA", "802.3"), - GFP_ATOMIC); - - if (!skb) - return; - - /* Reserve space for TTP, LMP, and LAP header */ - skb_reserve(skb, self->client.max_header_size); - skb_put(skb, 2); - - frame = skb->data; - - /* Build frame */ - frame[0] = CMD_GET_MEDIA_CHAR; - frame[1] = 0x01; /* One parameter */ - - irlan_insert_string_param(skb, "MEDIA", "802.3"); - irlan_ctrl_data_request(self, skb); -} - -/* - * Function insert_byte_param (skb, param, value) - * - * Insert byte parameter into frame - * - */ -int irlan_insert_byte_param(struct sk_buff *skb, char *param, __u8 value) -{ - return __irlan_insert_param(skb, param, IRLAN_BYTE, value, 0, NULL, 0); -} - -int irlan_insert_short_param(struct sk_buff *skb, char *param, __u16 value) -{ - return __irlan_insert_param(skb, param, IRLAN_SHORT, 0, value, NULL, 0); -} - -/* - * Function insert_string (skb, param, value) - * - * Insert string parameter into frame - * - */ -int irlan_insert_string_param(struct sk_buff *skb, char *param, char *string) -{ - int string_len = strlen(string); - - return __irlan_insert_param(skb, param, IRLAN_ARRAY, 0, 0, string, - string_len); -} - -/* - * Function insert_array_param(skb, param, value, len_value) - * - * Insert array parameter into frame - * - */ -int irlan_insert_array_param(struct sk_buff *skb, char *name, __u8 *array, - __u16 array_len) -{ - return __irlan_insert_param(skb, name, IRLAN_ARRAY, 0, 0, array, - array_len); -} - -/* - * Function insert_param (skb, param, value, byte) - * - * Insert parameter at end of buffer, structure of a parameter is: - * - * ----------------------------------------------------------------------- - * | Name Length[1] | Param Name[1..255] | Val Length[2] | Value[0..1016]| - * ----------------------------------------------------------------------- - */ -static int __irlan_insert_param(struct sk_buff *skb, char *param, int type, - __u8 value_byte, __u16 value_short, - __u8 *value_array, __u16 value_len) -{ - __u8 *frame; - __u8 param_len; - __le16 tmp_le; /* Temporary value in little endian format */ - int n=0; - - if (skb == NULL) { - pr_debug("%s(), Got NULL skb\n", __func__); - return 0; - } - - param_len = strlen(param); - switch (type) { - case IRLAN_BYTE: - value_len = 1; - break; - case IRLAN_SHORT: - value_len = 2; - break; - case IRLAN_ARRAY: - IRDA_ASSERT(value_array != NULL, return 0;); - IRDA_ASSERT(value_len > 0, return 0;); - break; - default: - pr_debug("%s(), Unknown parameter type!\n", __func__); - return 0; - } - - /* Insert at end of sk-buffer */ - frame = skb_tail_pointer(skb); - - /* Make space for data */ - if (skb_tailroom(skb) < (param_len+value_len+3)) { - pr_debug("%s(), No more space at end of skb\n", __func__); - return 0; - } - skb_put(skb, param_len+value_len+3); - - /* Insert parameter length */ - frame[n++] = param_len; - - /* Insert parameter */ - memcpy(frame+n, param, param_len); n += param_len; - - /* Insert value length (2 byte little endian format, LSB first) */ - tmp_le = cpu_to_le16(value_len); - memcpy(frame+n, &tmp_le, 2); n += 2; /* To avoid alignment problems */ - - /* Insert value */ - switch (type) { - case IRLAN_BYTE: - frame[n++] = value_byte; - break; - case IRLAN_SHORT: - tmp_le = cpu_to_le16(value_short); - memcpy(frame+n, &tmp_le, 2); n += 2; - break; - case IRLAN_ARRAY: - memcpy(frame+n, value_array, value_len); n+=value_len; - break; - default: - break; - } - IRDA_ASSERT(n == (param_len+value_len+3), return 0;); - - return param_len+value_len+3; -} - -/* - * Function irlan_extract_param (buf, name, value, len) - * - * Extracts a single parameter name/value pair from buffer and updates - * the buffer pointer to point to the next name/value pair. - */ -int irlan_extract_param(__u8 *buf, char *name, char *value, __u16 *len) -{ - __u8 name_len; - __u16 val_len; - int n=0; - - /* get length of parameter name (1 byte) */ - name_len = buf[n++]; - - if (name_len > 254) { - pr_debug("%s(), name_len > 254\n", __func__); - return -RSP_INVALID_COMMAND_FORMAT; - } - - /* get parameter name */ - memcpy(name, buf+n, name_len); - name[name_len] = '\0'; - n+=name_len; - - /* - * Get length of parameter value (2 bytes in little endian - * format) - */ - memcpy(&val_len, buf+n, 2); /* To avoid alignment problems */ - le16_to_cpus(&val_len); n+=2; - - if (val_len >= 1016) { - pr_debug("%s(), parameter length to long\n", __func__); - return -RSP_INVALID_COMMAND_FORMAT; - } - *len = val_len; - - /* get parameter value */ - memcpy(value, buf+n, val_len); - value[val_len] = '\0'; - n+=val_len; - - pr_debug("Parameter: %s ", name); - pr_debug("Value: %s\n", value); - - return n; -} - -#ifdef CONFIG_PROC_FS - -/* - * Start of reading /proc entries. - * Return entry at pos, - * or start_token to indicate print header line - * or NULL if end of file - */ -static void *irlan_seq_start(struct seq_file *seq, loff_t *pos) -{ - rcu_read_lock(); - return seq_list_start_head(&irlans, *pos); -} - -/* Return entry after v, and increment pos */ -static void *irlan_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - return seq_list_next(v, &irlans, pos); -} - -/* End of reading /proc file */ -static void irlan_seq_stop(struct seq_file *seq, void *v) -{ - rcu_read_unlock(); -} - - -/* - * Show one entry in /proc file. - */ -static int irlan_seq_show(struct seq_file *seq, void *v) -{ - if (v == &irlans) - seq_puts(seq, "IrLAN instances:\n"); - else { - struct irlan_cb *self = list_entry(v, struct irlan_cb, dev_list); - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return -1;); - - seq_printf(seq,"ifname: %s,\n", - self->dev->name); - seq_printf(seq,"client state: %s, ", - irlan_state[ self->client.state]); - seq_printf(seq,"provider state: %s,\n", - irlan_state[ self->provider.state]); - seq_printf(seq,"saddr: %#08x, ", - self->saddr); - seq_printf(seq,"daddr: %#08x\n", - self->daddr); - seq_printf(seq,"version: %d.%d,\n", - self->version[1], self->version[0]); - seq_printf(seq,"access type: %s\n", - irlan_access[self->client.access_type]); - seq_printf(seq,"media: %s\n", - irlan_media[self->media]); - - seq_printf(seq,"local filter:\n"); - seq_printf(seq,"remote filter: "); - irlan_print_filter(seq, self->client.filter_type); - seq_printf(seq,"tx busy: %s\n", - netif_queue_stopped(self->dev) ? "TRUE" : "FALSE"); - - seq_putc(seq,'\n'); - } - return 0; -} - -static const struct seq_operations irlan_seq_ops = { - .start = irlan_seq_start, - .next = irlan_seq_next, - .stop = irlan_seq_stop, - .show = irlan_seq_show, -}; - -static int irlan_seq_open(struct inode *inode, struct file *file) -{ - return seq_open(file, &irlan_seq_ops); -} -#endif - -MODULE_AUTHOR("Dag Brattli <dagb@cs.uit.no>"); -MODULE_DESCRIPTION("The Linux IrDA LAN protocol"); -MODULE_LICENSE("GPL"); - -module_param(eth, bool, 0); -MODULE_PARM_DESC(eth, "Name devices ethX (0) or irlanX (1)"); -module_param(access, int, 0); -MODULE_PARM_DESC(access, "Access type DIRECT=1, PEER=2, HOSTED=3"); - -module_init(irlan_init); -module_exit(irlan_cleanup); - diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c deleted file mode 100644 index 3be852808a9d..000000000000 --- a/net/irda/irlan/irlan_eth.c +++ /dev/null @@ -1,340 +0,0 @@ -/********************************************************************* - * - * Filename: irlan_eth.c - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli <dagb@cs.uit.no> - * Created at: Thu Oct 15 08:37:58 1998 - * Modified at: Tue Mar 21 09:06:41 2000 - * Modified by: Dag Brattli <dagb@cs.uit.no> - * Sources: skeleton.c by Donald Becker <becker@CESDIS.gsfc.nasa.gov> - * slip.c by Laurence Culhane, <loz@holmes.demon.co.uk> - * Fred N. van Kempen, <waltje@uwalt.nl.mugnet.org> - * - * Copyright (c) 1998-2000 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/inetdevice.h> -#include <linux/if_arp.h> -#include <linux/module.h> -#include <linux/sched.h> -#include <net/arp.h> - -#include <net/irda/irda.h> -#include <net/irda/irmod.h> -#include <net/irda/irlan_common.h> -#include <net/irda/irlan_client.h> -#include <net/irda/irlan_event.h> -#include <net/irda/irlan_eth.h> - -static int irlan_eth_open(struct net_device *dev); -static int irlan_eth_close(struct net_device *dev); -static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb, - struct net_device *dev); -static void irlan_eth_set_multicast_list(struct net_device *dev); - -static const struct net_device_ops irlan_eth_netdev_ops = { - .ndo_open = irlan_eth_open, - .ndo_stop = irlan_eth_close, - .ndo_start_xmit = irlan_eth_xmit, - .ndo_set_rx_mode = irlan_eth_set_multicast_list, - .ndo_validate_addr = eth_validate_addr, -}; - -/* - * Function irlan_eth_setup (dev) - * - * The network device initialization function. - * - */ -static void irlan_eth_setup(struct net_device *dev) -{ - ether_setup(dev); - - dev->netdev_ops = &irlan_eth_netdev_ops; - dev->needs_free_netdev = true; - dev->min_mtu = 0; - dev->max_mtu = ETH_MAX_MTU; - - /* - * Lets do all queueing in IrTTP instead of this device driver. - * Queueing here as well can introduce some strange latency - * problems, which we will avoid by setting the queue size to 0. - */ - /* - * The bugs in IrTTP and IrLAN that created this latency issue - * have now been fixed, and we can propagate flow control properly - * to the network layer. However, this requires a minimal queue of - * packets for the device. - * Without flow control, the Tx Queue is 14 (ttp) + 0 (dev) = 14 - * With flow control, the Tx Queue is 7 (ttp) + 4 (dev) = 11 - * See irlan_eth_flow_indication()... - * Note : this number was randomly selected and would need to - * be adjusted. - * Jean II */ - dev->tx_queue_len = 4; -} - -/* - * Function alloc_irlandev - * - * Allocate network device and control block - * - */ -struct net_device *alloc_irlandev(const char *name) -{ - return alloc_netdev(sizeof(struct irlan_cb), name, NET_NAME_UNKNOWN, - irlan_eth_setup); -} - -/* - * Function irlan_eth_open (dev) - * - * Network device has been opened by user - * - */ -static int irlan_eth_open(struct net_device *dev) -{ - struct irlan_cb *self = netdev_priv(dev); - - /* Ready to play! */ - netif_stop_queue(dev); /* Wait until data link is ready */ - - /* We are now open, so time to do some work */ - self->disconnect_reason = 0; - irlan_client_wakeup(self, self->saddr, self->daddr); - - /* Make sure we have a hardware address before we return, - so DHCP clients gets happy */ - return wait_event_interruptible(self->open_wait, - !self->tsap_data->connected); -} - -/* - * Function irlan_eth_close (dev) - * - * Stop the ether network device, his function will usually be called by - * ifconfig down. We should now disconnect the link, We start the - * close timer, so that the instance will be removed if we are unable - * to discover the remote device after the disconnect. - */ -static int irlan_eth_close(struct net_device *dev) -{ - struct irlan_cb *self = netdev_priv(dev); - - /* Stop device */ - netif_stop_queue(dev); - - irlan_close_data_channel(self); - irlan_close_tsaps(self); - - irlan_do_client_event(self, IRLAN_LMP_DISCONNECT, NULL); - irlan_do_provider_event(self, IRLAN_LMP_DISCONNECT, NULL); - - /* Remove frames queued on the control channel */ - skb_queue_purge(&self->client.txq); - - self->client.tx_busy = 0; - - return 0; -} - -/* - * Function irlan_eth_tx (skb) - * - * Transmits ethernet frames over IrDA link. - * - */ -static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb, - struct net_device *dev) -{ - struct irlan_cb *self = netdev_priv(dev); - int ret; - unsigned int len; - - /* skb headroom large enough to contain all IrDA-headers? */ - if ((skb_headroom(skb) < self->max_header_size) || (skb_shared(skb))) { - struct sk_buff *new_skb = - skb_realloc_headroom(skb, self->max_header_size); - - /* We have to free the original skb anyway */ - dev_kfree_skb(skb); - - /* Did the realloc succeed? */ - if (new_skb == NULL) - return NETDEV_TX_OK; - - /* Use the new skb instead */ - skb = new_skb; - } - - netif_trans_update(dev); - - len = skb->len; - /* Now queue the packet in the transport layer */ - if (self->use_udata) - ret = irttp_udata_request(self->tsap_data, skb); - else - ret = irttp_data_request(self->tsap_data, skb); - - if (ret < 0) { - /* - * IrTTPs tx queue is full, so we just have to - * drop the frame! You might think that we should - * just return -1 and don't deallocate the frame, - * but that is dangerous since it's possible that - * we have replaced the original skb with a new - * one with larger headroom, and that would really - * confuse do_dev_queue_xmit() in dev.c! I have - * tried :-) DB - */ - /* irttp_data_request already free the packet */ - dev->stats.tx_dropped++; - } else { - dev->stats.tx_packets++; - dev->stats.tx_bytes += len; - } - - return NETDEV_TX_OK; -} - -/* - * Function irlan_eth_receive (handle, skb) - * - * This function gets the data that is received on the data channel - * - */ -int irlan_eth_receive(void *instance, void *sap, struct sk_buff *skb) -{ - struct irlan_cb *self = instance; - struct net_device *dev = self->dev; - - if (skb == NULL) { - dev->stats.rx_dropped++; - return 0; - } - if (skb->len < ETH_HLEN) { - pr_debug("%s() : IrLAN frame too short (%d)\n", - __func__, skb->len); - dev->stats.rx_dropped++; - dev_kfree_skb(skb); - return 0; - } - - /* - * Adopt this frame! Important to set all these fields since they - * might have been previously set by the low level IrDA network - * device driver - */ - skb->protocol = eth_type_trans(skb, dev); /* Remove eth header */ - - dev->stats.rx_packets++; - dev->stats.rx_bytes += skb->len; - - netif_rx(skb); /* Eat it! */ - - return 0; -} - -/* - * Function irlan_eth_flow (status) - * - * Do flow control between IP/Ethernet and IrLAN/IrTTP. This is done by - * controlling the queue stop/start. - * - * The IrDA link layer has the advantage to have flow control, and - * IrTTP now properly handles that. Flow controlling the higher layers - * prevent us to drop Tx packets in here (up to 15% for a TCP socket, - * more for UDP socket). - * Also, this allow us to reduce the overall transmit queue, which means - * less latency in case of mixed traffic. - * Jean II - */ -void irlan_eth_flow_indication(void *instance, void *sap, LOCAL_FLOW flow) -{ - struct irlan_cb *self; - struct net_device *dev; - - self = instance; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - - dev = self->dev; - - IRDA_ASSERT(dev != NULL, return;); - - pr_debug("%s() : flow %s ; running %d\n", __func__, - flow == FLOW_STOP ? "FLOW_STOP" : "FLOW_START", - netif_running(dev)); - - switch (flow) { - case FLOW_STOP: - /* IrTTP is full, stop higher layers */ - netif_stop_queue(dev); - break; - case FLOW_START: - default: - /* Tell upper layers that its time to transmit frames again */ - /* Schedule network layer */ - netif_wake_queue(dev); - break; - } -} - -/* - * Function set_multicast_list (dev) - * - * Configure the filtering of the device - * - */ -#define HW_MAX_ADDRS 4 /* Must query to get it! */ -static void irlan_eth_set_multicast_list(struct net_device *dev) -{ - struct irlan_cb *self = netdev_priv(dev); - - /* Check if data channel has been connected yet */ - if (self->client.state != IRLAN_DATA) { - pr_debug("%s(), delaying!\n", __func__); - return; - } - - if (dev->flags & IFF_PROMISC) { - /* Enable promiscuous mode */ - net_warn_ratelimited("Promiscuous mode not implemented by IrLAN!\n"); - } else if ((dev->flags & IFF_ALLMULTI) || - netdev_mc_count(dev) > HW_MAX_ADDRS) { - /* Disable promiscuous mode, use normal mode. */ - pr_debug("%s(), Setting multicast filter\n", __func__); - /* hardware_set_filter(NULL); */ - - irlan_set_multicast_filter(self, TRUE); - } else if (!netdev_mc_empty(dev)) { - pr_debug("%s(), Setting multicast filter\n", __func__); - /* Walk the address list, and load the filter */ - /* hardware_set_filter(dev->mc_list); */ - - irlan_set_multicast_filter(self, TRUE); - } else { - pr_debug("%s(), Clearing multicast filter\n", __func__); - irlan_set_multicast_filter(self, FALSE); - } - - if (dev->flags & IFF_BROADCAST) - irlan_set_broadcast_filter(self, TRUE); - else - irlan_set_broadcast_filter(self, FALSE); -} diff --git a/net/irda/irlan/irlan_event.c b/net/irda/irlan/irlan_event.c deleted file mode 100644 index 9a1cc11c16f6..000000000000 --- a/net/irda/irlan/irlan_event.c +++ /dev/null @@ -1,60 +0,0 @@ -/********************************************************************* - * - * Filename: irlan_event.c - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli <dagb@cs.uit.no> - * Created at: Tue Oct 20 09:10:16 1998 - * Modified at: Sat Oct 30 12:59:01 1999 - * Modified by: Dag Brattli <dagb@cs.uit.no> - * - * Copyright (c) 1998-1999 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#include <net/irda/irlan_event.h> - -const char * const irlan_state[] = { - "IRLAN_IDLE", - "IRLAN_QUERY", - "IRLAN_CONN", - "IRLAN_INFO", - "IRLAN_MEDIA", - "IRLAN_OPEN", - "IRLAN_WAIT", - "IRLAN_ARB", - "IRLAN_DATA", - "IRLAN_CLOSE", - "IRLAN_SYNC", -}; - -void irlan_next_client_state(struct irlan_cb *self, IRLAN_STATE state) -{ - pr_debug("%s(), %s\n", __func__ , irlan_state[state]); - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - - self->client.state = state; -} - -void irlan_next_provider_state(struct irlan_cb *self, IRLAN_STATE state) -{ - pr_debug("%s(), %s\n", __func__ , irlan_state[state]); - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - - self->provider.state = state; -} - diff --git a/net/irda/irlan/irlan_filter.c b/net/irda/irlan/irlan_filter.c deleted file mode 100644 index e755e90b2f26..000000000000 --- a/net/irda/irlan/irlan_filter.c +++ /dev/null @@ -1,240 +0,0 @@ -/********************************************************************* - * - * Filename: irlan_filter.c - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli <dagb@cs.uit.no> - * Created at: Fri Jan 29 11:16:38 1999 - * Modified at: Sat Oct 30 12:58:45 1999 - * Modified by: Dag Brattli <dagb@cs.uit.no> - * - * Copyright (c) 1998-1999 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#include <linux/skbuff.h> -#include <linux/random.h> -#include <linux/seq_file.h> - -#include <net/irda/irlan_common.h> -#include <net/irda/irlan_filter.h> - -/* - * Function irlan_filter_request (self, skb) - * - * Handle filter request from client peer device - * - */ -void irlan_filter_request(struct irlan_cb *self, struct sk_buff *skb) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - - if ((self->provider.filter_type == IRLAN_DIRECTED) && - (self->provider.filter_operation == DYNAMIC)) - { - pr_debug("Giving peer a dynamic Ethernet address\n"); - self->provider.mac_address[0] = 0x40; - self->provider.mac_address[1] = 0x00; - self->provider.mac_address[2] = 0x00; - self->provider.mac_address[3] = 0x00; - - /* Use arbitration value to generate MAC address */ - if (self->provider.access_type == ACCESS_PEER) { - self->provider.mac_address[4] = - self->provider.send_arb_val & 0xff; - self->provider.mac_address[5] = - (self->provider.send_arb_val >> 8) & 0xff; - } else { - /* Just generate something for now */ - get_random_bytes(self->provider.mac_address+4, 1); - get_random_bytes(self->provider.mac_address+5, 1); - } - - skb->data[0] = 0x00; /* Success */ - skb->data[1] = 0x03; - irlan_insert_string_param(skb, "FILTER_MODE", "NONE"); - irlan_insert_short_param(skb, "MAX_ENTRY", 0x0001); - irlan_insert_array_param(skb, "FILTER_ENTRY", - self->provider.mac_address, 6); - return; - } - - if ((self->provider.filter_type == IRLAN_DIRECTED) && - (self->provider.filter_mode == FILTER)) - { - pr_debug("Directed filter on\n"); - skb->data[0] = 0x00; /* Success */ - skb->data[1] = 0x00; - return; - } - if ((self->provider.filter_type == IRLAN_DIRECTED) && - (self->provider.filter_mode == NONE)) - { - pr_debug("Directed filter off\n"); - skb->data[0] = 0x00; /* Success */ - skb->data[1] = 0x00; - return; - } - - if ((self->provider.filter_type == IRLAN_BROADCAST) && - (self->provider.filter_mode == FILTER)) - { - pr_debug("Broadcast filter on\n"); - skb->data[0] = 0x00; /* Success */ - skb->data[1] = 0x00; - return; - } - if ((self->provider.filter_type == IRLAN_BROADCAST) && - (self->provider.filter_mode == NONE)) - { - pr_debug("Broadcast filter off\n"); - skb->data[0] = 0x00; /* Success */ - skb->data[1] = 0x00; - return; - } - if ((self->provider.filter_type == IRLAN_MULTICAST) && - (self->provider.filter_mode == FILTER)) - { - pr_debug("Multicast filter on\n"); - skb->data[0] = 0x00; /* Success */ - skb->data[1] = 0x00; - return; - } - if ((self->provider.filter_type == IRLAN_MULTICAST) && - (self->provider.filter_mode == NONE)) - { - pr_debug("Multicast filter off\n"); - skb->data[0] = 0x00; /* Success */ - skb->data[1] = 0x00; - return; - } - if ((self->provider.filter_type == IRLAN_MULTICAST) && - (self->provider.filter_operation == GET)) - { - pr_debug("Multicast filter get\n"); - skb->data[0] = 0x00; /* Success? */ - skb->data[1] = 0x02; - irlan_insert_string_param(skb, "FILTER_MODE", "NONE"); - irlan_insert_short_param(skb, "MAX_ENTRY", 16); - return; - } - skb->data[0] = 0x00; /* Command not supported */ - skb->data[1] = 0x00; - - pr_debug("Not implemented!\n"); -} - -/* - * Function check_request_param (self, param, value) - * - * Check parameters in request from peer device - * - */ -void irlan_check_command_param(struct irlan_cb *self, char *param, char *value) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - - pr_debug("%s, %s\n", param, value); - - /* - * This is experimental!! DB. - */ - if (strcmp(param, "MODE") == 0) { - self->use_udata = TRUE; - return; - } - - /* - * FILTER_TYPE - */ - if (strcmp(param, "FILTER_TYPE") == 0) { - if (strcmp(value, "DIRECTED") == 0) { - self->provider.filter_type = IRLAN_DIRECTED; - return; - } - if (strcmp(value, "MULTICAST") == 0) { - self->provider.filter_type = IRLAN_MULTICAST; - return; - } - if (strcmp(value, "BROADCAST") == 0) { - self->provider.filter_type = IRLAN_BROADCAST; - return; - } - } - /* - * FILTER_MODE - */ - if (strcmp(param, "FILTER_MODE") == 0) { - if (strcmp(value, "ALL") == 0) { - self->provider.filter_mode = ALL; - return; - } - if (strcmp(value, "FILTER") == 0) { - self->provider.filter_mode = FILTER; - return; - } - if (strcmp(value, "NONE") == 0) { - self->provider.filter_mode = FILTER; - return; - } - } - /* - * FILTER_OPERATION - */ - if (strcmp(param, "FILTER_OPERATION") == 0) { - if (strcmp(value, "DYNAMIC") == 0) { - self->provider.filter_operation = DYNAMIC; - return; - } - if (strcmp(value, "GET") == 0) { - self->provider.filter_operation = GET; - return; - } - } -} - -/* - * Function irlan_print_filter (filter_type, buf) - * - * Print status of filter. Used by /proc file system - * - */ -#ifdef CONFIG_PROC_FS -#define MASK2STR(m,s) { .mask = m, .str = s } - -void irlan_print_filter(struct seq_file *seq, int filter_type) -{ - static struct { - int mask; - const char *str; - } filter_mask2str[] = { - MASK2STR(IRLAN_DIRECTED, "DIRECTED"), - MASK2STR(IRLAN_FUNCTIONAL, "FUNCTIONAL"), - MASK2STR(IRLAN_GROUP, "GROUP"), - MASK2STR(IRLAN_MAC_FRAME, "MAC_FRAME"), - MASK2STR(IRLAN_MULTICAST, "MULTICAST"), - MASK2STR(IRLAN_BROADCAST, "BROADCAST"), - MASK2STR(IRLAN_IPX_SOCKET, "IPX_SOCKET"), - MASK2STR(0, NULL) - }, *p; - - for (p = filter_mask2str; p->str; p++) { - if (filter_type & p->mask) - seq_printf(seq, "%s ", p->str); - } - seq_putc(seq, '\n'); -} -#undef MASK2STR -#endif diff --git a/net/irda/irlan/irlan_provider.c b/net/irda/irlan/irlan_provider.c deleted file mode 100644 index 15c292cf2644..000000000000 --- a/net/irda/irlan/irlan_provider.c +++ /dev/null @@ -1,408 +0,0 @@ -/********************************************************************* - * - * Filename: irlan_provider.c - * Version: 0.9 - * Description: IrDA LAN Access Protocol Implementation - * Status: Experimental. - * Author: Dag Brattli <dagb@cs.uit.no> - * Created at: Sun Aug 31 20:14:37 1997 - * Modified at: Sat Oct 30 12:52:10 1999 - * Modified by: Dag Brattli <dagb@cs.uit.no> - * Sources: skeleton.c by Donald Becker <becker@CESDIS.gsfc.nasa.gov> - * slip.c by Laurence Culhane, <loz@holmes.demon.co.uk> - * Fred N. van Kempen, <waltje@uwalt.nl.mugnet.org> - * - * Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no>, - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#include <linux/kernel.h> -#include <linux/string.h> -#include <linux/errno.h> -#include <linux/netdevice.h> -#include <linux/etherdevice.h> -#include <linux/init.h> -#include <linux/random.h> -#include <linux/bitops.h> -#include <linux/slab.h> - -#include <asm/byteorder.h> - -#include <net/irda/irda.h> -#include <net/irda/irttp.h> -#include <net/irda/irlmp.h> -#include <net/irda/irias_object.h> -#include <net/irda/iriap.h> -#include <net/irda/timer.h> - -#include <net/irda/irlan_common.h> -#include <net/irda/irlan_eth.h> -#include <net/irda/irlan_event.h> -#include <net/irda/irlan_provider.h> -#include <net/irda/irlan_filter.h> -#include <net/irda/irlan_client.h> - -static void irlan_provider_connect_indication(void *instance, void *sap, - struct qos_info *qos, - __u32 max_sdu_size, - __u8 max_header_size, - struct sk_buff *skb); - -/* - * Function irlan_provider_control_data_indication (handle, skb) - * - * This function gets the data that is received on the control channel - * - */ -static int irlan_provider_data_indication(void *instance, void *sap, - struct sk_buff *skb) -{ - struct irlan_cb *self; - __u8 code; - - self = instance; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return -1;); - - IRDA_ASSERT(skb != NULL, return -1;); - - code = skb->data[0]; - switch(code) { - case CMD_GET_PROVIDER_INFO: - pr_debug("Got GET_PROVIDER_INFO command!\n"); - irlan_do_provider_event(self, IRLAN_GET_INFO_CMD, skb); - break; - - case CMD_GET_MEDIA_CHAR: - pr_debug("Got GET_MEDIA_CHAR command!\n"); - irlan_do_provider_event(self, IRLAN_GET_MEDIA_CMD, skb); - break; - case CMD_OPEN_DATA_CHANNEL: - pr_debug("Got OPEN_DATA_CHANNEL command!\n"); - irlan_do_provider_event(self, IRLAN_OPEN_DATA_CMD, skb); - break; - case CMD_FILTER_OPERATION: - pr_debug("Got FILTER_OPERATION command!\n"); - irlan_do_provider_event(self, IRLAN_FILTER_CONFIG_CMD, skb); - break; - case CMD_RECONNECT_DATA_CHAN: - pr_debug("%s(), Got RECONNECT_DATA_CHAN command\n", __func__); - pr_debug("%s(), NOT IMPLEMENTED\n", __func__); - break; - case CMD_CLOSE_DATA_CHAN: - pr_debug("Got CLOSE_DATA_CHAN command!\n"); - pr_debug("%s(), NOT IMPLEMENTED\n", __func__); - break; - default: - pr_debug("%s(), Unknown command!\n", __func__); - break; - } - return 0; -} - -/* - * Function irlan_provider_connect_indication (handle, skb, priv) - * - * Got connection from peer IrLAN client - * - */ -static void irlan_provider_connect_indication(void *instance, void *sap, - struct qos_info *qos, - __u32 max_sdu_size, - __u8 max_header_size, - struct sk_buff *skb) -{ - struct irlan_cb *self; - struct tsap_cb *tsap; - - self = instance; - tsap = sap; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - - IRDA_ASSERT(tsap == self->provider.tsap_ctrl,return;); - IRDA_ASSERT(self->provider.state == IRLAN_IDLE, return;); - - self->provider.max_sdu_size = max_sdu_size; - self->provider.max_header_size = max_header_size; - - irlan_do_provider_event(self, IRLAN_CONNECT_INDICATION, NULL); - - /* - * If we are in peer mode, the client may not have got the discovery - * indication it needs to make progress. If the client is still in - * IDLE state, we must kick it. - */ - if ((self->provider.access_type == ACCESS_PEER) && - (self->client.state == IRLAN_IDLE)) - { - irlan_client_wakeup(self, self->saddr, self->daddr); - } -} - -/* - * Function irlan_provider_connect_response (handle) - * - * Accept incoming connection - * - */ -void irlan_provider_connect_response(struct irlan_cb *self, - struct tsap_cb *tsap) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - - /* Just accept */ - irttp_connect_response(tsap, IRLAN_MTU, NULL); -} - -static void irlan_provider_disconnect_indication(void *instance, void *sap, - LM_REASON reason, - struct sk_buff *userdata) -{ - struct irlan_cb *self; - struct tsap_cb *tsap; - - pr_debug("%s(), reason=%d\n", __func__ , reason); - - self = instance; - tsap = sap; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - IRDA_ASSERT(tsap != NULL, return;); - IRDA_ASSERT(tsap->magic == TTP_TSAP_MAGIC, return;); - - IRDA_ASSERT(tsap == self->provider.tsap_ctrl, return;); - - irlan_do_provider_event(self, IRLAN_LMP_DISCONNECT, NULL); -} - -/* - * Function irlan_parse_open_data_cmd (self, skb) - * - * - * - */ -int irlan_parse_open_data_cmd(struct irlan_cb *self, struct sk_buff *skb) -{ - int ret; - - ret = irlan_provider_parse_command(self, CMD_OPEN_DATA_CHANNEL, skb); - - /* Open data channel */ - irlan_open_data_tsap(self); - - return ret; -} - -/* - * Function parse_command (skb) - * - * Extract all parameters from received buffer, then feed them to - * check_params for parsing - * - */ -int irlan_provider_parse_command(struct irlan_cb *self, int cmd, - struct sk_buff *skb) -{ - __u8 *frame; - __u8 *ptr; - int count; - __u16 val_len; - int i; - char *name; - char *value; - int ret = RSP_SUCCESS; - - IRDA_ASSERT(skb != NULL, return -RSP_PROTOCOL_ERROR;); - - pr_debug("%s(), skb->len=%d\n", __func__ , (int)skb->len); - - IRDA_ASSERT(self != NULL, return -RSP_PROTOCOL_ERROR;); - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return -RSP_PROTOCOL_ERROR;); - - if (!skb) - return -RSP_PROTOCOL_ERROR; - - frame = skb->data; - - name = kmalloc(255, GFP_ATOMIC); - if (!name) - return -RSP_INSUFFICIENT_RESOURCES; - value = kmalloc(1016, GFP_ATOMIC); - if (!value) { - kfree(name); - return -RSP_INSUFFICIENT_RESOURCES; - } - - /* How many parameters? */ - count = frame[1]; - - pr_debug("Got %d parameters\n", count); - - ptr = frame+2; - - /* For all parameters */ - for (i=0; i<count;i++) { - ret = irlan_extract_param(ptr, name, value, &val_len); - if (ret < 0) { - pr_debug("%s(), IrLAN, Error!\n", __func__); - break; - } - ptr+=ret; - ret = RSP_SUCCESS; - irlan_check_command_param(self, name, value); - } - /* Cleanup */ - kfree(name); - kfree(value); - - return ret; -} - -/* - * Function irlan_provider_send_reply (self, info) - * - * Send reply to query to peer IrLAN layer - * - */ -void irlan_provider_send_reply(struct irlan_cb *self, int command, - int ret_code) -{ - struct sk_buff *skb; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - - skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER + - /* Bigger param length comes from CMD_GET_MEDIA_CHAR */ - IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "DIRECTED") + - IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "BROADCAST") + - IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "MULTICAST") + - IRLAN_STRING_PARAMETER_LEN("ACCESS_TYPE", "HOSTED"), - GFP_ATOMIC); - - if (!skb) - return; - - /* Reserve space for TTP, LMP, and LAP header */ - skb_reserve(skb, self->provider.max_header_size); - skb_put(skb, 2); - - switch (command) { - case CMD_GET_PROVIDER_INFO: - skb->data[0] = 0x00; /* Success */ - skb->data[1] = 0x02; /* 2 parameters */ - switch (self->media) { - case MEDIA_802_3: - irlan_insert_string_param(skb, "MEDIA", "802.3"); - break; - case MEDIA_802_5: - irlan_insert_string_param(skb, "MEDIA", "802.5"); - break; - default: - pr_debug("%s(), unknown media type!\n", __func__); - break; - } - irlan_insert_short_param(skb, "IRLAN_VER", 0x0101); - break; - - case CMD_GET_MEDIA_CHAR: - skb->data[0] = 0x00; /* Success */ - skb->data[1] = 0x05; /* 5 parameters */ - irlan_insert_string_param(skb, "FILTER_TYPE", "DIRECTED"); - irlan_insert_string_param(skb, "FILTER_TYPE", "BROADCAST"); - irlan_insert_string_param(skb, "FILTER_TYPE", "MULTICAST"); - - switch (self->provider.access_type) { - case ACCESS_DIRECT: - irlan_insert_string_param(skb, "ACCESS_TYPE", "DIRECT"); - break; - case ACCESS_PEER: - irlan_insert_string_param(skb, "ACCESS_TYPE", "PEER"); - break; - case ACCESS_HOSTED: - irlan_insert_string_param(skb, "ACCESS_TYPE", "HOSTED"); - break; - default: - pr_debug("%s(), Unknown access type\n", __func__); - break; - } - irlan_insert_short_param(skb, "MAX_FRAME", 0x05ee); - break; - case CMD_OPEN_DATA_CHANNEL: - skb->data[0] = 0x00; /* Success */ - if (self->provider.send_arb_val) { - skb->data[1] = 0x03; /* 3 parameters */ - irlan_insert_short_param(skb, "CON_ARB", - self->provider.send_arb_val); - } else - skb->data[1] = 0x02; /* 2 parameters */ - irlan_insert_byte_param(skb, "DATA_CHAN", self->stsap_sel_data); - irlan_insert_string_param(skb, "RECONNECT_KEY", "LINUX RULES!"); - break; - case CMD_FILTER_OPERATION: - irlan_filter_request(self, skb); - break; - default: - pr_debug("%s(), Unknown command!\n", __func__); - break; - } - - irttp_data_request(self->provider.tsap_ctrl, skb); -} - -/* - * Function irlan_provider_register(void) - * - * Register provider support so we can accept incoming connections. - * - */ -int irlan_provider_open_ctrl_tsap(struct irlan_cb *self) -{ - struct tsap_cb *tsap; - notify_t notify; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return -1;); - - /* Check if already open */ - if (self->provider.tsap_ctrl) - return -1; - - /* - * First register well known control TSAP - */ - irda_notify_init(¬ify); - notify.data_indication = irlan_provider_data_indication; - notify.connect_indication = irlan_provider_connect_indication; - notify.disconnect_indication = irlan_provider_disconnect_indication; - notify.instance = self; - strlcpy(notify.name, "IrLAN ctrl (p)", sizeof(notify.name)); - - tsap = irttp_open_tsap(LSAP_ANY, 1, ¬ify); - if (!tsap) { - pr_debug("%s(), Got no tsap!\n", __func__); - return -1; - } - self->provider.tsap_ctrl = tsap; - - /* Register with LM-IAS */ - irlan_ias_register(self, tsap->stsap_sel); - - return 0; -} - diff --git a/net/irda/irlan/irlan_provider_event.c b/net/irda/irlan/irlan_provider_event.c deleted file mode 100644 index 9c4f7f51d6b5..000000000000 --- a/net/irda/irlan/irlan_provider_event.c +++ /dev/null @@ -1,233 +0,0 @@ -/********************************************************************* - * - * Filename: irlan_provider_event.c - * Version: 0.9 - * Description: IrLAN provider state machine) - * Status: Experimental. - * Author: Dag Brattli <dagb@cs.uit.no> - * Created at: Sun Aug 31 20:14:37 1997 - * Modified at: Sat Oct 30 12:52:41 1999 - * Modified by: Dag Brattli <dagb@cs.uit.no> - * - * Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no>, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#include <net/irda/irda.h> -#include <net/irda/iriap.h> -#include <net/irda/irlmp.h> -#include <net/irda/irttp.h> - -#include <net/irda/irlan_provider.h> -#include <net/irda/irlan_event.h> - -static int irlan_provider_state_idle(struct irlan_cb *self, IRLAN_EVENT event, - struct sk_buff *skb); -static int irlan_provider_state_info(struct irlan_cb *self, IRLAN_EVENT event, - struct sk_buff *skb); -static int irlan_provider_state_open(struct irlan_cb *self, IRLAN_EVENT event, - struct sk_buff *skb); -static int irlan_provider_state_data(struct irlan_cb *self, IRLAN_EVENT event, - struct sk_buff *skb); - -static int (*state[])(struct irlan_cb *self, IRLAN_EVENT event, - struct sk_buff *skb) = -{ - irlan_provider_state_idle, - NULL, /* Query */ - NULL, /* Info */ - irlan_provider_state_info, - NULL, /* Media */ - irlan_provider_state_open, - NULL, /* Wait */ - NULL, /* Arb */ - irlan_provider_state_data, - NULL, /* Close */ - NULL, /* Sync */ -}; - -void irlan_do_provider_event(struct irlan_cb *self, IRLAN_EVENT event, - struct sk_buff *skb) -{ - IRDA_ASSERT(*state[ self->provider.state] != NULL, return;); - - (*state[self->provider.state]) (self, event, skb); -} - -/* - * Function irlan_provider_state_idle (event, skb, info) - * - * IDLE, We are waiting for an indication that there is a provider - * available. - */ -static int irlan_provider_state_idle(struct irlan_cb *self, IRLAN_EVENT event, - struct sk_buff *skb) -{ - IRDA_ASSERT(self != NULL, return -1;); - - switch(event) { - case IRLAN_CONNECT_INDICATION: - irlan_provider_connect_response( self, self->provider.tsap_ctrl); - irlan_next_provider_state( self, IRLAN_INFO); - break; - default: - pr_debug("%s(), Unknown event %d\n", __func__ , event); - break; - } - if (skb) - dev_kfree_skb(skb); - - return 0; -} - -/* - * Function irlan_provider_state_info (self, event, skb, info) - * - * INFO, We have issued a GetInfo command and is awaiting a reply. - */ -static int irlan_provider_state_info(struct irlan_cb *self, IRLAN_EVENT event, - struct sk_buff *skb) -{ - int ret; - - IRDA_ASSERT(self != NULL, return -1;); - - switch(event) { - case IRLAN_GET_INFO_CMD: - /* Be sure to use 802.3 in case of peer mode */ - if (self->provider.access_type == ACCESS_PEER) { - self->media = MEDIA_802_3; - - /* Check if client has started yet */ - if (self->client.state == IRLAN_IDLE) { - /* This should get the client going */ - irlmp_discovery_request(8); - } - } - - irlan_provider_send_reply(self, CMD_GET_PROVIDER_INFO, - RSP_SUCCESS); - /* Keep state */ - break; - case IRLAN_GET_MEDIA_CMD: - irlan_provider_send_reply(self, CMD_GET_MEDIA_CHAR, - RSP_SUCCESS); - /* Keep state */ - break; - case IRLAN_OPEN_DATA_CMD: - ret = irlan_parse_open_data_cmd(self, skb); - if (self->provider.access_type == ACCESS_PEER) { - /* FIXME: make use of random functions! */ - self->provider.send_arb_val = (jiffies & 0xffff); - } - irlan_provider_send_reply(self, CMD_OPEN_DATA_CHANNEL, ret); - - if (ret == RSP_SUCCESS) { - irlan_next_provider_state(self, IRLAN_OPEN); - - /* Signal client that we are now open */ - irlan_do_client_event(self, IRLAN_PROVIDER_SIGNAL, NULL); - } - break; - case IRLAN_LMP_DISCONNECT: /* FALLTHROUGH */ - case IRLAN_LAP_DISCONNECT: - irlan_next_provider_state(self, IRLAN_IDLE); - break; - default: - pr_debug("%s(), Unknown event %d\n", __func__ , event); - break; - } - if (skb) - dev_kfree_skb(skb); - - return 0; -} - -/* - * Function irlan_provider_state_open (self, event, skb, info) - * - * OPEN, The client has issued a OpenData command and is awaiting a - * reply - * - */ -static int irlan_provider_state_open(struct irlan_cb *self, IRLAN_EVENT event, - struct sk_buff *skb) -{ - IRDA_ASSERT(self != NULL, return -1;); - - switch(event) { - case IRLAN_FILTER_CONFIG_CMD: - irlan_provider_parse_command(self, CMD_FILTER_OPERATION, skb); - irlan_provider_send_reply(self, CMD_FILTER_OPERATION, - RSP_SUCCESS); - /* Keep state */ - break; - case IRLAN_DATA_CONNECT_INDICATION: - irlan_next_provider_state(self, IRLAN_DATA); - irlan_provider_connect_response(self, self->tsap_data); - break; - case IRLAN_LMP_DISCONNECT: /* FALLTHROUGH */ - case IRLAN_LAP_DISCONNECT: - irlan_next_provider_state(self, IRLAN_IDLE); - break; - default: - pr_debug("%s(), Unknown event %d\n", __func__ , event); - break; - } - if (skb) - dev_kfree_skb(skb); - - return 0; -} - -/* - * Function irlan_provider_state_data (self, event, skb, info) - * - * DATA, The data channel is connected, allowing data transfers between - * the local and remote machines. - * - */ -static int irlan_provider_state_data(struct irlan_cb *self, IRLAN_EVENT event, - struct sk_buff *skb) -{ - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == IRLAN_MAGIC, return -1;); - - switch(event) { - case IRLAN_FILTER_CONFIG_CMD: - irlan_provider_parse_command(self, CMD_FILTER_OPERATION, skb); - irlan_provider_send_reply(self, CMD_FILTER_OPERATION, - RSP_SUCCESS); - break; - case IRLAN_LMP_DISCONNECT: /* FALLTHROUGH */ - case IRLAN_LAP_DISCONNECT: - irlan_next_provider_state(self, IRLAN_IDLE); - break; - default: - pr_debug("%s(), Unknown event %d\n", __func__ , event); - break; - } - if (skb) - dev_kfree_skb(skb); - - return 0; -} - - - - - - - - - - diff --git a/net/irda/irlap.c b/net/irda/irlap.c deleted file mode 100644 index 1cde711bcab5..000000000000 --- a/net/irda/irlap.c +++ /dev/null @@ -1,1207 +0,0 @@ -/********************************************************************* - * - * Filename: irlap.c - * Version: 1.0 - * Description: IrLAP implementation for Linux - * Status: Stable - * Author: Dag Brattli <dagb@cs.uit.no> - * Created at: Mon Aug 4 20:40:53 1997 - * Modified at: Tue Dec 14 09:26:44 1999 - * Modified by: Dag Brattli <dagb@cs.uit.no> - * - * Copyright (c) 1998-1999 Dag Brattli, All Rights Reserved. - * Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. - * - ********************************************************************/ - -#include <linux/slab.h> -#include <linux/string.h> -#include <linux/skbuff.h> -#include <linux/delay.h> -#include <linux/proc_fs.h> -#include <linux/init.h> -#include <linux/random.h> -#include <linux/module.h> -#include <linux/seq_file.h> - -#include <net/irda/irda.h> -#include <net/irda/irda_device.h> -#include <net/irda/irqueue.h> -#include <net/irda/irlmp.h> -#include <net/irda/irlmp_frame.h> -#include <net/irda/irlap_frame.h> -#include <net/irda/irlap.h> -#include <net/irda/timer.h> -#include <net/irda/qos.h> - -static hashbin_t *irlap = NULL; -int sysctl_slot_timeout = SLOT_TIMEOUT * 1000 / HZ; - -/* This is the delay of missed pf period before generating an event - * to the application. The spec mandate 3 seconds, but in some cases - * it's way too long. - Jean II */ -int sysctl_warn_noreply_time = 3; - -extern void irlap_queue_xmit(struct irlap_cb *self, struct sk_buff *skb); -static void __irlap_close(struct irlap_cb *self); -static void irlap_init_qos_capabilities(struct irlap_cb *self, - struct qos_info *qos_user); - -static const char *const lap_reasons[] __maybe_unused = { - "ERROR, NOT USED", - "LAP_DISC_INDICATION", - "LAP_NO_RESPONSE", - "LAP_RESET_INDICATION", - "LAP_FOUND_NONE", - "LAP_MEDIA_BUSY", - "LAP_PRIMARY_CONFLICT", - "ERROR, NOT USED", -}; - -int __init irlap_init(void) -{ - /* Check if the compiler did its job properly. - * May happen on some ARM configuration, check with Russell King. */ - IRDA_ASSERT(sizeof(struct xid_frame) == 14, ;); - IRDA_ASSERT(sizeof(struct test_frame) == 10, ;); - IRDA_ASSERT(sizeof(struct ua_frame) == 10, ;); - IRDA_ASSERT(sizeof(struct snrm_frame) == 11, ;); - - /* Allocate master array */ - irlap = hashbin_new(HB_LOCK); - if (irlap == NULL) { - net_err_ratelimited("%s: can't allocate irlap hashbin!\n", - __func__); - return -ENOMEM; - } - - return 0; -} - -void irlap_cleanup(void) -{ - IRDA_ASSERT(irlap != NULL, return;); - - hashbin_delete(irlap, (FREE_FUNC) __irlap_close); -} - -/* - * Function irlap_open (driver) - * - * Initialize IrLAP layer - * - */ -struct irlap_cb *irlap_open(struct net_device *dev, struct qos_info *qos, - const char *hw_name) -{ - struct irlap_cb *self; - - /* Initialize the irlap structure. */ - self = kzalloc(sizeof(struct irlap_cb), GFP_KERNEL); - if (self == NULL) - return NULL; - - self->magic = LAP_MAGIC; - - /* Make a binding between the layers */ - self->netdev = dev; - self->qos_dev = qos; - /* Copy hardware name */ - if(hw_name != NULL) { - strlcpy(self->hw_name, hw_name, sizeof(self->hw_name)); - } else { - self->hw_name[0] = '\0'; - } - - /* FIXME: should we get our own field? */ - dev->atalk_ptr = self; - - self->state = LAP_OFFLINE; - - /* Initialize transmit queue */ - skb_queue_head_init(&self->txq); - skb_queue_head_init(&self->txq_ultra); - skb_queue_head_init(&self->wx_list); - - /* My unique IrLAP device address! */ - /* We don't want the broadcast address, neither the NULL address - * (most often used to signify "invalid"), and we don't want an - * address already in use (otherwise connect won't be able - * to select the proper link). - Jean II */ - do { - get_random_bytes(&self->saddr, sizeof(self->saddr)); - } while ((self->saddr == 0x0) || (self->saddr == BROADCAST) || - (hashbin_lock_find(irlap, self->saddr, NULL)) ); - /* Copy to the driver */ - memcpy(dev->dev_addr, &self->saddr, 4); - - init_timer(&self->slot_timer); - init_timer(&self->query_timer); - init_timer(&self->discovery_timer); - init_timer(&self->final_timer); - init_timer(&self->poll_timer); - init_timer(&self->wd_timer); - init_timer(&self->backoff_timer); - init_timer(&self->media_busy_timer); - - irlap_apply_default_connection_parameters(self); - - self->N3 = 3; /* # connections attempts to try before giving up */ - - self->state = LAP_NDM; - - hashbin_insert(irlap, (irda_queue_t *) self, self->saddr, NULL); - - irlmp_register_link(self, self->saddr, &self->notify); - - return self; -} -EXPORT_SYMBOL(irlap_open); - -/* - * Function __irlap_close (self) - * - * Remove IrLAP and all allocated memory. Stop any pending timers. - * - */ -static void __irlap_close(struct irlap_cb *self) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - - /* Stop timers */ - del_timer(&self->slot_timer); - del_timer(&self->query_timer); - del_timer(&self->discovery_timer); - del_timer(&self->final_timer); - del_timer(&self->poll_timer); - del_timer(&self->wd_timer); - del_timer(&self->backoff_timer); - del_timer(&self->media_busy_timer); - - irlap_flush_all_queues(self); - - self->magic = 0; - - kfree(self); -} - -/* - * Function irlap_close (self) - * - * Remove IrLAP instance - * - */ -void irlap_close(struct irlap_cb *self) -{ - struct irlap_cb *lap; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - - /* We used to send a LAP_DISC_INDICATION here, but this was - * racy. This has been move within irlmp_unregister_link() - * itself. Jean II */ - - /* Kill the LAP and all LSAPs on top of it */ - irlmp_unregister_link(self->saddr); - self->notify.instance = NULL; - - /* Be sure that we manage to remove ourself from the hash */ - lap = hashbin_remove(irlap, self->saddr, NULL); - if (!lap) { - pr_debug("%s(), Didn't find myself!\n", __func__); - return; - } - __irlap_close(lap); -} -EXPORT_SYMBOL(irlap_close); - -/* - * Function irlap_connect_indication (self, skb) - * - * Another device is attempting to make a connection - * - */ -void irlap_connect_indication(struct irlap_cb *self, struct sk_buff *skb) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - - irlap_init_qos_capabilities(self, NULL); /* No user QoS! */ - - irlmp_link_connect_indication(self->notify.instance, self->saddr, - self->daddr, &self->qos_tx, skb); -} - -/* - * Function irlap_connect_response (self, skb) - * - * Service user has accepted incoming connection - * - */ -void irlap_connect_response(struct irlap_cb *self, struct sk_buff *userdata) -{ - irlap_do_event(self, CONNECT_RESPONSE, userdata, NULL); -} - -/* - * Function irlap_connect_request (self, daddr, qos_user, sniff) - * - * Request connection with another device, sniffing is not implemented - * yet. - * - */ -void irlap_connect_request(struct irlap_cb *self, __u32 daddr, - struct qos_info *qos_user, int sniff) -{ - pr_debug("%s(), daddr=0x%08x\n", __func__, daddr); - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - - self->daddr = daddr; - - /* - * If the service user specifies QoS values for this connection, - * then use them - */ - irlap_init_qos_capabilities(self, qos_user); - - if ((self->state == LAP_NDM) && !self->media_busy) - irlap_do_event(self, CONNECT_REQUEST, NULL, NULL); - else - self->connect_pending = TRUE; -} - -/* - * Function irlap_connect_confirm (self, skb) - * - * Connection request has been accepted - * - */ -void irlap_connect_confirm(struct irlap_cb *self, struct sk_buff *skb) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - - irlmp_link_connect_confirm(self->notify.instance, &self->qos_tx, skb); -} - -/* - * Function irlap_data_indication (self, skb) - * - * Received data frames from IR-port, so we just pass them up to - * IrLMP for further processing - * - */ -void irlap_data_indication(struct irlap_cb *self, struct sk_buff *skb, - int unreliable) -{ - /* Hide LAP header from IrLMP layer */ - skb_pull(skb, LAP_ADDR_HEADER+LAP_CTRL_HEADER); - - irlmp_link_data_indication(self->notify.instance, skb, unreliable); -} - - -/* - * Function irlap_data_request (self, skb) - * - * Queue data for transmission, must wait until XMIT state - * - */ -void irlap_data_request(struct irlap_cb *self, struct sk_buff *skb, - int unreliable) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - - IRDA_ASSERT(skb_headroom(skb) >= (LAP_ADDR_HEADER+LAP_CTRL_HEADER), - return;); - skb_push(skb, LAP_ADDR_HEADER+LAP_CTRL_HEADER); - - /* - * Must set frame format now so that the rest of the code knows - * if its dealing with an I or an UI frame - */ - if (unreliable) - skb->data[1] = UI_FRAME; - else - skb->data[1] = I_FRAME; - - /* Don't forget to refcount it - see irlmp_connect_request(). */ - skb_get(skb); - - /* Add at the end of the queue (keep ordering) - Jean II */ - skb_queue_tail(&self->txq, skb); - - /* - * Send event if this frame only if we are in the right state - * FIXME: udata should be sent first! (skb_queue_head?) - */ - if ((self->state == LAP_XMIT_P) || (self->state == LAP_XMIT_S)) { - /* If we are not already processing the Tx queue, trigger - * transmission immediately - Jean II */ - if((skb_queue_len(&self->txq) <= 1) && (!self->local_busy)) - irlap_do_event(self, DATA_REQUEST, skb, NULL); - /* Otherwise, the packets will be sent normally at the - * next pf-poll - Jean II */ - } -} - -/* - * Function irlap_unitdata_request (self, skb) - * - * Send Ultra data. This is data that must be sent outside any connection - * - */ -#ifdef CONFIG_IRDA_ULTRA -void irlap_unitdata_request(struct irlap_cb *self, struct sk_buff *skb) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - - IRDA_ASSERT(skb_headroom(skb) >= (LAP_ADDR_HEADER+LAP_CTRL_HEADER), - return;); - skb_push(skb, LAP_ADDR_HEADER+LAP_CTRL_HEADER); - - skb->data[0] = CBROADCAST; - skb->data[1] = UI_FRAME; - - /* Don't need to refcount, see irlmp_connless_data_request() */ - - skb_queue_tail(&self->txq_ultra, skb); - - irlap_do_event(self, SEND_UI_FRAME, NULL, NULL); -} -#endif /*CONFIG_IRDA_ULTRA */ - -/* - * Function irlap_udata_indication (self, skb) - * - * Receive Ultra data. This is data that is received outside any connection - * - */ -#ifdef CONFIG_IRDA_ULTRA -void irlap_unitdata_indication(struct irlap_cb *self, struct sk_buff *skb) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - IRDA_ASSERT(skb != NULL, return;); - - /* Hide LAP header from IrLMP layer */ - skb_pull(skb, LAP_ADDR_HEADER+LAP_CTRL_HEADER); - - irlmp_link_unitdata_indication(self->notify.instance, skb); -} -#endif /* CONFIG_IRDA_ULTRA */ - -/* - * Function irlap_disconnect_request (void) - * - * Request to disconnect connection by service user - */ -void irlap_disconnect_request(struct irlap_cb *self) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - - /* Don't disconnect until all data frames are successfully sent */ - if (!skb_queue_empty(&self->txq)) { - self->disconnect_pending = TRUE; - return; - } - - /* Check if we are in the right state for disconnecting */ - switch (self->state) { - case LAP_XMIT_P: /* FALLTHROUGH */ - case LAP_XMIT_S: /* FALLTHROUGH */ - case LAP_CONN: /* FALLTHROUGH */ - case LAP_RESET_WAIT: /* FALLTHROUGH */ - case LAP_RESET_CHECK: - irlap_do_event(self, DISCONNECT_REQUEST, NULL, NULL); - break; - default: - pr_debug("%s(), disconnect pending!\n", __func__); - self->disconnect_pending = TRUE; - break; - } -} - -/* - * Function irlap_disconnect_indication (void) - * - * Disconnect request from other device - * - */ -void irlap_disconnect_indication(struct irlap_cb *self, LAP_REASON reason) -{ - pr_debug("%s(), reason=%s\n", __func__, lap_reasons[reason]); - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - - /* Flush queues */ - irlap_flush_all_queues(self); - - switch (reason) { - case LAP_RESET_INDICATION: - pr_debug("%s(), Sending reset request!\n", __func__); - irlap_do_event(self, RESET_REQUEST, NULL, NULL); - break; - case LAP_NO_RESPONSE: /* FALLTHROUGH */ - case LAP_DISC_INDICATION: /* FALLTHROUGH */ - case LAP_FOUND_NONE: /* FALLTHROUGH */ - case LAP_MEDIA_BUSY: - irlmp_link_disconnect_indication(self->notify.instance, self, - reason, NULL); - break; - default: - net_err_ratelimited("%s: Unknown reason %d\n", - __func__, reason); - } -} - -/* - * Function irlap_discovery_request (gen_addr_bit) - * - * Start one single discovery operation. - * - */ -void irlap_discovery_request(struct irlap_cb *self, discovery_t *discovery) -{ - struct irlap_info info; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - IRDA_ASSERT(discovery != NULL, return;); - - pr_debug("%s(), nslots = %d\n", __func__, discovery->nslots); - - IRDA_ASSERT((discovery->nslots == 1) || (discovery->nslots == 6) || - (discovery->nslots == 8) || (discovery->nslots == 16), - return;); - - /* Discovery is only possible in NDM mode */ - if (self->state != LAP_NDM) { - pr_debug("%s(), discovery only possible in NDM mode\n", - __func__); - irlap_discovery_confirm(self, NULL); - /* Note : in theory, if we are not in NDM, we could postpone - * the discovery like we do for connection request. - * In practice, it's not worth it. If the media was busy, - * it's likely next time around it won't be busy. If we are - * in REPLY state, we will get passive discovery info & event. - * Jean II */ - return; - } - - /* Check if last discovery request finished in time, or if - * it was aborted due to the media busy flag. */ - if (self->discovery_log != NULL) { - hashbin_delete(self->discovery_log, (FREE_FUNC) kfree); - self->discovery_log = NULL; - } - - /* All operations will occur at predictable time, no need to lock */ - self->discovery_log = hashbin_new(HB_NOLOCK); - - if (self->discovery_log == NULL) { - net_warn_ratelimited("%s(), Unable to allocate discovery log!\n", - __func__); - return; - } - - info.S = discovery->nslots; /* Number of slots */ - info.s = 0; /* Current slot */ - - self->discovery_cmd = discovery; - info.discovery = discovery; - - /* sysctl_slot_timeout bounds are checked in irsysctl.c - Jean II */ - self->slot_timeout = msecs_to_jiffies(sysctl_slot_timeout); - - irlap_do_event(self, DISCOVERY_REQUEST, NULL, &info); -} - -/* - * Function irlap_discovery_confirm (log) - * - * A device has been discovered in front of this station, we - * report directly to LMP. - */ -void irlap_discovery_confirm(struct irlap_cb *self, hashbin_t *discovery_log) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - - IRDA_ASSERT(self->notify.instance != NULL, return;); - - /* - * Check for successful discovery, since we are then allowed to clear - * the media busy condition (IrLAP 6.13.4 - p.94). This should allow - * us to make connection attempts much faster and easier (i.e. no - * collisions). - * Setting media busy to false will also generate an event allowing - * to process pending events in NDM state machine. - * Note : the spec doesn't define what's a successful discovery is. - * If we want Ultra to work, it's successful even if there is - * nobody discovered - Jean II - */ - if (discovery_log) - irda_device_set_media_busy(self->netdev, FALSE); - - /* Inform IrLMP */ - irlmp_link_discovery_confirm(self->notify.instance, discovery_log); -} - -/* - * Function irlap_discovery_indication (log) - * - * Somebody is trying to discover us! - * - */ -void irlap_discovery_indication(struct irlap_cb *self, discovery_t *discovery) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - IRDA_ASSERT(discovery != NULL, return;); - - IRDA_ASSERT(self->notify.instance != NULL, return;); - - /* A device is very likely to connect immediately after it performs - * a successful discovery. This means that in our case, we are much - * more likely to receive a connection request over the medium. - * So, we backoff to avoid collisions. - * IrLAP spec 6.13.4 suggest 100ms... - * Note : this little trick actually make a *BIG* difference. If I set - * my Linux box with discovery enabled and one Ultra frame sent every - * second, my Palm has no trouble connecting to it every time ! - * Jean II */ - irda_device_set_media_busy(self->netdev, SMALL); - - irlmp_link_discovery_indication(self->notify.instance, discovery); -} - -/* - * Function irlap_status_indication (quality_of_link) - */ -void irlap_status_indication(struct irlap_cb *self, int quality_of_link) -{ - switch (quality_of_link) { - case STATUS_NO_ACTIVITY: - net_info_ratelimited("IrLAP, no activity on link!\n"); - break; - case STATUS_NOISY: - net_info_ratelimited("IrLAP, noisy link!\n"); - break; - default: - break; - } - irlmp_status_indication(self->notify.instance, - quality_of_link, LOCK_NO_CHANGE); -} - -/* - * Function irlap_reset_indication (void) - */ -void irlap_reset_indication(struct irlap_cb *self) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - - if (self->state == LAP_RESET_WAIT) - irlap_do_event(self, RESET_REQUEST, NULL, NULL); - else - irlap_do_event(self, RESET_RESPONSE, NULL, NULL); -} - -/* - * Function irlap_reset_confirm (void) - */ -void irlap_reset_confirm(void) -{ -} - -/* - * Function irlap_generate_rand_time_slot (S, s) - * - * Generate a random time slot between s and S-1 where - * S = Number of slots (0 -> S-1) - * s = Current slot - */ -int irlap_generate_rand_time_slot(int S, int s) -{ - static int rand; - int slot; - - IRDA_ASSERT((S - s) > 0, return 0;); - - rand += jiffies; - rand ^= (rand << 12); - rand ^= (rand >> 20); - - slot = s + rand % (S-s); - - IRDA_ASSERT((slot >= s) || (slot < S), return 0;); - - return slot; -} - -/* - * Function irlap_update_nr_received (nr) - * - * Remove all acknowledged frames in current window queue. This code is - * not intuitive and you should not try to change it. If you think it - * contains bugs, please mail a patch to the author instead. - */ -void irlap_update_nr_received(struct irlap_cb *self, int nr) -{ - struct sk_buff *skb = NULL; - int count = 0; - - /* - * Remove all the ack-ed frames from the window queue. - */ - - /* - * Optimize for the common case. It is most likely that the receiver - * will acknowledge all the frames we have sent! So in that case we - * delete all frames stored in window. - */ - if (nr == self->vs) { - while ((skb = skb_dequeue(&self->wx_list)) != NULL) { - dev_kfree_skb(skb); - } - /* The last acked frame is the next to send minus one */ - self->va = nr - 1; - } else { - /* Remove all acknowledged frames in current window */ - while ((skb_peek(&self->wx_list) != NULL) && - (((self->va+1) % 8) != nr)) - { - skb = skb_dequeue(&self->wx_list); - dev_kfree_skb(skb); - - self->va = (self->va + 1) % 8; - count++; - } - } - - /* Advance window */ - self->window = self->window_size - skb_queue_len(&self->wx_list); -} - -/* - * Function irlap_validate_ns_received (ns) - * - * Validate the next to send (ns) field from received frame. - */ -int irlap_validate_ns_received(struct irlap_cb *self, int ns) -{ - /* ns as expected? */ - if (ns == self->vr) - return NS_EXPECTED; - /* - * Stations are allowed to treat invalid NS as unexpected NS - * IrLAP, Recv ... with-invalid-Ns. p. 84 - */ - return NS_UNEXPECTED; - - /* return NR_INVALID; */ -} -/* - * Function irlap_validate_nr_received (nr) - * - * Validate the next to receive (nr) field from received frame. - * - */ -int irlap_validate_nr_received(struct irlap_cb *self, int nr) -{ - /* nr as expected? */ - if (nr == self->vs) { - pr_debug("%s(), expected!\n", __func__); - return NR_EXPECTED; - } - - /* - * unexpected nr? (but within current window), first we check if the - * ns numbers of the frames in the current window wrap. - */ - if (self->va < self->vs) { - if ((nr >= self->va) && (nr <= self->vs)) - return NR_UNEXPECTED; - } else { - if ((nr >= self->va) || (nr <= self->vs)) - return NR_UNEXPECTED; - } - - /* Invalid nr! */ - return NR_INVALID; -} - -/* - * Function irlap_initiate_connection_state () - * - * Initialize the connection state parameters - * - */ -void irlap_initiate_connection_state(struct irlap_cb *self) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - - /* Next to send and next to receive */ - self->vs = self->vr = 0; - - /* Last frame which got acked (0 - 1) % 8 */ - self->va = 7; - - self->window = 1; - - self->remote_busy = FALSE; - self->retry_count = 0; -} - -/* - * Function irlap_wait_min_turn_around (self, qos) - * - * Wait negotiated minimum turn around time, this function actually sets - * the number of BOS's that must be sent before the next transmitted - * frame in order to delay for the specified amount of time. This is - * done to avoid using timers, and the forbidden udelay! - */ -void irlap_wait_min_turn_around(struct irlap_cb *self, struct qos_info *qos) -{ - __u32 min_turn_time; - __u32 speed; - - /* Get QoS values. */ - speed = qos->baud_rate.value; - min_turn_time = qos->min_turn_time.value; - - /* No need to calculate XBOFs for speeds over 115200 bps */ - if (speed > 115200) { - self->mtt_required = min_turn_time; - return; - } - - /* - * Send additional BOF's for the next frame for the requested - * min turn time, so now we must calculate how many chars (XBOF's) we - * must send for the requested time period (min turn time) - */ - self->xbofs_delay = irlap_min_turn_time_in_bytes(speed, min_turn_time); -} - -/* - * Function irlap_flush_all_queues (void) - * - * Flush all queues - * - */ -void irlap_flush_all_queues(struct irlap_cb *self) -{ - struct sk_buff* skb; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - - /* Free transmission queue */ - while ((skb = skb_dequeue(&self->txq)) != NULL) - dev_kfree_skb(skb); - - while ((skb = skb_dequeue(&self->txq_ultra)) != NULL) - dev_kfree_skb(skb); - - /* Free sliding window buffered packets */ - while ((skb = skb_dequeue(&self->wx_list)) != NULL) - dev_kfree_skb(skb); -} - -/* - * Function irlap_setspeed (self, speed) - * - * Change the speed of the IrDA port - * - */ -static void irlap_change_speed(struct irlap_cb *self, __u32 speed, int now) -{ - struct sk_buff *skb; - - pr_debug("%s(), setting speed to %d\n", __func__, speed); - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - - self->speed = speed; - - /* Change speed now, or just piggyback speed on frames */ - if (now) { - /* Send down empty frame to trigger speed change */ - skb = alloc_skb(0, GFP_ATOMIC); - if (skb) - irlap_queue_xmit(self, skb); - } -} - -/* - * Function irlap_init_qos_capabilities (self, qos) - * - * Initialize QoS for this IrLAP session, What we do is to compute the - * intersection of the QoS capabilities for the user, driver and for - * IrLAP itself. Normally, IrLAP will not specify any values, but it can - * be used to restrict certain values. - */ -static void irlap_init_qos_capabilities(struct irlap_cb *self, - struct qos_info *qos_user) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - IRDA_ASSERT(self->netdev != NULL, return;); - - /* Start out with the maximum QoS support possible */ - irda_init_max_qos_capabilies(&self->qos_rx); - - /* Apply drivers QoS capabilities */ - irda_qos_compute_intersection(&self->qos_rx, self->qos_dev); - - /* - * Check for user supplied QoS parameters. The service user is only - * allowed to supply these values. We check each parameter since the - * user may not have set all of them. - */ - if (qos_user) { - pr_debug("%s(), Found user specified QoS!\n", __func__); - - if (qos_user->baud_rate.bits) - self->qos_rx.baud_rate.bits &= qos_user->baud_rate.bits; - - if (qos_user->max_turn_time.bits) - self->qos_rx.max_turn_time.bits &= qos_user->max_turn_time.bits; - if (qos_user->data_size.bits) - self->qos_rx.data_size.bits &= qos_user->data_size.bits; - - if (qos_user->link_disc_time.bits) - self->qos_rx.link_disc_time.bits &= qos_user->link_disc_time.bits; - } - - /* Use 500ms in IrLAP for now */ - self->qos_rx.max_turn_time.bits &= 0x01; - - /* Set data size */ - /*self->qos_rx.data_size.bits &= 0x03;*/ - - irda_qos_bits_to_value(&self->qos_rx); -} - -/* - * Function irlap_apply_default_connection_parameters (void, now) - * - * Use the default connection and transmission parameters - */ -void irlap_apply_default_connection_parameters(struct irlap_cb *self) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - - /* xbofs : Default value in NDM */ - self->next_bofs = 12; - self->bofs_count = 12; - - /* NDM Speed is 9600 */ - irlap_change_speed(self, 9600, TRUE); - - /* Set mbusy when going to NDM state */ - irda_device_set_media_busy(self->netdev, TRUE); - - /* - * Generate random connection address for this session, which must - * be 7 bits wide and different from 0x00 and 0xfe - */ - while ((self->caddr == 0x00) || (self->caddr == 0xfe)) { - get_random_bytes(&self->caddr, sizeof(self->caddr)); - self->caddr &= 0xfe; - } - - /* Use default values until connection has been negitiated */ - self->slot_timeout = sysctl_slot_timeout; - self->final_timeout = FINAL_TIMEOUT; - self->poll_timeout = POLL_TIMEOUT; - self->wd_timeout = WD_TIMEOUT; - - /* Set some default values */ - self->qos_tx.baud_rate.value = 9600; - self->qos_rx.baud_rate.value = 9600; - self->qos_tx.max_turn_time.value = 0; - self->qos_rx.max_turn_time.value = 0; - self->qos_tx.min_turn_time.value = 0; - self->qos_rx.min_turn_time.value = 0; - self->qos_tx.data_size.value = 64; - self->qos_rx.data_size.value = 64; - self->qos_tx.window_size.value = 1; - self->qos_rx.window_size.value = 1; - self->qos_tx.additional_bofs.value = 12; - self->qos_rx.additional_bofs.value = 12; - self->qos_tx.link_disc_time.value = 0; - self->qos_rx.link_disc_time.value = 0; - - irlap_flush_all_queues(self); - - self->disconnect_pending = FALSE; - self->connect_pending = FALSE; -} - -/* - * Function irlap_apply_connection_parameters (qos, now) - * - * Initialize IrLAP with the negotiated QoS values - * - * If 'now' is false, the speed and xbofs will be changed after the next - * frame is sent. - * If 'now' is true, the speed and xbofs is changed immediately - */ -void irlap_apply_connection_parameters(struct irlap_cb *self, int now) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - - /* Set the negotiated xbofs value */ - self->next_bofs = self->qos_tx.additional_bofs.value; - if (now) - self->bofs_count = self->next_bofs; - - /* Set the negotiated link speed (may need the new xbofs value) */ - irlap_change_speed(self, self->qos_tx.baud_rate.value, now); - - self->window_size = self->qos_tx.window_size.value; - self->window = self->qos_tx.window_size.value; - -#ifdef CONFIG_IRDA_DYNAMIC_WINDOW - /* - * Calculate how many bytes it is possible to transmit before the - * link must be turned around - */ - self->line_capacity = - irlap_max_line_capacity(self->qos_tx.baud_rate.value, - self->qos_tx.max_turn_time.value); - self->bytes_left = self->line_capacity; -#endif /* CONFIG_IRDA_DYNAMIC_WINDOW */ - - - /* - * Initialize timeout values, some of the rules are listed on - * page 92 in IrLAP. - */ - IRDA_ASSERT(self->qos_tx.max_turn_time.value != 0, return;); - IRDA_ASSERT(self->qos_rx.max_turn_time.value != 0, return;); - /* The poll timeout applies only to the primary station. - * It defines the maximum time the primary stay in XMIT mode - * before timeout and turning the link around (sending a RR). - * Or, this is how much we can keep the pf bit in primary mode. - * Therefore, it must be lower or equal than our *OWN* max turn around. - * Jean II */ - self->poll_timeout = msecs_to_jiffies( - self->qos_tx.max_turn_time.value); - /* The Final timeout applies only to the primary station. - * It defines the maximum time the primary wait (mostly in RECV mode) - * for an answer from the secondary station before polling it again. - * Therefore, it must be greater or equal than our *PARTNER* - * max turn around time - Jean II */ - self->final_timeout = msecs_to_jiffies( - self->qos_rx.max_turn_time.value); - /* The Watchdog Bit timeout applies only to the secondary station. - * It defines the maximum time the secondary wait (mostly in RECV mode) - * for poll from the primary station before getting annoyed. - * Therefore, it must be greater or equal than our *PARTNER* - * max turn around time - Jean II */ - self->wd_timeout = self->final_timeout * 2; - - /* - * N1 and N2 are maximum retry count for *both* the final timer - * and the wd timer (with a factor 2) as defined above. - * After N1 retry of a timer, we give a warning to the user. - * After N2 retry, we consider the link dead and disconnect it. - * Jean II - */ - - /* - * Set N1 to 0 if Link Disconnect/Threshold Time = 3 and set it to - * 3 seconds otherwise. See page 71 in IrLAP for more details. - * Actually, it's not always 3 seconds, as we allow to set - * it via sysctl... Max maxtt is 500ms, and N1 need to be multiple - * of 2, so 1 second is minimum we can allow. - Jean II - */ - if (self->qos_tx.link_disc_time.value == sysctl_warn_noreply_time) - /* - * If we set N1 to 0, it will trigger immediately, which is - * not what we want. What we really want is to disable it, - * Jean II - */ - self->N1 = -2; /* Disable - Need to be multiple of 2*/ - else - self->N1 = sysctl_warn_noreply_time * 1000 / - self->qos_rx.max_turn_time.value; - - pr_debug("Setting N1 = %d\n", self->N1); - - /* Set N2 to match our own disconnect time */ - self->N2 = self->qos_tx.link_disc_time.value * 1000 / - self->qos_rx.max_turn_time.value; - pr_debug("Setting N2 = %d\n", self->N2); -} - -#ifdef CONFIG_PROC_FS -struct irlap_iter_state { - int id; -}; - -static void *irlap_seq_start(struct seq_file *seq, loff_t *pos) -{ - struct irlap_iter_state *iter = seq->private; - struct irlap_cb *self; - - /* Protect our access to the tsap list */ - spin_lock_irq(&irlap->hb_spinlock); - iter->id = 0; - - for (self = (struct irlap_cb *) hashbin_get_first(irlap); - self; self = (struct irlap_cb *) hashbin_get_next(irlap)) { - if (iter->id == *pos) - break; - ++iter->id; - } - - return self; -} - -static void *irlap_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - struct irlap_iter_state *iter = seq->private; - - ++*pos; - ++iter->id; - return (void *) hashbin_get_next(irlap); -} - -static void irlap_seq_stop(struct seq_file *seq, void *v) -{ - spin_unlock_irq(&irlap->hb_spinlock); -} - -static int irlap_seq_show(struct seq_file *seq, void *v) -{ - const struct irlap_iter_state *iter = seq->private; - const struct irlap_cb *self = v; - - IRDA_ASSERT(self->magic == LAP_MAGIC, return -EINVAL;); - - seq_printf(seq, "irlap%d ", iter->id); - seq_printf(seq, "state: %s\n", - irlap_state[self->state]); - - seq_printf(seq, " device name: %s, ", - (self->netdev) ? self->netdev->name : "bug"); - seq_printf(seq, "hardware name: %s\n", self->hw_name); - - seq_printf(seq, " caddr: %#02x, ", self->caddr); - seq_printf(seq, "saddr: %#08x, ", self->saddr); - seq_printf(seq, "daddr: %#08x\n", self->daddr); - - seq_printf(seq, " win size: %d, ", - self->window_size); - seq_printf(seq, "win: %d, ", self->window); -#ifdef CONFIG_IRDA_DYNAMIC_WINDOW - seq_printf(seq, "line capacity: %d, ", - self->line_capacity); - seq_printf(seq, "bytes left: %d\n", self->bytes_left); -#endif /* CONFIG_IRDA_DYNAMIC_WINDOW */ - seq_printf(seq, " tx queue len: %d ", - skb_queue_len(&self->txq)); - seq_printf(seq, "win queue len: %d ", - skb_queue_len(&self->wx_list)); - seq_printf(seq, "rbusy: %s", self->remote_busy ? - "TRUE" : "FALSE"); - seq_printf(seq, " mbusy: %s\n", self->media_busy ? - "TRUE" : "FALSE"); - - seq_printf(seq, " retrans: %d ", self->retry_count); - seq_printf(seq, "vs: %d ", self->vs); - seq_printf(seq, "vr: %d ", self->vr); - seq_printf(seq, "va: %d\n", self->va); - - seq_printf(seq, " qos\tbps\tmaxtt\tdsize\twinsize\taddbofs\tmintt\tldisc\tcomp\n"); - - seq_printf(seq, " tx\t%d\t", - self->qos_tx.baud_rate.value); - seq_printf(seq, "%d\t", - self->qos_tx.max_turn_time.value); - seq_printf(seq, "%d\t", - self->qos_tx.data_size.value); - seq_printf(seq, "%d\t", - self->qos_tx.window_size.value); - seq_printf(seq, "%d\t", - self->qos_tx.additional_bofs.value); - seq_printf(seq, "%d\t", - self->qos_tx.min_turn_time.value); - seq_printf(seq, "%d\t", - self->qos_tx.link_disc_time.value); - seq_printf(seq, "\n"); - - seq_printf(seq, " rx\t%d\t", - self->qos_rx.baud_rate.value); - seq_printf(seq, "%d\t", - self->qos_rx.max_turn_time.value); - seq_printf(seq, "%d\t", - self->qos_rx.data_size.value); - seq_printf(seq, "%d\t", - self->qos_rx.window_size.value); - seq_printf(seq, "%d\t", - self->qos_rx.additional_bofs.value); - seq_printf(seq, "%d\t", - self->qos_rx.min_turn_time.value); - seq_printf(seq, "%d\n", - self->qos_rx.link_disc_time.value); - - return 0; -} - -static const struct seq_operations irlap_seq_ops = { - .start = irlap_seq_start, - .next = irlap_seq_next, - .stop = irlap_seq_stop, - .show = irlap_seq_show, -}; - -static int irlap_seq_open(struct inode *inode, struct file *file) -{ - if (irlap == NULL) - return -EINVAL; - - return seq_open_private(file, &irlap_seq_ops, - sizeof(struct irlap_iter_state)); -} - -const struct file_operations irlap_seq_fops = { - .owner = THIS_MODULE, - .open = irlap_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, -}; - -#endif /* CONFIG_PROC_FS */ diff --git a/net/irda/irlap_event.c b/net/irda/irlap_event.c deleted file mode 100644 index 0e1b4d79f745..000000000000 --- a/net/irda/irlap_event.c +++ /dev/null @@ -1,2316 +0,0 @@ -/********************************************************************* - * - * Filename: irlap_event.c - * Version: 0.9 - * Description: IrLAP state machine implementation - * Status: Experimental. - * Author: Dag Brattli <dag@brattli.net> - * Created at: Sat Aug 16 00:59:29 1997 - * Modified at: Sat Dec 25 21:07:57 1999 - * Modified by: Dag Brattli <dag@brattli.net> - * - * Copyright (c) 1998-2000 Dag Brattli <dag@brattli.net>, - * Copyright (c) 1998 Thomas Davis <ratbert@radiks.net> - * All Rights Reserved. - * Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#include <linux/string.h> -#include <linux/kernel.h> -#include <linux/delay.h> -#include <linux/skbuff.h> -#include <linux/slab.h> - -#include <net/irda/irda.h> -#include <net/irda/irlap_event.h> - -#include <net/irda/timer.h> -#include <net/irda/irlap.h> -#include <net/irda/irlap_frame.h> -#include <net/irda/qos.h> -#include <net/irda/parameters.h> -#include <net/irda/irlmp.h> /* irlmp_flow_indication(), ... */ - -#include <net/irda/irda_device.h> - -#ifdef CONFIG_IRDA_FAST_RR -int sysctl_fast_poll_increase = 50; -#endif - -static int irlap_state_ndm (struct irlap_cb *self, IRLAP_EVENT event, - struct sk_buff *skb, struct irlap_info *info); -static int irlap_state_query (struct irlap_cb *self, IRLAP_EVENT event, - struct sk_buff *skb, struct irlap_info *info); -static int irlap_state_reply (struct irlap_cb *self, IRLAP_EVENT event, - struct sk_buff *skb, struct irlap_info *info); -static int irlap_state_conn (struct irlap_cb *self, IRLAP_EVENT event, - struct sk_buff *skb, struct irlap_info *info); -static int irlap_state_setup (struct irlap_cb *self, IRLAP_EVENT event, - struct sk_buff *skb, struct irlap_info *info); -static int irlap_state_offline(struct irlap_cb *self, IRLAP_EVENT event, - struct sk_buff *skb, struct irlap_info *info); -static int irlap_state_xmit_p (struct irlap_cb *self, IRLAP_EVENT event, - struct sk_buff *skb, struct irlap_info *info); -static int irlap_state_pclose (struct irlap_cb *self, IRLAP_EVENT event, - struct sk_buff *skb, struct irlap_info *info); -static int irlap_state_nrm_p (struct irlap_cb *self, IRLAP_EVENT event, - struct sk_buff *skb, struct irlap_info *info); -static int irlap_state_reset_wait(struct irlap_cb *self, IRLAP_EVENT event, - struct sk_buff *skb, struct irlap_info *info); -static int irlap_state_reset (struct irlap_cb *self, IRLAP_EVENT event, - struct sk_buff *skb, struct irlap_info *info); -static int irlap_state_nrm_s (struct irlap_cb *self, IRLAP_EVENT event, - struct sk_buff *skb, struct irlap_info *info); -static int irlap_state_xmit_s (struct irlap_cb *self, IRLAP_EVENT event, - struct sk_buff *skb, struct irlap_info *info); -static int irlap_state_sclose (struct irlap_cb *self, IRLAP_EVENT event, - struct sk_buff *skb, struct irlap_info *info); -static int irlap_state_reset_check(struct irlap_cb *, IRLAP_EVENT event, - struct sk_buff *, struct irlap_info *); - -static const char *const irlap_event[] __maybe_unused = { - "DISCOVERY_REQUEST", - "CONNECT_REQUEST", - "CONNECT_RESPONSE", - "DISCONNECT_REQUEST", - "DATA_REQUEST", - "RESET_REQUEST", - "RESET_RESPONSE", - "SEND_I_CMD", - "SEND_UI_FRAME", - "RECV_DISCOVERY_XID_CMD", - "RECV_DISCOVERY_XID_RSP", - "RECV_SNRM_CMD", - "RECV_TEST_CMD", - "RECV_TEST_RSP", - "RECV_UA_RSP", - "RECV_DM_RSP", - "RECV_RD_RSP", - "RECV_I_CMD", - "RECV_I_RSP", - "RECV_UI_FRAME", - "RECV_FRMR_RSP", - "RECV_RR_CMD", - "RECV_RR_RSP", - "RECV_RNR_CMD", - "RECV_RNR_RSP", - "RECV_REJ_CMD", - "RECV_REJ_RSP", - "RECV_SREJ_CMD", - "RECV_SREJ_RSP", - "RECV_DISC_CMD", - "SLOT_TIMER_EXPIRED", - "QUERY_TIMER_EXPIRED", - "FINAL_TIMER_EXPIRED", - "POLL_TIMER_EXPIRED", - "DISCOVERY_TIMER_EXPIRED", - "WD_TIMER_EXPIRED", - "BACKOFF_TIMER_EXPIRED", - "MEDIA_BUSY_TIMER_EXPIRED", -}; - -const char *const irlap_state[] = { - "LAP_NDM", - "LAP_QUERY", - "LAP_REPLY", - "LAP_CONN", - "LAP_SETUP", - "LAP_OFFLINE", - "LAP_XMIT_P", - "LAP_PCLOSE", - "LAP_NRM_P", - "LAP_RESET_WAIT", - "LAP_RESET", - "LAP_NRM_S", - "LAP_XMIT_S", - "LAP_SCLOSE", - "LAP_RESET_CHECK", -}; - -static int (*state[])(struct irlap_cb *self, IRLAP_EVENT event, - struct sk_buff *skb, struct irlap_info *info) = -{ - irlap_state_ndm, - irlap_state_query, - irlap_state_reply, - irlap_state_conn, - irlap_state_setup, - irlap_state_offline, - irlap_state_xmit_p, - irlap_state_pclose, - irlap_state_nrm_p, - irlap_state_reset_wait, - irlap_state_reset, - irlap_state_nrm_s, - irlap_state_xmit_s, - irlap_state_sclose, - irlap_state_reset_check, -}; - -/* - * Function irda_poll_timer_expired (data) - * - * Poll timer has expired. Normally we must now send a RR frame to the - * remote device - */ -static void irlap_poll_timer_expired(void *data) -{ - struct irlap_cb *self = (struct irlap_cb *) data; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - - irlap_do_event(self, POLL_TIMER_EXPIRED, NULL, NULL); -} - -/* - * Calculate and set time before we will have to send back the pf bit - * to the peer. Use in primary. - * Make sure that state is XMIT_P/XMIT_S when calling this function - * (and that nobody messed up with the state). - Jean II - */ -static void irlap_start_poll_timer(struct irlap_cb *self, int timeout) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - -#ifdef CONFIG_IRDA_FAST_RR - /* - * Send out the RR frames faster if our own transmit queue is empty, or - * if the peer is busy. The effect is a much faster conversation - */ - if (skb_queue_empty(&self->txq) || self->remote_busy) { - if (self->fast_RR == TRUE) { - /* - * Assert that the fast poll timer has not reached the - * normal poll timer yet - */ - if (self->fast_RR_timeout < timeout) { - /* - * FIXME: this should be a more configurable - * function - */ - self->fast_RR_timeout += - (sysctl_fast_poll_increase * HZ/1000); - - /* Use this fast(er) timeout instead */ - timeout = self->fast_RR_timeout; - } - } else { - self->fast_RR = TRUE; - - /* Start with just 0 ms */ - self->fast_RR_timeout = 0; - timeout = 0; - } - } else - self->fast_RR = FALSE; - - pr_debug("%s(), timeout=%d (%ld)\n", __func__, timeout, jiffies); -#endif /* CONFIG_IRDA_FAST_RR */ - - if (timeout == 0) - irlap_do_event(self, POLL_TIMER_EXPIRED, NULL, NULL); - else - irda_start_timer(&self->poll_timer, timeout, self, - irlap_poll_timer_expired); -} - -/* - * Function irlap_do_event (event, skb, info) - * - * Rushes through the state machine without any delay. If state == XMIT - * then send queued data frames. - */ -void irlap_do_event(struct irlap_cb *self, IRLAP_EVENT event, - struct sk_buff *skb, struct irlap_info *info) -{ - int ret; - - if (!self || self->magic != LAP_MAGIC) - return; - - pr_debug("%s(), event = %s, state = %s\n", __func__, - irlap_event[event], irlap_state[self->state]); - - ret = (*state[self->state])(self, event, skb, info); - - /* - * Check if there are any pending events that needs to be executed - */ - switch (self->state) { - case LAP_XMIT_P: /* FALLTHROUGH */ - case LAP_XMIT_S: - /* - * We just received the pf bit and are at the beginning - * of a new LAP transmit window. - * Check if there are any queued data frames, and do not - * try to disconnect link if we send any data frames, since - * that will change the state away form XMIT - */ - pr_debug("%s() : queue len = %d\n", __func__, - skb_queue_len(&self->txq)); - - if (!skb_queue_empty(&self->txq)) { - /* Prevent race conditions with irlap_data_request() */ - self->local_busy = TRUE; - - /* Theory of operation. - * We send frames up to when we fill the window or - * reach line capacity. Those frames will queue up - * in the device queue, and the driver will slowly - * send them. - * After each frame that we send, we poll the higher - * layer for more data. It's the right time to do - * that because the link layer need to perform the mtt - * and then send the first frame, so we can afford - * to send a bit of time in kernel space. - * The explicit flow indication allow to minimise - * buffers (== lower latency), to avoid higher layer - * polling via timers (== less context switches) and - * to implement a crude scheduler - Jean II */ - - /* Try to send away all queued data frames */ - while ((skb = skb_dequeue(&self->txq)) != NULL) { - /* Send one frame */ - ret = (*state[self->state])(self, SEND_I_CMD, - skb, NULL); - /* Drop reference count. - * It will be increase as needed in - * irlap_send_data_xxx() */ - kfree_skb(skb); - - /* Poll the higher layers for one more frame */ - irlmp_flow_indication(self->notify.instance, - FLOW_START); - - if (ret == -EPROTO) - break; /* Try again later! */ - } - /* Finished transmitting */ - self->local_busy = FALSE; - } else if (self->disconnect_pending) { - self->disconnect_pending = FALSE; - - ret = (*state[self->state])(self, DISCONNECT_REQUEST, - NULL, NULL); - } - break; -/* case LAP_NDM: */ -/* case LAP_CONN: */ -/* case LAP_RESET_WAIT: */ -/* case LAP_RESET_CHECK: */ - default: - break; - } -} - -/* - * Function irlap_state_ndm (event, skb, frame) - * - * NDM (Normal Disconnected Mode) state - * - */ -static int irlap_state_ndm(struct irlap_cb *self, IRLAP_EVENT event, - struct sk_buff *skb, struct irlap_info *info) -{ - discovery_t *discovery_rsp; - int ret = 0; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;); - - switch (event) { - case CONNECT_REQUEST: - IRDA_ASSERT(self->netdev != NULL, return -1;); - - if (self->media_busy) { - /* Note : this will never happen, because we test - * media busy in irlap_connect_request() and - * postpone the event... - Jean II */ - pr_debug("%s(), CONNECT_REQUEST: media busy!\n", - __func__); - - /* Always switch state before calling upper layers */ - irlap_next_state(self, LAP_NDM); - - irlap_disconnect_indication(self, LAP_MEDIA_BUSY); - } else { - irlap_send_snrm_frame(self, &self->qos_rx); - - /* Start Final-bit timer */ - irlap_start_final_timer(self, self->final_timeout); - - self->retry_count = 0; - irlap_next_state(self, LAP_SETUP); - } - break; - case RECV_SNRM_CMD: - /* Check if the frame contains and I field */ - if (info) { - self->daddr = info->daddr; - self->caddr = info->caddr; - - irlap_next_state(self, LAP_CONN); - - irlap_connect_indication(self, skb); - } else { - pr_debug("%s(), SNRM frame does not contain an I field!\n", - __func__); - } - break; - case DISCOVERY_REQUEST: - IRDA_ASSERT(info != NULL, return -1;); - - if (self->media_busy) { - pr_debug("%s(), DISCOVERY_REQUEST: media busy!\n", - __func__); - /* irlap->log.condition = MEDIA_BUSY; */ - - /* This will make IrLMP try again */ - irlap_discovery_confirm(self, NULL); - /* Note : the discovery log is not cleaned up here, - * it will be done in irlap_discovery_request() - * Jean II */ - return 0; - } - - self->S = info->S; - self->s = info->s; - irlap_send_discovery_xid_frame(self, info->S, info->s, TRUE, - info->discovery); - self->frame_sent = FALSE; - self->s++; - - irlap_start_slot_timer(self, self->slot_timeout); - irlap_next_state(self, LAP_QUERY); - break; - case RECV_DISCOVERY_XID_CMD: - IRDA_ASSERT(info != NULL, return -1;); - - /* Assert that this is not the final slot */ - if (info->s <= info->S) { - self->slot = irlap_generate_rand_time_slot(info->S, - info->s); - if (self->slot == info->s) { - discovery_rsp = irlmp_get_discovery_response(); - discovery_rsp->data.daddr = info->daddr; - - irlap_send_discovery_xid_frame(self, info->S, - self->slot, - FALSE, - discovery_rsp); - self->frame_sent = TRUE; - } else - self->frame_sent = FALSE; - - /* - * Go to reply state until end of discovery to - * inhibit our own transmissions. Set the timer - * to not stay forever there... Jean II - */ - irlap_start_query_timer(self, info->S, info->s); - irlap_next_state(self, LAP_REPLY); - } else { - /* This is the final slot. How is it possible ? - * This would happen is both discoveries are just slightly - * offset (if they are in sync, all packets are lost). - * Most often, all the discovery requests will be received - * in QUERY state (see my comment there), except for the - * last frame that will come here. - * The big trouble when it happen is that active discovery - * doesn't happen, because nobody answer the discoveries - * frame of the other guy, so the log shows up empty. - * What should we do ? - * Not much. It's too late to answer those discovery frames, - * so we just pass the info to IrLMP who will put it in the - * log (and post an event). - * Another cause would be devices that do discovery much - * slower than us, however the latest fixes should minimise - * those cases... - * Jean II - */ - pr_debug("%s(), Receiving final discovery request, missed the discovery slots :-(\n", - __func__); - - /* Last discovery request -> in the log */ - irlap_discovery_indication(self, info->discovery); - } - break; - case MEDIA_BUSY_TIMER_EXPIRED: - /* A bunch of events may be postponed because the media is - * busy (usually immediately after we close a connection), - * or while we are doing discovery (state query/reply). - * In all those cases, the media busy flag will be cleared - * when it's OK for us to process those postponed events. - * This event is not mentioned in the state machines in the - * IrLAP spec. It's because they didn't consider Ultra and - * postponing connection request is optional. - * Jean II */ -#ifdef CONFIG_IRDA_ULTRA - /* Send any pending Ultra frames if any */ - if (!skb_queue_empty(&self->txq_ultra)) { - /* We don't send the frame, just post an event. - * Also, previously this code was in timer.c... - * Jean II */ - ret = (*state[self->state])(self, SEND_UI_FRAME, - NULL, NULL); - } -#endif /* CONFIG_IRDA_ULTRA */ - /* Check if we should try to connect. - * This code was previously in irlap_do_event() */ - if (self->connect_pending) { - self->connect_pending = FALSE; - - /* This one *should* not pend in this state, except - * if a socket try to connect and immediately - * disconnect. - clear - Jean II */ - if (self->disconnect_pending) - irlap_disconnect_indication(self, LAP_DISC_INDICATION); - else - ret = (*state[self->state])(self, - CONNECT_REQUEST, - NULL, NULL); - self->disconnect_pending = FALSE; - } - /* Note : one way to test if this code works well (including - * media busy and small busy) is to create a user space - * application generating an Ultra packet every 3.05 sec (or - * 2.95 sec) and to see how it interact with discovery. - * It's fairly easy to check that no packet is lost, that the - * packets are postponed during discovery and that after - * discovery indication you have a 100ms "gap". - * As connection request and Ultra are now processed the same - * way, this avoid the tedious job of trying IrLAP connection - * in all those cases... - * Jean II */ - break; -#ifdef CONFIG_IRDA_ULTRA - case SEND_UI_FRAME: - { - int i; - /* Only allowed to repeat an operation twice */ - for (i=0; ((i<2) && (self->media_busy == FALSE)); i++) { - skb = skb_dequeue(&self->txq_ultra); - if (skb) - irlap_send_ui_frame(self, skb, CBROADCAST, - CMD_FRAME); - else - break; - /* irlap_send_ui_frame() won't increase skb reference - * count, so no dev_kfree_skb() - Jean II */ - } - if (i == 2) { - /* Force us to listen 500 ms again */ - irda_device_set_media_busy(self->netdev, TRUE); - } - break; - } - case RECV_UI_FRAME: - /* Only accept broadcast frames in NDM mode */ - if (info->caddr != CBROADCAST) { - pr_debug("%s(), not a broadcast frame!\n", - __func__); - } else - irlap_unitdata_indication(self, skb); - break; -#endif /* CONFIG_IRDA_ULTRA */ - case RECV_TEST_CMD: - /* Remove test frame header */ - skb_pull(skb, sizeof(struct test_frame)); - - /* - * Send response. This skb will not be sent out again, and - * will only be used to send out the same info as the cmd - */ - irlap_send_test_frame(self, CBROADCAST, info->daddr, skb); - break; - case RECV_TEST_RSP: - pr_debug("%s() not implemented!\n", __func__); - break; - default: - pr_debug("%s(), Unknown event %s\n", __func__, - irlap_event[event]); - - ret = -1; - break; - } - return ret; -} - -/* - * Function irlap_state_query (event, skb, info) - * - * QUERY state - * - */ -static int irlap_state_query(struct irlap_cb *self, IRLAP_EVENT event, - struct sk_buff *skb, struct irlap_info *info) -{ - int ret = 0; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;); - - switch (event) { - case RECV_DISCOVERY_XID_RSP: - IRDA_ASSERT(info != NULL, return -1;); - IRDA_ASSERT(info->discovery != NULL, return -1;); - - pr_debug("%s(), daddr=%08x\n", __func__, - info->discovery->data.daddr); - - if (!self->discovery_log) { - net_warn_ratelimited("%s: discovery log is gone! maybe the discovery timeout has been set too short?\n", - __func__); - break; - } - hashbin_insert(self->discovery_log, - (irda_queue_t *) info->discovery, - info->discovery->data.daddr, NULL); - - /* Keep state */ - /* irlap_next_state(self, LAP_QUERY); */ - - break; - case RECV_DISCOVERY_XID_CMD: - /* Yes, it is possible to receive those frames in this mode. - * Note that most often the last discovery request won't - * occur here but in NDM state (see my comment there). - * What should we do ? - * Not much. We are currently performing our own discovery, - * therefore we can't answer those frames. We don't want - * to change state either. We just pass the info to - * IrLMP who will put it in the log (and post an event). - * Jean II - */ - - IRDA_ASSERT(info != NULL, return -1;); - - pr_debug("%s(), Receiving discovery request (s = %d) while performing discovery :-(\n", - __func__, info->s); - - /* Last discovery request ? */ - if (info->s == 0xff) - irlap_discovery_indication(self, info->discovery); - break; - case SLOT_TIMER_EXPIRED: - /* - * Wait a little longer if we detect an incoming frame. This - * is not mentioned in the spec, but is a good thing to do, - * since we want to work even with devices that violate the - * timing requirements. - */ - if (irda_device_is_receiving(self->netdev) && !self->add_wait) { - pr_debug("%s(), device is slow to answer, waiting some more!\n", - __func__); - irlap_start_slot_timer(self, msecs_to_jiffies(10)); - self->add_wait = TRUE; - return ret; - } - self->add_wait = FALSE; - - if (self->s < self->S) { - irlap_send_discovery_xid_frame(self, self->S, - self->s, TRUE, - self->discovery_cmd); - self->s++; - irlap_start_slot_timer(self, self->slot_timeout); - - /* Keep state */ - irlap_next_state(self, LAP_QUERY); - } else { - /* This is the final slot! */ - irlap_send_discovery_xid_frame(self, self->S, 0xff, - TRUE, - self->discovery_cmd); - - /* Always switch state before calling upper layers */ - irlap_next_state(self, LAP_NDM); - - /* - * We are now finished with the discovery procedure, - * so now we must return the results - */ - irlap_discovery_confirm(self, self->discovery_log); - - /* IrLMP should now have taken care of the log */ - self->discovery_log = NULL; - } - break; - default: - pr_debug("%s(), Unknown event %s\n", __func__, - irlap_event[event]); - - ret = -1; - break; - } - return ret; -} - -/* - * Function irlap_state_reply (self, event, skb, info) - * - * REPLY, we have received a XID discovery frame from a device and we - * are waiting for the right time slot to send a response XID frame - * - */ -static int irlap_state_reply(struct irlap_cb *self, IRLAP_EVENT event, - struct sk_buff *skb, struct irlap_info *info) -{ - discovery_t *discovery_rsp; - int ret=0; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;); - - switch (event) { - case QUERY_TIMER_EXPIRED: - pr_debug("%s(), QUERY_TIMER_EXPIRED <%ld>\n", - __func__, jiffies); - irlap_next_state(self, LAP_NDM); - break; - case RECV_DISCOVERY_XID_CMD: - IRDA_ASSERT(info != NULL, return -1;); - /* Last frame? */ - if (info->s == 0xff) { - del_timer(&self->query_timer); - - /* info->log.condition = REMOTE; */ - - /* Always switch state before calling upper layers */ - irlap_next_state(self, LAP_NDM); - - irlap_discovery_indication(self, info->discovery); - } else { - /* If it's our slot, send our reply */ - if ((info->s >= self->slot) && (!self->frame_sent)) { - discovery_rsp = irlmp_get_discovery_response(); - discovery_rsp->data.daddr = info->daddr; - - irlap_send_discovery_xid_frame(self, info->S, - self->slot, - FALSE, - discovery_rsp); - - self->frame_sent = TRUE; - } - /* Readjust our timer to accommodate devices - * doing faster or slower discovery than us... - * Jean II */ - irlap_start_query_timer(self, info->S, info->s); - - /* Keep state */ - //irlap_next_state(self, LAP_REPLY); - } - break; - default: - pr_debug("%s(), Unknown event %d, %s\n", __func__, - event, irlap_event[event]); - - ret = -1; - break; - } - return ret; -} - -/* - * Function irlap_state_conn (event, skb, info) - * - * CONN, we have received a SNRM command and is waiting for the upper - * layer to accept or refuse connection - * - */ -static int irlap_state_conn(struct irlap_cb *self, IRLAP_EVENT event, - struct sk_buff *skb, struct irlap_info *info) -{ - int ret = 0; - - pr_debug("%s(), event=%s\n", __func__, irlap_event[event]); - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;); - - switch (event) { - case CONNECT_RESPONSE: - skb_pull(skb, sizeof(struct snrm_frame)); - - IRDA_ASSERT(self->netdev != NULL, return -1;); - - irlap_qos_negotiate(self, skb); - - irlap_initiate_connection_state(self); - - /* - * Applying the parameters now will make sure we change speed - * *after* we have sent the next frame - */ - irlap_apply_connection_parameters(self, FALSE); - - /* - * Sending this frame will force a speed change after it has - * been sent (i.e. the frame will be sent at 9600). - */ - irlap_send_ua_response_frame(self, &self->qos_rx); - -#if 0 - /* - * We are allowed to send two frames, but this may increase - * the connect latency, so lets not do it for now. - */ - /* This is full of good intentions, but doesn't work in - * practice. - * After sending the first UA response, we switch the - * dongle to the negotiated speed, which is usually - * different than 9600 kb/s. - * From there, there is two solutions : - * 1) The other end has received the first UA response : - * it will set up the connection, move to state LAP_NRM_P, - * and will ignore and drop the second UA response. - * Actually, it's even worse : the other side will almost - * immediately send a RR that will likely collide with the - * UA response (depending on negotiated turnaround). - * 2) The other end has not received the first UA response, - * will stay at 9600 and will never see the second UA response. - * Jean II */ - irlap_send_ua_response_frame(self, &self->qos_rx); -#endif - - /* - * The WD-timer could be set to the duration of the P-timer - * for this case, but it is recommended to use twice the - * value (note 3 IrLAP p. 60). - */ - irlap_start_wd_timer(self, self->wd_timeout); - irlap_next_state(self, LAP_NRM_S); - - break; - case RECV_DISCOVERY_XID_CMD: - pr_debug("%s(), event RECV_DISCOVER_XID_CMD!\n", - __func__); - irlap_next_state(self, LAP_NDM); - - break; - case DISCONNECT_REQUEST: - pr_debug("%s(), Disconnect request!\n", __func__); - irlap_send_dm_frame(self); - irlap_next_state( self, LAP_NDM); - irlap_disconnect_indication(self, LAP_DISC_INDICATION); - break; - default: - pr_debug("%s(), Unknown event %d, %s\n", __func__, - event, irlap_event[event]); - - ret = -1; - break; - } - - return ret; -} - -/* - * Function irlap_state_setup (event, skb, frame) - * - * SETUP state, The local layer has transmitted a SNRM command frame to - * a remote peer layer and is awaiting a reply . - * - */ -static int irlap_state_setup(struct irlap_cb *self, IRLAP_EVENT event, - struct sk_buff *skb, struct irlap_info *info) -{ - int ret = 0; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;); - - switch (event) { - case FINAL_TIMER_EXPIRED: - if (self->retry_count < self->N3) { -/* - * Perform random backoff, Wait a random number of time units, minimum - * duration half the time taken to transmitt a SNRM frame, maximum duration - * 1.5 times the time taken to transmit a SNRM frame. So this time should - * between 15 msecs and 45 msecs. - */ - irlap_start_backoff_timer(self, msecs_to_jiffies(20 + - (jiffies % 30))); - } else { - /* Always switch state before calling upper layers */ - irlap_next_state(self, LAP_NDM); - - irlap_disconnect_indication(self, LAP_FOUND_NONE); - } - break; - case BACKOFF_TIMER_EXPIRED: - irlap_send_snrm_frame(self, &self->qos_rx); - irlap_start_final_timer(self, self->final_timeout); - self->retry_count++; - break; - case RECV_SNRM_CMD: - pr_debug("%s(), SNRM battle!\n", __func__); - - IRDA_ASSERT(skb != NULL, return 0;); - IRDA_ASSERT(info != NULL, return 0;); - - /* - * The device with the largest device address wins the battle - * (both have sent a SNRM command!) - */ - if (info &&(info->daddr > self->saddr)) { - del_timer(&self->final_timer); - irlap_initiate_connection_state(self); - - IRDA_ASSERT(self->netdev != NULL, return -1;); - - skb_pull(skb, sizeof(struct snrm_frame)); - - irlap_qos_negotiate(self, skb); - - /* Send UA frame and then change link settings */ - irlap_apply_connection_parameters(self, FALSE); - irlap_send_ua_response_frame(self, &self->qos_rx); - - irlap_next_state(self, LAP_NRM_S); - irlap_connect_confirm(self, skb); - - /* - * The WD-timer could be set to the duration of the - * P-timer for this case, but it is recommended - * to use twice the value (note 3 IrLAP p. 60). - */ - irlap_start_wd_timer(self, self->wd_timeout); - } else { - /* We just ignore the other device! */ - irlap_next_state(self, LAP_SETUP); - } - break; - case RECV_UA_RSP: - /* Stop F-timer */ - del_timer(&self->final_timer); - - /* Initiate connection state */ - irlap_initiate_connection_state(self); - - /* Negotiate connection parameters */ - IRDA_ASSERT(skb->len > 10, return -1;); - - skb_pull(skb, sizeof(struct ua_frame)); - - IRDA_ASSERT(self->netdev != NULL, return -1;); - - irlap_qos_negotiate(self, skb); - - /* Set the new link setting *now* (before the rr frame) */ - irlap_apply_connection_parameters(self, TRUE); - self->retry_count = 0; - - /* Wait for turnaround time to give a chance to the other - * device to be ready to receive us. - * Note : the time to switch speed is typically larger - * than the turnaround time, but as we don't have the other - * side speed switch time, that's our best guess... - * Jean II */ - irlap_wait_min_turn_around(self, &self->qos_tx); - - /* This frame will actually be sent at the new speed */ - irlap_send_rr_frame(self, CMD_FRAME); - - /* The timer is set to half the normal timer to quickly - * detect a failure to negotiate the new connection - * parameters. IrLAP 6.11.3.2, note 3. - * Note that currently we don't process this failure - * properly, as we should do a quick disconnect. - * Jean II */ - irlap_start_final_timer(self, self->final_timeout/2); - irlap_next_state(self, LAP_NRM_P); - - irlap_connect_confirm(self, skb); - break; - case RECV_DM_RSP: /* FALLTHROUGH */ - case RECV_DISC_CMD: - del_timer(&self->final_timer); - irlap_next_state(self, LAP_NDM); - - irlap_disconnect_indication(self, LAP_DISC_INDICATION); - break; - default: - pr_debug("%s(), Unknown event %d, %s\n", __func__, - event, irlap_event[event]); - - ret = -1; - break; - } - return ret; -} - -/* - * Function irlap_state_offline (self, event, skb, info) - * - * OFFLINE state, not used for now! - * - */ -static int irlap_state_offline(struct irlap_cb *self, IRLAP_EVENT event, - struct sk_buff *skb, struct irlap_info *info) -{ - pr_debug("%s(), Unknown event\n", __func__); - - return -1; -} - -/* - * Function irlap_state_xmit_p (self, event, skb, info) - * - * XMIT, Only the primary station has right to transmit, and we - * therefore do not expect to receive any transmissions from other - * stations. - * - */ -static int irlap_state_xmit_p(struct irlap_cb *self, IRLAP_EVENT event, - struct sk_buff *skb, struct irlap_info *info) -{ - int ret = 0; - - switch (event) { - case SEND_I_CMD: - /* - * Only send frame if send-window > 0. - */ - if ((self->window > 0) && (!self->remote_busy)) { - int nextfit; -#ifdef CONFIG_IRDA_DYNAMIC_WINDOW - struct sk_buff *skb_next; - - /* With DYNAMIC_WINDOW, we keep the window size - * maximum, and adapt on the packets we are sending. - * At 115k, we can send only 2 packets of 2048 bytes - * in a 500 ms turnaround. Without this option, we - * would always limit the window to 2. With this - * option, if we send smaller packets, we can send - * up to 7 of them (always depending on QoS). - * Jean II */ - - /* Look at the next skb. This is safe, as we are - * the only consumer of the Tx queue (if we are not, - * we have other problems) - Jean II */ - skb_next = skb_peek(&self->txq); - - /* Check if a subsequent skb exist and would fit in - * the current window (with respect to turnaround - * time). - * This allow us to properly mark the current packet - * with the pf bit, to avoid falling back on the - * second test below, and avoid waiting the - * end of the window and sending a extra RR. - * Note : (skb_next != NULL) <=> (skb_queue_len() > 0) - * Jean II */ - nextfit = ((skb_next != NULL) && - ((skb_next->len + skb->len) <= - self->bytes_left)); - - /* - * The current packet may not fit ! Because of test - * above, this should not happen any more !!! - * Test if we have transmitted more bytes over the - * link than its possible to do with the current - * speed and turn-around-time. - */ - if((!nextfit) && (skb->len > self->bytes_left)) { - pr_debug("%s(), Not allowed to transmit more bytes!\n", - __func__); - /* Requeue the skb */ - skb_queue_head(&self->txq, skb_get(skb)); - /* - * We should switch state to LAP_NRM_P, but - * that is not possible since we must be sure - * that we poll the other side. Since we have - * used up our time, the poll timer should - * trigger anyway now, so we just wait for it - * DB - */ - /* - * Sorry, but that's not totally true. If - * we send 2000B packets, we may wait another - * 1000B until our turnaround expire. That's - * why we need to be proactive in avoiding - * coming here. - Jean II - */ - return -EPROTO; - } - - /* Subtract space used by this skb */ - self->bytes_left -= skb->len; -#else /* CONFIG_IRDA_DYNAMIC_WINDOW */ - /* Window has been adjusted for the max packet - * size, so much simpler... - Jean II */ - nextfit = !skb_queue_empty(&self->txq); -#endif /* CONFIG_IRDA_DYNAMIC_WINDOW */ - /* - * Send data with poll bit cleared only if window > 1 - * and there is more frames after this one to be sent - */ - if ((self->window > 1) && (nextfit)) { - /* More packet to send in current window */ - irlap_send_data_primary(self, skb); - irlap_next_state(self, LAP_XMIT_P); - } else { - /* Final packet of window */ - irlap_send_data_primary_poll(self, skb); - - /* - * Make sure state machine does not try to send - * any more frames - */ - ret = -EPROTO; - } -#ifdef CONFIG_IRDA_FAST_RR - /* Peer may want to reply immediately */ - self->fast_RR = FALSE; -#endif /* CONFIG_IRDA_FAST_RR */ - } else { - pr_debug("%s(), Unable to send! remote busy?\n", - __func__); - skb_queue_head(&self->txq, skb_get(skb)); - - /* - * The next ret is important, because it tells - * irlap_next_state _not_ to deliver more frames - */ - ret = -EPROTO; - } - break; - case POLL_TIMER_EXPIRED: - pr_debug("%s(), POLL_TIMER_EXPIRED <%ld>\n", - __func__, jiffies); - irlap_send_rr_frame(self, CMD_FRAME); - /* Return to NRM properly - Jean II */ - self->window = self->window_size; -#ifdef CONFIG_IRDA_DYNAMIC_WINDOW - /* Allowed to transmit a maximum number of bytes again. */ - self->bytes_left = self->line_capacity; -#endif /* CONFIG_IRDA_DYNAMIC_WINDOW */ - irlap_start_final_timer(self, self->final_timeout); - irlap_next_state(self, LAP_NRM_P); - break; - case DISCONNECT_REQUEST: - del_timer(&self->poll_timer); - irlap_wait_min_turn_around(self, &self->qos_tx); - irlap_send_disc_frame(self); - irlap_flush_all_queues(self); - irlap_start_final_timer(self, self->final_timeout); - self->retry_count = 0; - irlap_next_state(self, LAP_PCLOSE); - break; - case DATA_REQUEST: - /* Nothing to do, irlap_do_event() will send the packet - * when we return... - Jean II */ - break; - default: - pr_debug("%s(), Unknown event %s\n", - __func__, irlap_event[event]); - - ret = -EINVAL; - break; - } - return ret; -} - -/* - * Function irlap_state_pclose (event, skb, info) - * - * PCLOSE state - */ -static int irlap_state_pclose(struct irlap_cb *self, IRLAP_EVENT event, - struct sk_buff *skb, struct irlap_info *info) -{ - int ret = 0; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;); - - switch (event) { - case RECV_UA_RSP: /* FALLTHROUGH */ - case RECV_DM_RSP: - del_timer(&self->final_timer); - - /* Set new link parameters */ - irlap_apply_default_connection_parameters(self); - - /* Always switch state before calling upper layers */ - irlap_next_state(self, LAP_NDM); - - irlap_disconnect_indication(self, LAP_DISC_INDICATION); - break; - case FINAL_TIMER_EXPIRED: - if (self->retry_count < self->N3) { - irlap_wait_min_turn_around(self, &self->qos_tx); - irlap_send_disc_frame(self); - irlap_start_final_timer(self, self->final_timeout); - self->retry_count++; - /* Keep state */ - } else { - irlap_apply_default_connection_parameters(self); - - /* Always switch state before calling upper layers */ - irlap_next_state(self, LAP_NDM); - - irlap_disconnect_indication(self, LAP_NO_RESPONSE); - } - break; - default: - pr_debug("%s(), Unknown event %d\n", __func__, event); - - ret = -1; - break; - } - return ret; -} - -/* - * Function irlap_state_nrm_p (self, event, skb, info) - * - * NRM_P (Normal Response Mode as Primary), The primary station has given - * permissions to a secondary station to transmit IrLAP resonse frames - * (by sending a frame with the P bit set). The primary station will not - * transmit any frames and is expecting to receive frames only from the - * secondary to which transmission permissions has been given. - */ -static int irlap_state_nrm_p(struct irlap_cb *self, IRLAP_EVENT event, - struct sk_buff *skb, struct irlap_info *info) -{ - int ret = 0; - int ns_status; - int nr_status; - - switch (event) { - case RECV_I_RSP: /* Optimize for the common case */ - if (unlikely(skb->len <= LAP_ADDR_HEADER + LAP_CTRL_HEADER)) { - /* - * Input validation check: a stir4200/mcp2150 - * combination sometimes results in an empty i:rsp. - * This makes no sense; we can just ignore the frame - * and send an rr:cmd immediately. This happens before - * changing nr or ns so triggers a retransmit - */ - irlap_wait_min_turn_around(self, &self->qos_tx); - irlap_send_rr_frame(self, CMD_FRAME); - /* Keep state */ - break; - } - /* FIXME: must check for remote_busy below */ -#ifdef CONFIG_IRDA_FAST_RR - /* - * Reset the fast_RR so we can use the fast RR code with - * full speed the next time since peer may have more frames - * to transmitt - */ - self->fast_RR = FALSE; -#endif /* CONFIG_IRDA_FAST_RR */ - IRDA_ASSERT( info != NULL, return -1;); - - ns_status = irlap_validate_ns_received(self, info->ns); - nr_status = irlap_validate_nr_received(self, info->nr); - - /* - * Check for expected I(nformation) frame - */ - if ((ns_status == NS_EXPECTED) && (nr_status == NR_EXPECTED)) { - - /* Update Vr (next frame for us to receive) */ - self->vr = (self->vr + 1) % 8; - - /* Update Nr received, cleanup our retry queue */ - irlap_update_nr_received(self, info->nr); - - /* - * Got expected NR, so reset the - * retry_count. This is not done by IrLAP spec, - * which is strange! - */ - self->retry_count = 0; - self->ack_required = TRUE; - - /* poll bit cleared? */ - if (!info->pf) { - /* Keep state, do not move this line */ - irlap_next_state(self, LAP_NRM_P); - - irlap_data_indication(self, skb, FALSE); - } else { - /* No longer waiting for pf */ - del_timer(&self->final_timer); - - irlap_wait_min_turn_around(self, &self->qos_tx); - - /* Call higher layer *before* changing state - * to give them a chance to send data in the - * next LAP frame. - * Jean II */ - irlap_data_indication(self, skb, FALSE); - - /* XMIT states are the most dangerous state - * to be in, because user requests are - * processed directly and may change state. - * On the other hand, in NDM_P, those - * requests are queued and we will process - * them when we return to irlap_do_event(). - * Jean II - */ - irlap_next_state(self, LAP_XMIT_P); - - /* This is the last frame. - * Make sure it's always called in XMIT state. - * - Jean II */ - irlap_start_poll_timer(self, self->poll_timeout); - } - break; - - } - /* Unexpected next to send (Ns) */ - if ((ns_status == NS_UNEXPECTED) && (nr_status == NR_EXPECTED)) - { - if (!info->pf) { - irlap_update_nr_received(self, info->nr); - - /* - * Wait until the last frame before doing - * anything - */ - - /* Keep state */ - irlap_next_state(self, LAP_NRM_P); - } else { - pr_debug("%s(), missing or duplicate frame!\n", - __func__); - - /* Update Nr received */ - irlap_update_nr_received(self, info->nr); - - irlap_wait_min_turn_around(self, &self->qos_tx); - irlap_send_rr_frame(self, CMD_FRAME); - - self->ack_required = FALSE; - - irlap_start_final_timer(self, self->final_timeout); - irlap_next_state(self, LAP_NRM_P); - } - break; - } - /* - * Unexpected next to receive (Nr) - */ - if ((ns_status == NS_EXPECTED) && (nr_status == NR_UNEXPECTED)) - { - if (info->pf) { - self->vr = (self->vr + 1) % 8; - - /* Update Nr received */ - irlap_update_nr_received(self, info->nr); - - /* Resend rejected frames */ - irlap_resend_rejected_frames(self, CMD_FRAME); - - self->ack_required = FALSE; - - /* Make sure we account for the time - * to transmit our frames. See comemnts - * in irlap_send_data_primary_poll(). - * Jean II */ - irlap_start_final_timer(self, 2 * self->final_timeout); - - /* Keep state, do not move this line */ - irlap_next_state(self, LAP_NRM_P); - - irlap_data_indication(self, skb, FALSE); - } else { - /* - * Do not resend frames until the last - * frame has arrived from the other - * device. This is not documented in - * IrLAP!! - */ - self->vr = (self->vr + 1) % 8; - - /* Update Nr received */ - irlap_update_nr_received(self, info->nr); - - self->ack_required = FALSE; - - /* Keep state, do not move this line!*/ - irlap_next_state(self, LAP_NRM_P); - - irlap_data_indication(self, skb, FALSE); - } - break; - } - /* - * Unexpected next to send (Ns) and next to receive (Nr) - * Not documented by IrLAP! - */ - if ((ns_status == NS_UNEXPECTED) && - (nr_status == NR_UNEXPECTED)) - { - pr_debug("%s(), unexpected nr and ns!\n", - __func__); - if (info->pf) { - /* Resend rejected frames */ - irlap_resend_rejected_frames(self, CMD_FRAME); - - /* Give peer some time to retransmit! - * But account for our own Tx. */ - irlap_start_final_timer(self, 2 * self->final_timeout); - - /* Keep state, do not move this line */ - irlap_next_state(self, LAP_NRM_P); - } else { - /* Update Nr received */ - /* irlap_update_nr_received( info->nr); */ - - self->ack_required = FALSE; - } - break; - } - - /* - * Invalid NR or NS - */ - if ((nr_status == NR_INVALID) || (ns_status == NS_INVALID)) { - if (info->pf) { - del_timer(&self->final_timer); - - irlap_next_state(self, LAP_RESET_WAIT); - - irlap_disconnect_indication(self, LAP_RESET_INDICATION); - self->xmitflag = TRUE; - } else { - del_timer(&self->final_timer); - - irlap_disconnect_indication(self, LAP_RESET_INDICATION); - - self->xmitflag = FALSE; - } - break; - } - pr_debug("%s(), Not implemented!\n", __func__); - pr_debug("%s(), event=%s, ns_status=%d, nr_status=%d\n", - __func__, irlap_event[event], ns_status, nr_status); - break; - case RECV_UI_FRAME: - /* Poll bit cleared? */ - if (!info->pf) { - irlap_data_indication(self, skb, TRUE); - irlap_next_state(self, LAP_NRM_P); - } else { - del_timer(&self->final_timer); - irlap_data_indication(self, skb, TRUE); - irlap_next_state(self, LAP_XMIT_P); - pr_debug("%s: RECV_UI_FRAME: next state %s\n", - __func__, irlap_state[self->state]); - irlap_start_poll_timer(self, self->poll_timeout); - } - break; - case RECV_RR_RSP: - /* - * If you get a RR, the remote isn't busy anymore, - * no matter what the NR - */ - self->remote_busy = FALSE; - - /* Stop final timer */ - del_timer(&self->final_timer); - - /* - * Nr as expected? - */ - ret = irlap_validate_nr_received(self, info->nr); - if (ret == NR_EXPECTED) { - /* Update Nr received */ - irlap_update_nr_received(self, info->nr); - - /* - * Got expected NR, so reset the retry_count. This - * is not done by the IrLAP standard , which is - * strange! DB. - */ - self->retry_count = 0; - irlap_wait_min_turn_around(self, &self->qos_tx); - - irlap_next_state(self, LAP_XMIT_P); - - /* Start poll timer */ - irlap_start_poll_timer(self, self->poll_timeout); - } else if (ret == NR_UNEXPECTED) { - IRDA_ASSERT(info != NULL, return -1;); - /* - * Unexpected nr! - */ - - /* Update Nr received */ - irlap_update_nr_received(self, info->nr); - - pr_debug("RECV_RR_FRAME: Retrans:%d, nr=%d, va=%d, vs=%d, vr=%d\n", - self->retry_count, info->nr, self->va, - self->vs, self->vr); - - /* Resend rejected frames */ - irlap_resend_rejected_frames(self, CMD_FRAME); - irlap_start_final_timer(self, self->final_timeout * 2); - - irlap_next_state(self, LAP_NRM_P); - } else if (ret == NR_INVALID) { - pr_debug("%s(), Received RR with invalid nr !\n", - __func__); - - irlap_next_state(self, LAP_RESET_WAIT); - - irlap_disconnect_indication(self, LAP_RESET_INDICATION); - self->xmitflag = TRUE; - } - break; - case RECV_RNR_RSP: - IRDA_ASSERT(info != NULL, return -1;); - - /* Stop final timer */ - del_timer(&self->final_timer); - self->remote_busy = TRUE; - - /* Update Nr received */ - irlap_update_nr_received(self, info->nr); - irlap_next_state(self, LAP_XMIT_P); - - /* Start poll timer */ - irlap_start_poll_timer(self, self->poll_timeout); - break; - case RECV_FRMR_RSP: - del_timer(&self->final_timer); - self->xmitflag = TRUE; - irlap_next_state(self, LAP_RESET_WAIT); - irlap_reset_indication(self); - break; - case FINAL_TIMER_EXPIRED: - /* - * We are allowed to wait for additional 300 ms if - * final timer expires when we are in the middle - * of receiving a frame (page 45, IrLAP). Check that - * we only do this once for each frame. - */ - if (irda_device_is_receiving(self->netdev) && !self->add_wait) { - pr_debug("FINAL_TIMER_EXPIRED when receiving a frame! Waiting a little bit more!\n"); - irlap_start_final_timer(self, msecs_to_jiffies(300)); - - /* - * Don't allow this to happen one more time in a row, - * or else we can get a pretty tight loop here if - * if we only receive half a frame. DB. - */ - self->add_wait = TRUE; - break; - } - self->add_wait = FALSE; - - /* N2 is the disconnect timer. Until we reach it, we retry */ - if (self->retry_count < self->N2) { - if (skb_peek(&self->wx_list) == NULL) { - /* Retry sending the pf bit to the secondary */ - pr_debug("nrm_p: resending rr"); - irlap_wait_min_turn_around(self, &self->qos_tx); - irlap_send_rr_frame(self, CMD_FRAME); - } else { - pr_debug("nrm_p: resend frames"); - irlap_resend_rejected_frames(self, CMD_FRAME); - } - - irlap_start_final_timer(self, self->final_timeout); - self->retry_count++; - pr_debug("irlap_state_nrm_p: FINAL_TIMER_EXPIRED: retry_count=%d\n", - self->retry_count); - - /* Early warning event. I'm using a pretty liberal - * interpretation of the spec and generate an event - * every time the timer is multiple of N1 (and not - * only the first time). This allow application - * to know precisely if connectivity restart... - * Jean II */ - if((self->retry_count % self->N1) == 0) - irlap_status_indication(self, - STATUS_NO_ACTIVITY); - - /* Keep state */ - } else { - irlap_apply_default_connection_parameters(self); - - /* Always switch state before calling upper layers */ - irlap_next_state(self, LAP_NDM); - irlap_disconnect_indication(self, LAP_NO_RESPONSE); - } - break; - case RECV_REJ_RSP: - irlap_update_nr_received(self, info->nr); - if (self->remote_busy) { - irlap_wait_min_turn_around(self, &self->qos_tx); - irlap_send_rr_frame(self, CMD_FRAME); - } else - irlap_resend_rejected_frames(self, CMD_FRAME); - irlap_start_final_timer(self, 2 * self->final_timeout); - break; - case RECV_SREJ_RSP: - irlap_update_nr_received(self, info->nr); - if (self->remote_busy) { - irlap_wait_min_turn_around(self, &self->qos_tx); - irlap_send_rr_frame(self, CMD_FRAME); - } else - irlap_resend_rejected_frame(self, CMD_FRAME); - irlap_start_final_timer(self, 2 * self->final_timeout); - break; - case RECV_RD_RSP: - pr_debug("%s(), RECV_RD_RSP\n", __func__); - - irlap_flush_all_queues(self); - irlap_next_state(self, LAP_XMIT_P); - /* Call back the LAP state machine to do a proper disconnect */ - irlap_disconnect_request(self); - break; - default: - pr_debug("%s(), Unknown event %s\n", - __func__, irlap_event[event]); - - ret = -1; - break; - } - return ret; -} - -/* - * Function irlap_state_reset_wait (event, skb, info) - * - * We have informed the service user of a reset condition, and is - * awaiting reset of disconnect request. - * - */ -static int irlap_state_reset_wait(struct irlap_cb *self, IRLAP_EVENT event, - struct sk_buff *skb, struct irlap_info *info) -{ - int ret = 0; - - pr_debug("%s(), event = %s\n", __func__, irlap_event[event]); - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;); - - switch (event) { - case RESET_REQUEST: - if (self->xmitflag) { - irlap_wait_min_turn_around(self, &self->qos_tx); - irlap_send_snrm_frame(self, NULL); - irlap_start_final_timer(self, self->final_timeout); - irlap_next_state(self, LAP_RESET); - } else { - irlap_start_final_timer(self, self->final_timeout); - irlap_next_state(self, LAP_RESET); - } - break; - case DISCONNECT_REQUEST: - irlap_wait_min_turn_around( self, &self->qos_tx); - irlap_send_disc_frame( self); - irlap_flush_all_queues( self); - irlap_start_final_timer( self, self->final_timeout); - self->retry_count = 0; - irlap_next_state( self, LAP_PCLOSE); - break; - default: - pr_debug("%s(), Unknown event %s\n", __func__, - irlap_event[event]); - - ret = -1; - break; - } - return ret; -} - -/* - * Function irlap_state_reset (self, event, skb, info) - * - * We have sent a SNRM reset command to the peer layer, and is awaiting - * reply. - * - */ -static int irlap_state_reset(struct irlap_cb *self, IRLAP_EVENT event, - struct sk_buff *skb, struct irlap_info *info) -{ - int ret = 0; - - pr_debug("%s(), event = %s\n", __func__, irlap_event[event]); - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;); - - switch (event) { - case RECV_DISC_CMD: - del_timer(&self->final_timer); - - irlap_apply_default_connection_parameters(self); - - /* Always switch state before calling upper layers */ - irlap_next_state(self, LAP_NDM); - - irlap_disconnect_indication(self, LAP_NO_RESPONSE); - - break; - case RECV_UA_RSP: - del_timer(&self->final_timer); - - /* Initiate connection state */ - irlap_initiate_connection_state(self); - - irlap_reset_confirm(); - - self->remote_busy = FALSE; - - irlap_next_state(self, LAP_XMIT_P); - - irlap_start_poll_timer(self, self->poll_timeout); - - break; - case FINAL_TIMER_EXPIRED: - if (self->retry_count < 3) { - irlap_wait_min_turn_around(self, &self->qos_tx); - - IRDA_ASSERT(self->netdev != NULL, return -1;); - irlap_send_snrm_frame(self, self->qos_dev); - - self->retry_count++; /* Experimental!! */ - - irlap_start_final_timer(self, self->final_timeout); - irlap_next_state(self, LAP_RESET); - } else if (self->retry_count >= self->N3) { - irlap_apply_default_connection_parameters(self); - - /* Always switch state before calling upper layers */ - irlap_next_state(self, LAP_NDM); - - irlap_disconnect_indication(self, LAP_NO_RESPONSE); - } - break; - case RECV_SNRM_CMD: - /* - * SNRM frame is not allowed to contain an I-field in this - * state - */ - if (!info) { - pr_debug("%s(), RECV_SNRM_CMD\n", __func__); - irlap_initiate_connection_state(self); - irlap_wait_min_turn_around(self, &self->qos_tx); - irlap_send_ua_response_frame(self, &self->qos_rx); - irlap_reset_confirm(); - irlap_start_wd_timer(self, self->wd_timeout); - irlap_next_state(self, LAP_NDM); - } else { - pr_debug("%s(), SNRM frame contained an I field!\n", - __func__); - } - break; - default: - pr_debug("%s(), Unknown event %s\n", - __func__, irlap_event[event]); - - ret = -1; - break; - } - return ret; -} - -/* - * Function irlap_state_xmit_s (event, skb, info) - * - * XMIT_S, The secondary station has been given the right to transmit, - * and we therefore do not expect to receive any transmissions from other - * stations. - */ -static int irlap_state_xmit_s(struct irlap_cb *self, IRLAP_EVENT event, - struct sk_buff *skb, struct irlap_info *info) -{ - int ret = 0; - - pr_debug("%s(), event=%s\n", __func__, irlap_event[event]); - - IRDA_ASSERT(self != NULL, return -ENODEV;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return -EBADR;); - - switch (event) { - case SEND_I_CMD: - /* - * Send frame only if send window > 0 - */ - if ((self->window > 0) && (!self->remote_busy)) { - int nextfit; -#ifdef CONFIG_IRDA_DYNAMIC_WINDOW - struct sk_buff *skb_next; - - /* - * Same deal as in irlap_state_xmit_p(), so see - * the comments at that point. - * We are the secondary, so there are only subtle - * differences. - Jean II - */ - - /* Check if a subsequent skb exist and would fit in - * the current window (with respect to turnaround - * time). - Jean II */ - skb_next = skb_peek(&self->txq); - nextfit = ((skb_next != NULL) && - ((skb_next->len + skb->len) <= - self->bytes_left)); - - /* - * Test if we have transmitted more bytes over the - * link than its possible to do with the current - * speed and turn-around-time. - */ - if((!nextfit) && (skb->len > self->bytes_left)) { - pr_debug("%s(), Not allowed to transmit more bytes!\n", - __func__); - /* Requeue the skb */ - skb_queue_head(&self->txq, skb_get(skb)); - - /* - * Switch to NRM_S, this is only possible - * when we are in secondary mode, since we - * must be sure that we don't miss any RR - * frames - */ - self->window = self->window_size; - self->bytes_left = self->line_capacity; - irlap_start_wd_timer(self, self->wd_timeout); - - irlap_next_state(self, LAP_NRM_S); - /* Slight difference with primary : - * here we would wait for the other side to - * expire the turnaround. - Jean II */ - - return -EPROTO; /* Try again later */ - } - /* Subtract space used by this skb */ - self->bytes_left -= skb->len; -#else /* CONFIG_IRDA_DYNAMIC_WINDOW */ - /* Window has been adjusted for the max packet - * size, so much simpler... - Jean II */ - nextfit = !skb_queue_empty(&self->txq); -#endif /* CONFIG_IRDA_DYNAMIC_WINDOW */ - /* - * Send data with final bit cleared only if window > 1 - * and there is more frames to be sent - */ - if ((self->window > 1) && (nextfit)) { - irlap_send_data_secondary(self, skb); - irlap_next_state(self, LAP_XMIT_S); - } else { - irlap_send_data_secondary_final(self, skb); - irlap_next_state(self, LAP_NRM_S); - - /* - * Make sure state machine does not try to send - * any more frames - */ - ret = -EPROTO; - } - } else { - pr_debug("%s(), Unable to send!\n", __func__); - skb_queue_head(&self->txq, skb_get(skb)); - ret = -EPROTO; - } - break; - case DISCONNECT_REQUEST: - irlap_send_rd_frame(self); - irlap_flush_all_queues(self); - irlap_start_wd_timer(self, self->wd_timeout); - irlap_next_state(self, LAP_SCLOSE); - break; - case DATA_REQUEST: - /* Nothing to do, irlap_do_event() will send the packet - * when we return... - Jean II */ - break; - default: - pr_debug("%s(), Unknown event %s\n", __func__, - irlap_event[event]); - - ret = -EINVAL; - break; - } - return ret; -} - -/* - * Function irlap_state_nrm_s (event, skb, info) - * - * NRM_S (Normal Response Mode as Secondary) state, in this state we are - * expecting to receive frames from the primary station - * - */ -static int irlap_state_nrm_s(struct irlap_cb *self, IRLAP_EVENT event, - struct sk_buff *skb, struct irlap_info *info) -{ - int ns_status; - int nr_status; - int ret = 0; - - pr_debug("%s(), event=%s\n", __func__, irlap_event[event]); - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;); - - switch (event) { - case RECV_I_CMD: /* Optimize for the common case */ - /* FIXME: must check for remote_busy below */ - pr_debug("%s(), event=%s nr=%d, vs=%d, ns=%d, vr=%d, pf=%d\n", - __func__, irlap_event[event], info->nr, - self->vs, info->ns, self->vr, info->pf); - - self->retry_count = 0; - - ns_status = irlap_validate_ns_received(self, info->ns); - nr_status = irlap_validate_nr_received(self, info->nr); - /* - * Check for expected I(nformation) frame - */ - if ((ns_status == NS_EXPECTED) && (nr_status == NR_EXPECTED)) { - - /* Update Vr (next frame for us to receive) */ - self->vr = (self->vr + 1) % 8; - - /* Update Nr received */ - irlap_update_nr_received(self, info->nr); - - /* - * poll bit cleared? - */ - if (!info->pf) { - - self->ack_required = TRUE; - - /* - * Starting WD-timer here is optional, but - * not recommended. Note 6 IrLAP p. 83 - */ -#if 0 - irda_start_timer(WD_TIMER, self->wd_timeout); -#endif - /* Keep state, do not move this line */ - irlap_next_state(self, LAP_NRM_S); - - irlap_data_indication(self, skb, FALSE); - break; - } else { - /* - * We should wait before sending RR, and - * also before changing to XMIT_S - * state. (note 1, IrLAP p. 82) - */ - irlap_wait_min_turn_around(self, &self->qos_tx); - - /* - * Give higher layers a chance to - * immediately reply with some data before - * we decide if we should send a RR frame - * or not - */ - irlap_data_indication(self, skb, FALSE); - - /* Any pending data requests? */ - if (!skb_queue_empty(&self->txq) && - (self->window > 0)) - { - self->ack_required = TRUE; - - del_timer(&self->wd_timer); - - irlap_next_state(self, LAP_XMIT_S); - } else { - irlap_send_rr_frame(self, RSP_FRAME); - irlap_start_wd_timer(self, - self->wd_timeout); - - /* Keep the state */ - irlap_next_state(self, LAP_NRM_S); - } - break; - } - } - /* - * Check for Unexpected next to send (Ns) - */ - if ((ns_status == NS_UNEXPECTED) && (nr_status == NR_EXPECTED)) - { - /* Unexpected next to send, with final bit cleared */ - if (!info->pf) { - irlap_update_nr_received(self, info->nr); - - irlap_start_wd_timer(self, self->wd_timeout); - } else { - /* Update Nr received */ - irlap_update_nr_received(self, info->nr); - - irlap_wait_min_turn_around(self, &self->qos_tx); - irlap_send_rr_frame(self, RSP_FRAME); - - irlap_start_wd_timer(self, self->wd_timeout); - } - break; - } - - /* - * Unexpected Next to Receive(NR) ? - */ - if ((ns_status == NS_EXPECTED) && (nr_status == NR_UNEXPECTED)) - { - if (info->pf) { - pr_debug("RECV_I_RSP: frame(s) lost\n"); - - self->vr = (self->vr + 1) % 8; - - /* Update Nr received */ - irlap_update_nr_received(self, info->nr); - - /* Resend rejected frames */ - irlap_resend_rejected_frames(self, RSP_FRAME); - - /* Keep state, do not move this line */ - irlap_next_state(self, LAP_NRM_S); - - irlap_data_indication(self, skb, FALSE); - irlap_start_wd_timer(self, self->wd_timeout); - break; - } - /* - * This is not documented in IrLAP!! Unexpected NR - * with poll bit cleared - */ - if (!info->pf) { - self->vr = (self->vr + 1) % 8; - - /* Update Nr received */ - irlap_update_nr_received(self, info->nr); - - /* Keep state, do not move this line */ - irlap_next_state(self, LAP_NRM_S); - - irlap_data_indication(self, skb, FALSE); - irlap_start_wd_timer(self, self->wd_timeout); - } - break; - } - - if (ret == NR_INVALID) { - pr_debug("NRM_S, NR_INVALID not implemented!\n"); - } - if (ret == NS_INVALID) { - pr_debug("NRM_S, NS_INVALID not implemented!\n"); - } - break; - case RECV_UI_FRAME: - /* - * poll bit cleared? - */ - if (!info->pf) { - irlap_data_indication(self, skb, TRUE); - irlap_next_state(self, LAP_NRM_S); /* Keep state */ - } else { - /* - * Any pending data requests? - */ - if (!skb_queue_empty(&self->txq) && - (self->window > 0) && !self->remote_busy) - { - irlap_data_indication(self, skb, TRUE); - - del_timer(&self->wd_timer); - - irlap_next_state(self, LAP_XMIT_S); - } else { - irlap_data_indication(self, skb, TRUE); - - irlap_wait_min_turn_around(self, &self->qos_tx); - - irlap_send_rr_frame(self, RSP_FRAME); - self->ack_required = FALSE; - - irlap_start_wd_timer(self, self->wd_timeout); - - /* Keep the state */ - irlap_next_state(self, LAP_NRM_S); - } - } - break; - case RECV_RR_CMD: - self->retry_count = 0; - - /* - * Nr as expected? - */ - nr_status = irlap_validate_nr_received(self, info->nr); - if (nr_status == NR_EXPECTED) { - if (!skb_queue_empty(&self->txq) && - (self->window > 0)) { - self->remote_busy = FALSE; - - /* Update Nr received */ - irlap_update_nr_received(self, info->nr); - del_timer(&self->wd_timer); - - irlap_wait_min_turn_around(self, &self->qos_tx); - irlap_next_state(self, LAP_XMIT_S); - } else { - self->remote_busy = FALSE; - /* Update Nr received */ - irlap_update_nr_received(self, info->nr); - irlap_wait_min_turn_around(self, &self->qos_tx); - irlap_start_wd_timer(self, self->wd_timeout); - - /* Note : if the link is idle (this case), - * we never go in XMIT_S, so we never get a - * chance to process any DISCONNECT_REQUEST. - * Do it now ! - Jean II */ - if (self->disconnect_pending) { - /* Disconnect */ - irlap_send_rd_frame(self); - irlap_flush_all_queues(self); - - irlap_next_state(self, LAP_SCLOSE); - } else { - /* Just send back pf bit */ - irlap_send_rr_frame(self, RSP_FRAME); - - irlap_next_state(self, LAP_NRM_S); - } - } - } else if (nr_status == NR_UNEXPECTED) { - self->remote_busy = FALSE; - irlap_update_nr_received(self, info->nr); - irlap_resend_rejected_frames(self, RSP_FRAME); - - irlap_start_wd_timer(self, self->wd_timeout); - - /* Keep state */ - irlap_next_state(self, LAP_NRM_S); - } else { - pr_debug("%s(), invalid nr not implemented!\n", - __func__); - } - break; - case RECV_SNRM_CMD: - /* SNRM frame is not allowed to contain an I-field */ - if (!info) { - del_timer(&self->wd_timer); - pr_debug("%s(), received SNRM cmd\n", __func__); - irlap_next_state(self, LAP_RESET_CHECK); - - irlap_reset_indication(self); - } else { - pr_debug("%s(), SNRM frame contained an I-field!\n", - __func__); - - } - break; - case RECV_REJ_CMD: - irlap_update_nr_received(self, info->nr); - if (self->remote_busy) { - irlap_wait_min_turn_around(self, &self->qos_tx); - irlap_send_rr_frame(self, RSP_FRAME); - } else - irlap_resend_rejected_frames(self, RSP_FRAME); - irlap_start_wd_timer(self, self->wd_timeout); - break; - case RECV_SREJ_CMD: - irlap_update_nr_received(self, info->nr); - if (self->remote_busy) { - irlap_wait_min_turn_around(self, &self->qos_tx); - irlap_send_rr_frame(self, RSP_FRAME); - } else - irlap_resend_rejected_frame(self, RSP_FRAME); - irlap_start_wd_timer(self, self->wd_timeout); - break; - case WD_TIMER_EXPIRED: - /* - * Wait until retry_count * n matches negotiated threshold/ - * disconnect time (note 2 in IrLAP p. 82) - * - * Similar to irlap_state_nrm_p() -> FINAL_TIMER_EXPIRED - * Note : self->wd_timeout = (self->final_timeout * 2), - * which explain why we use (self->N2 / 2) here !!! - * Jean II - */ - pr_debug("%s(), retry_count = %d\n", __func__, - self->retry_count); - - if (self->retry_count < (self->N2 / 2)) { - /* No retry, just wait for primary */ - irlap_start_wd_timer(self, self->wd_timeout); - self->retry_count++; - - if((self->retry_count % (self->N1 / 2)) == 0) - irlap_status_indication(self, - STATUS_NO_ACTIVITY); - } else { - irlap_apply_default_connection_parameters(self); - - /* Always switch state before calling upper layers */ - irlap_next_state(self, LAP_NDM); - irlap_disconnect_indication(self, LAP_NO_RESPONSE); - } - break; - case RECV_DISC_CMD: - /* Always switch state before calling upper layers */ - irlap_next_state(self, LAP_NDM); - - /* Send disconnect response */ - irlap_wait_min_turn_around(self, &self->qos_tx); - irlap_send_ua_response_frame(self, NULL); - - del_timer(&self->wd_timer); - irlap_flush_all_queues(self); - /* Set default link parameters */ - irlap_apply_default_connection_parameters(self); - - irlap_disconnect_indication(self, LAP_DISC_INDICATION); - break; - case RECV_DISCOVERY_XID_CMD: - irlap_wait_min_turn_around(self, &self->qos_tx); - irlap_send_rr_frame(self, RSP_FRAME); - self->ack_required = TRUE; - irlap_start_wd_timer(self, self->wd_timeout); - irlap_next_state(self, LAP_NRM_S); - - break; - case RECV_TEST_CMD: - /* Remove test frame header (only LAP header in NRM) */ - skb_pull(skb, LAP_ADDR_HEADER + LAP_CTRL_HEADER); - - irlap_wait_min_turn_around(self, &self->qos_tx); - irlap_start_wd_timer(self, self->wd_timeout); - - /* Send response (info will be copied) */ - irlap_send_test_frame(self, self->caddr, info->daddr, skb); - break; - default: - pr_debug("%s(), Unknown event %d, (%s)\n", __func__, - event, irlap_event[event]); - - ret = -EINVAL; - break; - } - return ret; -} - -/* - * Function irlap_state_sclose (self, event, skb, info) - */ -static int irlap_state_sclose(struct irlap_cb *self, IRLAP_EVENT event, - struct sk_buff *skb, struct irlap_info *info) -{ - IRDA_ASSERT(self != NULL, return -ENODEV;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return -EBADR;); - - switch (event) { - case RECV_DISC_CMD: - /* Always switch state before calling upper layers */ - irlap_next_state(self, LAP_NDM); - - /* Send disconnect response */ - irlap_wait_min_turn_around(self, &self->qos_tx); - irlap_send_ua_response_frame(self, NULL); - - del_timer(&self->wd_timer); - /* Set default link parameters */ - irlap_apply_default_connection_parameters(self); - - irlap_disconnect_indication(self, LAP_DISC_INDICATION); - break; - case RECV_DM_RSP: - /* IrLAP-1.1 p.82: in SCLOSE, S and I type RSP frames - * shall take us down into default NDM state, like DM_RSP - */ - case RECV_RR_RSP: - case RECV_RNR_RSP: - case RECV_REJ_RSP: - case RECV_SREJ_RSP: - case RECV_I_RSP: - /* Always switch state before calling upper layers */ - irlap_next_state(self, LAP_NDM); - - del_timer(&self->wd_timer); - irlap_apply_default_connection_parameters(self); - - irlap_disconnect_indication(self, LAP_DISC_INDICATION); - break; - case WD_TIMER_EXPIRED: - /* Always switch state before calling upper layers */ - irlap_next_state(self, LAP_NDM); - - irlap_apply_default_connection_parameters(self); - - irlap_disconnect_indication(self, LAP_DISC_INDICATION); - break; - default: - /* IrLAP-1.1 p.82: in SCLOSE, basically any received frame - * with pf=1 shall restart the wd-timer and resend the rd:rsp - */ - if (info != NULL && info->pf) { - del_timer(&self->wd_timer); - irlap_wait_min_turn_around(self, &self->qos_tx); - irlap_send_rd_frame(self); - irlap_start_wd_timer(self, self->wd_timeout); - break; /* stay in SCLOSE */ - } - - pr_debug("%s(), Unknown event %d, (%s)\n", __func__, - event, irlap_event[event]); - - break; - } - - return -1; -} - -static int irlap_state_reset_check( struct irlap_cb *self, IRLAP_EVENT event, - struct sk_buff *skb, - struct irlap_info *info) -{ - int ret = 0; - - pr_debug("%s(), event=%s\n", __func__, irlap_event[event]); - - IRDA_ASSERT(self != NULL, return -ENODEV;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return -EBADR;); - - switch (event) { - case RESET_RESPONSE: - irlap_send_ua_response_frame(self, &self->qos_rx); - irlap_initiate_connection_state(self); - irlap_start_wd_timer(self, WD_TIMEOUT); - irlap_flush_all_queues(self); - - irlap_next_state(self, LAP_NRM_S); - break; - case DISCONNECT_REQUEST: - irlap_wait_min_turn_around(self, &self->qos_tx); - irlap_send_rd_frame(self); - irlap_start_wd_timer(self, WD_TIMEOUT); - irlap_next_state(self, LAP_SCLOSE); - break; - default: - pr_debug("%s(), Unknown event %d, (%s)\n", __func__, - event, irlap_event[event]); - - ret = -EINVAL; - break; - } - return ret; -} diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c deleted file mode 100644 index debda3de4726..000000000000 --- a/net/irda/irlap_frame.c +++ /dev/null @@ -1,1407 +0,0 @@ -/********************************************************************* - * - * Filename: irlap_frame.c - * Version: 1.0 - * Description: Build and transmit IrLAP frames - * Status: Stable - * Author: Dag Brattli <dagb@cs.uit.no> - * Created at: Tue Aug 19 10:27:26 1997 - * Modified at: Wed Jan 5 08:59:04 2000 - * Modified by: Dag Brattli <dagb@cs.uit.no> - * - * Copyright (c) 1998-2000 Dag Brattli <dagb@cs.uit.no>, - * All Rights Reserved. - * Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#include <linux/skbuff.h> -#include <linux/if.h> -#include <linux/if_ether.h> -#include <linux/netdevice.h> -#include <linux/irda.h> -#include <linux/slab.h> - -#include <net/pkt_sched.h> -#include <net/sock.h> - -#include <asm/byteorder.h> - -#include <net/irda/irda.h> -#include <net/irda/irda_device.h> -#include <net/irda/irlap.h> -#include <net/irda/wrapper.h> -#include <net/irda/timer.h> -#include <net/irda/irlap_frame.h> -#include <net/irda/qos.h> - -static void irlap_send_i_frame(struct irlap_cb *self, struct sk_buff *skb, - int command); - -/* - * Function irlap_insert_info (self, skb) - * - * Insert minimum turnaround time and speed information into the skb. We - * need to do this since it's per packet relevant information. Safe to - * have this function inlined since it's only called from one place - */ -static inline void irlap_insert_info(struct irlap_cb *self, - struct sk_buff *skb) -{ - struct irda_skb_cb *cb = (struct irda_skb_cb *) skb->cb; - - /* - * Insert MTT (min. turn time) and speed into skb, so that the - * device driver knows which settings to use - */ - cb->magic = LAP_MAGIC; - cb->mtt = self->mtt_required; - cb->next_speed = self->speed; - - /* Reset */ - self->mtt_required = 0; - - /* - * Delay equals negotiated BOFs count, plus the number of BOFs to - * force the negotiated minimum turnaround time - */ - cb->xbofs = self->bofs_count; - cb->next_xbofs = self->next_bofs; - cb->xbofs_delay = self->xbofs_delay; - - /* Reset XBOF's delay (used only for getting min turn time) */ - self->xbofs_delay = 0; - /* Put the correct xbofs value for the next packet */ - self->bofs_count = self->next_bofs; -} - -/* - * Function irlap_queue_xmit (self, skb) - * - * A little wrapper for dev_queue_xmit, so we can insert some common - * code into it. - */ -void irlap_queue_xmit(struct irlap_cb *self, struct sk_buff *skb) -{ - /* Some common init stuff */ - skb->dev = self->netdev; - skb_reset_mac_header(skb); - skb_reset_network_header(skb); - skb_reset_transport_header(skb); - skb->protocol = htons(ETH_P_IRDA); - skb->priority = TC_PRIO_BESTEFFORT; - - irlap_insert_info(self, skb); - - if (unlikely(self->mode & IRDA_MODE_MONITOR)) { - pr_debug("%s(): %s is in monitor mode\n", __func__, - self->netdev->name); - dev_kfree_skb(skb); - return; - } - - dev_queue_xmit(skb); -} - -/* - * Function irlap_send_snrm_cmd (void) - * - * Transmits a connect SNRM command frame - */ -void irlap_send_snrm_frame(struct irlap_cb *self, struct qos_info *qos) -{ - struct sk_buff *tx_skb; - struct snrm_frame *frame; - int ret; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - - /* Allocate frame */ - tx_skb = alloc_skb(sizeof(struct snrm_frame) + - IRLAP_NEGOCIATION_PARAMS_LEN, - GFP_ATOMIC); - if (!tx_skb) - return; - - frame = skb_put(tx_skb, 2); - - /* Insert connection address field */ - if (qos) - frame->caddr = CMD_FRAME | CBROADCAST; - else - frame->caddr = CMD_FRAME | self->caddr; - - /* Insert control field */ - frame->control = SNRM_CMD | PF_BIT; - - /* - * If we are establishing a connection then insert QoS parameters - */ - if (qos) { - skb_put(tx_skb, 9); /* 25 left */ - frame->saddr = cpu_to_le32(self->saddr); - frame->daddr = cpu_to_le32(self->daddr); - - frame->ncaddr = self->caddr; - - ret = irlap_insert_qos_negotiation_params(self, tx_skb); - if (ret < 0) { - dev_kfree_skb(tx_skb); - return; - } - } - irlap_queue_xmit(self, tx_skb); -} - -/* - * Function irlap_recv_snrm_cmd (skb, info) - * - * Received SNRM (Set Normal Response Mode) command frame - * - */ -static void irlap_recv_snrm_cmd(struct irlap_cb *self, struct sk_buff *skb, - struct irlap_info *info) -{ - struct snrm_frame *frame; - - if (pskb_may_pull(skb,sizeof(struct snrm_frame))) { - frame = (struct snrm_frame *) skb->data; - - /* Copy the new connection address ignoring the C/R bit */ - info->caddr = frame->ncaddr & 0xFE; - - /* Check if the new connection address is valid */ - if ((info->caddr == 0x00) || (info->caddr == 0xfe)) { - pr_debug("%s(), invalid connection address!\n", - __func__); - return; - } - - /* Copy peer device address */ - info->daddr = le32_to_cpu(frame->saddr); - info->saddr = le32_to_cpu(frame->daddr); - - /* Only accept if addressed directly to us */ - if (info->saddr != self->saddr) { - pr_debug("%s(), not addressed to us!\n", - __func__); - return; - } - irlap_do_event(self, RECV_SNRM_CMD, skb, info); - } else { - /* Signal that this SNRM frame does not contain and I-field */ - irlap_do_event(self, RECV_SNRM_CMD, skb, NULL); - } -} - -/* - * Function irlap_send_ua_response_frame (qos) - * - * Send UA (Unnumbered Acknowledgement) frame - * - */ -void irlap_send_ua_response_frame(struct irlap_cb *self, struct qos_info *qos) -{ - struct sk_buff *tx_skb; - struct ua_frame *frame; - int ret; - - pr_debug("%s() <%ld>\n", __func__, jiffies); - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - - /* Allocate frame */ - tx_skb = alloc_skb(sizeof(struct ua_frame) + - IRLAP_NEGOCIATION_PARAMS_LEN, - GFP_ATOMIC); - if (!tx_skb) - return; - - frame = skb_put(tx_skb, 10); - - /* Build UA response */ - frame->caddr = self->caddr; - frame->control = UA_RSP | PF_BIT; - - frame->saddr = cpu_to_le32(self->saddr); - frame->daddr = cpu_to_le32(self->daddr); - - /* Should we send QoS negotiation parameters? */ - if (qos) { - ret = irlap_insert_qos_negotiation_params(self, tx_skb); - if (ret < 0) { - dev_kfree_skb(tx_skb); - return; - } - } - - irlap_queue_xmit(self, tx_skb); -} - - -/* - * Function irlap_send_dm_frame (void) - * - * Send disconnected mode (DM) frame - * - */ -void irlap_send_dm_frame( struct irlap_cb *self) -{ - struct sk_buff *tx_skb = NULL; - struct dm_frame *frame; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - - tx_skb = alloc_skb(sizeof(struct dm_frame), GFP_ATOMIC); - if (!tx_skb) - return; - - frame = skb_put(tx_skb, 2); - - if (self->state == LAP_NDM) - frame->caddr = CBROADCAST; - else - frame->caddr = self->caddr; - - frame->control = DM_RSP | PF_BIT; - - irlap_queue_xmit(self, tx_skb); -} - -/* - * Function irlap_send_disc_frame (void) - * - * Send disconnect (DISC) frame - * - */ -void irlap_send_disc_frame(struct irlap_cb *self) -{ - struct sk_buff *tx_skb = NULL; - struct disc_frame *frame; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - - tx_skb = alloc_skb(sizeof(struct disc_frame), GFP_ATOMIC); - if (!tx_skb) - return; - - frame = skb_put(tx_skb, 2); - - frame->caddr = self->caddr | CMD_FRAME; - frame->control = DISC_CMD | PF_BIT; - - irlap_queue_xmit(self, tx_skb); -} - -/* - * Function irlap_send_discovery_xid_frame (S, s, command) - * - * Build and transmit a XID (eXchange station IDentifier) discovery - * frame. - */ -void irlap_send_discovery_xid_frame(struct irlap_cb *self, int S, __u8 s, - __u8 command, discovery_t *discovery) -{ - struct sk_buff *tx_skb = NULL; - struct xid_frame *frame; - __u32 bcast = BROADCAST; - __u8 *info; - - pr_debug("%s(), s=%d, S=%d, command=%d\n", __func__, - s, S, command); - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - IRDA_ASSERT(discovery != NULL, return;); - - tx_skb = alloc_skb(sizeof(struct xid_frame) + IRLAP_DISCOVERY_INFO_LEN, - GFP_ATOMIC); - if (!tx_skb) - return; - - skb_put(tx_skb, 14); - frame = (struct xid_frame *) tx_skb->data; - - if (command) { - frame->caddr = CBROADCAST | CMD_FRAME; - frame->control = XID_CMD | PF_BIT; - } else { - frame->caddr = CBROADCAST; - frame->control = XID_RSP | PF_BIT; - } - frame->ident = XID_FORMAT; - - frame->saddr = cpu_to_le32(self->saddr); - - if (command) - frame->daddr = cpu_to_le32(bcast); - else - frame->daddr = cpu_to_le32(discovery->data.daddr); - - switch (S) { - case 1: - frame->flags = 0x00; - break; - case 6: - frame->flags = 0x01; - break; - case 8: - frame->flags = 0x02; - break; - case 16: - frame->flags = 0x03; - break; - default: - frame->flags = 0x02; - break; - } - - frame->slotnr = s; - frame->version = 0x00; - - /* - * Provide info for final slot only in commands, and for all - * responses. Send the second byte of the hint only if the - * EXTENSION bit is set in the first byte. - */ - if (!command || (frame->slotnr == 0xff)) { - int len; - - if (discovery->data.hints[0] & HINT_EXTENSION) { - info = skb_put(tx_skb, 2); - info[0] = discovery->data.hints[0]; - info[1] = discovery->data.hints[1]; - } else { - info = skb_put(tx_skb, 1); - info[0] = discovery->data.hints[0]; - } - info = skb_put(tx_skb, 1); - info[0] = discovery->data.charset; - - len = IRDA_MIN(discovery->name_len, skb_tailroom(tx_skb)); - skb_put_data(tx_skb, discovery->data.info, len); - } - irlap_queue_xmit(self, tx_skb); -} - -/* - * Function irlap_recv_discovery_xid_rsp (skb, info) - * - * Received a XID discovery response - * - */ -static void irlap_recv_discovery_xid_rsp(struct irlap_cb *self, - struct sk_buff *skb, - struct irlap_info *info) -{ - struct xid_frame *xid; - discovery_t *discovery = NULL; - __u8 *discovery_info; - char *text; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - - if (!pskb_may_pull(skb, sizeof(struct xid_frame))) { - net_err_ratelimited("%s: frame too short!\n", __func__); - return; - } - - xid = (struct xid_frame *) skb->data; - - info->daddr = le32_to_cpu(xid->saddr); - info->saddr = le32_to_cpu(xid->daddr); - - /* Make sure frame is addressed to us */ - if ((info->saddr != self->saddr) && (info->saddr != BROADCAST)) { - pr_debug("%s(), frame is not addressed to us!\n", - __func__); - return; - } - - if ((discovery = kzalloc(sizeof(discovery_t), GFP_ATOMIC)) == NULL) { - net_warn_ratelimited("%s: kmalloc failed!\n", __func__); - return; - } - - discovery->data.daddr = info->daddr; - discovery->data.saddr = self->saddr; - discovery->timestamp = jiffies; - - pr_debug("%s(), daddr=%08x\n", __func__, - discovery->data.daddr); - - discovery_info = skb_pull(skb, sizeof(struct xid_frame)); - - /* Get info returned from peer */ - discovery->data.hints[0] = discovery_info[0]; - if (discovery_info[0] & HINT_EXTENSION) { - pr_debug("EXTENSION\n"); - discovery->data.hints[1] = discovery_info[1]; - discovery->data.charset = discovery_info[2]; - text = (char *) &discovery_info[3]; - } else { - discovery->data.hints[1] = 0; - discovery->data.charset = discovery_info[1]; - text = (char *) &discovery_info[2]; - } - /* - * Terminate info string, should be safe since this is where the - * FCS bytes resides. - */ - skb->data[skb->len] = '\0'; - strncpy(discovery->data.info, text, NICKNAME_MAX_LEN); - discovery->name_len = strlen(discovery->data.info); - - info->discovery = discovery; - - irlap_do_event(self, RECV_DISCOVERY_XID_RSP, skb, info); -} - -/* - * Function irlap_recv_discovery_xid_cmd (skb, info) - * - * Received a XID discovery command - * - */ -static void irlap_recv_discovery_xid_cmd(struct irlap_cb *self, - struct sk_buff *skb, - struct irlap_info *info) -{ - struct xid_frame *xid; - discovery_t *discovery = NULL; - __u8 *discovery_info; - char *text; - - if (!pskb_may_pull(skb, sizeof(struct xid_frame))) { - net_err_ratelimited("%s: frame too short!\n", __func__); - return; - } - - xid = (struct xid_frame *) skb->data; - - info->daddr = le32_to_cpu(xid->saddr); - info->saddr = le32_to_cpu(xid->daddr); - - /* Make sure frame is addressed to us */ - if ((info->saddr != self->saddr) && (info->saddr != BROADCAST)) { - pr_debug("%s(), frame is not addressed to us!\n", - __func__); - return; - } - - switch (xid->flags & 0x03) { - case 0x00: - info->S = 1; - break; - case 0x01: - info->S = 6; - break; - case 0x02: - info->S = 8; - break; - case 0x03: - info->S = 16; - break; - default: - /* Error!! */ - return; - } - info->s = xid->slotnr; - - discovery_info = skb_pull(skb, sizeof(struct xid_frame)); - - /* - * Check if last frame - */ - if (info->s == 0xff) { - /* Check if things are sane at this point... */ - if((discovery_info == NULL) || - !pskb_may_pull(skb, 3)) { - net_err_ratelimited("%s: discovery frame too short!\n", - __func__); - return; - } - - /* - * We now have some discovery info to deliver! - */ - discovery = kzalloc(sizeof(discovery_t), GFP_ATOMIC); - if (!discovery) - return; - - discovery->data.daddr = info->daddr; - discovery->data.saddr = self->saddr; - discovery->timestamp = jiffies; - - discovery->data.hints[0] = discovery_info[0]; - if (discovery_info[0] & HINT_EXTENSION) { - discovery->data.hints[1] = discovery_info[1]; - discovery->data.charset = discovery_info[2]; - text = (char *) &discovery_info[3]; - } else { - discovery->data.hints[1] = 0; - discovery->data.charset = discovery_info[1]; - text = (char *) &discovery_info[2]; - } - /* - * Terminate string, should be safe since this is where the - * FCS bytes resides. - */ - skb->data[skb->len] = '\0'; - strncpy(discovery->data.info, text, NICKNAME_MAX_LEN); - discovery->name_len = strlen(discovery->data.info); - - info->discovery = discovery; - } else - info->discovery = NULL; - - irlap_do_event(self, RECV_DISCOVERY_XID_CMD, skb, info); -} - -/* - * Function irlap_send_rr_frame (self, command) - * - * Build and transmit RR (Receive Ready) frame. Notice that it is currently - * only possible to send RR frames with the poll bit set. - */ -void irlap_send_rr_frame(struct irlap_cb *self, int command) -{ - struct sk_buff *tx_skb; - struct rr_frame *frame; - - tx_skb = alloc_skb(sizeof(struct rr_frame), GFP_ATOMIC); - if (!tx_skb) - return; - - frame = skb_put(tx_skb, 2); - - frame->caddr = self->caddr; - frame->caddr |= (command) ? CMD_FRAME : 0; - - frame->control = RR | PF_BIT | (self->vr << 5); - - irlap_queue_xmit(self, tx_skb); -} - -/* - * Function irlap_send_rd_frame (self) - * - * Request disconnect. Used by a secondary station to request the - * disconnection of the link. - */ -void irlap_send_rd_frame(struct irlap_cb *self) -{ - struct sk_buff *tx_skb; - struct rd_frame *frame; - - tx_skb = alloc_skb(sizeof(struct rd_frame), GFP_ATOMIC); - if (!tx_skb) - return; - - frame = skb_put(tx_skb, 2); - - frame->caddr = self->caddr; - frame->control = RD_RSP | PF_BIT; - - irlap_queue_xmit(self, tx_skb); -} - -/* - * Function irlap_recv_rr_frame (skb, info) - * - * Received RR (Receive Ready) frame from peer station, no harm in - * making it inline since its called only from one single place - * (irlap_driver_rcv). - */ -static inline void irlap_recv_rr_frame(struct irlap_cb *self, - struct sk_buff *skb, - struct irlap_info *info, int command) -{ - info->nr = skb->data[1] >> 5; - - /* Check if this is a command or a response frame */ - if (command) - irlap_do_event(self, RECV_RR_CMD, skb, info); - else - irlap_do_event(self, RECV_RR_RSP, skb, info); -} - -/* - * Function irlap_recv_rnr_frame (self, skb, info) - * - * Received RNR (Receive Not Ready) frame from peer station - * - */ -static void irlap_recv_rnr_frame(struct irlap_cb *self, struct sk_buff *skb, - struct irlap_info *info, int command) -{ - info->nr = skb->data[1] >> 5; - - pr_debug("%s(), nr=%d, %ld\n", __func__, info->nr, jiffies); - - if (command) - irlap_do_event(self, RECV_RNR_CMD, skb, info); - else - irlap_do_event(self, RECV_RNR_RSP, skb, info); -} - -static void irlap_recv_rej_frame(struct irlap_cb *self, struct sk_buff *skb, - struct irlap_info *info, int command) -{ - info->nr = skb->data[1] >> 5; - - /* Check if this is a command or a response frame */ - if (command) - irlap_do_event(self, RECV_REJ_CMD, skb, info); - else - irlap_do_event(self, RECV_REJ_RSP, skb, info); -} - -static void irlap_recv_srej_frame(struct irlap_cb *self, struct sk_buff *skb, - struct irlap_info *info, int command) -{ - info->nr = skb->data[1] >> 5; - - /* Check if this is a command or a response frame */ - if (command) - irlap_do_event(self, RECV_SREJ_CMD, skb, info); - else - irlap_do_event(self, RECV_SREJ_RSP, skb, info); -} - -static void irlap_recv_disc_frame(struct irlap_cb *self, struct sk_buff *skb, - struct irlap_info *info, int command) -{ - /* Check if this is a command or a response frame */ - if (command) - irlap_do_event(self, RECV_DISC_CMD, skb, info); - else - irlap_do_event(self, RECV_RD_RSP, skb, info); -} - -/* - * Function irlap_recv_ua_frame (skb, frame) - * - * Received UA (Unnumbered Acknowledgement) frame - * - */ -static inline void irlap_recv_ua_frame(struct irlap_cb *self, - struct sk_buff *skb, - struct irlap_info *info) -{ - irlap_do_event(self, RECV_UA_RSP, skb, info); -} - -/* - * Function irlap_send_data_primary(self, skb) - * - * Send I-frames as the primary station but without the poll bit set - * - */ -void irlap_send_data_primary(struct irlap_cb *self, struct sk_buff *skb) -{ - struct sk_buff *tx_skb; - - if (skb->data[1] == I_FRAME) { - - /* - * Insert frame sequence number (Vs) in control field before - * inserting into transmit window queue. - */ - skb->data[1] = I_FRAME | (self->vs << 1); - - /* - * Insert frame in store, in case of retransmissions - * Increase skb reference count, see irlap_do_event() - */ - skb_get(skb); - skb_queue_tail(&self->wx_list, skb); - - /* Copy buffer */ - tx_skb = skb_clone(skb, GFP_ATOMIC); - if (tx_skb == NULL) { - return; - } - - self->vs = (self->vs + 1) % 8; - self->ack_required = FALSE; - self->window -= 1; - - irlap_send_i_frame( self, tx_skb, CMD_FRAME); - } else { - pr_debug("%s(), sending unreliable frame\n", __func__); - irlap_send_ui_frame(self, skb_get(skb), self->caddr, CMD_FRAME); - self->window -= 1; - } -} -/* - * Function irlap_send_data_primary_poll (self, skb) - * - * Send I(nformation) frame as primary with poll bit set - */ -void irlap_send_data_primary_poll(struct irlap_cb *self, struct sk_buff *skb) -{ - struct sk_buff *tx_skb; - int transmission_time; - - /* Stop P timer */ - del_timer(&self->poll_timer); - - /* Is this reliable or unreliable data? */ - if (skb->data[1] == I_FRAME) { - - /* - * Insert frame sequence number (Vs) in control field before - * inserting into transmit window queue. - */ - skb->data[1] = I_FRAME | (self->vs << 1); - - /* - * Insert frame in store, in case of retransmissions - * Increase skb reference count, see irlap_do_event() - */ - skb_get(skb); - skb_queue_tail(&self->wx_list, skb); - - /* Copy buffer */ - tx_skb = skb_clone(skb, GFP_ATOMIC); - if (tx_skb == NULL) { - return; - } - - /* - * Set poll bit if necessary. We do this to the copied - * skb, since retransmitted need to set or clear the poll - * bit depending on when they are sent. - */ - tx_skb->data[1] |= PF_BIT; - - self->vs = (self->vs + 1) % 8; - self->ack_required = FALSE; - - irlap_next_state(self, LAP_NRM_P); - irlap_send_i_frame(self, tx_skb, CMD_FRAME); - } else { - pr_debug("%s(), sending unreliable frame\n", __func__); - - if (self->ack_required) { - irlap_send_ui_frame(self, skb_get(skb), self->caddr, CMD_FRAME); - irlap_next_state(self, LAP_NRM_P); - irlap_send_rr_frame(self, CMD_FRAME); - self->ack_required = FALSE; - } else { - skb->data[1] |= PF_BIT; - irlap_next_state(self, LAP_NRM_P); - irlap_send_ui_frame(self, skb_get(skb), self->caddr, CMD_FRAME); - } - } - - /* How much time we took for transmission of all frames. - * We don't know, so let assume we used the full window. Jean II */ - transmission_time = self->final_timeout; - - /* Reset parameter so that we can fill next window */ - self->window = self->window_size; - -#ifdef CONFIG_IRDA_DYNAMIC_WINDOW - /* Remove what we have not used. Just do a prorata of the - * bytes left in window to window capacity. - * See max_line_capacities[][] in qos.c for details. Jean II */ - transmission_time -= (self->final_timeout * self->bytes_left - / self->line_capacity); - pr_debug("%s() adjusting transmission_time : ft=%d, bl=%d, lc=%d -> tt=%d\n", - __func__, self->final_timeout, self->bytes_left, - self->line_capacity, transmission_time); - - /* We are allowed to transmit a maximum number of bytes again. */ - self->bytes_left = self->line_capacity; -#endif /* CONFIG_IRDA_DYNAMIC_WINDOW */ - - /* - * The network layer has a intermediate buffer between IrLAP - * and the IrDA driver which can contain 8 frames. So, even - * though IrLAP is currently sending the *last* frame of the - * tx-window, the driver most likely has only just started - * sending the *first* frame of the same tx-window. - * I.e. we are always at the very beginning of or Tx window. - * Now, we are supposed to set the final timer from the end - * of our tx-window to let the other peer reply. So, we need - * to add extra time to compensate for the fact that we - * are really at the start of tx-window, otherwise the final timer - * might expire before he can answer... - * Jean II - */ - irlap_start_final_timer(self, self->final_timeout + transmission_time); - - /* - * The clever amongst you might ask why we do this adjustement - * only here, and not in all the other cases in irlap_event.c. - * In all those other case, we only send a very short management - * frame (few bytes), so the adjustement would be lost in the - * noise... - * The exception of course is irlap_resend_rejected_frame(). - * Jean II */ -} - -/* - * Function irlap_send_data_secondary_final (self, skb) - * - * Send I(nformation) frame as secondary with final bit set - * - */ -void irlap_send_data_secondary_final(struct irlap_cb *self, - struct sk_buff *skb) -{ - struct sk_buff *tx_skb = NULL; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - IRDA_ASSERT(skb != NULL, return;); - - /* Is this reliable or unreliable data? */ - if (skb->data[1] == I_FRAME) { - - /* - * Insert frame sequence number (Vs) in control field before - * inserting into transmit window queue. - */ - skb->data[1] = I_FRAME | (self->vs << 1); - - /* - * Insert frame in store, in case of retransmissions - * Increase skb reference count, see irlap_do_event() - */ - skb_get(skb); - skb_queue_tail(&self->wx_list, skb); - - tx_skb = skb_clone(skb, GFP_ATOMIC); - if (tx_skb == NULL) { - return; - } - - tx_skb->data[1] |= PF_BIT; - - self->vs = (self->vs + 1) % 8; - self->ack_required = FALSE; - - irlap_send_i_frame(self, tx_skb, RSP_FRAME); - } else { - if (self->ack_required) { - irlap_send_ui_frame(self, skb_get(skb), self->caddr, RSP_FRAME); - irlap_send_rr_frame(self, RSP_FRAME); - self->ack_required = FALSE; - } else { - skb->data[1] |= PF_BIT; - irlap_send_ui_frame(self, skb_get(skb), self->caddr, RSP_FRAME); - } - } - - self->window = self->window_size; -#ifdef CONFIG_IRDA_DYNAMIC_WINDOW - /* We are allowed to transmit a maximum number of bytes again. */ - self->bytes_left = self->line_capacity; -#endif /* CONFIG_IRDA_DYNAMIC_WINDOW */ - - irlap_start_wd_timer(self, self->wd_timeout); -} - -/* - * Function irlap_send_data_secondary (self, skb) - * - * Send I(nformation) frame as secondary without final bit set - * - */ -void irlap_send_data_secondary(struct irlap_cb *self, struct sk_buff *skb) -{ - struct sk_buff *tx_skb = NULL; - - /* Is this reliable or unreliable data? */ - if (skb->data[1] == I_FRAME) { - - /* - * Insert frame sequence number (Vs) in control field before - * inserting into transmit window queue. - */ - skb->data[1] = I_FRAME | (self->vs << 1); - - /* - * Insert frame in store, in case of retransmissions - * Increase skb reference count, see irlap_do_event() - */ - skb_get(skb); - skb_queue_tail(&self->wx_list, skb); - - tx_skb = skb_clone(skb, GFP_ATOMIC); - if (tx_skb == NULL) { - return; - } - - self->vs = (self->vs + 1) % 8; - self->ack_required = FALSE; - self->window -= 1; - - irlap_send_i_frame(self, tx_skb, RSP_FRAME); - } else { - irlap_send_ui_frame(self, skb_get(skb), self->caddr, RSP_FRAME); - self->window -= 1; - } -} - -/* - * Function irlap_resend_rejected_frames (nr) - * - * Resend frames which has not been acknowledged. Should be safe to - * traverse the list without locking it since this function will only be - * called from interrupt context (BH) - */ -void irlap_resend_rejected_frames(struct irlap_cb *self, int command) -{ - struct sk_buff *tx_skb; - struct sk_buff *skb; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - - /* Resend unacknowledged frame(s) */ - skb_queue_walk(&self->wx_list, skb) { - irlap_wait_min_turn_around(self, &self->qos_tx); - - /* We copy the skb to be retransmitted since we will have to - * modify it. Cloning will confuse packet sniffers - */ - /* tx_skb = skb_clone( skb, GFP_ATOMIC); */ - tx_skb = skb_copy(skb, GFP_ATOMIC); - if (!tx_skb) { - pr_debug("%s(), unable to copy\n", __func__); - return; - } - - /* Clear old Nr field + poll bit */ - tx_skb->data[1] &= 0x0f; - - /* - * Set poll bit on the last frame retransmitted - */ - if (skb_queue_is_last(&self->wx_list, skb)) - tx_skb->data[1] |= PF_BIT; /* Set p/f bit */ - else - tx_skb->data[1] &= ~PF_BIT; /* Clear p/f bit */ - - irlap_send_i_frame(self, tx_skb, command); - } -#if 0 /* Not yet */ - /* - * We can now fill the window with additional data frames - */ - while (!skb_queue_empty(&self->txq)) { - - pr_debug("%s(), sending additional frames!\n", __func__); - if (self->window > 0) { - skb = skb_dequeue( &self->txq); - IRDA_ASSERT(skb != NULL, return;); - - /* - * If send window > 1 then send frame with pf - * bit cleared - */ - if ((self->window > 1) && - !skb_queue_empty(&self->txq)) { - irlap_send_data_primary(self, skb); - } else { - irlap_send_data_primary_poll(self, skb); - } - kfree_skb(skb); - } - } -#endif -} - -void irlap_resend_rejected_frame(struct irlap_cb *self, int command) -{ - struct sk_buff *tx_skb; - struct sk_buff *skb; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - - /* Resend unacknowledged frame(s) */ - skb = skb_peek(&self->wx_list); - if (skb != NULL) { - irlap_wait_min_turn_around(self, &self->qos_tx); - - /* We copy the skb to be retransmitted since we will have to - * modify it. Cloning will confuse packet sniffers - */ - /* tx_skb = skb_clone( skb, GFP_ATOMIC); */ - tx_skb = skb_copy(skb, GFP_ATOMIC); - if (!tx_skb) { - pr_debug("%s(), unable to copy\n", __func__); - return; - } - - /* Clear old Nr field + poll bit */ - tx_skb->data[1] &= 0x0f; - - /* Set poll/final bit */ - tx_skb->data[1] |= PF_BIT; /* Set p/f bit */ - - irlap_send_i_frame(self, tx_skb, command); - } -} - -/* - * Function irlap_send_ui_frame (self, skb, command) - * - * Contruct and transmit an Unnumbered Information (UI) frame - * - */ -void irlap_send_ui_frame(struct irlap_cb *self, struct sk_buff *skb, - __u8 caddr, int command) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - IRDA_ASSERT(skb != NULL, return;); - - /* Insert connection address */ - skb->data[0] = caddr | ((command) ? CMD_FRAME : 0); - - irlap_queue_xmit(self, skb); -} - -/* - * Function irlap_send_i_frame (skb) - * - * Contruct and transmit Information (I) frame - */ -static void irlap_send_i_frame(struct irlap_cb *self, struct sk_buff *skb, - int command) -{ - /* Insert connection address */ - skb->data[0] = self->caddr; - skb->data[0] |= (command) ? CMD_FRAME : 0; - - /* Insert next to receive (Vr) */ - skb->data[1] |= (self->vr << 5); /* insert nr */ - - irlap_queue_xmit(self, skb); -} - -/* - * Function irlap_recv_i_frame (skb, frame) - * - * Receive and parse an I (Information) frame, no harm in making it inline - * since it's called only from one single place (irlap_driver_rcv). - */ -static inline void irlap_recv_i_frame(struct irlap_cb *self, - struct sk_buff *skb, - struct irlap_info *info, int command) -{ - info->nr = skb->data[1] >> 5; /* Next to receive */ - info->pf = skb->data[1] & PF_BIT; /* Final bit */ - info->ns = (skb->data[1] >> 1) & 0x07; /* Next to send */ - - /* Check if this is a command or a response frame */ - if (command) - irlap_do_event(self, RECV_I_CMD, skb, info); - else - irlap_do_event(self, RECV_I_RSP, skb, info); -} - -/* - * Function irlap_recv_ui_frame (self, skb, info) - * - * Receive and parse an Unnumbered Information (UI) frame - * - */ -static void irlap_recv_ui_frame(struct irlap_cb *self, struct sk_buff *skb, - struct irlap_info *info) -{ - info->pf = skb->data[1] & PF_BIT; /* Final bit */ - - irlap_do_event(self, RECV_UI_FRAME, skb, info); -} - -/* - * Function irlap_recv_frmr_frame (skb, frame) - * - * Received Frame Reject response. - * - */ -static void irlap_recv_frmr_frame(struct irlap_cb *self, struct sk_buff *skb, - struct irlap_info *info) -{ - __u8 *frame; - int w, x, y, z; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - IRDA_ASSERT(skb != NULL, return;); - IRDA_ASSERT(info != NULL, return;); - - if (!pskb_may_pull(skb, 4)) { - net_err_ratelimited("%s: frame too short!\n", __func__); - return; - } - - frame = skb->data; - - info->nr = frame[2] >> 5; /* Next to receive */ - info->pf = frame[2] & PF_BIT; /* Final bit */ - info->ns = (frame[2] >> 1) & 0x07; /* Next to send */ - - w = frame[3] & 0x01; - x = frame[3] & 0x02; - y = frame[3] & 0x04; - z = frame[3] & 0x08; - - if (w) { - pr_debug("Rejected control field is undefined or not implemented\n"); - } - if (x) { - pr_debug("Rejected control field was invalid because it contained a non permitted I field\n"); - } - if (y) { - pr_debug("Received I field exceeded the maximum negotiated for the existing connection or exceeded the maximum this station supports if no connection exists\n"); - } - if (z) { - pr_debug("Rejected control field control field contained an invalid Nr count\n"); - } - irlap_do_event(self, RECV_FRMR_RSP, skb, info); -} - -/* - * Function irlap_send_test_frame (self, daddr) - * - * Send a test frame response - * - */ -void irlap_send_test_frame(struct irlap_cb *self, __u8 caddr, __u32 daddr, - struct sk_buff *cmd) -{ - struct sk_buff *tx_skb; - struct test_frame *frame; - - tx_skb = alloc_skb(cmd->len + sizeof(struct test_frame), GFP_ATOMIC); - if (!tx_skb) - return; - - /* Broadcast frames must include saddr and daddr fields */ - if (caddr == CBROADCAST) { - frame = skb_put(tx_skb, sizeof(struct test_frame)); - - /* Insert the swapped addresses */ - frame->saddr = cpu_to_le32(self->saddr); - frame->daddr = cpu_to_le32(daddr); - } else - frame = skb_put(tx_skb, LAP_ADDR_HEADER + LAP_CTRL_HEADER); - - frame->caddr = caddr; - frame->control = TEST_RSP | PF_BIT; - - /* Copy info */ - skb_put_data(tx_skb, cmd->data, cmd->len); - - /* Return to sender */ - irlap_wait_min_turn_around(self, &self->qos_tx); - irlap_queue_xmit(self, tx_skb); -} - -/* - * Function irlap_recv_test_frame (self, skb) - * - * Receive a test frame - * - */ -static void irlap_recv_test_frame(struct irlap_cb *self, struct sk_buff *skb, - struct irlap_info *info, int command) -{ - struct test_frame *frame; - - if (!pskb_may_pull(skb, sizeof(*frame))) { - net_err_ratelimited("%s: frame too short!\n", __func__); - return; - } - frame = (struct test_frame *) skb->data; - - /* Broadcast frames must carry saddr and daddr fields */ - if (info->caddr == CBROADCAST) { - if (skb->len < sizeof(struct test_frame)) { - pr_debug("%s() test frame too short!\n", - __func__); - return; - } - - /* Read and swap addresses */ - info->daddr = le32_to_cpu(frame->saddr); - info->saddr = le32_to_cpu(frame->daddr); - - /* Make sure frame is addressed to us */ - if ((info->saddr != self->saddr) && - (info->saddr != BROADCAST)) { - return; - } - } - - if (command) - irlap_do_event(self, RECV_TEST_CMD, skb, info); - else - irlap_do_event(self, RECV_TEST_RSP, skb, info); -} - -/* - * Function irlap_driver_rcv (skb, netdev, ptype) - * - * Called when a frame is received. Dispatches the right receive function - * for processing of the frame. - * - * Note on skb management : - * After calling the higher layers of the IrDA stack, we always - * kfree() the skb, which drop the reference count (and potentially - * destroy it). - * If a higher layer of the stack want to keep the skb around (to put - * in a queue or pass it to the higher layer), it will need to use - * skb_get() to keep a reference on it. This is usually done at the - * LMP level in irlmp.c. - * Jean II - */ -int irlap_driver_rcv(struct sk_buff *skb, struct net_device *dev, - struct packet_type *ptype, struct net_device *orig_dev) -{ - struct irlap_info info; - struct irlap_cb *self; - int command; - __u8 control; - int ret = -1; - - if (!net_eq(dev_net(dev), &init_net)) - goto out; - - /* FIXME: should we get our own field? */ - self = (struct irlap_cb *) dev->atalk_ptr; - - /* If the net device is down, then IrLAP is gone! */ - if (!self || self->magic != LAP_MAGIC) - goto err; - - /* We are no longer an "old" protocol, so we need to handle - * share and non linear skbs. This should never happen, so - * we don't need to be clever about it. Jean II */ - if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) { - net_err_ratelimited("%s: can't clone shared skb!\n", __func__); - goto err; - } - - /* Check if frame is large enough for parsing */ - if (!pskb_may_pull(skb, 2)) { - net_err_ratelimited("%s: frame too short!\n", __func__); - goto err; - } - - command = skb->data[0] & CMD_FRAME; - info.caddr = skb->data[0] & CBROADCAST; - - info.pf = skb->data[1] & PF_BIT; - info.control = skb->data[1] & ~PF_BIT; /* Mask away poll/final bit */ - - control = info.control; - - /* First we check if this frame has a valid connection address */ - if ((info.caddr != self->caddr) && (info.caddr != CBROADCAST)) { - pr_debug("%s(), wrong connection address!\n", - __func__); - goto out; - } - /* - * Optimize for the common case and check if the frame is an - * I(nformation) frame. Only I-frames have bit 0 set to 0 - */ - if (~control & 0x01) { - irlap_recv_i_frame(self, skb, &info, command); - goto out; - } - /* - * We now check is the frame is an S(upervisory) frame. Only - * S-frames have bit 0 set to 1 and bit 1 set to 0 - */ - if (~control & 0x02) { - /* - * Received S(upervisory) frame, check which frame type it is - * only the first nibble is of interest - */ - switch (control & 0x0f) { - case RR: - irlap_recv_rr_frame(self, skb, &info, command); - break; - case RNR: - irlap_recv_rnr_frame(self, skb, &info, command); - break; - case REJ: - irlap_recv_rej_frame(self, skb, &info, command); - break; - case SREJ: - irlap_recv_srej_frame(self, skb, &info, command); - break; - default: - net_warn_ratelimited("%s: Unknown S-frame %02x received!\n", - __func__, info.control); - break; - } - goto out; - } - /* - * This must be a C(ontrol) frame - */ - switch (control) { - case XID_RSP: - irlap_recv_discovery_xid_rsp(self, skb, &info); - break; - case XID_CMD: - irlap_recv_discovery_xid_cmd(self, skb, &info); - break; - case SNRM_CMD: - irlap_recv_snrm_cmd(self, skb, &info); - break; - case DM_RSP: - irlap_do_event(self, RECV_DM_RSP, skb, &info); - break; - case DISC_CMD: /* And RD_RSP since they have the same value */ - irlap_recv_disc_frame(self, skb, &info, command); - break; - case TEST_CMD: - irlap_recv_test_frame(self, skb, &info, command); - break; - case UA_RSP: - irlap_recv_ua_frame(self, skb, &info); - break; - case FRMR_RSP: - irlap_recv_frmr_frame(self, skb, &info); - break; - case UI_FRAME: - irlap_recv_ui_frame(self, skb, &info); - break; - default: - net_warn_ratelimited("%s: Unknown frame %02x received!\n", - __func__, info.control); - break; - } -out: - ret = 0; -err: - /* Always drop our reference on the skb */ - dev_kfree_skb(skb); - return ret; -} diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c deleted file mode 100644 index 43964594aa12..000000000000 --- a/net/irda/irlmp.c +++ /dev/null @@ -1,1996 +0,0 @@ -/********************************************************************* - * - * Filename: irlmp.c - * Version: 1.0 - * Description: IrDA Link Management Protocol (LMP) layer - * Status: Stable. - * Author: Dag Brattli <dagb@cs.uit.no> - * Created at: Sun Aug 17 20:54:32 1997 - * Modified at: Wed Jan 5 11:26:03 2000 - * Modified by: Dag Brattli <dagb@cs.uit.no> - * - * Copyright (c) 1998-2000 Dag Brattli <dagb@cs.uit.no>, - * All Rights Reserved. - * Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#include <linux/module.h> -#include <linux/slab.h> -#include <linux/string.h> -#include <linux/skbuff.h> -#include <linux/types.h> -#include <linux/proc_fs.h> -#include <linux/init.h> -#include <linux/kmod.h> -#include <linux/random.h> -#include <linux/seq_file.h> - -#include <net/irda/irda.h> -#include <net/irda/timer.h> -#include <net/irda/qos.h> -#include <net/irda/irlap.h> -#include <net/irda/iriap.h> -#include <net/irda/irlmp.h> -#include <net/irda/irlmp_frame.h> - -#include <asm/unaligned.h> - -static __u8 irlmp_find_free_slsap(void); -static int irlmp_slsap_inuse(__u8 slsap_sel); - -/* Master structure */ -struct irlmp_cb *irlmp = NULL; - -/* These can be altered by the sysctl interface */ -int sysctl_discovery = 0; -int sysctl_discovery_timeout = 3; /* 3 seconds by default */ -int sysctl_discovery_slots = 6; /* 6 slots by default */ -int sysctl_lap_keepalive_time = LM_IDLE_TIMEOUT * 1000 / HZ; -char sysctl_devname[65]; - -static const char *irlmp_reasons[] = { - "ERROR, NOT USED", - "LM_USER_REQUEST", - "LM_LAP_DISCONNECT", - "LM_CONNECT_FAILURE", - "LM_LAP_RESET", - "LM_INIT_DISCONNECT", - "ERROR, NOT USED", - "UNKNOWN", -}; - -const char *irlmp_reason_str(LM_REASON reason) -{ - reason = min_t(size_t, reason, ARRAY_SIZE(irlmp_reasons) - 1); - return irlmp_reasons[reason]; -} - -/* - * Function irlmp_init (void) - * - * Create (allocate) the main IrLMP structure - * - */ -int __init irlmp_init(void) -{ - /* Initialize the irlmp structure. */ - irlmp = kzalloc( sizeof(struct irlmp_cb), GFP_KERNEL); - if (irlmp == NULL) - return -ENOMEM; - - irlmp->magic = LMP_MAGIC; - - irlmp->clients = hashbin_new(HB_LOCK); - irlmp->services = hashbin_new(HB_LOCK); - irlmp->links = hashbin_new(HB_LOCK); - irlmp->unconnected_lsaps = hashbin_new(HB_LOCK); - irlmp->cachelog = hashbin_new(HB_NOLOCK); - - if ((irlmp->clients == NULL) || - (irlmp->services == NULL) || - (irlmp->links == NULL) || - (irlmp->unconnected_lsaps == NULL) || - (irlmp->cachelog == NULL)) { - return -ENOMEM; - } - - spin_lock_init(&irlmp->cachelog->hb_spinlock); - - irlmp->last_lsap_sel = 0x0f; /* Reserved 0x00-0x0f */ - strcpy(sysctl_devname, "Linux"); - - init_timer(&irlmp->discovery_timer); - - /* Do discovery every 3 seconds, conditionally */ - if (sysctl_discovery) - irlmp_start_discovery_timer(irlmp, - sysctl_discovery_timeout*HZ); - - return 0; -} - -/* - * Function irlmp_cleanup (void) - * - * Remove IrLMP layer - * - */ -void irlmp_cleanup(void) -{ - /* Check for main structure */ - IRDA_ASSERT(irlmp != NULL, return;); - IRDA_ASSERT(irlmp->magic == LMP_MAGIC, return;); - - del_timer(&irlmp->discovery_timer); - - hashbin_delete(irlmp->links, (FREE_FUNC) kfree); - hashbin_delete(irlmp->unconnected_lsaps, (FREE_FUNC) kfree); - hashbin_delete(irlmp->clients, (FREE_FUNC) kfree); - hashbin_delete(irlmp->services, (FREE_FUNC) kfree); - hashbin_delete(irlmp->cachelog, (FREE_FUNC) kfree); - - /* De-allocate main structure */ - kfree(irlmp); - irlmp = NULL; -} - -/* - * Function irlmp_open_lsap (slsap, notify) - * - * Register with IrLMP and create a local LSAP, - * returns handle to LSAP. - */ -struct lsap_cb *irlmp_open_lsap(__u8 slsap_sel, notify_t *notify, __u8 pid) -{ - struct lsap_cb *self; - - IRDA_ASSERT(notify != NULL, return NULL;); - IRDA_ASSERT(irlmp != NULL, return NULL;); - IRDA_ASSERT(irlmp->magic == LMP_MAGIC, return NULL;); - IRDA_ASSERT(notify->instance != NULL, return NULL;); - - /* Does the client care which Source LSAP selector it gets? */ - if (slsap_sel == LSAP_ANY) { - slsap_sel = irlmp_find_free_slsap(); - if (!slsap_sel) - return NULL; - } else if (irlmp_slsap_inuse(slsap_sel)) - return NULL; - - /* Allocate new instance of a LSAP connection */ - self = kzalloc(sizeof(struct lsap_cb), GFP_ATOMIC); - if (self == NULL) - return NULL; - - self->magic = LMP_LSAP_MAGIC; - self->slsap_sel = slsap_sel; - - /* Fix connectionless LSAP's */ - if (slsap_sel == LSAP_CONNLESS) { -#ifdef CONFIG_IRDA_ULTRA - self->dlsap_sel = LSAP_CONNLESS; - self->pid = pid; -#endif /* CONFIG_IRDA_ULTRA */ - } else - self->dlsap_sel = LSAP_ANY; - /* self->connected = FALSE; -> already NULL via memset() */ - - init_timer(&self->watchdog_timer); - - self->notify = *notify; - - self->lsap_state = LSAP_DISCONNECTED; - - /* Insert into queue of unconnected LSAPs */ - hashbin_insert(irlmp->unconnected_lsaps, (irda_queue_t *) self, - (long) self, NULL); - - return self; -} -EXPORT_SYMBOL(irlmp_open_lsap); - -/* - * Function __irlmp_close_lsap (self) - * - * Remove an instance of LSAP - */ -static void __irlmp_close_lsap(struct lsap_cb *self) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return;); - - /* - * Set some of the variables to preset values - */ - self->magic = 0; - del_timer(&self->watchdog_timer); /* Important! */ - - if (self->conn_skb) - dev_kfree_skb(self->conn_skb); - - kfree(self); -} - -/* - * Function irlmp_close_lsap (self) - * - * Close and remove LSAP - * - */ -void irlmp_close_lsap(struct lsap_cb *self) -{ - struct lap_cb *lap; - struct lsap_cb *lsap = NULL; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return;); - - /* - * Find out if we should remove this LSAP from a link or from the - * list of unconnected lsaps (not associated with a link) - */ - lap = self->lap; - if (lap) { - IRDA_ASSERT(lap->magic == LMP_LAP_MAGIC, return;); - /* We might close a LSAP before it has completed the - * connection setup. In those case, higher layers won't - * send a proper disconnect request. Harmless, except - * that we will forget to close LAP... - Jean II */ - if(self->lsap_state != LSAP_DISCONNECTED) { - self->lsap_state = LSAP_DISCONNECTED; - irlmp_do_lap_event(self->lap, - LM_LAP_DISCONNECT_REQUEST, NULL); - } - /* Now, remove from the link */ - lsap = hashbin_remove(lap->lsaps, (long) self, NULL); -#ifdef CONFIG_IRDA_CACHE_LAST_LSAP - lap->cache.valid = FALSE; -#endif - } - self->lap = NULL; - /* Check if we found the LSAP! If not then try the unconnected lsaps */ - if (!lsap) { - lsap = hashbin_remove(irlmp->unconnected_lsaps, (long) self, - NULL); - } - if (!lsap) { - pr_debug("%s(), Looks like somebody has removed me already!\n", - __func__); - return; - } - __irlmp_close_lsap(self); -} -EXPORT_SYMBOL(irlmp_close_lsap); - -/* - * Function irlmp_register_irlap (saddr, notify) - * - * Register IrLAP layer with IrLMP. There is possible to have multiple - * instances of the IrLAP layer, each connected to different IrDA ports - * - */ -void irlmp_register_link(struct irlap_cb *irlap, __u32 saddr, notify_t *notify) -{ - struct lap_cb *lap; - - IRDA_ASSERT(irlmp != NULL, return;); - IRDA_ASSERT(irlmp->magic == LMP_MAGIC, return;); - IRDA_ASSERT(notify != NULL, return;); - - /* - * Allocate new instance of a LSAP connection - */ - lap = kzalloc(sizeof(struct lap_cb), GFP_KERNEL); - if (lap == NULL) - return; - - lap->irlap = irlap; - lap->magic = LMP_LAP_MAGIC; - lap->saddr = saddr; - lap->daddr = DEV_ADDR_ANY; -#ifdef CONFIG_IRDA_CACHE_LAST_LSAP - lap->cache.valid = FALSE; -#endif - lap->lsaps = hashbin_new(HB_LOCK); - if (lap->lsaps == NULL) { - net_warn_ratelimited("%s(), unable to kmalloc lsaps\n", - __func__); - kfree(lap); - return; - } - - lap->lap_state = LAP_STANDBY; - - init_timer(&lap->idle_timer); - - /* - * Insert into queue of LMP links - */ - hashbin_insert(irlmp->links, (irda_queue_t *) lap, lap->saddr, NULL); - - /* - * We set only this variable so IrLAP can tell us on which link the - * different events happened on - */ - irda_notify_init(notify); - notify->instance = lap; -} - -/* - * Function irlmp_unregister_irlap (saddr) - * - * IrLAP layer has been removed! - * - */ -void irlmp_unregister_link(__u32 saddr) -{ - struct lap_cb *link; - - /* We must remove ourselves from the hashbin *first*. This ensure - * that no more LSAPs will be open on this link and no discovery - * will be triggered anymore. Jean II */ - link = hashbin_remove(irlmp->links, saddr, NULL); - if (link) { - IRDA_ASSERT(link->magic == LMP_LAP_MAGIC, return;); - - /* Kill all the LSAPs on this link. Jean II */ - link->reason = LAP_DISC_INDICATION; - link->daddr = DEV_ADDR_ANY; - irlmp_do_lap_event(link, LM_LAP_DISCONNECT_INDICATION, NULL); - - /* Remove all discoveries discovered at this link */ - irlmp_expire_discoveries(irlmp->cachelog, link->saddr, TRUE); - - /* Final cleanup */ - del_timer(&link->idle_timer); - link->magic = 0; - hashbin_delete(link->lsaps, (FREE_FUNC) __irlmp_close_lsap); - kfree(link); - } -} - -/* - * Function irlmp_connect_request (handle, dlsap, userdata) - * - * Connect with a peer LSAP - * - */ -int irlmp_connect_request(struct lsap_cb *self, __u8 dlsap_sel, - __u32 saddr, __u32 daddr, - struct qos_info *qos, struct sk_buff *userdata) -{ - struct sk_buff *tx_skb = userdata; - struct lap_cb *lap; - struct lsap_cb *lsap; - int ret; - - IRDA_ASSERT(self != NULL, return -EBADR;); - IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -EBADR;); - - pr_debug("%s(), slsap_sel=%02x, dlsap_sel=%02x, saddr=%08x, daddr=%08x\n", - __func__, self->slsap_sel, dlsap_sel, saddr, daddr); - - if (test_bit(0, &self->connected)) { - ret = -EISCONN; - goto err; - } - - /* Client must supply destination device address */ - if (!daddr) { - ret = -EINVAL; - goto err; - } - - /* Any userdata? */ - if (tx_skb == NULL) { - tx_skb = alloc_skb(LMP_MAX_HEADER, GFP_ATOMIC); - if (!tx_skb) - return -ENOMEM; - - skb_reserve(tx_skb, LMP_MAX_HEADER); - } - - /* Make room for MUX control header (3 bytes) */ - IRDA_ASSERT(skb_headroom(tx_skb) >= LMP_CONTROL_HEADER, return -1;); - skb_push(tx_skb, LMP_CONTROL_HEADER); - - self->dlsap_sel = dlsap_sel; - - /* - * Find the link to where we should try to connect since there may - * be more than one IrDA port on this machine. If the client has - * passed us the saddr (and already knows which link to use), then - * we use that to find the link, if not then we have to look in the - * discovery log and check if any of the links has discovered a - * device with the given daddr - */ - if ((!saddr) || (saddr == DEV_ADDR_ANY)) { - discovery_t *discovery; - unsigned long flags; - - spin_lock_irqsave(&irlmp->cachelog->hb_spinlock, flags); - if (daddr != DEV_ADDR_ANY) - discovery = hashbin_find(irlmp->cachelog, daddr, NULL); - else { - pr_debug("%s(), no daddr\n", __func__); - discovery = (discovery_t *) - hashbin_get_first(irlmp->cachelog); - } - - if (discovery) { - saddr = discovery->data.saddr; - daddr = discovery->data.daddr; - } - spin_unlock_irqrestore(&irlmp->cachelog->hb_spinlock, flags); - } - lap = hashbin_lock_find(irlmp->links, saddr, NULL); - if (lap == NULL) { - pr_debug("%s(), Unable to find a usable link!\n", __func__); - ret = -EHOSTUNREACH; - goto err; - } - - /* Check if LAP is disconnected or already connected */ - if (lap->daddr == DEV_ADDR_ANY) - lap->daddr = daddr; - else if (lap->daddr != daddr) { - /* Check if some LSAPs are active on this LAP */ - if (HASHBIN_GET_SIZE(lap->lsaps) == 0) { - /* No active connection, but LAP hasn't been - * disconnected yet (waiting for timeout in LAP). - * Maybe we could give LAP a bit of help in this case. - */ - pr_debug("%s(), sorry, but I'm waiting for LAP to timeout!\n", - __func__); - ret = -EAGAIN; - goto err; - } - - /* LAP is already connected to a different node, and LAP - * can only talk to one node at a time */ - pr_debug("%s(), sorry, but link is busy!\n", __func__); - ret = -EBUSY; - goto err; - } - - self->lap = lap; - - /* - * Remove LSAP from list of unconnected LSAPs and insert it into the - * list of connected LSAPs for the particular link - */ - lsap = hashbin_remove(irlmp->unconnected_lsaps, (long) self, NULL); - - IRDA_ASSERT(lsap != NULL, return -1;); - IRDA_ASSERT(lsap->magic == LMP_LSAP_MAGIC, return -1;); - IRDA_ASSERT(lsap->lap != NULL, return -1;); - IRDA_ASSERT(lsap->lap->magic == LMP_LAP_MAGIC, return -1;); - - hashbin_insert(self->lap->lsaps, (irda_queue_t *) self, (long) self, - NULL); - - set_bit(0, &self->connected); /* TRUE */ - - /* - * User supplied qos specifications? - */ - if (qos) - self->qos = *qos; - - irlmp_do_lsap_event(self, LM_CONNECT_REQUEST, tx_skb); - - /* Drop reference count - see irlap_data_request(). */ - dev_kfree_skb(tx_skb); - - return 0; - -err: - /* Cleanup */ - if(tx_skb) - dev_kfree_skb(tx_skb); - return ret; -} -EXPORT_SYMBOL(irlmp_connect_request); - -/* - * Function irlmp_connect_indication (self) - * - * Incoming connection - * - */ -void irlmp_connect_indication(struct lsap_cb *self, struct sk_buff *skb) -{ - int max_seg_size; - int lap_header_size; - int max_header_size; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return;); - IRDA_ASSERT(skb != NULL, return;); - IRDA_ASSERT(self->lap != NULL, return;); - - pr_debug("%s(), slsap_sel=%02x, dlsap_sel=%02x\n", - __func__, self->slsap_sel, self->dlsap_sel); - - /* Note : self->lap is set in irlmp_link_data_indication(), - * (case CONNECT_CMD:) because we have no way to set it here. - * Similarly, self->dlsap_sel is usually set in irlmp_find_lsap(). - * Jean II */ - - self->qos = *self->lap->qos; - - max_seg_size = self->lap->qos->data_size.value-LMP_HEADER; - lap_header_size = IRLAP_GET_HEADER_SIZE(self->lap->irlap); - max_header_size = LMP_HEADER + lap_header_size; - - /* Hide LMP_CONTROL_HEADER header from layer above */ - skb_pull(skb, LMP_CONTROL_HEADER); - - if (self->notify.connect_indication) { - /* Don't forget to refcount it - see irlap_driver_rcv(). */ - skb_get(skb); - self->notify.connect_indication(self->notify.instance, self, - &self->qos, max_seg_size, - max_header_size, skb); - } -} - -/* - * Function irlmp_connect_response (handle, userdata) - * - * Service user is accepting connection - * - */ -int irlmp_connect_response(struct lsap_cb *self, struct sk_buff *userdata) -{ - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -1;); - IRDA_ASSERT(userdata != NULL, return -1;); - - /* We set the connected bit and move the lsap to the connected list - * in the state machine itself. Jean II */ - - pr_debug("%s(), slsap_sel=%02x, dlsap_sel=%02x\n", - __func__, self->slsap_sel, self->dlsap_sel); - - /* Make room for MUX control header (3 bytes) */ - IRDA_ASSERT(skb_headroom(userdata) >= LMP_CONTROL_HEADER, return -1;); - skb_push(userdata, LMP_CONTROL_HEADER); - - irlmp_do_lsap_event(self, LM_CONNECT_RESPONSE, userdata); - - /* Drop reference count - see irlap_data_request(). */ - dev_kfree_skb(userdata); - - return 0; -} -EXPORT_SYMBOL(irlmp_connect_response); - -/* - * Function irlmp_connect_confirm (handle, skb) - * - * LSAP connection confirmed peer device! - */ -void irlmp_connect_confirm(struct lsap_cb *self, struct sk_buff *skb) -{ - int max_header_size; - int lap_header_size; - int max_seg_size; - - IRDA_ASSERT(skb != NULL, return;); - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return;); - IRDA_ASSERT(self->lap != NULL, return;); - - self->qos = *self->lap->qos; - - max_seg_size = self->lap->qos->data_size.value-LMP_HEADER; - lap_header_size = IRLAP_GET_HEADER_SIZE(self->lap->irlap); - max_header_size = LMP_HEADER + lap_header_size; - - pr_debug("%s(), max_header_size=%d\n", - __func__, max_header_size); - - /* Hide LMP_CONTROL_HEADER header from layer above */ - skb_pull(skb, LMP_CONTROL_HEADER); - - if (self->notify.connect_confirm) { - /* Don't forget to refcount it - see irlap_driver_rcv() */ - skb_get(skb); - self->notify.connect_confirm(self->notify.instance, self, - &self->qos, max_seg_size, - max_header_size, skb); - } -} - -/* - * Function irlmp_dup (orig, instance) - * - * Duplicate LSAP, can be used by servers to confirm a connection on a - * new LSAP so it can keep listening on the old one. - * - */ -struct lsap_cb *irlmp_dup(struct lsap_cb *orig, void *instance) -{ - struct lsap_cb *new; - unsigned long flags; - - spin_lock_irqsave(&irlmp->unconnected_lsaps->hb_spinlock, flags); - - /* Only allowed to duplicate unconnected LSAP's, and only LSAPs - * that have received a connect indication. Jean II */ - if ((!hashbin_find(irlmp->unconnected_lsaps, (long) orig, NULL)) || - (orig->lap == NULL)) { - pr_debug("%s(), invalid LSAP (wrong state)\n", - __func__); - spin_unlock_irqrestore(&irlmp->unconnected_lsaps->hb_spinlock, - flags); - return NULL; - } - - /* Allocate a new instance */ - new = kmemdup(orig, sizeof(*new), GFP_ATOMIC); - if (!new) { - pr_debug("%s(), unable to kmalloc\n", __func__); - spin_unlock_irqrestore(&irlmp->unconnected_lsaps->hb_spinlock, - flags); - return NULL; - } - /* new->lap = orig->lap; => done in the memcpy() */ - /* new->slsap_sel = orig->slsap_sel; => done in the memcpy() */ - new->conn_skb = NULL; - - spin_unlock_irqrestore(&irlmp->unconnected_lsaps->hb_spinlock, flags); - - /* Not everything is the same */ - new->notify.instance = instance; - - init_timer(&new->watchdog_timer); - - hashbin_insert(irlmp->unconnected_lsaps, (irda_queue_t *) new, - (long) new, NULL); - -#ifdef CONFIG_IRDA_CACHE_LAST_LSAP - /* Make sure that we invalidate the LSAP cache */ - new->lap->cache.valid = FALSE; -#endif /* CONFIG_IRDA_CACHE_LAST_LSAP */ - - return new; -} - -/* - * Function irlmp_disconnect_request (handle, userdata) - * - * The service user is requesting disconnection, this will not remove the - * LSAP, but only mark it as disconnected - */ -int irlmp_disconnect_request(struct lsap_cb *self, struct sk_buff *userdata) -{ - struct lsap_cb *lsap; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -1;); - IRDA_ASSERT(userdata != NULL, return -1;); - - /* Already disconnected ? - * There is a race condition between irlmp_disconnect_indication() - * and us that might mess up the hashbins below. This fixes it. - * Jean II */ - if (! test_and_clear_bit(0, &self->connected)) { - pr_debug("%s(), already disconnected!\n", __func__); - dev_kfree_skb(userdata); - return -1; - } - - skb_push(userdata, LMP_CONTROL_HEADER); - - /* - * Do the event before the other stuff since we must know - * which lap layer that the frame should be transmitted on - */ - irlmp_do_lsap_event(self, LM_DISCONNECT_REQUEST, userdata); - - /* Drop reference count - see irlap_data_request(). */ - dev_kfree_skb(userdata); - - /* - * Remove LSAP from list of connected LSAPs for the particular link - * and insert it into the list of unconnected LSAPs - */ - IRDA_ASSERT(self->lap != NULL, return -1;); - IRDA_ASSERT(self->lap->magic == LMP_LAP_MAGIC, return -1;); - IRDA_ASSERT(self->lap->lsaps != NULL, return -1;); - - lsap = hashbin_remove(self->lap->lsaps, (long) self, NULL); -#ifdef CONFIG_IRDA_CACHE_LAST_LSAP - self->lap->cache.valid = FALSE; -#endif - - IRDA_ASSERT(lsap != NULL, return -1;); - IRDA_ASSERT(lsap->magic == LMP_LSAP_MAGIC, return -1;); - IRDA_ASSERT(lsap == self, return -1;); - - hashbin_insert(irlmp->unconnected_lsaps, (irda_queue_t *) self, - (long) self, NULL); - - /* Reset some values */ - self->dlsap_sel = LSAP_ANY; - self->lap = NULL; - - return 0; -} -EXPORT_SYMBOL(irlmp_disconnect_request); - -/* - * Function irlmp_disconnect_indication (reason, userdata) - * - * LSAP is being closed! - */ -void irlmp_disconnect_indication(struct lsap_cb *self, LM_REASON reason, - struct sk_buff *skb) -{ - struct lsap_cb *lsap; - - pr_debug("%s(), reason=%s [%d]\n", __func__, - irlmp_reason_str(reason), reason); - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return;); - - pr_debug("%s(), slsap_sel=%02x, dlsap_sel=%02x\n", - __func__, self->slsap_sel, self->dlsap_sel); - - /* Already disconnected ? - * There is a race condition between irlmp_disconnect_request() - * and us that might mess up the hashbins below. This fixes it. - * Jean II */ - if (! test_and_clear_bit(0, &self->connected)) { - pr_debug("%s(), already disconnected!\n", __func__); - return; - } - - /* - * Remove association between this LSAP and the link it used - */ - IRDA_ASSERT(self->lap != NULL, return;); - IRDA_ASSERT(self->lap->lsaps != NULL, return;); - - lsap = hashbin_remove(self->lap->lsaps, (long) self, NULL); -#ifdef CONFIG_IRDA_CACHE_LAST_LSAP - self->lap->cache.valid = FALSE; -#endif - - IRDA_ASSERT(lsap != NULL, return;); - IRDA_ASSERT(lsap == self, return;); - hashbin_insert(irlmp->unconnected_lsaps, (irda_queue_t *) lsap, - (long) lsap, NULL); - - self->dlsap_sel = LSAP_ANY; - self->lap = NULL; - - /* - * Inform service user - */ - if (self->notify.disconnect_indication) { - /* Don't forget to refcount it - see irlap_driver_rcv(). */ - if(skb) - skb_get(skb); - self->notify.disconnect_indication(self->notify.instance, - self, reason, skb); - } else { - pr_debug("%s(), no handler\n", __func__); - } -} - -/* - * Function irlmp_do_expiry (void) - * - * Do a cleanup of the discovery log (remove old entries) - * - * Note : separate from irlmp_do_discovery() so that we can handle - * passive discovery properly. - */ -void irlmp_do_expiry(void) -{ - struct lap_cb *lap; - - /* - * Expire discovery on all links which are *not* connected. - * On links which are connected, we can't do discovery - * anymore and can't refresh the log, so we freeze the - * discovery log to keep info about the device we are - * connected to. - * This info is mandatory if we want irlmp_connect_request() - * to work properly. - Jean II - */ - lap = (struct lap_cb *) hashbin_get_first(irlmp->links); - while (lap != NULL) { - IRDA_ASSERT(lap->magic == LMP_LAP_MAGIC, return;); - - if (lap->lap_state == LAP_STANDBY) { - /* Expire discoveries discovered on this link */ - irlmp_expire_discoveries(irlmp->cachelog, lap->saddr, - FALSE); - } - lap = (struct lap_cb *) hashbin_get_next(irlmp->links); - } -} - -/* - * Function irlmp_do_discovery (nslots) - * - * Do some discovery on all links - * - * Note : log expiry is done above. - */ -void irlmp_do_discovery(int nslots) -{ - struct lap_cb *lap; - __u16 *data_hintsp; - - /* Make sure the value is sane */ - if ((nslots != 1) && (nslots != 6) && (nslots != 8) && (nslots != 16)){ - net_warn_ratelimited("%s: invalid value for number of slots!\n", - __func__); - nslots = sysctl_discovery_slots = 8; - } - - /* Construct new discovery info to be used by IrLAP, */ - data_hintsp = (__u16 *) irlmp->discovery_cmd.data.hints; - put_unaligned(irlmp->hints.word, data_hintsp); - - /* - * Set character set for device name (we use ASCII), and - * copy device name. Remember to make room for a \0 at the - * end - */ - irlmp->discovery_cmd.data.charset = CS_ASCII; - strncpy(irlmp->discovery_cmd.data.info, sysctl_devname, - NICKNAME_MAX_LEN); - irlmp->discovery_cmd.name_len = strlen(irlmp->discovery_cmd.data.info); - irlmp->discovery_cmd.nslots = nslots; - - /* - * Try to send discovery packets on all links - */ - lap = (struct lap_cb *) hashbin_get_first(irlmp->links); - while (lap != NULL) { - IRDA_ASSERT(lap->magic == LMP_LAP_MAGIC, return;); - - if (lap->lap_state == LAP_STANDBY) { - /* Try to discover */ - irlmp_do_lap_event(lap, LM_LAP_DISCOVERY_REQUEST, - NULL); - } - lap = (struct lap_cb *) hashbin_get_next(irlmp->links); - } -} - -/* - * Function irlmp_discovery_request (nslots) - * - * Do a discovery of devices in front of the computer - * - * If the caller has registered a client discovery callback, this - * allow him to receive the full content of the discovery log through - * this callback (as normally he will receive only new discoveries). - */ -void irlmp_discovery_request(int nslots) -{ - /* Return current cached discovery log (in full) */ - irlmp_discovery_confirm(irlmp->cachelog, DISCOVERY_LOG); - - /* - * Start a single discovery operation if discovery is not already - * running - */ - if (!sysctl_discovery) { - /* Check if user wants to override the default */ - if (nslots == DISCOVERY_DEFAULT_SLOTS) - nslots = sysctl_discovery_slots; - - irlmp_do_discovery(nslots); - /* Note : we never do expiry here. Expiry will run on the - * discovery timer regardless of the state of sysctl_discovery - * Jean II */ - } -} -EXPORT_SYMBOL(irlmp_discovery_request); - -/* - * Function irlmp_get_discoveries (pn, mask, slots) - * - * Return the current discovery log - * - * If discovery is not enabled, you should call this function again - * after 1 or 2 seconds (i.e. after discovery has been done). - */ -struct irda_device_info *irlmp_get_discoveries(int *pn, __u16 mask, int nslots) -{ - /* If discovery is not enabled, it's likely that the discovery log - * will be empty. So, we trigger a single discovery, so that next - * time the user call us there might be some results in the log. - * Jean II - */ - if (!sysctl_discovery) { - /* Check if user wants to override the default */ - if (nslots == DISCOVERY_DEFAULT_SLOTS) - nslots = sysctl_discovery_slots; - - /* Start discovery - will complete sometime later */ - irlmp_do_discovery(nslots); - /* Note : we never do expiry here. Expiry will run on the - * discovery timer regardless of the state of sysctl_discovery - * Jean II */ - } - - /* Return current cached discovery log */ - return irlmp_copy_discoveries(irlmp->cachelog, pn, mask, TRUE); -} -EXPORT_SYMBOL(irlmp_get_discoveries); - -/* - * Function irlmp_notify_client (log) - * - * Notify all about discovered devices - * - * Clients registered with IrLMP are : - * o IrComm - * o IrLAN - * o Any socket (in any state - ouch, that may be a lot !) - * The client may have defined a callback to be notified in case of - * partial/selective discovery based on the hints that it passed to IrLMP. - */ -static inline void -irlmp_notify_client(irlmp_client_t *client, - hashbin_t *log, DISCOVERY_MODE mode) -{ - discinfo_t *discoveries; /* Copy of the discovery log */ - int number; /* Number of nodes in the log */ - int i; - - /* Check if client wants or not partial/selective log (optimisation) */ - if (!client->disco_callback) - return; - - /* - * Locking notes : - * the old code was manipulating the log directly, which was - * very racy. Now, we use copy_discoveries, that protects - * itself while dumping the log for us. - * The overhead of the copy is compensated by the fact that - * we only pass new discoveries in normal mode and don't - * pass the same old entry every 3s to the caller as we used - * to do (virtual function calling is expensive). - * Jean II - */ - - /* - * Now, check all discovered devices (if any), and notify client - * only about the services that the client is interested in - * We also notify only about the new devices unless the caller - * explicitly request a dump of the log. Jean II - */ - discoveries = irlmp_copy_discoveries(log, &number, - client->hint_mask.word, - (mode == DISCOVERY_LOG)); - /* Check if the we got some results */ - if (discoveries == NULL) - return; /* No nodes discovered */ - - /* Pass all entries to the listener */ - for(i = 0; i < number; i++) - client->disco_callback(&(discoveries[i]), mode, client->priv); - - /* Free up our buffer */ - kfree(discoveries); -} - -/* - * Function irlmp_discovery_confirm ( self, log) - * - * Some device(s) answered to our discovery request! Check to see which - * device it is, and give indication to the client(s) - * - */ -void irlmp_discovery_confirm(hashbin_t *log, DISCOVERY_MODE mode) -{ - irlmp_client_t *client; - irlmp_client_t *client_next; - - IRDA_ASSERT(log != NULL, return;); - - if (!(HASHBIN_GET_SIZE(log))) - return; - - /* For each client - notify callback may touch client list */ - client = (irlmp_client_t *) hashbin_get_first(irlmp->clients); - while (NULL != hashbin_find_next(irlmp->clients, (long) client, NULL, - (void *) &client_next) ) { - /* Check if we should notify client */ - irlmp_notify_client(client, log, mode); - - client = client_next; - } -} - -/* - * Function irlmp_discovery_expiry (expiry) - * - * This device is no longer been discovered, and therefore it is being - * purged from the discovery log. Inform all clients who have - * registered for this event... - * - * Note : called exclusively from discovery.c - * Note : this is no longer called under discovery spinlock, so the - * client can do whatever he wants in the callback. - */ -void irlmp_discovery_expiry(discinfo_t *expiries, int number) -{ - irlmp_client_t *client; - irlmp_client_t *client_next; - int i; - - IRDA_ASSERT(expiries != NULL, return;); - - /* For each client - notify callback may touch client list */ - client = (irlmp_client_t *) hashbin_get_first(irlmp->clients); - while (NULL != hashbin_find_next(irlmp->clients, (long) client, NULL, - (void *) &client_next) ) { - - /* Pass all entries to the listener */ - for(i = 0; i < number; i++) { - /* Check if we should notify client */ - if ((client->expir_callback) && - (client->hint_mask.word & - get_unaligned((__u16 *)expiries[i].hints) - & 0x7f7f) ) - client->expir_callback(&(expiries[i]), - EXPIRY_TIMEOUT, - client->priv); - } - - /* Next client */ - client = client_next; - } -} - -/* - * Function irlmp_get_discovery_response () - * - * Used by IrLAP to get the discovery info it needs when answering - * discovery requests by other devices. - */ -discovery_t *irlmp_get_discovery_response(void) -{ - IRDA_ASSERT(irlmp != NULL, return NULL;); - - put_unaligned(irlmp->hints.word, (__u16 *)irlmp->discovery_rsp.data.hints); - - /* - * Set character set for device name (we use ASCII), and - * copy device name. Remember to make room for a \0 at the - * end - */ - irlmp->discovery_rsp.data.charset = CS_ASCII; - - strncpy(irlmp->discovery_rsp.data.info, sysctl_devname, - NICKNAME_MAX_LEN); - irlmp->discovery_rsp.name_len = strlen(irlmp->discovery_rsp.data.info); - - return &irlmp->discovery_rsp; -} - -/* - * Function irlmp_data_request (self, skb) - * - * Send some data to peer device - * - * Note on skb management : - * After calling the lower layers of the IrDA stack, we always - * kfree() the skb, which drop the reference count (and potentially - * destroy it). - * IrLMP and IrLAP may queue the packet, and in those cases will need - * to use skb_get() to keep it around. - * Jean II - */ -int irlmp_data_request(struct lsap_cb *self, struct sk_buff *userdata) -{ - int ret; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -1;); - - /* Make room for MUX header */ - IRDA_ASSERT(skb_headroom(userdata) >= LMP_HEADER, return -1;); - skb_push(userdata, LMP_HEADER); - - ret = irlmp_do_lsap_event(self, LM_DATA_REQUEST, userdata); - - /* Drop reference count - see irlap_data_request(). */ - dev_kfree_skb(userdata); - - return ret; -} -EXPORT_SYMBOL(irlmp_data_request); - -/* - * Function irlmp_data_indication (handle, skb) - * - * Got data from LAP layer so pass it up to upper layer - * - */ -void irlmp_data_indication(struct lsap_cb *self, struct sk_buff *skb) -{ - /* Hide LMP header from layer above */ - skb_pull(skb, LMP_HEADER); - - if (self->notify.data_indication) { - /* Don't forget to refcount it - see irlap_driver_rcv(). */ - skb_get(skb); - self->notify.data_indication(self->notify.instance, self, skb); - } -} - -/* - * Function irlmp_udata_request (self, skb) - */ -int irlmp_udata_request(struct lsap_cb *self, struct sk_buff *userdata) -{ - int ret; - - IRDA_ASSERT(userdata != NULL, return -1;); - - /* Make room for MUX header */ - IRDA_ASSERT(skb_headroom(userdata) >= LMP_HEADER, return -1;); - skb_push(userdata, LMP_HEADER); - - ret = irlmp_do_lsap_event(self, LM_UDATA_REQUEST, userdata); - - /* Drop reference count - see irlap_data_request(). */ - dev_kfree_skb(userdata); - - return ret; -} - -/* - * Function irlmp_udata_indication (self, skb) - * - * Send unreliable data (but still within the connection) - * - */ -void irlmp_udata_indication(struct lsap_cb *self, struct sk_buff *skb) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return;); - IRDA_ASSERT(skb != NULL, return;); - - /* Hide LMP header from layer above */ - skb_pull(skb, LMP_HEADER); - - if (self->notify.udata_indication) { - /* Don't forget to refcount it - see irlap_driver_rcv(). */ - skb_get(skb); - self->notify.udata_indication(self->notify.instance, self, - skb); - } -} - -/* - * Function irlmp_connless_data_request (self, skb) - */ -#ifdef CONFIG_IRDA_ULTRA -int irlmp_connless_data_request(struct lsap_cb *self, struct sk_buff *userdata, - __u8 pid) -{ - struct sk_buff *clone_skb; - struct lap_cb *lap; - - IRDA_ASSERT(userdata != NULL, return -1;); - - /* Make room for MUX and PID header */ - IRDA_ASSERT(skb_headroom(userdata) >= LMP_HEADER+LMP_PID_HEADER, - return -1;); - - /* Insert protocol identifier */ - skb_push(userdata, LMP_PID_HEADER); - if(self != NULL) - userdata->data[0] = self->pid; - else - userdata->data[0] = pid; - - /* Connectionless sockets must use 0x70 */ - skb_push(userdata, LMP_HEADER); - userdata->data[0] = userdata->data[1] = LSAP_CONNLESS; - - /* Try to send Connectionless packets out on all links */ - lap = (struct lap_cb *) hashbin_get_first(irlmp->links); - while (lap != NULL) { - IRDA_ASSERT(lap->magic == LMP_LAP_MAGIC, return -1;); - - clone_skb = skb_clone(userdata, GFP_ATOMIC); - if (!clone_skb) { - dev_kfree_skb(userdata); - return -ENOMEM; - } - - irlap_unitdata_request(lap->irlap, clone_skb); - /* irlap_unitdata_request() don't increase refcount, - * so no dev_kfree_skb() - Jean II */ - - lap = (struct lap_cb *) hashbin_get_next(irlmp->links); - } - dev_kfree_skb(userdata); - - return 0; -} -#endif /* CONFIG_IRDA_ULTRA */ - -/* - * Function irlmp_connless_data_indication (self, skb) - * - * Receive unreliable data outside any connection. Mostly used by Ultra - * - */ -#ifdef CONFIG_IRDA_ULTRA -void irlmp_connless_data_indication(struct lsap_cb *self, struct sk_buff *skb) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return;); - IRDA_ASSERT(skb != NULL, return;); - - /* Hide LMP and PID header from layer above */ - skb_pull(skb, LMP_HEADER+LMP_PID_HEADER); - - if (self->notify.udata_indication) { - /* Don't forget to refcount it - see irlap_driver_rcv(). */ - skb_get(skb); - self->notify.udata_indication(self->notify.instance, self, - skb); - } -} -#endif /* CONFIG_IRDA_ULTRA */ - -/* - * Propagate status indication from LAP to LSAPs (via LMP) - * This don't trigger any change of state in lap_cb, lmp_cb or lsap_cb, - * and the event is stateless, therefore we can bypass both state machines - * and send the event direct to the LSAP user. - * Jean II - */ -void irlmp_status_indication(struct lap_cb *self, - LINK_STATUS link, LOCK_STATUS lock) -{ - struct lsap_cb *next; - struct lsap_cb *curr; - - /* Send status_indication to all LSAPs using this link */ - curr = (struct lsap_cb *) hashbin_get_first( self->lsaps); - while (NULL != hashbin_find_next(self->lsaps, (long) curr, NULL, - (void *) &next) ) { - IRDA_ASSERT(curr->magic == LMP_LSAP_MAGIC, return;); - /* - * Inform service user if he has requested it - */ - if (curr->notify.status_indication != NULL) - curr->notify.status_indication(curr->notify.instance, - link, lock); - else - pr_debug("%s(), no handler\n", __func__); - - curr = next; - } -} - -/* - * Receive flow control indication from LAP. - * LAP want us to send it one more frame. We implement a simple round - * robin scheduler between the active sockets so that we get a bit of - * fairness. Note that the round robin is far from perfect, but it's - * better than nothing. - * We then poll the selected socket so that we can do synchronous - * refilling of IrLAP (which allow to minimise the number of buffers). - * Jean II - */ -void irlmp_flow_indication(struct lap_cb *self, LOCAL_FLOW flow) -{ - struct lsap_cb *next; - struct lsap_cb *curr; - int lsap_todo; - - IRDA_ASSERT(self->magic == LMP_LAP_MAGIC, return;); - IRDA_ASSERT(flow == FLOW_START, return;); - - /* Get the number of lsap. That's the only safe way to know - * that we have looped around... - Jean II */ - lsap_todo = HASHBIN_GET_SIZE(self->lsaps); - pr_debug("%s() : %d lsaps to scan\n", __func__, lsap_todo); - - /* Poll lsap in order until the queue is full or until we - * tried them all. - * Most often, the current LSAP will have something to send, - * so we will go through this loop only once. - Jean II */ - while((lsap_todo--) && - (IRLAP_GET_TX_QUEUE_LEN(self->irlap) < LAP_HIGH_THRESHOLD)) { - /* Try to find the next lsap we should poll. */ - next = self->flow_next; - /* If we have no lsap, restart from first one */ - if(next == NULL) - next = (struct lsap_cb *) hashbin_get_first(self->lsaps); - /* Verify current one and find the next one */ - curr = hashbin_find_next(self->lsaps, (long) next, NULL, - (void *) &self->flow_next); - /* Uh-oh... Paranoia */ - if(curr == NULL) - break; - pr_debug("%s() : curr is %p, next was %p and is now %p, still %d to go - queue len = %d\n", - __func__, curr, next, self->flow_next, lsap_todo, - IRLAP_GET_TX_QUEUE_LEN(self->irlap)); - - /* Inform lsap user that it can send one more packet. */ - if (curr->notify.flow_indication != NULL) - curr->notify.flow_indication(curr->notify.instance, - curr, flow); - else - pr_debug("%s(), no handler\n", __func__); - } -} - -#if 0 -/* - * Function irlmp_hint_to_service (hint) - * - * Returns a list of all servics contained in the given hint bits. This - * function assumes that the hint bits have the size of two bytes only - */ -__u8 *irlmp_hint_to_service(__u8 *hint) -{ - __u8 *service; - int i = 0; - - /* - * Allocate array to store services in. 16 entries should be safe - * since we currently only support 2 hint bytes - */ - service = kmalloc(16, GFP_ATOMIC); - if (!service) - return NULL; - - if (!hint[0]) { - pr_debug("<None>\n"); - kfree(service); - return NULL; - } - if (hint[0] & HINT_PNP) - pr_debug("PnP Compatible "); - if (hint[0] & HINT_PDA) - pr_debug("PDA/Palmtop "); - if (hint[0] & HINT_COMPUTER) - pr_debug("Computer "); - if (hint[0] & HINT_PRINTER) { - pr_debug("Printer "); - service[i++] = S_PRINTER; - } - if (hint[0] & HINT_MODEM) - pr_debug("Modem "); - if (hint[0] & HINT_FAX) - pr_debug("Fax "); - if (hint[0] & HINT_LAN) { - pr_debug("LAN Access "); - service[i++] = S_LAN; - } - /* - * Test if extension byte exists. This byte will usually be - * there, but this is not really required by the standard. - * (IrLMP p. 29) - */ - if (hint[0] & HINT_EXTENSION) { - if (hint[1] & HINT_TELEPHONY) { - pr_debug("Telephony "); - service[i++] = S_TELEPHONY; - } - if (hint[1] & HINT_FILE_SERVER) - pr_debug("File Server "); - - if (hint[1] & HINT_COMM) { - pr_debug("IrCOMM "); - service[i++] = S_COMM; - } - if (hint[1] & HINT_OBEX) { - pr_debug("IrOBEX "); - service[i++] = S_OBEX; - } - } - pr_debug("\n"); - - /* So that client can be notified about any discovery */ - service[i++] = S_ANY; - - service[i] = S_END; - - return service; -} -#endif - -static const __u16 service_hint_mapping[S_END][2] = { - { HINT_PNP, 0 }, /* S_PNP */ - { HINT_PDA, 0 }, /* S_PDA */ - { HINT_COMPUTER, 0 }, /* S_COMPUTER */ - { HINT_PRINTER, 0 }, /* S_PRINTER */ - { HINT_MODEM, 0 }, /* S_MODEM */ - { HINT_FAX, 0 }, /* S_FAX */ - { HINT_LAN, 0 }, /* S_LAN */ - { HINT_EXTENSION, HINT_TELEPHONY }, /* S_TELEPHONY */ - { HINT_EXTENSION, HINT_COMM }, /* S_COMM */ - { HINT_EXTENSION, HINT_OBEX }, /* S_OBEX */ - { 0xFF, 0xFF }, /* S_ANY */ -}; - -/* - * Function irlmp_service_to_hint (service) - * - * Converts a service type, to a hint bit - * - * Returns: a 16 bit hint value, with the service bit set - */ -__u16 irlmp_service_to_hint(int service) -{ - __u16_host_order hint; - - hint.byte[0] = service_hint_mapping[service][0]; - hint.byte[1] = service_hint_mapping[service][1]; - - return hint.word; -} -EXPORT_SYMBOL(irlmp_service_to_hint); - -/* - * Function irlmp_register_service (service) - * - * Register local service with IrLMP - * - */ -void *irlmp_register_service(__u16 hints) -{ - irlmp_service_t *service; - - pr_debug("%s(), hints = %04x\n", __func__, hints); - - /* Make a new registration */ - service = kmalloc(sizeof(irlmp_service_t), GFP_ATOMIC); - if (!service) - return NULL; - - service->hints.word = hints; - hashbin_insert(irlmp->services, (irda_queue_t *) service, - (long) service, NULL); - - irlmp->hints.word |= hints; - - return (void *)service; -} -EXPORT_SYMBOL(irlmp_register_service); - -/* - * Function irlmp_unregister_service (handle) - * - * Unregister service with IrLMP. - * - * Returns: 0 on success, -1 on error - */ -int irlmp_unregister_service(void *handle) -{ - irlmp_service_t *service; - unsigned long flags; - - if (!handle) - return -1; - - /* Caller may call with invalid handle (it's legal) - Jean II */ - service = hashbin_lock_find(irlmp->services, (long) handle, NULL); - if (!service) { - pr_debug("%s(), Unknown service!\n", __func__); - return -1; - } - - hashbin_remove_this(irlmp->services, (irda_queue_t *) service); - kfree(service); - - /* Remove old hint bits */ - irlmp->hints.word = 0; - - /* Refresh current hint bits */ - spin_lock_irqsave(&irlmp->services->hb_spinlock, flags); - service = (irlmp_service_t *) hashbin_get_first(irlmp->services); - while (service) { - irlmp->hints.word |= service->hints.word; - - service = (irlmp_service_t *)hashbin_get_next(irlmp->services); - } - spin_unlock_irqrestore(&irlmp->services->hb_spinlock, flags); - return 0; -} -EXPORT_SYMBOL(irlmp_unregister_service); - -/* - * Function irlmp_register_client (hint_mask, callback1, callback2) - * - * Register a local client with IrLMP - * First callback is selective discovery (based on hints) - * Second callback is for selective discovery expiries - * - * Returns: handle > 0 on success, 0 on error - */ -void *irlmp_register_client(__u16 hint_mask, DISCOVERY_CALLBACK1 disco_clb, - DISCOVERY_CALLBACK2 expir_clb, void *priv) -{ - irlmp_client_t *client; - - IRDA_ASSERT(irlmp != NULL, return NULL;); - - /* Make a new registration */ - client = kmalloc(sizeof(irlmp_client_t), GFP_ATOMIC); - if (!client) - return NULL; - - /* Register the details */ - client->hint_mask.word = hint_mask; - client->disco_callback = disco_clb; - client->expir_callback = expir_clb; - client->priv = priv; - - hashbin_insert(irlmp->clients, (irda_queue_t *) client, - (long) client, NULL); - - return (void *) client; -} -EXPORT_SYMBOL(irlmp_register_client); - -/* - * Function irlmp_update_client (handle, hint_mask, callback1, callback2) - * - * Updates specified client (handle) with possibly new hint_mask and - * callback - * - * Returns: 0 on success, -1 on error - */ -int irlmp_update_client(void *handle, __u16 hint_mask, - DISCOVERY_CALLBACK1 disco_clb, - DISCOVERY_CALLBACK2 expir_clb, void *priv) -{ - irlmp_client_t *client; - - if (!handle) - return -1; - - client = hashbin_lock_find(irlmp->clients, (long) handle, NULL); - if (!client) { - pr_debug("%s(), Unknown client!\n", __func__); - return -1; - } - - client->hint_mask.word = hint_mask; - client->disco_callback = disco_clb; - client->expir_callback = expir_clb; - client->priv = priv; - - return 0; -} -EXPORT_SYMBOL(irlmp_update_client); - -/* - * Function irlmp_unregister_client (handle) - * - * Returns: 0 on success, -1 on error - * - */ -int irlmp_unregister_client(void *handle) -{ - struct irlmp_client *client; - - if (!handle) - return -1; - - /* Caller may call with invalid handle (it's legal) - Jean II */ - client = hashbin_lock_find(irlmp->clients, (long) handle, NULL); - if (!client) { - pr_debug("%s(), Unknown client!\n", __func__); - return -1; - } - - pr_debug("%s(), removing client!\n", __func__); - hashbin_remove_this(irlmp->clients, (irda_queue_t *) client); - kfree(client); - - return 0; -} -EXPORT_SYMBOL(irlmp_unregister_client); - -/* - * Function irlmp_slsap_inuse (slsap) - * - * Check if the given source LSAP selector is in use - * - * This function is clearly not very efficient. On the mitigating side, the - * stack make sure that in 99% of the cases, we are called only once - * for each socket allocation. We could probably keep a bitmap - * of the allocated LSAP, but I'm not sure the complexity is worth it. - * Jean II - */ -static int irlmp_slsap_inuse(__u8 slsap_sel) -{ - struct lsap_cb *self; - struct lap_cb *lap; - unsigned long flags; - - IRDA_ASSERT(irlmp != NULL, return TRUE;); - IRDA_ASSERT(irlmp->magic == LMP_MAGIC, return TRUE;); - IRDA_ASSERT(slsap_sel != LSAP_ANY, return TRUE;); - -#ifdef CONFIG_IRDA_ULTRA - /* Accept all bindings to the connectionless LSAP */ - if (slsap_sel == LSAP_CONNLESS) - return FALSE; -#endif /* CONFIG_IRDA_ULTRA */ - - /* Valid values are between 0 and 127 (0x0-0x6F) */ - if (slsap_sel > LSAP_MAX) - return TRUE; - - /* - * Check if slsap is already in use. To do this we have to loop over - * every IrLAP connection and check every LSAP associated with each - * the connection. - */ - spin_lock_irqsave_nested(&irlmp->links->hb_spinlock, flags, - SINGLE_DEPTH_NESTING); - lap = (struct lap_cb *) hashbin_get_first(irlmp->links); - while (lap != NULL) { - IRDA_ASSERT(lap->magic == LMP_LAP_MAGIC, goto errlap;); - - /* Careful for priority inversions here ! - * irlmp->links is never taken while another IrDA - * spinlock is held, so we are safe. Jean II */ - spin_lock(&lap->lsaps->hb_spinlock); - - /* For this IrLAP, check all the LSAPs */ - self = (struct lsap_cb *) hashbin_get_first(lap->lsaps); - while (self != NULL) { - IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, - goto errlsap;); - - if ((self->slsap_sel == slsap_sel)) { - pr_debug("Source LSAP selector=%02x in use\n", - self->slsap_sel); - goto errlsap; - } - self = (struct lsap_cb*) hashbin_get_next(lap->lsaps); - } - spin_unlock(&lap->lsaps->hb_spinlock); - - /* Next LAP */ - lap = (struct lap_cb *) hashbin_get_next(irlmp->links); - } - spin_unlock_irqrestore(&irlmp->links->hb_spinlock, flags); - - /* - * Server sockets are typically waiting for connections and - * therefore reside in the unconnected list. We don't want - * to give out their LSAPs for obvious reasons... - * Jean II - */ - spin_lock_irqsave(&irlmp->unconnected_lsaps->hb_spinlock, flags); - - self = (struct lsap_cb *) hashbin_get_first(irlmp->unconnected_lsaps); - while (self != NULL) { - IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, goto erruncon;); - if ((self->slsap_sel == slsap_sel)) { - pr_debug("Source LSAP selector=%02x in use (unconnected)\n", - self->slsap_sel); - goto erruncon; - } - self = (struct lsap_cb*) hashbin_get_next(irlmp->unconnected_lsaps); - } - spin_unlock_irqrestore(&irlmp->unconnected_lsaps->hb_spinlock, flags); - - return FALSE; - - /* Error exit from within one of the two nested loops. - * Make sure we release the right spinlock in the righ order. - * Jean II */ -errlsap: - spin_unlock(&lap->lsaps->hb_spinlock); -IRDA_ASSERT_LABEL(errlap:) - spin_unlock_irqrestore(&irlmp->links->hb_spinlock, flags); - return TRUE; - - /* Error exit from within the unconnected loop. - * Just one spinlock to release... Jean II */ -erruncon: - spin_unlock_irqrestore(&irlmp->unconnected_lsaps->hb_spinlock, flags); - return TRUE; -} - -/* - * Function irlmp_find_free_slsap () - * - * Find a free source LSAP to use. This function is called if the service - * user has requested a source LSAP equal to LM_ANY - */ -static __u8 irlmp_find_free_slsap(void) -{ - __u8 lsap_sel; - int wrapped = 0; - - IRDA_ASSERT(irlmp != NULL, return -1;); - IRDA_ASSERT(irlmp->magic == LMP_MAGIC, return -1;); - - /* Most users don't really care which LSAPs they are given, - * and therefore we automatically give them a free LSAP. - * This function try to find a suitable LSAP, i.e. which is - * not in use and is within the acceptable range. Jean II */ - - do { - /* Always increment to LSAP number before using it. - * In theory, we could reuse the last LSAP number, as long - * as it is no longer in use. Some IrDA stack do that. - * However, the previous socket may be half closed, i.e. - * we closed it, we think it's no longer in use, but the - * other side did not receive our close and think it's - * active and still send data on it. - * This is similar to what is done with PIDs and TCP ports. - * Also, this reduce the number of calls to irlmp_slsap_inuse() - * which is an expensive function to call. - * Jean II */ - irlmp->last_lsap_sel++; - - /* Check if we need to wraparound (0x70-0x7f are reserved) */ - if (irlmp->last_lsap_sel > LSAP_MAX) { - /* 0x00-0x10 are also reserved for well know ports */ - irlmp->last_lsap_sel = 0x10; - - /* Make sure we terminate the loop */ - if (wrapped++) { - net_err_ratelimited("%s: no more free LSAPs !\n", - __func__); - return 0; - } - } - - /* If the LSAP is in use, try the next one. - * Despite the autoincrement, we need to check if the lsap - * is really in use or not, first because LSAP may be - * directly allocated in irlmp_open_lsap(), and also because - * we may wraparound on old sockets. Jean II */ - } while (irlmp_slsap_inuse(irlmp->last_lsap_sel)); - - /* Got it ! */ - lsap_sel = irlmp->last_lsap_sel; - pr_debug("%s(), found free lsap_sel=%02x\n", - __func__, lsap_sel); - - return lsap_sel; -} - -/* - * Function irlmp_convert_lap_reason (lap_reason) - * - * Converts IrLAP disconnect reason codes to IrLMP disconnect reason - * codes - * - */ -LM_REASON irlmp_convert_lap_reason( LAP_REASON lap_reason) -{ - int reason = LM_LAP_DISCONNECT; - - switch (lap_reason) { - case LAP_DISC_INDICATION: /* Received a disconnect request from peer */ - pr_debug("%s(), LAP_DISC_INDICATION\n", __func__); - reason = LM_USER_REQUEST; - break; - case LAP_NO_RESPONSE: /* To many retransmits without response */ - pr_debug("%s(), LAP_NO_RESPONSE\n", __func__); - reason = LM_LAP_DISCONNECT; - break; - case LAP_RESET_INDICATION: - pr_debug("%s(), LAP_RESET_INDICATION\n", __func__); - reason = LM_LAP_RESET; - break; - case LAP_FOUND_NONE: - case LAP_MEDIA_BUSY: - case LAP_PRIMARY_CONFLICT: - pr_debug("%s(), LAP_FOUND_NONE, LAP_MEDIA_BUSY or LAP_PRIMARY_CONFLICT\n", - __func__); - reason = LM_CONNECT_FAILURE; - break; - default: - pr_debug("%s(), Unknown IrLAP disconnect reason %d!\n", - __func__, lap_reason); - reason = LM_LAP_DISCONNECT; - break; - } - - return reason; -} - -#ifdef CONFIG_PROC_FS - -struct irlmp_iter_state { - hashbin_t *hashbin; -}; - -#define LSAP_START_TOKEN ((void *)1) -#define LINK_START_TOKEN ((void *)2) - -static void *irlmp_seq_hb_idx(struct irlmp_iter_state *iter, loff_t *off) -{ - void *element; - - spin_lock_irq(&iter->hashbin->hb_spinlock); - for (element = hashbin_get_first(iter->hashbin); - element != NULL; - element = hashbin_get_next(iter->hashbin)) { - if (!off || (*off)-- == 0) { - /* NB: hashbin left locked */ - return element; - } - } - spin_unlock_irq(&iter->hashbin->hb_spinlock); - iter->hashbin = NULL; - return NULL; -} - - -static void *irlmp_seq_start(struct seq_file *seq, loff_t *pos) -{ - struct irlmp_iter_state *iter = seq->private; - void *v; - loff_t off = *pos; - - iter->hashbin = NULL; - if (off-- == 0) - return LSAP_START_TOKEN; - - iter->hashbin = irlmp->unconnected_lsaps; - v = irlmp_seq_hb_idx(iter, &off); - if (v) - return v; - - if (off-- == 0) - return LINK_START_TOKEN; - - iter->hashbin = irlmp->links; - return irlmp_seq_hb_idx(iter, &off); -} - -static void *irlmp_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - struct irlmp_iter_state *iter = seq->private; - - ++*pos; - - if (v == LSAP_START_TOKEN) { /* start of list of lsaps */ - iter->hashbin = irlmp->unconnected_lsaps; - v = irlmp_seq_hb_idx(iter, NULL); - return v ? v : LINK_START_TOKEN; - } - - if (v == LINK_START_TOKEN) { /* start of list of links */ - iter->hashbin = irlmp->links; - return irlmp_seq_hb_idx(iter, NULL); - } - - v = hashbin_get_next(iter->hashbin); - - if (v == NULL) { /* no more in this hash bin */ - spin_unlock_irq(&iter->hashbin->hb_spinlock); - - if (iter->hashbin == irlmp->unconnected_lsaps) - v = LINK_START_TOKEN; - - iter->hashbin = NULL; - } - return v; -} - -static void irlmp_seq_stop(struct seq_file *seq, void *v) -{ - struct irlmp_iter_state *iter = seq->private; - - if (iter->hashbin) - spin_unlock_irq(&iter->hashbin->hb_spinlock); -} - -static int irlmp_seq_show(struct seq_file *seq, void *v) -{ - const struct irlmp_iter_state *iter = seq->private; - struct lsap_cb *self = v; - - if (v == LSAP_START_TOKEN) - seq_puts(seq, "Unconnected LSAPs:\n"); - else if (v == LINK_START_TOKEN) - seq_puts(seq, "\nRegistered Link Layers:\n"); - else if (iter->hashbin == irlmp->unconnected_lsaps) { - self = v; - IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -EINVAL; ); - seq_printf(seq, "lsap state: %s, ", - irlsap_state[ self->lsap_state]); - seq_printf(seq, - "slsap_sel: %#02x, dlsap_sel: %#02x, ", - self->slsap_sel, self->dlsap_sel); - seq_printf(seq, "(%s)", self->notify.name); - seq_printf(seq, "\n"); - } else if (iter->hashbin == irlmp->links) { - struct lap_cb *lap = v; - - seq_printf(seq, "lap state: %s, ", - irlmp_state[lap->lap_state]); - - seq_printf(seq, "saddr: %#08x, daddr: %#08x, ", - lap->saddr, lap->daddr); - seq_printf(seq, "num lsaps: %d", - HASHBIN_GET_SIZE(lap->lsaps)); - seq_printf(seq, "\n"); - - /* Careful for priority inversions here ! - * All other uses of attrib spinlock are independent of - * the object spinlock, so we are safe. Jean II */ - spin_lock(&lap->lsaps->hb_spinlock); - - seq_printf(seq, "\n Connected LSAPs:\n"); - for (self = (struct lsap_cb *) hashbin_get_first(lap->lsaps); - self != NULL; - self = (struct lsap_cb *)hashbin_get_next(lap->lsaps)) { - IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, - goto outloop;); - seq_printf(seq, " lsap state: %s, ", - irlsap_state[ self->lsap_state]); - seq_printf(seq, - "slsap_sel: %#02x, dlsap_sel: %#02x, ", - self->slsap_sel, self->dlsap_sel); - seq_printf(seq, "(%s)", self->notify.name); - seq_putc(seq, '\n'); - - } - IRDA_ASSERT_LABEL(outloop:) - spin_unlock(&lap->lsaps->hb_spinlock); - seq_putc(seq, '\n'); - } else - return -EINVAL; - - return 0; -} - -static const struct seq_operations irlmp_seq_ops = { - .start = irlmp_seq_start, - .next = irlmp_seq_next, - .stop = irlmp_seq_stop, - .show = irlmp_seq_show, -}; - -static int irlmp_seq_open(struct inode *inode, struct file *file) -{ - IRDA_ASSERT(irlmp != NULL, return -EINVAL;); - - return seq_open_private(file, &irlmp_seq_ops, - sizeof(struct irlmp_iter_state)); -} - -const struct file_operations irlmp_seq_fops = { - .owner = THIS_MODULE, - .open = irlmp_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, -}; - -#endif /* PROC_FS */ diff --git a/net/irda/irlmp_event.c b/net/irda/irlmp_event.c deleted file mode 100644 index e306cf2c1e04..000000000000 --- a/net/irda/irlmp_event.c +++ /dev/null @@ -1,886 +0,0 @@ -/********************************************************************* - * - * Filename: irlmp_event.c - * Version: 0.8 - * Description: An IrDA LMP event driver for Linux - * Status: Experimental. - * Author: Dag Brattli <dagb@cs.uit.no> - * Created at: Mon Aug 4 20:40:53 1997 - * Modified at: Tue Dec 14 23:04:16 1999 - * Modified by: Dag Brattli <dagb@cs.uit.no> - * - * Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no>, - * All Rights Reserved. - * Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#include <linux/kernel.h> - -#include <net/irda/irda.h> -#include <net/irda/timer.h> -#include <net/irda/irlap.h> -#include <net/irda/irlmp.h> -#include <net/irda/irlmp_frame.h> -#include <net/irda/irlmp_event.h> - -const char *const irlmp_state[] = { - "LAP_STANDBY", - "LAP_U_CONNECT", - "LAP_ACTIVE", -}; - -const char *const irlsap_state[] = { - "LSAP_DISCONNECTED", - "LSAP_CONNECT", - "LSAP_CONNECT_PEND", - "LSAP_DATA_TRANSFER_READY", - "LSAP_SETUP", - "LSAP_SETUP_PEND", -}; - -static const char *const irlmp_event[] __maybe_unused = { - "LM_CONNECT_REQUEST", - "LM_CONNECT_CONFIRM", - "LM_CONNECT_RESPONSE", - "LM_CONNECT_INDICATION", - - "LM_DISCONNECT_INDICATION", - "LM_DISCONNECT_REQUEST", - - "LM_DATA_REQUEST", - "LM_UDATA_REQUEST", - "LM_DATA_INDICATION", - "LM_UDATA_INDICATION", - - "LM_WATCHDOG_TIMEOUT", - - /* IrLAP events */ - "LM_LAP_CONNECT_REQUEST", - "LM_LAP_CONNECT_INDICATION", - "LM_LAP_CONNECT_CONFIRM", - "LM_LAP_DISCONNECT_INDICATION", - "LM_LAP_DISCONNECT_REQUEST", - "LM_LAP_DISCOVERY_REQUEST", - "LM_LAP_DISCOVERY_CONFIRM", - "LM_LAP_IDLE_TIMEOUT", -}; - -/* LAP Connection control proto declarations */ -static void irlmp_state_standby (struct lap_cb *, IRLMP_EVENT, - struct sk_buff *); -static void irlmp_state_u_connect(struct lap_cb *, IRLMP_EVENT, - struct sk_buff *); -static void irlmp_state_active (struct lap_cb *, IRLMP_EVENT, - struct sk_buff *); - -/* LSAP Connection control proto declarations */ -static int irlmp_state_disconnected(struct lsap_cb *, IRLMP_EVENT, - struct sk_buff *); -static int irlmp_state_connect (struct lsap_cb *, IRLMP_EVENT, - struct sk_buff *); -static int irlmp_state_connect_pend(struct lsap_cb *, IRLMP_EVENT, - struct sk_buff *); -static int irlmp_state_dtr (struct lsap_cb *, IRLMP_EVENT, - struct sk_buff *); -static int irlmp_state_setup (struct lsap_cb *, IRLMP_EVENT, - struct sk_buff *); -static int irlmp_state_setup_pend (struct lsap_cb *, IRLMP_EVENT, - struct sk_buff *); - -static void (*lap_state[]) (struct lap_cb *, IRLMP_EVENT, struct sk_buff *) = -{ - irlmp_state_standby, - irlmp_state_u_connect, - irlmp_state_active, -}; - -static int (*lsap_state[])( struct lsap_cb *, IRLMP_EVENT, struct sk_buff *) = -{ - irlmp_state_disconnected, - irlmp_state_connect, - irlmp_state_connect_pend, - irlmp_state_dtr, - irlmp_state_setup, - irlmp_state_setup_pend -}; - -static inline void irlmp_next_lap_state(struct lap_cb *self, - IRLMP_STATE state) -{ - /* - pr_debug("%s(), LMP LAP = %s\n", __func__, irlmp_state[state]); - */ - self->lap_state = state; -} - -static inline void irlmp_next_lsap_state(struct lsap_cb *self, - LSAP_STATE state) -{ - /* - IRDA_ASSERT(self != NULL, return;); - pr_debug("%s(), LMP LSAP = %s\n", __func__, irlsap_state[state]); - */ - self->lsap_state = state; -} - -/* Do connection control events */ -int irlmp_do_lsap_event(struct lsap_cb *self, IRLMP_EVENT event, - struct sk_buff *skb) -{ - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -1;); - - pr_debug("%s(), EVENT = %s, STATE = %s\n", - __func__, irlmp_event[event], irlsap_state[self->lsap_state]); - - return (*lsap_state[self->lsap_state]) (self, event, skb); -} - -/* - * Function do_lap_event (event, skb, info) - * - * Do IrLAP control events - * - */ -void irlmp_do_lap_event(struct lap_cb *self, IRLMP_EVENT event, - struct sk_buff *skb) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LMP_LAP_MAGIC, return;); - - pr_debug("%s(), EVENT = %s, STATE = %s\n", __func__, - irlmp_event[event], - irlmp_state[self->lap_state]); - - (*lap_state[self->lap_state]) (self, event, skb); -} - -void irlmp_discovery_timer_expired(void *data) -{ - /* We always cleanup the log (active & passive discovery) */ - irlmp_do_expiry(); - - irlmp_do_discovery(sysctl_discovery_slots); - - /* Restart timer */ - irlmp_start_discovery_timer(irlmp, sysctl_discovery_timeout * HZ); -} - -void irlmp_watchdog_timer_expired(void *data) -{ - struct lsap_cb *self = (struct lsap_cb *) data; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return;); - - irlmp_do_lsap_event(self, LM_WATCHDOG_TIMEOUT, NULL); -} - -void irlmp_idle_timer_expired(void *data) -{ - struct lap_cb *self = (struct lap_cb *) data; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LMP_LAP_MAGIC, return;); - - irlmp_do_lap_event(self, LM_LAP_IDLE_TIMEOUT, NULL); -} - -/* - * Send an event on all LSAPs attached to this LAP. - */ -static inline void -irlmp_do_all_lsap_event(hashbin_t * lsap_hashbin, - IRLMP_EVENT event) -{ - struct lsap_cb *lsap; - struct lsap_cb *lsap_next; - - /* Note : this function use the new hashbin_find_next() - * function, instead of the old hashbin_get_next(). - * This make sure that we are always pointing one lsap - * ahead, so that if the current lsap is removed as the - * result of sending the event, we don't care. - * Also, as we store the context ourselves, if an enumeration - * of the same lsap hashbin happens as the result of sending the - * event, we don't care. - * The only problem is if the next lsap is removed. In that case, - * hashbin_find_next() will return NULL and we will abort the - * enumeration. - Jean II */ - - /* Also : we don't accept any skb in input. We can *NOT* pass - * the same skb to multiple clients safely, we would need to - * skb_clone() it. - Jean II */ - - lsap = (struct lsap_cb *) hashbin_get_first(lsap_hashbin); - - while (NULL != hashbin_find_next(lsap_hashbin, - (long) lsap, - NULL, - (void *) &lsap_next) ) { - irlmp_do_lsap_event(lsap, event, NULL); - lsap = lsap_next; - } -} - -/********************************************************************* - * - * LAP connection control states - * - ********************************************************************/ - -/* - * Function irlmp_state_standby (event, skb, info) - * - * STANDBY, The IrLAP connection does not exist. - * - */ -static void irlmp_state_standby(struct lap_cb *self, IRLMP_EVENT event, - struct sk_buff *skb) -{ - IRDA_ASSERT(self->irlap != NULL, return;); - - switch (event) { - case LM_LAP_DISCOVERY_REQUEST: - /* irlmp_next_station_state( LMP_DISCOVER); */ - - irlap_discovery_request(self->irlap, &irlmp->discovery_cmd); - break; - case LM_LAP_CONNECT_INDICATION: - /* It's important to switch state first, to avoid IrLMP to - * think that the link is free since IrLMP may then start - * discovery before the connection is properly set up. DB. - */ - irlmp_next_lap_state(self, LAP_ACTIVE); - - /* Just accept connection TODO, this should be fixed */ - irlap_connect_response(self->irlap, skb); - break; - case LM_LAP_CONNECT_REQUEST: - pr_debug("%s() LS_CONNECT_REQUEST\n", __func__); - - irlmp_next_lap_state(self, LAP_U_CONNECT); - - /* FIXME: need to set users requested QoS */ - irlap_connect_request(self->irlap, self->daddr, NULL, 0); - break; - case LM_LAP_DISCONNECT_INDICATION: - pr_debug("%s(), Error LM_LAP_DISCONNECT_INDICATION\n", - __func__); - - irlmp_next_lap_state(self, LAP_STANDBY); - break; - default: - pr_debug("%s(), Unknown event %s\n", - __func__, irlmp_event[event]); - break; - } -} - -/* - * Function irlmp_state_u_connect (event, skb, info) - * - * U_CONNECT, The layer above has tried to open an LSAP connection but - * since the IrLAP connection does not exist, we must first start an - * IrLAP connection. We are now waiting response from IrLAP. - * */ -static void irlmp_state_u_connect(struct lap_cb *self, IRLMP_EVENT event, - struct sk_buff *skb) -{ - pr_debug("%s(), event=%s\n", __func__, irlmp_event[event]); - - switch (event) { - case LM_LAP_CONNECT_INDICATION: - /* It's important to switch state first, to avoid IrLMP to - * think that the link is free since IrLMP may then start - * discovery before the connection is properly set up. DB. - */ - irlmp_next_lap_state(self, LAP_ACTIVE); - - /* Just accept connection TODO, this should be fixed */ - irlap_connect_response(self->irlap, skb); - - /* Tell LSAPs that they can start sending data */ - irlmp_do_all_lsap_event(self->lsaps, LM_LAP_CONNECT_CONFIRM); - - /* Note : by the time we get there (LAP retries and co), - * the lsaps may already have gone. This avoid getting stuck - * forever in LAP_ACTIVE state - Jean II */ - if (HASHBIN_GET_SIZE(self->lsaps) == 0) { - pr_debug("%s() NO LSAPs !\n", __func__); - irlmp_start_idle_timer(self, LM_IDLE_TIMEOUT); - } - break; - case LM_LAP_CONNECT_REQUEST: - /* Already trying to connect */ - break; - case LM_LAP_CONNECT_CONFIRM: - /* For all lsap_ce E Associated do LS_Connect_confirm */ - irlmp_next_lap_state(self, LAP_ACTIVE); - - /* Tell LSAPs that they can start sending data */ - irlmp_do_all_lsap_event(self->lsaps, LM_LAP_CONNECT_CONFIRM); - - /* Note : by the time we get there (LAP retries and co), - * the lsaps may already have gone. This avoid getting stuck - * forever in LAP_ACTIVE state - Jean II */ - if (HASHBIN_GET_SIZE(self->lsaps) == 0) { - pr_debug("%s() NO LSAPs !\n", __func__); - irlmp_start_idle_timer(self, LM_IDLE_TIMEOUT); - } - break; - case LM_LAP_DISCONNECT_INDICATION: - pr_debug("%s(), LM_LAP_DISCONNECT_INDICATION\n", __func__); - irlmp_next_lap_state(self, LAP_STANDBY); - - /* Send disconnect event to all LSAPs using this link */ - irlmp_do_all_lsap_event(self->lsaps, - LM_LAP_DISCONNECT_INDICATION); - break; - case LM_LAP_DISCONNECT_REQUEST: - pr_debug("%s(), LM_LAP_DISCONNECT_REQUEST\n", __func__); - - /* One of the LSAP did timeout or was closed, if it was - * the last one, try to get out of here - Jean II */ - if (HASHBIN_GET_SIZE(self->lsaps) <= 1) { - irlap_disconnect_request(self->irlap); - } - break; - default: - pr_debug("%s(), Unknown event %s\n", - __func__, irlmp_event[event]); - break; - } -} - -/* - * Function irlmp_state_active (event, skb, info) - * - * ACTIVE, IrLAP connection is active - * - */ -static void irlmp_state_active(struct lap_cb *self, IRLMP_EVENT event, - struct sk_buff *skb) -{ - switch (event) { - case LM_LAP_CONNECT_REQUEST: - pr_debug("%s(), LS_CONNECT_REQUEST\n", __func__); - - /* - * IrLAP may have a pending disconnect. We tried to close - * IrLAP, but it was postponed because the link was - * busy or we were still sending packets. As we now - * need it, make sure it stays on. Jean II - */ - irlap_clear_disconnect(self->irlap); - - /* - * LAP connection already active, just bounce back! Since we - * don't know which LSAP that tried to do this, we have to - * notify all LSAPs using this LAP, but that should be safe to - * do anyway. - */ - irlmp_do_all_lsap_event(self->lsaps, LM_LAP_CONNECT_CONFIRM); - - /* Needed by connect indication */ - irlmp_do_all_lsap_event(irlmp->unconnected_lsaps, - LM_LAP_CONNECT_CONFIRM); - /* Keep state */ - break; - case LM_LAP_DISCONNECT_REQUEST: - /* - * Need to find out if we should close IrLAP or not. If there - * is only one LSAP connection left on this link, that LSAP - * must be the one that tries to close IrLAP. It will be - * removed later and moved to the list of unconnected LSAPs - */ - if (HASHBIN_GET_SIZE(self->lsaps) > 0) { - /* Timer value is checked in irsysctl - Jean II */ - irlmp_start_idle_timer(self, sysctl_lap_keepalive_time * HZ / 1000); - } else { - /* No more connections, so close IrLAP */ - - /* We don't want to change state just yet, because - * we want to reflect accurately the real state of - * the LAP, not the state we wish it was in, - * so that we don't lose LM_LAP_CONNECT_REQUEST. - * In some cases, IrLAP won't close the LAP - * immediately. For example, it might still be - * retrying packets or waiting for the pf bit. - * As the LAP always send a DISCONNECT_INDICATION - * in PCLOSE or SCLOSE, just change state on that. - * Jean II */ - irlap_disconnect_request(self->irlap); - } - break; - case LM_LAP_IDLE_TIMEOUT: - if (HASHBIN_GET_SIZE(self->lsaps) == 0) { - /* Same reasoning as above - keep state */ - irlap_disconnect_request(self->irlap); - } - break; - case LM_LAP_DISCONNECT_INDICATION: - irlmp_next_lap_state(self, LAP_STANDBY); - - /* In some case, at this point our side has already closed - * all lsaps, and we are waiting for the idle_timer to - * expire. If another device reconnect immediately, the - * idle timer will expire in the midle of the connection - * initialisation, screwing up things a lot... - * Therefore, we must stop the timer... */ - irlmp_stop_idle_timer(self); - - /* - * Inform all connected LSAP's using this link - */ - irlmp_do_all_lsap_event(self->lsaps, - LM_LAP_DISCONNECT_INDICATION); - - /* Force an expiry of the discovery log. - * Now that the LAP is free, the system may attempt to - * connect to another device. Unfortunately, our entries - * are stale. There is a small window (<3s) before the - * normal discovery will run and where irlmp_connect_request() - * can get the wrong info, so make sure things get - * cleaned *NOW* ;-) - Jean II */ - irlmp_do_expiry(); - break; - default: - pr_debug("%s(), Unknown event %s\n", - __func__, irlmp_event[event]); - break; - } -} - -/********************************************************************* - * - * LSAP connection control states - * - ********************************************************************/ - -/* - * Function irlmp_state_disconnected (event, skb, info) - * - * DISCONNECTED - * - */ -static int irlmp_state_disconnected(struct lsap_cb *self, IRLMP_EVENT event, - struct sk_buff *skb) -{ - int ret = 0; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -1;); - - switch (event) { -#ifdef CONFIG_IRDA_ULTRA - case LM_UDATA_INDICATION: - /* This is most bizarre. Those packets are aka unreliable - * connected, aka IrLPT or SOCK_DGRAM/IRDAPROTO_UNITDATA. - * Why do we pass them as Ultra ??? Jean II */ - irlmp_connless_data_indication(self, skb); - break; -#endif /* CONFIG_IRDA_ULTRA */ - case LM_CONNECT_REQUEST: - pr_debug("%s(), LM_CONNECT_REQUEST\n", __func__); - - if (self->conn_skb) { - net_warn_ratelimited("%s: busy with another request!\n", - __func__); - return -EBUSY; - } - /* Don't forget to refcount it (see irlmp_connect_request()) */ - skb_get(skb); - self->conn_skb = skb; - - irlmp_next_lsap_state(self, LSAP_SETUP_PEND); - - /* Start watchdog timer (5 secs for now) */ - irlmp_start_watchdog_timer(self, 5*HZ); - - irlmp_do_lap_event(self->lap, LM_LAP_CONNECT_REQUEST, NULL); - break; - case LM_CONNECT_INDICATION: - if (self->conn_skb) { - net_warn_ratelimited("%s: busy with another request!\n", - __func__); - return -EBUSY; - } - /* Don't forget to refcount it (see irlap_driver_rcv()) */ - skb_get(skb); - self->conn_skb = skb; - - irlmp_next_lsap_state(self, LSAP_CONNECT_PEND); - - /* Start watchdog timer - * This is not mentionned in the spec, but there is a rare - * race condition that can get the socket stuck. - * If we receive this event while our LAP is closing down, - * the LM_LAP_CONNECT_REQUEST get lost and we get stuck in - * CONNECT_PEND state forever. - * The other cause of getting stuck down there is if the - * higher layer never reply to the CONNECT_INDICATION. - * Anyway, it make sense to make sure that we always have - * a backup plan. 1 second is plenty (should be immediate). - * Jean II */ - irlmp_start_watchdog_timer(self, 1*HZ); - - irlmp_do_lap_event(self->lap, LM_LAP_CONNECT_REQUEST, NULL); - break; - default: - pr_debug("%s(), Unknown event %s on LSAP %#02x\n", - __func__, irlmp_event[event], self->slsap_sel); - break; - } - return ret; -} - -/* - * Function irlmp_state_connect (self, event, skb) - * - * CONNECT - * - */ -static int irlmp_state_connect(struct lsap_cb *self, IRLMP_EVENT event, - struct sk_buff *skb) -{ - struct lsap_cb *lsap; - int ret = 0; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -1;); - - switch (event) { - case LM_CONNECT_RESPONSE: - /* - * Bind this LSAP to the IrLAP link where the connect was - * received - */ - lsap = hashbin_remove(irlmp->unconnected_lsaps, (long) self, - NULL); - - IRDA_ASSERT(lsap == self, return -1;); - IRDA_ASSERT(self->lap != NULL, return -1;); - IRDA_ASSERT(self->lap->lsaps != NULL, return -1;); - - hashbin_insert(self->lap->lsaps, (irda_queue_t *) self, - (long) self, NULL); - - set_bit(0, &self->connected); /* TRUE */ - - irlmp_send_lcf_pdu(self->lap, self->dlsap_sel, - self->slsap_sel, CONNECT_CNF, skb); - - del_timer(&self->watchdog_timer); - - irlmp_next_lsap_state(self, LSAP_DATA_TRANSFER_READY); - break; - case LM_WATCHDOG_TIMEOUT: - /* May happen, who knows... - * Jean II */ - pr_debug("%s() WATCHDOG_TIMEOUT!\n", __func__); - - /* Disconnect, get out... - Jean II */ - self->lap = NULL; - self->dlsap_sel = LSAP_ANY; - irlmp_next_lsap_state(self, LSAP_DISCONNECTED); - break; - default: - /* LM_LAP_DISCONNECT_INDICATION : Should never happen, we - * are *not* yet bound to the IrLAP link. Jean II */ - pr_debug("%s(), Unknown event %s on LSAP %#02x\n", - __func__, irlmp_event[event], self->slsap_sel); - break; - } - return ret; -} - -/* - * Function irlmp_state_connect_pend (event, skb, info) - * - * CONNECT_PEND - * - */ -static int irlmp_state_connect_pend(struct lsap_cb *self, IRLMP_EVENT event, - struct sk_buff *skb) -{ - struct sk_buff *tx_skb; - int ret = 0; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -1;); - - switch (event) { - case LM_CONNECT_REQUEST: - /* Keep state */ - break; - case LM_CONNECT_RESPONSE: - pr_debug("%s(), LM_CONNECT_RESPONSE, no indication issued yet\n", - __func__); - /* Keep state */ - break; - case LM_DISCONNECT_REQUEST: - pr_debug("%s(), LM_DISCONNECT_REQUEST, not yet bound to IrLAP connection\n", - __func__); - /* Keep state */ - break; - case LM_LAP_CONNECT_CONFIRM: - pr_debug("%s(), LS_CONNECT_CONFIRM\n", __func__); - irlmp_next_lsap_state(self, LSAP_CONNECT); - - tx_skb = self->conn_skb; - self->conn_skb = NULL; - - irlmp_connect_indication(self, tx_skb); - /* Drop reference count - see irlmp_connect_indication(). */ - dev_kfree_skb(tx_skb); - break; - case LM_WATCHDOG_TIMEOUT: - /* Will happen in some rare cases because of a race condition. - * Just make sure we don't stay there forever... - * Jean II */ - pr_debug("%s() WATCHDOG_TIMEOUT!\n", __func__); - - /* Go back to disconnected mode, keep the socket waiting */ - self->lap = NULL; - self->dlsap_sel = LSAP_ANY; - if(self->conn_skb) - dev_kfree_skb(self->conn_skb); - self->conn_skb = NULL; - irlmp_next_lsap_state(self, LSAP_DISCONNECTED); - break; - default: - /* LM_LAP_DISCONNECT_INDICATION : Should never happen, we - * are *not* yet bound to the IrLAP link. Jean II */ - pr_debug("%s(), Unknown event %s on LSAP %#02x\n", - __func__, irlmp_event[event], self->slsap_sel); - break; - } - return ret; -} - -/* - * Function irlmp_state_dtr (self, event, skb) - * - * DATA_TRANSFER_READY - * - */ -static int irlmp_state_dtr(struct lsap_cb *self, IRLMP_EVENT event, - struct sk_buff *skb) -{ - LM_REASON reason; - int ret = 0; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -1;); - IRDA_ASSERT(self->lap != NULL, return -1;); - - switch (event) { - case LM_DATA_REQUEST: /* Optimize for the common case */ - irlmp_send_data_pdu(self->lap, self->dlsap_sel, - self->slsap_sel, FALSE, skb); - break; - case LM_DATA_INDICATION: /* Optimize for the common case */ - irlmp_data_indication(self, skb); - break; - case LM_UDATA_REQUEST: - IRDA_ASSERT(skb != NULL, return -1;); - irlmp_send_data_pdu(self->lap, self->dlsap_sel, - self->slsap_sel, TRUE, skb); - break; - case LM_UDATA_INDICATION: - irlmp_udata_indication(self, skb); - break; - case LM_CONNECT_REQUEST: - pr_debug("%s(), LM_CONNECT_REQUEST, error, LSAP already connected\n", - __func__); - /* Keep state */ - break; - case LM_CONNECT_RESPONSE: - pr_debug("%s(), LM_CONNECT_RESPONSE, error, LSAP already connected\n", - __func__); - /* Keep state */ - break; - case LM_DISCONNECT_REQUEST: - irlmp_send_lcf_pdu(self->lap, self->dlsap_sel, self->slsap_sel, - DISCONNECT, skb); - irlmp_next_lsap_state(self, LSAP_DISCONNECTED); - /* Called only from irlmp_disconnect_request(), will - * unbind from LAP over there. Jean II */ - - /* Try to close the LAP connection if its still there */ - if (self->lap) { - pr_debug("%s(), trying to close IrLAP\n", - __func__); - irlmp_do_lap_event(self->lap, - LM_LAP_DISCONNECT_REQUEST, - NULL); - } - break; - case LM_LAP_DISCONNECT_INDICATION: - irlmp_next_lsap_state(self, LSAP_DISCONNECTED); - - reason = irlmp_convert_lap_reason(self->lap->reason); - - irlmp_disconnect_indication(self, reason, NULL); - break; - case LM_DISCONNECT_INDICATION: - irlmp_next_lsap_state(self, LSAP_DISCONNECTED); - - IRDA_ASSERT(self->lap != NULL, return -1;); - IRDA_ASSERT(self->lap->magic == LMP_LAP_MAGIC, return -1;); - - IRDA_ASSERT(skb != NULL, return -1;); - IRDA_ASSERT(skb->len > 3, return -1;); - reason = skb->data[3]; - - /* Try to close the LAP connection */ - pr_debug("%s(), trying to close IrLAP\n", __func__); - irlmp_do_lap_event(self->lap, LM_LAP_DISCONNECT_REQUEST, NULL); - - irlmp_disconnect_indication(self, reason, skb); - break; - default: - pr_debug("%s(), Unknown event %s on LSAP %#02x\n", - __func__, irlmp_event[event], self->slsap_sel); - break; - } - return ret; -} - -/* - * Function irlmp_state_setup (event, skb, info) - * - * SETUP, Station Control has set up the underlying IrLAP connection. - * An LSAP connection request has been transmitted to the peer - * LSAP-Connection Control FSM and we are awaiting reply. - */ -static int irlmp_state_setup(struct lsap_cb *self, IRLMP_EVENT event, - struct sk_buff *skb) -{ - LM_REASON reason; - int ret = 0; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == LMP_LSAP_MAGIC, return -1;); - - switch (event) { - case LM_CONNECT_CONFIRM: - irlmp_next_lsap_state(self, LSAP_DATA_TRANSFER_READY); - - del_timer(&self->watchdog_timer); - - irlmp_connect_confirm(self, skb); - break; - case LM_DISCONNECT_INDICATION: - irlmp_next_lsap_state(self, LSAP_DISCONNECTED); - - IRDA_ASSERT(self->lap != NULL, return -1;); - IRDA_ASSERT(self->lap->magic == LMP_LAP_MAGIC, return -1;); - - IRDA_ASSERT(skb != NULL, return -1;); - IRDA_ASSERT(skb->len > 3, return -1;); - reason = skb->data[3]; - - /* Try to close the LAP connection */ - pr_debug("%s(), trying to close IrLAP\n", __func__); - irlmp_do_lap_event(self->lap, LM_LAP_DISCONNECT_REQUEST, NULL); - - irlmp_disconnect_indication(self, reason, skb); - break; - case LM_LAP_DISCONNECT_INDICATION: - irlmp_next_lsap_state(self, LSAP_DISCONNECTED); - - del_timer(&self->watchdog_timer); - - IRDA_ASSERT(self->lap != NULL, return -1;); - IRDA_ASSERT(self->lap->magic == LMP_LAP_MAGIC, return -1;); - - reason = irlmp_convert_lap_reason(self->lap->reason); - - irlmp_disconnect_indication(self, reason, skb); - break; - case LM_WATCHDOG_TIMEOUT: - pr_debug("%s() WATCHDOG_TIMEOUT!\n", __func__); - - IRDA_ASSERT(self->lap != NULL, return -1;); - irlmp_do_lap_event(self->lap, LM_LAP_DISCONNECT_REQUEST, NULL); - irlmp_next_lsap_state(self, LSAP_DISCONNECTED); - - irlmp_disconnect_indication(self, LM_CONNECT_FAILURE, NULL); - break; - default: - pr_debug("%s(), Unknown event %s on LSAP %#02x\n", - __func__, irlmp_event[event], self->slsap_sel); - break; - } - return ret; -} - -/* - * Function irlmp_state_setup_pend (event, skb, info) - * - * SETUP_PEND, An LM_CONNECT_REQUEST has been received from the service - * user to set up an LSAP connection. A request has been sent to the - * LAP FSM to set up the underlying IrLAP connection, and we - * are awaiting confirm. - */ -static int irlmp_state_setup_pend(struct lsap_cb *self, IRLMP_EVENT event, - struct sk_buff *skb) -{ - struct sk_buff *tx_skb; - LM_REASON reason; - int ret = 0; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(irlmp != NULL, return -1;); - - switch (event) { - case LM_LAP_CONNECT_CONFIRM: - IRDA_ASSERT(self->conn_skb != NULL, return -1;); - - tx_skb = self->conn_skb; - self->conn_skb = NULL; - - irlmp_send_lcf_pdu(self->lap, self->dlsap_sel, - self->slsap_sel, CONNECT_CMD, tx_skb); - /* Drop reference count - see irlap_data_request(). */ - dev_kfree_skb(tx_skb); - - irlmp_next_lsap_state(self, LSAP_SETUP); - break; - case LM_WATCHDOG_TIMEOUT: - pr_debug("%s() : WATCHDOG_TIMEOUT !\n", __func__); - - IRDA_ASSERT(self->lap != NULL, return -1;); - irlmp_do_lap_event(self->lap, LM_LAP_DISCONNECT_REQUEST, NULL); - irlmp_next_lsap_state(self, LSAP_DISCONNECTED); - - irlmp_disconnect_indication(self, LM_CONNECT_FAILURE, NULL); - break; - case LM_LAP_DISCONNECT_INDICATION: /* LS_Disconnect.indication */ - del_timer( &self->watchdog_timer); - - irlmp_next_lsap_state(self, LSAP_DISCONNECTED); - - reason = irlmp_convert_lap_reason(self->lap->reason); - - irlmp_disconnect_indication(self, reason, NULL); - break; - default: - pr_debug("%s(), Unknown event %s on LSAP %#02x\n", - __func__, irlmp_event[event], self->slsap_sel); - break; - } - return ret; -} diff --git a/net/irda/irlmp_frame.c b/net/irda/irlmp_frame.c deleted file mode 100644 index 38b0f994bc7b..000000000000 --- a/net/irda/irlmp_frame.c +++ /dev/null @@ -1,476 +0,0 @@ -/********************************************************************* - * - * Filename: irlmp_frame.c - * Version: 0.9 - * Description: IrLMP frame implementation - * Status: Experimental. - * Author: Dag Brattli <dagb@cs.uit.no> - * Created at: Tue Aug 19 02:09:59 1997 - * Modified at: Mon Dec 13 13:41:12 1999 - * Modified by: Dag Brattli <dagb@cs.uit.no> - * - * Copyright (c) 1998-1999 Dag Brattli <dagb@cs.uit.no> - * All Rights Reserved. - * Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#include <linux/skbuff.h> -#include <linux/kernel.h> - -#include <net/irda/irda.h> -#include <net/irda/irlap.h> -#include <net/irda/timer.h> -#include <net/irda/irlmp.h> -#include <net/irda/irlmp_frame.h> -#include <net/irda/discovery.h> - -static struct lsap_cb *irlmp_find_lsap(struct lap_cb *self, __u8 dlsap, - __u8 slsap, int status, hashbin_t *); - -inline void irlmp_send_data_pdu(struct lap_cb *self, __u8 dlsap, __u8 slsap, - int expedited, struct sk_buff *skb) -{ - skb->data[0] = dlsap; - skb->data[1] = slsap; - - if (expedited) { - pr_debug("%s(), sending expedited data\n", __func__); - irlap_data_request(self->irlap, skb, TRUE); - } else - irlap_data_request(self->irlap, skb, FALSE); -} - -/* - * Function irlmp_send_lcf_pdu (dlsap, slsap, opcode,skb) - * - * Send Link Control Frame to IrLAP - */ -void irlmp_send_lcf_pdu(struct lap_cb *self, __u8 dlsap, __u8 slsap, - __u8 opcode, struct sk_buff *skb) -{ - __u8 *frame; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LMP_LAP_MAGIC, return;); - IRDA_ASSERT(skb != NULL, return;); - - frame = skb->data; - - frame[0] = dlsap | CONTROL_BIT; - frame[1] = slsap; - - frame[2] = opcode; - - if (opcode == DISCONNECT) - frame[3] = 0x01; /* Service user request */ - else - frame[3] = 0x00; /* rsvd */ - - irlap_data_request(self->irlap, skb, FALSE); -} - -/* - * Function irlmp_input (skb) - * - * Used by IrLAP to pass received data frames to IrLMP layer - * - */ -void irlmp_link_data_indication(struct lap_cb *self, struct sk_buff *skb, - int unreliable) -{ - struct lsap_cb *lsap; - __u8 slsap_sel; /* Source (this) LSAP address */ - __u8 dlsap_sel; /* Destination LSAP address */ - __u8 *fp; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LMP_LAP_MAGIC, return;); - IRDA_ASSERT(skb->len > 2, return;); - - fp = skb->data; - - /* - * The next statements may be confusing, but we do this so that - * destination LSAP of received frame is source LSAP in our view - */ - slsap_sel = fp[0] & LSAP_MASK; - dlsap_sel = fp[1]; - - /* - * Check if this is an incoming connection, since we must deal with - * it in a different way than other established connections. - */ - if ((fp[0] & CONTROL_BIT) && (fp[2] == CONNECT_CMD)) { - pr_debug("%s(), incoming connection, source LSAP=%d, dest LSAP=%d\n", - __func__, slsap_sel, dlsap_sel); - - /* Try to find LSAP among the unconnected LSAPs */ - lsap = irlmp_find_lsap(self, dlsap_sel, slsap_sel, CONNECT_CMD, - irlmp->unconnected_lsaps); - - /* Maybe LSAP was already connected, so try one more time */ - if (!lsap) { - pr_debug("%s(), incoming connection for LSAP already connected\n", - __func__); - lsap = irlmp_find_lsap(self, dlsap_sel, slsap_sel, 0, - self->lsaps); - } - } else - lsap = irlmp_find_lsap(self, dlsap_sel, slsap_sel, 0, - self->lsaps); - - if (lsap == NULL) { - pr_debug("IrLMP, Sorry, no LSAP for received frame!\n"); - pr_debug("%s(), slsap_sel = %02x, dlsap_sel = %02x\n", - __func__, slsap_sel, dlsap_sel); - if (fp[0] & CONTROL_BIT) { - pr_debug("%s(), received control frame %02x\n", - __func__, fp[2]); - } else { - pr_debug("%s(), received data frame\n", __func__); - } - return; - } - - /* - * Check if we received a control frame? - */ - if (fp[0] & CONTROL_BIT) { - switch (fp[2]) { - case CONNECT_CMD: - lsap->lap = self; - irlmp_do_lsap_event(lsap, LM_CONNECT_INDICATION, skb); - break; - case CONNECT_CNF: - irlmp_do_lsap_event(lsap, LM_CONNECT_CONFIRM, skb); - break; - case DISCONNECT: - pr_debug("%s(), Disconnect indication!\n", - __func__); - irlmp_do_lsap_event(lsap, LM_DISCONNECT_INDICATION, - skb); - break; - case ACCESSMODE_CMD: - pr_debug("Access mode cmd not implemented!\n"); - break; - case ACCESSMODE_CNF: - pr_debug("Access mode cnf not implemented!\n"); - break; - default: - pr_debug("%s(), Unknown control frame %02x\n", - __func__, fp[2]); - break; - } - } else if (unreliable) { - /* Optimize and bypass the state machine if possible */ - if (lsap->lsap_state == LSAP_DATA_TRANSFER_READY) - irlmp_udata_indication(lsap, skb); - else - irlmp_do_lsap_event(lsap, LM_UDATA_INDICATION, skb); - } else { - /* Optimize and bypass the state machine if possible */ - if (lsap->lsap_state == LSAP_DATA_TRANSFER_READY) - irlmp_data_indication(lsap, skb); - else - irlmp_do_lsap_event(lsap, LM_DATA_INDICATION, skb); - } -} - -/* - * Function irlmp_link_unitdata_indication (self, skb) - * - * - * - */ -#ifdef CONFIG_IRDA_ULTRA -void irlmp_link_unitdata_indication(struct lap_cb *self, struct sk_buff *skb) -{ - struct lsap_cb *lsap; - __u8 slsap_sel; /* Source (this) LSAP address */ - __u8 dlsap_sel; /* Destination LSAP address */ - __u8 pid; /* Protocol identifier */ - __u8 *fp; - unsigned long flags; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LMP_LAP_MAGIC, return;); - IRDA_ASSERT(skb->len > 2, return;); - - fp = skb->data; - - /* - * The next statements may be confusing, but we do this so that - * destination LSAP of received frame is source LSAP in our view - */ - slsap_sel = fp[0] & LSAP_MASK; - dlsap_sel = fp[1]; - pid = fp[2]; - - if (pid & 0x80) { - pr_debug("%s(), extension in PID not supp!\n", - __func__); - return; - } - - /* Check if frame is addressed to the connectionless LSAP */ - if ((slsap_sel != LSAP_CONNLESS) || (dlsap_sel != LSAP_CONNLESS)) { - pr_debug("%s(), dropping frame!\n", __func__); - return; - } - - /* Search the connectionless LSAP */ - spin_lock_irqsave(&irlmp->unconnected_lsaps->hb_spinlock, flags); - lsap = (struct lsap_cb *) hashbin_get_first(irlmp->unconnected_lsaps); - while (lsap != NULL) { - /* - * Check if source LSAP and dest LSAP selectors and PID match. - */ - if ((lsap->slsap_sel == slsap_sel) && - (lsap->dlsap_sel == dlsap_sel) && - (lsap->pid == pid)) - { - break; - } - lsap = (struct lsap_cb *) hashbin_get_next(irlmp->unconnected_lsaps); - } - spin_unlock_irqrestore(&irlmp->unconnected_lsaps->hb_spinlock, flags); - - if (lsap) - irlmp_connless_data_indication(lsap, skb); - else { - pr_debug("%s(), found no matching LSAP!\n", __func__); - } -} -#endif /* CONFIG_IRDA_ULTRA */ - -/* - * Function irlmp_link_disconnect_indication (reason, userdata) - * - * IrLAP has disconnected - * - */ -void irlmp_link_disconnect_indication(struct lap_cb *lap, - struct irlap_cb *irlap, - LAP_REASON reason, - struct sk_buff *skb) -{ - IRDA_ASSERT(lap != NULL, return;); - IRDA_ASSERT(lap->magic == LMP_LAP_MAGIC, return;); - - lap->reason = reason; - lap->daddr = DEV_ADDR_ANY; - - /* FIXME: must do something with the skb if any */ - - /* - * Inform station state machine - */ - irlmp_do_lap_event(lap, LM_LAP_DISCONNECT_INDICATION, NULL); -} - -/* - * Function irlmp_link_connect_indication (qos) - * - * Incoming LAP connection! - * - */ -void irlmp_link_connect_indication(struct lap_cb *self, __u32 saddr, - __u32 daddr, struct qos_info *qos, - struct sk_buff *skb) -{ - /* Copy QoS settings for this session */ - self->qos = qos; - - /* Update destination device address */ - self->daddr = daddr; - IRDA_ASSERT(self->saddr == saddr, return;); - - irlmp_do_lap_event(self, LM_LAP_CONNECT_INDICATION, skb); -} - -/* - * Function irlmp_link_connect_confirm (qos) - * - * LAP connection confirmed! - * - */ -void irlmp_link_connect_confirm(struct lap_cb *self, struct qos_info *qos, - struct sk_buff *skb) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LMP_LAP_MAGIC, return;); - IRDA_ASSERT(qos != NULL, return;); - - /* Don't need use the skb for now */ - - /* Copy QoS settings for this session */ - self->qos = qos; - - irlmp_do_lap_event(self, LM_LAP_CONNECT_CONFIRM, NULL); -} - -/* - * Function irlmp_link_discovery_indication (self, log) - * - * Device is discovering us - * - * It's not an answer to our own discoveries, just another device trying - * to perform discovery, but we don't want to miss the opportunity - * to exploit this information, because : - * o We may not actively perform discovery (just passive discovery) - * o This type of discovery is much more reliable. In some cases, it - * seem that less than 50% of our discoveries get an answer, while - * we always get ~100% of these. - * o Make faster discovery, statistically divide time of discovery - * events by 2 (important for the latency aspect and user feel) - * o Even is we do active discovery, the other node might not - * answer our discoveries (ex: Palm). The Palm will just perform - * one active discovery and connect directly to us. - * - * However, when both devices discover each other, they might attempt to - * connect to each other following the discovery event, and it would create - * collisions on the medium (SNRM battle). - * The "fix" for that is to disable all connection requests in IrLAP - * for 100ms after a discovery indication by setting the media_busy flag. - * Previously, we used to postpone the event which was quite ugly. Now - * that IrLAP takes care of this problem, just pass the event up... - * - * Jean II - */ -void irlmp_link_discovery_indication(struct lap_cb *self, - discovery_t *discovery) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LMP_LAP_MAGIC, return;); - - /* Add to main log, cleanup */ - irlmp_add_discovery(irlmp->cachelog, discovery); - - /* Just handle it the same way as a discovery confirm, - * bypass the LM_LAP state machine (see below) */ - irlmp_discovery_confirm(irlmp->cachelog, DISCOVERY_PASSIVE); -} - -/* - * Function irlmp_link_discovery_confirm (self, log) - * - * Called by IrLAP with a list of discoveries after the discovery - * request has been carried out. A NULL log is received if IrLAP - * was unable to carry out the discovery request - * - */ -void irlmp_link_discovery_confirm(struct lap_cb *self, hashbin_t *log) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LMP_LAP_MAGIC, return;); - - /* Add to main log, cleanup */ - irlmp_add_discovery_log(irlmp->cachelog, log); - - /* Propagate event to various LSAPs registered for it. - * We bypass the LM_LAP state machine because - * 1) We do it regardless of the LM_LAP state - * 2) It doesn't affect the LM_LAP state - * 3) Faster, slimer, simpler, ... - * Jean II */ - irlmp_discovery_confirm(irlmp->cachelog, DISCOVERY_ACTIVE); -} - -#ifdef CONFIG_IRDA_CACHE_LAST_LSAP -static inline void irlmp_update_cache(struct lap_cb *lap, - struct lsap_cb *lsap) -{ - /* Prevent concurrent read to get garbage */ - lap->cache.valid = FALSE; - /* Update cache entry */ - lap->cache.dlsap_sel = lsap->dlsap_sel; - lap->cache.slsap_sel = lsap->slsap_sel; - lap->cache.lsap = lsap; - lap->cache.valid = TRUE; -} -#endif - -/* - * Function irlmp_find_handle (self, dlsap_sel, slsap_sel, status, queue) - * - * Find handle associated with destination and source LSAP - * - * Any IrDA connection (LSAP/TSAP) is uniquely identified by - * 3 parameters, the local lsap, the remote lsap and the remote address. - * We may initiate multiple connections to the same remote service - * (they will have different local lsap), a remote device may initiate - * multiple connections to the same local service (they will have - * different remote lsap), or multiple devices may connect to the same - * service and may use the same remote lsap (and they will have - * different remote address). - * So, where is the remote address ? Each LAP connection is made with - * a single remote device, so imply a specific remote address. - * Jean II - */ -static struct lsap_cb *irlmp_find_lsap(struct lap_cb *self, __u8 dlsap_sel, - __u8 slsap_sel, int status, - hashbin_t *queue) -{ - struct lsap_cb *lsap; - unsigned long flags; - - /* - * Optimize for the common case. We assume that the last frame - * received is in the same connection as the last one, so check in - * cache first to avoid the linear search - */ -#ifdef CONFIG_IRDA_CACHE_LAST_LSAP - if ((self->cache.valid) && - (self->cache.slsap_sel == slsap_sel) && - (self->cache.dlsap_sel == dlsap_sel)) - { - return self->cache.lsap; - } -#endif - - spin_lock_irqsave(&queue->hb_spinlock, flags); - - lsap = (struct lsap_cb *) hashbin_get_first(queue); - while (lsap != NULL) { - /* - * If this is an incoming connection, then the destination - * LSAP selector may have been specified as LM_ANY so that - * any client can connect. In that case we only need to check - * if the source LSAP (in our view!) match! - */ - if ((status == CONNECT_CMD) && - (lsap->slsap_sel == slsap_sel) && - (lsap->dlsap_sel == LSAP_ANY)) { - /* This is where the dest lsap sel is set on incoming - * lsaps */ - lsap->dlsap_sel = dlsap_sel; - break; - } - /* - * Check if source LSAP and dest LSAP selectors match. - */ - if ((lsap->slsap_sel == slsap_sel) && - (lsap->dlsap_sel == dlsap_sel)) - break; - - lsap = (struct lsap_cb *) hashbin_get_next(queue); - } -#ifdef CONFIG_IRDA_CACHE_LAST_LSAP - if(lsap) - irlmp_update_cache(self, lsap); -#endif - spin_unlock_irqrestore(&queue->hb_spinlock, flags); - - /* Return what we've found or NULL */ - return lsap; -} diff --git a/net/irda/irmod.c b/net/irda/irmod.c deleted file mode 100644 index c5e35b85c477..000000000000 --- a/net/irda/irmod.c +++ /dev/null @@ -1,199 +0,0 @@ -/********************************************************************* - * - * Filename: irmod.c - * Version: 0.9 - * Description: IrDA stack main entry points - * Status: Experimental. - * Author: Dag Brattli <dagb@cs.uit.no> - * Created at: Mon Dec 15 13:55:39 1997 - * Modified at: Wed Jan 5 15:12:41 2000 - * Modified by: Dag Brattli <dagb@cs.uit.no> - * - * Copyright (c) 1997, 1999-2000 Dag Brattli, All Rights Reserved. - * Copyright (c) 2000-2004 Jean Tourrilhes <jt@hpl.hp.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -/* - * This file contains the main entry points of the IrDA stack. - * They are in this file and not af_irda.c because some developpers - * are using the IrDA stack without the socket API (compiling out - * af_irda.c). - * Jean II - */ - -#include <linux/module.h> -#include <linux/moduleparam.h> - -#include <net/irda/irda.h> -#include <net/irda/irmod.h> /* notify_t */ -#include <net/irda/irlap.h> /* irlap_init */ -#include <net/irda/irlmp.h> /* irlmp_init */ -#include <net/irda/iriap.h> /* iriap_init */ -#include <net/irda/irttp.h> /* irttp_init */ -#include <net/irda/irda_device.h> /* irda_device_init */ - -/* Packet type handler. - * Tell the kernel how IrDA packets should be handled. - */ -static struct packet_type irda_packet_type __read_mostly = { - .type = cpu_to_be16(ETH_P_IRDA), - .func = irlap_driver_rcv, /* Packet type handler irlap_frame.c */ -}; - -/* - * Function irda_notify_init (notify) - * - * Used for initializing the notify structure - * - */ -void irda_notify_init(notify_t *notify) -{ - notify->data_indication = NULL; - notify->udata_indication = NULL; - notify->connect_confirm = NULL; - notify->connect_indication = NULL; - notify->disconnect_indication = NULL; - notify->flow_indication = NULL; - notify->status_indication = NULL; - notify->instance = NULL; - strlcpy(notify->name, "Unknown", sizeof(notify->name)); -} -EXPORT_SYMBOL(irda_notify_init); - -/* - * Function irda_init (void) - * - * Protocol stack initialisation entry point. - * Initialise the various components of the IrDA stack - */ -static int __init irda_init(void) -{ - int ret = 0; - - /* Lower layer of the stack */ - irlmp_init(); - irlap_init(); - - /* Driver/dongle support */ - irda_device_init(); - - /* Higher layers of the stack */ - iriap_init(); - irttp_init(); - ret = irsock_init(); - if (ret < 0) - goto out_err_1; - - /* Add IrDA packet type (Start receiving packets) */ - dev_add_pack(&irda_packet_type); - - /* External APIs */ -#ifdef CONFIG_PROC_FS - irda_proc_register(); -#endif -#ifdef CONFIG_SYSCTL - ret = irda_sysctl_register(); - if (ret < 0) - goto out_err_2; -#endif - - ret = irda_nl_register(); - if (ret < 0) - goto out_err_3; - - return 0; - - out_err_3: -#ifdef CONFIG_SYSCTL - irda_sysctl_unregister(); - out_err_2: -#endif -#ifdef CONFIG_PROC_FS - irda_proc_unregister(); -#endif - - /* Remove IrDA packet type (stop receiving packets) */ - dev_remove_pack(&irda_packet_type); - - /* Remove higher layers */ - irsock_cleanup(); - out_err_1: - irttp_cleanup(); - iriap_cleanup(); - - /* Remove lower layers */ - irda_device_cleanup(); - irlap_cleanup(); /* Must be done before irlmp_cleanup()! DB */ - - /* Remove middle layer */ - irlmp_cleanup(); - - - return ret; -} - -/* - * Function irda_cleanup (void) - * - * Protocol stack cleanup/removal entry point. - * Cleanup the various components of the IrDA stack - */ -static void __exit irda_cleanup(void) -{ - /* Remove External APIs */ - irda_nl_unregister(); - -#ifdef CONFIG_SYSCTL - irda_sysctl_unregister(); -#endif -#ifdef CONFIG_PROC_FS - irda_proc_unregister(); -#endif - - /* Remove IrDA packet type (stop receiving packets) */ - dev_remove_pack(&irda_packet_type); - - /* Remove higher layers */ - irsock_cleanup(); - irttp_cleanup(); - iriap_cleanup(); - - /* Remove lower layers */ - irda_device_cleanup(); - irlap_cleanup(); /* Must be done before irlmp_cleanup()! DB */ - - /* Remove middle layer */ - irlmp_cleanup(); -} - -/* - * The IrDA stack must be initialised *before* drivers get initialised, - * and *before* higher protocols (IrLAN/IrCOMM/IrNET) get initialised, - * otherwise bad things will happen (hashbins will be NULL for example). - * Those modules are at module_init()/device_initcall() level. - * - * On the other hand, it needs to be initialised *after* the basic - * networking, the /proc/net filesystem and sysctl module. Those are - * currently initialised in .../init/main.c (before initcalls). - * Also, IrDA drivers needs to be initialised *after* the random number - * generator (main stack and higher layer init don't need it anymore). - * - * Jean II - */ -subsys_initcall(irda_init); -module_exit(irda_cleanup); - -MODULE_AUTHOR("Dag Brattli <dagb@cs.uit.no> & Jean Tourrilhes <jt@hpl.hp.com>"); -MODULE_DESCRIPTION("The Linux IrDA Protocol Stack"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_NETPROTO(PF_IRDA); diff --git a/net/irda/irnet/Kconfig b/net/irda/irnet/Kconfig deleted file mode 100644 index 28c557f0fdd2..000000000000 --- a/net/irda/irnet/Kconfig +++ /dev/null @@ -1,13 +0,0 @@ -config IRNET - tristate "IrNET protocol" - depends on IRDA && PPP - help - Say Y here if you want to build support for the IrNET protocol. - To compile it as a module, choose M here: the module will be - called irnet. IrNET is a PPP driver, so you will also need a - working PPP subsystem (driver, daemon and config)... - - IrNET is an alternate way to transfer TCP/IP traffic over IrDA. It - uses synchronous PPP over a set of point to point IrDA sockets. You - can use it between Linux machine or with W2k. - diff --git a/net/irda/irnet/Makefile b/net/irda/irnet/Makefile deleted file mode 100644 index 61c365c8a2a0..000000000000 --- a/net/irda/irnet/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -# -# Makefile for the Linux IrDA IrNET protocol layer. -# - -obj-$(CONFIG_IRNET) += irnet.o - -irnet-y := irnet_ppp.o irnet_irda.o diff --git a/net/irda/irnet/irnet.h b/net/irda/irnet/irnet.h deleted file mode 100644 index 9d451f8ed47a..000000000000 --- a/net/irda/irnet/irnet.h +++ /dev/null @@ -1,522 +0,0 @@ -/* - * IrNET protocol module : Synchronous PPP over an IrDA socket. - * - * Jean II - HPL `00 - <jt@hpl.hp.com> - * - * This file contains definitions and declarations global to the IrNET module, - * all grouped in one place... - * This file is a *private* header, so other modules don't want to know - * what's in there... - * - * Note : as most part of the Linux kernel, this module is available - * under the GNU General Public License (GPL). - */ - -#ifndef IRNET_H -#define IRNET_H - -/************************** DOCUMENTATION ***************************/ -/* - * What is IrNET - * ------------- - * IrNET is a protocol allowing to carry TCP/IP traffic between two - * IrDA peers in an efficient fashion. It is a thin layer, passing PPP - * packets to IrTTP and vice versa. It uses PPP in synchronous mode, - * because IrTTP offer a reliable sequenced packet service (as opposed - * to a byte stream). In fact, you could see IrNET as carrying TCP/IP - * in a IrDA socket, using PPP to provide the glue. - * - * The main difference with traditional PPP over IrCOMM is that we - * avoid the framing and serial emulation which are a performance - * bottleneck. It also allows multipoint communications in a sensible - * fashion. - * - * The main difference with IrLAN is that we use PPP for the link - * management, which is more standard, interoperable and flexible than - * the IrLAN protocol. For example, PPP adds authentication, - * encryption, compression, header compression and automated routing - * setup. And, as IrNET let PPP do the hard work, the implementation - * is much simpler than IrLAN. - * - * The Linux implementation - * ------------------------ - * IrNET is written on top of the Linux-IrDA stack, and interface with - * the generic Linux PPP driver. Because IrNET depend on recent - * changes of the PPP driver interface, IrNET will work only with very - * recent kernel (2.3.99-pre6 and up). - * - * The present implementation offer the following features : - * o simple user interface using pppd - * o efficient implementation (interface directly to PPP and IrTTP) - * o addressing (you can specify the name of the IrNET recipient) - * o multipoint operation (limited by IrLAP specification) - * o information in /proc/net/irda/irnet - * o IrNET events on /dev/irnet (for user space daemon) - * o IrNET daemon (irnetd) to automatically handle incoming requests - * o Windows 2000 compatibility (tested, but need more work) - * Currently missing : - * o Lot's of testing (that's your job) - * o Connection retries (may be too hard to do) - * o Check pppd persist mode - * o User space daemon (to automatically handle incoming requests) - * - * The setup is not currently the most easy, but this should get much - * better when everything will get integrated... - * - * Acknowledgements - * ---------------- - * This module is based on : - * o The PPP driver (ppp_synctty/ppp_generic) by Paul Mackerras - * o The IrLAN protocol (irlan_common/XXX) by Dag Brattli - * o The IrSock interface (af_irda) by Dag Brattli - * o Some other bits from the kernel and my drivers... - * Infinite thanks to those brave souls for providing the infrastructure - * upon which IrNET is built. - * - * Thanks to all my colleagues in HP for helping me. In particular, - * thanks to Salil Pradhan and Bill Serra for W2k testing... - * Thanks to Luiz Magalhaes for irnetd and much testing... - * - * Thanks to Alan Cox for answering lot's of my stupid questions, and - * to Paul Mackerras answering my questions on how to best integrate - * IrNET and pppd. - * - * Jean II - * - * Note on some implementations choices... - * ------------------------------------ - * 1) Direct interface vs tty/socket - * I could have used a tty interface to hook to ppp and use the full - * socket API to connect to IrDA. The code would have been easier to - * maintain, and maybe the code would have been smaller... - * Instead, we hook directly to ppp_generic and to IrTTP, which make - * things more complicated... - * - * The first reason is flexibility : this allow us to create IrNET - * instances on demand (no /dev/ircommX crap) and to allow linkname - * specification on pppd command line... - * - * Second reason is speed optimisation. If you look closely at the - * transmit and receive paths, you will notice that they are "super lean" - * (that's why they look ugly), with no function calls and as little data - * copy and modification as I could... - * - * 2) irnetd in user space - * irnetd is implemented in user space, which is necessary to call pppd. - * This also give maximum benefits in term of flexibility and customability, - * and allow to offer the event channel, useful for other stuff like debug. - * - * On the other hand, this require a loose coordination between the - * present module and irnetd. One critical area is how incoming request - * are handled. - * When irnet receive an incoming request, it send an event to irnetd and - * drop the incoming IrNET socket. - * irnetd start a pppd instance, which create a new IrNET socket. This new - * socket is then connected in the originating node to the pppd instance. - * At this point, in the originating node, the first socket is closed. - * - * I admit, this is a bit messy and waste some resources. The alternative - * is caching incoming socket, and that's also quite messy and waste - * resources. - * We also make connection time slower. For example, on a 115 kb/s link it - * adds 60ms to the connection time (770 ms). However, this is slower than - * the time it takes to fire up pppd on my P133... - * - * - * History : - * ------- - * - * v1 - 15.5.00 - Jean II - * o Basic IrNET (hook to ppp_generic & IrTTP - incl. multipoint) - * o control channel on /dev/irnet (set name/address) - * o event channel on /dev/irnet (for user space daemon) - * - * v2 - 5.6.00 - Jean II - * o Enable DROP_NOT_READY to avoid PPP timeouts & other weirdness... - * o Add DISCONNECT_TO event and rename DISCONNECT_FROM. - * o Set official device number alloaction on /dev/irnet - * - * v3 - 30.8.00 - Jean II - * o Update to latest Linux-IrDA changes : - * - queue_t => irda_queue_t - * o Update to ppp-2.4.0 : - * - move irda_irnet_connect from PPPIOCATTACH to TIOCSETD - * o Add EXPIRE event (depend on new IrDA-Linux patch) - * o Switch from `hashbin_remove' to `hashbin_remove_this' to fix - * a multilink bug... (depend on new IrDA-Linux patch) - * o fix a self->daddr to self->raddr in irda_irnet_connect to fix - * another multilink bug (darn !) - * o Remove LINKNAME_IOCTL cruft - * - * v3b - 31.8.00 - Jean II - * o Dump discovery log at event channel startup - * - * v4 - 28.9.00 - Jean II - * o Fix interaction between poll/select and dump discovery log - * o Add IRNET_BLOCKED_LINK event (depend on new IrDA-Linux patch) - * o Add IRNET_NOANSWER_FROM event (mostly to help support) - * o Release flow control in disconnect_indication - * o Block packets while connecting (speed up connections) - * - * v5 - 11.01.01 - Jean II - * o Init self->max_header_size, just in case... - * o Set up ap->chan.hdrlen, to get zero copy on tx side working. - * o avoid tx->ttp->flow->ppp->tx->... loop, by checking flow state - * Thanks to Christian Gennerat for finding this bug ! - * --- - * o Declare the proper MTU/MRU that we can support - * (but PPP doesn't read the MTU value :-() - * o Declare hashbin HB_NOLOCK instead of HB_LOCAL to avoid - * disabling and enabling irq twice - * - * v6 - 31.05.01 - Jean II - * o Print source address in Found, Discovery, Expiry & Request events - * o Print requested source address in /proc/net/irnet - * o Change control channel input. Allow multiple commands in one line. - * o Add saddr command to change ap->rsaddr (and use that in IrDA) - * --- - * o Make the IrDA connection procedure totally asynchronous. - * Heavy rewrite of the IAS query code and the whole connection - * procedure. Now, irnet_connect() no longer need to be called from - * a process context... - * o Enable IrDA connect retries in ppp_irnet_send(). The good thing - * is that IrDA connect retries are directly driven by PPP LCP - * retries (we retry for each LCP packet), so that everything - * is transparently controlled from pppd lcp-max-configure. - * o Add ttp_connect flag to prevent rentry on the connect procedure - * o Test and fixups to eliminate side effects of retries - * - * v7 - 22.08.01 - Jean II - * o Cleanup : Change "saddr = 0x0" to "saddr = DEV_ADDR_ANY" - * o Fix bug in BLOCK_WHEN_CONNECT introduced in v6 : due to the - * asynchronous IAS query, self->tsap is NULL when PPP send the - * first packet. This was preventing "connect-delay 0" to work. - * Change the test in ppp_irnet_send() to self->ttp_connect. - * - * v8 - 1.11.01 - Jean II - * o Tighten the use of self->ttp_connect and self->ttp_open to - * prevent various race conditions. - * o Avoid leaking discovery log and skb - * o Replace "self" with "server" in irnet_connect_indication() to - * better detect cut'n'paste error ;-) - * - * v9 - 29.11.01 - Jean II - * o Fix event generation in disconnect indication that I broke in v8 - * It was always generation "No-Answer" because I was testing ttp_open - * just after clearing it. *blush*. - * o Use newly created irttp_listen() to fix potential crash when LAP - * destroyed before irnet module removed. - * - * v10 - 4.3.2 - Jean II - * o When receiving a disconnect indication, don't reenable the - * PPP Tx queue, this will trigger a reconnect. Instead, close - * the channel, which will kill pppd... - * - * v11 - 20.3.02 - Jean II - * o Oops ! v10 fix disabled IrNET retries and passive behaviour. - * Better fix in irnet_disconnect_indication() : - * - if connected, kill pppd via hangup. - * - if not connected, reenable ppp Tx, which trigger IrNET retry. - * - * v12 - 10.4.02 - Jean II - * o Fix race condition in irnet_connect_indication(). - * If the socket was already trying to connect, drop old connection - * and use new one only if acting as primary. See comments. - * - * v13 - 30.5.02 - Jean II - * o Update module init code - * - * v14 - 20.2.03 - Jean II - * o Add discovery hint bits in the control channel. - * o Remove obsolete MOD_INC/DEC_USE_COUNT in favor of .owner - * - * v15 - 7.4.03 - Jean II - * o Replace spin_lock_irqsave() with spin_lock_bh() so that we can - * use ppp_unit_number(). It's probably also better overall... - * o Disable call to ppp_unregister_channel(), because we can't do it. - */ - -/***************************** INCLUDES *****************************/ - -#include <linux/module.h> - -#include <linux/kernel.h> -#include <linux/skbuff.h> -#include <linux/tty.h> -#include <linux/proc_fs.h> -#include <linux/netdevice.h> -#include <linux/poll.h> -#include <linux/capability.h> -#include <linux/ctype.h> /* isspace() */ -#include <linux/string.h> /* skip_spaces() */ -#include <linux/uaccess.h> -#include <linux/init.h> - -#include <linux/ppp_defs.h> -#include <linux/ppp-ioctl.h> -#include <linux/ppp_channel.h> - -#include <net/irda/irda.h> -#include <net/irda/iriap.h> -#include <net/irda/irias_object.h> -#include <net/irda/irlmp.h> -#include <net/irda/irttp.h> -#include <net/irda/discovery.h> - -/***************************** OPTIONS *****************************/ -/* - * Define or undefine to compile or not some optional part of the - * IrNET driver... - * Note : the present defaults make sense, play with that at your - * own risk... - */ -/* IrDA side of the business... */ -#define DISCOVERY_NOMASK /* To enable W2k compatibility... */ -#define ADVERTISE_HINT /* Advertise IrLAN hint bit */ -#define ALLOW_SIMULT_CONNECT /* This seem to work, cross fingers... */ -#define DISCOVERY_EVENTS /* Query the discovery log to post events */ -#define INITIAL_DISCOVERY /* Dump current discovery log as events */ -#undef STREAM_COMPAT /* Not needed - potentially messy */ -#undef CONNECT_INDIC_KICK /* Might mess IrDA, not needed */ -#undef FAIL_SEND_DISCONNECT /* Might mess IrDA, not needed */ -#undef PASS_CONNECT_PACKETS /* Not needed ? Safe */ -#undef MISSING_PPP_API /* Stuff I wish I could do */ - -/* PPP side of the business */ -#define BLOCK_WHEN_CONNECT /* Block packets when connecting */ -#define CONNECT_IN_SEND /* Retry IrDA connection procedure */ -#undef FLUSH_TO_PPP /* Not sure about this one, let's play safe */ -#undef SECURE_DEVIRNET /* Bah... */ - -/****************************** DEBUG ******************************/ - -/* - * This set of flags enable and disable all the various warning, - * error and debug message of this driver. - * Each section can be enabled and disabled independently - */ -/* In the PPP part */ -#define DEBUG_CTRL_TRACE 0 /* Control channel */ -#define DEBUG_CTRL_INFO 0 /* various info */ -#define DEBUG_CTRL_ERROR 1 /* problems */ -#define DEBUG_FS_TRACE 0 /* filesystem callbacks */ -#define DEBUG_FS_INFO 0 /* various info */ -#define DEBUG_FS_ERROR 1 /* problems */ -#define DEBUG_PPP_TRACE 0 /* PPP related functions */ -#define DEBUG_PPP_INFO 0 /* various info */ -#define DEBUG_PPP_ERROR 1 /* problems */ -#define DEBUG_MODULE_TRACE 0 /* module insertion/removal */ -#define DEBUG_MODULE_ERROR 1 /* problems */ - -/* In the IrDA part */ -#define DEBUG_IRDA_SR_TRACE 0 /* IRDA subroutines */ -#define DEBUG_IRDA_SR_INFO 0 /* various info */ -#define DEBUG_IRDA_SR_ERROR 1 /* problems */ -#define DEBUG_IRDA_SOCK_TRACE 0 /* IRDA main socket functions */ -#define DEBUG_IRDA_SOCK_INFO 0 /* various info */ -#define DEBUG_IRDA_SOCK_ERROR 1 /* problems */ -#define DEBUG_IRDA_SERV_TRACE 0 /* The IrNET server */ -#define DEBUG_IRDA_SERV_INFO 0 /* various info */ -#define DEBUG_IRDA_SERV_ERROR 1 /* problems */ -#define DEBUG_IRDA_TCB_TRACE 0 /* IRDA IrTTP callbacks */ -#define DEBUG_IRDA_CB_INFO 0 /* various info */ -#define DEBUG_IRDA_CB_ERROR 1 /* problems */ -#define DEBUG_IRDA_OCB_TRACE 0 /* IRDA other callbacks */ -#define DEBUG_IRDA_OCB_INFO 0 /* various info */ -#define DEBUG_IRDA_OCB_ERROR 1 /* problems */ - -#define DEBUG_ASSERT 0 /* Verify all assertions */ - -/* - * These are the macros we are using to actually print the debug - * statements. Don't look at it, it's ugly... - * - * One of the trick is that, as the DEBUG_XXX are constant, the - * compiler will optimise away the if() in all cases. - */ -/* All error messages (will show up in the normal logs) */ -#define DERROR(dbg, format, args...) \ - {if(DEBUG_##dbg) \ - printk(KERN_INFO "irnet: %s(): " format, __func__ , ##args);} - -/* Normal debug message (will show up in /var/log/debug) */ -#define DEBUG(dbg, format, args...) \ - {if(DEBUG_##dbg) \ - printk(KERN_DEBUG "irnet: %s(): " format, __func__ , ##args);} - -/* Entering a function (trace) */ -#define DENTER(dbg, format, args...) \ - {if(DEBUG_##dbg) \ - printk(KERN_DEBUG "irnet: -> %s" format, __func__ , ##args);} - -/* Entering and exiting a function in one go (trace) */ -#define DPASS(dbg, format, args...) \ - {if(DEBUG_##dbg) \ - printk(KERN_DEBUG "irnet: <>%s" format, __func__ , ##args);} - -/* Exiting a function (trace) */ -#define DEXIT(dbg, format, args...) \ - {if(DEBUG_##dbg) \ - printk(KERN_DEBUG "irnet: <-%s()" format, __func__ , ##args);} - -/* Exit a function with debug */ -#define DRETURN(ret, dbg, args...) \ - {DEXIT(dbg, ": " args);\ - return ret; } - -/* Exit a function on failed condition */ -#define DABORT(cond, ret, dbg, args...) \ - {if(cond) {\ - DERROR(dbg, args);\ - return ret; }} - -/* Invalid assertion, print out an error and exit... */ -#define DASSERT(cond, ret, dbg, args...) \ - {if((DEBUG_ASSERT) && !(cond)) {\ - DERROR(dbg, "Invalid assertion: " args);\ - return ret; }} - -/************************ CONSTANTS & MACROS ************************/ - -/* Paranoia */ -#define IRNET_MAGIC 0xB00754 - -/* Number of control events in the control channel buffer... */ -#define IRNET_MAX_EVENTS 8 /* Should be more than enough... */ - -/****************************** TYPES ******************************/ - -/* - * This is the main structure where we store all the data pertaining to - * one instance of irnet. - * Note : in irnet functions, a pointer this structure is usually called - * "ap" or "self". If the code is borrowed from the IrDA stack, it tend - * to be called "self", and if it is borrowed from the PPP driver it is - * "ap". Apart from that, it's exactly the same structure ;-) - */ -typedef struct irnet_socket -{ - /* ------------------- Instance management ------------------- */ - /* We manage a linked list of IrNET socket instances */ - irda_queue_t q; /* Must be first - for hasbin */ - int magic; /* Paranoia */ - - /* --------------------- FileSystem part --------------------- */ - /* "pppd" interact directly with us on a /dev/ file */ - struct file * file; /* File descriptor of this instance */ - /* TTY stuff - to keep "pppd" happy */ - struct ktermios termios; /* Various tty flags */ - /* Stuff for the control channel */ - int event_index; /* Last read in the event log */ - - /* ------------------------- PPP part ------------------------- */ - /* We interface directly to the ppp_generic driver in the kernel */ - int ppp_open; /* registered with ppp_generic */ - struct ppp_channel chan; /* Interface to generic ppp layer */ - - int mru; /* Max size of PPP payload */ - u32 xaccm[8]; /* Asynchronous character map (just */ - u32 raccm; /* to please pppd - dummy) */ - unsigned int flags; /* PPP flags (compression, ...) */ - unsigned int rbits; /* Unused receive flags ??? */ - struct work_struct disconnect_work; /* Process context disconnection */ - /* ------------------------ IrTTP part ------------------------ */ - /* We create a pseudo "socket" over the IrDA tranport */ - unsigned long ttp_open; /* Set when IrTTP is ready */ - unsigned long ttp_connect; /* Set when IrTTP is connecting */ - struct tsap_cb * tsap; /* IrTTP instance (the connection) */ - - char rname[NICKNAME_MAX_LEN + 1]; - /* IrDA nickname of destination */ - __u32 rdaddr; /* Requested peer IrDA address */ - __u32 rsaddr; /* Requested local IrDA address */ - __u32 daddr; /* actual peer IrDA address */ - __u32 saddr; /* my local IrDA address */ - __u8 dtsap_sel; /* Remote TSAP selector */ - __u8 stsap_sel; /* Local TSAP selector */ - - __u32 max_sdu_size_rx;/* Socket parameters used for IrTTP */ - __u32 max_sdu_size_tx; - __u32 max_data_size; - __u8 max_header_size; - LOCAL_FLOW tx_flow; /* State of the Tx path in IrTTP */ - - /* ------------------- IrLMP and IrIAS part ------------------- */ - /* Used for IrDA Discovery and socket name resolution */ - void * ckey; /* IrLMP client handle */ - __u16 mask; /* Hint bits mask (filter discov.)*/ - int nslots; /* Number of slots for discovery */ - - struct iriap_cb * iriap; /* Used to query remote IAS */ - int errno; /* status of the IAS query */ - - /* -------------------- Discovery log part -------------------- */ - /* Used by initial discovery on the control channel - * and by irnet_discover_daddr_and_lsap_sel() */ - struct irda_device_info *discoveries; /* Copy of the discovery log */ - int disco_index; /* Last read in the discovery log */ - int disco_number; /* Size of the discovery log */ - - struct mutex lock; - -} irnet_socket; - -/* - * This is the various event that we will generate on the control channel - */ -typedef enum irnet_event -{ - IRNET_DISCOVER, /* New IrNET node discovered */ - IRNET_EXPIRE, /* IrNET node expired */ - IRNET_CONNECT_TO, /* IrNET socket has connected to other node */ - IRNET_CONNECT_FROM, /* Other node has connected to IrNET socket */ - IRNET_REQUEST_FROM, /* Non satisfied connection request */ - IRNET_NOANSWER_FROM, /* Failed connection request */ - IRNET_BLOCKED_LINK, /* Link (IrLAP) is blocked for > 3s */ - IRNET_DISCONNECT_FROM, /* IrNET socket has disconnected */ - IRNET_DISCONNECT_TO /* Closing IrNET socket */ -} irnet_event; - -/* - * This is the storage for an event and its arguments - */ -typedef struct irnet_log -{ - irnet_event event; - int unit; - __u32 saddr; - __u32 daddr; - char name[NICKNAME_MAX_LEN + 1]; /* 21 + 1 */ - __u16_host_order hints; /* Discovery hint bits */ -} irnet_log; - -/* - * This is the storage for all events and related stuff... - */ -typedef struct irnet_ctrl_channel -{ - irnet_log log[IRNET_MAX_EVENTS]; /* Event log */ - int index; /* Current index in log */ - spinlock_t spinlock; /* Serialize access to the event log */ - wait_queue_head_t rwait; /* processes blocked on read (or poll) */ -} irnet_ctrl_channel; - -/**************************** PROTOTYPES ****************************/ -/* - * Global functions of the IrNET module - * Note : we list here also functions called from one file to the other. - */ - -/* -------------------------- IRDA PART -------------------------- */ -int irda_irnet_create(irnet_socket *); /* Initialise an IrNET socket */ -int irda_irnet_connect(irnet_socket *); /* Try to connect over IrDA */ -void irda_irnet_destroy(irnet_socket *); /* Teardown an IrNET socket */ -int irda_irnet_init(void); /* Initialise IrDA part of IrNET */ -void irda_irnet_cleanup(void); /* Teardown IrDA part of IrNET */ - -/**************************** VARIABLES ****************************/ - -/* Control channel stuff - allocated in irnet_irda.h */ -extern struct irnet_ctrl_channel irnet_events; - -#endif /* IRNET_H */ diff --git a/net/irda/irnet/irnet_irda.c b/net/irda/irnet/irnet_irda.c deleted file mode 100644 index e390bceeb2f8..000000000000 --- a/net/irda/irnet/irnet_irda.c +++ /dev/null @@ -1,1885 +0,0 @@ -/* - * IrNET protocol module : Synchronous PPP over an IrDA socket. - * - * Jean II - HPL `00 - <jt@hpl.hp.com> - * - * This file implement the IRDA interface of IrNET. - * Basically, we sit on top of IrTTP. We set up IrTTP, IrIAS properly, - * and exchange frames with IrTTP. - */ - -#include "irnet_irda.h" /* Private header */ -#include <linux/sched.h> -#include <linux/seq_file.h> -#include <linux/slab.h> -#include <asm/unaligned.h> - -/* - * PPP disconnect work: we need to make sure we're in - * process context when calling ppp_unregister_channel(). - */ -static void irnet_ppp_disconnect(struct work_struct *work) -{ - irnet_socket * self = - container_of(work, irnet_socket, disconnect_work); - - if (self == NULL) - return; - /* - * If we were connected, cleanup & close the PPP - * channel, which will kill pppd (hangup) and the rest. - */ - if (self->ppp_open && !self->ttp_open && !self->ttp_connect) { - ppp_unregister_channel(&self->chan); - self->ppp_open = 0; - } -} - -/************************* CONTROL CHANNEL *************************/ -/* - * When ppp is not active, /dev/irnet act as a control channel. - * Writing allow to set up the IrDA destination of the IrNET channel, - * and any application may be read events happening on IrNET... - */ - -/*------------------------------------------------------------------*/ -/* - * Post an event to the control channel... - * Put the event in the log, and then wait all process blocked on read - * so they can read the log... - */ -static void -irnet_post_event(irnet_socket * ap, - irnet_event event, - __u32 saddr, - __u32 daddr, - char * name, - __u16 hints) -{ - int index; /* In the log */ - - DENTER(CTRL_TRACE, "(ap=0x%p, event=%d, daddr=%08x, name=``%s'')\n", - ap, event, daddr, name); - - /* Protect this section via spinlock. - * Note : as we are the only event producer, we only need to exclude - * ourself when touching the log, which is nice and easy. - */ - spin_lock_bh(&irnet_events.spinlock); - - /* Copy the event in the log */ - index = irnet_events.index; - irnet_events.log[index].event = event; - irnet_events.log[index].daddr = daddr; - irnet_events.log[index].saddr = saddr; - /* Try to copy IrDA nickname */ - if(name) - strcpy(irnet_events.log[index].name, name); - else - irnet_events.log[index].name[0] = '\0'; - /* Copy hints */ - irnet_events.log[index].hints.word = hints; - /* Try to get ppp unit number */ - if((ap != (irnet_socket *) NULL) && (ap->ppp_open)) - irnet_events.log[index].unit = ppp_unit_number(&ap->chan); - else - irnet_events.log[index].unit = -1; - - /* Increment the index - * Note that we increment the index only after the event is written, - * to make sure that the readers don't get garbage... */ - irnet_events.index = (index + 1) % IRNET_MAX_EVENTS; - - DEBUG(CTRL_INFO, "New event index is %d\n", irnet_events.index); - - /* Spin lock end */ - spin_unlock_bh(&irnet_events.spinlock); - - /* Now : wake up everybody waiting for events... */ - wake_up_interruptible_all(&irnet_events.rwait); - - DEXIT(CTRL_TRACE, "\n"); -} - -/************************* IRDA SUBROUTINES *************************/ -/* - * These are a bunch of subroutines called from other functions - * down there, mostly common code or to improve readability... - * - * Note : we duplicate quite heavily some routines of af_irda.c, - * because our input structure (self) is quite different - * (struct irnet instead of struct irda_sock), which make sharing - * the same code impossible (at least, without templates). - */ - -/*------------------------------------------------------------------*/ -/* - * Function irda_open_tsap (self) - * - * Open local Transport Service Access Point (TSAP) - * - * Create a IrTTP instance for us and set all the IrTTP callbacks. - */ -static inline int -irnet_open_tsap(irnet_socket * self) -{ - notify_t notify; /* Callback structure */ - - DENTER(IRDA_SR_TRACE, "(self=0x%p)\n", self); - - DABORT(self->tsap != NULL, -EBUSY, IRDA_SR_ERROR, "Already busy !\n"); - - /* Initialize IrTTP callbacks to be used by the IrDA stack */ - irda_notify_init(¬ify); - notify.connect_confirm = irnet_connect_confirm; - notify.connect_indication = irnet_connect_indication; - notify.disconnect_indication = irnet_disconnect_indication; - notify.data_indication = irnet_data_indication; - /*notify.udata_indication = NULL;*/ - notify.flow_indication = irnet_flow_indication; - notify.status_indication = irnet_status_indication; - notify.instance = self; - strlcpy(notify.name, IRNET_NOTIFY_NAME, sizeof(notify.name)); - - /* Open an IrTTP instance */ - self->tsap = irttp_open_tsap(LSAP_ANY, DEFAULT_INITIAL_CREDIT, - ¬ify); - DABORT(self->tsap == NULL, -ENOMEM, - IRDA_SR_ERROR, "Unable to allocate TSAP !\n"); - - /* Remember which TSAP selector we actually got */ - self->stsap_sel = self->tsap->stsap_sel; - - DEXIT(IRDA_SR_TRACE, " - tsap=0x%p, sel=0x%X\n", - self->tsap, self->stsap_sel); - return 0; -} - -/*------------------------------------------------------------------*/ -/* - * Function irnet_ias_to_tsap (self, result, value) - * - * Examine an IAS object and extract TSAP - * - * We do an IAP query to find the TSAP associated with the IrNET service. - * When IrIAP pass us the result of the query, this function look at - * the return values to check for failures and extract the TSAP if - * possible. - * Also deallocate value - * The failure is in self->errno - * Return TSAP or -1 - */ -static inline __u8 -irnet_ias_to_tsap(irnet_socket * self, - int result, - struct ias_value * value) -{ - __u8 dtsap_sel = 0; /* TSAP we are looking for */ - - DENTER(IRDA_SR_TRACE, "(self=0x%p)\n", self); - - /* By default, no error */ - self->errno = 0; - - /* Check if request succeeded */ - switch(result) - { - /* Standard errors : service not available */ - case IAS_CLASS_UNKNOWN: - case IAS_ATTRIB_UNKNOWN: - DEBUG(IRDA_SR_INFO, "IAS object doesn't exist ! (%d)\n", result); - self->errno = -EADDRNOTAVAIL; - break; - - /* Other errors, most likely IrDA stack failure */ - default : - DEBUG(IRDA_SR_INFO, "IAS query failed ! (%d)\n", result); - self->errno = -EHOSTUNREACH; - break; - - /* Success : we got what we wanted */ - case IAS_SUCCESS: - break; - } - - /* Check what was returned to us */ - if(value != NULL) - { - /* What type of argument have we got ? */ - switch(value->type) - { - case IAS_INTEGER: - DEBUG(IRDA_SR_INFO, "result=%d\n", value->t.integer); - if(value->t.integer != -1) - /* Get the remote TSAP selector */ - dtsap_sel = value->t.integer; - else - self->errno = -EADDRNOTAVAIL; - break; - default: - self->errno = -EADDRNOTAVAIL; - DERROR(IRDA_SR_ERROR, "bad type ! (0x%X)\n", value->type); - break; - } - - /* Cleanup */ - irias_delete_value(value); - } - else /* value == NULL */ - { - /* Nothing returned to us - usually result != SUCCESS */ - if(!(self->errno)) - { - DERROR(IRDA_SR_ERROR, - "IrDA bug : result == SUCCESS && value == NULL\n"); - self->errno = -EHOSTUNREACH; - } - } - DEXIT(IRDA_SR_TRACE, "\n"); - - /* Return the TSAP */ - return dtsap_sel; -} - -/*------------------------------------------------------------------*/ -/* - * Function irnet_find_lsap_sel (self) - * - * Try to lookup LSAP selector in remote LM-IAS - * - * Basically, we start a IAP query, and then go to sleep. When the query - * return, irnet_getvalue_confirm will wake us up, and we can examine the - * result of the query... - * Note that in some case, the query fail even before we go to sleep, - * creating some races... - */ -static inline int -irnet_find_lsap_sel(irnet_socket * self) -{ - DENTER(IRDA_SR_TRACE, "(self=0x%p)\n", self); - - /* This should not happen */ - DABORT(self->iriap, -EBUSY, IRDA_SR_ERROR, "busy with a previous query.\n"); - - /* Create an IAP instance, will be closed in irnet_getvalue_confirm() */ - self->iriap = iriap_open(LSAP_ANY, IAS_CLIENT, self, - irnet_getvalue_confirm); - - /* Treat unexpected signals as disconnect */ - self->errno = -EHOSTUNREACH; - - /* Query remote LM-IAS */ - iriap_getvaluebyclass_request(self->iriap, self->rsaddr, self->daddr, - IRNET_SERVICE_NAME, IRNET_IAS_VALUE); - - /* The above request is non-blocking. - * After a while, IrDA will call us back in irnet_getvalue_confirm() - * We will then call irnet_ias_to_tsap() and finish the - * connection procedure */ - - DEXIT(IRDA_SR_TRACE, "\n"); - return 0; -} - -/*------------------------------------------------------------------*/ -/* - * Function irnet_connect_tsap (self) - * - * Initialise the TTP socket and initiate TTP connection - * - */ -static inline int -irnet_connect_tsap(irnet_socket * self) -{ - int err; - - DENTER(IRDA_SR_TRACE, "(self=0x%p)\n", self); - - /* Open a local TSAP (an IrTTP instance) */ - err = irnet_open_tsap(self); - if(err != 0) - { - clear_bit(0, &self->ttp_connect); - DERROR(IRDA_SR_ERROR, "connect aborted!\n"); - return err; - } - - /* Connect to remote device */ - err = irttp_connect_request(self->tsap, self->dtsap_sel, - self->rsaddr, self->daddr, NULL, - self->max_sdu_size_rx, NULL); - if(err != 0) - { - clear_bit(0, &self->ttp_connect); - DERROR(IRDA_SR_ERROR, "connect aborted!\n"); - return err; - } - - /* The above call is non-blocking. - * After a while, the IrDA stack will either call us back in - * irnet_connect_confirm() or irnet_disconnect_indication() - * See you there ;-) */ - - DEXIT(IRDA_SR_TRACE, "\n"); - return err; -} - -/*------------------------------------------------------------------*/ -/* - * Function irnet_discover_next_daddr (self) - * - * Query the IrNET TSAP of the next device in the log. - * - * Used in the TSAP discovery procedure. - */ -static inline int -irnet_discover_next_daddr(irnet_socket * self) -{ - /* Close the last instance of IrIAP, and open a new one. - * We can't reuse the IrIAP instance in the IrIAP callback */ - if(self->iriap) - { - iriap_close(self->iriap); - self->iriap = NULL; - } - /* Create a new IAP instance */ - self->iriap = iriap_open(LSAP_ANY, IAS_CLIENT, self, - irnet_discovervalue_confirm); - if(self->iriap == NULL) - return -ENOMEM; - - /* Next discovery - before the call to avoid races */ - self->disco_index++; - - /* Check if we have one more address to try */ - if(self->disco_index < self->disco_number) - { - /* Query remote LM-IAS */ - iriap_getvaluebyclass_request(self->iriap, - self->discoveries[self->disco_index].saddr, - self->discoveries[self->disco_index].daddr, - IRNET_SERVICE_NAME, IRNET_IAS_VALUE); - /* The above request is non-blocking. - * After a while, IrDA will call us back in irnet_discovervalue_confirm() - * We will then call irnet_ias_to_tsap() and come back here again... */ - return 0; - } - else - return 1; -} - -/*------------------------------------------------------------------*/ -/* - * Function irnet_discover_daddr_and_lsap_sel (self) - * - * This try to find a device with the requested service. - * - * Initiate a TSAP discovery procedure. - * It basically look into the discovery log. For each address in the list, - * it queries the LM-IAS of the device to find if this device offer - * the requested service. - * If there is more than one node supporting the service, we complain - * to the user (it should move devices around). - * If we find one node which have the requested TSAP, we connect to it. - * - * This function just start the whole procedure. It request the discovery - * log and submit the first IAS query. - * The bulk of the job is handled in irnet_discovervalue_confirm() - * - * Note : this procedure fails if there is more than one device in range - * on the same dongle, because IrLMP doesn't disconnect the LAP when the - * last LSAP is closed. Moreover, we would need to wait the LAP - * disconnection... - */ -static inline int -irnet_discover_daddr_and_lsap_sel(irnet_socket * self) -{ - int ret; - - DENTER(IRDA_SR_TRACE, "(self=0x%p)\n", self); - - /* Ask lmp for the current discovery log */ - self->discoveries = irlmp_get_discoveries(&self->disco_number, self->mask, - DISCOVERY_DEFAULT_SLOTS); - - /* Check if the we got some results */ - if(self->discoveries == NULL) - { - self->disco_number = -1; - clear_bit(0, &self->ttp_connect); - DRETURN(-ENETUNREACH, IRDA_SR_INFO, "No Cachelog...\n"); - } - DEBUG(IRDA_SR_INFO, "Got the log (0x%p), size is %d\n", - self->discoveries, self->disco_number); - - /* Start with the first discovery */ - self->disco_index = -1; - self->daddr = DEV_ADDR_ANY; - - /* This will fail if the log is empty - this is non-blocking */ - ret = irnet_discover_next_daddr(self); - if(ret) - { - /* Close IAP */ - if(self->iriap) - iriap_close(self->iriap); - self->iriap = NULL; - - /* Cleanup our copy of the discovery log */ - kfree(self->discoveries); - self->discoveries = NULL; - - clear_bit(0, &self->ttp_connect); - DRETURN(-ENETUNREACH, IRDA_SR_INFO, "Cachelog empty...\n"); - } - - /* Follow me in irnet_discovervalue_confirm() */ - - DEXIT(IRDA_SR_TRACE, "\n"); - return 0; -} - -/*------------------------------------------------------------------*/ -/* - * Function irnet_dname_to_daddr (self) - * - * Convert an IrDA nickname to a valid IrDA address - * - * It basically look into the discovery log until there is a match. - */ -static inline int -irnet_dname_to_daddr(irnet_socket * self) -{ - struct irda_device_info *discoveries; /* Copy of the discovery log */ - int number; /* Number of nodes in the log */ - int i; - - DENTER(IRDA_SR_TRACE, "(self=0x%p)\n", self); - - /* Ask lmp for the current discovery log */ - discoveries = irlmp_get_discoveries(&number, 0xffff, - DISCOVERY_DEFAULT_SLOTS); - /* Check if the we got some results */ - if(discoveries == NULL) - DRETURN(-ENETUNREACH, IRDA_SR_INFO, "Cachelog empty...\n"); - - /* - * Now, check all discovered devices (if any), and connect - * client only about the services that the client is - * interested in... - */ - for(i = 0; i < number; i++) - { - /* Does the name match ? */ - if(!strncmp(discoveries[i].info, self->rname, NICKNAME_MAX_LEN)) - { - /* Yes !!! Get it.. */ - self->daddr = discoveries[i].daddr; - DEBUG(IRDA_SR_INFO, "discovered device ``%s'' at address 0x%08x.\n", - self->rname, self->daddr); - kfree(discoveries); - DEXIT(IRDA_SR_TRACE, "\n"); - return 0; - } - } - /* No luck ! */ - DEBUG(IRDA_SR_INFO, "cannot discover device ``%s'' !!!\n", self->rname); - kfree(discoveries); - return -EADDRNOTAVAIL; -} - - -/************************* SOCKET ROUTINES *************************/ -/* - * This are the main operations on IrNET sockets, basically to create - * and destroy IrNET sockets. These are called from the PPP part... - */ - -/*------------------------------------------------------------------*/ -/* - * Create a IrNET instance : just initialise some parameters... - */ -int -irda_irnet_create(irnet_socket * self) -{ - DENTER(IRDA_SOCK_TRACE, "(self=0x%p)\n", self); - - self->magic = IRNET_MAGIC; /* Paranoia */ - - self->ttp_open = 0; /* Prevent higher layer from accessing IrTTP */ - self->ttp_connect = 0; /* Not connecting yet */ - self->rname[0] = '\0'; /* May be set via control channel */ - self->rdaddr = DEV_ADDR_ANY; /* May be set via control channel */ - self->rsaddr = DEV_ADDR_ANY; /* May be set via control channel */ - self->daddr = DEV_ADDR_ANY; /* Until we get connected */ - self->saddr = DEV_ADDR_ANY; /* Until we get connected */ - self->max_sdu_size_rx = TTP_SAR_UNBOUND; - - /* Register as a client with IrLMP */ - self->ckey = irlmp_register_client(0, NULL, NULL, NULL); -#ifdef DISCOVERY_NOMASK - self->mask = 0xffff; /* For W2k compatibility */ -#else /* DISCOVERY_NOMASK */ - self->mask = irlmp_service_to_hint(S_LAN); -#endif /* DISCOVERY_NOMASK */ - self->tx_flow = FLOW_START; /* Flow control from IrTTP */ - - INIT_WORK(&self->disconnect_work, irnet_ppp_disconnect); - - DEXIT(IRDA_SOCK_TRACE, "\n"); - return 0; -} - -/*------------------------------------------------------------------*/ -/* - * Connect to the other side : - * o convert device name to an address - * o find the socket number (dlsap) - * o Establish the connection - * - * Note : We no longer mimic af_irda. The IAS query for finding the TSAP - * is done asynchronously, like the TTP connection. This allow us to - * call this function from any context (not only process). - * The downside is that following what's happening in there is tricky - * because it involve various functions all over the place... - */ -int -irda_irnet_connect(irnet_socket * self) -{ - int err; - - DENTER(IRDA_SOCK_TRACE, "(self=0x%p)\n", self); - - /* Check if we are already trying to connect. - * Because irda_irnet_connect() can be called directly by pppd plus - * packet retries in ppp_generic and connect may take time, plus we may - * race with irnet_connect_indication(), we need to be careful there... */ - if(test_and_set_bit(0, &self->ttp_connect)) - DRETURN(-EBUSY, IRDA_SOCK_INFO, "Already connecting...\n"); - if((self->iriap != NULL) || (self->tsap != NULL)) - DERROR(IRDA_SOCK_ERROR, "Socket not cleaned up...\n"); - - /* Insert ourselves in the hashbin so that the IrNET server can find us. - * Notes : 4th arg is string of 32 char max and must be null terminated - * When 4th arg is used (string), 3rd arg isn't (int) - * Can't re-insert (MUST remove first) so check for that... */ - if((irnet_server.running) && (self->q.q_next == NULL)) - { - spin_lock_bh(&irnet_server.spinlock); - hashbin_insert(irnet_server.list, (irda_queue_t *) self, 0, self->rname); - spin_unlock_bh(&irnet_server.spinlock); - DEBUG(IRDA_SOCK_INFO, "Inserted ``%s'' in hashbin...\n", self->rname); - } - - /* If we don't have anything (no address, no name) */ - if((self->rdaddr == DEV_ADDR_ANY) && (self->rname[0] == '\0')) - { - /* Try to find a suitable address */ - if((err = irnet_discover_daddr_and_lsap_sel(self)) != 0) - DRETURN(err, IRDA_SOCK_INFO, "auto-connect failed!\n"); - /* In most cases, the call above is non-blocking */ - } - else - { - /* If we have only the name (no address), try to get an address */ - if(self->rdaddr == DEV_ADDR_ANY) - { - if((err = irnet_dname_to_daddr(self)) != 0) - DRETURN(err, IRDA_SOCK_INFO, "name connect failed!\n"); - } - else - /* Use the requested destination address */ - self->daddr = self->rdaddr; - - /* Query remote LM-IAS to find LSAP selector */ - irnet_find_lsap_sel(self); - /* The above call is non blocking */ - } - - /* At this point, we are waiting for the IrDA stack to call us back, - * or we have already failed. - * We will finish the connection procedure in irnet_connect_tsap(). - */ - DEXIT(IRDA_SOCK_TRACE, "\n"); - return 0; -} - -/*------------------------------------------------------------------*/ -/* - * Function irda_irnet_destroy(self) - * - * Destroy irnet instance - * - * Note : this need to be called from a process context. - */ -void -irda_irnet_destroy(irnet_socket * self) -{ - DENTER(IRDA_SOCK_TRACE, "(self=0x%p)\n", self); - if(self == NULL) - return; - - /* Remove ourselves from hashbin (if we are queued in hashbin) - * Note : `irnet_server.running' protect us from calls in hashbin_delete() */ - if((irnet_server.running) && (self->q.q_next != NULL)) - { - struct irnet_socket * entry; - DEBUG(IRDA_SOCK_INFO, "Removing from hash..\n"); - spin_lock_bh(&irnet_server.spinlock); - entry = hashbin_remove_this(irnet_server.list, (irda_queue_t *) self); - self->q.q_next = NULL; - spin_unlock_bh(&irnet_server.spinlock); - DASSERT(entry == self, , IRDA_SOCK_ERROR, "Can't remove from hash.\n"); - } - - /* If we were connected, post a message */ - if(test_bit(0, &self->ttp_open)) - { - /* Note : as the disconnect comes from ppp_generic, the unit number - * doesn't exist anymore when we post the event, so we need to pass - * NULL as the first arg... */ - irnet_post_event(NULL, IRNET_DISCONNECT_TO, - self->saddr, self->daddr, self->rname, 0); - } - - /* Prevent various IrDA callbacks from messing up things - * Need to be first */ - clear_bit(0, &self->ttp_connect); - - /* Prevent higher layer from accessing IrTTP */ - clear_bit(0, &self->ttp_open); - - /* Unregister with IrLMP */ - irlmp_unregister_client(self->ckey); - - /* Unregister with LM-IAS */ - if(self->iriap) - { - iriap_close(self->iriap); - self->iriap = NULL; - } - - /* Cleanup eventual discoveries from connection attempt or control channel */ - if(self->discoveries != NULL) - { - /* Cleanup our copy of the discovery log */ - kfree(self->discoveries); - self->discoveries = NULL; - } - - /* Close our IrTTP connection */ - if(self->tsap) - { - DEBUG(IRDA_SOCK_INFO, "Closing our TTP connection.\n"); - irttp_disconnect_request(self->tsap, NULL, P_NORMAL); - irttp_close_tsap(self->tsap); - self->tsap = NULL; - } - self->stsap_sel = 0; - - DEXIT(IRDA_SOCK_TRACE, "\n"); -} - - -/************************** SERVER SOCKET **************************/ -/* - * The IrNET service is composed of one server socket and a variable - * number of regular IrNET sockets. The server socket is supposed to - * handle incoming connections and redirect them to one IrNET sockets. - * It's a superset of the regular IrNET socket, but has a very distinct - * behaviour... - */ - -/*------------------------------------------------------------------*/ -/* - * Function irnet_daddr_to_dname (self) - * - * Convert an IrDA address to a IrDA nickname - * - * It basically look into the discovery log until there is a match. - */ -static inline int -irnet_daddr_to_dname(irnet_socket * self) -{ - struct irda_device_info *discoveries; /* Copy of the discovery log */ - int number; /* Number of nodes in the log */ - int i; - - DENTER(IRDA_SERV_TRACE, "(self=0x%p)\n", self); - - /* Ask lmp for the current discovery log */ - discoveries = irlmp_get_discoveries(&number, 0xffff, - DISCOVERY_DEFAULT_SLOTS); - /* Check if the we got some results */ - if (discoveries == NULL) - DRETURN(-ENETUNREACH, IRDA_SERV_INFO, "Cachelog empty...\n"); - - /* Now, check all discovered devices (if any) */ - for(i = 0; i < number; i++) - { - /* Does the name match ? */ - if(discoveries[i].daddr == self->daddr) - { - /* Yes !!! Get it.. */ - strlcpy(self->rname, discoveries[i].info, sizeof(self->rname)); - self->rname[sizeof(self->rname) - 1] = '\0'; - DEBUG(IRDA_SERV_INFO, "Device 0x%08x is in fact ``%s''.\n", - self->daddr, self->rname); - kfree(discoveries); - DEXIT(IRDA_SERV_TRACE, "\n"); - return 0; - } - } - /* No luck ! */ - DEXIT(IRDA_SERV_INFO, ": cannot discover device 0x%08x !!!\n", self->daddr); - kfree(discoveries); - return -EADDRNOTAVAIL; -} - -/*------------------------------------------------------------------*/ -/* - * Function irda_find_socket (self) - * - * Find the correct IrNET socket - * - * Look into the list of IrNET sockets and finds one with the right - * properties... - */ -static inline irnet_socket * -irnet_find_socket(irnet_socket * self) -{ - irnet_socket * new = (irnet_socket *) NULL; - int err; - - DENTER(IRDA_SERV_TRACE, "(self=0x%p)\n", self); - - /* Get the addresses of the requester */ - self->daddr = irttp_get_daddr(self->tsap); - self->saddr = irttp_get_saddr(self->tsap); - - /* Try to get the IrDA nickname of the requester */ - err = irnet_daddr_to_dname(self); - - /* Protect access to the instance list */ - spin_lock_bh(&irnet_server.spinlock); - - /* So now, try to get an socket having specifically - * requested that nickname */ - if(err == 0) - { - new = (irnet_socket *) hashbin_find(irnet_server.list, - 0, self->rname); - if(new) - DEBUG(IRDA_SERV_INFO, "Socket 0x%p matches rname ``%s''.\n", - new, new->rname); - } - - /* If no name matches, try to find an socket by the destination address */ - /* It can be either the requested destination address (set via the - * control channel), or the current destination address if the - * socket is in the middle of a connection request */ - if(new == (irnet_socket *) NULL) - { - new = (irnet_socket *) hashbin_get_first(irnet_server.list); - while(new !=(irnet_socket *) NULL) - { - /* Does it have the same address ? */ - if((new->rdaddr == self->daddr) || (new->daddr == self->daddr)) - { - /* Yes !!! Get it.. */ - DEBUG(IRDA_SERV_INFO, "Socket 0x%p matches daddr %#08x.\n", - new, self->daddr); - break; - } - new = (irnet_socket *) hashbin_get_next(irnet_server.list); - } - } - - /* If we don't have any socket, get the first unconnected socket */ - if(new == (irnet_socket *) NULL) - { - new = (irnet_socket *) hashbin_get_first(irnet_server.list); - while(new !=(irnet_socket *) NULL) - { - /* Is it available ? */ - if(!(test_bit(0, &new->ttp_open)) && (new->rdaddr == DEV_ADDR_ANY) && - (new->rname[0] == '\0') && (new->ppp_open)) - { - /* Yes !!! Get it.. */ - DEBUG(IRDA_SERV_INFO, "Socket 0x%p is free.\n", - new); - break; - } - new = (irnet_socket *) hashbin_get_next(irnet_server.list); - } - } - - /* Spin lock end */ - spin_unlock_bh(&irnet_server.spinlock); - - DEXIT(IRDA_SERV_TRACE, " - new = 0x%p\n", new); - return new; -} - -/*------------------------------------------------------------------*/ -/* - * Function irda_connect_socket (self) - * - * Connect an incoming connection to the socket - * - */ -static inline int -irnet_connect_socket(irnet_socket * server, - irnet_socket * new, - struct qos_info * qos, - __u32 max_sdu_size, - __u8 max_header_size) -{ - DENTER(IRDA_SERV_TRACE, "(server=0x%p, new=0x%p)\n", - server, new); - - /* Now attach up the new socket */ - new->tsap = irttp_dup(server->tsap, new); - DABORT(new->tsap == NULL, -1, IRDA_SERV_ERROR, "dup failed!\n"); - - /* Set up all the relevant parameters on the new socket */ - new->stsap_sel = new->tsap->stsap_sel; - new->dtsap_sel = new->tsap->dtsap_sel; - new->saddr = irttp_get_saddr(new->tsap); - new->daddr = irttp_get_daddr(new->tsap); - - new->max_header_size = max_header_size; - new->max_sdu_size_tx = max_sdu_size; - new->max_data_size = max_sdu_size; -#ifdef STREAM_COMPAT - /* If we want to receive "stream sockets" */ - if(max_sdu_size == 0) - new->max_data_size = irttp_get_max_seg_size(new->tsap); -#endif /* STREAM_COMPAT */ - - /* Clean up the original one to keep it in listen state */ - irttp_listen(server->tsap); - - /* Send a connection response on the new socket */ - irttp_connect_response(new->tsap, new->max_sdu_size_rx, NULL); - - /* Allow PPP to send its junk over the new socket... */ - set_bit(0, &new->ttp_open); - - /* Not connecting anymore, and clean up last possible remains - * of connection attempts on the socket */ - clear_bit(0, &new->ttp_connect); - if(new->iriap) - { - iriap_close(new->iriap); - new->iriap = NULL; - } - if(new->discoveries != NULL) - { - kfree(new->discoveries); - new->discoveries = NULL; - } - -#ifdef CONNECT_INDIC_KICK - /* As currently we don't block packets in ppp_irnet_send() while passive, - * this is not really needed... - * Also, not doing it give IrDA a chance to finish the setup properly - * before being swamped with packets... */ - ppp_output_wakeup(&new->chan); -#endif /* CONNECT_INDIC_KICK */ - - /* Notify the control channel */ - irnet_post_event(new, IRNET_CONNECT_FROM, - new->saddr, new->daddr, server->rname, 0); - - DEXIT(IRDA_SERV_TRACE, "\n"); - return 0; -} - -/*------------------------------------------------------------------*/ -/* - * Function irda_disconnect_server (self) - * - * Cleanup the server socket when the incoming connection abort - * - */ -static inline void -irnet_disconnect_server(irnet_socket * self, - struct sk_buff *skb) -{ - DENTER(IRDA_SERV_TRACE, "(self=0x%p)\n", self); - - /* Put the received packet in the black hole */ - kfree_skb(skb); - -#ifdef FAIL_SEND_DISCONNECT - /* Tell the other party we don't want to be connected */ - /* Hum... Is it the right thing to do ? And do we need to send - * a connect response before ? It looks ok without this... */ - irttp_disconnect_request(self->tsap, NULL, P_NORMAL); -#endif /* FAIL_SEND_DISCONNECT */ - - /* Notify the control channel (see irnet_find_socket()) */ - irnet_post_event(NULL, IRNET_REQUEST_FROM, - self->saddr, self->daddr, self->rname, 0); - - /* Clean up the server to keep it in listen state */ - irttp_listen(self->tsap); - - DEXIT(IRDA_SERV_TRACE, "\n"); -} - -/*------------------------------------------------------------------*/ -/* - * Function irda_setup_server (self) - * - * Create a IrTTP server and set it up... - * - * Register the IrLAN hint bit, create a IrTTP instance for us, - * set all the IrTTP callbacks and create an IrIAS entry... - */ -static inline int -irnet_setup_server(void) -{ - __u16 hints; - - DENTER(IRDA_SERV_TRACE, "()\n"); - - /* Initialise the regular socket part of the server */ - irda_irnet_create(&irnet_server.s); - - /* Open a local TSAP (an IrTTP instance) for the server */ - irnet_open_tsap(&irnet_server.s); - - /* PPP part setup */ - irnet_server.s.ppp_open = 0; - irnet_server.s.chan.private = NULL; - irnet_server.s.file = NULL; - - /* Get the hint bit corresponding to IrLAN */ - /* Note : we overload the IrLAN hint bit. As it is only a "hint", and as - * we provide roughly the same functionality as IrLAN, this is ok. - * In fact, the situation is similar as JetSend overloading the Obex hint - */ - hints = irlmp_service_to_hint(S_LAN); - -#ifdef ADVERTISE_HINT - /* Register with IrLMP as a service (advertise our hint bit) */ - irnet_server.skey = irlmp_register_service(hints); -#endif /* ADVERTISE_HINT */ - - /* Register with LM-IAS (so that people can connect to us) */ - irnet_server.ias_obj = irias_new_object(IRNET_SERVICE_NAME, jiffies); - irias_add_integer_attrib(irnet_server.ias_obj, IRNET_IAS_VALUE, - irnet_server.s.stsap_sel, IAS_KERNEL_ATTR); - irias_insert_object(irnet_server.ias_obj); - -#ifdef DISCOVERY_EVENTS - /* Tell IrLMP we want to be notified of newly discovered nodes */ - irlmp_update_client(irnet_server.s.ckey, hints, - irnet_discovery_indication, irnet_expiry_indication, - (void *) &irnet_server.s); -#endif - - DEXIT(IRDA_SERV_TRACE, " - self=0x%p\n", &irnet_server.s); - return 0; -} - -/*------------------------------------------------------------------*/ -/* - * Function irda_destroy_server (self) - * - * Destroy the IrTTP server... - * - * Reverse of the previous function... - */ -static inline void -irnet_destroy_server(void) -{ - DENTER(IRDA_SERV_TRACE, "()\n"); - -#ifdef ADVERTISE_HINT - /* Unregister with IrLMP */ - irlmp_unregister_service(irnet_server.skey); -#endif /* ADVERTISE_HINT */ - - /* Unregister with LM-IAS */ - if(irnet_server.ias_obj) - irias_delete_object(irnet_server.ias_obj); - - /* Cleanup the socket part */ - irda_irnet_destroy(&irnet_server.s); - - DEXIT(IRDA_SERV_TRACE, "\n"); -} - - -/************************ IRDA-TTP CALLBACKS ************************/ -/* - * When we create a IrTTP instance, we pass to it a set of callbacks - * that IrTTP will call in case of various events. - * We take care of those events here. - */ - -/*------------------------------------------------------------------*/ -/* - * Function irnet_data_indication (instance, sap, skb) - * - * Received some data from TinyTP. Just queue it on the receive queue - * - */ -static int -irnet_data_indication(void * instance, - void * sap, - struct sk_buff *skb) -{ - irnet_socket * ap = (irnet_socket *) instance; - unsigned char * p; - int code = 0; - - DENTER(IRDA_TCB_TRACE, "(self/ap=0x%p, skb=0x%p)\n", - ap, skb); - DASSERT(skb != NULL, 0, IRDA_CB_ERROR, "skb is NULL !!!\n"); - - /* Check is ppp is ready to receive our packet */ - if(!ap->ppp_open) - { - DERROR(IRDA_CB_ERROR, "PPP not ready, dropping packet...\n"); - /* When we return error, TTP will need to requeue the skb and - * will stop the sender. IrTTP will stall until we send it a - * flow control request... */ - return -ENOMEM; - } - - /* strip address/control field if present */ - p = skb->data; - if((p[0] == PPP_ALLSTATIONS) && (p[1] == PPP_UI)) - { - /* chop off address/control */ - if(skb->len < 3) - goto err_exit; - p = skb_pull(skb, 2); - } - - /* decompress protocol field if compressed */ - if(p[0] & 1) - { - /* protocol is compressed */ - *(u8 *)skb_push(skb, 1) = 0; - } - else - if(skb->len < 2) - goto err_exit; - - /* pass to generic ppp layer */ - /* Note : how do I know if ppp can accept or not the packet ? This is - * essential if I want to manage flow control smoothly... */ - ppp_input(&ap->chan, skb); - - DEXIT(IRDA_TCB_TRACE, "\n"); - return 0; - - err_exit: - DERROR(IRDA_CB_ERROR, "Packet too small, dropping...\n"); - kfree_skb(skb); - ppp_input_error(&ap->chan, code); - return 0; /* Don't return an error code, only for flow control... */ -} - -/*------------------------------------------------------------------*/ -/* - * Function irnet_disconnect_indication (instance, sap, reason, skb) - * - * Connection has been closed. Chech reason to find out why - * - * Note : there are many cases where we come here : - * o attempted to connect, timeout - * o connected, link is broken, LAP has timeout - * o connected, other side close the link - * o connection request on the server not handled - */ -static void -irnet_disconnect_indication(void * instance, - void * sap, - LM_REASON reason, - struct sk_buff *skb) -{ - irnet_socket * self = (irnet_socket *) instance; - int test_open; - int test_connect; - - DENTER(IRDA_TCB_TRACE, "(self=0x%p)\n", self); - DASSERT(self != NULL, , IRDA_CB_ERROR, "Self is NULL !!!\n"); - - /* Don't care about it, but let's not leak it */ - if(skb) - dev_kfree_skb(skb); - - /* Prevent higher layer from accessing IrTTP */ - test_open = test_and_clear_bit(0, &self->ttp_open); - /* Not connecting anymore... - * (note : TSAP is open, so IAP callbacks are no longer pending...) */ - test_connect = test_and_clear_bit(0, &self->ttp_connect); - - /* If both self->ttp_open and self->ttp_connect are NULL, it mean that we - * have a race condition with irda_irnet_destroy() or - * irnet_connect_indication(), so don't mess up tsap... - */ - if(!(test_open || test_connect)) - { - DERROR(IRDA_CB_ERROR, "Race condition detected...\n"); - return; - } - - /* If we were active, notify the control channel */ - if(test_open) - irnet_post_event(self, IRNET_DISCONNECT_FROM, - self->saddr, self->daddr, self->rname, 0); - else - /* If we were trying to connect, notify the control channel */ - if((self->tsap) && (self != &irnet_server.s)) - irnet_post_event(self, IRNET_NOANSWER_FROM, - self->saddr, self->daddr, self->rname, 0); - - /* Close our IrTTP connection, cleanup tsap */ - if((self->tsap) && (self != &irnet_server.s)) - { - DEBUG(IRDA_CB_INFO, "Closing our TTP connection.\n"); - irttp_close_tsap(self->tsap); - self->tsap = NULL; - } - /* Cleanup the socket in case we want to reconnect in ppp_output_wakeup() */ - self->stsap_sel = 0; - self->daddr = DEV_ADDR_ANY; - self->tx_flow = FLOW_START; - - /* Deal with the ppp instance if it's still alive */ - if(self->ppp_open) - { - if(test_open) - { - /* ppp_unregister_channel() wants a user context. */ - schedule_work(&self->disconnect_work); - } - else - { - /* If we were trying to connect, flush (drain) ppp_generic - * Tx queue (most often we have blocked it), which will - * trigger an other attempt to connect. If we are passive, - * this will empty the Tx queue after last try. */ - ppp_output_wakeup(&self->chan); - } - } - - DEXIT(IRDA_TCB_TRACE, "\n"); -} - -/*------------------------------------------------------------------*/ -/* - * Function irnet_connect_confirm (instance, sap, qos, max_sdu_size, skb) - * - * Connections has been confirmed by the remote device - * - */ -static void -irnet_connect_confirm(void * instance, - void * sap, - struct qos_info *qos, - __u32 max_sdu_size, - __u8 max_header_size, - struct sk_buff *skb) -{ - irnet_socket * self = (irnet_socket *) instance; - - DENTER(IRDA_TCB_TRACE, "(self=0x%p)\n", self); - - /* Check if socket is closing down (via irda_irnet_destroy()) */ - if(! test_bit(0, &self->ttp_connect)) - { - DERROR(IRDA_CB_ERROR, "Socket no longer connecting. Ouch !\n"); - return; - } - - /* How much header space do we need to reserve */ - self->max_header_size = max_header_size; - - /* IrTTP max SDU size in transmit direction */ - self->max_sdu_size_tx = max_sdu_size; - self->max_data_size = max_sdu_size; -#ifdef STREAM_COMPAT - if(max_sdu_size == 0) - self->max_data_size = irttp_get_max_seg_size(self->tsap); -#endif /* STREAM_COMPAT */ - - /* At this point, IrLMP has assigned our source address */ - self->saddr = irttp_get_saddr(self->tsap); - - /* Allow higher layer to access IrTTP */ - set_bit(0, &self->ttp_open); - clear_bit(0, &self->ttp_connect); /* Not racy, IrDA traffic is serial */ - /* Give a kick in the ass of ppp_generic so that he sends us some data */ - ppp_output_wakeup(&self->chan); - - /* Check size of received packet */ - if(skb->len > 0) - { -#ifdef PASS_CONNECT_PACKETS - DEBUG(IRDA_CB_INFO, "Passing connect packet to PPP.\n"); - /* Try to pass it to PPP */ - irnet_data_indication(instance, sap, skb); -#else /* PASS_CONNECT_PACKETS */ - DERROR(IRDA_CB_ERROR, "Dropping non empty packet.\n"); - kfree_skb(skb); /* Note : will be optimised with other kfree... */ -#endif /* PASS_CONNECT_PACKETS */ - } - else - kfree_skb(skb); - - /* Notify the control channel */ - irnet_post_event(self, IRNET_CONNECT_TO, - self->saddr, self->daddr, self->rname, 0); - - DEXIT(IRDA_TCB_TRACE, "\n"); -} - -/*------------------------------------------------------------------*/ -/* - * Function irnet_flow_indication (instance, sap, flow) - * - * Used by TinyTP to tell us if it can accept more data or not - * - */ -static void -irnet_flow_indication(void * instance, - void * sap, - LOCAL_FLOW flow) -{ - irnet_socket * self = (irnet_socket *) instance; - LOCAL_FLOW oldflow = self->tx_flow; - - DENTER(IRDA_TCB_TRACE, "(self=0x%p, flow=%d)\n", self, flow); - - /* Update our state */ - self->tx_flow = flow; - - /* Check what IrTTP want us to do... */ - switch(flow) - { - case FLOW_START: - DEBUG(IRDA_CB_INFO, "IrTTP wants us to start again\n"); - /* Check if we really need to wake up PPP */ - if(oldflow == FLOW_STOP) - ppp_output_wakeup(&self->chan); - else - DEBUG(IRDA_CB_INFO, "But we were already transmitting !!!\n"); - break; - case FLOW_STOP: - DEBUG(IRDA_CB_INFO, "IrTTP wants us to slow down\n"); - break; - default: - DEBUG(IRDA_CB_INFO, "Unknown flow command!\n"); - break; - } - - DEXIT(IRDA_TCB_TRACE, "\n"); -} - -/*------------------------------------------------------------------*/ -/* - * Function irnet_status_indication (instance, sap, reason, skb) - * - * Link (IrLAP) status report. - * - */ -static void -irnet_status_indication(void * instance, - LINK_STATUS link, - LOCK_STATUS lock) -{ - irnet_socket * self = (irnet_socket *) instance; - - DENTER(IRDA_TCB_TRACE, "(self=0x%p)\n", self); - DASSERT(self != NULL, , IRDA_CB_ERROR, "Self is NULL !!!\n"); - - /* We can only get this event if we are connected */ - switch(link) - { - case STATUS_NO_ACTIVITY: - irnet_post_event(self, IRNET_BLOCKED_LINK, - self->saddr, self->daddr, self->rname, 0); - break; - default: - DEBUG(IRDA_CB_INFO, "Unknown status...\n"); - } - - DEXIT(IRDA_TCB_TRACE, "\n"); -} - -/*------------------------------------------------------------------*/ -/* - * Function irnet_connect_indication(instance, sap, qos, max_sdu_size, userdata) - * - * Incoming connection - * - * In theory, this function is called only on the server socket. - * Some other node is attempting to connect to the IrNET service, and has - * sent a connection request on our server socket. - * We just redirect the connection to the relevant IrNET socket. - * - * Note : we also make sure that between 2 irnet nodes, there can - * exist only one irnet connection. - */ -static void -irnet_connect_indication(void * instance, - void * sap, - struct qos_info *qos, - __u32 max_sdu_size, - __u8 max_header_size, - struct sk_buff *skb) -{ - irnet_socket * server = &irnet_server.s; - irnet_socket * new = (irnet_socket *) NULL; - - DENTER(IRDA_TCB_TRACE, "(server=0x%p)\n", server); - DASSERT(instance == &irnet_server, , IRDA_CB_ERROR, - "Invalid instance (0x%p) !!!\n", instance); - DASSERT(sap == irnet_server.s.tsap, , IRDA_CB_ERROR, "Invalid sap !!!\n"); - - /* Try to find the most appropriate IrNET socket */ - new = irnet_find_socket(server); - - /* After all this hard work, do we have an socket ? */ - if(new == (irnet_socket *) NULL) - { - DEXIT(IRDA_CB_INFO, ": No socket waiting for this connection.\n"); - irnet_disconnect_server(server, skb); - return; - } - - /* Is the socket already busy ? */ - if(test_bit(0, &new->ttp_open)) - { - DEXIT(IRDA_CB_INFO, ": Socket already connected.\n"); - irnet_disconnect_server(server, skb); - return; - } - - /* The following code is a bit tricky, so need comments ;-) - */ - /* If ttp_connect is set, the socket is trying to connect to the other - * end and may have sent a IrTTP connection request and is waiting for - * a connection response (that may never come). - * Now, the pain is that the socket may have opened a tsap and is - * waiting on it, while the other end is trying to connect to it on - * another tsap. - * Because IrNET can be peer to peer, we need to workaround this. - * Furthermore, the way the irnetd script is implemented, the - * target will create a second IrNET connection back to the - * originator and expect the originator to bind this new connection - * to the original PPPD instance. - * And of course, if we don't use irnetd, we can have a race when - * both side try to connect simultaneously, which could leave both - * connections half closed (yuck). - * Conclusions : - * 1) The "originator" must accept the new connection and get rid - * of the old one so that irnetd works - * 2) One side must deny the new connection to avoid races, - * but both side must agree on which side it is... - * Most often, the originator is primary at the LAP layer. - * Jean II - */ - /* Now, let's look at the way I wrote the test... - * We need to clear up the ttp_connect flag atomically to prevent - * irnet_disconnect_indication() to mess up the tsap we are going to close. - * We want to clear the ttp_connect flag only if we close the tsap, - * otherwise we will never close it, so we need to check for primary - * *before* doing the test on the flag. - * And of course, ALLOW_SIMULT_CONNECT can disable this entirely... - * Jean II - */ - - /* Socket already connecting ? On primary ? */ - if(0 -#ifdef ALLOW_SIMULT_CONNECT - || ((irttp_is_primary(server->tsap) == 1) && /* primary */ - (test_and_clear_bit(0, &new->ttp_connect))) -#endif /* ALLOW_SIMULT_CONNECT */ - ) - { - DERROR(IRDA_CB_ERROR, "Socket already connecting, but going to reuse it !\n"); - - /* Cleanup the old TSAP if necessary - IrIAP will be cleaned up later */ - if(new->tsap != NULL) - { - /* Close the old connection the new socket was attempting, - * so that we can hook it up to the new connection. - * It's now safe to do it... */ - irttp_close_tsap(new->tsap); - new->tsap = NULL; - } - } - else - { - /* Three options : - * 1) socket was not connecting or connected : ttp_connect should be 0. - * 2) we don't want to connect the socket because we are secondary or - * ALLOW_SIMULT_CONNECT is undefined. ttp_connect should be 1. - * 3) we are half way in irnet_disconnect_indication(), and it's a - * nice race condition... Fortunately, we can detect that by checking - * if tsap is still alive. On the other hand, we can't be in - * irda_irnet_destroy() otherwise we would not have found this - * socket in the hashbin. - * Jean II */ - if((test_bit(0, &new->ttp_connect)) || (new->tsap != NULL)) - { - /* Don't mess this socket, somebody else in in charge... */ - DERROR(IRDA_CB_ERROR, "Race condition detected, socket in use, abort connect...\n"); - irnet_disconnect_server(server, skb); - return; - } - } - - /* So : at this point, we have a socket, and it is idle. Good ! */ - irnet_connect_socket(server, new, qos, max_sdu_size, max_header_size); - - /* Check size of received packet */ - if(skb->len > 0) - { -#ifdef PASS_CONNECT_PACKETS - DEBUG(IRDA_CB_INFO, "Passing connect packet to PPP.\n"); - /* Try to pass it to PPP */ - irnet_data_indication(new, new->tsap, skb); -#else /* PASS_CONNECT_PACKETS */ - DERROR(IRDA_CB_ERROR, "Dropping non empty packet.\n"); - kfree_skb(skb); /* Note : will be optimised with other kfree... */ -#endif /* PASS_CONNECT_PACKETS */ - } - else - kfree_skb(skb); - - DEXIT(IRDA_TCB_TRACE, "\n"); -} - - -/********************** IRDA-IAS/LMP CALLBACKS **********************/ -/* - * These are the callbacks called by other layers of the IrDA stack, - * mainly LMP for discovery and IAS for name queries. - */ - -/*------------------------------------------------------------------*/ -/* - * Function irnet_getvalue_confirm (result, obj_id, value, priv) - * - * Got answer from remote LM-IAS, just connect - * - * This is the reply to a IAS query we were doing to find the TSAP of - * the device we want to connect to. - * If we have found a valid TSAP, just initiate the TTP connection - * on this TSAP. - */ -static void -irnet_getvalue_confirm(int result, - __u16 obj_id, - struct ias_value *value, - void * priv) -{ - irnet_socket * self = (irnet_socket *) priv; - - DENTER(IRDA_OCB_TRACE, "(self=0x%p)\n", self); - DASSERT(self != NULL, , IRDA_OCB_ERROR, "Self is NULL !!!\n"); - - /* Check if already connected (via irnet_connect_socket()) - * or socket is closing down (via irda_irnet_destroy()) */ - if(! test_bit(0, &self->ttp_connect)) - { - DERROR(IRDA_OCB_ERROR, "Socket no longer connecting. Ouch !\n"); - return; - } - - /* We probably don't need to make any more queries */ - iriap_close(self->iriap); - self->iriap = NULL; - - /* Post process the IAS reply */ - self->dtsap_sel = irnet_ias_to_tsap(self, result, value); - - /* If error, just go out */ - if(self->errno) - { - clear_bit(0, &self->ttp_connect); - DERROR(IRDA_OCB_ERROR, "IAS connect failed ! (0x%X)\n", self->errno); - return; - } - - DEBUG(IRDA_OCB_INFO, "daddr = %08x, lsap = %d, starting IrTTP connection\n", - self->daddr, self->dtsap_sel); - - /* Start up TTP - non blocking */ - irnet_connect_tsap(self); - - DEXIT(IRDA_OCB_TRACE, "\n"); -} - -/*------------------------------------------------------------------*/ -/* - * Function irnet_discovervalue_confirm (result, obj_id, value, priv) - * - * Handle the TSAP discovery procedure state machine. - * Got answer from remote LM-IAS, try next device - * - * We are doing a TSAP discovery procedure, and we got an answer to - * a IAS query we were doing to find the TSAP on one of the address - * in the discovery log. - * - * If we have found a valid TSAP for the first time, save it. If it's - * not the first time we found one, complain. - * - * If we have more addresses in the log, just initiate a new query. - * Note that those query may fail (see irnet_discover_daddr_and_lsap_sel()) - * - * Otherwise, wrap up the procedure (cleanup), check if we have found - * any device and connect to it. - */ -static void -irnet_discovervalue_confirm(int result, - __u16 obj_id, - struct ias_value *value, - void * priv) -{ - irnet_socket * self = (irnet_socket *) priv; - __u8 dtsap_sel; /* TSAP we are looking for */ - - DENTER(IRDA_OCB_TRACE, "(self=0x%p)\n", self); - DASSERT(self != NULL, , IRDA_OCB_ERROR, "Self is NULL !!!\n"); - - /* Check if already connected (via irnet_connect_socket()) - * or socket is closing down (via irda_irnet_destroy()) */ - if(! test_bit(0, &self->ttp_connect)) - { - DERROR(IRDA_OCB_ERROR, "Socket no longer connecting. Ouch !\n"); - return; - } - - /* Post process the IAS reply */ - dtsap_sel = irnet_ias_to_tsap(self, result, value); - - /* Have we got something ? */ - if(self->errno == 0) - { - /* We found the requested service */ - if(self->daddr != DEV_ADDR_ANY) - { - DERROR(IRDA_OCB_ERROR, "More than one device in range supports IrNET...\n"); - } - else - { - /* First time we found that one, save it ! */ - self->daddr = self->discoveries[self->disco_index].daddr; - self->dtsap_sel = dtsap_sel; - } - } - - /* If no failure */ - if((self->errno == -EADDRNOTAVAIL) || (self->errno == 0)) - { - int ret; - - /* Search the next node */ - ret = irnet_discover_next_daddr(self); - if(!ret) - { - /* In this case, the above request was non-blocking. - * We will return here after a while... */ - return; - } - /* In this case, we have processed the last discovery item */ - } - - /* No more queries to be done (failure or last one) */ - - /* We probably don't need to make any more queries */ - iriap_close(self->iriap); - self->iriap = NULL; - - /* No more items : remove the log and signal termination */ - DEBUG(IRDA_OCB_INFO, "Cleaning up log (0x%p)\n", - self->discoveries); - if(self->discoveries != NULL) - { - /* Cleanup our copy of the discovery log */ - kfree(self->discoveries); - self->discoveries = NULL; - } - self->disco_number = -1; - - /* Check out what we found */ - if(self->daddr == DEV_ADDR_ANY) - { - self->daddr = DEV_ADDR_ANY; - clear_bit(0, &self->ttp_connect); - DEXIT(IRDA_OCB_TRACE, ": cannot discover IrNET in any device !!!\n"); - return; - } - - /* We have a valid address - just connect */ - - DEBUG(IRDA_OCB_INFO, "daddr = %08x, lsap = %d, starting IrTTP connection\n", - self->daddr, self->dtsap_sel); - - /* Start up TTP - non blocking */ - irnet_connect_tsap(self); - - DEXIT(IRDA_OCB_TRACE, "\n"); -} - -#ifdef DISCOVERY_EVENTS -/*------------------------------------------------------------------*/ -/* - * Function irnet_discovery_indication (discovery) - * - * Got a discovery indication from IrLMP, post an event - * - * Note : IrLMP take care of matching the hint mask for us, and also - * check if it is a "new" node for us... - * - * As IrLMP filter on the IrLAN hint bit, we get both IrLAN and IrNET - * nodes, so it's only at connection time that we will know if the - * node support IrNET, IrLAN or both. The other solution is to check - * in IAS the PNP ids and service name. - * Note : even if a node support IrNET (or IrLAN), it's no guarantee - * that we will be able to connect to it, the node might already be - * busy... - * - * One last thing : in some case, this function will trigger duplicate - * discovery events. On the other hand, we should catch all - * discoveries properly (i.e. not miss one). Filtering duplicate here - * is to messy, so we leave that to user space... - */ -static void -irnet_discovery_indication(discinfo_t * discovery, - DISCOVERY_MODE mode, - void * priv) -{ - irnet_socket * self = &irnet_server.s; - - DENTER(IRDA_OCB_TRACE, "(self=0x%p)\n", self); - DASSERT(priv == &irnet_server, , IRDA_OCB_ERROR, - "Invalid instance (0x%p) !!!\n", priv); - - DEBUG(IRDA_OCB_INFO, "Discovered new IrNET/IrLAN node %s...\n", - discovery->info); - - /* Notify the control channel */ - irnet_post_event(NULL, IRNET_DISCOVER, - discovery->saddr, discovery->daddr, discovery->info, - get_unaligned((__u16 *)discovery->hints)); - - DEXIT(IRDA_OCB_TRACE, "\n"); -} - -/*------------------------------------------------------------------*/ -/* - * Function irnet_expiry_indication (expiry) - * - * Got a expiry indication from IrLMP, post an event - * - * Note : IrLMP take care of matching the hint mask for us, we only - * check if it is a "new" node... - */ -static void -irnet_expiry_indication(discinfo_t * expiry, - DISCOVERY_MODE mode, - void * priv) -{ - irnet_socket * self = &irnet_server.s; - - DENTER(IRDA_OCB_TRACE, "(self=0x%p)\n", self); - DASSERT(priv == &irnet_server, , IRDA_OCB_ERROR, - "Invalid instance (0x%p) !!!\n", priv); - - DEBUG(IRDA_OCB_INFO, "IrNET/IrLAN node %s expired...\n", - expiry->info); - - /* Notify the control channel */ - irnet_post_event(NULL, IRNET_EXPIRE, - expiry->saddr, expiry->daddr, expiry->info, - get_unaligned((__u16 *)expiry->hints)); - - DEXIT(IRDA_OCB_TRACE, "\n"); -} -#endif /* DISCOVERY_EVENTS */ - - -/*********************** PROC ENTRY CALLBACKS ***********************/ -/* - * We create a instance in the /proc filesystem, and here we take care - * of that... - */ - -#ifdef CONFIG_PROC_FS -static int -irnet_proc_show(struct seq_file *m, void *v) -{ - irnet_socket * self; - char * state; - int i = 0; - - /* Get the IrNET server information... */ - seq_printf(m, "IrNET server - "); - seq_printf(m, "IrDA state: %s, ", - (irnet_server.running ? "running" : "dead")); - seq_printf(m, "stsap_sel: %02x, ", irnet_server.s.stsap_sel); - seq_printf(m, "dtsap_sel: %02x\n", irnet_server.s.dtsap_sel); - - /* Do we need to continue ? */ - if(!irnet_server.running) - return 0; - - /* Protect access to the instance list */ - spin_lock_bh(&irnet_server.spinlock); - - /* Get the sockets one by one... */ - self = (irnet_socket *) hashbin_get_first(irnet_server.list); - while(self != NULL) - { - /* Start printing info about the socket. */ - seq_printf(m, "\nIrNET socket %d - ", i++); - - /* First, get the requested configuration */ - seq_printf(m, "Requested IrDA name: \"%s\", ", self->rname); - seq_printf(m, "daddr: %08x, ", self->rdaddr); - seq_printf(m, "saddr: %08x\n", self->rsaddr); - - /* Second, get all the PPP info */ - seq_printf(m, " PPP state: %s", - (self->ppp_open ? "registered" : "unregistered")); - if(self->ppp_open) - { - seq_printf(m, ", unit: ppp%d", - ppp_unit_number(&self->chan)); - seq_printf(m, ", channel: %d", - ppp_channel_index(&self->chan)); - seq_printf(m, ", mru: %d", - self->mru); - /* Maybe add self->flags ? Later... */ - } - - /* Then, get all the IrDA specific info... */ - if(self->ttp_open) - state = "connected"; - else - if(self->tsap != NULL) - state = "connecting"; - else - if(self->iriap != NULL) - state = "searching"; - else - if(self->ttp_connect) - state = "weird"; - else - state = "idle"; - seq_printf(m, "\n IrDA state: %s, ", state); - seq_printf(m, "daddr: %08x, ", self->daddr); - seq_printf(m, "stsap_sel: %02x, ", self->stsap_sel); - seq_printf(m, "dtsap_sel: %02x\n", self->dtsap_sel); - - /* Next socket, please... */ - self = (irnet_socket *) hashbin_get_next(irnet_server.list); - } - - /* Spin lock end */ - spin_unlock_bh(&irnet_server.spinlock); - - return 0; -} - -static int irnet_proc_open(struct inode *inode, struct file *file) -{ - return single_open(file, irnet_proc_show, NULL); -} - -static const struct file_operations irnet_proc_fops = { - .owner = THIS_MODULE, - .open = irnet_proc_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; -#endif /* PROC_FS */ - - -/********************** CONFIGURATION/CLEANUP **********************/ -/* - * Initialisation and teardown of the IrDA part, called at module - * insertion and removal... - */ - -/*------------------------------------------------------------------*/ -/* - * Prepare the IrNET layer for operation... - */ -int __init -irda_irnet_init(void) -{ - int err = 0; - - DENTER(MODULE_TRACE, "()\n"); - - /* Pure paranoia - should be redundant */ - memset(&irnet_server, 0, sizeof(struct irnet_root)); - - /* Setup start of irnet instance list */ - irnet_server.list = hashbin_new(HB_NOLOCK); - DABORT(irnet_server.list == NULL, -ENOMEM, - MODULE_ERROR, "Can't allocate hashbin!\n"); - /* Init spinlock for instance list */ - spin_lock_init(&irnet_server.spinlock); - - /* Initialise control channel */ - init_waitqueue_head(&irnet_events.rwait); - irnet_events.index = 0; - /* Init spinlock for event logging */ - spin_lock_init(&irnet_events.spinlock); - -#ifdef CONFIG_PROC_FS - /* Add a /proc file for irnet infos */ - proc_create("irnet", 0, proc_irda, &irnet_proc_fops); -#endif /* CONFIG_PROC_FS */ - - /* Setup the IrNET server */ - err = irnet_setup_server(); - - if(!err) - /* We are no longer functional... */ - irnet_server.running = 1; - - DEXIT(MODULE_TRACE, "\n"); - return err; -} - -/*------------------------------------------------------------------*/ -/* - * Cleanup at exit... - */ -void __exit -irda_irnet_cleanup(void) -{ - DENTER(MODULE_TRACE, "()\n"); - - /* We are no longer there... */ - irnet_server.running = 0; - -#ifdef CONFIG_PROC_FS - /* Remove our /proc file */ - remove_proc_entry("irnet", proc_irda); -#endif /* CONFIG_PROC_FS */ - - /* Remove our IrNET server from existence */ - irnet_destroy_server(); - - /* Remove all instances of IrNET socket still present */ - hashbin_delete(irnet_server.list, (FREE_FUNC) irda_irnet_destroy); - - DEXIT(MODULE_TRACE, "\n"); -} diff --git a/net/irda/irnet/irnet_irda.h b/net/irda/irnet/irnet_irda.h deleted file mode 100644 index 3e408952a3f1..000000000000 --- a/net/irda/irnet/irnet_irda.h +++ /dev/null @@ -1,178 +0,0 @@ -/* - * IrNET protocol module : Synchronous PPP over an IrDA socket. - * - * Jean II - HPL `00 - <jt@hpl.hp.com> - * - * This file contains all definitions and declarations necessary for the - * IRDA part of the IrNET module (dealing with IrTTP, IrIAS and co). - * This file is a private header, so other modules don't want to know - * what's in there... - */ - -#ifndef IRNET_IRDA_H -#define IRNET_IRDA_H - -/***************************** INCLUDES *****************************/ -/* Please add other headers in irnet.h */ - -#include "irnet.h" /* Module global include */ - -/************************ CONSTANTS & MACROS ************************/ - -/* - * Name of the service (socket name) used by IrNET - */ -/* IAS object name (or part of it) */ -#define IRNET_SERVICE_NAME "IrNetv1" -/* IAS attribute */ -#define IRNET_IAS_VALUE "IrDA:TinyTP:LsapSel" -/* LMP notify name for client (only for /proc/net/irda/irlmp) */ -#define IRNET_NOTIFY_NAME "IrNET socket" -/* LMP notify name for server (only for /proc/net/irda/irlmp) */ -#define IRNET_NOTIFY_NAME_SERV "IrNET server" - -/****************************** TYPES ******************************/ - -/* - * This is the main structure where we store all the data pertaining to - * the IrNET server (listen for connection requests) and the root - * of the IrNET socket list - */ -typedef struct irnet_root -{ - irnet_socket s; /* To pretend we are a client... */ - - /* Generic stuff */ - int magic; /* Paranoia */ - int running; /* Are we operational ? */ - - /* Link list of all IrNET instances opened */ - hashbin_t * list; - spinlock_t spinlock; /* Serialize access to the list */ - /* Note : the way hashbin has been designed is absolutely not - * reentrant, beware... So, we blindly protect all with spinlock */ - - /* Handle for the hint bit advertised in IrLMP */ - void * skey; - - /* Server socket part */ - struct ias_object * ias_obj; /* Our service name + lsap in IAS */ - -} irnet_root; - - -/**************************** PROTOTYPES ****************************/ - -/* ----------------------- CONTROL CHANNEL ----------------------- */ -static void - irnet_post_event(irnet_socket *, - irnet_event, - __u32, - __u32, - char *, - __u16); -/* ----------------------- IRDA SUBROUTINES ----------------------- */ -static inline int - irnet_open_tsap(irnet_socket *); -static inline __u8 - irnet_ias_to_tsap(irnet_socket *, - int, - struct ias_value *); -static inline int - irnet_find_lsap_sel(irnet_socket *); -static inline int - irnet_connect_tsap(irnet_socket *); -static inline int - irnet_discover_next_daddr(irnet_socket *); -static inline int - irnet_discover_daddr_and_lsap_sel(irnet_socket *); -static inline int - irnet_dname_to_daddr(irnet_socket *); -/* ------------------------ SERVER SOCKET ------------------------ */ -static inline int - irnet_daddr_to_dname(irnet_socket *); -static inline irnet_socket * - irnet_find_socket(irnet_socket *); -static inline int - irnet_connect_socket(irnet_socket *, - irnet_socket *, - struct qos_info *, - __u32, - __u8); -static inline void - irnet_disconnect_server(irnet_socket *, - struct sk_buff *); -static inline int - irnet_setup_server(void); -static inline void - irnet_destroy_server(void); -/* ---------------------- IRDA-TTP CALLBACKS ---------------------- */ -static int - irnet_data_indication(void *, /* instance */ - void *, /* sap */ - struct sk_buff *); -static void - irnet_disconnect_indication(void *, - void *, - LM_REASON, - struct sk_buff *); -static void - irnet_connect_confirm(void *, - void *, - struct qos_info *, - __u32, - __u8, - struct sk_buff *); -static void - irnet_flow_indication(void *, - void *, - LOCAL_FLOW); -static void - irnet_status_indication(void *, - LINK_STATUS, - LOCK_STATUS); -static void - irnet_connect_indication(void *, - void *, - struct qos_info *, - __u32, - __u8, - struct sk_buff *); -/* -------------------- IRDA-IAS/LMP CALLBACKS -------------------- */ -static void - irnet_getvalue_confirm(int, - __u16, - struct ias_value *, - void *); -static void - irnet_discovervalue_confirm(int, - __u16, - struct ias_value *, - void *); -#ifdef DISCOVERY_EVENTS -static void - irnet_discovery_indication(discinfo_t *, - DISCOVERY_MODE, - void *); -static void - irnet_expiry_indication(discinfo_t *, - DISCOVERY_MODE, - void *); -#endif - -/**************************** VARIABLES ****************************/ - -/* - * The IrNET server. Listen to connection requests and co... - */ -static struct irnet_root irnet_server; - -/* Control channel stuff (note : extern) */ -struct irnet_ctrl_channel irnet_events; - -/* The /proc/net/irda directory, defined elsewhere... */ -#ifdef CONFIG_PROC_FS -extern struct proc_dir_entry *proc_irda; -#endif /* CONFIG_PROC_FS */ - -#endif /* IRNET_IRDA_H */ diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c deleted file mode 100644 index 7025dcb853d0..000000000000 --- a/net/irda/irnet/irnet_ppp.c +++ /dev/null @@ -1,1189 +0,0 @@ -/* - * IrNET protocol module : Synchronous PPP over an IrDA socket. - * - * Jean II - HPL `00 - <jt@hpl.hp.com> - * - * This file implement the PPP interface and /dev/irnet character device. - * The PPP interface hook to the ppp_generic module, handle all our - * relationship to the PPP code in the kernel (and by extension to pppd), - * and exchange PPP frames with this module (send/receive). - * The /dev/irnet device is used primarily for 2 functions : - * 1) as a stub for pppd (the ppp daemon), so that we can appropriately - * generate PPP sessions (we pretend we are a tty). - * 2) as a control channel (write commands, read events) - */ - -#include <linux/sched/signal.h> -#include <linux/slab.h> - -#include "irnet_ppp.h" /* Private header */ -/* Please put other headers in irnet.h - Thanks */ - -/* Generic PPP callbacks (to call us) */ -static const struct ppp_channel_ops irnet_ppp_ops = { - .start_xmit = ppp_irnet_send, - .ioctl = ppp_irnet_ioctl -}; - -/************************* CONTROL CHANNEL *************************/ -/* - * When a pppd instance is not active on /dev/irnet, it acts as a control - * channel. - * Writing allow to set up the IrDA destination of the IrNET channel, - * and any application may be read events happening in IrNET... - */ - -/*------------------------------------------------------------------*/ -/* - * Write is used to send a command to configure a IrNET channel - * before it is open by pppd. The syntax is : "command argument" - * Currently there is only two defined commands : - * o name : set the requested IrDA nickname of the IrNET peer. - * o addr : set the requested IrDA address of the IrNET peer. - * Note : the code is crude, but effective... - */ -static inline ssize_t -irnet_ctrl_write(irnet_socket * ap, - const char __user *buf, - size_t count) -{ - char command[IRNET_MAX_COMMAND]; - char * start; /* Current command being processed */ - char * next; /* Next command to process */ - int length; /* Length of current command */ - - DENTER(CTRL_TRACE, "(ap=0x%p, count=%zd)\n", ap, count); - - /* Check for overflow... */ - DABORT(count >= IRNET_MAX_COMMAND, -ENOMEM, - CTRL_ERROR, "Too much data !!!\n"); - - /* Get the data in the driver */ - if(copy_from_user(command, buf, count)) - { - DERROR(CTRL_ERROR, "Invalid user space pointer.\n"); - return -EFAULT; - } - - /* Safe terminate the string */ - command[count] = '\0'; - DEBUG(CTRL_INFO, "Command line received is ``%s'' (%zd).\n", - command, count); - - /* Check every commands in the command line */ - next = command; - while(next != NULL) - { - /* Look at the next command */ - start = next; - - /* Scrap whitespaces before the command */ - start = skip_spaces(start); - - /* ',' is our command separator */ - next = strchr(start, ','); - if(next) - { - *next = '\0'; /* Terminate command */ - length = next - start; /* Length */ - next++; /* Skip the '\0' */ - } - else - length = strlen(start); - - DEBUG(CTRL_INFO, "Found command ``%s'' (%d).\n", start, length); - - /* Check if we recognised one of the known command - * We can't use "switch" with strings, so hack with "continue" */ - - /* First command : name -> Requested IrDA nickname */ - if(!strncmp(start, "name", 4)) - { - /* Copy the name only if is included and not "any" */ - if((length > 5) && (strcmp(start + 5, "any"))) - { - /* Strip out trailing whitespaces */ - while(isspace(start[length - 1])) - length--; - - DABORT(length < 5 || length > NICKNAME_MAX_LEN + 5, - -EINVAL, CTRL_ERROR, "Invalid nickname.\n"); - - /* Copy the name for later reuse */ - memcpy(ap->rname, start + 5, length - 5); - ap->rname[length - 5] = '\0'; - } - else - ap->rname[0] = '\0'; - DEBUG(CTRL_INFO, "Got rname = ``%s''\n", ap->rname); - - /* Restart the loop */ - continue; - } - - /* Second command : addr, daddr -> Requested IrDA destination address - * Also process : saddr -> Requested IrDA source address */ - if((!strncmp(start, "addr", 4)) || - (!strncmp(start, "daddr", 5)) || - (!strncmp(start, "saddr", 5))) - { - __u32 addr = DEV_ADDR_ANY; - - /* Copy the address only if is included and not "any" */ - if((length > 5) && (strcmp(start + 5, "any"))) - { - char * begp = start + 5; - char * endp; - - /* Scrap whitespaces before the command */ - begp = skip_spaces(begp); - - /* Convert argument to a number (last arg is the base) */ - addr = simple_strtoul(begp, &endp, 16); - /* Has it worked ? (endp should be start + length) */ - DABORT(endp <= (start + 5), -EINVAL, - CTRL_ERROR, "Invalid address.\n"); - } - /* Which type of address ? */ - if(start[0] == 's') - { - /* Save it */ - ap->rsaddr = addr; - DEBUG(CTRL_INFO, "Got rsaddr = %08x\n", ap->rsaddr); - } - else - { - /* Save it */ - ap->rdaddr = addr; - DEBUG(CTRL_INFO, "Got rdaddr = %08x\n", ap->rdaddr); - } - - /* Restart the loop */ - continue; - } - - /* Other possible command : connect N (number of retries) */ - - /* No command matched -> Failed... */ - DABORT(1, -EINVAL, CTRL_ERROR, "Not a recognised IrNET command.\n"); - } - - /* Success : we have parsed all commands successfully */ - return count; -} - -#ifdef INITIAL_DISCOVERY -/*------------------------------------------------------------------*/ -/* - * Function irnet_get_discovery_log (self) - * - * Query the content on the discovery log if not done - * - * This function query the current content of the discovery log - * at the startup of the event channel and save it in the internal struct. - */ -static void -irnet_get_discovery_log(irnet_socket * ap) -{ - __u16 mask = irlmp_service_to_hint(S_LAN); - - /* Ask IrLMP for the current discovery log */ - ap->discoveries = irlmp_get_discoveries(&ap->disco_number, mask, - DISCOVERY_DEFAULT_SLOTS); - - /* Check if the we got some results */ - if(ap->discoveries == NULL) - ap->disco_number = -1; - - DEBUG(CTRL_INFO, "Got the log (0x%p), size is %d\n", - ap->discoveries, ap->disco_number); -} - -/*------------------------------------------------------------------*/ -/* - * Function irnet_read_discovery_log (self, event) - * - * Read the content on the discovery log - * - * This function dump the current content of the discovery log - * at the startup of the event channel. - * Return 1 if wrote an event on the control channel... - * - * State of the ap->disco_XXX variables : - * Socket creation : discoveries = NULL ; disco_index = 0 ; disco_number = 0 - * While reading : discoveries = ptr ; disco_index = X ; disco_number = Y - * After reading : discoveries = NULL ; disco_index = Y ; disco_number = -1 - */ -static inline int -irnet_read_discovery_log(irnet_socket *ap, char *event, int buf_size) -{ - int done_event = 0; - - DENTER(CTRL_TRACE, "(ap=0x%p, event=0x%p)\n", - ap, event); - - /* Test if we have some work to do or we have already finished */ - if(ap->disco_number == -1) - { - DEBUG(CTRL_INFO, "Already done\n"); - return 0; - } - - /* Test if it's the first time and therefore we need to get the log */ - if(ap->discoveries == NULL) - irnet_get_discovery_log(ap); - - /* Check if we have more item to dump */ - if(ap->disco_index < ap->disco_number) - { - /* Write an event */ - snprintf(event, buf_size, - "Found %08x (%s) behind %08x {hints %02X-%02X}\n", - ap->discoveries[ap->disco_index].daddr, - ap->discoveries[ap->disco_index].info, - ap->discoveries[ap->disco_index].saddr, - ap->discoveries[ap->disco_index].hints[0], - ap->discoveries[ap->disco_index].hints[1]); - DEBUG(CTRL_INFO, "Writing discovery %d : %s\n", - ap->disco_index, ap->discoveries[ap->disco_index].info); - - /* We have an event */ - done_event = 1; - /* Next discovery */ - ap->disco_index++; - } - - /* Check if we have done the last item */ - if(ap->disco_index >= ap->disco_number) - { - /* No more items : remove the log and signal termination */ - DEBUG(CTRL_INFO, "Cleaning up log (0x%p)\n", - ap->discoveries); - if(ap->discoveries != NULL) - { - /* Cleanup our copy of the discovery log */ - kfree(ap->discoveries); - ap->discoveries = NULL; - } - ap->disco_number = -1; - } - - return done_event; -} -#endif /* INITIAL_DISCOVERY */ - -/*------------------------------------------------------------------*/ -/* - * Read is used to get IrNET events - */ -static inline ssize_t -irnet_ctrl_read(irnet_socket * ap, - struct file * file, - char __user * buf, - size_t count) -{ - DECLARE_WAITQUEUE(wait, current); - char event[75]; - ssize_t ret = 0; - - DENTER(CTRL_TRACE, "(ap=0x%p, count=%zd)\n", ap, count); - -#ifdef INITIAL_DISCOVERY - /* Check if we have read the log */ - if (irnet_read_discovery_log(ap, event, sizeof(event))) - { - count = min(strlen(event), count); - if (copy_to_user(buf, event, count)) - { - DERROR(CTRL_ERROR, "Invalid user space pointer.\n"); - return -EFAULT; - } - - DEXIT(CTRL_TRACE, "\n"); - return count; - } -#endif /* INITIAL_DISCOVERY */ - - /* Put ourselves on the wait queue to be woken up */ - add_wait_queue(&irnet_events.rwait, &wait); - set_current_state(TASK_INTERRUPTIBLE); - for(;;) - { - /* If there is unread events */ - ret = 0; - if(ap->event_index != irnet_events.index) - break; - ret = -EAGAIN; - if(file->f_flags & O_NONBLOCK) - break; - ret = -ERESTARTSYS; - if(signal_pending(current)) - break; - /* Yield and wait to be woken up */ - schedule(); - } - __set_current_state(TASK_RUNNING); - remove_wait_queue(&irnet_events.rwait, &wait); - - /* Did we got it ? */ - if(ret != 0) - { - /* No, return the error code */ - DEXIT(CTRL_TRACE, " - ret %zd\n", ret); - return ret; - } - - /* Which event is it ? */ - switch(irnet_events.log[ap->event_index].event) - { - case IRNET_DISCOVER: - snprintf(event, sizeof(event), - "Discovered %08x (%s) behind %08x {hints %02X-%02X}\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name, - irnet_events.log[ap->event_index].saddr, - irnet_events.log[ap->event_index].hints.byte[0], - irnet_events.log[ap->event_index].hints.byte[1]); - break; - case IRNET_EXPIRE: - snprintf(event, sizeof(event), - "Expired %08x (%s) behind %08x {hints %02X-%02X}\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name, - irnet_events.log[ap->event_index].saddr, - irnet_events.log[ap->event_index].hints.byte[0], - irnet_events.log[ap->event_index].hints.byte[1]); - break; - case IRNET_CONNECT_TO: - snprintf(event, sizeof(event), "Connected to %08x (%s) on ppp%d\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name, - irnet_events.log[ap->event_index].unit); - break; - case IRNET_CONNECT_FROM: - snprintf(event, sizeof(event), "Connection from %08x (%s) on ppp%d\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name, - irnet_events.log[ap->event_index].unit); - break; - case IRNET_REQUEST_FROM: - snprintf(event, sizeof(event), "Request from %08x (%s) behind %08x\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name, - irnet_events.log[ap->event_index].saddr); - break; - case IRNET_NOANSWER_FROM: - snprintf(event, sizeof(event), "No-answer from %08x (%s) on ppp%d\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name, - irnet_events.log[ap->event_index].unit); - break; - case IRNET_BLOCKED_LINK: - snprintf(event, sizeof(event), "Blocked link with %08x (%s) on ppp%d\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name, - irnet_events.log[ap->event_index].unit); - break; - case IRNET_DISCONNECT_FROM: - snprintf(event, sizeof(event), "Disconnection from %08x (%s) on ppp%d\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name, - irnet_events.log[ap->event_index].unit); - break; - case IRNET_DISCONNECT_TO: - snprintf(event, sizeof(event), "Disconnected to %08x (%s)\n", - irnet_events.log[ap->event_index].daddr, - irnet_events.log[ap->event_index].name); - break; - default: - snprintf(event, sizeof(event), "Bug\n"); - } - /* Increment our event index */ - ap->event_index = (ap->event_index + 1) % IRNET_MAX_EVENTS; - - DEBUG(CTRL_INFO, "Event is :%s", event); - - count = min(strlen(event), count); - if (copy_to_user(buf, event, count)) - { - DERROR(CTRL_ERROR, "Invalid user space pointer.\n"); - return -EFAULT; - } - - DEXIT(CTRL_TRACE, "\n"); - return count; -} - -/*------------------------------------------------------------------*/ -/* - * Poll : called when someone do a select on /dev/irnet. - * Just check if there are new events... - */ -static inline unsigned int -irnet_ctrl_poll(irnet_socket * ap, - struct file * file, - poll_table * wait) -{ - unsigned int mask; - - DENTER(CTRL_TRACE, "(ap=0x%p)\n", ap); - - poll_wait(file, &irnet_events.rwait, wait); - mask = POLLOUT | POLLWRNORM; - /* If there is unread events */ - if(ap->event_index != irnet_events.index) - mask |= POLLIN | POLLRDNORM; -#ifdef INITIAL_DISCOVERY - if(ap->disco_number != -1) - { - /* Test if it's the first time and therefore we need to get the log */ - if(ap->discoveries == NULL) - irnet_get_discovery_log(ap); - /* Recheck */ - if(ap->disco_number != -1) - mask |= POLLIN | POLLRDNORM; - } -#endif /* INITIAL_DISCOVERY */ - - DEXIT(CTRL_TRACE, " - mask=0x%X\n", mask); - return mask; -} - - -/*********************** FILESYSTEM CALLBACKS ***********************/ -/* - * Implement the usual open, read, write functions that will be called - * by the file system when some action is performed on /dev/irnet. - * Most of those actions will in fact be performed by "pppd" or - * the control channel, we just act as a redirector... - */ - -/*------------------------------------------------------------------*/ -/* - * Open : when somebody open /dev/irnet - * We basically create a new instance of irnet and initialise it. - */ -static int -dev_irnet_open(struct inode * inode, - struct file * file) -{ - struct irnet_socket * ap; - int err; - - DENTER(FS_TRACE, "(file=0x%p)\n", file); - -#ifdef SECURE_DEVIRNET - /* This could (should?) be enforced by the permissions on /dev/irnet. */ - if(!capable(CAP_NET_ADMIN)) - return -EPERM; -#endif /* SECURE_DEVIRNET */ - - /* Allocate a private structure for this IrNET instance */ - ap = kzalloc(sizeof(*ap), GFP_KERNEL); - DABORT(ap == NULL, -ENOMEM, FS_ERROR, "Can't allocate struct irnet...\n"); - - /* initialize the irnet structure */ - ap->file = file; - - /* PPP channel setup */ - ap->ppp_open = 0; - ap->chan.private = ap; - ap->chan.ops = &irnet_ppp_ops; - ap->chan.mtu = (2048 - TTP_MAX_HEADER - 2 - PPP_HDRLEN); - ap->chan.hdrlen = 2 + TTP_MAX_HEADER; /* for A/C + Max IrDA hdr */ - /* PPP parameters */ - ap->mru = (2048 - TTP_MAX_HEADER - 2 - PPP_HDRLEN); - ap->xaccm[0] = ~0U; - ap->xaccm[3] = 0x60000000U; - ap->raccm = ~0U; - - /* Setup the IrDA part... */ - err = irda_irnet_create(ap); - if(err) - { - DERROR(FS_ERROR, "Can't setup IrDA link...\n"); - kfree(ap); - - return err; - } - - /* For the control channel */ - ap->event_index = irnet_events.index; /* Cancel all past events */ - - mutex_init(&ap->lock); - - /* Put our stuff where we will be able to find it later */ - file->private_data = ap; - - DEXIT(FS_TRACE, " - ap=0x%p\n", ap); - - return 0; -} - - -/*------------------------------------------------------------------*/ -/* - * Close : when somebody close /dev/irnet - * Destroy the instance of /dev/irnet - */ -static int -dev_irnet_close(struct inode * inode, - struct file * file) -{ - irnet_socket * ap = file->private_data; - - DENTER(FS_TRACE, "(file=0x%p, ap=0x%p)\n", - file, ap); - DABORT(ap == NULL, 0, FS_ERROR, "ap is NULL !!!\n"); - - /* Detach ourselves */ - file->private_data = NULL; - - /* Close IrDA stuff */ - irda_irnet_destroy(ap); - - /* Disconnect from the generic PPP layer if not already done */ - if(ap->ppp_open) - { - DERROR(FS_ERROR, "Channel still registered - deregistering !\n"); - ap->ppp_open = 0; - ppp_unregister_channel(&ap->chan); - } - - kfree(ap); - - DEXIT(FS_TRACE, "\n"); - return 0; -} - -/*------------------------------------------------------------------*/ -/* - * Write does nothing. - * (we receive packet from ppp_generic through ppp_irnet_send()) - */ -static ssize_t -dev_irnet_write(struct file * file, - const char __user *buf, - size_t count, - loff_t * ppos) -{ - irnet_socket * ap = file->private_data; - - DPASS(FS_TRACE, "(file=0x%p, ap=0x%p, count=%zd)\n", - file, ap, count); - DABORT(ap == NULL, -ENXIO, FS_ERROR, "ap is NULL !!!\n"); - - /* If we are connected to ppp_generic, let it handle the job */ - if(ap->ppp_open) - return -EAGAIN; - else - return irnet_ctrl_write(ap, buf, count); -} - -/*------------------------------------------------------------------*/ -/* - * Read doesn't do much either. - * (pppd poll us, but ultimately reads through /dev/ppp) - */ -static ssize_t -dev_irnet_read(struct file * file, - char __user * buf, - size_t count, - loff_t * ppos) -{ - irnet_socket * ap = file->private_data; - - DPASS(FS_TRACE, "(file=0x%p, ap=0x%p, count=%zd)\n", - file, ap, count); - DABORT(ap == NULL, -ENXIO, FS_ERROR, "ap is NULL !!!\n"); - - /* If we are connected to ppp_generic, let it handle the job */ - if(ap->ppp_open) - return -EAGAIN; - else - return irnet_ctrl_read(ap, file, buf, count); -} - -/*------------------------------------------------------------------*/ -/* - * Poll : called when someone do a select on /dev/irnet - */ -static unsigned int -dev_irnet_poll(struct file * file, - poll_table * wait) -{ - irnet_socket * ap = file->private_data; - unsigned int mask; - - DENTER(FS_TRACE, "(file=0x%p, ap=0x%p)\n", - file, ap); - - mask = POLLOUT | POLLWRNORM; - DABORT(ap == NULL, mask, FS_ERROR, "ap is NULL !!!\n"); - - /* If we are connected to ppp_generic, let it handle the job */ - if(!ap->ppp_open) - mask |= irnet_ctrl_poll(ap, file, wait); - - DEXIT(FS_TRACE, " - mask=0x%X\n", mask); - return mask; -} - -/*------------------------------------------------------------------*/ -/* - * IOCtl : Called when someone does some ioctls on /dev/irnet - * This is the way pppd configure us and control us while the PPP - * instance is active. - */ -static long -dev_irnet_ioctl( - struct file * file, - unsigned int cmd, - unsigned long arg) -{ - irnet_socket * ap = file->private_data; - int err; - int val; - void __user *argp = (void __user *)arg; - - DENTER(FS_TRACE, "(file=0x%p, ap=0x%p, cmd=0x%X)\n", - file, ap, cmd); - - /* Basic checks... */ - DASSERT(ap != NULL, -ENXIO, PPP_ERROR, "ap is NULL...\n"); -#ifdef SECURE_DEVIRNET - if(!capable(CAP_NET_ADMIN)) - return -EPERM; -#endif /* SECURE_DEVIRNET */ - - err = -EFAULT; - switch(cmd) - { - /* Set discipline (should be N_SYNC_PPP or N_TTY) */ - case TIOCSETD: - if(get_user(val, (int __user *)argp)) - break; - if((val == N_SYNC_PPP) || (val == N_PPP)) - { - DEBUG(FS_INFO, "Entering PPP discipline.\n"); - /* PPP channel setup (ap->chan in configured in dev_irnet_open())*/ - if (mutex_lock_interruptible(&ap->lock)) - return -EINTR; - - err = ppp_register_channel(&ap->chan); - if(err == 0) - { - /* Our ppp side is active */ - ap->ppp_open = 1; - - DEBUG(FS_INFO, "Trying to establish a connection.\n"); - /* Setup the IrDA link now - may fail... */ - irda_irnet_connect(ap); - } - else - DERROR(FS_ERROR, "Can't setup PPP channel...\n"); - - mutex_unlock(&ap->lock); - } - else - { - /* In theory, should be N_TTY */ - DEBUG(FS_INFO, "Exiting PPP discipline.\n"); - /* Disconnect from the generic PPP layer */ - if (mutex_lock_interruptible(&ap->lock)) - return -EINTR; - - if(ap->ppp_open) - { - ap->ppp_open = 0; - ppp_unregister_channel(&ap->chan); - } - else - DERROR(FS_ERROR, "Channel not registered !\n"); - err = 0; - - mutex_unlock(&ap->lock); - } - break; - - /* Query PPP channel and unit number */ - case PPPIOCGCHAN: - if (mutex_lock_interruptible(&ap->lock)) - return -EINTR; - - if(ap->ppp_open && !put_user(ppp_channel_index(&ap->chan), - (int __user *)argp)) - err = 0; - - mutex_unlock(&ap->lock); - break; - case PPPIOCGUNIT: - if (mutex_lock_interruptible(&ap->lock)) - return -EINTR; - - if(ap->ppp_open && !put_user(ppp_unit_number(&ap->chan), - (int __user *)argp)) - err = 0; - - mutex_unlock(&ap->lock); - break; - - /* All these ioctls can be passed both directly and from ppp_generic, - * so we just deal with them in one place... - */ - case PPPIOCGFLAGS: - case PPPIOCSFLAGS: - case PPPIOCGASYNCMAP: - case PPPIOCSASYNCMAP: - case PPPIOCGRASYNCMAP: - case PPPIOCSRASYNCMAP: - case PPPIOCGXASYNCMAP: - case PPPIOCSXASYNCMAP: - case PPPIOCGMRU: - case PPPIOCSMRU: - DEBUG(FS_INFO, "Standard PPP ioctl.\n"); - if(!capable(CAP_NET_ADMIN)) - err = -EPERM; - else { - if (mutex_lock_interruptible(&ap->lock)) - return -EINTR; - - err = ppp_irnet_ioctl(&ap->chan, cmd, arg); - - mutex_unlock(&ap->lock); - } - break; - - /* TTY IOCTLs : Pretend that we are a tty, to keep pppd happy */ - /* Get termios */ - case TCGETS: - DEBUG(FS_INFO, "Get termios.\n"); - if (mutex_lock_interruptible(&ap->lock)) - return -EINTR; - -#ifndef TCGETS2 - if(!kernel_termios_to_user_termios((struct termios __user *)argp, &ap->termios)) - err = 0; -#else - if(kernel_termios_to_user_termios_1((struct termios __user *)argp, &ap->termios)) - err = 0; -#endif - - mutex_unlock(&ap->lock); - break; - /* Set termios */ - case TCSETSF: - DEBUG(FS_INFO, "Set termios.\n"); - if (mutex_lock_interruptible(&ap->lock)) - return -EINTR; - -#ifndef TCGETS2 - if(!user_termios_to_kernel_termios(&ap->termios, (struct termios __user *)argp)) - err = 0; -#else - if(!user_termios_to_kernel_termios_1(&ap->termios, (struct termios __user *)argp)) - err = 0; -#endif - - mutex_unlock(&ap->lock); - break; - - /* Set DTR/RTS */ - case TIOCMBIS: - case TIOCMBIC: - /* Set exclusive/non-exclusive mode */ - case TIOCEXCL: - case TIOCNXCL: - DEBUG(FS_INFO, "TTY compatibility.\n"); - err = 0; - break; - - case TCGETA: - DEBUG(FS_INFO, "TCGETA\n"); - break; - - case TCFLSH: - DEBUG(FS_INFO, "TCFLSH\n"); - /* Note : this will flush buffers in PPP, so it *must* be done - * We should also worry that we don't accept junk here and that - * we get rid of our own buffers */ -#ifdef FLUSH_TO_PPP - if (mutex_lock_interruptible(&ap->lock)) - return -EINTR; - ppp_output_wakeup(&ap->chan); - mutex_unlock(&ap->lock); -#endif /* FLUSH_TO_PPP */ - err = 0; - break; - - case FIONREAD: - DEBUG(FS_INFO, "FIONREAD\n"); - val = 0; - if(put_user(val, (int __user *)argp)) - break; - err = 0; - break; - - default: - DERROR(FS_ERROR, "Unsupported ioctl (0x%X)\n", cmd); - err = -ENOTTY; - } - - DEXIT(FS_TRACE, " - err = 0x%X\n", err); - return err; -} - -/************************** PPP CALLBACKS **************************/ -/* - * This are the functions that the generic PPP driver in the kernel - * will call to communicate to us. - */ - -/*------------------------------------------------------------------*/ -/* - * Prepare the ppp frame for transmission over the IrDA socket. - * We make sure that the header space is enough, and we change ppp header - * according to flags passed by pppd. - * This is not a callback, but just a helper function used in ppp_irnet_send() - */ -static inline struct sk_buff * -irnet_prepare_skb(irnet_socket * ap, - struct sk_buff * skb) -{ - unsigned char * data; - int proto; /* PPP protocol */ - int islcp; /* Protocol == LCP */ - int needaddr; /* Need PPP address */ - - DENTER(PPP_TRACE, "(ap=0x%p, skb=0x%p)\n", - ap, skb); - - /* Extract PPP protocol from the frame */ - data = skb->data; - proto = (data[0] << 8) + data[1]; - - /* LCP packets with codes between 1 (configure-request) - * and 7 (code-reject) must be sent as though no options - * have been negotiated. */ - islcp = (proto == PPP_LCP) && (1 <= data[2]) && (data[2] <= 7); - - /* compress protocol field if option enabled */ - if((data[0] == 0) && (ap->flags & SC_COMP_PROT) && (!islcp)) - skb_pull(skb,1); - - /* Check if we need address/control fields */ - needaddr = 2*((ap->flags & SC_COMP_AC) == 0 || islcp); - - /* Is the skb headroom large enough to contain all IrDA-headers? */ - if((skb_headroom(skb) < (ap->max_header_size + needaddr)) || - (skb_shared(skb))) - { - struct sk_buff * new_skb; - - DEBUG(PPP_INFO, "Reallocating skb\n"); - - /* Create a new skb */ - new_skb = skb_realloc_headroom(skb, ap->max_header_size + needaddr); - - /* We have to free the original skb anyway */ - dev_kfree_skb(skb); - - /* Did the realloc succeed ? */ - DABORT(new_skb == NULL, NULL, PPP_ERROR, "Could not realloc skb\n"); - - /* Use the new skb instead */ - skb = new_skb; - } - - /* prepend address/control fields if necessary */ - if(needaddr) - { - skb_push(skb, 2); - skb->data[0] = PPP_ALLSTATIONS; - skb->data[1] = PPP_UI; - } - - DEXIT(PPP_TRACE, "\n"); - - return skb; -} - -/*------------------------------------------------------------------*/ -/* - * Send a packet to the peer over the IrTTP connection. - * Returns 1 iff the packet was accepted. - * Returns 0 iff packet was not consumed. - * If the packet was not accepted, we will call ppp_output_wakeup - * at some later time to reactivate flow control in ppp_generic. - */ -static int -ppp_irnet_send(struct ppp_channel * chan, - struct sk_buff * skb) -{ - irnet_socket * self = (struct irnet_socket *) chan->private; - int ret; - - DENTER(PPP_TRACE, "(channel=0x%p, ap/self=0x%p)\n", - chan, self); - - /* Check if things are somewhat valid... */ - DASSERT(self != NULL, 0, PPP_ERROR, "Self is NULL !!!\n"); - - /* Check if we are connected */ - if(!(test_bit(0, &self->ttp_open))) - { -#ifdef CONNECT_IN_SEND - /* Let's try to connect one more time... */ - /* Note : we won't be connected after this call, but we should be - * ready for next packet... */ - /* If we are already connecting, this will fail */ - irda_irnet_connect(self); -#endif /* CONNECT_IN_SEND */ - - DEBUG(PPP_INFO, "IrTTP not ready ! (%ld-%ld)\n", - self->ttp_open, self->ttp_connect); - - /* Note : we can either drop the packet or block the packet. - * - * Blocking the packet allow us a better connection time, - * because by calling ppp_output_wakeup() we can have - * ppp_generic resending the LCP request immediately to us, - * rather than waiting for one of pppd periodic transmission of - * LCP request. - * - * On the other hand, if we block all packet, all those periodic - * transmissions of pppd accumulate in ppp_generic, creating a - * backlog of LCP request. When we eventually connect later on, - * we have to transmit all this backlog before we can connect - * proper (if we don't timeout before). - * - * The current strategy is as follow : - * While we are attempting to connect, we block packets to get - * a better connection time. - * If we fail to connect, we drain the queue and start dropping packets - */ -#ifdef BLOCK_WHEN_CONNECT - /* If we are attempting to connect */ - if(test_bit(0, &self->ttp_connect)) - { - /* Blocking packet, ppp_generic will retry later */ - return 0; - } -#endif /* BLOCK_WHEN_CONNECT */ - - /* Dropping packet, pppd will retry later */ - dev_kfree_skb(skb); - return 1; - } - - /* Check if the queue can accept any packet, otherwise block */ - if(self->tx_flow != FLOW_START) - DRETURN(0, PPP_INFO, "IrTTP queue full (%d skbs)...\n", - skb_queue_len(&self->tsap->tx_queue)); - - /* Prepare ppp frame for transmission */ - skb = irnet_prepare_skb(self, skb); - DABORT(skb == NULL, 1, PPP_ERROR, "Prepare skb for Tx failed.\n"); - - /* Send the packet to IrTTP */ - ret = irttp_data_request(self->tsap, skb); - if(ret < 0) - { - /* - * > IrTTPs tx queue is full, so we just have to - * > drop the frame! You might think that we should - * > just return -1 and don't deallocate the frame, - * > but that is dangerous since it's possible that - * > we have replaced the original skb with a new - * > one with larger headroom, and that would really - * > confuse do_dev_queue_xmit() in dev.c! I have - * > tried :-) DB - * Correction : we verify the flow control above (self->tx_flow), - * so we come here only if IrTTP doesn't like the packet (empty, - * too large, IrTTP not connected). In those rare cases, it's ok - * to drop it, we don't want to see it here again... - * Jean II - */ - DERROR(PPP_ERROR, "IrTTP doesn't like this packet !!! (0x%X)\n", ret); - /* irttp_data_request already free the packet */ - } - - DEXIT(PPP_TRACE, "\n"); - return 1; /* Packet has been consumed */ -} - -/*------------------------------------------------------------------*/ -/* - * Take care of the ioctls that ppp_generic doesn't want to deal with... - * Note : we are also called from dev_irnet_ioctl(). - */ -static int -ppp_irnet_ioctl(struct ppp_channel * chan, - unsigned int cmd, - unsigned long arg) -{ - irnet_socket * ap = (struct irnet_socket *) chan->private; - int err; - int val; - u32 accm[8]; - void __user *argp = (void __user *)arg; - - DENTER(PPP_TRACE, "(channel=0x%p, ap=0x%p, cmd=0x%X)\n", - chan, ap, cmd); - - /* Basic checks... */ - DASSERT(ap != NULL, -ENXIO, PPP_ERROR, "ap is NULL...\n"); - - err = -EFAULT; - switch(cmd) - { - /* PPP flags */ - case PPPIOCGFLAGS: - val = ap->flags | ap->rbits; - if(put_user(val, (int __user *) argp)) - break; - err = 0; - break; - case PPPIOCSFLAGS: - if(get_user(val, (int __user *) argp)) - break; - ap->flags = val & ~SC_RCV_BITS; - ap->rbits = val & SC_RCV_BITS; - err = 0; - break; - - /* Async map stuff - all dummy to please pppd */ - case PPPIOCGASYNCMAP: - if(put_user(ap->xaccm[0], (u32 __user *) argp)) - break; - err = 0; - break; - case PPPIOCSASYNCMAP: - if(get_user(ap->xaccm[0], (u32 __user *) argp)) - break; - err = 0; - break; - case PPPIOCGRASYNCMAP: - if(put_user(ap->raccm, (u32 __user *) argp)) - break; - err = 0; - break; - case PPPIOCSRASYNCMAP: - if(get_user(ap->raccm, (u32 __user *) argp)) - break; - err = 0; - break; - case PPPIOCGXASYNCMAP: - if(copy_to_user(argp, ap->xaccm, sizeof(ap->xaccm))) - break; - err = 0; - break; - case PPPIOCSXASYNCMAP: - if(copy_from_user(accm, argp, sizeof(accm))) - break; - accm[2] &= ~0x40000000U; /* can't escape 0x5e */ - accm[3] |= 0x60000000U; /* must escape 0x7d, 0x7e */ - memcpy(ap->xaccm, accm, sizeof(ap->xaccm)); - err = 0; - break; - - /* Max PPP frame size */ - case PPPIOCGMRU: - if(put_user(ap->mru, (int __user *) argp)) - break; - err = 0; - break; - case PPPIOCSMRU: - if(get_user(val, (int __user *) argp)) - break; - if(val < PPP_MRU) - val = PPP_MRU; - ap->mru = val; - err = 0; - break; - - default: - DEBUG(PPP_INFO, "Unsupported ioctl (0x%X)\n", cmd); - err = -ENOIOCTLCMD; - } - - DEXIT(PPP_TRACE, " - err = 0x%X\n", err); - return err; -} - -/************************** INITIALISATION **************************/ -/* - * Module initialisation and all that jazz... - */ - -/*------------------------------------------------------------------*/ -/* - * Hook our device callbacks in the filesystem, to connect our code - * to /dev/irnet - */ -static inline int __init -ppp_irnet_init(void) -{ - int err = 0; - - DENTER(MODULE_TRACE, "()\n"); - - /* Allocate ourselves as a minor in the misc range */ - err = misc_register(&irnet_misc_device); - - DEXIT(MODULE_TRACE, "\n"); - return err; -} - -/*------------------------------------------------------------------*/ -/* - * Cleanup at exit... - */ -static inline void __exit -ppp_irnet_cleanup(void) -{ - DENTER(MODULE_TRACE, "()\n"); - - /* De-allocate /dev/irnet minor in misc range */ - misc_deregister(&irnet_misc_device); - - DEXIT(MODULE_TRACE, "\n"); -} - -/*------------------------------------------------------------------*/ -/* - * Module main entry point - */ -static int __init -irnet_init(void) -{ - int err; - - /* Initialise both parts... */ - err = irda_irnet_init(); - if(!err) - err = ppp_irnet_init(); - return err; -} - -/*------------------------------------------------------------------*/ -/* - * Module exit - */ -static void __exit -irnet_cleanup(void) -{ - irda_irnet_cleanup(); - ppp_irnet_cleanup(); -} - -/*------------------------------------------------------------------*/ -/* - * Module magic - */ -module_init(irnet_init); -module_exit(irnet_cleanup); -MODULE_AUTHOR("Jean Tourrilhes <jt@hpl.hp.com>"); -MODULE_DESCRIPTION("IrNET : Synchronous PPP over IrDA"); -MODULE_LICENSE("GPL"); -MODULE_ALIAS_CHARDEV(10, 187); diff --git a/net/irda/irnet/irnet_ppp.h b/net/irda/irnet/irnet_ppp.h deleted file mode 100644 index 32061442cc8e..000000000000 --- a/net/irda/irnet/irnet_ppp.h +++ /dev/null @@ -1,116 +0,0 @@ -/* - * IrNET protocol module : Synchronous PPP over an IrDA socket. - * - * Jean II - HPL `00 - <jt@hpl.hp.com> - * - * This file contains all definitions and declarations necessary for the - * PPP part of the IrNET module. - * This file is a private header, so other modules don't want to know - * what's in there... - */ - -#ifndef IRNET_PPP_H -#define IRNET_PPP_H - -/***************************** INCLUDES *****************************/ - -#include "irnet.h" /* Module global include */ -#include <linux/miscdevice.h> - -/************************ CONSTANTS & MACROS ************************/ - -/* IrNET control channel stuff */ -#define IRNET_MAX_COMMAND 256 /* Max length of a command line */ - -/* PPP hardcore stuff */ - -/* Bits in rbits (PPP flags in irnet struct) */ -#define SC_RCV_BITS (SC_RCV_B7_1|SC_RCV_B7_0|SC_RCV_ODDP|SC_RCV_EVNP) - -/* Bit numbers in busy */ -#define XMIT_BUSY 0 -#define RECV_BUSY 1 -#define XMIT_WAKEUP 2 -#define XMIT_FULL 3 - -/* Queue management */ -#define PPPSYNC_MAX_RQLEN 32 /* arbitrary */ - -/****************************** TYPES ******************************/ - - -/**************************** PROTOTYPES ****************************/ - -/* ----------------------- CONTROL CHANNEL ----------------------- */ -static inline ssize_t - irnet_ctrl_write(irnet_socket *, - const char *, - size_t); -static inline ssize_t - irnet_ctrl_read(irnet_socket *, - struct file *, - char *, - size_t); -static inline unsigned int - irnet_ctrl_poll(irnet_socket *, - struct file *, - poll_table *); -/* ----------------------- CHARACTER DEVICE ----------------------- */ -static int - dev_irnet_open(struct inode *, /* fs callback : open */ - struct file *), - dev_irnet_close(struct inode *, - struct file *); -static ssize_t - dev_irnet_write(struct file *, - const char __user *, - size_t, - loff_t *), - dev_irnet_read(struct file *, - char __user *, - size_t, - loff_t *); -static unsigned int - dev_irnet_poll(struct file *, - poll_table *); -static long - dev_irnet_ioctl(struct file *, - unsigned int, - unsigned long); -/* ------------------------ PPP INTERFACE ------------------------ */ -static inline struct sk_buff * - irnet_prepare_skb(irnet_socket *, - struct sk_buff *); -static int - ppp_irnet_send(struct ppp_channel *, - struct sk_buff *); -static int - ppp_irnet_ioctl(struct ppp_channel *, - unsigned int, - unsigned long); - -/**************************** VARIABLES ****************************/ - -/* Filesystem callbacks (to call us) */ -static const struct file_operations irnet_device_fops = -{ - .owner = THIS_MODULE, - .read = dev_irnet_read, - .write = dev_irnet_write, - .poll = dev_irnet_poll, - .unlocked_ioctl = dev_irnet_ioctl, - .open = dev_irnet_open, - .release = dev_irnet_close, - .llseek = noop_llseek, - /* Also : llseek, readdir, mmap, flush, fsync, fasync, lock, readv, writev */ -}; - -/* Structure so that the misc major (drivers/char/misc.c) take care of us... */ -static struct miscdevice irnet_misc_device = -{ - .minor = IRNET_MINOR, - .name = "irnet", - .fops = &irnet_device_fops -}; - -#endif /* IRNET_PPP_H */ diff --git a/net/irda/irnetlink.c b/net/irda/irnetlink.c deleted file mode 100644 index 7fc340e574cf..000000000000 --- a/net/irda/irnetlink.c +++ /dev/null @@ -1,162 +0,0 @@ -/* - * IrDA netlink layer, for stack configuration. - * - * Copyright (c) 2007 Samuel Ortiz <samuel@sortiz.org> - * - * Partly based on the 802.11 nelink implementation - * (see net/wireless/nl80211.c) which is: - * Copyright 2006 Johannes Berg <johannes@sipsolutions.net> - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - */ - -#include <linux/socket.h> -#include <linux/irda.h> -#include <linux/gfp.h> -#include <net/net_namespace.h> -#include <net/sock.h> -#include <net/irda/irda.h> -#include <net/irda/irlap.h> -#include <net/genetlink.h> - - - -static struct genl_family irda_nl_family; - -static struct net_device * ifname_to_netdev(struct net *net, struct genl_info *info) -{ - char * ifname; - - if (!info->attrs[IRDA_NL_ATTR_IFNAME]) - return NULL; - - ifname = nla_data(info->attrs[IRDA_NL_ATTR_IFNAME]); - - pr_debug("%s(): Looking for %s\n", __func__, ifname); - - return dev_get_by_name(net, ifname); -} - -static int irda_nl_set_mode(struct sk_buff *skb, struct genl_info *info) -{ - struct net_device * dev; - struct irlap_cb * irlap; - u32 mode; - - if (!info->attrs[IRDA_NL_ATTR_MODE]) - return -EINVAL; - - mode = nla_get_u32(info->attrs[IRDA_NL_ATTR_MODE]); - - pr_debug("%s(): Switching to mode: %d\n", __func__, mode); - - dev = ifname_to_netdev(&init_net, info); - if (!dev) - return -ENODEV; - - irlap = (struct irlap_cb *)dev->atalk_ptr; - if (!irlap) { - dev_put(dev); - return -ENODEV; - } - - irlap->mode = mode; - - dev_put(dev); - - return 0; -} - -static int irda_nl_get_mode(struct sk_buff *skb, struct genl_info *info) -{ - struct net_device * dev; - struct irlap_cb * irlap; - struct sk_buff *msg; - void *hdr; - int ret = -ENOBUFS; - - dev = ifname_to_netdev(&init_net, info); - if (!dev) - return -ENODEV; - - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) { - dev_put(dev); - return -ENOMEM; - } - - irlap = (struct irlap_cb *)dev->atalk_ptr; - if (!irlap) { - ret = -ENODEV; - goto err_out; - } - - hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, - &irda_nl_family, 0, IRDA_NL_CMD_GET_MODE); - if (hdr == NULL) { - ret = -EMSGSIZE; - goto err_out; - } - - if(nla_put_string(msg, IRDA_NL_ATTR_IFNAME, - dev->name)) - goto err_out; - - if(nla_put_u32(msg, IRDA_NL_ATTR_MODE, irlap->mode)) - goto err_out; - - genlmsg_end(msg, hdr); - - return genlmsg_reply(msg, info); - - err_out: - nlmsg_free(msg); - dev_put(dev); - - return ret; -} - -static const struct nla_policy irda_nl_policy[IRDA_NL_ATTR_MAX + 1] = { - [IRDA_NL_ATTR_IFNAME] = { .type = NLA_NUL_STRING, - .len = IFNAMSIZ-1 }, - [IRDA_NL_ATTR_MODE] = { .type = NLA_U32 }, -}; - -static const struct genl_ops irda_nl_ops[] = { - { - .cmd = IRDA_NL_CMD_SET_MODE, - .doit = irda_nl_set_mode, - .policy = irda_nl_policy, - .flags = GENL_ADMIN_PERM, - }, - { - .cmd = IRDA_NL_CMD_GET_MODE, - .doit = irda_nl_get_mode, - .policy = irda_nl_policy, - /* can be retrieved by unprivileged users */ - }, - -}; - -static struct genl_family irda_nl_family __ro_after_init = { - .name = IRDA_NL_NAME, - .hdrsize = 0, - .version = IRDA_NL_VERSION, - .maxattr = IRDA_NL_CMD_MAX, - .module = THIS_MODULE, - .ops = irda_nl_ops, - .n_ops = ARRAY_SIZE(irda_nl_ops), -}; - -int __init irda_nl_register(void) -{ - return genl_register_family(&irda_nl_family); -} - -void irda_nl_unregister(void) -{ - genl_unregister_family(&irda_nl_family); -} diff --git a/net/irda/irproc.c b/net/irda/irproc.c deleted file mode 100644 index 77cfdde9d82f..000000000000 --- a/net/irda/irproc.c +++ /dev/null @@ -1,96 +0,0 @@ -/********************************************************************* - * - * Filename: irproc.c - * Version: 1.0 - * Description: Various entries in the /proc file system - * Status: Experimental. - * Author: Thomas Davis, <ratbert@radiks.net> - * Created at: Sat Feb 21 21:33:24 1998 - * Modified at: Sun Nov 14 08:54:54 1999 - * Modified by: Dag Brattli <dagb@cs.uit.no> - * - * Copyright (c) 1998-1999, Dag Brattli <dagb@cs.uit.no> - * Copyright (c) 1998, Thomas Davis, <ratbert@radiks.net>, - * All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * I, Thomas Davis, provide no warranty for any of this software. - * This material is provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#include <linux/proc_fs.h> -#include <linux/seq_file.h> -#include <linux/module.h> -#include <linux/init.h> -#include <net/net_namespace.h> - -#include <net/irda/irda.h> -#include <net/irda/irlap.h> -#include <net/irda/irlmp.h> - -extern const struct file_operations discovery_seq_fops; -extern const struct file_operations irlap_seq_fops; -extern const struct file_operations irlmp_seq_fops; -extern const struct file_operations irttp_seq_fops; -extern const struct file_operations irias_seq_fops; - -struct irda_entry { - const char *name; - const struct file_operations *fops; -}; - -struct proc_dir_entry *proc_irda; -EXPORT_SYMBOL(proc_irda); - -static const struct irda_entry irda_dirs[] = { - {"discovery", &discovery_seq_fops}, - {"irttp", &irttp_seq_fops}, - {"irlmp", &irlmp_seq_fops}, - {"irlap", &irlap_seq_fops}, - {"irias", &irias_seq_fops}, -}; - -/* - * Function irda_proc_register (void) - * - * Register irda entry in /proc file system - * - */ -void __init irda_proc_register(void) -{ - int i; - - proc_irda = proc_mkdir("irda", init_net.proc_net); - if (proc_irda == NULL) - return; - - for (i = 0; i < ARRAY_SIZE(irda_dirs); i++) - (void) proc_create(irda_dirs[i].name, 0, proc_irda, - irda_dirs[i].fops); -} - -/* - * Function irda_proc_unregister (void) - * - * Unregister irda entry in /proc file system - * - */ -void irda_proc_unregister(void) -{ - int i; - - if (proc_irda) { - for (i=0; i<ARRAY_SIZE(irda_dirs); i++) - remove_proc_entry(irda_dirs[i].name, proc_irda); - - remove_proc_entry("irda", init_net.proc_net); - proc_irda = NULL; - } -} - - diff --git a/net/irda/irqueue.c b/net/irda/irqueue.c deleted file mode 100644 index 160dc89335e2..000000000000 --- a/net/irda/irqueue.c +++ /dev/null @@ -1,911 +0,0 @@ -/********************************************************************* - * - * Filename: irqueue.c - * Version: 0.3 - * Description: General queue implementation - * Status: Experimental. - * Author: Dag Brattli <dagb@cs.uit.no> - * Created at: Tue Jun 9 13:29:31 1998 - * Modified at: Sun Dec 12 13:48:22 1999 - * Modified by: Dag Brattli <dagb@cs.uit.no> - * Modified at: Thu Jan 4 14:29:10 CET 2001 - * Modified by: Marc Zyngier <mzyngier@freesurf.fr> - * - * Copyright (C) 1998-1999, Aage Kvalnes <aage@cs.uit.no> - * Copyright (C) 1998, Dag Brattli, - * All Rights Reserved. - * - * This code is taken from the Vortex Operating System written by Aage - * Kvalnes. Aage has agreed that this code can use the GPL licence, - * although he does not use that licence in his own code. - * - * This copyright does however _not_ include the ELF hash() function - * which I currently don't know which licence or copyright it - * has. Please inform me if you know. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -/* - * NOTE : - * There are various problems with this package : - * o the hash function for ints is pathetic (but could be changed) - * o locking is sometime suspicious (especially during enumeration) - * o most users have only a few elements (== overhead) - * o most users never use search, so don't benefit from hashing - * Problem already fixed : - * o not 64 bit compliant (most users do hashv = (int) self) - * o hashbin_remove() is broken => use hashbin_remove_this() - * I think most users would be better served by a simple linked list - * (like include/linux/list.h) with a global spinlock per list. - * Jean II - */ - -/* - * Notes on the concurrent access to hashbin and other SMP issues - * ------------------------------------------------------------- - * Hashbins are very often in the IrDA stack a global repository of - * information, and therefore used in a very asynchronous manner following - * various events (driver calls, timers, user calls...). - * Therefore, very often it is highly important to consider the - * management of concurrent access to the hashbin and how to guarantee the - * consistency of the operations on it. - * - * First, we need to define the objective of locking : - * 1) Protect user data (content pointed by the hashbin) - * 2) Protect hashbin structure itself (linked list in each bin) - * - * OLD LOCKING - * ----------- - * - * The previous locking strategy, either HB_LOCAL or HB_GLOBAL were - * both inadequate in *both* aspect. - * o HB_GLOBAL was using a spinlock for each bin (local locking). - * o HB_LOCAL was disabling irq on *all* CPUs, so use a single - * global semaphore. - * The problems were : - * A) Global irq disabling is no longer supported by the kernel - * B) No protection for the hashbin struct global data - * o hashbin_delete() - * o hb_current - * C) No protection for user data in some cases - * - * A) HB_LOCAL use global irq disabling, so doesn't work on kernel - * 2.5.X. Even when it is supported (kernel 2.4.X and earlier), its - * performance is not satisfactory on SMP setups. Most hashbins were - * HB_LOCAL, so (A) definitely need fixing. - * B) HB_LOCAL could be modified to fix (B). However, because HB_GLOBAL - * lock only the individual bins, it will never be able to lock the - * global data, so can't do (B). - * C) Some functions return pointer to data that is still in the - * hashbin : - * o hashbin_find() - * o hashbin_get_first() - * o hashbin_get_next() - * As the data is still in the hashbin, it may be changed or free'd - * while the caller is examinimg the data. In those case, locking can't - * be done within the hashbin, but must include use of the data within - * the caller. - * The caller can easily do this with HB_LOCAL (just disable irqs). - * However, this is impossible with HB_GLOBAL because the caller has no - * way to know the proper bin, so don't know which spinlock to use. - * - * Quick summary : can no longer use HB_LOCAL, and HB_GLOBAL is - * fundamentally broken and will never work. - * - * NEW LOCKING - * ----------- - * - * To fix those problems, I've introduce a few changes in the - * hashbin locking : - * 1) New HB_LOCK scheme - * 2) hashbin->hb_spinlock - * 3) New hashbin usage policy - * - * HB_LOCK : - * ------- - * HB_LOCK is a locking scheme intermediate between the old HB_LOCAL - * and HB_GLOBAL. It uses a single spinlock to protect the whole content - * of the hashbin. As it is a single spinlock, it can protect the global - * data of the hashbin and not only the bins themselves. - * HB_LOCK can only protect some of the hashbin calls, so it only lock - * call that can be made 100% safe and leave other call unprotected. - * HB_LOCK in theory is slower than HB_GLOBAL, but as the hashbin - * content is always small contention is not high, so it doesn't matter - * much. HB_LOCK is probably faster than HB_LOCAL. - * - * hashbin->hb_spinlock : - * -------------------- - * The spinlock that HB_LOCK uses is available for caller, so that - * the caller can protect unprotected calls (see below). - * If the caller want to do entirely its own locking (HB_NOLOCK), he - * can do so and may use safely this spinlock. - * Locking is done like this : - * spin_lock_irqsave(&hashbin->hb_spinlock, flags); - * Releasing the lock : - * spin_unlock_irqrestore(&hashbin->hb_spinlock, flags); - * - * Safe & Protected calls : - * ---------------------- - * The following calls are safe or protected via HB_LOCK : - * o hashbin_new() -> safe - * o hashbin_delete() - * o hashbin_insert() - * o hashbin_remove_first() - * o hashbin_remove() - * o hashbin_remove_this() - * o HASHBIN_GET_SIZE() -> atomic - * - * The following calls only protect the hashbin itself : - * o hashbin_lock_find() - * o hashbin_find_next() - * - * Unprotected calls : - * ----------------- - * The following calls need to be protected by the caller : - * o hashbin_find() - * o hashbin_get_first() - * o hashbin_get_next() - * - * Locking Policy : - * -------------- - * If the hashbin is used only in a single thread of execution - * (explicitly or implicitely), you can use HB_NOLOCK - * If the calling module already provide concurrent access protection, - * you may use HB_NOLOCK. - * - * In all other cases, you need to use HB_LOCK and lock the hashbin - * every time before calling one of the unprotected calls. You also must - * use the pointer returned by the unprotected call within the locked - * region. - * - * Extra care for enumeration : - * -------------------------- - * hashbin_get_first() and hashbin_get_next() use the hashbin to - * store the current position, in hb_current. - * As long as the hashbin remains locked, this is safe. If you unlock - * the hashbin, the current position may change if anybody else modify - * or enumerate the hashbin. - * Summary : do the full enumeration while locked. - * - * Alternatively, you may use hashbin_find_next(). But, this will - * be slower, is more complex to use and doesn't protect the hashbin - * content. So, care is needed here as well. - * - * Other issues : - * ------------ - * I believe that we are overdoing it by using spin_lock_irqsave() - * and we should use only spin_lock_bh() or similar. But, I don't have - * the balls to try it out. - * Don't believe that because hashbin are now (somewhat) SMP safe - * that the rest of the code is. Higher layers tend to be safest, - * but LAP and LMP would need some serious dedicated love. - * - * Jean II - */ -#include <linux/module.h> -#include <linux/slab.h> - -#include <net/irda/irda.h> -#include <net/irda/irqueue.h> - -/************************ QUEUE SUBROUTINES ************************/ - -/* - * Hashbin - */ -#define GET_HASHBIN(x) ( x & HASHBIN_MASK ) - -/* - * Function hash (name) - * - * This function hash the input string 'name' using the ELF hash - * function for strings. - */ -static __u32 hash( const char* name) -{ - __u32 h = 0; - __u32 g; - - while(*name) { - h = (h<<4) + *name++; - if ((g = (h & 0xf0000000))) - h ^=g>>24; - h &=~g; - } - return h; -} - -/* - * Function enqueue_first (queue, proc) - * - * Insert item first in queue. - * - */ -static void enqueue_first(irda_queue_t **queue, irda_queue_t* element) -{ - - /* - * Check if queue is empty. - */ - if ( *queue == NULL ) { - /* - * Queue is empty. Insert one element into the queue. - */ - element->q_next = element->q_prev = *queue = element; - - } else { - /* - * Queue is not empty. Insert element into front of queue. - */ - element->q_next = (*queue); - (*queue)->q_prev->q_next = element; - element->q_prev = (*queue)->q_prev; - (*queue)->q_prev = element; - (*queue) = element; - } -} - - -/* - * Function dequeue (queue) - * - * Remove first entry in queue - * - */ -static irda_queue_t *dequeue_first(irda_queue_t **queue) -{ - irda_queue_t *ret; - - pr_debug("dequeue_first()\n"); - - /* - * Set return value - */ - ret = *queue; - - if ( *queue == NULL ) { - /* - * Queue was empty. - */ - } else if ( (*queue)->q_next == *queue ) { - /* - * Queue only contained a single element. It will now be - * empty. - */ - *queue = NULL; - } else { - /* - * Queue contained several element. Remove the first one. - */ - (*queue)->q_prev->q_next = (*queue)->q_next; - (*queue)->q_next->q_prev = (*queue)->q_prev; - *queue = (*queue)->q_next; - } - - /* - * Return the removed entry (or NULL of queue was empty). - */ - return ret; -} - -/* - * Function dequeue_general (queue, element) - * - * - */ -static irda_queue_t *dequeue_general(irda_queue_t **queue, irda_queue_t* element) -{ - irda_queue_t *ret; - - pr_debug("dequeue_general()\n"); - - /* - * Set return value - */ - ret = *queue; - - if ( *queue == NULL ) { - /* - * Queue was empty. - */ - } else if ( (*queue)->q_next == *queue ) { - /* - * Queue only contained a single element. It will now be - * empty. - */ - *queue = NULL; - - } else { - /* - * Remove specific element. - */ - element->q_prev->q_next = element->q_next; - element->q_next->q_prev = element->q_prev; - if ( (*queue) == element) - (*queue) = element->q_next; - } - - /* - * Return the removed entry (or NULL of queue was empty). - */ - return ret; -} - -/************************ HASHBIN MANAGEMENT ************************/ - -/* - * Function hashbin_create ( type, name ) - * - * Create hashbin! - * - */ -hashbin_t *hashbin_new(int type) -{ - hashbin_t* hashbin; - - /* - * Allocate new hashbin - */ - hashbin = kzalloc(sizeof(*hashbin), GFP_ATOMIC); - if (!hashbin) - return NULL; - - /* - * Initialize structure - */ - hashbin->hb_type = type; - hashbin->magic = HB_MAGIC; - //hashbin->hb_current = NULL; - - /* Make sure all spinlock's are unlocked */ - if ( hashbin->hb_type & HB_LOCK ) { - spin_lock_init(&hashbin->hb_spinlock); - } - - return hashbin; -} -EXPORT_SYMBOL(hashbin_new); - - -/* - * Function hashbin_delete (hashbin, free_func) - * - * Destroy hashbin, the free_func can be a user supplied special routine - * for deallocating this structure if it's complex. If not the user can - * just supply kfree, which should take care of the job. - */ -int hashbin_delete( hashbin_t* hashbin, FREE_FUNC free_func) -{ - irda_queue_t* queue; - unsigned long flags = 0; - int i; - - IRDA_ASSERT(hashbin != NULL, return -1;); - IRDA_ASSERT(hashbin->magic == HB_MAGIC, return -1;); - - /* Synchronize */ - if (hashbin->hb_type & HB_LOCK) - spin_lock_irqsave(&hashbin->hb_spinlock, flags); - - /* - * Free the entries in the hashbin, TODO: use hashbin_clear when - * it has been shown to work - */ - for (i = 0; i < HASHBIN_SIZE; i ++ ) { - while (1) { - queue = dequeue_first((irda_queue_t**) &hashbin->hb_queue[i]); - - if (!queue) - break; - - if (free_func) { - if (hashbin->hb_type & HB_LOCK) - spin_unlock_irqrestore(&hashbin->hb_spinlock, flags); - free_func(queue); - if (hashbin->hb_type & HB_LOCK) - spin_lock_irqsave(&hashbin->hb_spinlock, flags); - } - } - } - - /* Cleanup local data */ - hashbin->hb_current = NULL; - hashbin->magic = ~HB_MAGIC; - - /* Release lock */ - if (hashbin->hb_type & HB_LOCK) - spin_unlock_irqrestore(&hashbin->hb_spinlock, flags); - - /* - * Free the hashbin structure - */ - kfree(hashbin); - - return 0; -} -EXPORT_SYMBOL(hashbin_delete); - -/********************* HASHBIN LIST OPERATIONS *********************/ - -/* - * Function hashbin_insert (hashbin, entry, name) - * - * Insert an entry into the hashbin - * - */ -void hashbin_insert(hashbin_t* hashbin, irda_queue_t* entry, long hashv, - const char* name) -{ - unsigned long flags = 0; - int bin; - - IRDA_ASSERT( hashbin != NULL, return;); - IRDA_ASSERT( hashbin->magic == HB_MAGIC, return;); - - /* - * Locate hashbin - */ - if ( name ) - hashv = hash( name ); - bin = GET_HASHBIN( hashv ); - - /* Synchronize */ - if ( hashbin->hb_type & HB_LOCK ) { - spin_lock_irqsave(&hashbin->hb_spinlock, flags); - } /* Default is no-lock */ - - /* - * Store name and key - */ - entry->q_hash = hashv; - if ( name ) - strlcpy( entry->q_name, name, sizeof(entry->q_name)); - - /* - * Insert new entry first - */ - enqueue_first( (irda_queue_t**) &hashbin->hb_queue[ bin ], - entry); - hashbin->hb_size++; - - /* Release lock */ - if ( hashbin->hb_type & HB_LOCK ) { - spin_unlock_irqrestore(&hashbin->hb_spinlock, flags); - } /* Default is no-lock */ -} -EXPORT_SYMBOL(hashbin_insert); - -/* - * Function hashbin_remove_first (hashbin) - * - * Remove first entry of the hashbin - * - * Note : this function no longer use hashbin_remove(), but does things - * similar to hashbin_remove_this(), so can be considered safe. - * Jean II - */ -void *hashbin_remove_first( hashbin_t *hashbin) -{ - unsigned long flags = 0; - irda_queue_t *entry = NULL; - - /* Synchronize */ - if ( hashbin->hb_type & HB_LOCK ) { - spin_lock_irqsave(&hashbin->hb_spinlock, flags); - } /* Default is no-lock */ - - entry = hashbin_get_first( hashbin); - if ( entry != NULL) { - int bin; - long hashv; - /* - * Locate hashbin - */ - hashv = entry->q_hash; - bin = GET_HASHBIN( hashv ); - - /* - * Dequeue the entry... - */ - dequeue_general( (irda_queue_t**) &hashbin->hb_queue[ bin ], - entry); - hashbin->hb_size--; - entry->q_next = NULL; - entry->q_prev = NULL; - - /* - * Check if this item is the currently selected item, and in - * that case we must reset hb_current - */ - if ( entry == hashbin->hb_current) - hashbin->hb_current = NULL; - } - - /* Release lock */ - if ( hashbin->hb_type & HB_LOCK ) { - spin_unlock_irqrestore(&hashbin->hb_spinlock, flags); - } /* Default is no-lock */ - - return entry; -} - - -/* - * Function hashbin_remove (hashbin, hashv, name) - * - * Remove entry with the given name - * - * The use of this function is highly discouraged, because the whole - * concept behind hashbin_remove() is broken. In many cases, it's not - * possible to guarantee the unicity of the index (either hashv or name), - * leading to removing the WRONG entry. - * The only simple safe use is : - * hashbin_remove(hasbin, (int) self, NULL); - * In other case, you must think hard to guarantee unicity of the index. - * Jean II - */ -void* hashbin_remove( hashbin_t* hashbin, long hashv, const char* name) -{ - int bin, found = FALSE; - unsigned long flags = 0; - irda_queue_t* entry; - - IRDA_ASSERT( hashbin != NULL, return NULL;); - IRDA_ASSERT( hashbin->magic == HB_MAGIC, return NULL;); - - /* - * Locate hashbin - */ - if ( name ) - hashv = hash( name ); - bin = GET_HASHBIN( hashv ); - - /* Synchronize */ - if ( hashbin->hb_type & HB_LOCK ) { - spin_lock_irqsave(&hashbin->hb_spinlock, flags); - } /* Default is no-lock */ - - /* - * Search for entry - */ - entry = hashbin->hb_queue[ bin ]; - if ( entry ) { - do { - /* - * Check for key - */ - if ( entry->q_hash == hashv ) { - /* - * Name compare too? - */ - if ( name ) { - if ( strcmp( entry->q_name, name) == 0) - { - found = TRUE; - break; - } - } else { - found = TRUE; - break; - } - } - entry = entry->q_next; - } while ( entry != hashbin->hb_queue[ bin ] ); - } - - /* - * If entry was found, dequeue it - */ - if ( found ) { - dequeue_general( (irda_queue_t**) &hashbin->hb_queue[ bin ], - entry); - hashbin->hb_size--; - - /* - * Check if this item is the currently selected item, and in - * that case we must reset hb_current - */ - if ( entry == hashbin->hb_current) - hashbin->hb_current = NULL; - } - - /* Release lock */ - if ( hashbin->hb_type & HB_LOCK ) { - spin_unlock_irqrestore(&hashbin->hb_spinlock, flags); - } /* Default is no-lock */ - - - /* Return */ - if ( found ) - return entry; - else - return NULL; - -} -EXPORT_SYMBOL(hashbin_remove); - -/* - * Function hashbin_remove_this (hashbin, entry) - * - * Remove entry with the given name - * - * In some cases, the user of hashbin can't guarantee the unicity - * of either the hashv or name. - * In those cases, using the above function is guaranteed to cause troubles, - * so we use this one instead... - * And by the way, it's also faster, because we skip the search phase ;-) - */ -void* hashbin_remove_this( hashbin_t* hashbin, irda_queue_t* entry) -{ - unsigned long flags = 0; - int bin; - long hashv; - - IRDA_ASSERT( hashbin != NULL, return NULL;); - IRDA_ASSERT( hashbin->magic == HB_MAGIC, return NULL;); - IRDA_ASSERT( entry != NULL, return NULL;); - - /* Synchronize */ - if ( hashbin->hb_type & HB_LOCK ) { - spin_lock_irqsave(&hashbin->hb_spinlock, flags); - } /* Default is no-lock */ - - /* Check if valid and not already removed... */ - if((entry->q_next == NULL) || (entry->q_prev == NULL)) { - entry = NULL; - goto out; - } - - /* - * Locate hashbin - */ - hashv = entry->q_hash; - bin = GET_HASHBIN( hashv ); - - /* - * Dequeue the entry... - */ - dequeue_general( (irda_queue_t**) &hashbin->hb_queue[ bin ], - entry); - hashbin->hb_size--; - entry->q_next = NULL; - entry->q_prev = NULL; - - /* - * Check if this item is the currently selected item, and in - * that case we must reset hb_current - */ - if ( entry == hashbin->hb_current) - hashbin->hb_current = NULL; -out: - /* Release lock */ - if ( hashbin->hb_type & HB_LOCK ) { - spin_unlock_irqrestore(&hashbin->hb_spinlock, flags); - } /* Default is no-lock */ - - return entry; -} -EXPORT_SYMBOL(hashbin_remove_this); - -/*********************** HASHBIN ENUMERATION ***********************/ - -/* - * Function hashbin_common_find (hashbin, hashv, name) - * - * Find item with the given hashv or name - * - */ -void* hashbin_find( hashbin_t* hashbin, long hashv, const char* name ) -{ - int bin; - irda_queue_t* entry; - - pr_debug("hashbin_find()\n"); - - IRDA_ASSERT( hashbin != NULL, return NULL;); - IRDA_ASSERT( hashbin->magic == HB_MAGIC, return NULL;); - - /* - * Locate hashbin - */ - if ( name ) - hashv = hash( name ); - bin = GET_HASHBIN( hashv ); - - /* - * Search for entry - */ - entry = hashbin->hb_queue[ bin]; - if ( entry ) { - do { - /* - * Check for key - */ - if ( entry->q_hash == hashv ) { - /* - * Name compare too? - */ - if ( name ) { - if ( strcmp( entry->q_name, name ) == 0 ) { - return entry; - } - } else { - return entry; - } - } - entry = entry->q_next; - } while ( entry != hashbin->hb_queue[ bin ] ); - } - - return NULL; -} -EXPORT_SYMBOL(hashbin_find); - -/* - * Function hashbin_lock_find (hashbin, hashv, name) - * - * Find item with the given hashv or name - * - * Same, but with spinlock protection... - * I call it safe, but it's only safe with respect to the hashbin, not its - * content. - Jean II - */ -void* hashbin_lock_find( hashbin_t* hashbin, long hashv, const char* name ) -{ - unsigned long flags = 0; - irda_queue_t* entry; - - /* Synchronize */ - spin_lock_irqsave(&hashbin->hb_spinlock, flags); - - /* - * Search for entry - */ - entry = hashbin_find(hashbin, hashv, name); - - /* Release lock */ - spin_unlock_irqrestore(&hashbin->hb_spinlock, flags); - - return entry; -} -EXPORT_SYMBOL(hashbin_lock_find); - -/* - * Function hashbin_find (hashbin, hashv, name, pnext) - * - * Find an item with the given hashv or name, and its successor - * - * This function allow to do concurrent enumerations without the - * need to lock over the whole session, because the caller keep the - * context of the search. On the other hand, it might fail and return - * NULL if the entry is removed. - Jean II - */ -void* hashbin_find_next( hashbin_t* hashbin, long hashv, const char* name, - void ** pnext) -{ - unsigned long flags = 0; - irda_queue_t* entry; - - /* Synchronize */ - spin_lock_irqsave(&hashbin->hb_spinlock, flags); - - /* - * Search for current entry - * This allow to check if the current item is still in the - * hashbin or has been removed. - */ - entry = hashbin_find(hashbin, hashv, name); - - /* - * Trick hashbin_get_next() to return what we want - */ - if(entry) { - hashbin->hb_current = entry; - *pnext = hashbin_get_next( hashbin ); - } else - *pnext = NULL; - - /* Release lock */ - spin_unlock_irqrestore(&hashbin->hb_spinlock, flags); - - return entry; -} - -/* - * Function hashbin_get_first (hashbin) - * - * Get a pointer to first element in hashbin, this function must be - * called before any calls to hashbin_get_next()! - * - */ -irda_queue_t *hashbin_get_first( hashbin_t* hashbin) -{ - irda_queue_t *entry; - int i; - - IRDA_ASSERT( hashbin != NULL, return NULL;); - IRDA_ASSERT( hashbin->magic == HB_MAGIC, return NULL;); - - if ( hashbin == NULL) - return NULL; - - for ( i = 0; i < HASHBIN_SIZE; i ++ ) { - entry = hashbin->hb_queue[ i]; - if ( entry) { - hashbin->hb_current = entry; - return entry; - } - } - /* - * Did not find any item in hashbin - */ - return NULL; -} -EXPORT_SYMBOL(hashbin_get_first); - -/* - * Function hashbin_get_next (hashbin) - * - * Get next item in hashbin. A series of hashbin_get_next() calls must - * be started by a call to hashbin_get_first(). The function returns - * NULL when all items have been traversed - * - * The context of the search is stored within the hashbin, so you must - * protect yourself from concurrent enumerations. - Jean II - */ -irda_queue_t *hashbin_get_next( hashbin_t *hashbin) -{ - irda_queue_t* entry; - int bin; - int i; - - IRDA_ASSERT( hashbin != NULL, return NULL;); - IRDA_ASSERT( hashbin->magic == HB_MAGIC, return NULL;); - - if ( hashbin->hb_current == NULL) { - IRDA_ASSERT( hashbin->hb_current != NULL, return NULL;); - return NULL; - } - entry = hashbin->hb_current->q_next; - bin = GET_HASHBIN( entry->q_hash); - - /* - * Make sure that we are not back at the beginning of the queue - * again - */ - if ( entry != hashbin->hb_queue[ bin ]) { - hashbin->hb_current = entry; - - return entry; - } - - /* - * Check that this is not the last queue in hashbin - */ - if ( bin >= HASHBIN_SIZE) - return NULL; - - /* - * Move to next queue in hashbin - */ - bin++; - for ( i = bin; i < HASHBIN_SIZE; i++ ) { - entry = hashbin->hb_queue[ i]; - if ( entry) { - hashbin->hb_current = entry; - - return entry; - } - } - return NULL; -} -EXPORT_SYMBOL(hashbin_get_next); diff --git a/net/irda/irsysctl.c b/net/irda/irsysctl.c deleted file mode 100644 index 873da5e7d428..000000000000 --- a/net/irda/irsysctl.c +++ /dev/null @@ -1,258 +0,0 @@ -/********************************************************************* - * - * Filename: irsysctl.c - * Version: 1.0 - * Description: Sysctl interface for IrDA - * Status: Experimental. - * Author: Dag Brattli <dagb@cs.uit.no> - * Created at: Sun May 24 22:12:06 1998 - * Modified at: Fri Jun 4 02:50:15 1999 - * Modified by: Dag Brattli <dagb@cs.uit.no> - * - * Copyright (c) 1997, 1999 Dag Brattli, All Rights Reserved. - * Copyright (c) 2000-2001 Jean Tourrilhes <jt@hpl.hp.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#include <linux/mm.h> -#include <linux/ctype.h> -#include <linux/sysctl.h> -#include <linux/init.h> - -#include <net/irda/irda.h> /* irda_debug */ -#include <net/irda/irlmp.h> -#include <net/irda/timer.h> -#include <net/irda/irias_object.h> - -extern int sysctl_discovery; -extern int sysctl_discovery_slots; -extern int sysctl_discovery_timeout; -extern int sysctl_slot_timeout; -extern int sysctl_fast_poll_increase; -extern char sysctl_devname[]; -extern int sysctl_max_baud_rate; -extern unsigned int sysctl_min_tx_turn_time; -extern unsigned int sysctl_max_tx_data_size; -extern unsigned int sysctl_max_tx_window; -extern int sysctl_max_noreply_time; -extern int sysctl_warn_noreply_time; -extern int sysctl_lap_keepalive_time; - -extern struct irlmp_cb *irlmp; - -/* this is needed for the proc_dointvec_minmax - Jean II */ -static int max_discovery_slots = 16; /* ??? */ -static int min_discovery_slots = 1; -/* IrLAP 6.13.2 says 25ms to 10+70ms - allow higher since some devices - * seems to require it. (from Dag's comment) */ -static int max_slot_timeout = 160; -static int min_slot_timeout = 20; -static int max_max_baud_rate = 16000000; /* See qos.c - IrLAP spec */ -static int min_max_baud_rate = 2400; -static int max_min_tx_turn_time = 10000; /* See qos.c - IrLAP spec */ -static int min_min_tx_turn_time; -static int max_max_tx_data_size = 2048; /* See qos.c - IrLAP spec */ -static int min_max_tx_data_size = 64; -static int max_max_tx_window = 7; /* See qos.c - IrLAP spec */ -static int min_max_tx_window = 1; -static int max_max_noreply_time = 40; /* See qos.c - IrLAP spec */ -static int min_max_noreply_time = 3; -static int max_warn_noreply_time = 3; /* 3s == standard */ -static int min_warn_noreply_time = 1; /* 1s == min WD_TIMER */ -static int max_lap_keepalive_time = 10000; /* 10s */ -static int min_lap_keepalive_time = 100; /* 100us */ -/* For other sysctl, I've no idea of the range. Maybe Dag could help - * us on that - Jean II */ - -static int do_devname(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - int ret; - - ret = proc_dostring(table, write, buffer, lenp, ppos); - if (ret == 0 && write) { - struct ias_value *val; - - val = irias_new_string_value(sysctl_devname); - if (val) - irias_object_change_attribute("Device", "DeviceName", val); - } - return ret; -} - - -static int do_discovery(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) -{ - int ret; - - ret = proc_dointvec(table, write, buffer, lenp, ppos); - if (ret) - return ret; - - if (irlmp == NULL) - return -ENODEV; - - if (sysctl_discovery) - irlmp_start_discovery_timer(irlmp, sysctl_discovery_timeout*HZ); - else - del_timer_sync(&irlmp->discovery_timer); - - return ret; -} - -/* One file */ -static struct ctl_table irda_table[] = { - { - .procname = "discovery", - .data = &sysctl_discovery, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = do_discovery, - }, - { - .procname = "devname", - .data = sysctl_devname, - .maxlen = 65, - .mode = 0644, - .proc_handler = do_devname, - }, -#ifdef CONFIG_IRDA_FAST_RR - { - .procname = "fast_poll_increase", - .data = &sysctl_fast_poll_increase, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, -#endif - { - .procname = "discovery_slots", - .data = &sysctl_discovery_slots, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_discovery_slots, - .extra2 = &max_discovery_slots - }, - { - .procname = "discovery_timeout", - .data = &sysctl_discovery_timeout, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { - .procname = "slot_timeout", - .data = &sysctl_slot_timeout, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_slot_timeout, - .extra2 = &max_slot_timeout - }, - { - .procname = "max_baud_rate", - .data = &sysctl_max_baud_rate, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_max_baud_rate, - .extra2 = &max_max_baud_rate - }, - { - .procname = "min_tx_turn_time", - .data = &sysctl_min_tx_turn_time, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_min_tx_turn_time, - .extra2 = &max_min_tx_turn_time - }, - { - .procname = "max_tx_data_size", - .data = &sysctl_max_tx_data_size, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_max_tx_data_size, - .extra2 = &max_max_tx_data_size - }, - { - .procname = "max_tx_window", - .data = &sysctl_max_tx_window, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_max_tx_window, - .extra2 = &max_max_tx_window - }, - { - .procname = "max_noreply_time", - .data = &sysctl_max_noreply_time, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_max_noreply_time, - .extra2 = &max_max_noreply_time - }, - { - .procname = "warn_noreply_time", - .data = &sysctl_warn_noreply_time, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_warn_noreply_time, - .extra2 = &max_warn_noreply_time - }, - { - .procname = "lap_keepalive_time", - .data = &sysctl_lap_keepalive_time, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &min_lap_keepalive_time, - .extra2 = &max_lap_keepalive_time - }, - { } -}; - -static struct ctl_table_header *irda_table_header; - -/* - * Function irda_sysctl_register (void) - * - * Register our sysctl interface - * - */ -int __init irda_sysctl_register(void) -{ - irda_table_header = register_net_sysctl(&init_net, "net/irda", irda_table); - if (!irda_table_header) - return -ENOMEM; - - return 0; -} - -/* - * Function irda_sysctl_unregister (void) - * - * Unregister our sysctl interface - * - */ -void irda_sysctl_unregister(void) -{ - unregister_net_sysctl_table(irda_table_header); -} - - - diff --git a/net/irda/irttp.c b/net/irda/irttp.c deleted file mode 100644 index b6ab41d5b3a3..000000000000 --- a/net/irda/irttp.c +++ /dev/null @@ -1,1891 +0,0 @@ -/********************************************************************* - * - * Filename: irttp.c - * Version: 1.2 - * Description: Tiny Transport Protocol (TTP) implementation - * Status: Stable - * Author: Dag Brattli <dagb@cs.uit.no> - * Created at: Sun Aug 31 20:14:31 1997 - * Modified at: Wed Jan 5 11:31:27 2000 - * Modified by: Dag Brattli <dagb@cs.uit.no> - * - * Copyright (c) 1998-2000 Dag Brattli <dagb@cs.uit.no>, - * All Rights Reserved. - * Copyright (c) 2000-2003 Jean Tourrilhes <jt@hpl.hp.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#include <linux/skbuff.h> -#include <linux/init.h> -#include <linux/fs.h> -#include <linux/seq_file.h> -#include <linux/slab.h> -#include <linux/export.h> - -#include <asm/byteorder.h> -#include <asm/unaligned.h> - -#include <net/irda/irda.h> -#include <net/irda/irlap.h> -#include <net/irda/irlmp.h> -#include <net/irda/parameters.h> -#include <net/irda/irttp.h> - -static struct irttp_cb *irttp; - -static void __irttp_close_tsap(struct tsap_cb *self); - -static int irttp_data_indication(void *instance, void *sap, - struct sk_buff *skb); -static int irttp_udata_indication(void *instance, void *sap, - struct sk_buff *skb); -static void irttp_disconnect_indication(void *instance, void *sap, - LM_REASON reason, struct sk_buff *); -static void irttp_connect_indication(void *instance, void *sap, - struct qos_info *qos, __u32 max_sdu_size, - __u8 header_size, struct sk_buff *skb); -static void irttp_connect_confirm(void *instance, void *sap, - struct qos_info *qos, __u32 max_sdu_size, - __u8 header_size, struct sk_buff *skb); -static void irttp_run_tx_queue(struct tsap_cb *self); -static void irttp_run_rx_queue(struct tsap_cb *self); - -static void irttp_flush_queues(struct tsap_cb *self); -static void irttp_fragment_skb(struct tsap_cb *self, struct sk_buff *skb); -static struct sk_buff *irttp_reassemble_skb(struct tsap_cb *self); -static void irttp_todo_expired(unsigned long data); -static int irttp_param_max_sdu_size(void *instance, irda_param_t *param, - int get); - -static void irttp_flow_indication(void *instance, void *sap, LOCAL_FLOW flow); -static void irttp_status_indication(void *instance, - LINK_STATUS link, LOCK_STATUS lock); - -/* Information for parsing parameters in IrTTP */ -static const pi_minor_info_t pi_minor_call_table[] = { - { NULL, 0 }, /* 0x00 */ - { irttp_param_max_sdu_size, PV_INTEGER | PV_BIG_ENDIAN } /* 0x01 */ -}; -static const pi_major_info_t pi_major_call_table[] = { - { pi_minor_call_table, 2 } -}; -static pi_param_info_t param_info = { pi_major_call_table, 1, 0x0f, 4 }; - -/************************ GLOBAL PROCEDURES ************************/ - -/* - * Function irttp_init (void) - * - * Initialize the IrTTP layer. Called by module initialization code - * - */ -int __init irttp_init(void) -{ - irttp = kzalloc(sizeof(struct irttp_cb), GFP_KERNEL); - if (irttp == NULL) - return -ENOMEM; - - irttp->magic = TTP_MAGIC; - - irttp->tsaps = hashbin_new(HB_LOCK); - if (!irttp->tsaps) { - net_err_ratelimited("%s: can't allocate IrTTP hashbin!\n", - __func__); - kfree(irttp); - return -ENOMEM; - } - - return 0; -} - -/* - * Function irttp_cleanup (void) - * - * Called by module destruction/cleanup code - * - */ -void irttp_cleanup(void) -{ - /* Check for main structure */ - IRDA_ASSERT(irttp->magic == TTP_MAGIC, return;); - - /* - * Delete hashbin and close all TSAP instances in it - */ - hashbin_delete(irttp->tsaps, (FREE_FUNC) __irttp_close_tsap); - - irttp->magic = 0; - - /* De-allocate main structure */ - kfree(irttp); - - irttp = NULL; -} - -/*************************** SUBROUTINES ***************************/ - -/* - * Function irttp_start_todo_timer (self, timeout) - * - * Start todo timer. - * - * Made it more effient and unsensitive to race conditions - Jean II - */ -static inline void irttp_start_todo_timer(struct tsap_cb *self, int timeout) -{ - /* Set new value for timer */ - mod_timer(&self->todo_timer, jiffies + timeout); -} - -/* - * Function irttp_todo_expired (data) - * - * Todo timer has expired! - * - * One of the restriction of the timer is that it is run only on the timer - * interrupt which run every 10ms. This mean that even if you set the timer - * with a delay of 0, it may take up to 10ms before it's run. - * So, to minimise latency and keep cache fresh, we try to avoid using - * it as much as possible. - * Note : we can't use tasklets, because they can't be asynchronously - * killed (need user context), and we can't guarantee that here... - * Jean II - */ -static void irttp_todo_expired(unsigned long data) -{ - struct tsap_cb *self = (struct tsap_cb *) data; - - /* Check that we still exist */ - if (!self || self->magic != TTP_TSAP_MAGIC) - return; - - pr_debug("%s(instance=%p)\n", __func__, self); - - /* Try to make some progress, especially on Tx side - Jean II */ - irttp_run_rx_queue(self); - irttp_run_tx_queue(self); - - /* Check if time for disconnect */ - if (test_bit(0, &self->disconnect_pend)) { - /* Check if it's possible to disconnect yet */ - if (skb_queue_empty(&self->tx_queue)) { - /* Make sure disconnect is not pending anymore */ - clear_bit(0, &self->disconnect_pend); /* FALSE */ - - /* Note : self->disconnect_skb may be NULL */ - irttp_disconnect_request(self, self->disconnect_skb, - P_NORMAL); - self->disconnect_skb = NULL; - } else { - /* Try again later */ - irttp_start_todo_timer(self, HZ/10); - - /* No reason to try and close now */ - return; - } - } - - /* Check if it's closing time */ - if (self->close_pend) - /* Finish cleanup */ - irttp_close_tsap(self); -} - -/* - * Function irttp_flush_queues (self) - * - * Flushes (removes all frames) in transitt-buffer (tx_list) - */ -static void irttp_flush_queues(struct tsap_cb *self) -{ - struct sk_buff *skb; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;); - - /* Deallocate frames waiting to be sent */ - while ((skb = skb_dequeue(&self->tx_queue)) != NULL) - dev_kfree_skb(skb); - - /* Deallocate received frames */ - while ((skb = skb_dequeue(&self->rx_queue)) != NULL) - dev_kfree_skb(skb); - - /* Deallocate received fragments */ - while ((skb = skb_dequeue(&self->rx_fragments)) != NULL) - dev_kfree_skb(skb); -} - -/* - * Function irttp_reassemble (self) - * - * Makes a new (continuous) skb of all the fragments in the fragment - * queue - * - */ -static struct sk_buff *irttp_reassemble_skb(struct tsap_cb *self) -{ - struct sk_buff *skb, *frag; - int n = 0; /* Fragment index */ - - IRDA_ASSERT(self != NULL, return NULL;); - IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return NULL;); - - pr_debug("%s(), self->rx_sdu_size=%d\n", __func__, - self->rx_sdu_size); - - skb = dev_alloc_skb(TTP_HEADER + self->rx_sdu_size); - if (!skb) - return NULL; - - /* - * Need to reserve space for TTP header in case this skb needs to - * be requeued in case delivery failes - */ - skb_reserve(skb, TTP_HEADER); - skb_put(skb, self->rx_sdu_size); - - /* - * Copy all fragments to a new buffer - */ - while ((frag = skb_dequeue(&self->rx_fragments)) != NULL) { - skb_copy_to_linear_data_offset(skb, n, frag->data, frag->len); - n += frag->len; - - dev_kfree_skb(frag); - } - - pr_debug("%s(), frame len=%d, rx_sdu_size=%d, rx_max_sdu_size=%d\n", - __func__, n, self->rx_sdu_size, self->rx_max_sdu_size); - /* Note : irttp_run_rx_queue() calculate self->rx_sdu_size - * by summing the size of all fragments, so we should always - * have n == self->rx_sdu_size, except in cases where we - * droped the last fragment (when self->rx_sdu_size exceed - * self->rx_max_sdu_size), where n < self->rx_sdu_size. - * Jean II */ - IRDA_ASSERT(n <= self->rx_sdu_size, n = self->rx_sdu_size;); - - /* Set the new length */ - skb_trim(skb, n); - - self->rx_sdu_size = 0; - - return skb; -} - -/* - * Function irttp_fragment_skb (skb) - * - * Fragments a frame and queues all the fragments for transmission - * - */ -static inline void irttp_fragment_skb(struct tsap_cb *self, - struct sk_buff *skb) -{ - struct sk_buff *frag; - __u8 *frame; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;); - IRDA_ASSERT(skb != NULL, return;); - - /* - * Split frame into a number of segments - */ - while (skb->len > self->max_seg_size) { - pr_debug("%s(), fragmenting ...\n", __func__); - - /* Make new segment */ - frag = alloc_skb(self->max_seg_size+self->max_header_size, - GFP_ATOMIC); - if (!frag) - return; - - skb_reserve(frag, self->max_header_size); - - /* Copy data from the original skb into this fragment. */ - skb_copy_from_linear_data(skb, skb_put(frag, self->max_seg_size), - self->max_seg_size); - - /* Insert TTP header, with the more bit set */ - frame = skb_push(frag, TTP_HEADER); - frame[0] = TTP_MORE; - - /* Hide the copied data from the original skb */ - skb_pull(skb, self->max_seg_size); - - /* Queue fragment */ - skb_queue_tail(&self->tx_queue, frag); - } - /* Queue what is left of the original skb */ - pr_debug("%s(), queuing last segment\n", __func__); - - frame = skb_push(skb, TTP_HEADER); - frame[0] = 0x00; /* Clear more bit */ - - /* Queue fragment */ - skb_queue_tail(&self->tx_queue, skb); -} - -/* - * Function irttp_param_max_sdu_size (self, param) - * - * Handle the MaxSduSize parameter in the connect frames, this function - * will be called both when this parameter needs to be inserted into, and - * extracted from the connect frames - */ -static int irttp_param_max_sdu_size(void *instance, irda_param_t *param, - int get) -{ - struct tsap_cb *self; - - self = instance; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;); - - if (get) - param->pv.i = self->tx_max_sdu_size; - else - self->tx_max_sdu_size = param->pv.i; - - pr_debug("%s(), MaxSduSize=%d\n", __func__, param->pv.i); - - return 0; -} - -/*************************** CLIENT CALLS ***************************/ -/************************** LMP CALLBACKS **************************/ -/* Everything is happily mixed up. Waiting for next clean up - Jean II */ - -/* - * Initialization, that has to be done on new tsap - * instance allocation and on duplication - */ -static void irttp_init_tsap(struct tsap_cb *tsap) -{ - spin_lock_init(&tsap->lock); - init_timer(&tsap->todo_timer); - - skb_queue_head_init(&tsap->rx_queue); - skb_queue_head_init(&tsap->tx_queue); - skb_queue_head_init(&tsap->rx_fragments); -} - -/* - * Function irttp_open_tsap (stsap, notify) - * - * Create TSAP connection endpoint, - */ -struct tsap_cb *irttp_open_tsap(__u8 stsap_sel, int credit, notify_t *notify) -{ - struct tsap_cb *self; - struct lsap_cb *lsap; - notify_t ttp_notify; - - IRDA_ASSERT(irttp->magic == TTP_MAGIC, return NULL;); - - /* The IrLMP spec (IrLMP 1.1 p10) says that we have the right to - * use only 0x01-0x6F. Of course, we can use LSAP_ANY as well. - * JeanII */ - if ((stsap_sel != LSAP_ANY) && - ((stsap_sel < 0x01) || (stsap_sel >= 0x70))) { - pr_debug("%s(), invalid tsap!\n", __func__); - return NULL; - } - - self = kzalloc(sizeof(struct tsap_cb), GFP_ATOMIC); - if (self == NULL) - return NULL; - - /* Initialize internal objects */ - irttp_init_tsap(self); - - /* Initialise todo timer */ - self->todo_timer.data = (unsigned long) self; - self->todo_timer.function = &irttp_todo_expired; - - /* Initialize callbacks for IrLMP to use */ - irda_notify_init(&ttp_notify); - ttp_notify.connect_confirm = irttp_connect_confirm; - ttp_notify.connect_indication = irttp_connect_indication; - ttp_notify.disconnect_indication = irttp_disconnect_indication; - ttp_notify.data_indication = irttp_data_indication; - ttp_notify.udata_indication = irttp_udata_indication; - ttp_notify.flow_indication = irttp_flow_indication; - if (notify->status_indication != NULL) - ttp_notify.status_indication = irttp_status_indication; - ttp_notify.instance = self; - strncpy(ttp_notify.name, notify->name, NOTIFY_MAX_NAME); - - self->magic = TTP_TSAP_MAGIC; - self->connected = FALSE; - - /* - * Create LSAP at IrLMP layer - */ - lsap = irlmp_open_lsap(stsap_sel, &ttp_notify, 0); - if (lsap == NULL) { - pr_debug("%s: unable to allocate LSAP!!\n", __func__); - __irttp_close_tsap(self); - return NULL; - } - - /* - * If user specified LSAP_ANY as source TSAP selector, then IrLMP - * will replace it with whatever source selector which is free, so - * the stsap_sel we have might not be valid anymore - */ - self->stsap_sel = lsap->slsap_sel; - pr_debug("%s(), stsap_sel=%02x\n", __func__, self->stsap_sel); - - self->notify = *notify; - self->lsap = lsap; - - hashbin_insert(irttp->tsaps, (irda_queue_t *) self, (long) self, NULL); - - if (credit > TTP_RX_MAX_CREDIT) - self->initial_credit = TTP_RX_MAX_CREDIT; - else - self->initial_credit = credit; - - return self; -} -EXPORT_SYMBOL(irttp_open_tsap); - -/* - * Function irttp_close (handle) - * - * Remove an instance of a TSAP. This function should only deal with the - * deallocation of the TSAP, and resetting of the TSAPs values; - * - */ -static void __irttp_close_tsap(struct tsap_cb *self) -{ - /* First make sure we're connected. */ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;); - - irttp_flush_queues(self); - - del_timer(&self->todo_timer); - - /* This one won't be cleaned up if we are disconnect_pend + close_pend - * and we receive a disconnect_indication */ - if (self->disconnect_skb) - dev_kfree_skb(self->disconnect_skb); - - self->connected = FALSE; - self->magic = ~TTP_TSAP_MAGIC; - - kfree(self); -} - -/* - * Function irttp_close (self) - * - * Remove TSAP from list of all TSAPs and then deallocate all resources - * associated with this TSAP - * - * Note : because we *free* the tsap structure, it is the responsibility - * of the caller to make sure we are called only once and to deal with - * possible race conditions. - Jean II - */ -int irttp_close_tsap(struct tsap_cb *self) -{ - struct tsap_cb *tsap; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;); - - /* Make sure tsap has been disconnected */ - if (self->connected) { - /* Check if disconnect is not pending */ - if (!test_bit(0, &self->disconnect_pend)) { - net_warn_ratelimited("%s: TSAP still connected!\n", - __func__); - irttp_disconnect_request(self, NULL, P_NORMAL); - } - self->close_pend = TRUE; - irttp_start_todo_timer(self, HZ/10); - - return 0; /* Will be back! */ - } - - tsap = hashbin_remove(irttp->tsaps, (long) self, NULL); - - IRDA_ASSERT(tsap == self, return -1;); - - /* Close corresponding LSAP */ - if (self->lsap) { - irlmp_close_lsap(self->lsap); - self->lsap = NULL; - } - - __irttp_close_tsap(self); - - return 0; -} -EXPORT_SYMBOL(irttp_close_tsap); - -/* - * Function irttp_udata_request (self, skb) - * - * Send unreliable data on this TSAP - * - */ -int irttp_udata_request(struct tsap_cb *self, struct sk_buff *skb) -{ - int ret; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;); - IRDA_ASSERT(skb != NULL, return -1;); - - /* Take shortcut on zero byte packets */ - if (skb->len == 0) { - ret = 0; - goto err; - } - - /* Check that nothing bad happens */ - if (!self->connected) { - net_warn_ratelimited("%s(), Not connected\n", __func__); - ret = -ENOTCONN; - goto err; - } - - if (skb->len > self->max_seg_size) { - net_err_ratelimited("%s(), UData is too large for IrLAP!\n", - __func__); - ret = -EMSGSIZE; - goto err; - } - - irlmp_udata_request(self->lsap, skb); - self->stats.tx_packets++; - - return 0; - -err: - dev_kfree_skb(skb); - return ret; -} -EXPORT_SYMBOL(irttp_udata_request); - - -/* - * Function irttp_data_request (handle, skb) - * - * Queue frame for transmission. If SAR is enabled, fragement the frame - * and queue the fragments for transmission - */ -int irttp_data_request(struct tsap_cb *self, struct sk_buff *skb) -{ - __u8 *frame; - int ret; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;); - IRDA_ASSERT(skb != NULL, return -1;); - - pr_debug("%s() : queue len = %d\n", __func__, - skb_queue_len(&self->tx_queue)); - - /* Take shortcut on zero byte packets */ - if (skb->len == 0) { - ret = 0; - goto err; - } - - /* Check that nothing bad happens */ - if (!self->connected) { - net_warn_ratelimited("%s: Not connected\n", __func__); - ret = -ENOTCONN; - goto err; - } - - /* - * Check if SAR is disabled, and the frame is larger than what fits - * inside an IrLAP frame - */ - if ((self->tx_max_sdu_size == 0) && (skb->len > self->max_seg_size)) { - net_err_ratelimited("%s: SAR disabled, and data is too large for IrLAP!\n", - __func__); - ret = -EMSGSIZE; - goto err; - } - - /* - * Check if SAR is enabled, and the frame is larger than the - * TxMaxSduSize - */ - if ((self->tx_max_sdu_size != 0) && - (self->tx_max_sdu_size != TTP_SAR_UNBOUND) && - (skb->len > self->tx_max_sdu_size)) { - net_err_ratelimited("%s: SAR enabled, but data is larger than TxMaxSduSize!\n", - __func__); - ret = -EMSGSIZE; - goto err; - } - /* - * Check if transmit queue is full - */ - if (skb_queue_len(&self->tx_queue) >= TTP_TX_MAX_QUEUE) { - /* - * Give it a chance to empty itself - */ - irttp_run_tx_queue(self); - - /* Drop packet. This error code should trigger the caller - * to resend the data in the client code - Jean II */ - ret = -ENOBUFS; - goto err; - } - - /* Queue frame, or queue frame segments */ - if ((self->tx_max_sdu_size == 0) || (skb->len < self->max_seg_size)) { - /* Queue frame */ - IRDA_ASSERT(skb_headroom(skb) >= TTP_HEADER, return -1;); - frame = skb_push(skb, TTP_HEADER); - frame[0] = 0x00; /* Clear more bit */ - - skb_queue_tail(&self->tx_queue, skb); - } else { - /* - * Fragment the frame, this function will also queue the - * fragments, we don't care about the fact the transmit - * queue may be overfilled by all the segments for a little - * while - */ - irttp_fragment_skb(self, skb); - } - - /* Check if we can accept more data from client */ - if ((!self->tx_sdu_busy) && - (skb_queue_len(&self->tx_queue) > TTP_TX_HIGH_THRESHOLD)) { - /* Tx queue filling up, so stop client. */ - if (self->notify.flow_indication) { - self->notify.flow_indication(self->notify.instance, - self, FLOW_STOP); - } - /* self->tx_sdu_busy is the state of the client. - * Update state after notifying client to avoid - * race condition with irttp_flow_indication(). - * If the queue empty itself after our test but before - * we set the flag, we will fix ourselves below in - * irttp_run_tx_queue(). - * Jean II */ - self->tx_sdu_busy = TRUE; - } - - /* Try to make some progress */ - irttp_run_tx_queue(self); - - return 0; - -err: - dev_kfree_skb(skb); - return ret; -} -EXPORT_SYMBOL(irttp_data_request); - -/* - * Function irttp_run_tx_queue (self) - * - * Transmit packets queued for transmission (if possible) - * - */ -static void irttp_run_tx_queue(struct tsap_cb *self) -{ - struct sk_buff *skb; - unsigned long flags; - int n; - - pr_debug("%s() : send_credit = %d, queue_len = %d\n", - __func__, - self->send_credit, skb_queue_len(&self->tx_queue)); - - /* Get exclusive access to the tx queue, otherwise don't touch it */ - if (irda_lock(&self->tx_queue_lock) == FALSE) - return; - - /* Try to send out frames as long as we have credits - * and as long as LAP is not full. If LAP is full, it will - * poll us through irttp_flow_indication() - Jean II */ - while ((self->send_credit > 0) && - (!irlmp_lap_tx_queue_full(self->lsap)) && - (skb = skb_dequeue(&self->tx_queue))) { - /* - * Since we can transmit and receive frames concurrently, - * the code below is a critical region and we must assure that - * nobody messes with the credits while we update them. - */ - spin_lock_irqsave(&self->lock, flags); - - n = self->avail_credit; - self->avail_credit = 0; - - /* Only room for 127 credits in frame */ - if (n > 127) { - self->avail_credit = n-127; - n = 127; - } - self->remote_credit += n; - self->send_credit--; - - spin_unlock_irqrestore(&self->lock, flags); - - /* - * More bit must be set by the data_request() or fragment() - * functions - */ - skb->data[0] |= (n & 0x7f); - - /* Detach from socket. - * The current skb has a reference to the socket that sent - * it (skb->sk). When we pass it to IrLMP, the skb will be - * stored in in IrLAP (self->wx_list). When we are within - * IrLAP, we lose the notion of socket, so we should not - * have a reference to a socket. So, we drop it here. - * - * Why does it matter ? - * When the skb is freed (kfree_skb), if it is associated - * with a socket, it release buffer space on the socket - * (through sock_wfree() and sock_def_write_space()). - * If the socket no longer exist, we may crash. Hard. - * When we close a socket, we make sure that associated packets - * in IrTTP are freed. However, we have no way to cancel - * the packet that we have passed to IrLAP. So, if a packet - * remains in IrLAP (retry on the link or else) after we - * close the socket, we are dead ! - * Jean II */ - if (skb->sk != NULL) { - /* IrSOCK application, IrOBEX, ... */ - skb_orphan(skb); - } - /* IrCOMM over IrTTP, IrLAN, ... */ - - /* Pass the skb to IrLMP - done */ - irlmp_data_request(self->lsap, skb); - self->stats.tx_packets++; - } - - /* Check if we can accept more frames from client. - * We don't want to wait until the todo timer to do that, and we - * can't use tasklets (grr...), so we are obliged to give control - * to client. That's ok, this test will be true not too often - * (max once per LAP window) and we are called from places - * where we can spend a bit of time doing stuff. - Jean II */ - if ((self->tx_sdu_busy) && - (skb_queue_len(&self->tx_queue) < TTP_TX_LOW_THRESHOLD) && - (!self->close_pend)) { - if (self->notify.flow_indication) - self->notify.flow_indication(self->notify.instance, - self, FLOW_START); - - /* self->tx_sdu_busy is the state of the client. - * We don't really have a race here, but it's always safer - * to update our state after the client - Jean II */ - self->tx_sdu_busy = FALSE; - } - - /* Reset lock */ - self->tx_queue_lock = 0; -} - -/* - * Function irttp_give_credit (self) - * - * Send a dataless flowdata TTP-PDU and give available credit to peer - * TSAP - */ -static inline void irttp_give_credit(struct tsap_cb *self) -{ - struct sk_buff *tx_skb = NULL; - unsigned long flags; - int n; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;); - - pr_debug("%s() send=%d,avail=%d,remote=%d\n", - __func__, - self->send_credit, self->avail_credit, self->remote_credit); - - /* Give credit to peer */ - tx_skb = alloc_skb(TTP_MAX_HEADER, GFP_ATOMIC); - if (!tx_skb) - return; - - /* Reserve space for LMP, and LAP header */ - skb_reserve(tx_skb, LMP_MAX_HEADER); - - /* - * Since we can transmit and receive frames concurrently, - * the code below is a critical region and we must assure that - * nobody messes with the credits while we update them. - */ - spin_lock_irqsave(&self->lock, flags); - - n = self->avail_credit; - self->avail_credit = 0; - - /* Only space for 127 credits in frame */ - if (n > 127) { - self->avail_credit = n - 127; - n = 127; - } - self->remote_credit += n; - - spin_unlock_irqrestore(&self->lock, flags); - - skb_put(tx_skb, 1); - tx_skb->data[0] = (__u8) (n & 0x7f); - - irlmp_data_request(self->lsap, tx_skb); - self->stats.tx_packets++; -} - -/* - * Function irttp_udata_indication (instance, sap, skb) - * - * Received some unit-data (unreliable) - * - */ -static int irttp_udata_indication(void *instance, void *sap, - struct sk_buff *skb) -{ - struct tsap_cb *self; - int err; - - self = instance; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;); - IRDA_ASSERT(skb != NULL, return -1;); - - self->stats.rx_packets++; - - /* Just pass data to layer above */ - if (self->notify.udata_indication) { - err = self->notify.udata_indication(self->notify.instance, - self, skb); - /* Same comment as in irttp_do_data_indication() */ - if (!err) - return 0; - } - /* Either no handler, or handler returns an error */ - dev_kfree_skb(skb); - - return 0; -} - -/* - * Function irttp_data_indication (instance, sap, skb) - * - * Receive segment from IrLMP. - * - */ -static int irttp_data_indication(void *instance, void *sap, - struct sk_buff *skb) -{ - struct tsap_cb *self; - unsigned long flags; - int n; - - self = instance; - - n = skb->data[0] & 0x7f; /* Extract the credits */ - - self->stats.rx_packets++; - - /* Deal with inbound credit - * Since we can transmit and receive frames concurrently, - * the code below is a critical region and we must assure that - * nobody messes with the credits while we update them. - */ - spin_lock_irqsave(&self->lock, flags); - self->send_credit += n; - if (skb->len > 1) - self->remote_credit--; - spin_unlock_irqrestore(&self->lock, flags); - - /* - * Data or dataless packet? Dataless frames contains only the - * TTP_HEADER. - */ - if (skb->len > 1) { - /* - * We don't remove the TTP header, since we must preserve the - * more bit, so the defragment routing knows what to do - */ - skb_queue_tail(&self->rx_queue, skb); - } else { - /* Dataless flowdata TTP-PDU */ - dev_kfree_skb(skb); - } - - - /* Push data to the higher layer. - * We do it synchronously because running the todo timer for each - * receive packet would be too much overhead and latency. - * By passing control to the higher layer, we run the risk that - * it may take time or grab a lock. Most often, the higher layer - * will only put packet in a queue. - * Anyway, packets are only dripping through the IrDA, so we can - * have time before the next packet. - * Further, we are run from NET_BH, so the worse that can happen is - * us missing the optimal time to send back the PF bit in LAP. - * Jean II */ - irttp_run_rx_queue(self); - - /* We now give credits to peer in irttp_run_rx_queue(). - * We need to send credit *NOW*, otherwise we are going - * to miss the next Tx window. The todo timer may take - * a while before it's run... - Jean II */ - - /* - * If the peer device has given us some credits and we didn't have - * anyone from before, then we need to shedule the tx queue. - * We need to do that because our Tx have stopped (so we may not - * get any LAP flow indication) and the user may be stopped as - * well. - Jean II - */ - if (self->send_credit == n) { - /* Restart pushing stuff to LAP */ - irttp_run_tx_queue(self); - /* Note : we don't want to schedule the todo timer - * because it has horrible latency. No tasklets - * because the tasklet API is broken. - Jean II */ - } - - return 0; -} - -/* - * Function irttp_status_indication (self, reason) - * - * Status_indication, just pass to the higher layer... - * - */ -static void irttp_status_indication(void *instance, - LINK_STATUS link, LOCK_STATUS lock) -{ - struct tsap_cb *self; - - self = instance; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;); - - /* Check if client has already closed the TSAP and gone away */ - if (self->close_pend) - return; - - /* - * Inform service user if he has requested it - */ - if (self->notify.status_indication != NULL) - self->notify.status_indication(self->notify.instance, - link, lock); - else - pr_debug("%s(), no handler\n", __func__); -} - -/* - * Function irttp_flow_indication (self, reason) - * - * Flow_indication : IrLAP tells us to send more data. - * - */ -static void irttp_flow_indication(void *instance, void *sap, LOCAL_FLOW flow) -{ - struct tsap_cb *self; - - self = instance; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;); - - pr_debug("%s(instance=%p)\n", __func__, self); - - /* We are "polled" directly from LAP, and the LAP want to fill - * its Tx window. We want to do our best to send it data, so that - * we maximise the window. On the other hand, we want to limit the - * amount of work here so that LAP doesn't hang forever waiting - * for packets. - Jean II */ - - /* Try to send some packets. Currently, LAP calls us every time - * there is one free slot, so we will send only one packet. - * This allow the scheduler to do its round robin - Jean II */ - irttp_run_tx_queue(self); - - /* Note regarding the interraction with higher layer. - * irttp_run_tx_queue() may call the client when its queue - * start to empty, via notify.flow_indication(). Initially. - * I wanted this to happen in a tasklet, to avoid client - * grabbing the CPU, but we can't use tasklets safely. And timer - * is definitely too slow. - * This will happen only once per LAP window, and usually at - * the third packet (unless window is smaller). LAP is still - * doing mtt and sending first packet so it's sort of OK - * to do that. Jean II */ - - /* If we need to send disconnect. try to do it now */ - if (self->disconnect_pend) - irttp_start_todo_timer(self, 0); -} - -/* - * Function irttp_flow_request (self, command) - * - * This function could be used by the upper layers to tell IrTTP to stop - * delivering frames if the receive queues are starting to get full, or - * to tell IrTTP to start delivering frames again. - */ -void irttp_flow_request(struct tsap_cb *self, LOCAL_FLOW flow) -{ - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;); - - switch (flow) { - case FLOW_STOP: - pr_debug("%s(), flow stop\n", __func__); - self->rx_sdu_busy = TRUE; - break; - case FLOW_START: - pr_debug("%s(), flow start\n", __func__); - self->rx_sdu_busy = FALSE; - - /* Client say he can accept more data, try to free our - * queues ASAP - Jean II */ - irttp_run_rx_queue(self); - - break; - default: - pr_debug("%s(), Unknown flow command!\n", __func__); - } -} -EXPORT_SYMBOL(irttp_flow_request); - -/* - * Function irttp_connect_request (self, dtsap_sel, daddr, qos) - * - * Try to connect to remote destination TSAP selector - * - */ -int irttp_connect_request(struct tsap_cb *self, __u8 dtsap_sel, - __u32 saddr, __u32 daddr, - struct qos_info *qos, __u32 max_sdu_size, - struct sk_buff *userdata) -{ - struct sk_buff *tx_skb; - __u8 *frame; - __u8 n; - - pr_debug("%s(), max_sdu_size=%d\n", __func__, max_sdu_size); - - IRDA_ASSERT(self != NULL, return -EBADR;); - IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -EBADR;); - - if (self->connected) { - if (userdata) - dev_kfree_skb(userdata); - return -EISCONN; - } - - /* Any userdata supplied? */ - if (userdata == NULL) { - tx_skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER, - GFP_ATOMIC); - if (!tx_skb) - return -ENOMEM; - - /* Reserve space for MUX_CONTROL and LAP header */ - skb_reserve(tx_skb, TTP_MAX_HEADER + TTP_SAR_HEADER); - } else { - tx_skb = userdata; - /* - * Check that the client has reserved enough space for - * headers - */ - IRDA_ASSERT(skb_headroom(userdata) >= TTP_MAX_HEADER, - { dev_kfree_skb(userdata); return -1; }); - } - - /* Initialize connection parameters */ - self->connected = FALSE; - self->avail_credit = 0; - self->rx_max_sdu_size = max_sdu_size; - self->rx_sdu_size = 0; - self->rx_sdu_busy = FALSE; - self->dtsap_sel = dtsap_sel; - - n = self->initial_credit; - - self->remote_credit = 0; - self->send_credit = 0; - - /* - * Give away max 127 credits for now - */ - if (n > 127) { - self->avail_credit = n - 127; - n = 127; - } - - self->remote_credit = n; - - /* SAR enabled? */ - if (max_sdu_size > 0) { - IRDA_ASSERT(skb_headroom(tx_skb) >= (TTP_MAX_HEADER + TTP_SAR_HEADER), - { dev_kfree_skb(tx_skb); return -1; }); - - /* Insert SAR parameters */ - frame = skb_push(tx_skb, TTP_HEADER + TTP_SAR_HEADER); - - frame[0] = TTP_PARAMETERS | n; - frame[1] = 0x04; /* Length */ - frame[2] = 0x01; /* MaxSduSize */ - frame[3] = 0x02; /* Value length */ - - put_unaligned(cpu_to_be16((__u16) max_sdu_size), - (__be16 *)(frame+4)); - } else { - /* Insert plain TTP header */ - frame = skb_push(tx_skb, TTP_HEADER); - - /* Insert initial credit in frame */ - frame[0] = n & 0x7f; - } - - /* Connect with IrLMP. No QoS parameters for now */ - return irlmp_connect_request(self->lsap, dtsap_sel, saddr, daddr, qos, - tx_skb); -} -EXPORT_SYMBOL(irttp_connect_request); - -/* - * Function irttp_connect_confirm (handle, qos, skb) - * - * Service user confirms TSAP connection with peer. - * - */ -static void irttp_connect_confirm(void *instance, void *sap, - struct qos_info *qos, __u32 max_seg_size, - __u8 max_header_size, struct sk_buff *skb) -{ - struct tsap_cb *self; - int parameters; - int ret; - __u8 plen; - __u8 n; - - self = instance; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;); - IRDA_ASSERT(skb != NULL, return;); - - self->max_seg_size = max_seg_size - TTP_HEADER; - self->max_header_size = max_header_size + TTP_HEADER; - - /* - * Check if we have got some QoS parameters back! This should be the - * negotiated QoS for the link. - */ - if (qos) { - pr_debug("IrTTP, Negotiated BAUD_RATE: %02x\n", - qos->baud_rate.bits); - pr_debug("IrTTP, Negotiated BAUD_RATE: %d bps.\n", - qos->baud_rate.value); - } - - n = skb->data[0] & 0x7f; - - pr_debug("%s(), Initial send_credit=%d\n", __func__, n); - - self->send_credit = n; - self->tx_max_sdu_size = 0; - self->connected = TRUE; - - parameters = skb->data[0] & 0x80; - - IRDA_ASSERT(skb->len >= TTP_HEADER, return;); - skb_pull(skb, TTP_HEADER); - - if (parameters) { - plen = skb->data[0]; - - ret = irda_param_extract_all(self, skb->data+1, - IRDA_MIN(skb->len-1, plen), - ¶m_info); - - /* Any errors in the parameter list? */ - if (ret < 0) { - net_warn_ratelimited("%s: error extracting parameters\n", - __func__); - dev_kfree_skb(skb); - - /* Do not accept this connection attempt */ - return; - } - /* Remove parameters */ - skb_pull(skb, IRDA_MIN(skb->len, plen+1)); - } - - pr_debug("%s() send=%d,avail=%d,remote=%d\n", __func__, - self->send_credit, self->avail_credit, self->remote_credit); - - pr_debug("%s(), MaxSduSize=%d\n", __func__, - self->tx_max_sdu_size); - - if (self->notify.connect_confirm) { - self->notify.connect_confirm(self->notify.instance, self, qos, - self->tx_max_sdu_size, - self->max_header_size, skb); - } else - dev_kfree_skb(skb); -} - -/* - * Function irttp_connect_indication (handle, skb) - * - * Some other device is connecting to this TSAP - * - */ -static void irttp_connect_indication(void *instance, void *sap, - struct qos_info *qos, __u32 max_seg_size, __u8 max_header_size, - struct sk_buff *skb) -{ - struct tsap_cb *self; - struct lsap_cb *lsap; - int parameters; - int ret; - __u8 plen; - __u8 n; - - self = instance; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;); - IRDA_ASSERT(skb != NULL, return;); - - lsap = sap; - - self->max_seg_size = max_seg_size - TTP_HEADER; - self->max_header_size = max_header_size+TTP_HEADER; - - pr_debug("%s(), TSAP sel=%02x\n", __func__, self->stsap_sel); - - /* Need to update dtsap_sel if its equal to LSAP_ANY */ - self->dtsap_sel = lsap->dlsap_sel; - - n = skb->data[0] & 0x7f; - - self->send_credit = n; - self->tx_max_sdu_size = 0; - - parameters = skb->data[0] & 0x80; - - IRDA_ASSERT(skb->len >= TTP_HEADER, return;); - skb_pull(skb, TTP_HEADER); - - if (parameters) { - plen = skb->data[0]; - - ret = irda_param_extract_all(self, skb->data+1, - IRDA_MIN(skb->len-1, plen), - ¶m_info); - - /* Any errors in the parameter list? */ - if (ret < 0) { - net_warn_ratelimited("%s: error extracting parameters\n", - __func__); - dev_kfree_skb(skb); - - /* Do not accept this connection attempt */ - return; - } - - /* Remove parameters */ - skb_pull(skb, IRDA_MIN(skb->len, plen+1)); - } - - if (self->notify.connect_indication) { - self->notify.connect_indication(self->notify.instance, self, - qos, self->tx_max_sdu_size, - self->max_header_size, skb); - } else - dev_kfree_skb(skb); -} - -/* - * Function irttp_connect_response (handle, userdata) - * - * Service user is accepting the connection, just pass it down to - * IrLMP! - * - */ -int irttp_connect_response(struct tsap_cb *self, __u32 max_sdu_size, - struct sk_buff *userdata) -{ - struct sk_buff *tx_skb; - __u8 *frame; - int ret; - __u8 n; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;); - - pr_debug("%s(), Source TSAP selector=%02x\n", __func__, - self->stsap_sel); - - /* Any userdata supplied? */ - if (userdata == NULL) { - tx_skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER, - GFP_ATOMIC); - if (!tx_skb) - return -ENOMEM; - - /* Reserve space for MUX_CONTROL and LAP header */ - skb_reserve(tx_skb, TTP_MAX_HEADER + TTP_SAR_HEADER); - } else { - tx_skb = userdata; - /* - * Check that the client has reserved enough space for - * headers - */ - IRDA_ASSERT(skb_headroom(userdata) >= TTP_MAX_HEADER, - { dev_kfree_skb(userdata); return -1; }); - } - - self->avail_credit = 0; - self->remote_credit = 0; - self->rx_max_sdu_size = max_sdu_size; - self->rx_sdu_size = 0; - self->rx_sdu_busy = FALSE; - - n = self->initial_credit; - - /* Frame has only space for max 127 credits (7 bits) */ - if (n > 127) { - self->avail_credit = n - 127; - n = 127; - } - - self->remote_credit = n; - self->connected = TRUE; - - /* SAR enabled? */ - if (max_sdu_size > 0) { - IRDA_ASSERT(skb_headroom(tx_skb) >= (TTP_MAX_HEADER + TTP_SAR_HEADER), - { dev_kfree_skb(tx_skb); return -1; }); - - /* Insert TTP header with SAR parameters */ - frame = skb_push(tx_skb, TTP_HEADER + TTP_SAR_HEADER); - - frame[0] = TTP_PARAMETERS | n; - frame[1] = 0x04; /* Length */ - - /* irda_param_insert(self, IRTTP_MAX_SDU_SIZE, frame+1, */ -/* TTP_SAR_HEADER, ¶m_info) */ - - frame[2] = 0x01; /* MaxSduSize */ - frame[3] = 0x02; /* Value length */ - - put_unaligned(cpu_to_be16((__u16) max_sdu_size), - (__be16 *)(frame+4)); - } else { - /* Insert TTP header */ - frame = skb_push(tx_skb, TTP_HEADER); - - frame[0] = n & 0x7f; - } - - ret = irlmp_connect_response(self->lsap, tx_skb); - - return ret; -} -EXPORT_SYMBOL(irttp_connect_response); - -/* - * Function irttp_dup (self, instance) - * - * Duplicate TSAP, can be used by servers to confirm a connection on a - * new TSAP so it can keep listening on the old one. - */ -struct tsap_cb *irttp_dup(struct tsap_cb *orig, void *instance) -{ - struct tsap_cb *new; - unsigned long flags; - - /* Protect our access to the old tsap instance */ - spin_lock_irqsave(&irttp->tsaps->hb_spinlock, flags); - - /* Find the old instance */ - if (!hashbin_find(irttp->tsaps, (long) orig, NULL)) { - pr_debug("%s(), unable to find TSAP\n", __func__); - spin_unlock_irqrestore(&irttp->tsaps->hb_spinlock, flags); - return NULL; - } - - /* Allocate a new instance */ - new = kmemdup(orig, sizeof(struct tsap_cb), GFP_ATOMIC); - if (!new) { - pr_debug("%s(), unable to kmalloc\n", __func__); - spin_unlock_irqrestore(&irttp->tsaps->hb_spinlock, flags); - return NULL; - } - spin_lock_init(&new->lock); - - /* We don't need the old instance any more */ - spin_unlock_irqrestore(&irttp->tsaps->hb_spinlock, flags); - - /* Try to dup the LSAP (may fail if we were too slow) */ - new->lsap = irlmp_dup(orig->lsap, new); - if (!new->lsap) { - pr_debug("%s(), dup failed!\n", __func__); - kfree(new); - return NULL; - } - - /* Not everything should be copied */ - new->notify.instance = instance; - - /* Initialize internal objects */ - irttp_init_tsap(new); - - /* This is locked */ - hashbin_insert(irttp->tsaps, (irda_queue_t *) new, (long) new, NULL); - - return new; -} -EXPORT_SYMBOL(irttp_dup); - -/* - * Function irttp_disconnect_request (self) - * - * Close this connection please! If priority is high, the queued data - * segments, if any, will be deallocated first - * - */ -int irttp_disconnect_request(struct tsap_cb *self, struct sk_buff *userdata, - int priority) -{ - int ret; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return -1;); - - /* Already disconnected? */ - if (!self->connected) { - pr_debug("%s(), already disconnected!\n", __func__); - if (userdata) - dev_kfree_skb(userdata); - return -1; - } - - /* Disconnect already pending ? - * We need to use an atomic operation to prevent reentry. This - * function may be called from various context, like user, timer - * for following a disconnect_indication() (i.e. net_bh). - * Jean II */ - if (test_and_set_bit(0, &self->disconnect_pend)) { - pr_debug("%s(), disconnect already pending\n", - __func__); - if (userdata) - dev_kfree_skb(userdata); - - /* Try to make some progress */ - irttp_run_tx_queue(self); - return -1; - } - - /* - * Check if there is still data segments in the transmit queue - */ - if (!skb_queue_empty(&self->tx_queue)) { - if (priority == P_HIGH) { - /* - * No need to send the queued data, if we are - * disconnecting right now since the data will - * not have any usable connection to be sent on - */ - pr_debug("%s(): High priority!!()\n", __func__); - irttp_flush_queues(self); - } else if (priority == P_NORMAL) { - /* - * Must delay disconnect until after all data segments - * have been sent and the tx_queue is empty - */ - /* We'll reuse this one later for the disconnect */ - self->disconnect_skb = userdata; /* May be NULL */ - - irttp_run_tx_queue(self); - - irttp_start_todo_timer(self, HZ/10); - return -1; - } - } - /* Note : we don't need to check if self->rx_queue is full and the - * state of self->rx_sdu_busy because the disconnect response will - * be sent at the LMP level (so even if the peer has its Tx queue - * full of data). - Jean II */ - - pr_debug("%s(), Disconnecting ...\n", __func__); - self->connected = FALSE; - - if (!userdata) { - struct sk_buff *tx_skb; - tx_skb = alloc_skb(LMP_MAX_HEADER, GFP_ATOMIC); - if (!tx_skb) - return -ENOMEM; - - /* - * Reserve space for MUX and LAP header - */ - skb_reserve(tx_skb, LMP_MAX_HEADER); - - userdata = tx_skb; - } - ret = irlmp_disconnect_request(self->lsap, userdata); - - /* The disconnect is no longer pending */ - clear_bit(0, &self->disconnect_pend); /* FALSE */ - - return ret; -} -EXPORT_SYMBOL(irttp_disconnect_request); - -/* - * Function irttp_disconnect_indication (self, reason) - * - * Disconnect indication, TSAP disconnected by peer? - * - */ -static void irttp_disconnect_indication(void *instance, void *sap, - LM_REASON reason, struct sk_buff *skb) -{ - struct tsap_cb *self; - - self = instance; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == TTP_TSAP_MAGIC, return;); - - /* Prevent higher layer to send more data */ - self->connected = FALSE; - - /* Check if client has already tried to close the TSAP */ - if (self->close_pend) { - /* In this case, the higher layer is probably gone. Don't - * bother it and clean up the remains - Jean II */ - if (skb) - dev_kfree_skb(skb); - irttp_close_tsap(self); - return; - } - - /* If we are here, we assume that is the higher layer is still - * waiting for the disconnect notification and able to process it, - * even if he tried to disconnect. Otherwise, it would have already - * attempted to close the tsap and self->close_pend would be TRUE. - * Jean II */ - - /* No need to notify the client if has already tried to disconnect */ - if (self->notify.disconnect_indication) - self->notify.disconnect_indication(self->notify.instance, self, - reason, skb); - else - if (skb) - dev_kfree_skb(skb); -} - -/* - * Function irttp_do_data_indication (self, skb) - * - * Try to deliver reassembled skb to layer above, and requeue it if that - * for some reason should fail. We mark rx sdu as busy to apply back - * pressure is necessary. - */ -static void irttp_do_data_indication(struct tsap_cb *self, struct sk_buff *skb) -{ - int err; - - /* Check if client has already closed the TSAP and gone away */ - if (self->close_pend) { - dev_kfree_skb(skb); - return; - } - - err = self->notify.data_indication(self->notify.instance, self, skb); - - /* Usually the layer above will notify that it's input queue is - * starting to get filled by using the flow request, but this may - * be difficult, so it can instead just refuse to eat it and just - * give an error back - */ - if (err) { - pr_debug("%s() requeueing skb!\n", __func__); - - /* Make sure we take a break */ - self->rx_sdu_busy = TRUE; - - /* Need to push the header in again */ - skb_push(skb, TTP_HEADER); - skb->data[0] = 0x00; /* Make sure MORE bit is cleared */ - - /* Put skb back on queue */ - skb_queue_head(&self->rx_queue, skb); - } -} - -/* - * Function irttp_run_rx_queue (self) - * - * Check if we have any frames to be transmitted, or if we have any - * available credit to give away. - */ -static void irttp_run_rx_queue(struct tsap_cb *self) -{ - struct sk_buff *skb; - int more = 0; - - pr_debug("%s() send=%d,avail=%d,remote=%d\n", __func__, - self->send_credit, self->avail_credit, self->remote_credit); - - /* Get exclusive access to the rx queue, otherwise don't touch it */ - if (irda_lock(&self->rx_queue_lock) == FALSE) - return; - - /* - * Reassemble all frames in receive queue and deliver them - */ - while (!self->rx_sdu_busy && (skb = skb_dequeue(&self->rx_queue))) { - /* This bit will tell us if it's the last fragment or not */ - more = skb->data[0] & 0x80; - - /* Remove TTP header */ - skb_pull(skb, TTP_HEADER); - - /* Add the length of the remaining data */ - self->rx_sdu_size += skb->len; - - /* - * If SAR is disabled, or user has requested no reassembly - * of received fragments then we just deliver them - * immediately. This can be requested by clients that - * implements byte streams without any message boundaries - */ - if (self->rx_max_sdu_size == TTP_SAR_DISABLE) { - irttp_do_data_indication(self, skb); - self->rx_sdu_size = 0; - - continue; - } - - /* Check if this is a fragment, and not the last fragment */ - if (more) { - /* - * Queue the fragment if we still are within the - * limits of the maximum size of the rx_sdu - */ - if (self->rx_sdu_size <= self->rx_max_sdu_size) { - pr_debug("%s(), queueing frag\n", - __func__); - skb_queue_tail(&self->rx_fragments, skb); - } else { - /* Free the part of the SDU that is too big */ - dev_kfree_skb(skb); - } - continue; - } - /* - * This is the last fragment, so time to reassemble! - */ - if ((self->rx_sdu_size <= self->rx_max_sdu_size) || - (self->rx_max_sdu_size == TTP_SAR_UNBOUND)) { - /* - * A little optimizing. Only queue the fragment if - * there are other fragments. Since if this is the - * last and only fragment, there is no need to - * reassemble :-) - */ - if (!skb_queue_empty(&self->rx_fragments)) { - skb_queue_tail(&self->rx_fragments, - skb); - - skb = irttp_reassemble_skb(self); - } - - /* Now we can deliver the reassembled skb */ - irttp_do_data_indication(self, skb); - } else { - pr_debug("%s(), Truncated frame\n", __func__); - - /* Free the part of the SDU that is too big */ - dev_kfree_skb(skb); - - /* Deliver only the valid but truncated part of SDU */ - skb = irttp_reassemble_skb(self); - - irttp_do_data_indication(self, skb); - } - self->rx_sdu_size = 0; - } - - /* - * It's not trivial to keep track of how many credits are available - * by incrementing at each packet, because delivery may fail - * (irttp_do_data_indication() may requeue the frame) and because - * we need to take care of fragmentation. - * We want the other side to send up to initial_credit packets. - * We have some frames in our queues, and we have already allowed it - * to send remote_credit. - * No need to spinlock, write is atomic and self correcting... - * Jean II - */ - self->avail_credit = (self->initial_credit - - (self->remote_credit + - skb_queue_len(&self->rx_queue) + - skb_queue_len(&self->rx_fragments))); - - /* Do we have too much credits to send to peer ? */ - if ((self->remote_credit <= TTP_RX_MIN_CREDIT) && - (self->avail_credit > 0)) { - /* Send explicit credit frame */ - irttp_give_credit(self); - /* Note : do *NOT* check if tx_queue is non-empty, that - * will produce deadlocks. I repeat : send a credit frame - * even if we have something to send in our Tx queue. - * If we have credits, it means that our Tx queue is blocked. - * - * Let's suppose the peer can't keep up with our Tx. He will - * flow control us by not sending us any credits, and we - * will stop Tx and start accumulating credits here. - * Up to the point where the peer will stop its Tx queue, - * for lack of credits. - * Let's assume the peer application is single threaded. - * It will block on Tx and never consume any Rx buffer. - * Deadlock. Guaranteed. - Jean II - */ - } - - /* Reset lock */ - self->rx_queue_lock = 0; -} - -#ifdef CONFIG_PROC_FS -struct irttp_iter_state { - int id; -}; - -static void *irttp_seq_start(struct seq_file *seq, loff_t *pos) -{ - struct irttp_iter_state *iter = seq->private; - struct tsap_cb *self; - - /* Protect our access to the tsap list */ - spin_lock_irq(&irttp->tsaps->hb_spinlock); - iter->id = 0; - - for (self = (struct tsap_cb *) hashbin_get_first(irttp->tsaps); - self != NULL; - self = (struct tsap_cb *) hashbin_get_next(irttp->tsaps)) { - if (iter->id == *pos) - break; - ++iter->id; - } - - return self; -} - -static void *irttp_seq_next(struct seq_file *seq, void *v, loff_t *pos) -{ - struct irttp_iter_state *iter = seq->private; - - ++*pos; - ++iter->id; - return (void *) hashbin_get_next(irttp->tsaps); -} - -static void irttp_seq_stop(struct seq_file *seq, void *v) -{ - spin_unlock_irq(&irttp->tsaps->hb_spinlock); -} - -static int irttp_seq_show(struct seq_file *seq, void *v) -{ - const struct irttp_iter_state *iter = seq->private; - const struct tsap_cb *self = v; - - seq_printf(seq, "TSAP %d, ", iter->id); - seq_printf(seq, "stsap_sel: %02x, ", - self->stsap_sel); - seq_printf(seq, "dtsap_sel: %02x\n", - self->dtsap_sel); - seq_printf(seq, " connected: %s, ", - self->connected ? "TRUE" : "FALSE"); - seq_printf(seq, "avail credit: %d, ", - self->avail_credit); - seq_printf(seq, "remote credit: %d, ", - self->remote_credit); - seq_printf(seq, "send credit: %d\n", - self->send_credit); - seq_printf(seq, " tx packets: %lu, ", - self->stats.tx_packets); - seq_printf(seq, "rx packets: %lu, ", - self->stats.rx_packets); - seq_printf(seq, "tx_queue len: %u ", - skb_queue_len(&self->tx_queue)); - seq_printf(seq, "rx_queue len: %u\n", - skb_queue_len(&self->rx_queue)); - seq_printf(seq, " tx_sdu_busy: %s, ", - self->tx_sdu_busy ? "TRUE" : "FALSE"); - seq_printf(seq, "rx_sdu_busy: %s\n", - self->rx_sdu_busy ? "TRUE" : "FALSE"); - seq_printf(seq, " max_seg_size: %u, ", - self->max_seg_size); - seq_printf(seq, "tx_max_sdu_size: %u, ", - self->tx_max_sdu_size); - seq_printf(seq, "rx_max_sdu_size: %u\n", - self->rx_max_sdu_size); - - seq_printf(seq, " Used by (%s)\n\n", - self->notify.name); - return 0; -} - -static const struct seq_operations irttp_seq_ops = { - .start = irttp_seq_start, - .next = irttp_seq_next, - .stop = irttp_seq_stop, - .show = irttp_seq_show, -}; - -static int irttp_seq_open(struct inode *inode, struct file *file) -{ - return seq_open_private(file, &irttp_seq_ops, - sizeof(struct irttp_iter_state)); -} - -const struct file_operations irttp_seq_fops = { - .owner = THIS_MODULE, - .open = irttp_seq_open, - .read = seq_read, - .llseek = seq_lseek, - .release = seq_release_private, -}; - -#endif /* PROC_FS */ diff --git a/net/irda/parameters.c b/net/irda/parameters.c deleted file mode 100644 index 16ce32ffe004..000000000000 --- a/net/irda/parameters.c +++ /dev/null @@ -1,584 +0,0 @@ -/********************************************************************* - * - * Filename: parameters.c - * Version: 1.0 - * Description: A more general way to handle (pi,pl,pv) parameters - * Status: Experimental. - * Author: Dag Brattli <dagb@cs.uit.no> - * Created at: Mon Jun 7 10:25:11 1999 - * Modified at: Sun Jan 30 14:08:39 2000 - * Modified by: Dag Brattli <dagb@cs.uit.no> - * - * Copyright (c) 1999-2000 Dag Brattli, All Rights Reserved. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. - * - ********************************************************************/ - -#include <linux/types.h> -#include <linux/module.h> - -#include <asm/unaligned.h> -#include <asm/byteorder.h> - -#include <net/irda/irda.h> -#include <net/irda/parameters.h> - -static int irda_extract_integer(void *self, __u8 *buf, int len, __u8 pi, - PV_TYPE type, PI_HANDLER func); -static int irda_extract_string(void *self, __u8 *buf, int len, __u8 pi, - PV_TYPE type, PI_HANDLER func); -static int irda_extract_octseq(void *self, __u8 *buf, int len, __u8 pi, - PV_TYPE type, PI_HANDLER func); -static int irda_extract_no_value(void *self, __u8 *buf, int len, __u8 pi, - PV_TYPE type, PI_HANDLER func); - -static int irda_insert_integer(void *self, __u8 *buf, int len, __u8 pi, - PV_TYPE type, PI_HANDLER func); -static int irda_insert_no_value(void *self, __u8 *buf, int len, __u8 pi, - PV_TYPE type, PI_HANDLER func); - -static int irda_param_unpack(__u8 *buf, char *fmt, ...); - -/* Parameter value call table. Must match PV_TYPE */ -static const PV_HANDLER pv_extract_table[] = { - irda_extract_integer, /* Handler for any length integers */ - irda_extract_integer, /* Handler for 8 bits integers */ - irda_extract_integer, /* Handler for 16 bits integers */ - irda_extract_string, /* Handler for strings */ - irda_extract_integer, /* Handler for 32 bits integers */ - irda_extract_octseq, /* Handler for octet sequences */ - irda_extract_no_value /* Handler for no value parameters */ -}; - -static const PV_HANDLER pv_insert_table[] = { - irda_insert_integer, /* Handler for any length integers */ - irda_insert_integer, /* Handler for 8 bits integers */ - irda_insert_integer, /* Handler for 16 bits integers */ - NULL, /* Handler for strings */ - irda_insert_integer, /* Handler for 32 bits integers */ - NULL, /* Handler for octet sequences */ - irda_insert_no_value /* Handler for no value parameters */ -}; - -/* - * Function irda_insert_no_value (self, buf, len, pi, type, func) - */ -static int irda_insert_no_value(void *self, __u8 *buf, int len, __u8 pi, - PV_TYPE type, PI_HANDLER func) -{ - irda_param_t p; - int ret; - - p.pi = pi; - p.pl = 0; - - /* Call handler for this parameter */ - ret = (*func)(self, &p, PV_GET); - - /* Extract values anyway, since handler may need them */ - irda_param_pack(buf, "bb", p.pi, p.pl); - - if (ret < 0) - return ret; - - return 2; /* Inserted pl+2 bytes */ -} - -/* - * Function irda_extract_no_value (self, buf, len, type, func) - * - * Extracts a parameter without a pv field (pl=0) - * - */ -static int irda_extract_no_value(void *self, __u8 *buf, int len, __u8 pi, - PV_TYPE type, PI_HANDLER func) -{ - irda_param_t p; - int ret; - - /* Extract values anyway, since handler may need them */ - irda_param_unpack(buf, "bb", &p.pi, &p.pl); - - /* Call handler for this parameter */ - ret = (*func)(self, &p, PV_PUT); - - if (ret < 0) - return ret; - - return 2; /* Extracted pl+2 bytes */ -} - -/* - * Function irda_insert_integer (self, buf, len, pi, type, func) - */ -static int irda_insert_integer(void *self, __u8 *buf, int len, __u8 pi, - PV_TYPE type, PI_HANDLER func) -{ - irda_param_t p; - int n = 0; - int err; - - p.pi = pi; /* In case handler needs to know */ - p.pl = type & PV_MASK; /* The integer type codes the length as well */ - p.pv.i = 0; /* Clear value */ - - /* Call handler for this parameter */ - err = (*func)(self, &p, PV_GET); - if (err < 0) - return err; - - /* - * If parameter length is still 0, then (1) this is an any length - * integer, and (2) the handler function does not care which length - * we choose to use, so we pick the one the gives the fewest bytes. - */ - if (p.pl == 0) { - if (p.pv.i < 0xff) { - pr_debug("%s(), using 1 byte\n", __func__); - p.pl = 1; - } else if (p.pv.i < 0xffff) { - pr_debug("%s(), using 2 bytes\n", __func__); - p.pl = 2; - } else { - pr_debug("%s(), using 4 bytes\n", __func__); - p.pl = 4; /* Default length */ - } - } - /* Check if buffer is long enough for insertion */ - if (len < (2+p.pl)) { - net_warn_ratelimited("%s: buffer too short for insertion!\n", - __func__); - return -1; - } - pr_debug("%s(), pi=%#x, pl=%d, pi=%d\n", __func__, - p.pi, p.pl, p.pv.i); - switch (p.pl) { - case 1: - n += irda_param_pack(buf, "bbb", p.pi, p.pl, (__u8) p.pv.i); - break; - case 2: - if (type & PV_BIG_ENDIAN) - p.pv.i = cpu_to_be16((__u16) p.pv.i); - else - p.pv.i = cpu_to_le16((__u16) p.pv.i); - n += irda_param_pack(buf, "bbs", p.pi, p.pl, (__u16) p.pv.i); - break; - case 4: - if (type & PV_BIG_ENDIAN) - cpu_to_be32s(&p.pv.i); - else - cpu_to_le32s(&p.pv.i); - n += irda_param_pack(buf, "bbi", p.pi, p.pl, p.pv.i); - - break; - default: - net_warn_ratelimited("%s: length %d not supported\n", - __func__, p.pl); - /* Skip parameter */ - return -1; - } - - return p.pl+2; /* Inserted pl+2 bytes */ -} - -/* - * Function irda_extract integer (self, buf, len, pi, type, func) - * - * Extract a possibly variable length integer from buffer, and call - * handler for processing of the parameter - */ -static int irda_extract_integer(void *self, __u8 *buf, int len, __u8 pi, - PV_TYPE type, PI_HANDLER func) -{ - irda_param_t p; - int n = 0; - int extract_len; /* Real length we extract */ - int err; - - p.pi = pi; /* In case handler needs to know */ - p.pl = buf[1]; /* Extract length of value */ - p.pv.i = 0; /* Clear value */ - extract_len = p.pl; /* Default : extract all */ - - /* Check if buffer is long enough for parsing */ - if (len < (2+p.pl)) { - net_warn_ratelimited("%s: buffer too short for parsing! Need %d bytes, but len is only %d\n", - __func__, p.pl, len); - return -1; - } - - /* - * Check that the integer length is what we expect it to be. If the - * handler want a 16 bits integer then a 32 bits is not good enough - * PV_INTEGER means that the handler is flexible. - */ - if (((type & PV_MASK) != PV_INTEGER) && ((type & PV_MASK) != p.pl)) { - net_err_ratelimited("%s: invalid parameter length! Expected %d bytes, but value had %d bytes!\n", - __func__, type & PV_MASK, p.pl); - - /* Most parameters are bit/byte fields or little endian, - * so it's ok to only extract a subset of it (the subset - * that the handler expect). This is necessary, as some - * broken implementations seems to add extra undefined bits. - * If the parameter is shorter than we expect or is big - * endian, we can't play those tricks. Jean II */ - if((p.pl < (type & PV_MASK)) || (type & PV_BIG_ENDIAN)) { - /* Skip parameter */ - return p.pl+2; - } else { - /* Extract subset of it, fallthrough */ - extract_len = type & PV_MASK; - } - } - - - switch (extract_len) { - case 1: - n += irda_param_unpack(buf+2, "b", &p.pv.i); - break; - case 2: - n += irda_param_unpack(buf+2, "s", &p.pv.i); - if (type & PV_BIG_ENDIAN) - p.pv.i = be16_to_cpu((__u16) p.pv.i); - else - p.pv.i = le16_to_cpu((__u16) p.pv.i); - break; - case 4: - n += irda_param_unpack(buf+2, "i", &p.pv.i); - if (type & PV_BIG_ENDIAN) - be32_to_cpus(&p.pv.i); - else - le32_to_cpus(&p.pv.i); - break; - default: - net_warn_ratelimited("%s: length %d not supported\n", - __func__, p.pl); - - /* Skip parameter */ - return p.pl+2; - } - - pr_debug("%s(), pi=%#x, pl=%d, pi=%d\n", __func__, - p.pi, p.pl, p.pv.i); - /* Call handler for this parameter */ - err = (*func)(self, &p, PV_PUT); - if (err < 0) - return err; - - return p.pl+2; /* Extracted pl+2 bytes */ -} - -/* - * Function irda_extract_string (self, buf, len, type, func) - */ -static int irda_extract_string(void *self, __u8 *buf, int len, __u8 pi, - PV_TYPE type, PI_HANDLER func) -{ - char str[33]; - irda_param_t p; - int err; - - p.pi = pi; /* In case handler needs to know */ - p.pl = buf[1]; /* Extract length of value */ - if (p.pl > 32) - p.pl = 32; - - pr_debug("%s(), pi=%#x, pl=%d\n", __func__, - p.pi, p.pl); - - /* Check if buffer is long enough for parsing */ - if (len < (2+p.pl)) { - net_warn_ratelimited("%s: buffer too short for parsing! Need %d bytes, but len is only %d\n", - __func__, p.pl, len); - return -1; - } - - /* Should be safe to copy string like this since we have already - * checked that the buffer is long enough */ - strncpy(str, buf+2, p.pl); - - pr_debug("%s(), str=0x%02x 0x%02x\n", - __func__, (__u8)str[0], (__u8)str[1]); - - /* Null terminate string */ - str[p.pl] = '\0'; - - p.pv.c = str; /* Handler will need to take a copy */ - - /* Call handler for this parameter */ - err = (*func)(self, &p, PV_PUT); - if (err < 0) - return err; - - return p.pl+2; /* Extracted pl+2 bytes */ -} - -/* - * Function irda_extract_octseq (self, buf, len, type, func) - */ -static int irda_extract_octseq(void *self, __u8 *buf, int len, __u8 pi, - PV_TYPE type, PI_HANDLER func) -{ - irda_param_t p; - - p.pi = pi; /* In case handler needs to know */ - p.pl = buf[1]; /* Extract length of value */ - - /* Check if buffer is long enough for parsing */ - if (len < (2+p.pl)) { - net_warn_ratelimited("%s: buffer too short for parsing! Need %d bytes, but len is only %d\n", - __func__, p.pl, len); - return -1; - } - - pr_debug("%s(), not impl\n", __func__); - - return p.pl+2; /* Extracted pl+2 bytes */ -} - -/* - * Function irda_param_pack (skb, fmt, ...) - * - * Format: - * 'i' = 32 bits integer - * 's' = string - * - */ -int irda_param_pack(__u8 *buf, char *fmt, ...) -{ - irda_pv_t arg; - va_list args; - char *p; - int n = 0; - - va_start(args, fmt); - - for (p = fmt; *p != '\0'; p++) { - switch (*p) { - case 'b': /* 8 bits unsigned byte */ - buf[n++] = (__u8)va_arg(args, int); - break; - case 's': /* 16 bits unsigned short */ - arg.i = (__u16)va_arg(args, int); - put_unaligned((__u16)arg.i, (__u16 *)(buf+n)); n+=2; - break; - case 'i': /* 32 bits unsigned integer */ - arg.i = va_arg(args, __u32); - put_unaligned(arg.i, (__u32 *)(buf+n)); n+=4; - break; -#if 0 - case 'c': /* \0 terminated string */ - arg.c = va_arg(args, char *); - strcpy(buf+n, arg.c); - n += strlen(arg.c) + 1; - break; -#endif - default: - va_end(args); - return -1; - } - } - va_end(args); - - return 0; -} -EXPORT_SYMBOL(irda_param_pack); - -/* - * Function irda_param_unpack (skb, fmt, ...) - */ -static int irda_param_unpack(__u8 *buf, char *fmt, ...) -{ - irda_pv_t arg; - va_list args; - char *p; - int n = 0; - - va_start(args, fmt); - - for (p = fmt; *p != '\0'; p++) { - switch (*p) { - case 'b': /* 8 bits byte */ - arg.ip = va_arg(args, __u32 *); - *arg.ip = buf[n++]; - break; - case 's': /* 16 bits short */ - arg.ip = va_arg(args, __u32 *); - *arg.ip = get_unaligned((__u16 *)(buf+n)); n+=2; - break; - case 'i': /* 32 bits unsigned integer */ - arg.ip = va_arg(args, __u32 *); - *arg.ip = get_unaligned((__u32 *)(buf+n)); n+=4; - break; -#if 0 - case 'c': /* \0 terminated string */ - arg.c = va_arg(args, char *); - strcpy(arg.c, buf+n); - n += strlen(arg.c) + 1; - break; -#endif - default: - va_end(args); - return -1; - } - - } - va_end(args); - - return 0; -} - -/* - * Function irda_param_insert (self, pi, buf, len, info) - * - * Insert the specified parameter (pi) into buffer. Returns number of - * bytes inserted - */ -int irda_param_insert(void *self, __u8 pi, __u8 *buf, int len, - pi_param_info_t *info) -{ - const pi_minor_info_t *pi_minor_info; - __u8 pi_minor; - __u8 pi_major; - int type; - int ret = -1; - int n = 0; - - IRDA_ASSERT(buf != NULL, return ret;); - IRDA_ASSERT(info != NULL, return ret;); - - pi_minor = pi & info->pi_mask; - pi_major = pi >> info->pi_major_offset; - - /* Check if the identifier value (pi) is valid */ - if ((pi_major > info->len-1) || - (pi_minor > info->tables[pi_major].len-1)) - { - pr_debug("%s(), no handler for parameter=0x%02x\n", - __func__, pi); - - /* Skip this parameter */ - return -1; - } - - /* Lookup the info on how to parse this parameter */ - pi_minor_info = &info->tables[pi_major].pi_minor_call_table[pi_minor]; - - /* Find expected data type for this parameter identifier (pi)*/ - type = pi_minor_info->type; - - /* Check if handler has been implemented */ - if (!pi_minor_info->func) { - net_info_ratelimited("%s: no handler for pi=%#x\n", - __func__, pi); - /* Skip this parameter */ - return -1; - } - - /* Insert parameter value */ - ret = (*pv_insert_table[type & PV_MASK])(self, buf+n, len, pi, type, - pi_minor_info->func); - return ret; -} -EXPORT_SYMBOL(irda_param_insert); - -/* - * Function irda_param_extract (self, buf, len, info) - * - * Parse all parameters. If len is correct, then everything should be - * safe. Returns the number of bytes that was parsed - * - */ -static int irda_param_extract(void *self, __u8 *buf, int len, - pi_param_info_t *info) -{ - const pi_minor_info_t *pi_minor_info; - __u8 pi_minor; - __u8 pi_major; - int type; - int ret = -1; - int n = 0; - - IRDA_ASSERT(buf != NULL, return ret;); - IRDA_ASSERT(info != NULL, return ret;); - - pi_minor = buf[n] & info->pi_mask; - pi_major = buf[n] >> info->pi_major_offset; - - /* Check if the identifier value (pi) is valid */ - if ((pi_major > info->len-1) || - (pi_minor > info->tables[pi_major].len-1)) - { - pr_debug("%s(), no handler for parameter=0x%02x\n", - __func__, buf[0]); - - /* Skip this parameter */ - return 2 + buf[n + 1]; /* Continue */ - } - - /* Lookup the info on how to parse this parameter */ - pi_minor_info = &info->tables[pi_major].pi_minor_call_table[pi_minor]; - - /* Find expected data type for this parameter identifier (pi)*/ - type = pi_minor_info->type; - - pr_debug("%s(), pi=[%d,%d], type=%d\n", __func__, - pi_major, pi_minor, type); - - /* Check if handler has been implemented */ - if (!pi_minor_info->func) { - net_info_ratelimited("%s: no handler for pi=%#x\n", - __func__, buf[n]); - /* Skip this parameter */ - return 2 + buf[n + 1]; /* Continue */ - } - - /* Parse parameter value */ - ret = (*pv_extract_table[type & PV_MASK])(self, buf+n, len, buf[n], - type, pi_minor_info->func); - return ret; -} - -/* - * Function irda_param_extract_all (self, buf, len, info) - * - * Parse all parameters. If len is correct, then everything should be - * safe. Returns the number of bytes that was parsed - * - */ -int irda_param_extract_all(void *self, __u8 *buf, int len, - pi_param_info_t *info) -{ - int ret = -1; - int n = 0; - - IRDA_ASSERT(buf != NULL, return ret;); - IRDA_ASSERT(info != NULL, return ret;); - - /* - * Parse all parameters. Each parameter must be at least two bytes - * long or else there is no point in trying to parse it - */ - while (len > 2) { - ret = irda_param_extract(self, buf+n, len, info); - if (ret < 0) - return ret; - - n += ret; - len -= ret; - } - return n; -} -EXPORT_SYMBOL(irda_param_extract_all); diff --git a/net/irda/qos.c b/net/irda/qos.c deleted file mode 100644 index 25ba8509ad3e..000000000000 --- a/net/irda/qos.c +++ /dev/null @@ -1,771 +0,0 @@ -/********************************************************************* - * - * Filename: qos.c - * Version: 1.0 - * Description: IrLAP QoS parameter negotiation - * Status: Stable - * Author: Dag Brattli <dagb@cs.uit.no> - * Created at: Tue Sep 9 00:00:26 1997 - * Modified at: Sun Jan 30 14:29:16 2000 - * Modified by: Dag Brattli <dagb@cs.uit.no> - * - * Copyright (c) 1998-2000 Dag Brattli <dagb@cs.uit.no>, - * All Rights Reserved. - * Copyright (c) 2000-2001 Jean Tourrilhes <jt@hpl.hp.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see <http://www.gnu.org/licenses/>. - * - ********************************************************************/ - -#include <linux/export.h> - -#include <asm/byteorder.h> - -#include <net/irda/irda.h> -#include <net/irda/parameters.h> -#include <net/irda/qos.h> -#include <net/irda/irlap.h> -#include <net/irda/irlap_frame.h> - -/* - * Maximum values of the baud rate we negotiate with the other end. - * Most often, you don't have to change that, because Linux-IrDA will - * use the maximum offered by the link layer, which usually works fine. - * In some very rare cases, you may want to limit it to lower speeds... - */ -int sysctl_max_baud_rate = 16000000; -/* - * Maximum value of the lap disconnect timer we negotiate with the other end. - * Most often, the value below represent the best compromise, but some user - * may want to keep the LAP alive longer or shorter in case of link failure. - * Remember that the threshold time (early warning) is fixed to 3s... - */ -int sysctl_max_noreply_time = 12; -/* - * Minimum turn time to be applied before transmitting to the peer. - * Nonzero values (usec) are used as lower limit to the per-connection - * mtt value which was announced by the other end during negotiation. - * Might be helpful if the peer device provides too short mtt. - * Default is 10us which means using the unmodified value given by the - * peer except if it's 0 (0 is likely a bug in the other stack). - */ -unsigned int sysctl_min_tx_turn_time = 10; -/* - * Maximum data size to be used in transmission in payload of LAP frame. - * There is a bit of confusion in the IrDA spec : - * The LAP spec defines the payload of a LAP frame (I field) to be - * 2048 bytes max (IrLAP 1.1, chapt 6.6.5, p40). - * On the other hand, the PHY mention frames of 2048 bytes max (IrPHY - * 1.2, chapt 5.3.2.1, p41). But, this number includes the LAP header - * (2 bytes), and CRC (32 bits at 4 Mb/s). So, for the I field (LAP - * payload), that's only 2042 bytes. Oups ! - * My nsc-ircc hardware has troubles receiving 2048 bytes frames at 4 Mb/s, - * so adjust to 2042... I don't know if this bug applies only for 2048 - * bytes frames or all negotiated frame sizes, but you can use the sysctl - * to play with this value anyway. - * Jean II */ -unsigned int sysctl_max_tx_data_size = 2042; -/* - * Maximum transmit window, i.e. number of LAP frames between turn-around. - * This allow to override what the peer told us. Some peers are buggy and - * don't always support what they tell us. - * Jean II */ -unsigned int sysctl_max_tx_window = 7; - -static int irlap_param_baud_rate(void *instance, irda_param_t *param, int get); -static int irlap_param_link_disconnect(void *instance, irda_param_t *parm, - int get); -static int irlap_param_max_turn_time(void *instance, irda_param_t *param, - int get); -static int irlap_param_data_size(void *instance, irda_param_t *param, int get); -static int irlap_param_window_size(void *instance, irda_param_t *param, - int get); -static int irlap_param_additional_bofs(void *instance, irda_param_t *parm, - int get); -static int irlap_param_min_turn_time(void *instance, irda_param_t *param, - int get); - -#ifndef CONFIG_IRDA_DYNAMIC_WINDOW -static __u32 irlap_requested_line_capacity(struct qos_info *qos); -#endif - -static __u32 min_turn_times[] = { 10000, 5000, 1000, 500, 100, 50, 10, 0 }; /* us */ -static __u32 baud_rates[] = { 2400, 9600, 19200, 38400, 57600, 115200, 576000, - 1152000, 4000000, 16000000 }; /* bps */ -static __u32 data_sizes[] = { 64, 128, 256, 512, 1024, 2048 }; /* bytes */ -static __u32 add_bofs[] = { 48, 24, 12, 5, 3, 2, 1, 0 }; /* bytes */ -static __u32 max_turn_times[] = { 500, 250, 100, 50 }; /* ms */ -static __u32 link_disc_times[] = { 3, 8, 12, 16, 20, 25, 30, 40 }; /* secs */ - -static __u32 max_line_capacities[10][4] = { - /* 500 ms 250 ms 100 ms 50 ms (max turn time) */ - { 100, 0, 0, 0 }, /* 2400 bps */ - { 400, 0, 0, 0 }, /* 9600 bps */ - { 800, 0, 0, 0 }, /* 19200 bps */ - { 1600, 0, 0, 0 }, /* 38400 bps */ - { 2360, 0, 0, 0 }, /* 57600 bps */ - { 4800, 2400, 960, 480 }, /* 115200 bps */ - { 28800, 11520, 5760, 2880 }, /* 576000 bps */ - { 57600, 28800, 11520, 5760 }, /* 1152000 bps */ - { 200000, 100000, 40000, 20000 }, /* 4000000 bps */ - { 800000, 400000, 160000, 80000 }, /* 16000000 bps */ -}; - -static const pi_minor_info_t pi_minor_call_table_type_0[] = { - { NULL, 0 }, -/* 01 */{ irlap_param_baud_rate, PV_INTEGER | PV_LITTLE_ENDIAN }, - { NULL, 0 }, - { NULL, 0 }, - { NULL, 0 }, - { NULL, 0 }, - { NULL, 0 }, - { NULL, 0 }, -/* 08 */{ irlap_param_link_disconnect, PV_INT_8_BITS } -}; - -static const pi_minor_info_t pi_minor_call_table_type_1[] = { - { NULL, 0 }, - { NULL, 0 }, -/* 82 */{ irlap_param_max_turn_time, PV_INT_8_BITS }, -/* 83 */{ irlap_param_data_size, PV_INT_8_BITS }, -/* 84 */{ irlap_param_window_size, PV_INT_8_BITS }, -/* 85 */{ irlap_param_additional_bofs, PV_INT_8_BITS }, -/* 86 */{ irlap_param_min_turn_time, PV_INT_8_BITS }, -}; - -static const pi_major_info_t pi_major_call_table[] = { - { pi_minor_call_table_type_0, 9 }, - { pi_minor_call_table_type_1, 7 }, -}; - -static pi_param_info_t irlap_param_info = { pi_major_call_table, 2, 0x7f, 7 }; - -/* ---------------------- LOCAL SUBROUTINES ---------------------- */ -/* Note : we start with a bunch of local subroutines. - * As the compiler is "one pass", this is the only way to get them to - * inline properly... - * Jean II - */ -/* - * Function value_index (value, array, size) - * - * Returns the index to the value in the specified array - */ -static inline int value_index(__u32 value, __u32 *array, int size) -{ - int i; - - for (i=0; i < size; i++) - if (array[i] == value) - break; - return i; -} - -/* - * Function index_value (index, array) - * - * Returns value to index in array, easy! - * - */ -static inline __u32 index_value(int index, __u32 *array) -{ - return array[index]; -} - -/* - * Function msb_index (word) - * - * Returns index to most significant bit (MSB) in word - * - */ -static int msb_index (__u16 word) -{ - __u16 msb = 0x8000; - int index = 15; /* Current MSB */ - - /* Check for buggy peers. - * Note : there is a small probability that it could be us, but I - * would expect driver authors to catch that pretty early and be - * able to check precisely what's going on. If a end user sees this, - * it's very likely the peer. - Jean II */ - if (word == 0) { - net_warn_ratelimited("%s(), Detected buggy peer, adjust null PV to 0x1!\n", - __func__); - /* The only safe choice (we don't know the array size) */ - word = 0x1; - } - - while (msb) { - if (word & msb) - break; /* Found it! */ - msb >>=1; - index--; - } - return index; -} - -/* - * Function value_lower_bits (value, array) - * - * Returns a bit field marking all possibility lower than value. - */ -static inline int value_lower_bits(__u32 value, __u32 *array, int size, __u16 *field) -{ - int i; - __u16 mask = 0x1; - __u16 result = 0x0; - - for (i=0; i < size; i++) { - /* Add the current value to the bit field, shift mask */ - result |= mask; - mask <<= 1; - /* Finished ? */ - if (array[i] >= value) - break; - } - /* Send back a valid index */ - if(i >= size) - i = size - 1; /* Last item */ - *field = result; - return i; -} - -/* - * Function value_highest_bit (value, array) - * - * Returns a bit field marking the highest possibility lower than value. - */ -static inline int value_highest_bit(__u32 value, __u32 *array, int size, __u16 *field) -{ - int i; - __u16 mask = 0x1; - __u16 result = 0x0; - - for (i=0; i < size; i++) { - /* Finished ? */ - if (array[i] <= value) - break; - /* Shift mask */ - mask <<= 1; - } - /* Set the current value to the bit field */ - result |= mask; - /* Send back a valid index */ - if(i >= size) - i = size - 1; /* Last item */ - *field = result; - return i; -} - -/* -------------------------- MAIN CALLS -------------------------- */ - -/* - * Function irda_qos_compute_intersection (qos, new) - * - * Compute the intersection of the old QoS capabilities with new ones - * - */ -void irda_qos_compute_intersection(struct qos_info *qos, struct qos_info *new) -{ - IRDA_ASSERT(qos != NULL, return;); - IRDA_ASSERT(new != NULL, return;); - - /* Apply */ - qos->baud_rate.bits &= new->baud_rate.bits; - qos->window_size.bits &= new->window_size.bits; - qos->min_turn_time.bits &= new->min_turn_time.bits; - qos->max_turn_time.bits &= new->max_turn_time.bits; - qos->data_size.bits &= new->data_size.bits; - qos->link_disc_time.bits &= new->link_disc_time.bits; - qos->additional_bofs.bits &= new->additional_bofs.bits; - - irda_qos_bits_to_value(qos); -} - -/* - * Function irda_init_max_qos_capabilies (qos) - * - * The purpose of this function is for layers and drivers to be able to - * set the maximum QoS possible and then "and in" their own limitations - * - */ -void irda_init_max_qos_capabilies(struct qos_info *qos) -{ - int i; - /* - * These are the maximum supported values as specified on pages - * 39-43 in IrLAP - */ - - /* Use sysctl to set some configurable values... */ - /* Set configured max speed */ - i = value_lower_bits(sysctl_max_baud_rate, baud_rates, 10, - &qos->baud_rate.bits); - sysctl_max_baud_rate = index_value(i, baud_rates); - - /* Set configured max disc time */ - i = value_lower_bits(sysctl_max_noreply_time, link_disc_times, 8, - &qos->link_disc_time.bits); - sysctl_max_noreply_time = index_value(i, link_disc_times); - - /* LSB is first byte, MSB is second byte */ - qos->baud_rate.bits &= 0x03ff; - - qos->window_size.bits = 0x7f; - qos->min_turn_time.bits = 0xff; - qos->max_turn_time.bits = 0x0f; - qos->data_size.bits = 0x3f; - qos->link_disc_time.bits &= 0xff; - qos->additional_bofs.bits = 0xff; -} -EXPORT_SYMBOL(irda_init_max_qos_capabilies); - -/* - * Function irlap_adjust_qos_settings (qos) - * - * Adjust QoS settings in case some values are not possible to use because - * of other settings - */ -static void irlap_adjust_qos_settings(struct qos_info *qos) -{ - __u32 line_capacity; - int index; - - /* - * Make sure the mintt is sensible. - * Main culprit : Ericsson T39. - Jean II - */ - if (sysctl_min_tx_turn_time > qos->min_turn_time.value) { - int i; - - net_warn_ratelimited("%s(), Detected buggy peer, adjust mtt to %dus!\n", - __func__, sysctl_min_tx_turn_time); - - /* We don't really need bits, but easier this way */ - i = value_highest_bit(sysctl_min_tx_turn_time, min_turn_times, - 8, &qos->min_turn_time.bits); - sysctl_min_tx_turn_time = index_value(i, min_turn_times); - qos->min_turn_time.value = sysctl_min_tx_turn_time; - } - - /* - * Not allowed to use a max turn time less than 500 ms if the baudrate - * is less than 115200 - */ - if ((qos->baud_rate.value < 115200) && - (qos->max_turn_time.value < 500)) - { - pr_debug("%s(), adjusting max turn time from %d to 500 ms\n", - __func__, qos->max_turn_time.value); - qos->max_turn_time.value = 500; - } - - /* - * The data size must be adjusted according to the baud rate and max - * turn time - */ - index = value_index(qos->data_size.value, data_sizes, 6); - line_capacity = irlap_max_line_capacity(qos->baud_rate.value, - qos->max_turn_time.value); - -#ifdef CONFIG_IRDA_DYNAMIC_WINDOW - while ((qos->data_size.value > line_capacity) && (index > 0)) { - qos->data_size.value = data_sizes[index--]; - pr_debug("%s(), reducing data size to %d\n", - __func__, qos->data_size.value); - } -#else /* Use method described in section 6.6.11 of IrLAP */ - while (irlap_requested_line_capacity(qos) > line_capacity) { - IRDA_ASSERT(index != 0, return;); - - /* Must be able to send at least one frame */ - if (qos->window_size.value > 1) { - qos->window_size.value--; - pr_debug("%s(), reducing window size to %d\n", - __func__, qos->window_size.value); - } else if (index > 1) { - qos->data_size.value = data_sizes[index--]; - pr_debug("%s(), reducing data size to %d\n", - __func__, qos->data_size.value); - } else { - net_warn_ratelimited("%s(), nothing more we can do!\n", - __func__); - } - } -#endif /* CONFIG_IRDA_DYNAMIC_WINDOW */ - /* - * Fix tx data size according to user limits - Jean II - */ - if (qos->data_size.value > sysctl_max_tx_data_size) - /* Allow non discrete adjustement to avoid losing capacity */ - qos->data_size.value = sysctl_max_tx_data_size; - /* - * Override Tx window if user request it. - Jean II - */ - if (qos->window_size.value > sysctl_max_tx_window) - qos->window_size.value = sysctl_max_tx_window; -} - -/* - * Function irlap_negotiate (qos_device, qos_session, skb) - * - * Negotiate QoS values, not really that much negotiation :-) - * We just set the QoS capabilities for the peer station - * - */ -int irlap_qos_negotiate(struct irlap_cb *self, struct sk_buff *skb) -{ - int ret; - - ret = irda_param_extract_all(self, skb->data, skb->len, - &irlap_param_info); - - /* Convert the negotiated bits to values */ - irda_qos_bits_to_value(&self->qos_tx); - irda_qos_bits_to_value(&self->qos_rx); - - irlap_adjust_qos_settings(&self->qos_tx); - - pr_debug("Setting BAUD_RATE to %d bps.\n", - self->qos_tx.baud_rate.value); - pr_debug("Setting DATA_SIZE to %d bytes\n", - self->qos_tx.data_size.value); - pr_debug("Setting WINDOW_SIZE to %d\n", - self->qos_tx.window_size.value); - pr_debug("Setting XBOFS to %d\n", - self->qos_tx.additional_bofs.value); - pr_debug("Setting MAX_TURN_TIME to %d ms.\n", - self->qos_tx.max_turn_time.value); - pr_debug("Setting MIN_TURN_TIME to %d usecs.\n", - self->qos_tx.min_turn_time.value); - pr_debug("Setting LINK_DISC to %d secs.\n", - self->qos_tx.link_disc_time.value); - return ret; -} - -/* - * Function irlap_insert_negotiation_params (qos, fp) - * - * Insert QoS negotiaion pararameters into frame - * - */ -int irlap_insert_qos_negotiation_params(struct irlap_cb *self, - struct sk_buff *skb) -{ - int ret; - - /* Insert data rate */ - ret = irda_param_insert(self, PI_BAUD_RATE, skb_tail_pointer(skb), - skb_tailroom(skb), &irlap_param_info); - if (ret < 0) - return ret; - skb_put(skb, ret); - - /* Insert max turnaround time */ - ret = irda_param_insert(self, PI_MAX_TURN_TIME, skb_tail_pointer(skb), - skb_tailroom(skb), &irlap_param_info); - if (ret < 0) - return ret; - skb_put(skb, ret); - - /* Insert data size */ - ret = irda_param_insert(self, PI_DATA_SIZE, skb_tail_pointer(skb), - skb_tailroom(skb), &irlap_param_info); - if (ret < 0) - return ret; - skb_put(skb, ret); - - /* Insert window size */ - ret = irda_param_insert(self, PI_WINDOW_SIZE, skb_tail_pointer(skb), - skb_tailroom(skb), &irlap_param_info); - if (ret < 0) - return ret; - skb_put(skb, ret); - - /* Insert additional BOFs */ - ret = irda_param_insert(self, PI_ADD_BOFS, skb_tail_pointer(skb), - skb_tailroom(skb), &irlap_param_info); - if (ret < 0) - return ret; - skb_put(skb, ret); - - /* Insert minimum turnaround time */ - ret = irda_param_insert(self, PI_MIN_TURN_TIME, skb_tail_pointer(skb), - skb_tailroom(skb), &irlap_param_info); - if (ret < 0) - return ret; - skb_put(skb, ret); - - /* Insert link disconnect/threshold time */ - ret = irda_param_insert(self, PI_LINK_DISC, skb_tail_pointer(skb), - skb_tailroom(skb), &irlap_param_info); - if (ret < 0) - return ret; - skb_put(skb, ret); - - return 0; -} - -/* - * Function irlap_param_baud_rate (instance, param, get) - * - * Negotiate data-rate - * - */ -static int irlap_param_baud_rate(void *instance, irda_param_t *param, int get) -{ - __u16 final; - - struct irlap_cb *self = (struct irlap_cb *) instance; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;); - - if (get) { - param->pv.i = self->qos_rx.baud_rate.bits; - pr_debug("%s(), baud rate = 0x%02x\n", - __func__, param->pv.i); - } else { - /* - * Stations must agree on baud rate, so calculate - * intersection - */ - pr_debug("Requested BAUD_RATE: 0x%04x\n", (__u16)param->pv.i); - final = (__u16) param->pv.i & self->qos_rx.baud_rate.bits; - - pr_debug("Final BAUD_RATE: 0x%04x\n", final); - self->qos_tx.baud_rate.bits = final; - self->qos_rx.baud_rate.bits = final; - } - - return 0; -} - -/* - * Function irlap_param_link_disconnect (instance, param, get) - * - * Negotiate link disconnect/threshold time. - * - */ -static int irlap_param_link_disconnect(void *instance, irda_param_t *param, - int get) -{ - __u16 final; - - struct irlap_cb *self = (struct irlap_cb *) instance; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;); - - if (get) - param->pv.i = self->qos_rx.link_disc_time.bits; - else { - /* - * Stations must agree on link disconnect/threshold - * time. - */ - pr_debug("LINK_DISC: %02x\n", (__u8)param->pv.i); - final = (__u8) param->pv.i & self->qos_rx.link_disc_time.bits; - - pr_debug("Final LINK_DISC: %02x\n", final); - self->qos_tx.link_disc_time.bits = final; - self->qos_rx.link_disc_time.bits = final; - } - return 0; -} - -/* - * Function irlap_param_max_turn_time (instance, param, get) - * - * Negotiate the maximum turnaround time. This is a type 1 parameter and - * will be negotiated independently for each station - * - */ -static int irlap_param_max_turn_time(void *instance, irda_param_t *param, - int get) -{ - struct irlap_cb *self = (struct irlap_cb *) instance; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;); - - if (get) - param->pv.i = self->qos_rx.max_turn_time.bits; - else - self->qos_tx.max_turn_time.bits = (__u8) param->pv.i; - - return 0; -} - -/* - * Function irlap_param_data_size (instance, param, get) - * - * Negotiate the data size. This is a type 1 parameter and - * will be negotiated independently for each station - * - */ -static int irlap_param_data_size(void *instance, irda_param_t *param, int get) -{ - struct irlap_cb *self = (struct irlap_cb *) instance; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;); - - if (get) - param->pv.i = self->qos_rx.data_size.bits; - else - self->qos_tx.data_size.bits = (__u8) param->pv.i; - - return 0; -} - -/* - * Function irlap_param_window_size (instance, param, get) - * - * Negotiate the window size. This is a type 1 parameter and - * will be negotiated independently for each station - * - */ -static int irlap_param_window_size(void *instance, irda_param_t *param, - int get) -{ - struct irlap_cb *self = (struct irlap_cb *) instance; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;); - - if (get) - param->pv.i = self->qos_rx.window_size.bits; - else - self->qos_tx.window_size.bits = (__u8) param->pv.i; - - return 0; -} - -/* - * Function irlap_param_additional_bofs (instance, param, get) - * - * Negotiate additional BOF characters. This is a type 1 parameter and - * will be negotiated independently for each station. - */ -static int irlap_param_additional_bofs(void *instance, irda_param_t *param, int get) -{ - struct irlap_cb *self = (struct irlap_cb *) instance; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;); - - if (get) - param->pv.i = self->qos_rx.additional_bofs.bits; - else - self->qos_tx.additional_bofs.bits = (__u8) param->pv.i; - - return 0; -} - -/* - * Function irlap_param_min_turn_time (instance, param, get) - * - * Negotiate the minimum turn around time. This is a type 1 parameter and - * will be negotiated independently for each station - */ -static int irlap_param_min_turn_time(void *instance, irda_param_t *param, - int get) -{ - struct irlap_cb *self = (struct irlap_cb *) instance; - - IRDA_ASSERT(self != NULL, return -1;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return -1;); - - if (get) - param->pv.i = self->qos_rx.min_turn_time.bits; - else - self->qos_tx.min_turn_time.bits = (__u8) param->pv.i; - - return 0; -} - -/* - * Function irlap_max_line_capacity (speed, max_turn_time, min_turn_time) - * - * Calculate the maximum line capacity - * - */ -__u32 irlap_max_line_capacity(__u32 speed, __u32 max_turn_time) -{ - __u32 line_capacity; - int i,j; - - pr_debug("%s(), speed=%d, max_turn_time=%d\n", - __func__, speed, max_turn_time); - - i = value_index(speed, baud_rates, 10); - j = value_index(max_turn_time, max_turn_times, 4); - - IRDA_ASSERT(((i >=0) && (i <10)), return 0;); - IRDA_ASSERT(((j >=0) && (j <4)), return 0;); - - line_capacity = max_line_capacities[i][j]; - - pr_debug("%s(), line capacity=%d bytes\n", - __func__, line_capacity); - - return line_capacity; -} - -#ifndef CONFIG_IRDA_DYNAMIC_WINDOW -static __u32 irlap_requested_line_capacity(struct qos_info *qos) -{ - __u32 line_capacity; - - line_capacity = qos->window_size.value * - (qos->data_size.value + 6 + qos->additional_bofs.value) + - irlap_min_turn_time_in_bytes(qos->baud_rate.value, - qos->min_turn_time.value); - - pr_debug("%s(), requested line capacity=%d\n", - __func__, line_capacity); - - return line_capacity; -} -#endif - -void irda_qos_bits_to_value(struct qos_info *qos) -{ - int index; - - IRDA_ASSERT(qos != NULL, return;); - - index = msb_index(qos->baud_rate.bits); - qos->baud_rate.value = baud_rates[index]; - - index = msb_index(qos->data_size.bits); - qos->data_size.value = data_sizes[index]; - - index = msb_index(qos->window_size.bits); - qos->window_size.value = index+1; - - index = msb_index(qos->min_turn_time.bits); - qos->min_turn_time.value = min_turn_times[index]; - - index = msb_index(qos->max_turn_time.bits); - qos->max_turn_time.value = max_turn_times[index]; - - index = msb_index(qos->link_disc_time.bits); - qos->link_disc_time.value = link_disc_times[index]; - - index = msb_index(qos->additional_bofs.bits); - qos->additional_bofs.value = add_bofs[index]; -} -EXPORT_SYMBOL(irda_qos_bits_to_value); diff --git a/net/irda/timer.c b/net/irda/timer.c deleted file mode 100644 index f2280f73b057..000000000000 --- a/net/irda/timer.c +++ /dev/null @@ -1,231 +0,0 @@ -/********************************************************************* - * - * Filename: timer.c - * Version: - * Description: - * Status: Experimental. - * Author: Dag Brattli <dagb@cs.uit.no> - * Created at: Sat Aug 16 00:59:29 1997 - * Modified at: Wed Dec 8 12:50:34 1999 - * Modified by: Dag Brattli <dagb@cs.uit.no> - * - * Copyright (c) 1997, 1999 Dag Brattli <dagb@cs.uit.no>, - * All Rights Reserved. - * Copyright (c) 2000-2002 Jean Tourrilhes <jt@hpl.hp.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#include <linux/delay.h> - -#include <net/irda/timer.h> -#include <net/irda/irda.h> -#include <net/irda/irda_device.h> -#include <net/irda/irlap.h> -#include <net/irda/irlmp.h> - -extern int sysctl_slot_timeout; - -static void irlap_slot_timer_expired(void* data); -static void irlap_query_timer_expired(void* data); -static void irlap_final_timer_expired(void* data); -static void irlap_wd_timer_expired(void* data); -static void irlap_backoff_timer_expired(void* data); -static void irlap_media_busy_expired(void* data); - -void irlap_start_slot_timer(struct irlap_cb *self, int timeout) -{ - irda_start_timer(&self->slot_timer, timeout, (void *) self, - irlap_slot_timer_expired); -} - -void irlap_start_query_timer(struct irlap_cb *self, int S, int s) -{ - int timeout; - - /* Calculate when the peer discovery should end. Normally, we - * get the end-of-discovery frame, so this is just in case - * we miss it. - * Basically, we multiply the number of remaining slots by our - * slot time, plus add some extra time to properly receive the last - * discovery packet (which is longer due to extra discovery info), - * to avoid messing with for incoming connections requests and - * to accommodate devices that perform discovery slower than us. - * Jean II */ - timeout = msecs_to_jiffies(sysctl_slot_timeout) * (S - s) - + XIDEXTRA_TIMEOUT + SMALLBUSY_TIMEOUT; - - /* Set or re-set the timer. We reset the timer for each received - * discovery query, which allow us to automatically adjust to - * the speed of the peer discovery (faster or slower). Jean II */ - irda_start_timer( &self->query_timer, timeout, (void *) self, - irlap_query_timer_expired); -} - -void irlap_start_final_timer(struct irlap_cb *self, int timeout) -{ - irda_start_timer(&self->final_timer, timeout, (void *) self, - irlap_final_timer_expired); -} - -void irlap_start_wd_timer(struct irlap_cb *self, int timeout) -{ - irda_start_timer(&self->wd_timer, timeout, (void *) self, - irlap_wd_timer_expired); -} - -void irlap_start_backoff_timer(struct irlap_cb *self, int timeout) -{ - irda_start_timer(&self->backoff_timer, timeout, (void *) self, - irlap_backoff_timer_expired); -} - -void irlap_start_mbusy_timer(struct irlap_cb *self, int timeout) -{ - irda_start_timer(&self->media_busy_timer, timeout, - (void *) self, irlap_media_busy_expired); -} - -void irlap_stop_mbusy_timer(struct irlap_cb *self) -{ - /* If timer is activated, kill it! */ - del_timer(&self->media_busy_timer); - - /* If we are in NDM, there is a bunch of events in LAP that - * that be pending due to the media_busy condition, such as - * CONNECT_REQUEST and SEND_UI_FRAME. If we don't generate - * an event, they will wait forever... - * Jean II */ - if (self->state == LAP_NDM) - irlap_do_event(self, MEDIA_BUSY_TIMER_EXPIRED, NULL, NULL); -} - -void irlmp_start_watchdog_timer(struct lsap_cb *self, int timeout) -{ - irda_start_timer(&self->watchdog_timer, timeout, (void *) self, - irlmp_watchdog_timer_expired); -} - -void irlmp_start_discovery_timer(struct irlmp_cb *self, int timeout) -{ - irda_start_timer(&self->discovery_timer, timeout, (void *) self, - irlmp_discovery_timer_expired); -} - -void irlmp_start_idle_timer(struct lap_cb *self, int timeout) -{ - irda_start_timer(&self->idle_timer, timeout, (void *) self, - irlmp_idle_timer_expired); -} - -void irlmp_stop_idle_timer(struct lap_cb *self) -{ - /* If timer is activated, kill it! */ - del_timer(&self->idle_timer); -} - -/* - * Function irlap_slot_timer_expired (data) - * - * IrLAP slot timer has expired - * - */ -static void irlap_slot_timer_expired(void *data) -{ - struct irlap_cb *self = (struct irlap_cb *) data; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - - irlap_do_event(self, SLOT_TIMER_EXPIRED, NULL, NULL); -} - -/* - * Function irlap_query_timer_expired (data) - * - * IrLAP query timer has expired - * - */ -static void irlap_query_timer_expired(void *data) -{ - struct irlap_cb *self = (struct irlap_cb *) data; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - - irlap_do_event(self, QUERY_TIMER_EXPIRED, NULL, NULL); -} - -/* - * Function irda_final_timer_expired (data) - * - * - * - */ -static void irlap_final_timer_expired(void *data) -{ - struct irlap_cb *self = (struct irlap_cb *) data; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - - irlap_do_event(self, FINAL_TIMER_EXPIRED, NULL, NULL); -} - -/* - * Function irda_wd_timer_expired (data) - * - * - * - */ -static void irlap_wd_timer_expired(void *data) -{ - struct irlap_cb *self = (struct irlap_cb *) data; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - - irlap_do_event(self, WD_TIMER_EXPIRED, NULL, NULL); -} - -/* - * Function irda_backoff_timer_expired (data) - * - * - * - */ -static void irlap_backoff_timer_expired(void *data) -{ - struct irlap_cb *self = (struct irlap_cb *) data; - - IRDA_ASSERT(self != NULL, return;); - IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - - irlap_do_event(self, BACKOFF_TIMER_EXPIRED, NULL, NULL); -} - - -/* - * Function irtty_media_busy_expired (data) - * - * - */ -static void irlap_media_busy_expired(void *data) -{ - struct irlap_cb *self = (struct irlap_cb *) data; - - IRDA_ASSERT(self != NULL, return;); - - irda_device_set_media_busy(self->netdev, FALSE); - /* Note : the LAP event will be send in irlap_stop_mbusy_timer(), - * to catch other cases where the flag is cleared (for example - * after a discovery) - Jean II */ -} diff --git a/net/irda/wrapper.c b/net/irda/wrapper.c deleted file mode 100644 index 40a0f993bf13..000000000000 --- a/net/irda/wrapper.c +++ /dev/null @@ -1,492 +0,0 @@ -/********************************************************************* - * - * Filename: wrapper.c - * Version: 1.2 - * Description: IrDA SIR async wrapper layer - * Status: Stable - * Author: Dag Brattli <dagb@cs.uit.no> - * Created at: Mon Aug 4 20:40:53 1997 - * Modified at: Fri Jan 28 13:21:09 2000 - * Modified by: Dag Brattli <dagb@cs.uit.no> - * Modified at: Fri May 28 3:11 CST 1999 - * Modified by: Horst von Brand <vonbrand@sleipnir.valparaiso.cl> - * - * Copyright (c) 1998-2000 Dag Brattli <dagb@cs.uit.no>, - * All Rights Reserved. - * Copyright (c) 2000-2002 Jean Tourrilhes <jt@hpl.hp.com> - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2 of - * the License, or (at your option) any later version. - * - * Neither Dag Brattli nor University of Tromsø admit liability nor - * provide warranty for any of this software. This material is - * provided "AS-IS" and at no charge. - * - ********************************************************************/ - -#include <linux/skbuff.h> -#include <linux/string.h> -#include <linux/module.h> -#include <asm/byteorder.h> - -#include <net/irda/irda.h> -#include <net/irda/wrapper.h> -#include <net/irda/crc.h> -#include <net/irda/irlap.h> -#include <net/irda/irlap_frame.h> -#include <net/irda/irda_device.h> - -/************************** FRAME WRAPPING **************************/ -/* - * Unwrap and unstuff SIR frames - * - * Note : at FIR and MIR, HDLC framing is used and usually handled - * by the controller, so we come here only for SIR... Jean II - */ - -/* - * Function stuff_byte (byte, buf) - * - * Byte stuff one single byte and put the result in buffer pointed to by - * buf. The buffer must at all times be able to have two bytes inserted. - * - * This is in a tight loop, better inline it, so need to be prior to callers. - * (2000 bytes on P6 200MHz, non-inlined ~370us, inline ~170us) - Jean II - */ -static inline int stuff_byte(__u8 byte, __u8 *buf) -{ - switch (byte) { - case BOF: /* FALLTHROUGH */ - case EOF: /* FALLTHROUGH */ - case CE: - /* Insert transparently coded */ - buf[0] = CE; /* Send link escape */ - buf[1] = byte^IRDA_TRANS; /* Complement bit 5 */ - return 2; - /* break; */ - default: - /* Non-special value, no transparency required */ - buf[0] = byte; - return 1; - /* break; */ - } -} - -/* - * Function async_wrap (skb, *tx_buff, buffsize) - * - * Makes a new buffer with wrapping and stuffing, should check that - * we don't get tx buffer overflow. - */ -int async_wrap_skb(struct sk_buff *skb, __u8 *tx_buff, int buffsize) -{ - struct irda_skb_cb *cb = (struct irda_skb_cb *) skb->cb; - int xbofs; - int i; - int n; - union { - __u16 value; - __u8 bytes[2]; - } fcs; - - /* Initialize variables */ - fcs.value = INIT_FCS; - n = 0; - - /* - * Send XBOF's for required min. turn time and for the negotiated - * additional XBOFS - */ - - if (cb->magic != LAP_MAGIC) { - /* - * This will happen for all frames sent from user-space. - * Nothing to worry about, but we set the default number of - * BOF's - */ - pr_debug("%s(), wrong magic in skb!\n", __func__); - xbofs = 10; - } else - xbofs = cb->xbofs + cb->xbofs_delay; - - pr_debug("%s(), xbofs=%d\n", __func__, xbofs); - - /* Check that we never use more than 115 + 48 xbofs */ - if (xbofs > 163) { - pr_debug("%s(), too many xbofs (%d)\n", __func__, - xbofs); - xbofs = 163; - } - - memset(tx_buff + n, XBOF, xbofs); - n += xbofs; - - /* Start of packet character BOF */ - tx_buff[n++] = BOF; - - /* Insert frame and calc CRC */ - for (i=0; i < skb->len; i++) { - /* - * Check for the possibility of tx buffer overflow. We use - * bufsize-5 since the maximum number of bytes that can be - * transmitted after this point is 5. - */ - if(n >= (buffsize-5)) { - net_err_ratelimited("%s(), tx buffer overflow (n=%d)\n", - __func__, n); - return n; - } - - n += stuff_byte(skb->data[i], tx_buff+n); - fcs.value = irda_fcs(fcs.value, skb->data[i]); - } - - /* Insert CRC in little endian format (LSB first) */ - fcs.value = ~fcs.value; -#ifdef __LITTLE_ENDIAN - n += stuff_byte(fcs.bytes[0], tx_buff+n); - n += stuff_byte(fcs.bytes[1], tx_buff+n); -#else /* ifdef __BIG_ENDIAN */ - n += stuff_byte(fcs.bytes[1], tx_buff+n); - n += stuff_byte(fcs.bytes[0], tx_buff+n); -#endif - tx_buff[n++] = EOF; - - return n; -} -EXPORT_SYMBOL(async_wrap_skb); - -/************************* FRAME UNWRAPPING *************************/ -/* - * Unwrap and unstuff SIR frames - * - * Complete rewrite by Jean II : - * More inline, faster, more compact, more logical. Jean II - * (16 bytes on P6 200MHz, old 5 to 7 us, new 4 to 6 us) - * (24 bytes on P6 200MHz, old 9 to 10 us, new 7 to 8 us) - * (for reference, 115200 b/s is 1 byte every 69 us) - * And reduce wrapper.o by ~900B in the process ;-) - * - * Then, we have the addition of ZeroCopy, which is optional - * (i.e. the driver must initiate it) and improve final processing. - * (2005 B frame + EOF on P6 200MHz, without 30 to 50 us, with 10 to 25 us) - * - * Note : at FIR and MIR, HDLC framing is used and usually handled - * by the controller, so we come here only for SIR... Jean II - */ - -/* - * We can also choose where we want to do the CRC calculation. We can - * do it "inline", as we receive the bytes, or "postponed", when - * receiving the End-Of-Frame. - * (16 bytes on P6 200MHz, inlined 4 to 6 us, postponed 4 to 5 us) - * (24 bytes on P6 200MHz, inlined 7 to 8 us, postponed 5 to 7 us) - * With ZeroCopy : - * (2005 B frame on P6 200MHz, inlined 10 to 25 us, postponed 140 to 180 us) - * Without ZeroCopy : - * (2005 B frame on P6 200MHz, inlined 30 to 50 us, postponed 150 to 180 us) - * (Note : numbers taken with irq disabled) - * - * From those numbers, it's not clear which is the best strategy, because - * we end up running through a lot of data one way or another (i.e. cache - * misses). I personally prefer to avoid the huge latency spike of the - * "postponed" solution, because it come just at the time when we have - * lot's of protocol processing to do and it will hurt our ability to - * reach low link turnaround times... Jean II - */ -//#define POSTPONE_RX_CRC - -/* - * Function async_bump (buf, len, stats) - * - * Got a frame, make a copy of it, and pass it up the stack! We can try - * to inline it since it's only called from state_inside_frame - */ -static inline void -async_bump(struct net_device *dev, - struct net_device_stats *stats, - iobuff_t *rx_buff) -{ - struct sk_buff *newskb; - struct sk_buff *dataskb; - int docopy; - - /* Check if we need to copy the data to a new skb or not. - * If the driver doesn't use ZeroCopy Rx, we have to do it. - * With ZeroCopy Rx, the rx_buff already point to a valid - * skb. But, if the frame is small, it is more efficient to - * copy it to save memory (copy will be fast anyway - that's - * called Rx-copy-break). Jean II */ - docopy = ((rx_buff->skb == NULL) || - (rx_buff->len < IRDA_RX_COPY_THRESHOLD)); - - /* Allocate a new skb */ - newskb = dev_alloc_skb(docopy ? rx_buff->len + 1 : rx_buff->truesize); - if (!newskb) { - stats->rx_dropped++; - /* We could deliver the current skb if doing ZeroCopy Rx, - * but this would stall the Rx path. Better drop the - * packet... Jean II */ - return; - } - - /* Align IP header to 20 bytes (i.e. increase skb->data) - * Note this is only useful with IrLAN, as PPP has a variable - * header size (2 or 1 bytes) - Jean II */ - skb_reserve(newskb, 1); - - if(docopy) { - /* Copy data without CRC (length already checked) */ - skb_copy_to_linear_data(newskb, rx_buff->data, - rx_buff->len - 2); - /* Deliver this skb */ - dataskb = newskb; - } else { - /* We are using ZeroCopy. Deliver old skb */ - dataskb = rx_buff->skb; - /* And hook the new skb to the rx_buff */ - rx_buff->skb = newskb; - rx_buff->head = newskb->data; /* NOT newskb->head */ - //printk(KERN_DEBUG "ZeroCopy : len = %d, dataskb = %p, newskb = %p\n", rx_buff->len, dataskb, newskb); - } - - /* Set proper length on skb (without CRC) */ - skb_put(dataskb, rx_buff->len - 2); - - /* Feed it to IrLAP layer */ - dataskb->dev = dev; - skb_reset_mac_header(dataskb); - dataskb->protocol = htons(ETH_P_IRDA); - - netif_rx(dataskb); - - stats->rx_packets++; - stats->rx_bytes += rx_buff->len; - - /* Clean up rx_buff (redundant with async_unwrap_bof() ???) */ - rx_buff->data = rx_buff->head; - rx_buff->len = 0; -} - -/* - * Function async_unwrap_bof(dev, byte) - * - * Handle Beginning Of Frame character received within a frame - * - */ -static inline void -async_unwrap_bof(struct net_device *dev, - struct net_device_stats *stats, - iobuff_t *rx_buff, __u8 byte) -{ - switch(rx_buff->state) { - case LINK_ESCAPE: - case INSIDE_FRAME: - /* Not supposed to happen, the previous frame is not - * finished - Jean II */ - pr_debug("%s(), Discarding incomplete frame\n", - __func__); - stats->rx_errors++; - stats->rx_missed_errors++; - irda_device_set_media_busy(dev, TRUE); - break; - - case OUTSIDE_FRAME: - case BEGIN_FRAME: - default: - /* We may receive multiple BOF at the start of frame */ - break; - } - - /* Now receiving frame */ - rx_buff->state = BEGIN_FRAME; - rx_buff->in_frame = TRUE; - - /* Time to initialize receive buffer */ - rx_buff->data = rx_buff->head; - rx_buff->len = 0; - rx_buff->fcs = INIT_FCS; -} - -/* - * Function async_unwrap_eof(dev, byte) - * - * Handle End Of Frame character received within a frame - * - */ -static inline void -async_unwrap_eof(struct net_device *dev, - struct net_device_stats *stats, - iobuff_t *rx_buff, __u8 byte) -{ -#ifdef POSTPONE_RX_CRC - int i; -#endif - - switch(rx_buff->state) { - case OUTSIDE_FRAME: - /* Probably missed the BOF */ - stats->rx_errors++; - stats->rx_missed_errors++; - irda_device_set_media_busy(dev, TRUE); - break; - - case BEGIN_FRAME: - case LINK_ESCAPE: - case INSIDE_FRAME: - default: - /* Note : in the case of BEGIN_FRAME and LINK_ESCAPE, - * the fcs will most likely not match and generate an - * error, as expected - Jean II */ - rx_buff->state = OUTSIDE_FRAME; - rx_buff->in_frame = FALSE; - -#ifdef POSTPONE_RX_CRC - /* If we haven't done the CRC as we receive bytes, we - * must do it now... Jean II */ - for(i = 0; i < rx_buff->len; i++) - rx_buff->fcs = irda_fcs(rx_buff->fcs, - rx_buff->data[i]); -#endif - - /* Test FCS and signal success if the frame is good */ - if (rx_buff->fcs == GOOD_FCS) { - /* Deliver frame */ - async_bump(dev, stats, rx_buff); - break; - } else { - /* Wrong CRC, discard frame! */ - irda_device_set_media_busy(dev, TRUE); - - pr_debug("%s(), crc error\n", __func__); - stats->rx_errors++; - stats->rx_crc_errors++; - } - break; - } -} - -/* - * Function async_unwrap_ce(dev, byte) - * - * Handle Character Escape character received within a frame - * - */ -static inline void -async_unwrap_ce(struct net_device *dev, - struct net_device_stats *stats, - iobuff_t *rx_buff, __u8 byte) -{ - switch(rx_buff->state) { - case OUTSIDE_FRAME: - /* Activate carrier sense */ - irda_device_set_media_busy(dev, TRUE); - break; - - case LINK_ESCAPE: - net_warn_ratelimited("%s: state not defined\n", __func__); - break; - - case BEGIN_FRAME: - case INSIDE_FRAME: - default: - /* Stuffed byte coming */ - rx_buff->state = LINK_ESCAPE; - break; - } -} - -/* - * Function async_unwrap_other(dev, byte) - * - * Handle other characters received within a frame - * - */ -static inline void -async_unwrap_other(struct net_device *dev, - struct net_device_stats *stats, - iobuff_t *rx_buff, __u8 byte) -{ - switch(rx_buff->state) { - /* This is on the critical path, case are ordered by - * probability (most frequent first) - Jean II */ - case INSIDE_FRAME: - /* Must be the next byte of the frame */ - if (rx_buff->len < rx_buff->truesize) { - rx_buff->data[rx_buff->len++] = byte; -#ifndef POSTPONE_RX_CRC - rx_buff->fcs = irda_fcs(rx_buff->fcs, byte); -#endif - } else { - pr_debug("%s(), Rx buffer overflow, aborting\n", - __func__); - rx_buff->state = OUTSIDE_FRAME; - } - break; - - case LINK_ESCAPE: - /* - * Stuffed char, complement bit 5 of byte - * following CE, IrLAP p.114 - */ - byte ^= IRDA_TRANS; - if (rx_buff->len < rx_buff->truesize) { - rx_buff->data[rx_buff->len++] = byte; -#ifndef POSTPONE_RX_CRC - rx_buff->fcs = irda_fcs(rx_buff->fcs, byte); -#endif - rx_buff->state = INSIDE_FRAME; - } else { - pr_debug("%s(), Rx buffer overflow, aborting\n", - __func__); - rx_buff->state = OUTSIDE_FRAME; - } - break; - - case OUTSIDE_FRAME: - /* Activate carrier sense */ - if(byte != XBOF) - irda_device_set_media_busy(dev, TRUE); - break; - - case BEGIN_FRAME: - default: - rx_buff->data[rx_buff->len++] = byte; -#ifndef POSTPONE_RX_CRC - rx_buff->fcs = irda_fcs(rx_buff->fcs, byte); -#endif - rx_buff->state = INSIDE_FRAME; - break; - } -} - -/* - * Function async_unwrap_char (dev, rx_buff, byte) - * - * Parse and de-stuff frame received from the IrDA-port - * - * This is the main entry point for SIR drivers. - */ -void async_unwrap_char(struct net_device *dev, - struct net_device_stats *stats, - iobuff_t *rx_buff, __u8 byte) -{ - switch(byte) { - case CE: - async_unwrap_ce(dev, stats, rx_buff, byte); - break; - case BOF: - async_unwrap_bof(dev, stats, rx_buff, byte); - break; - case EOF: - async_unwrap_eof(dev, stats, rx_buff, byte); - break; - default: - async_unwrap_other(dev, stats, rx_buff, byte); - break; - } -} -EXPORT_SYMBOL(async_unwrap_char); - diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c index c343ac60bf50..bd5723315069 100644 --- a/net/kcm/kcmproc.c +++ b/net/kcm/kcmproc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/in.h> #include <linux/inet.h> #include <linux/list.h> @@ -155,8 +156,8 @@ static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq, seq_printf(seq, " psock-%-5u %-10llu %-16llu %-10llu %-16llu %-8d %-8d %-8d %-8d ", psock->index, - psock->strp.stats.rx_msgs, - psock->strp.stats.rx_bytes, + psock->strp.stats.msgs, + psock->strp.stats.bytes, psock->stats.tx_msgs, psock->stats.tx_bytes, psock->sk->sk_receive_queue.qlen, @@ -170,22 +171,22 @@ static void kcm_format_psock(struct kcm_psock *psock, struct seq_file *seq, if (psock->tx_stopped) seq_puts(seq, "TxStop "); - if (psock->strp.rx_stopped) + if (psock->strp.stopped) seq_puts(seq, "RxStop "); if (psock->tx_kcm) seq_printf(seq, "Rsvd-%d ", psock->tx_kcm->index); - if (!psock->strp.rx_paused && !psock->ready_rx_msg) { + if (!psock->strp.paused && !psock->ready_rx_msg) { if (psock->sk->sk_receive_queue.qlen) { - if (psock->strp.rx_need_bytes) + if (psock->strp.need_bytes) seq_printf(seq, "RxWait=%u ", - psock->strp.rx_need_bytes); + psock->strp.need_bytes); else seq_printf(seq, "RxWait "); } } else { - if (psock->strp.rx_paused) + if (psock->strp.paused) seq_puts(seq, "RxPause "); if (psock->ready_rx_msg) @@ -371,20 +372,20 @@ static int kcm_stats_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%-8s %-10llu %-16llu %-10llu %-16llu %-10llu %-10llu %-10u %-10u %-10u %-10u %-10u %-10u %-10u %-10u %-10u\n", "", - strp_stats.rx_msgs, - strp_stats.rx_bytes, + strp_stats.msgs, + strp_stats.bytes, psock_stats.tx_msgs, psock_stats.tx_bytes, psock_stats.reserved, psock_stats.unreserved, - strp_stats.rx_aborts, - strp_stats.rx_interrupted, - strp_stats.rx_unrecov_intr, - strp_stats.rx_mem_fail, - strp_stats.rx_need_more_hdr, - strp_stats.rx_bad_hdr_len, - strp_stats.rx_msg_too_big, - strp_stats.rx_msg_timeouts, + strp_stats.aborts, + strp_stats.interrupted, + strp_stats.unrecov_intr, + strp_stats.mem_fail, + strp_stats.need_more_hdr, + strp_stats.bad_hdr_len, + strp_stats.msg_too_big, + strp_stats.msg_timeouts, psock_stats.tx_aborts); return 0; diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index da49191f7ad0..0b750a22c4b9 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -96,12 +96,12 @@ static void kcm_update_rx_mux_stats(struct kcm_mux *mux, struct kcm_psock *psock) { STRP_STATS_ADD(mux->stats.rx_bytes, - psock->strp.stats.rx_bytes - + psock->strp.stats.bytes - psock->saved_rx_bytes); mux->stats.rx_msgs += - psock->strp.stats.rx_msgs - psock->saved_rx_msgs; - psock->saved_rx_msgs = psock->strp.stats.rx_msgs; - psock->saved_rx_bytes = psock->strp.stats.rx_bytes; + psock->strp.stats.msgs - psock->saved_rx_msgs; + psock->saved_rx_msgs = psock->strp.stats.msgs; + psock->saved_rx_bytes = psock->strp.stats.bytes; } static void kcm_update_tx_mux_stats(struct kcm_mux *mux, @@ -1118,7 +1118,7 @@ static int kcm_recvmsg(struct socket *sock, struct msghdr *msg, struct kcm_sock *kcm = kcm_sk(sk); int err = 0; long timeo; - struct strp_rx_msg *rxm; + struct strp_msg *stm; int copied = 0; struct sk_buff *skb; @@ -1132,26 +1132,26 @@ static int kcm_recvmsg(struct socket *sock, struct msghdr *msg, /* Okay, have a message on the receive queue */ - rxm = strp_rx_msg(skb); + stm = strp_msg(skb); - if (len > rxm->full_len) - len = rxm->full_len; + if (len > stm->full_len) + len = stm->full_len; - err = skb_copy_datagram_msg(skb, rxm->offset, msg, len); + err = skb_copy_datagram_msg(skb, stm->offset, msg, len); if (err < 0) goto out; copied = len; if (likely(!(flags & MSG_PEEK))) { KCM_STATS_ADD(kcm->stats.rx_bytes, copied); - if (copied < rxm->full_len) { + if (copied < stm->full_len) { if (sock->type == SOCK_DGRAM) { /* Truncated message */ msg->msg_flags |= MSG_TRUNC; goto msg_finished; } - rxm->offset += copied; - rxm->full_len -= copied; + stm->offset += copied; + stm->full_len -= copied; } else { msg_finished: /* Finished with message */ @@ -1175,7 +1175,7 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos, struct sock *sk = sock->sk; struct kcm_sock *kcm = kcm_sk(sk); long timeo; - struct strp_rx_msg *rxm; + struct strp_msg *stm; int err = 0; ssize_t copied; struct sk_buff *skb; @@ -1192,12 +1192,12 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos, /* Okay, have a message on the receive queue */ - rxm = strp_rx_msg(skb); + stm = strp_msg(skb); - if (len > rxm->full_len) - len = rxm->full_len; + if (len > stm->full_len) + len = stm->full_len; - copied = skb_splice_bits(skb, sk, rxm->offset, pipe, len, flags); + copied = skb_splice_bits(skb, sk, stm->offset, pipe, len, flags); if (copied < 0) { err = copied; goto err_out; @@ -1205,8 +1205,8 @@ static ssize_t kcm_splice_read(struct socket *sock, loff_t *ppos, KCM_STATS_ADD(kcm->stats.rx_bytes, copied); - rxm->offset += copied; - rxm->full_len -= copied; + stm->offset += copied; + stm->full_len -= copied; /* We have no way to return MSG_EOR. If all the bytes have been * read we still leave the message in the receive socket buffer. @@ -1376,13 +1376,21 @@ static int kcm_attach(struct socket *sock, struct socket *csock, struct kcm_psock *psock = NULL, *tpsock; struct list_head *head; int index = 0; - struct strp_callbacks cb; + static const struct strp_callbacks cb = { + .rcv_msg = kcm_rcv_strparser, + .parse_msg = kcm_parse_func_strparser, + .read_sock_done = kcm_read_sock_done, + }; int err; csk = csock->sk; if (!csk) return -EINVAL; + /* We must prevent loops or risk deadlock ! */ + if (csk->sk_family == PF_KCM) + return -EOPNOTSUPP; + psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL); if (!psock) return -ENOMEM; @@ -1391,11 +1399,6 @@ static int kcm_attach(struct socket *sock, struct socket *csock, psock->sk = csk; psock->bpf_prog = prog; - cb.rcv_msg = kcm_rcv_strparser; - cb.abort_parser = NULL; - cb.parse_msg = kcm_parse_func_strparser; - cb.read_sock_done = kcm_read_sock_done; - err = strp_init(&psock->strp, csk, &cb); if (err) { kmem_cache_free(kcm_psockp, psock); @@ -1647,7 +1650,7 @@ static int kcm_clone(struct socket *osock, struct kcm_clone *info, } newfile = sock_alloc_file(newsock, 0, osock->sk->sk_prot_creator->name); - if (unlikely(IS_ERR(newfile))) { + if (IS_ERR(newfile)) { err = PTR_ERR(newfile); goto out_sock_alloc_fail; } diff --git a/net/key/af_key.c b/net/key/af_key.c index 98f4d8211b9a..3dffb892d52c 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2399,8 +2399,6 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa out: xfrm_pol_put(xp); - if (err == 0) - xfrm_garbage_collect(net); return err; } @@ -2651,8 +2649,6 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_ out: xfrm_pol_put(xp); - if (delete && err == 0) - xfrm_garbage_collect(net); return err; } @@ -2752,8 +2748,6 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad int err, err2; err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, true); - if (!err) - xfrm_garbage_collect(net); err2 = unicast_flush_resp(sk, hdr); if (err || err2) { if (err == -ESRCH) /* empty table - old silent behavior */ @@ -3851,7 +3845,7 @@ static void __net_exit pfkey_net_exit(struct net *net) struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); pfkey_exit_proc(net); - BUG_ON(!hlist_empty(&net_pfkey->table)); + WARN_ON(!hlist_empty(&net_pfkey->table)); } static struct pernet_operations pfkey_net_ops = { diff --git a/net/l2tp/Makefile b/net/l2tp/Makefile index 2870f41ea44d..399a7e5db2f4 100644 --- a/net/l2tp/Makefile +++ b/net/l2tp/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the L2TP. # diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index b0c2d4ae781d..115918ad8eca 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -100,8 +100,6 @@ struct l2tp_skb_cb { #define L2TP_SKB_CB(skb) ((struct l2tp_skb_cb *) &skb->cb[sizeof(struct inet_skb_parm)]) -static atomic_t l2tp_tunnel_count; -static atomic_t l2tp_session_count; static struct workqueue_struct *l2tp_wq; /* per-net private data for this module */ @@ -113,7 +111,6 @@ struct l2tp_net { spinlock_t l2tp_session_hlist_lock; }; -static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel); static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk) { @@ -127,39 +124,6 @@ static inline struct l2tp_net *l2tp_pernet(const struct net *net) return net_generic(net, l2tp_net_id); } -/* Tunnel reference counts. Incremented per session that is added to - * the tunnel. - */ -static inline void l2tp_tunnel_inc_refcount_1(struct l2tp_tunnel *tunnel) -{ - refcount_inc(&tunnel->ref_count); -} - -static inline void l2tp_tunnel_dec_refcount_1(struct l2tp_tunnel *tunnel) -{ - if (refcount_dec_and_test(&tunnel->ref_count)) - l2tp_tunnel_free(tunnel); -} -#ifdef L2TP_REFCNT_DEBUG -#define l2tp_tunnel_inc_refcount(_t) \ -do { \ - pr_debug("l2tp_tunnel_inc_refcount: %s:%d %s: cnt=%d\n", \ - __func__, __LINE__, (_t)->name, \ - refcount_read(&_t->ref_count)); \ - l2tp_tunnel_inc_refcount_1(_t); \ -} while (0) -#define l2tp_tunnel_dec_refcount(_t) \ -do { \ - pr_debug("l2tp_tunnel_dec_refcount: %s:%d %s: cnt=%d\n", \ - __func__, __LINE__, (_t)->name, \ - refcount_read(&_t->ref_count)); \ - l2tp_tunnel_dec_refcount_1(_t); \ -} while (0) -#else -#define l2tp_tunnel_inc_refcount(t) l2tp_tunnel_inc_refcount_1(t) -#define l2tp_tunnel_dec_refcount(t) l2tp_tunnel_dec_refcount_1(t) -#endif - /* Session hash global list for L2TPv3. * The session_id SHOULD be random according to RFC3931, but several * L2TP implementations use incrementing session_ids. So we do a real @@ -229,12 +193,31 @@ l2tp_session_id_hash(struct l2tp_tunnel *tunnel, u32 session_id) return &tunnel->session_hlist[hash_32(session_id, L2TP_HASH_BITS)]; } -/* Lookup a session. A new reference is held on the returned session. - * Optionally calls session->ref() too if do_ref is true. - */ +/* Lookup a tunnel. A new reference is held on the returned tunnel. */ +struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id) +{ + const struct l2tp_net *pn = l2tp_pernet(net); + struct l2tp_tunnel *tunnel; + + rcu_read_lock_bh(); + list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) { + if (tunnel->tunnel_id == tunnel_id) { + l2tp_tunnel_inc_refcount(tunnel); + rcu_read_unlock_bh(); + + return tunnel; + } + } + rcu_read_unlock_bh(); + + return NULL; +} +EXPORT_SYMBOL_GPL(l2tp_tunnel_get); + +/* Lookup a session. A new reference is held on the returned session. */ struct l2tp_session *l2tp_session_get(const struct net *net, struct l2tp_tunnel *tunnel, - u32 session_id, bool do_ref) + u32 session_id) { struct hlist_head *session_list; struct l2tp_session *session; @@ -248,8 +231,6 @@ struct l2tp_session *l2tp_session_get(const struct net *net, hlist_for_each_entry_rcu(session, session_list, global_hlist) { if (session->session_id == session_id) { l2tp_session_inc_refcount(session); - if (do_ref && session->ref) - session->ref(session); rcu_read_unlock_bh(); return session; @@ -265,8 +246,6 @@ struct l2tp_session *l2tp_session_get(const struct net *net, hlist_for_each_entry(session, session_list, hlist) { if (session->session_id == session_id) { l2tp_session_inc_refcount(session); - if (do_ref && session->ref) - session->ref(session); read_unlock_bh(&tunnel->hlist_lock); return session; @@ -278,8 +257,7 @@ struct l2tp_session *l2tp_session_get(const struct net *net, } EXPORT_SYMBOL_GPL(l2tp_session_get); -struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth, - bool do_ref) +struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth) { int hash; struct l2tp_session *session; @@ -290,8 +268,6 @@ struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth, hlist_for_each_entry(session, &tunnel->session_hlist[hash], hlist) { if (++count > nth) { l2tp_session_inc_refcount(session); - if (do_ref && session->ref) - session->ref(session); read_unlock_bh(&tunnel->hlist_lock); return session; } @@ -308,8 +284,7 @@ EXPORT_SYMBOL_GPL(l2tp_session_get_nth); * This is very inefficient but is only used by management interfaces. */ struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net, - const char *ifname, - bool do_ref) + const char *ifname) { struct l2tp_net *pn = l2tp_pernet(net); int hash; @@ -320,8 +295,6 @@ struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net, hlist_for_each_entry_rcu(session, &pn->l2tp_session_hlist[hash], global_hlist) { if (!strcmp(session->ifname, ifname)) { l2tp_session_inc_refcount(session); - if (do_ref && session->ref) - session->ref(session); rcu_read_unlock_bh(); return session; @@ -335,20 +308,28 @@ struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net, } EXPORT_SYMBOL_GPL(l2tp_session_get_by_ifname); -static int l2tp_session_add_to_tunnel(struct l2tp_tunnel *tunnel, - struct l2tp_session *session) +int l2tp_session_register(struct l2tp_session *session, + struct l2tp_tunnel *tunnel) { struct l2tp_session *session_walk; struct hlist_head *g_head; struct hlist_head *head; struct l2tp_net *pn; + int err; head = l2tp_session_id_hash(tunnel, session->session_id); write_lock_bh(&tunnel->hlist_lock); + if (!tunnel->acpt_newsess) { + err = -ENODEV; + goto err_tlock; + } + hlist_for_each_entry(session_walk, head, hlist) - if (session_walk->session_id == session->session_id) - goto exist; + if (session_walk->session_id == session->session_id) { + err = -EEXIST; + goto err_tlock; + } if (tunnel->version == L2TP_HDR_VER_3) { pn = l2tp_pernet(tunnel->l2tp_net); @@ -356,12 +337,21 @@ static int l2tp_session_add_to_tunnel(struct l2tp_tunnel *tunnel, session->session_id); spin_lock_bh(&pn->l2tp_session_hlist_lock); + hlist_for_each_entry(session_walk, g_head, global_hlist) - if (session_walk->session_id == session->session_id) - goto exist_glob; + if (session_walk->session_id == session->session_id) { + err = -EEXIST; + goto err_tlock_pnlock; + } + l2tp_tunnel_inc_refcount(tunnel); + sock_hold(tunnel->sock); hlist_add_head_rcu(&session->global_hlist, g_head); + spin_unlock_bh(&pn->l2tp_session_hlist_lock); + } else { + l2tp_tunnel_inc_refcount(tunnel); + sock_hold(tunnel->sock); } hlist_add_head(&session->hlist, head); @@ -369,13 +359,14 @@ static int l2tp_session_add_to_tunnel(struct l2tp_tunnel *tunnel, return 0; -exist_glob: +err_tlock_pnlock: spin_unlock_bh(&pn->l2tp_session_hlist_lock); -exist: +err_tlock: write_unlock_bh(&tunnel->hlist_lock); - return -EEXIST; + return err; } +EXPORT_SYMBOL_GPL(l2tp_session_register); /* Lookup a tunnel by id */ @@ -480,9 +471,6 @@ static void l2tp_recv_dequeue_skb(struct l2tp_session *session, struct sk_buff * (*session->recv_skb)(session, skb, L2TP_SKB_CB(skb)->length); else kfree_skb(skb); - - if (session->deref) - (*session->deref)(session); } /* Dequeue skbs from the session's reorder_q, subject to packet order. @@ -511,8 +499,6 @@ start: session->reorder_skip = 1; __skb_unlink(skb, &session->reorder_q); kfree_skb(skb); - if (session->deref) - (*session->deref)(session); continue; } @@ -685,9 +671,6 @@ discard: * a data (not control) frame before coming here. Fields up to the * session-id have already been parsed and ptr points to the data * after the session-id. - * - * session->ref() must have been called prior to l2tp_recv_common(). - * session->deref() will be called automatically after skb is processed. */ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, unsigned char *ptr, unsigned char *optr, u16 hdrflags, @@ -854,9 +837,6 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb, discard: atomic_long_inc(&session->stats.rx_errors); kfree_skb(skb); - - if (session->deref) - (*session->deref)(session); } EXPORT_SYMBOL(l2tp_recv_common); @@ -870,8 +850,6 @@ int l2tp_session_queue_purge(struct l2tp_session *session) while ((skb = skb_dequeue(&session->reorder_q))) { atomic_long_inc(&session->stats.rx_errors); kfree_skb(skb); - if (session->deref) - (*session->deref)(session); } return 0; } @@ -963,13 +941,10 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb, } /* Find the session context */ - session = l2tp_session_get(tunnel->l2tp_net, tunnel, session_id, true); + session = l2tp_session_get(tunnel->l2tp_net, tunnel, session_id); if (!session || !session->recv_skb) { - if (session) { - if (session->deref) - session->deref(session); + if (session) l2tp_session_dec_refcount(session); - } /* Not found? Pass to userspace to deal with */ l2tp_info(tunnel, L2TP_MSG_DATA, @@ -1264,16 +1239,14 @@ static void l2tp_tunnel_destruct(struct sock *sk) /* Remove hooks into tunnel socket */ sk->sk_destruct = tunnel->old_sk_destruct; sk->sk_user_data = NULL; - tunnel->sock = NULL; /* Remove the tunnel struct from the tunnel list */ pn = l2tp_pernet(tunnel->l2tp_net); spin_lock_bh(&pn->l2tp_tunnel_list_lock); list_del_rcu(&tunnel->list); spin_unlock_bh(&pn->l2tp_tunnel_list_lock); - atomic_dec(&l2tp_tunnel_count); - l2tp_tunnel_closeall(tunnel); + tunnel->sock = NULL; l2tp_tunnel_dec_refcount(tunnel); /* Call the original destructor */ @@ -1298,6 +1271,7 @@ void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel) tunnel->name); write_lock_bh(&tunnel->hlist_lock); + tunnel->acpt_newsess = false; for (hash = 0; hash < L2TP_HASH_SIZE; hash++) { again: hlist_for_each_safe(walk, tmp, &tunnel->session_hlist[hash]) { @@ -1308,8 +1282,8 @@ again: hlist_del_init(&session->hlist); - if (session->ref != NULL) - (*session->ref)(session); + if (test_and_set_bit(0, &session->dead)) + goto again; write_unlock_bh(&tunnel->hlist_lock); @@ -1319,9 +1293,6 @@ again: if (session->session_close != NULL) (*session->session_close)(session); - if (session->deref != NULL) - (*session->deref)(session); - l2tp_session_dec_refcount(session); write_lock_bh(&tunnel->hlist_lock); @@ -1348,17 +1319,6 @@ static void l2tp_udp_encap_destroy(struct sock *sk) } } -/* Really kill the tunnel. - * Come here only when all sessions have been cleared from the tunnel. - */ -static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel) -{ - BUG_ON(refcount_read(&tunnel->ref_count) != 0); - BUG_ON(tunnel->sock != NULL); - l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: free...\n", tunnel->name); - kfree_rcu(tunnel, rcu); -} - /* Workqueue tunnel deletion function */ static void l2tp_tunnel_del_work(struct work_struct *work) { @@ -1605,6 +1565,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 tunnel->magic = L2TP_TUNNEL_MAGIC; sprintf(&tunnel->name[0], "tunl %u", tunnel_id); rwlock_init(&tunnel->hlist_lock); + tunnel->acpt_newsess = true; /* The net we belong to */ tunnel->l2tp_net = net; @@ -1662,7 +1623,6 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 /* Add tunnel to our list */ INIT_LIST_HEAD(&tunnel->list); - atomic_inc(&l2tp_tunnel_count); /* Bump the reference count. The tunnel context is deleted * only when this drops to zero. Must be done before list insertion @@ -1689,14 +1649,12 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_create); /* This function is used by the netlink TUNNEL_DELETE command. */ -int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel) +void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel) { - l2tp_tunnel_inc_refcount(tunnel); - if (false == queue_work(l2tp_wq, &tunnel->del_work)) { - l2tp_tunnel_dec_refcount(tunnel); - return 1; + if (!test_and_set_bit(0, &tunnel->dead)) { + l2tp_tunnel_inc_refcount(tunnel); + queue_work(l2tp_wq, &tunnel->del_work); } - return 0; } EXPORT_SYMBOL_GPL(l2tp_tunnel_delete); @@ -1710,8 +1668,6 @@ void l2tp_session_free(struct l2tp_session *session) if (tunnel) { BUG_ON(tunnel->magic != L2TP_TUNNEL_MAGIC); - if (session->session_id != 0) - atomic_dec(&l2tp_session_count); sock_put(tunnel->sock); session->tunnel = NULL; l2tp_tunnel_dec_refcount(tunnel); @@ -1754,15 +1710,16 @@ EXPORT_SYMBOL_GPL(__l2tp_session_unhash); */ int l2tp_session_delete(struct l2tp_session *session) { - if (session->ref) - (*session->ref)(session); + if (test_and_set_bit(0, &session->dead)) + return 0; + __l2tp_session_unhash(session); l2tp_session_queue_purge(session); if (session->session_close != NULL) (*session->session_close)(session); - if (session->deref) - (*session->deref)(session); + l2tp_session_dec_refcount(session); + return 0; } EXPORT_SYMBOL_GPL(l2tp_session_delete); @@ -1788,7 +1745,6 @@ EXPORT_SYMBOL_GPL(l2tp_session_set_header_len); struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg) { struct l2tp_session *session; - int err; session = kzalloc(sizeof(struct l2tp_session) + priv_size, GFP_KERNEL); if (session != NULL) { @@ -1844,25 +1800,7 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn l2tp_session_set_header_len(session, tunnel->version); - err = l2tp_session_add_to_tunnel(tunnel, session); - if (err) { - kfree(session); - - return ERR_PTR(err); - } - - /* Bump the reference count. The session context is deleted - * only when this drops to zero. - */ refcount_set(&session->ref_count, 1); - l2tp_tunnel_inc_refcount(tunnel); - - /* Ensure tunnel socket isn't deleted */ - sock_hold(tunnel->sock); - - /* Ignore management session in session count value */ - if (session->session_id != 0) - atomic_inc(&l2tp_session_count); return session; } @@ -1895,15 +1833,19 @@ static __net_exit void l2tp_exit_net(struct net *net) { struct l2tp_net *pn = l2tp_pernet(net); struct l2tp_tunnel *tunnel = NULL; + int hash; rcu_read_lock_bh(); list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) { - (void)l2tp_tunnel_delete(tunnel); + l2tp_tunnel_delete(tunnel); } rcu_read_unlock_bh(); flush_workqueue(l2tp_wq); rcu_barrier(); + + for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++) + WARN_ON_ONCE(!hlist_empty(&pn->l2tp_session_hlist[hash])); } static struct pernet_operations l2tp_net_ops = { diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index cdb6e3327f74..9534e16965cc 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -76,6 +76,7 @@ struct l2tp_session_cfg { struct l2tp_session { int magic; /* should be * L2TP_SESSION_MAGIC */ + long dead; struct l2tp_tunnel *tunnel; /* back pointer to tunnel * context */ @@ -128,8 +129,6 @@ struct l2tp_session { int (*build_header)(struct l2tp_session *session, void *buf); void (*recv_skb)(struct l2tp_session *session, struct sk_buff *skb, int data_len); void (*session_close)(struct l2tp_session *session); - void (*ref)(struct l2tp_session *session); - void (*deref)(struct l2tp_session *session); #if IS_ENABLED(CONFIG_L2TP_DEBUGFS) void (*show)(struct seq_file *m, void *priv); #endif @@ -160,8 +159,15 @@ struct l2tp_tunnel_cfg { struct l2tp_tunnel { int magic; /* Should be L2TP_TUNNEL_MAGIC */ + + unsigned long dead; + struct rcu_head rcu; rwlock_t hlist_lock; /* protect session_hlist */ + bool acpt_newsess; /* Indicates whether this + * tunnel accepts new sessions. + * Protected by hlist_lock. + */ struct hlist_head session_hlist[L2TP_HASH_SIZE]; /* hashed list of sessions, * hashed by id */ @@ -197,7 +203,9 @@ struct l2tp_tunnel { }; struct l2tp_nl_cmd_ops { - int (*session_create)(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg); + int (*session_create)(struct net *net, struct l2tp_tunnel *tunnel, + u32 session_id, u32 peer_session_id, + struct l2tp_session_cfg *cfg); int (*session_delete)(struct l2tp_session *session); }; @@ -231,14 +239,14 @@ out: return tunnel; } +struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id); + struct l2tp_session *l2tp_session_get(const struct net *net, struct l2tp_tunnel *tunnel, - u32 session_id, bool do_ref); -struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth, - bool do_ref); + u32 session_id); +struct l2tp_session *l2tp_session_get_nth(struct l2tp_tunnel *tunnel, int nth); struct l2tp_session *l2tp_session_get_by_ifname(const struct net *net, - const char *ifname, - bool do_ref); + const char *ifname); struct l2tp_tunnel *l2tp_tunnel_find(const struct net *net, u32 tunnel_id); struct l2tp_tunnel *l2tp_tunnel_find_nth(const struct net *net, int nth); @@ -246,11 +254,14 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 peer_tunnel_id, struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp); void l2tp_tunnel_closeall(struct l2tp_tunnel *tunnel); -int l2tp_tunnel_delete(struct l2tp_tunnel *tunnel); +void l2tp_tunnel_delete(struct l2tp_tunnel *tunnel); struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunnel, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg); +int l2tp_session_register(struct l2tp_session *session, + struct l2tp_tunnel *tunnel); + void __l2tp_session_unhash(struct l2tp_session *session); int l2tp_session_delete(struct l2tp_session *session); void l2tp_session_free(struct l2tp_session *session); @@ -269,40 +280,31 @@ int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type); int l2tp_ioctl(struct sock *sk, int cmd, unsigned long arg); +static inline void l2tp_tunnel_inc_refcount(struct l2tp_tunnel *tunnel) +{ + refcount_inc(&tunnel->ref_count); +} + +static inline void l2tp_tunnel_dec_refcount(struct l2tp_tunnel *tunnel) +{ + if (refcount_dec_and_test(&tunnel->ref_count)) + kfree_rcu(tunnel, rcu); +} + /* Session reference counts. Incremented when code obtains a reference * to a session. */ -static inline void l2tp_session_inc_refcount_1(struct l2tp_session *session) +static inline void l2tp_session_inc_refcount(struct l2tp_session *session) { refcount_inc(&session->ref_count); } -static inline void l2tp_session_dec_refcount_1(struct l2tp_session *session) +static inline void l2tp_session_dec_refcount(struct l2tp_session *session) { if (refcount_dec_and_test(&session->ref_count)) l2tp_session_free(session); } -#ifdef L2TP_REFCNT_DEBUG -#define l2tp_session_inc_refcount(_s) \ -do { \ - pr_debug("l2tp_session_inc_refcount: %s:%d %s: cnt=%d\n", \ - __func__, __LINE__, (_s)->name, \ - refcount_read(&_s->ref_count)); \ - l2tp_session_inc_refcount_1(_s); \ -} while (0) -#define l2tp_session_dec_refcount(_s) \ -do { \ - pr_debug("l2tp_session_dec_refcount: %s:%d %s: cnt=%d\n", \ - __func__, __LINE__, (_s)->name, \ - refcount_read(&_s->ref_count)); \ - l2tp_session_dec_refcount_1(_s); \ -} while (0) -#else -#define l2tp_session_inc_refcount(s) l2tp_session_inc_refcount_1(s) -#define l2tp_session_dec_refcount(s) l2tp_session_dec_refcount_1(s) -#endif - #define l2tp_printk(ptr, type, func, fmt, ...) \ do { \ if (((ptr)->debug) & (type)) \ diff --git a/net/l2tp/l2tp_debugfs.c b/net/l2tp/l2tp_debugfs.c index 53bae54c4d6e..eb69411bcb47 100644 --- a/net/l2tp/l2tp_debugfs.c +++ b/net/l2tp/l2tp_debugfs.c @@ -53,7 +53,7 @@ static void l2tp_dfs_next_tunnel(struct l2tp_dfs_seq_data *pd) static void l2tp_dfs_next_session(struct l2tp_dfs_seq_data *pd) { - pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx, true); + pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx); pd->session_idx++; if (pd->session == NULL) { @@ -241,8 +241,6 @@ static int l2tp_dfs_seq_show(struct seq_file *m, void *v) l2tp_dfs_seq_tunnel_show(m, pd->tunnel); } else { l2tp_dfs_seq_session_show(m, pd->session); - if (pd->session->deref) - pd->session->deref(pd->session); l2tp_session_dec_refcount(pd->session); } diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c index 4de2ec94b08c..5c366ecfa1cb 100644 --- a/net/l2tp/l2tp_eth.c +++ b/net/l2tp/l2tp_eth.c @@ -41,10 +41,7 @@ /* via netdev_priv() */ struct l2tp_eth { - struct net_device *dev; - struct sock *tunnel_sock; struct l2tp_session *session; - struct list_head list; atomic_long_t tx_bytes; atomic_long_t tx_packets; atomic_long_t tx_dropped; @@ -55,26 +52,12 @@ struct l2tp_eth { /* via l2tp_session_priv() */ struct l2tp_eth_sess { - struct net_device *dev; + struct net_device __rcu *dev; }; -/* per-net private data for this module */ -static unsigned int l2tp_eth_net_id; -struct l2tp_eth_net { - struct list_head l2tp_eth_dev_list; - spinlock_t l2tp_eth_lock; -}; - -static inline struct l2tp_eth_net *l2tp_eth_pernet(struct net *net) -{ - return net_generic(net, l2tp_eth_net_id); -} static int l2tp_eth_dev_init(struct net_device *dev) { - struct l2tp_eth *priv = netdev_priv(dev); - - priv->dev = dev; eth_hw_addr_random(dev); eth_broadcast_addr(dev->broadcast); netdev_lockdep_set_classes(dev); @@ -85,12 +68,13 @@ static int l2tp_eth_dev_init(struct net_device *dev) static void l2tp_eth_dev_uninit(struct net_device *dev) { struct l2tp_eth *priv = netdev_priv(dev); - struct l2tp_eth_net *pn = l2tp_eth_pernet(dev_net(dev)); + struct l2tp_eth_sess *spriv; - spin_lock(&pn->l2tp_eth_lock); - list_del_init(&priv->list); - spin_unlock(&pn->l2tp_eth_lock); - dev_put(dev); + spriv = l2tp_session_priv(priv->session); + RCU_INIT_POINTER(spriv->dev, NULL); + /* No need for synchronize_net() here. We're called by + * unregister_netdev*(), which does the synchronisation for us. + */ } static int l2tp_eth_dev_xmit(struct sk_buff *skb, struct net_device *dev) @@ -148,8 +132,8 @@ static void l2tp_eth_dev_setup(struct net_device *dev) static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, int data_len) { struct l2tp_eth_sess *spriv = l2tp_session_priv(session); - struct net_device *dev = spriv->dev; - struct l2tp_eth *priv = netdev_priv(dev); + struct net_device *dev; + struct l2tp_eth *priv; if (session->debug & L2TP_MSG_DATA) { unsigned int length; @@ -173,16 +157,25 @@ static void l2tp_eth_dev_recv(struct l2tp_session *session, struct sk_buff *skb, skb_dst_drop(skb); nf_reset(skb); + rcu_read_lock(); + dev = rcu_dereference(spriv->dev); + if (!dev) + goto error_rcu; + + priv = netdev_priv(dev); if (dev_forward_skb(dev, skb) == NET_RX_SUCCESS) { atomic_long_inc(&priv->rx_packets); atomic_long_add(data_len, &priv->rx_bytes); } else { atomic_long_inc(&priv->rx_errors); } + rcu_read_unlock(); + return; +error_rcu: + rcu_read_unlock(); error: - atomic_long_inc(&priv->rx_errors); kfree_skb(skb); } @@ -193,11 +186,15 @@ static void l2tp_eth_delete(struct l2tp_session *session) if (session) { spriv = l2tp_session_priv(session); - dev = spriv->dev; + + rtnl_lock(); + dev = rtnl_dereference(spriv->dev); if (dev) { - unregister_netdev(dev); - spriv->dev = NULL; + unregister_netdevice(dev); + rtnl_unlock(); module_put(THIS_MODULE); + } else { + rtnl_unlock(); } } } @@ -207,9 +204,20 @@ static void l2tp_eth_show(struct seq_file *m, void *arg) { struct l2tp_session *session = arg; struct l2tp_eth_sess *spriv = l2tp_session_priv(session); - struct net_device *dev = spriv->dev; + struct net_device *dev; + + rcu_read_lock(); + dev = rcu_dereference(spriv->dev); + if (!dev) { + rcu_read_unlock(); + return; + } + dev_hold(dev); + rcu_read_unlock(); seq_printf(m, " interface %s\n", dev->name); + + dev_put(dev); } #endif @@ -262,23 +270,17 @@ static void l2tp_eth_adjust_mtu(struct l2tp_tunnel *tunnel, dev->needed_headroom += session->hdr_len; } -static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg) +static int l2tp_eth_create(struct net *net, struct l2tp_tunnel *tunnel, + u32 session_id, u32 peer_session_id, + struct l2tp_session_cfg *cfg) { unsigned char name_assign_type; struct net_device *dev; char name[IFNAMSIZ]; - struct l2tp_tunnel *tunnel; struct l2tp_session *session; struct l2tp_eth *priv; struct l2tp_eth_sess *spriv; int rc; - struct l2tp_eth_net *pn; - - tunnel = l2tp_tunnel_find(net, tunnel_id); - if (!tunnel) { - rc = -ENODEV; - goto out; - } if (cfg->ifname) { strlcpy(name, cfg->ifname, IFNAMSIZ); @@ -292,14 +294,14 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p peer_session_id, cfg); if (IS_ERR(session)) { rc = PTR_ERR(session); - goto out; + goto err; } dev = alloc_netdev(sizeof(*priv), name, name_assign_type, l2tp_eth_dev_setup); if (!dev) { rc = -ENOMEM; - goto out_del_session; + goto err_sess; } dev_net_set(dev, net); @@ -308,11 +310,8 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p l2tp_eth_adjust_mtu(tunnel, session, dev); priv = netdev_priv(dev); - priv->dev = dev; priv->session = session; - INIT_LIST_HEAD(&priv->list); - priv->tunnel_sock = tunnel->sock; session->recv_skb = l2tp_eth_dev_recv; session->session_close = l2tp_eth_delete; #if IS_ENABLED(CONFIG_L2TP_DEBUGFS) @@ -320,48 +319,50 @@ static int l2tp_eth_create(struct net *net, u32 tunnel_id, u32 session_id, u32 p #endif spriv = l2tp_session_priv(session); - spriv->dev = dev; - rc = register_netdev(dev); - if (rc < 0) - goto out_del_dev; + l2tp_session_inc_refcount(session); - __module_get(THIS_MODULE); - /* Must be done after register_netdev() */ - strlcpy(session->ifname, dev->name, IFNAMSIZ); + rtnl_lock(); - dev_hold(dev); - pn = l2tp_eth_pernet(dev_net(dev)); - spin_lock(&pn->l2tp_eth_lock); - list_add(&priv->list, &pn->l2tp_eth_dev_list); - spin_unlock(&pn->l2tp_eth_lock); + /* Register both device and session while holding the rtnl lock. This + * ensures that l2tp_eth_delete() will see that there's a device to + * unregister, even if it happened to run before we assign spriv->dev. + */ + rc = l2tp_session_register(session, tunnel); + if (rc < 0) { + rtnl_unlock(); + goto err_sess_dev; + } - return 0; + rc = register_netdevice(dev); + if (rc < 0) { + rtnl_unlock(); + l2tp_session_delete(session); + l2tp_session_dec_refcount(session); + free_netdev(dev); -out_del_dev: - free_netdev(dev); - spriv->dev = NULL; -out_del_session: - l2tp_session_delete(session); -out: - return rc; -} + return rc; + } -static __net_init int l2tp_eth_init_net(struct net *net) -{ - struct l2tp_eth_net *pn = net_generic(net, l2tp_eth_net_id); + strlcpy(session->ifname, dev->name, IFNAMSIZ); + rcu_assign_pointer(spriv->dev, dev); + + rtnl_unlock(); + + l2tp_session_dec_refcount(session); - INIT_LIST_HEAD(&pn->l2tp_eth_dev_list); - spin_lock_init(&pn->l2tp_eth_lock); + __module_get(THIS_MODULE); return 0; -} -static struct pernet_operations l2tp_eth_net_ops = { - .init = l2tp_eth_init_net, - .id = &l2tp_eth_net_id, - .size = sizeof(struct l2tp_eth_net), -}; +err_sess_dev: + l2tp_session_dec_refcount(session); + free_netdev(dev); +err_sess: + kfree(session); +err: + return rc; +} static const struct l2tp_nl_cmd_ops l2tp_eth_nl_cmd_ops = { @@ -376,25 +377,18 @@ static int __init l2tp_eth_init(void) err = l2tp_nl_register_ops(L2TP_PWTYPE_ETH, &l2tp_eth_nl_cmd_ops); if (err) - goto out; - - err = register_pernet_device(&l2tp_eth_net_ops); - if (err) - goto out_unreg; + goto err; pr_info("L2TP ethernet pseudowire support (L2TPv3)\n"); return 0; -out_unreg: - l2tp_nl_unregister_ops(L2TP_PWTYPE_ETH); -out: +err: return err; } static void __exit l2tp_eth_exit(void) { - unregister_pernet_device(&l2tp_eth_net_ops); l2tp_nl_unregister_ops(L2TP_PWTYPE_ETH); } diff --git a/net/l2tp/l2tp_ip.c b/net/l2tp/l2tp_ip.c index 4d322c1b7233..ff61124fdf59 100644 --- a/net/l2tp/l2tp_ip.c +++ b/net/l2tp/l2tp_ip.c @@ -123,6 +123,7 @@ static int l2tp_ip_recv(struct sk_buff *skb) unsigned char *ptr, *optr; struct l2tp_session *session; struct l2tp_tunnel *tunnel = NULL; + struct iphdr *iph; int length; if (!pskb_may_pull(skb, 4)) @@ -143,7 +144,7 @@ static int l2tp_ip_recv(struct sk_buff *skb) } /* Ok, this is a data packet. Lookup the session. */ - session = l2tp_session_get(net, NULL, session_id, true); + session = l2tp_session_get(net, NULL, session_id); if (!session) goto discard; @@ -178,24 +179,17 @@ pass_up: goto discard; tunnel_id = ntohl(*(__be32 *) &skb->data[4]); - tunnel = l2tp_tunnel_find(net, tunnel_id); - if (tunnel) { - sk = tunnel->sock; - sock_hold(sk); - } else { - struct iphdr *iph = (struct iphdr *) skb_network_header(skb); - - read_lock_bh(&l2tp_ip_lock); - sk = __l2tp_ip_bind_lookup(net, iph->daddr, iph->saddr, - inet_iif(skb), tunnel_id); - if (!sk) { - read_unlock_bh(&l2tp_ip_lock); - goto discard; - } + iph = (struct iphdr *)skb_network_header(skb); - sock_hold(sk); + read_lock_bh(&l2tp_ip_lock); + sk = __l2tp_ip_bind_lookup(net, iph->daddr, iph->saddr, inet_iif(skb), + tunnel_id); + if (!sk) { read_unlock_bh(&l2tp_ip_lock); + goto discard; } + sock_hold(sk); + read_unlock_bh(&l2tp_ip_lock); if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_put; @@ -205,8 +199,6 @@ pass_up: return sk_receive_skb(sk, skb, 1); discard_sess: - if (session->deref) - session->deref(session); l2tp_session_dec_refcount(session); goto discard; diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 88b397c30d86..192344688c06 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -136,6 +136,7 @@ static int l2tp_ip6_recv(struct sk_buff *skb) unsigned char *ptr, *optr; struct l2tp_session *session; struct l2tp_tunnel *tunnel = NULL; + struct ipv6hdr *iph; int length; if (!pskb_may_pull(skb, 4)) @@ -156,7 +157,7 @@ static int l2tp_ip6_recv(struct sk_buff *skb) } /* Ok, this is a data packet. Lookup the session. */ - session = l2tp_session_get(net, NULL, session_id, true); + session = l2tp_session_get(net, NULL, session_id); if (!session) goto discard; @@ -192,24 +193,17 @@ pass_up: goto discard; tunnel_id = ntohl(*(__be32 *) &skb->data[4]); - tunnel = l2tp_tunnel_find(net, tunnel_id); - if (tunnel) { - sk = tunnel->sock; - sock_hold(sk); - } else { - struct ipv6hdr *iph = ipv6_hdr(skb); - - read_lock_bh(&l2tp_ip6_lock); - sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, &iph->saddr, - inet6_iif(skb), tunnel_id); - if (!sk) { - read_unlock_bh(&l2tp_ip6_lock); - goto discard; - } + iph = ipv6_hdr(skb); - sock_hold(sk); + read_lock_bh(&l2tp_ip6_lock); + sk = __l2tp_ip6_bind_lookup(net, &iph->daddr, &iph->saddr, + inet6_iif(skb), tunnel_id); + if (!sk) { read_unlock_bh(&l2tp_ip6_lock); + goto discard; } + sock_hold(sk); + read_unlock_bh(&l2tp_ip6_lock); if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_put; @@ -219,8 +213,6 @@ pass_up: return sk_receive_skb(sk, skb, 1); discard_sess: - if (session->deref) - session->deref(session); l2tp_session_dec_refcount(session); goto discard; diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 12cfcd0ca807..a1f24fb2be98 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -48,8 +48,7 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, /* Accessed under genl lock */ static const struct l2tp_nl_cmd_ops *l2tp_nl_cmd_ops[__L2TP_PWTYPE_MAX]; -static struct l2tp_session *l2tp_nl_session_get(struct genl_info *info, - bool do_ref) +static struct l2tp_session *l2tp_nl_session_get(struct genl_info *info) { u32 tunnel_id; u32 session_id; @@ -60,15 +59,16 @@ static struct l2tp_session *l2tp_nl_session_get(struct genl_info *info, if (info->attrs[L2TP_ATTR_IFNAME]) { ifname = nla_data(info->attrs[L2TP_ATTR_IFNAME]); - session = l2tp_session_get_by_ifname(net, ifname, do_ref); + session = l2tp_session_get_by_ifname(net, ifname); } else if ((info->attrs[L2TP_ATTR_SESSION_ID]) && (info->attrs[L2TP_ATTR_CONN_ID])) { tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]); session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]); - tunnel = l2tp_tunnel_find(net, tunnel_id); - if (tunnel) - session = l2tp_session_get(net, tunnel, session_id, - do_ref); + tunnel = l2tp_tunnel_get(net, tunnel_id); + if (tunnel) { + session = l2tp_session_get(net, tunnel, session_id); + l2tp_tunnel_dec_refcount(tunnel); + } } return session; @@ -271,8 +271,8 @@ static int l2tp_nl_cmd_tunnel_delete(struct sk_buff *skb, struct genl_info *info } tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]); - tunnel = l2tp_tunnel_find(net, tunnel_id); - if (tunnel == NULL) { + tunnel = l2tp_tunnel_get(net, tunnel_id); + if (!tunnel) { ret = -ENODEV; goto out; } @@ -280,7 +280,9 @@ static int l2tp_nl_cmd_tunnel_delete(struct sk_buff *skb, struct genl_info *info l2tp_tunnel_notify(&l2tp_nl_family, info, tunnel, L2TP_CMD_TUNNEL_DELETE); - (void) l2tp_tunnel_delete(tunnel); + l2tp_tunnel_delete(tunnel); + + l2tp_tunnel_dec_refcount(tunnel); out: return ret; @@ -299,8 +301,8 @@ static int l2tp_nl_cmd_tunnel_modify(struct sk_buff *skb, struct genl_info *info } tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]); - tunnel = l2tp_tunnel_find(net, tunnel_id); - if (tunnel == NULL) { + tunnel = l2tp_tunnel_get(net, tunnel_id); + if (!tunnel) { ret = -ENODEV; goto out; } @@ -311,6 +313,8 @@ static int l2tp_nl_cmd_tunnel_modify(struct sk_buff *skb, struct genl_info *info ret = l2tp_tunnel_notify(&l2tp_nl_family, info, tunnel, L2TP_CMD_TUNNEL_MODIFY); + l2tp_tunnel_dec_refcount(tunnel); + out: return ret; } @@ -400,7 +404,7 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla if (nla_put_u16(skb, L2TP_ATTR_UDP_SPORT, ntohs(inet->inet_sport)) || nla_put_u16(skb, L2TP_ATTR_UDP_DPORT, ntohs(inet->inet_dport))) goto nla_put_failure; - /* NOBREAK */ + /* fall through */ case L2TP_ENCAPTYPE_IP: #if IS_ENABLED(CONFIG_IPV6) if (np) { @@ -438,34 +442,37 @@ static int l2tp_nl_cmd_tunnel_get(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[L2TP_ATTR_CONN_ID]) { ret = -EINVAL; - goto out; + goto err; } tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]); - tunnel = l2tp_tunnel_find(net, tunnel_id); - if (tunnel == NULL) { - ret = -ENODEV; - goto out; - } - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; - goto out; + goto err; + } + + tunnel = l2tp_tunnel_get(net, tunnel_id); + if (!tunnel) { + ret = -ENODEV; + goto err_nlmsg; } ret = l2tp_nl_tunnel_send(msg, info->snd_portid, info->snd_seq, NLM_F_ACK, tunnel, L2TP_CMD_TUNNEL_GET); if (ret < 0) - goto err_out; + goto err_nlmsg_tunnel; + + l2tp_tunnel_dec_refcount(tunnel); return genlmsg_unicast(net, msg, info->snd_portid); -err_out: +err_nlmsg_tunnel: + l2tp_tunnel_dec_refcount(tunnel); +err_nlmsg: nlmsg_free(msg); - -out: +err: return ret; } @@ -509,8 +516,9 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf ret = -EINVAL; goto out; } + tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]); - tunnel = l2tp_tunnel_find(net, tunnel_id); + tunnel = l2tp_tunnel_get(net, tunnel_id); if (!tunnel) { ret = -ENODEV; goto out; @@ -518,24 +526,24 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf if (!info->attrs[L2TP_ATTR_SESSION_ID]) { ret = -EINVAL; - goto out; + goto out_tunnel; } session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]); if (!info->attrs[L2TP_ATTR_PEER_SESSION_ID]) { ret = -EINVAL; - goto out; + goto out_tunnel; } peer_session_id = nla_get_u32(info->attrs[L2TP_ATTR_PEER_SESSION_ID]); if (!info->attrs[L2TP_ATTR_PW_TYPE]) { ret = -EINVAL; - goto out; + goto out_tunnel; } cfg.pw_type = nla_get_u16(info->attrs[L2TP_ATTR_PW_TYPE]); if (cfg.pw_type >= __L2TP_PWTYPE_MAX) { ret = -EINVAL; - goto out; + goto out_tunnel; } if (tunnel->version > 2) { @@ -557,7 +565,7 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf u16 len = nla_len(info->attrs[L2TP_ATTR_COOKIE]); if (len > 8) { ret = -EINVAL; - goto out; + goto out_tunnel; } cfg.cookie_len = len; memcpy(&cfg.cookie[0], nla_data(info->attrs[L2TP_ATTR_COOKIE]), len); @@ -566,7 +574,7 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf u16 len = nla_len(info->attrs[L2TP_ATTR_PEER_COOKIE]); if (len > 8) { ret = -EINVAL; - goto out; + goto out_tunnel; } cfg.peer_cookie_len = len; memcpy(&cfg.peer_cookie[0], nla_data(info->attrs[L2TP_ATTR_PEER_COOKIE]), len); @@ -609,7 +617,7 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf if ((l2tp_nl_cmd_ops[cfg.pw_type] == NULL) || (l2tp_nl_cmd_ops[cfg.pw_type]->session_create == NULL)) { ret = -EPROTONOSUPPORT; - goto out; + goto out_tunnel; } /* Check that pseudowire-specific params are present */ @@ -619,7 +627,7 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf case L2TP_PWTYPE_ETH_VLAN: if (!info->attrs[L2TP_ATTR_VLAN_ID]) { ret = -EINVAL; - goto out; + goto out_tunnel; } break; case L2TP_PWTYPE_ETH: @@ -633,13 +641,13 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf break; } - ret = -EPROTONOSUPPORT; - if (l2tp_nl_cmd_ops[cfg.pw_type]->session_create) - ret = (*l2tp_nl_cmd_ops[cfg.pw_type]->session_create)(net, tunnel_id, - session_id, peer_session_id, &cfg); + ret = l2tp_nl_cmd_ops[cfg.pw_type]->session_create(net, tunnel, + session_id, + peer_session_id, + &cfg); if (ret >= 0) { - session = l2tp_session_get(net, tunnel, session_id, false); + session = l2tp_session_get(net, tunnel, session_id); if (session) { ret = l2tp_session_notify(&l2tp_nl_family, info, session, L2TP_CMD_SESSION_CREATE); @@ -647,6 +655,8 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf } } +out_tunnel: + l2tp_tunnel_dec_refcount(tunnel); out: return ret; } @@ -657,7 +667,7 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf struct l2tp_session *session; u16 pw_type; - session = l2tp_nl_session_get(info, true); + session = l2tp_nl_session_get(info); if (session == NULL) { ret = -ENODEV; goto out; @@ -671,8 +681,6 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf if (l2tp_nl_cmd_ops[pw_type] && l2tp_nl_cmd_ops[pw_type]->session_delete) ret = (*l2tp_nl_cmd_ops[pw_type]->session_delete)(session); - if (session->deref) - session->deref(session); l2tp_session_dec_refcount(session); out: @@ -684,7 +692,7 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf int ret = 0; struct l2tp_session *session; - session = l2tp_nl_session_get(info, false); + session = l2tp_nl_session_get(info); if (session == NULL) { ret = -ENODEV; goto out; @@ -816,7 +824,7 @@ static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info) struct sk_buff *msg; int ret; - session = l2tp_nl_session_get(info, false); + session = l2tp_nl_session_get(info); if (session == NULL) { ret = -ENODEV; goto err; @@ -862,7 +870,7 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback goto out; } - session = l2tp_session_get_nth(tunnel, si, false); + session = l2tp_session_get_nth(tunnel, si); if (session == NULL) { ti++; tunnel = NULL; diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c index f0edb7209079..b412fc3351dc 100644 --- a/net/l2tp/l2tp_ppp.c +++ b/net/l2tp/l2tp_ppp.c @@ -122,10 +122,11 @@ struct pppol2tp_session { int owner; /* pid that opened the socket */ - struct sock *sock; /* Pointer to the session + struct mutex sk_lock; /* Protects .sk */ + struct sock __rcu *sk; /* Pointer to the session * PPPoX socket */ - struct sock *tunnel_sock; /* Pointer to the tunnel UDP - * socket */ + struct sock *__sk; /* Copy of .sk, for cleanup */ + struct rcu_head rcu; /* For asynchronous release */ int flags; /* accessed by PPPIOCGFLAGS. * Unused. */ }; @@ -138,6 +139,24 @@ static const struct ppp_channel_ops pppol2tp_chan_ops = { static const struct proto_ops pppol2tp_ops; +/* Retrieves the pppol2tp socket associated to a session. + * A reference is held on the returned socket, so this function must be paired + * with sock_put(). + */ +static struct sock *pppol2tp_session_get_sock(struct l2tp_session *session) +{ + struct pppol2tp_session *ps = l2tp_session_priv(session); + struct sock *sk; + + rcu_read_lock(); + sk = rcu_dereference(ps->sk); + if (sk) + sock_hold(sk); + rcu_read_unlock(); + + return sk; +} + /* Helpers to obtain tunnel/session contexts from sockets. */ static inline struct l2tp_session *pppol2tp_sock_to_session(struct sock *sk) @@ -224,7 +243,8 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int /* If the socket is bound, send it in to PPP's input queue. Otherwise * queue it on the session socket. */ - sk = ps->sock; + rcu_read_lock(); + sk = rcu_dereference(ps->sk); if (sk == NULL) goto no_sock; @@ -247,30 +267,16 @@ static void pppol2tp_recv(struct l2tp_session *session, struct sk_buff *skb, int kfree_skb(skb); } } + rcu_read_unlock(); return; no_sock: + rcu_read_unlock(); l2tp_info(session, L2TP_MSG_DATA, "%s: no socket\n", session->name); kfree_skb(skb); } -static void pppol2tp_session_sock_hold(struct l2tp_session *session) -{ - struct pppol2tp_session *ps = l2tp_session_priv(session); - - if (ps->sock) - sock_hold(ps->sock); -} - -static void pppol2tp_session_sock_put(struct l2tp_session *session) -{ - struct pppol2tp_session *ps = l2tp_session_priv(session); - - if (ps->sock) - sock_put(ps->sock); -} - /************************************************************************ * Transmit handling ***********************************************************************/ @@ -287,7 +293,6 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m, int error; struct l2tp_session *session; struct l2tp_tunnel *tunnel; - struct pppol2tp_session *ps; int uhlen; error = -ENOTCONN; @@ -300,10 +305,7 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m, if (session == NULL) goto error; - ps = l2tp_session_priv(session); - tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock); - if (tunnel == NULL) - goto error_put_sess; + tunnel = session->tunnel; uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0; @@ -314,7 +316,7 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m, 2 + total_len, /* 2 bytes for PPP_ALLSTATIONS & PPP_UI */ 0, GFP_KERNEL); if (!skb) - goto error_put_sess_tun; + goto error_put_sess; /* Reserve space for headers. */ skb_reserve(skb, NET_SKB_PAD); @@ -332,20 +334,17 @@ static int pppol2tp_sendmsg(struct socket *sock, struct msghdr *m, error = memcpy_from_msg(skb_put(skb, total_len), m, total_len); if (error < 0) { kfree_skb(skb); - goto error_put_sess_tun; + goto error_put_sess; } local_bh_disable(); l2tp_xmit_skb(session, skb, session->hdr_len); local_bh_enable(); - sock_put(ps->tunnel_sock); sock_put(sk); return total_len; -error_put_sess_tun: - sock_put(ps->tunnel_sock); error_put_sess: sock_put(sk); error: @@ -369,10 +368,8 @@ error: static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) { struct sock *sk = (struct sock *) chan->private; - struct sock *sk_tun; struct l2tp_session *session; struct l2tp_tunnel *tunnel; - struct pppol2tp_session *ps; int uhlen, headroom; if (sock_flag(sk, SOCK_DEAD) || !(sk->sk_state & PPPOX_CONNECTED)) @@ -383,13 +380,7 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) if (session == NULL) goto abort; - ps = l2tp_session_priv(session); - sk_tun = ps->tunnel_sock; - if (sk_tun == NULL) - goto abort_put_sess; - tunnel = l2tp_sock_to_tunnel(sk_tun); - if (tunnel == NULL) - goto abort_put_sess; + tunnel = session->tunnel; uhlen = (tunnel->encap == L2TP_ENCAPTYPE_UDP) ? sizeof(struct udphdr) : 0; headroom = NET_SKB_PAD + @@ -398,7 +389,7 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) session->hdr_len + /* L2TP header */ 2; /* 2 bytes for PPP_ALLSTATIONS & PPP_UI */ if (skb_cow_head(skb, headroom)) - goto abort_put_sess_tun; + goto abort_put_sess; /* Setup PPP header */ __skb_push(skb, 2); @@ -409,12 +400,10 @@ static int pppol2tp_xmit(struct ppp_channel *chan, struct sk_buff *skb) l2tp_xmit_skb(session, skb, session->hdr_len); local_bh_enable(); - sock_put(sk_tun); sock_put(sk); + return 1; -abort_put_sess_tun: - sock_put(sk_tun); abort_put_sess: sock_put(sk); abort: @@ -431,16 +420,15 @@ abort: */ static void pppol2tp_session_close(struct l2tp_session *session) { - struct pppol2tp_session *ps = l2tp_session_priv(session); - struct sock *sk = ps->sock; - struct socket *sock = sk->sk_socket; + struct sock *sk; BUG_ON(session->magic != L2TP_SESSION_MAGIC); - if (sock) { - inet_shutdown(sock, SEND_SHUTDOWN); - /* Don't let the session go away before our socket does */ - l2tp_session_inc_refcount(session); + sk = pppol2tp_session_get_sock(session); + if (sk) { + if (sk->sk_socket) + inet_shutdown(sk->sk_socket, SEND_SHUTDOWN); + sock_put(sk); } } @@ -461,6 +449,14 @@ static void pppol2tp_session_destruct(struct sock *sk) } } +static void pppol2tp_put_sk(struct rcu_head *head) +{ + struct pppol2tp_session *ps; + + ps = container_of(head, typeof(*ps), rcu); + sock_put(ps->__sk); +} + /* Called when the PPPoX socket (session) is closed. */ static int pppol2tp_release(struct socket *sock) @@ -486,11 +482,23 @@ static int pppol2tp_release(struct socket *sock) session = pppol2tp_sock_to_session(sk); - /* Purge any queued data */ if (session != NULL) { - __l2tp_session_unhash(session); - l2tp_session_queue_purge(session); - sock_put(sk); + struct pppol2tp_session *ps; + + l2tp_session_delete(session); + + ps = l2tp_session_priv(session); + mutex_lock(&ps->sk_lock); + ps->__sk = rcu_dereference_protected(ps->sk, + lockdep_is_held(&ps->sk_lock)); + RCU_INIT_POINTER(ps->sk, NULL); + mutex_unlock(&ps->sk_lock); + call_rcu(&ps->rcu, pppol2tp_put_sk); + + /* Rely on the sock_put() call at the end of the function for + * dropping the reference held by pppol2tp_sock_to_session(). + * The last reference will be dropped by pppol2tp_put_sk(). + */ } release_sock(sk); @@ -557,16 +565,46 @@ out: static void pppol2tp_show(struct seq_file *m, void *arg) { struct l2tp_session *session = arg; - struct pppol2tp_session *ps = l2tp_session_priv(session); + struct sock *sk; + + sk = pppol2tp_session_get_sock(session); + if (sk) { + struct pppox_sock *po = pppox_sk(sk); - if (ps) { - struct pppox_sock *po = pppox_sk(ps->sock); - if (po) - seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan)); + seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan)); + sock_put(sk); } } #endif +static void pppol2tp_session_init(struct l2tp_session *session) +{ + struct pppol2tp_session *ps; + struct dst_entry *dst; + + session->recv_skb = pppol2tp_recv; + session->session_close = pppol2tp_session_close; +#if IS_ENABLED(CONFIG_L2TP_DEBUGFS) + session->show = pppol2tp_show; +#endif + + ps = l2tp_session_priv(session); + mutex_init(&ps->sk_lock); + ps->owner = current->pid; + + /* If PMTU discovery was enabled, use the MTU that was discovered */ + dst = sk_dst_get(session->tunnel->sock); + if (dst) { + u32 pmtu = dst_mtu(dst); + + if (pmtu) { + session->mtu = pmtu - PPPOL2TP_HEADER_OVERHEAD; + session->mru = pmtu - PPPOL2TP_HEADER_OVERHEAD; + } + dst_release(dst); + } +} + /* connect() handler. Attach a PPPoX socket to a tunnel UDP socket */ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, @@ -578,12 +616,12 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, struct l2tp_session *session = NULL; struct l2tp_tunnel *tunnel; struct pppol2tp_session *ps; - struct dst_entry *dst; struct l2tp_session_cfg cfg = { 0, }; int error = 0; u32 tunnel_id, peer_tunnel_id; u32 session_id, peer_session_id; bool drop_refcnt = false; + bool drop_tunnel = false; int ver = 2; int fd; @@ -652,7 +690,9 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, if (tunnel_id == 0) goto end; - tunnel = l2tp_tunnel_find(sock_net(sk), tunnel_id); + tunnel = l2tp_tunnel_get(sock_net(sk), tunnel_id); + if (tunnel) + drop_tunnel = true; /* Special case: create tunnel context if session_id and * peer_session_id is 0. Otherwise look up tunnel using supplied @@ -685,7 +725,7 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, if (tunnel->peer_tunnel_id == 0) tunnel->peer_tunnel_id = peer_tunnel_id; - session = l2tp_session_get(sock_net(sk), tunnel, session_id, false); + session = l2tp_session_get(sock_net(sk), tunnel, session_id); if (session) { drop_refcnt = true; ps = l2tp_session_priv(session); @@ -693,13 +733,10 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, /* Using a pre-existing session is fine as long as it hasn't * been connected yet. */ - if (ps->sock) { - error = -EEXIST; - goto end; - } - - /* consistency checks */ - if (ps->tunnel_sock != tunnel->sock) { + mutex_lock(&ps->sk_lock); + if (rcu_dereference_protected(ps->sk, + lockdep_is_held(&ps->sk_lock))) { + mutex_unlock(&ps->sk_lock); error = -EEXIST; goto end; } @@ -715,35 +752,19 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, error = PTR_ERR(session); goto end; } - } - - /* Associate session with its PPPoL2TP socket */ - ps = l2tp_session_priv(session); - ps->owner = current->pid; - ps->sock = sk; - ps->tunnel_sock = tunnel->sock; - - session->recv_skb = pppol2tp_recv; - session->session_close = pppol2tp_session_close; -#if IS_ENABLED(CONFIG_L2TP_DEBUGFS) - session->show = pppol2tp_show; -#endif - - /* We need to know each time a skb is dropped from the reorder - * queue. - */ - session->ref = pppol2tp_session_sock_hold; - session->deref = pppol2tp_session_sock_put; - /* If PMTU discovery was enabled, use the MTU that was discovered */ - dst = sk_dst_get(tunnel->sock); - if (dst != NULL) { - u32 pmtu = dst_mtu(dst); + pppol2tp_session_init(session); + ps = l2tp_session_priv(session); + l2tp_session_inc_refcount(session); - if (pmtu != 0) - session->mtu = session->mru = pmtu - - PPPOL2TP_HEADER_OVERHEAD; - dst_release(dst); + mutex_lock(&ps->sk_lock); + error = l2tp_session_register(session, tunnel); + if (error < 0) { + mutex_unlock(&ps->sk_lock); + kfree(session); + goto end; + } + drop_refcnt = true; } /* Special case: if source & dest session_id == 0x0000, this @@ -768,12 +789,23 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr, po->chan.mtu = session->mtu; error = ppp_register_net_channel(sock_net(sk), &po->chan); - if (error) + if (error) { + mutex_unlock(&ps->sk_lock); goto end; + } out_no_ppp: /* This is how we get the session context from the socket. */ sk->sk_user_data = session; + rcu_assign_pointer(ps->sk, sk); + mutex_unlock(&ps->sk_lock); + + /* Keep the reference we've grabbed on the session: sk doesn't expect + * the session to disappear. pppol2tp_session_destruct() is responsible + * for dropping it. + */ + drop_refcnt = false; + sk->sk_state = PPPOX_CONNECTED; l2tp_info(session, L2TP_MSG_CONTROL, "%s: created\n", session->name); @@ -781,6 +813,8 @@ out_no_ppp: end: if (drop_refcnt) l2tp_session_dec_refcount(session); + if (drop_tunnel) + l2tp_tunnel_dec_refcount(tunnel); release_sock(sk); return error; @@ -788,25 +822,19 @@ end: #ifdef CONFIG_L2TP_V3 -/* Called when creating sessions via the netlink interface. - */ -static int pppol2tp_session_create(struct net *net, u32 tunnel_id, u32 session_id, u32 peer_session_id, struct l2tp_session_cfg *cfg) +/* Called when creating sessions via the netlink interface. */ +static int pppol2tp_session_create(struct net *net, struct l2tp_tunnel *tunnel, + u32 session_id, u32 peer_session_id, + struct l2tp_session_cfg *cfg) { int error; - struct l2tp_tunnel *tunnel; struct l2tp_session *session; - struct pppol2tp_session *ps; - - tunnel = l2tp_tunnel_find(net, tunnel_id); - - /* Error if we can't find the tunnel */ - error = -ENOENT; - if (tunnel == NULL) - goto out; /* Error if tunnel socket is not prepped */ - if (tunnel->sock == NULL) - goto out; + if (!tunnel->sock) { + error = -ENOENT; + goto err; + } /* Default MTU values. */ if (cfg->mtu == 0) @@ -820,18 +848,20 @@ static int pppol2tp_session_create(struct net *net, u32 tunnel_id, u32 session_i peer_session_id, cfg); if (IS_ERR(session)) { error = PTR_ERR(session); - goto out; + goto err; } - ps = l2tp_session_priv(session); - ps->tunnel_sock = tunnel->sock; + pppol2tp_session_init(session); - l2tp_info(session, L2TP_MSG_CONTROL, "%s: created\n", - session->name); + error = l2tp_session_register(session, tunnel); + if (error < 0) + goto err_sess; - error = 0; + return 0; -out: +err_sess: + kfree(session); +err: return error; } @@ -862,9 +892,7 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr, goto end; pls = l2tp_session_priv(session); - tunnel = l2tp_sock_to_tunnel(pls->tunnel_sock); - if (tunnel == NULL) - goto end_put_sess; + tunnel = session->tunnel; inet = inet_sk(tunnel->sock); if ((tunnel->version == 2) && (tunnel->sock->sk_family == AF_INET)) { @@ -944,8 +972,6 @@ static int pppol2tp_getname(struct socket *sock, struct sockaddr *uaddr, *usockaddr_len = len; error = 0; - sock_put(pls->tunnel_sock); -end_put_sess: sock_put(sk); end: return error; @@ -992,8 +1018,9 @@ static int pppol2tp_session_ioctl(struct l2tp_session *session, "%s: pppol2tp_session_ioctl(cmd=%#x, arg=%#lx)\n", session->name, cmd, arg); - sk = ps->sock; - sock_hold(sk); + sk = pppol2tp_session_get_sock(session); + if (!sk) + return -EBADR; switch (cmd) { case SIOCGIFMTU: @@ -1140,13 +1167,11 @@ static int pppol2tp_tunnel_ioctl(struct l2tp_tunnel *tunnel, /* resend to session ioctl handler */ struct l2tp_session *session = l2tp_session_get(sock_net(sk), tunnel, - stats.session_id, true); + stats.session_id); if (session) { err = pppol2tp_session_ioctl(session, cmd, arg); - if (session->deref) - session->deref(session); l2tp_session_dec_refcount(session); } else { err = -EBADR; @@ -1185,7 +1210,6 @@ static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd, struct sock *sk = sock->sk; struct l2tp_session *session; struct l2tp_tunnel *tunnel; - struct pppol2tp_session *ps; int err; if (!sk) @@ -1209,16 +1233,10 @@ static int pppol2tp_ioctl(struct socket *sock, unsigned int cmd, /* Special case: if session's session_id is zero, treat ioctl as a * tunnel ioctl */ - ps = l2tp_session_priv(session); if ((session->session_id == 0) && (session->peer_session_id == 0)) { - err = -EBADF; - tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock); - if (tunnel == NULL) - goto end_put_sess; - + tunnel = session->tunnel; err = pppol2tp_tunnel_ioctl(tunnel, cmd, arg); - sock_put(ps->tunnel_sock); goto end_put_sess; } @@ -1270,7 +1288,6 @@ static int pppol2tp_session_setsockopt(struct sock *sk, int optname, int val) { int err = 0; - struct pppol2tp_session *ps = l2tp_session_priv(session); switch (optname) { case PPPOL2TP_SO_RECVSEQ: @@ -1291,8 +1308,8 @@ static int pppol2tp_session_setsockopt(struct sock *sk, } session->send_seq = !!val; { - struct sock *ssk = ps->sock; - struct pppox_sock *po = pppox_sk(ssk); + struct pppox_sock *po = pppox_sk(sk); + po->chan.hdrlen = val ? PPPOL2TP_L2TP_HDR_SIZE_SEQ : PPPOL2TP_L2TP_HDR_SIZE_NOSEQ; } @@ -1345,7 +1362,6 @@ static int pppol2tp_setsockopt(struct socket *sock, int level, int optname, struct sock *sk = sock->sk; struct l2tp_session *session; struct l2tp_tunnel *tunnel; - struct pppol2tp_session *ps; int val; int err; @@ -1370,20 +1386,14 @@ static int pppol2tp_setsockopt(struct socket *sock, int level, int optname, /* Special case: if session_id == 0x0000, treat as operation on tunnel */ - ps = l2tp_session_priv(session); if ((session->session_id == 0) && (session->peer_session_id == 0)) { - err = -EBADF; - tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock); - if (tunnel == NULL) - goto end_put_sess; - + tunnel = session->tunnel; err = pppol2tp_tunnel_setsockopt(sk, tunnel, optname, val); - sock_put(ps->tunnel_sock); - } else + } else { err = pppol2tp_session_setsockopt(sk, session, optname, val); + } -end_put_sess: sock_put(sk); end: return err; @@ -1471,7 +1481,6 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname, struct l2tp_tunnel *tunnel; int val, len; int err; - struct pppol2tp_session *ps; if (level != SOL_PPPOL2TP) return -EINVAL; @@ -1495,16 +1504,10 @@ static int pppol2tp_getsockopt(struct socket *sock, int level, int optname, goto end; /* Special case: if session_id == 0x0000, treat as operation on tunnel */ - ps = l2tp_session_priv(session); if ((session->session_id == 0) && (session->peer_session_id == 0)) { - err = -EBADF; - tunnel = l2tp_sock_to_tunnel(ps->tunnel_sock); - if (tunnel == NULL) - goto end_put_sess; - + tunnel = session->tunnel; err = pppol2tp_tunnel_getsockopt(sk, tunnel, optname, &val); - sock_put(ps->tunnel_sock); if (err) goto end_put_sess; } else { @@ -1563,7 +1566,7 @@ static void pppol2tp_next_tunnel(struct net *net, struct pppol2tp_seq_data *pd) static void pppol2tp_next_session(struct net *net, struct pppol2tp_seq_data *pd) { - pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx, true); + pd->session = l2tp_session_get_nth(pd->tunnel, pd->session_idx); pd->session_idx++; if (pd->session == NULL) { @@ -1631,8 +1634,9 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v) { struct l2tp_session *session = v; struct l2tp_tunnel *tunnel = session->tunnel; - struct pppol2tp_session *ps = l2tp_session_priv(session); - struct pppox_sock *po = pppox_sk(ps->sock); + unsigned char state; + char user_data_ok; + struct sock *sk; u32 ip = 0; u16 port = 0; @@ -1642,6 +1646,15 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v) port = ntohs(inet->inet_sport); } + sk = pppol2tp_session_get_sock(session); + if (sk) { + state = sk->sk_state; + user_data_ok = (session == sk->sk_user_data) ? 'Y' : 'N'; + } else { + state = 0; + user_data_ok = 'N'; + } + seq_printf(m, " SESSION '%s' %08X/%d %04X/%04X -> " "%04X/%04X %d %c\n", session->name, ip, port, @@ -1649,9 +1662,7 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v) session->session_id, tunnel->peer_tunnel_id, session->peer_session_id, - ps->sock->sk_state, - (session == ps->sock->sk_user_data) ? - 'Y' : 'N'); + state, user_data_ok); seq_printf(m, " %d/%d/%c/%c/%s %08x %u\n", session->mtu, session->mru, session->recv_seq ? 'R' : '-', @@ -1668,8 +1679,12 @@ static void pppol2tp_seq_session_show(struct seq_file *m, void *v) atomic_long_read(&session->stats.rx_bytes), atomic_long_read(&session->stats.rx_errors)); - if (po) + if (sk) { + struct pppox_sock *po = pppox_sk(sk); + seq_printf(m, " interface %s\n", ppp_dev_name(&po->chan)); + sock_put(sk); + } } static int pppol2tp_seq_show(struct seq_file *m, void *v) @@ -1694,8 +1709,6 @@ static int pppol2tp_seq_show(struct seq_file *m, void *v) pppol2tp_seq_tunnel_show(m, pd->tunnel); } else { pppol2tp_seq_session_show(m, pd->session); - if (pd->session->deref) - pd->session->deref(pd->session); l2tp_session_dec_refcount(pd->session); } diff --git a/net/lapb/lapb_iface.c b/net/lapb/lapb_iface.c index e15314e3b464..db6e0afe3a20 100644 --- a/net/lapb/lapb_iface.c +++ b/net/lapb/lapb_iface.c @@ -127,8 +127,8 @@ static struct lapb_cb *lapb_create_cb(void) skb_queue_head_init(&lapb->write_queue); skb_queue_head_init(&lapb->ack_queue); - init_timer(&lapb->t1timer); - init_timer(&lapb->t2timer); + timer_setup(&lapb->t1timer, NULL, 0); + timer_setup(&lapb->t2timer, NULL, 0); lapb->t1 = LAPB_DEFAULT_T1; lapb->t2 = LAPB_DEFAULT_T2; diff --git a/net/lapb/lapb_timer.c b/net/lapb/lapb_timer.c index 1a5535bc3b8d..8bb469cb3abe 100644 --- a/net/lapb/lapb_timer.c +++ b/net/lapb/lapb_timer.c @@ -35,15 +35,14 @@ #include <linux/interrupt.h> #include <net/lapb.h> -static void lapb_t1timer_expiry(unsigned long); -static void lapb_t2timer_expiry(unsigned long); +static void lapb_t1timer_expiry(struct timer_list *); +static void lapb_t2timer_expiry(struct timer_list *); void lapb_start_t1timer(struct lapb_cb *lapb) { del_timer(&lapb->t1timer); - lapb->t1timer.data = (unsigned long)lapb; - lapb->t1timer.function = &lapb_t1timer_expiry; + lapb->t1timer.function = (TIMER_FUNC_TYPE)lapb_t1timer_expiry; lapb->t1timer.expires = jiffies + lapb->t1; add_timer(&lapb->t1timer); @@ -53,8 +52,7 @@ void lapb_start_t2timer(struct lapb_cb *lapb) { del_timer(&lapb->t2timer); - lapb->t2timer.data = (unsigned long)lapb; - lapb->t2timer.function = &lapb_t2timer_expiry; + lapb->t2timer.function = (TIMER_FUNC_TYPE)lapb_t2timer_expiry; lapb->t2timer.expires = jiffies + lapb->t2; add_timer(&lapb->t2timer); @@ -75,9 +73,9 @@ int lapb_t1timer_running(struct lapb_cb *lapb) return timer_pending(&lapb->t1timer); } -static void lapb_t2timer_expiry(unsigned long param) +static void lapb_t2timer_expiry(struct timer_list *t) { - struct lapb_cb *lapb = (struct lapb_cb *)param; + struct lapb_cb *lapb = from_timer(lapb, t, t2timer); if (lapb->condition & LAPB_ACK_PENDING_CONDITION) { lapb->condition &= ~LAPB_ACK_PENDING_CONDITION; @@ -85,9 +83,9 @@ static void lapb_t2timer_expiry(unsigned long param) } } -static void lapb_t1timer_expiry(unsigned long param) +static void lapb_t1timer_expiry(struct timer_list *t) { - struct lapb_cb *lapb = (struct lapb_cb *)param; + struct lapb_cb *lapb = from_timer(lapb, t, t1timer); switch (lapb->state) { diff --git a/net/llc/llc_c_ac.c b/net/llc/llc_c_ac.c index ea225bd2672c..f59648018060 100644 --- a/net/llc/llc_c_ac.c +++ b/net/llc/llc_c_ac.c @@ -1318,9 +1318,8 @@ static int llc_conn_ac_inc_vs_by_1(struct sock *sk, struct sk_buff *skb) return 0; } -static void llc_conn_tmr_common_cb(unsigned long timeout_data, u8 type) +static void llc_conn_tmr_common_cb(struct sock *sk, u8 type) { - struct sock *sk = (struct sock *)timeout_data; struct sk_buff *skb = alloc_skb(0, GFP_ATOMIC); bh_lock_sock(sk); @@ -1334,24 +1333,32 @@ static void llc_conn_tmr_common_cb(unsigned long timeout_data, u8 type) bh_unlock_sock(sk); } -void llc_conn_pf_cycle_tmr_cb(unsigned long timeout_data) +void llc_conn_pf_cycle_tmr_cb(struct timer_list *t) { - llc_conn_tmr_common_cb(timeout_data, LLC_CONN_EV_TYPE_P_TMR); + struct llc_sock *llc = from_timer(llc, t, pf_cycle_timer.timer); + + llc_conn_tmr_common_cb(&llc->sk, LLC_CONN_EV_TYPE_P_TMR); } -void llc_conn_busy_tmr_cb(unsigned long timeout_data) +void llc_conn_busy_tmr_cb(struct timer_list *t) { - llc_conn_tmr_common_cb(timeout_data, LLC_CONN_EV_TYPE_BUSY_TMR); + struct llc_sock *llc = from_timer(llc, t, busy_state_timer.timer); + + llc_conn_tmr_common_cb(&llc->sk, LLC_CONN_EV_TYPE_BUSY_TMR); } -void llc_conn_ack_tmr_cb(unsigned long timeout_data) +void llc_conn_ack_tmr_cb(struct timer_list *t) { - llc_conn_tmr_common_cb(timeout_data, LLC_CONN_EV_TYPE_ACK_TMR); + struct llc_sock *llc = from_timer(llc, t, ack_timer.timer); + + llc_conn_tmr_common_cb(&llc->sk, LLC_CONN_EV_TYPE_ACK_TMR); } -void llc_conn_rej_tmr_cb(unsigned long timeout_data) +void llc_conn_rej_tmr_cb(struct timer_list *t) { - llc_conn_tmr_common_cb(timeout_data, LLC_CONN_EV_TYPE_REJ_TMR); + struct llc_sock *llc = from_timer(llc, t, rej_sent_timer.timer); + + llc_conn_tmr_common_cb(&llc->sk, LLC_CONN_EV_TYPE_REJ_TMR); } int llc_conn_ac_rst_vs(struct sock *sk, struct sk_buff *skb) diff --git a/net/llc/llc_conn.c b/net/llc/llc_conn.c index 5e91b47f0d2a..9177dbb16dce 100644 --- a/net/llc/llc_conn.c +++ b/net/llc/llc_conn.c @@ -902,20 +902,16 @@ static void llc_sk_init(struct sock *sk) llc->inc_cntr = llc->dec_cntr = 2; llc->dec_step = llc->connect_step = 1; - setup_timer(&llc->ack_timer.timer, llc_conn_ack_tmr_cb, - (unsigned long)sk); + timer_setup(&llc->ack_timer.timer, llc_conn_ack_tmr_cb, 0); llc->ack_timer.expire = sysctl_llc2_ack_timeout; - setup_timer(&llc->pf_cycle_timer.timer, llc_conn_pf_cycle_tmr_cb, - (unsigned long)sk); + timer_setup(&llc->pf_cycle_timer.timer, llc_conn_pf_cycle_tmr_cb, 0); llc->pf_cycle_timer.expire = sysctl_llc2_p_timeout; - setup_timer(&llc->rej_sent_timer.timer, llc_conn_rej_tmr_cb, - (unsigned long)sk); + timer_setup(&llc->rej_sent_timer.timer, llc_conn_rej_tmr_cb, 0); llc->rej_sent_timer.expire = sysctl_llc2_rej_timeout; - setup_timer(&llc->busy_state_timer.timer, llc_conn_busy_tmr_cb, - (unsigned long)sk); + timer_setup(&llc->busy_state_timer.timer, llc_conn_busy_tmr_cb, 0); llc->busy_state_timer.expire = sysctl_llc2_busy_timeout; llc->n2 = 2; /* max retransmit */ diff --git a/net/llc/llc_input.c b/net/llc/llc_input.c index dd3e83328ad5..82cb93f66b9b 100644 --- a/net/llc/llc_input.c +++ b/net/llc/llc_input.c @@ -193,7 +193,7 @@ int llc_rcv(struct sk_buff *skb, struct net_device *dev, */ rcv = rcu_dereference(sap->rcv_func); dest = llc_pdu_type(skb); - sap_handler = dest ? ACCESS_ONCE(llc_type_handlers[dest - 1]) : NULL; + sap_handler = dest ? READ_ONCE(llc_type_handlers[dest - 1]) : NULL; if (unlikely(!sap_handler)) { if (rcv) rcv(skb, dev, pt, orig_dev); @@ -214,7 +214,7 @@ drop: kfree_skb(skb); goto out; handle_station: - sta_handler = ACCESS_ONCE(llc_station_handler); + sta_handler = READ_ONCE(llc_station_handler); if (!sta_handler) goto drop; sta_handler(skb); diff --git a/net/llc/sysctl_net_llc.c b/net/llc/sysctl_net_llc.c index 799bafc2af39..8443a6d841b0 100644 --- a/net/llc/sysctl_net_llc.c +++ b/net/llc/sysctl_net_llc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * sysctl_net_llc.c: sysctl interface to LLC net subsystem. * diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index 282912245938..e3589ade62e0 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_MAC80211) += mac80211.o # mac80211 objects @@ -6,6 +7,7 @@ mac80211-y := \ driver-ops.o \ sta_info.o \ wep.o \ + aead_api.o \ wpa.o \ scan.o offchannel.o \ ht.o agg-tx.o agg-rx.o \ @@ -15,8 +17,6 @@ mac80211-y := \ rate.o \ michael.o \ tkip.o \ - aes_ccm.o \ - aes_gcm.o \ aes_cmac.o \ aes_gmac.o \ fils_aead.o \ diff --git a/net/mac80211/aes_ccm.c b/net/mac80211/aead_api.c index a4e0d59a40dd..160f9df30402 100644 --- a/net/mac80211/aes_ccm.c +++ b/net/mac80211/aead_api.c @@ -1,6 +1,7 @@ /* * Copyright 2003-2004, Instant802 Networks, Inc. * Copyright 2005-2006, Devicescape Software, Inc. + * Copyright 2014-2015, Qualcomm Atheros, Inc. * * Rewrite: Copyright (C) 2013 Linaro Ltd <ard.biesheuvel@linaro.org> * @@ -12,30 +13,29 @@ #include <linux/kernel.h> #include <linux/types.h> #include <linux/err.h> +#include <linux/scatterlist.h> #include <crypto/aead.h> -#include <net/mac80211.h> -#include "key.h" -#include "aes_ccm.h" +#include "aead_api.h" -int ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, - u8 *data, size_t data_len, u8 *mic, - size_t mic_len) +int aead_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, size_t aad_len, + u8 *data, size_t data_len, u8 *mic) { + size_t mic_len = crypto_aead_authsize(tfm); struct scatterlist sg[3]; struct aead_request *aead_req; int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm); u8 *__aad; - aead_req = kzalloc(reqsize + CCM_AAD_LEN, GFP_ATOMIC); + aead_req = kzalloc(reqsize + aad_len, GFP_ATOMIC); if (!aead_req) return -ENOMEM; __aad = (u8 *)aead_req + reqsize; - memcpy(__aad, aad, CCM_AAD_LEN); + memcpy(__aad, aad, aad_len); sg_init_table(sg, 3); - sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad)); + sg_set_buf(&sg[0], __aad, aad_len); sg_set_buf(&sg[1], data, data_len); sg_set_buf(&sg[2], mic, mic_len); @@ -49,10 +49,10 @@ int ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, return 0; } -int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, - u8 *data, size_t data_len, u8 *mic, - size_t mic_len) +int aead_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, size_t aad_len, + u8 *data, size_t data_len, u8 *mic) { + size_t mic_len = crypto_aead_authsize(tfm); struct scatterlist sg[3]; struct aead_request *aead_req; int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm); @@ -62,15 +62,15 @@ int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, if (data_len == 0) return -EINVAL; - aead_req = kzalloc(reqsize + CCM_AAD_LEN, GFP_ATOMIC); + aead_req = kzalloc(reqsize + aad_len, GFP_ATOMIC); if (!aead_req) return -ENOMEM; __aad = (u8 *)aead_req + reqsize; - memcpy(__aad, aad, CCM_AAD_LEN); + memcpy(__aad, aad, aad_len); sg_init_table(sg, 3); - sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad)); + sg_set_buf(&sg[0], __aad, aad_len); sg_set_buf(&sg[1], data, data_len); sg_set_buf(&sg[2], mic, mic_len); @@ -84,14 +84,14 @@ int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, return err; } -struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[], - size_t key_len, - size_t mic_len) +struct crypto_aead * +aead_key_setup_encrypt(const char *alg, const u8 key[], + size_t key_len, size_t mic_len) { struct crypto_aead *tfm; int err; - tfm = crypto_alloc_aead("ccm(aes)", 0, CRYPTO_ALG_ASYNC); + tfm = crypto_alloc_aead(alg, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm)) return tfm; @@ -109,7 +109,7 @@ free_aead: return ERR_PTR(err); } -void ieee80211_aes_key_free(struct crypto_aead *tfm) +void aead_key_free(struct crypto_aead *tfm) { crypto_free_aead(tfm); } diff --git a/net/mac80211/aead_api.h b/net/mac80211/aead_api.h new file mode 100644 index 000000000000..5e39ea843bbf --- /dev/null +++ b/net/mac80211/aead_api.h @@ -0,0 +1,27 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef _AEAD_API_H +#define _AEAD_API_H + +#include <crypto/aead.h> +#include <linux/crypto.h> + +struct crypto_aead * +aead_key_setup_encrypt(const char *alg, const u8 key[], + size_t key_len, size_t mic_len); + +int aead_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, + size_t aad_len, u8 *data, + size_t data_len, u8 *mic); + +int aead_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, + size_t aad_len, u8 *data, + size_t data_len, u8 *mic); + +void aead_key_free(struct crypto_aead *tfm); + +#endif /* _AEAD_API_H */ diff --git a/net/mac80211/aes_ccm.h b/net/mac80211/aes_ccm.h index fcd3254c5cf0..e9b7ca0bde5b 100644 --- a/net/mac80211/aes_ccm.h +++ b/net/mac80211/aes_ccm.h @@ -10,19 +10,39 @@ #ifndef AES_CCM_H #define AES_CCM_H -#include <linux/crypto.h> +#include "aead_api.h" #define CCM_AAD_LEN 32 -struct crypto_aead *ieee80211_aes_key_setup_encrypt(const u8 key[], - size_t key_len, - size_t mic_len); -int ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, - u8 *data, size_t data_len, u8 *mic, - size_t mic_len); -int ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, - u8 *data, size_t data_len, u8 *mic, - size_t mic_len); -void ieee80211_aes_key_free(struct crypto_aead *tfm); +static inline struct crypto_aead * +ieee80211_aes_key_setup_encrypt(const u8 key[], size_t key_len, size_t mic_len) +{ + return aead_key_setup_encrypt("ccm(aes)", key, key_len, mic_len); +} + +static inline int +ieee80211_aes_ccm_encrypt(struct crypto_aead *tfm, + u8 *b_0, u8 *aad, u8 *data, + size_t data_len, u8 *mic) +{ + return aead_encrypt(tfm, b_0, aad + 2, + be16_to_cpup((__be16 *)aad), + data, data_len, mic); +} + +static inline int +ieee80211_aes_ccm_decrypt(struct crypto_aead *tfm, + u8 *b_0, u8 *aad, u8 *data, + size_t data_len, u8 *mic) +{ + return aead_decrypt(tfm, b_0, aad + 2, + be16_to_cpup((__be16 *)aad), + data, data_len, mic); +} + +static inline void ieee80211_aes_key_free(struct crypto_aead *tfm) +{ + return aead_key_free(tfm); +} #endif /* AES_CCM_H */ diff --git a/net/mac80211/aes_gcm.c b/net/mac80211/aes_gcm.c deleted file mode 100644 index 8a4397cc1b08..000000000000 --- a/net/mac80211/aes_gcm.c +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Copyright 2014-2015, Qualcomm Atheros, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#include <linux/kernel.h> -#include <linux/types.h> -#include <linux/err.h> -#include <crypto/aead.h> - -#include <net/mac80211.h> -#include "key.h" -#include "aes_gcm.h" - -int ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, - u8 *data, size_t data_len, u8 *mic) -{ - struct scatterlist sg[3]; - struct aead_request *aead_req; - int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm); - u8 *__aad; - - aead_req = kzalloc(reqsize + GCM_AAD_LEN, GFP_ATOMIC); - if (!aead_req) - return -ENOMEM; - - __aad = (u8 *)aead_req + reqsize; - memcpy(__aad, aad, GCM_AAD_LEN); - - sg_init_table(sg, 3); - sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad)); - sg_set_buf(&sg[1], data, data_len); - sg_set_buf(&sg[2], mic, IEEE80211_GCMP_MIC_LEN); - - aead_request_set_tfm(aead_req, tfm); - aead_request_set_crypt(aead_req, sg, sg, data_len, j_0); - aead_request_set_ad(aead_req, sg[0].length); - - crypto_aead_encrypt(aead_req); - kzfree(aead_req); - return 0; -} - -int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, - u8 *data, size_t data_len, u8 *mic) -{ - struct scatterlist sg[3]; - struct aead_request *aead_req; - int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm); - u8 *__aad; - int err; - - if (data_len == 0) - return -EINVAL; - - aead_req = kzalloc(reqsize + GCM_AAD_LEN, GFP_ATOMIC); - if (!aead_req) - return -ENOMEM; - - __aad = (u8 *)aead_req + reqsize; - memcpy(__aad, aad, GCM_AAD_LEN); - - sg_init_table(sg, 3); - sg_set_buf(&sg[0], &__aad[2], be16_to_cpup((__be16 *)__aad)); - sg_set_buf(&sg[1], data, data_len); - sg_set_buf(&sg[2], mic, IEEE80211_GCMP_MIC_LEN); - - aead_request_set_tfm(aead_req, tfm); - aead_request_set_crypt(aead_req, sg, sg, - data_len + IEEE80211_GCMP_MIC_LEN, j_0); - aead_request_set_ad(aead_req, sg[0].length); - - err = crypto_aead_decrypt(aead_req); - kzfree(aead_req); - - return err; -} - -struct crypto_aead *ieee80211_aes_gcm_key_setup_encrypt(const u8 key[], - size_t key_len) -{ - struct crypto_aead *tfm; - int err; - - tfm = crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(tfm)) - return tfm; - - err = crypto_aead_setkey(tfm, key, key_len); - if (err) - goto free_aead; - err = crypto_aead_setauthsize(tfm, IEEE80211_GCMP_MIC_LEN); - if (err) - goto free_aead; - - return tfm; - -free_aead: - crypto_free_aead(tfm); - return ERR_PTR(err); -} - -void ieee80211_aes_gcm_key_free(struct crypto_aead *tfm) -{ - crypto_free_aead(tfm); -} diff --git a/net/mac80211/aes_gcm.h b/net/mac80211/aes_gcm.h index 55aed5352494..d2b096033009 100644 --- a/net/mac80211/aes_gcm.h +++ b/net/mac80211/aes_gcm.h @@ -9,16 +9,38 @@ #ifndef AES_GCM_H #define AES_GCM_H -#include <linux/crypto.h> +#include "aead_api.h" #define GCM_AAD_LEN 32 -int ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, - u8 *data, size_t data_len, u8 *mic); -int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, - u8 *data, size_t data_len, u8 *mic); -struct crypto_aead *ieee80211_aes_gcm_key_setup_encrypt(const u8 key[], - size_t key_len); -void ieee80211_aes_gcm_key_free(struct crypto_aead *tfm); +static inline int ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, + u8 *j_0, u8 *aad, u8 *data, + size_t data_len, u8 *mic) +{ + return aead_encrypt(tfm, j_0, aad + 2, + be16_to_cpup((__be16 *)aad), + data, data_len, mic); +} + +static inline int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, + u8 *j_0, u8 *aad, u8 *data, + size_t data_len, u8 *mic) +{ + return aead_decrypt(tfm, j_0, aad + 2, + be16_to_cpup((__be16 *)aad), + data, data_len, mic); +} + +static inline struct crypto_aead * +ieee80211_aes_gcm_key_setup_encrypt(const u8 key[], size_t key_len) +{ + return aead_key_setup_encrypt("gcm(aes)", key, + key_len, IEEE80211_GCMP_MIC_LEN); +} + +static inline void ieee80211_aes_gcm_key_free(struct crypto_aead *tfm) +{ + return aead_key_free(tfm); +} #endif /* AES_GCM_H */ diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 2b36eff5d97e..88cc1ae935ea 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -245,10 +245,10 @@ static void ieee80211_send_addba_resp(struct ieee80211_sub_if_data *sdata, u8 *d ieee80211_tx_skb(sdata, skb); } -void __ieee80211_start_rx_ba_session(struct sta_info *sta, - u8 dialog_token, u16 timeout, - u16 start_seq_num, u16 ba_policy, u16 tid, - u16 buf_size, bool tx, bool auto_seq) +void ___ieee80211_start_rx_ba_session(struct sta_info *sta, + u8 dialog_token, u16 timeout, + u16 start_seq_num, u16 ba_policy, u16 tid, + u16 buf_size, bool tx, bool auto_seq) { struct ieee80211_local *local = sta->sdata->local; struct tid_ampdu_rx *tid_agg_rx; @@ -267,7 +267,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, ht_dbg(sta->sdata, "STA %pM requests BA session on unsupported tid %d\n", sta->sta.addr, tid); - goto end_no_lock; + goto end; } if (!sta->sta.ht_cap.ht_supported) { @@ -275,14 +275,14 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, "STA %pM erroneously requests BA session on tid %d w/o QoS\n", sta->sta.addr, tid); /* send a response anyway, it's an error case if we get here */ - goto end_no_lock; + goto end; } if (test_sta_flag(sta, WLAN_STA_BLOCK_BA)) { ht_dbg(sta->sdata, "Suspend in progress - Denying ADDBA request (%pM tid %d)\n", sta->sta.addr, tid); - goto end_no_lock; + goto end; } /* sanity check for incoming parameters: @@ -296,7 +296,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, ht_dbg_ratelimited(sta->sdata, "AddBA Req with bad params from %pM on tid %u. policy %d, buffer size %d\n", sta->sta.addr, tid, ba_policy, buf_size); - goto end_no_lock; + goto end; } /* determine default buffer size */ if (buf_size == 0) @@ -311,7 +311,7 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, buf_size, sta->sta.addr); /* examine state machine */ - mutex_lock(&sta->ampdu_mlme.mtx); + lockdep_assert_held(&sta->ampdu_mlme.mtx); if (test_bit(tid, sta->ampdu_mlme.agg_session_valid)) { if (sta->ampdu_mlme.tid_rx_token[tid] == dialog_token) { @@ -415,15 +415,25 @@ end: __clear_bit(tid, sta->ampdu_mlme.unexpected_agg); sta->ampdu_mlme.tid_rx_token[tid] = dialog_token; } - mutex_unlock(&sta->ampdu_mlme.mtx); -end_no_lock: if (tx) ieee80211_send_addba_resp(sta->sdata, sta->sta.addr, tid, dialog_token, status, 1, buf_size, timeout); } +void __ieee80211_start_rx_ba_session(struct sta_info *sta, + u8 dialog_token, u16 timeout, + u16 start_seq_num, u16 ba_policy, u16 tid, + u16 buf_size, bool tx, bool auto_seq) +{ + mutex_lock(&sta->ampdu_mlme.mtx); + ___ieee80211_start_rx_ba_session(sta, dialog_token, timeout, + start_seq_num, ba_policy, tid, + buf_size, tx, auto_seq); + mutex_unlock(&sta->ampdu_mlme.mtx); +} + void ieee80211_process_addba_request(struct ieee80211_local *local, struct sta_info *sta, struct ieee80211_mgmt *mgmt, @@ -449,7 +459,7 @@ void ieee80211_process_addba_request(struct ieee80211_local *local, } void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif, - const u8 *addr, unsigned int bit) + const u8 *addr, unsigned int tid) { struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); struct ieee80211_local *local = sdata->local; @@ -460,7 +470,7 @@ void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif, if (!sta) goto unlock; - set_bit(bit, sta->ampdu_mlme.tid_rx_manage_offl); + set_bit(tid, sta->ampdu_mlme.tid_rx_manage_offl); ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work); unlock: rcu_read_unlock(); diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c index cbd48762256c..bef516ec47f9 100644 --- a/net/mac80211/agg-tx.c +++ b/net/mac80211/agg-tx.c @@ -226,7 +226,11 @@ ieee80211_agg_start_txq(struct sta_info *sta, int tid, bool enable) clear_bit(IEEE80211_TXQ_AMPDU, &txqi->flags); clear_bit(IEEE80211_TXQ_STOP, &txqi->flags); + local_bh_disable(); + rcu_read_lock(); drv_wake_tx_queue(sta->sdata->local, txqi); + rcu_read_unlock(); + local_bh_enable(); } /* @@ -436,7 +440,7 @@ static void sta_addba_resp_timer_expired(unsigned long data) test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state)) { rcu_read_unlock(); ht_dbg(sta->sdata, - "timer expired on %pM tid %d but we are not (or no longer) expecting addBA response there\n", + "timer expired on %pM tid %d not expecting addBA response\n", sta->sta.addr, tid); return; } @@ -639,7 +643,7 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid, time_before(jiffies, sta->ampdu_mlme.last_addba_req_time[tid] + HT_AGG_RETRIES_PERIOD)) { ht_dbg(sdata, - "BA request denied - waiting a grace period after %d failed requests on %pM tid %u\n", + "BA request denied - %d failed requests on %pM tid %u\n", sta->ampdu_mlme.addba_req_num[tid], sta->sta.addr, tid); ret = -EBUSY; goto err_unlock_sta; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index a354f1939e49..fb15d3b97cb2 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2727,12 +2727,6 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, if (!ieee80211_sdata_running(sdata)) return -ENETDOWN; - if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) { - ret = drv_set_bitrate_mask(local, sdata, mask); - if (ret) - return ret; - } - /* * If active validate the setting and reject it if it doesn't leave * at least one basic rate usable, since we really have to be able @@ -2748,6 +2742,12 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, return -EINVAL; } + if (ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) { + ret = drv_set_bitrate_mask(local, sdata, mask); + if (ret) + return ret; + } + for (i = 0; i < NUM_NL80211_BANDS; i++) { struct ieee80211_supported_band *sband = wiphy->bands[i]; int j; diff --git a/net/mac80211/debug.h b/net/mac80211/debug.h index 1956b3115dd5..d90a8f9cc3fd 100644 --- a/net/mac80211/debug.h +++ b/net/mac80211/debug.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __MAC80211_DEBUG_H #define __MAC80211_DEBUG_H #include <net/cfg80211.h> diff --git a/net/mac80211/debugfs.h b/net/mac80211/debugfs.h index 60c35afee29d..d2c424787463 100644 --- a/net/mac80211/debugfs.h +++ b/net/mac80211/debugfs.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __MAC80211_DEBUGFS_H #define __MAC80211_DEBUGFS_H diff --git a/net/mac80211/debugfs_key.h b/net/mac80211/debugfs_key.h index 32adc77e9c77..1cd7b8bff56c 100644 --- a/net/mac80211/debugfs_key.h +++ b/net/mac80211/debugfs_key.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __MAC80211_DEBUGFS_KEY_H #define __MAC80211_DEBUGFS_KEY_H diff --git a/net/mac80211/debugfs_netdev.h b/net/mac80211/debugfs_netdev.h index 9f5501a9a795..a7e9d8d518f9 100644 --- a/net/mac80211/debugfs_netdev.h +++ b/net/mac80211/debugfs_netdev.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* routines exported for debugfs handling */ #ifndef __IEEE80211_DEBUGFS_NETDEV_H diff --git a/net/mac80211/debugfs_sta.h b/net/mac80211/debugfs_sta.h index 8b608903259f..d2e7c27ad6d1 100644 --- a/net/mac80211/debugfs_sta.h +++ b/net/mac80211/debugfs_sta.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __MAC80211_DEBUGFS_STA_H #define __MAC80211_DEBUGFS_STA_H diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 09f77e4a8a79..c7f93fd9ca7a 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Portions of this file * Copyright(c) 2016 Intel Deutschland GmbH diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index c92df492e898..41f5e48f8021 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -290,16 +290,36 @@ void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, { int i; + mutex_lock(&sta->ampdu_mlme.mtx); for (i = 0; i < IEEE80211_NUM_TIDS; i++) { - __ieee80211_stop_tx_ba_session(sta, i, reason); - __ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT, - WLAN_REASON_QSTA_LEAVE_QBSS, - reason != AGG_STOP_DESTROY_STA && - reason != AGG_STOP_PEER_REQUEST); + ___ieee80211_stop_tx_ba_session(sta, i, reason); + ___ieee80211_stop_rx_ba_session(sta, i, WLAN_BACK_RECIPIENT, + WLAN_REASON_QSTA_LEAVE_QBSS, + reason != AGG_STOP_DESTROY_STA && + reason != AGG_STOP_PEER_REQUEST); } + mutex_unlock(&sta->ampdu_mlme.mtx); /* stopping might queue the work again - so cancel only afterwards */ cancel_work_sync(&sta->ampdu_mlme.work); + + /* + * In case the tear down is part of a reconfigure due to HW restart + * request, it is possible that the low level driver requested to stop + * the BA session, so handle it to properly clean tid_tx data. + */ + mutex_lock(&sta->ampdu_mlme.mtx); + for (i = 0; i < IEEE80211_NUM_TIDS; i++) { + struct tid_ampdu_tx *tid_tx = + rcu_dereference_protected_tid_tx(sta, i); + + if (!tid_tx) + continue; + + if (test_and_clear_bit(HT_AGG_STATE_STOP_CB, &tid_tx->state)) + ieee80211_stop_tx_ba_cb(sta, i, tid_tx); + } + mutex_unlock(&sta->ampdu_mlme.mtx); } void ieee80211_ba_session_work(struct work_struct *work) @@ -333,9 +353,9 @@ void ieee80211_ba_session_work(struct work_struct *work) if (test_and_clear_bit(tid, sta->ampdu_mlme.tid_rx_manage_offl)) - __ieee80211_start_rx_ba_session(sta, 0, 0, 0, 1, tid, - IEEE80211_MAX_AMPDU_BUF, - false, true); + ___ieee80211_start_rx_ba_session(sta, 0, 0, 0, 1, tid, + IEEE80211_MAX_AMPDU_BUF, + false, true); if (test_and_clear_bit(tid + IEEE80211_NUM_TIDS, sta->ampdu_mlme.tid_rx_manage_offl)) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 2197c62a0a6e..68f874e73561 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1760,6 +1760,10 @@ void __ieee80211_start_rx_ba_session(struct sta_info *sta, u8 dialog_token, u16 timeout, u16 start_seq_num, u16 ba_policy, u16 tid, u16 buf_size, bool tx, bool auto_seq); +void ___ieee80211_start_rx_ba_session(struct sta_info *sta, + u8 dialog_token, u16 timeout, + u16 start_seq_num, u16 ba_policy, u16 tid, + u16 buf_size, bool tx, bool auto_seq); void ieee80211_sta_tear_down_BA_sessions(struct sta_info *sta, enum ieee80211_agg_stop_reason reason); void ieee80211_process_delba(struct ieee80211_sub_if_data *sdata, @@ -2005,6 +2009,8 @@ void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata, struct txq_info *txq, int tid); void ieee80211_txq_purge(struct ieee80211_local *local, struct txq_info *txqi); +void ieee80211_txq_remove_vlan(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata); void ieee80211_send_auth(struct ieee80211_sub_if_data *sdata, u16 transaction, u16 auth_alg, u16 status, const u8 *extra, size_t extra_len, const u8 *bssid, diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 9228ac73c429..13b16f90e1cf 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -731,7 +731,8 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up) sdata->vif.type == NL80211_IFTYPE_AP_VLAN || local->ops->wake_tx_queue) { /* XXX: for AP_VLAN, actually track AP queues */ - netif_tx_start_all_queues(dev); + if (dev) + netif_tx_start_all_queues(dev); } else if (dev) { unsigned long flags; int n_acs = IEEE80211_NUM_ACS; @@ -793,7 +794,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, bool going_down) { struct ieee80211_local *local = sdata->local; - struct fq *fq = &local->fq; unsigned long flags; struct sk_buff *skb, *tmp; u32 hw_reconf_flags = 0; @@ -993,8 +993,6 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, skb_queue_purge(&sdata->skb_queue); } - sdata->bss = NULL; - spin_lock_irqsave(&local->queue_stop_reason_lock, flags); for (i = 0; i < IEEE80211_MAX_QUEUES; i++) { skb_queue_walk_safe(&local->pending[i], skb, tmp) { @@ -1007,13 +1005,10 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, } spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); - if (sdata->vif.txq) { - struct txq_info *txqi = to_txq_info(sdata->vif.txq); + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + ieee80211_txq_remove_vlan(local, sdata); - spin_lock_bh(&fq->lock); - ieee80211_txq_purge(local, txqi); - spin_unlock_bh(&fq->lock); - } + sdata->bss = NULL; if (local->open_count == 0) ieee80211_clear_tx_pending(local); @@ -1758,7 +1753,9 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, sizeof(void *)); int txq_size = 0; - if (local->ops->wake_tx_queue) + if (local->ops->wake_tx_queue && + type != NL80211_IFTYPE_AP_VLAN && + type != NL80211_IFTYPE_MONITOR) txq_size += sizeof(struct txq_info) + local->hw.txq_data_size; diff --git a/net/mac80211/key.c b/net/mac80211/key.c index a98fc2b5e0dc..938049395f90 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -4,7 +4,7 @@ * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2007-2008 Johannes Berg <johannes@sipsolutions.net> * Copyright 2013-2014 Intel Mobile Communications GmbH - * Copyright 2015 Intel Deutschland GmbH + * Copyright 2015-2017 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -19,6 +19,7 @@ #include <linux/slab.h> #include <linux/export.h> #include <net/mac80211.h> +#include <crypto/algapi.h> #include <asm/unaligned.h> #include "ieee80211_i.h" #include "driver-ops.h" @@ -609,6 +610,39 @@ void ieee80211_key_free_unused(struct ieee80211_key *key) ieee80211_key_free_common(key); } +static bool ieee80211_key_identical(struct ieee80211_sub_if_data *sdata, + struct ieee80211_key *old, + struct ieee80211_key *new) +{ + u8 tkip_old[WLAN_KEY_LEN_TKIP], tkip_new[WLAN_KEY_LEN_TKIP]; + u8 *tk_old, *tk_new; + + if (!old || new->conf.keylen != old->conf.keylen) + return false; + + tk_old = old->conf.key; + tk_new = new->conf.key; + + /* + * In station mode, don't compare the TX MIC key, as it's never used + * and offloaded rekeying may not care to send it to the host. This + * is the case in iwlwifi, for example. + */ + if (sdata->vif.type == NL80211_IFTYPE_STATION && + new->conf.cipher == WLAN_CIPHER_SUITE_TKIP && + new->conf.keylen == WLAN_KEY_LEN_TKIP && + !(new->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE)) { + memcpy(tkip_old, tk_old, WLAN_KEY_LEN_TKIP); + memcpy(tkip_new, tk_new, WLAN_KEY_LEN_TKIP); + memset(tkip_old + NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY, 0, 8); + memset(tkip_new + NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY, 0, 8); + tk_old = tkip_old; + tk_new = tkip_new; + } + + return !crypto_memneq(tk_old, tk_new, new->conf.keylen); +} + int ieee80211_key_link(struct ieee80211_key *key, struct ieee80211_sub_if_data *sdata, struct sta_info *sta) @@ -620,9 +654,6 @@ int ieee80211_key_link(struct ieee80211_key *key, pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE; idx = key->conf.keyidx; - key->local = sdata->local; - key->sdata = sdata; - key->sta = sta; mutex_lock(&sdata->local->key_mtx); @@ -633,6 +664,20 @@ int ieee80211_key_link(struct ieee80211_key *key, else old_key = key_mtx_dereference(sdata->local, sdata->keys[idx]); + /* + * Silently accept key re-installation without really installing the + * new version of the key to avoid nonce reuse or replay issues. + */ + if (ieee80211_key_identical(sdata, old_key, key)) { + ieee80211_key_free_unused(key); + ret = 0; + goto out; + } + + key->local = sdata->local; + key->sdata = sdata; + key->sta = sta; + increment_tailroom_need_count(sdata); ieee80211_key_replace(sdata, sta, pairwise, old_key, key); @@ -648,6 +693,7 @@ int ieee80211_key_link(struct ieee80211_key *key, ret = 0; } + out: mutex_unlock(&sdata->local->key_mtx); return ret; diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index a550c707cd8a..7a76c4a6df30 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -675,8 +675,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh) enum nl80211_band band; u8 *pos; struct ieee80211_sub_if_data *sdata; - int hdr_len = offsetof(struct ieee80211_mgmt, u.beacon) + - sizeof(mgmt->u.beacon); + int hdr_len = offsetofend(struct ieee80211_mgmt, u.beacon); sdata = container_of(ifmsh, struct ieee80211_sub_if_data, u.mesh); rcu_read_lock(); diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h index 7e5f271e3c30..465b7853edc0 100644 --- a/net/mac80211/mesh.h +++ b/net/mac80211/mesh.h @@ -275,6 +275,7 @@ void mesh_neighbour_update(struct ieee80211_sub_if_data *sdata, u8 *hw_addr, struct ieee802_11_elems *ie); bool mesh_peer_accepts_plinks(struct ieee802_11_elems *ie); u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata); +void mesh_plink_timer(struct timer_list *t); void mesh_plink_broken(struct sta_info *sta); u32 mesh_plink_deactivate(struct sta_info *sta); u32 mesh_plink_open(struct sta_info *sta); diff --git a/net/mac80211/mesh_hwmp.c b/net/mac80211/mesh_hwmp.c index d8bbd0d2225a..146ec6c0f12f 100644 --- a/net/mac80211/mesh_hwmp.c +++ b/net/mac80211/mesh_hwmp.c @@ -111,8 +111,8 @@ static int mesh_path_sel_frame_tx(enum mpath_frame_type action, u8 flags, struct sk_buff *skb; struct ieee80211_mgmt *mgmt; u8 *pos, ie_len; - int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.mesh_action) + - sizeof(mgmt->u.action.u.mesh_action); + int hdr_len = offsetofend(struct ieee80211_mgmt, + u.action.u.mesh_action); skb = dev_alloc_skb(local->tx_headroom + hdr_len + @@ -242,8 +242,8 @@ int mesh_path_error_tx(struct ieee80211_sub_if_data *sdata, struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh; struct ieee80211_mgmt *mgmt; u8 *pos, ie_len; - int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.mesh_action) + - sizeof(mgmt->u.action.u.mesh_action); + int hdr_len = offsetofend(struct ieee80211_mgmt, + u.action.u.mesh_action); if (time_before(jiffies, ifmsh->next_perr)) return -EAGAIN; diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index f69c6c38ca43..e2d00cce3c17 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -220,8 +220,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata, bool include_plid = false; u16 peering_proto = 0; u8 *pos, ie_len = 4; - int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.self_prot) + - sizeof(mgmt->u.action.u.self_prot); + int hdr_len = offsetofend(struct ieee80211_mgmt, u.action.u.self_prot); int err = -ENOMEM; skb = dev_alloc_skb(local->tx_headroom + @@ -604,8 +603,9 @@ out: ieee80211_mbss_info_change_notify(sdata, changed); } -static void mesh_plink_timer(unsigned long data) +void mesh_plink_timer(struct timer_list *t) { + struct mesh_sta *mesh = from_timer(mesh, t, plink_timer); struct sta_info *sta; u16 reason = 0; struct ieee80211_sub_if_data *sdata; @@ -617,7 +617,7 @@ static void mesh_plink_timer(unsigned long data) * del_timer_sync() this timer after having made sure * it cannot be readded (by deleting the plink.) */ - sta = (struct sta_info *) data; + sta = mesh->plink_sta; if (sta->sdata->local->quiescing) return; @@ -697,11 +697,8 @@ static void mesh_plink_timer(unsigned long data) static inline void mesh_plink_timer_set(struct sta_info *sta, u32 timeout) { - sta->mesh->plink_timer.expires = jiffies + msecs_to_jiffies(timeout); - sta->mesh->plink_timer.data = (unsigned long) sta; - sta->mesh->plink_timer.function = mesh_plink_timer; sta->mesh->plink_timeout = timeout; - add_timer(&sta->mesh->plink_timer); + mod_timer(&sta->mesh->plink_timer, jiffies + msecs_to_jiffies(timeout)); } static bool llid_in_use(struct ieee80211_sub_if_data *sdata, diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index b588e593b0ec..e4ededa1909d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -145,7 +145,6 @@ static u32 ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, struct ieee80211_supported_band *sband, struct ieee80211_channel *channel, - const struct ieee80211_ht_cap *ht_cap, const struct ieee80211_ht_operation *ht_oper, const struct ieee80211_vht_operation *vht_oper, struct cfg80211_chan_def *chandef, bool tracking) @@ -163,20 +162,13 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata, chandef->center_freq1 = channel->center_freq; chandef->center_freq2 = 0; - if (!ht_cap || !ht_oper || !sta_ht_cap.ht_supported) { + if (!ht_oper || !sta_ht_cap.ht_supported) { ret = IEEE80211_STA_DISABLE_HT | IEEE80211_STA_DISABLE_VHT; goto out; } chandef->width = NL80211_CHAN_WIDTH_20; - if (!(ht_cap->cap_info & - cpu_to_le16(IEEE80211_HT_CAP_SUP_WIDTH_20_40))) { - ret = IEEE80211_STA_DISABLE_40MHZ; - vht_chandef = *chandef; - goto out; - } - ht_cfreq = ieee80211_channel_to_frequency(ht_oper->primary_chan, channel->band); /* check that channel matches the right operating channel */ @@ -344,7 +336,7 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata, /* calculate new channel (type) based on HT/VHT operation IEs */ flags = ieee80211_determine_chantype(sdata, sband, chan, - ht_cap, ht_oper, vht_oper, + ht_oper, vht_oper, &chandef, true); /* @@ -780,11 +772,12 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) WLAN_EID_SUPPORTED_REGULATORY_CLASSES, WLAN_EID_HT_CAPABILITY, WLAN_EID_BSS_COEX_2040, + /* luckily this is almost always there */ WLAN_EID_EXT_CAPABILITY, WLAN_EID_QOS_TRAFFIC_CAPA, WLAN_EID_TIM_BCAST_REQ, WLAN_EID_INTERWORKING, - /* 60GHz doesn't happen right now */ + /* 60 GHz (Multi-band, DMG, MMS) can't happen */ WLAN_EID_VHT_CAPABILITY, WLAN_EID_OPMODE_NOTIF, }; @@ -811,22 +804,16 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata) /* if present, add any custom IEs that go before VHT */ if (assoc_data->ie_len) { static const u8 before_vht[] = { - WLAN_EID_SSID, - WLAN_EID_SUPP_RATES, - WLAN_EID_EXT_SUPP_RATES, - WLAN_EID_PWR_CAPABILITY, - WLAN_EID_SUPPORTED_CHANNELS, - WLAN_EID_RSN, - WLAN_EID_QOS_CAPA, - WLAN_EID_RRM_ENABLED_CAPABILITIES, - WLAN_EID_MOBILITY_DOMAIN, - WLAN_EID_SUPPORTED_REGULATORY_CLASSES, - WLAN_EID_HT_CAPABILITY, + /* + * no need to list the ones split off before HT + * or generated here + */ WLAN_EID_BSS_COEX_2040, WLAN_EID_EXT_CAPABILITY, WLAN_EID_QOS_TRAFFIC_CAPA, WLAN_EID_TIM_BCAST_REQ, WLAN_EID_INTERWORKING, + /* 60 GHz (Multi-band, DMG, MMS) can't happen */ }; /* RIC already taken above, so no need to handle here anymore */ @@ -3155,7 +3142,7 @@ static void ieee80211_rx_mgmt_assoc_resp(struct ieee80211_sub_if_data *sdata, if (len < 24 + 6) return; - reassoc = ieee80211_is_reassoc_req(mgmt->frame_control); + reassoc = ieee80211_is_reassoc_resp(mgmt->frame_control); capab_info = le16_to_cpu(mgmt->u.assoc_resp.capab_info); status_code = le16_to_cpu(mgmt->u.assoc_resp.status_code); aid = le16_to_cpu(mgmt->u.assoc_resp.aid); @@ -4317,7 +4304,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, ifmgd->flags |= ieee80211_determine_chantype(sdata, sband, cbss->channel, - ht_cap, ht_oper, vht_oper, + ht_oper, vht_oper, &chandef, false); sdata->needed_rx_chains = min(ieee80211_ht_vht_rx_chains(sdata, cbss), diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c index f8e7a8bbc618..faf4f6055000 100644 --- a/net/mac80211/offchannel.c +++ b/net/mac80211/offchannel.c @@ -707,6 +707,8 @@ static int ieee80211_cancel_roc(struct ieee80211_local *local, if (!cookie) return -ENOENT; + flush_work(&local->hw_roc_start); + mutex_lock(&local->mtx); list_for_each_entry_safe(roc, tmp, &local->roc_list, list) { if (!mgmt_tx && roc->cookie != cookie) diff --git a/net/mac80211/pm.c b/net/mac80211/pm.c index a87d195c4a61..38c45e1dafd8 100644 --- a/net/mac80211/pm.c +++ b/net/mac80211/pm.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <net/mac80211.h> #include <net/rtnetlink.h> diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 47d2ed570470..ef2becaade50 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -7,7 +7,7 @@ * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2013-2015 Intel Mobile Communications GmbH - * Copyright 2016 Intel Deutschland GmbH + * Copyright 2016-2017 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -183,6 +183,20 @@ ieee80211_bss_info_update(struct ieee80211_local *local, return bss; } +static bool ieee80211_scan_accept_presp(struct ieee80211_sub_if_data *sdata, + u32 scan_flags, const u8 *da) +{ + if (!sdata) + return false; + /* accept broadcast for OCE */ + if (scan_flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP && + is_broadcast_ether_addr(da)) + return true; + if (scan_flags & NL80211_SCAN_FLAG_RANDOM_ADDR) + return true; + return ether_addr_equal(da, sdata->vif.addr); +} + void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb) { struct ieee80211_rx_status *rx_status = IEEE80211_SKB_RXCB(skb); @@ -208,19 +222,24 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb) if (ieee80211_is_probe_resp(mgmt->frame_control)) { struct cfg80211_scan_request *scan_req; struct cfg80211_sched_scan_request *sched_scan_req; + u32 scan_req_flags = 0, sched_scan_req_flags = 0; scan_req = rcu_dereference(local->scan_req); sched_scan_req = rcu_dereference(local->sched_scan_req); - /* ignore ProbeResp to foreign address unless scanning - * with randomised address + if (scan_req) + scan_req_flags = scan_req->flags; + + if (sched_scan_req) + sched_scan_req_flags = sched_scan_req->flags; + + /* ignore ProbeResp to foreign address or non-bcast (OCE) + * unless scanning with randomised address */ - if (!(sdata1 && - (ether_addr_equal(mgmt->da, sdata1->vif.addr) || - scan_req->flags & NL80211_SCAN_FLAG_RANDOM_ADDR)) && - !(sdata2 && - (ether_addr_equal(mgmt->da, sdata2->vif.addr) || - sched_scan_req->flags & NL80211_SCAN_FLAG_RANDOM_ADDR))) + if (!ieee80211_scan_accept_presp(sdata1, scan_req_flags, + mgmt->da) && + !ieee80211_scan_accept_presp(sdata2, sched_scan_req_flags, + mgmt->da)) return; elements = mgmt->u.probe_resp.variable; diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 69615016d5bf..a3060e55122c 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -329,10 +329,12 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, sta->mesh = kzalloc(sizeof(*sta->mesh), gfp); if (!sta->mesh) goto free; + sta->mesh->plink_sta = sta; spin_lock_init(&sta->mesh->plink_lock); if (ieee80211_vif_is_mesh(&sdata->vif) && !sdata->u.mesh.user_mpm) - init_timer(&sta->mesh->plink_timer); + timer_setup(&sta->mesh->plink_timer, mesh_plink_timer, + 0); sta->mesh->nonpeer_pm = NL80211_MESH_POWER_ACTIVE; } #endif @@ -515,6 +517,31 @@ static int sta_info_insert_drv_state(struct ieee80211_local *local, return err; } +static void +ieee80211_recalc_p2p_go_ps_allowed(struct ieee80211_sub_if_data *sdata) +{ + struct ieee80211_local *local = sdata->local; + bool allow_p2p_go_ps = sdata->vif.p2p; + struct sta_info *sta; + + rcu_read_lock(); + list_for_each_entry_rcu(sta, &local->sta_list, list) { + if (sdata != sta->sdata || + !test_sta_flag(sta, WLAN_STA_ASSOC)) + continue; + if (!sta->sta.support_p2p_ps) { + allow_p2p_go_ps = false; + break; + } + } + rcu_read_unlock(); + + if (allow_p2p_go_ps != sdata->vif.bss_conf.allow_p2p_go_ps) { + sdata->vif.bss_conf.allow_p2p_go_ps = allow_p2p_go_ps; + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_P2P_PS); + } +} + /* * should be called with sta_mtx locked * this function replaces the mutex lock @@ -561,6 +588,13 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU) goto out_remove; set_sta_flag(sta, WLAN_STA_INSERTED); + + if (sta->sta_state >= IEEE80211_STA_ASSOC) { + ieee80211_recalc_min_chandef(sta->sdata); + if (!sta->sta.support_p2p_ps) + ieee80211_recalc_p2p_go_ps_allowed(sta->sdata); + } + /* accept BA sessions now */ clear_sta_flag(sta, WLAN_STA_BLOCK_BA); @@ -1788,31 +1822,6 @@ void ieee80211_sta_set_buffered(struct ieee80211_sta *pubsta, } EXPORT_SYMBOL(ieee80211_sta_set_buffered); -static void -ieee80211_recalc_p2p_go_ps_allowed(struct ieee80211_sub_if_data *sdata) -{ - struct ieee80211_local *local = sdata->local; - bool allow_p2p_go_ps = sdata->vif.p2p; - struct sta_info *sta; - - rcu_read_lock(); - list_for_each_entry_rcu(sta, &local->sta_list, list) { - if (sdata != sta->sdata || - !test_sta_flag(sta, WLAN_STA_ASSOC)) - continue; - if (!sta->sta.support_p2p_ps) { - allow_p2p_go_ps = false; - break; - } - } - rcu_read_unlock(); - - if (allow_p2p_go_ps != sdata->vif.bss_conf.allow_p2p_go_ps) { - sdata->vif.bss_conf.allow_p2p_go_ps = allow_p2p_go_ps; - ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_P2P_PS); - } -} - int sta_info_move_state(struct sta_info *sta, enum ieee80211_sta_state new_state) { @@ -2008,7 +2017,7 @@ static void sta_stats_decode_rate(struct ieee80211_local *local, u16 rate, static int sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo) { - u16 rate = ACCESS_ONCE(sta_get_last_rx_stats(sta)->last_rate); + u16 rate = READ_ONCE(sta_get_last_rx_stats(sta)->last_rate); if (rate == STA_STATS_RATE_INVALID) return -EINVAL; diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 3acbdfa9f649..5c54acd10562 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -344,6 +344,7 @@ DECLARE_EWMA(mesh_fail_avg, 20, 8) * @plink_state: peer link state * @plink_timeout: timeout of peer link * @plink_timer: peer link watch timer + * @plink_sta: peer link watch timer's sta_info * @t_offset: timing offset relative to this host * @t_offset_setpoint: reference timing offset of this sta to be used when * calculating clockdrift @@ -356,6 +357,7 @@ DECLARE_EWMA(mesh_fail_avg, 20, 8) */ struct mesh_sta { struct timer_list plink_timer; + struct sta_info *plink_sta; s64 t_offset; s64 t_offset_setpoint; @@ -398,7 +400,7 @@ struct ieee80211_sta_rx_stats { u64 msdu[IEEE80211_NUM_TIDS + 1]; }; -/** +/* * The bandwidth threshold below which the per-station CoDel parameters will be * scaled to be more lenient (to prevent starvation of slow stations). This * value will be scaled by the number of active stations when it is being diff --git a/net/mac80211/trace.c b/net/mac80211/trace.c index edfe0c170a1c..837857261b66 100644 --- a/net/mac80211/trace.c +++ b/net/mac80211/trace.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* bug in tracepoint.h, it should include this */ #include <linux/module.h> diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 3d9ac17af407..591ad02e1fa4 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Portions of this file * Copyright(c) 2016 Intel Deutschland GmbH diff --git a/net/mac80211/trace_msg.h b/net/mac80211/trace_msg.h index 768f7c22a190..366b9e6f043e 100644 --- a/net/mac80211/trace_msg.h +++ b/net/mac80211/trace_msg.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifdef CONFIG_MAC80211_MESSAGE_TRACING #if !defined(__MAC80211_MSG_DRIVER_TRACE) || defined(TRACE_HEADER_MULTI_READ) diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 8858f4f185e9..7b8154474b9e 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1276,11 +1276,6 @@ static void ieee80211_set_skb_enqueue_time(struct sk_buff *skb) IEEE80211_SKB_CB(skb)->control.enqueue_time = codel_get_time(); } -static void ieee80211_set_skb_vif(struct sk_buff *skb, struct txq_info *txqi) -{ - IEEE80211_SKB_CB(skb)->control.vif = txqi->txq.vif; -} - static u32 codel_skb_len_func(const struct sk_buff *skb) { return skb->len; @@ -1401,6 +1396,40 @@ static void ieee80211_txq_enqueue(struct ieee80211_local *local, fq_flow_get_default_func); } +static bool fq_vlan_filter_func(struct fq *fq, struct fq_tin *tin, + struct fq_flow *flow, struct sk_buff *skb, + void *data) +{ + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + + return info->control.vif == data; +} + +void ieee80211_txq_remove_vlan(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata) +{ + struct fq *fq = &local->fq; + struct txq_info *txqi; + struct fq_tin *tin; + struct ieee80211_sub_if_data *ap; + + if (WARN_ON(sdata->vif.type != NL80211_IFTYPE_AP_VLAN)) + return; + + ap = container_of(sdata->bss, struct ieee80211_sub_if_data, u.ap); + + if (!ap->vif.txq) + return; + + txqi = to_txq_info(ap->vif.txq); + tin = &txqi->tin; + + spin_lock_bh(&fq->lock); + fq_tin_filter(fq, tin, fq_vlan_filter_func, &sdata->vif, + fq_skb_free_func); + spin_unlock_bh(&fq->lock); +} + void ieee80211_txq_init(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct txq_info *txqi, int tid) @@ -3414,6 +3443,7 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw, struct ieee80211_tx_info *info; struct ieee80211_tx_data tx; ieee80211_tx_result r; + struct ieee80211_vif *vif; spin_lock_bh(&fq->lock); @@ -3430,8 +3460,6 @@ begin: if (!skb) goto out; - ieee80211_set_skb_vif(skb, txqi); - hdr = (struct ieee80211_hdr *)skb->data; info = IEEE80211_SKB_CB(skb); @@ -3488,6 +3516,34 @@ begin: } } + switch (tx.sdata->vif.type) { + case NL80211_IFTYPE_MONITOR: + if (tx.sdata->u.mntr.flags & MONITOR_FLAG_ACTIVE) { + vif = &tx.sdata->vif; + break; + } + tx.sdata = rcu_dereference(local->monitor_sdata); + if (tx.sdata) { + vif = &tx.sdata->vif; + info->hw_queue = + vif->hw_queue[skb_get_queue_mapping(skb)]; + } else if (ieee80211_hw_check(&local->hw, QUEUE_CONTROL)) { + ieee80211_free_txskb(&local->hw, skb); + goto begin; + } else { + vif = NULL; + } + break; + case NL80211_IFTYPE_AP_VLAN: + tx.sdata = container_of(tx.sdata->bss, + struct ieee80211_sub_if_data, u.ap); + /* fall through */ + default: + vif = &tx.sdata->vif; + break; + } + + IEEE80211_SKB_CB(skb)->control.vif = vif; out: spin_unlock_bh(&fq->lock); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 259698de569f..d57e5f6bd8b6 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1392,10 +1392,10 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local, /* insert custom IEs that go before HT */ if (ie && ie_len) { static const u8 before_ht[] = { - WLAN_EID_SSID, - WLAN_EID_SUPP_RATES, - WLAN_EID_REQUEST, - WLAN_EID_EXT_SUPP_RATES, + /* + * no need to list the ones split off already + * (or generated here) + */ WLAN_EID_DS_PARAMS, WLAN_EID_SUPPORTED_REGULATORY_CLASSES, }; @@ -1424,20 +1424,17 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local, /* insert custom IEs that go before VHT */ if (ie && ie_len) { static const u8 before_vht[] = { - WLAN_EID_SSID, - WLAN_EID_SUPP_RATES, - WLAN_EID_REQUEST, - WLAN_EID_EXT_SUPP_RATES, - WLAN_EID_DS_PARAMS, - WLAN_EID_SUPPORTED_REGULATORY_CLASSES, - WLAN_EID_HT_CAPABILITY, + /* + * no need to list the ones split off already + * (or generated here) + */ WLAN_EID_BSS_COEX_2040, WLAN_EID_EXT_CAPABILITY, WLAN_EID_SSID_LIST, WLAN_EID_CHANNEL_USAGE, WLAN_EID_INTERWORKING, - /* mesh ID can't happen here */ - /* 60 GHz can't happen here right now */ + WLAN_EID_MESH_ID, + /* 60 GHz (Multi-band, DMG, MMS) can't happen */ }; noffset = ieee80211_ie_split(ie, ie_len, before_vht, ARRAY_SIZE(before_vht), @@ -2980,8 +2977,8 @@ int ieee80211_send_action_csa(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt; struct ieee80211_local *local = sdata->local; int freq; - int hdr_len = offsetof(struct ieee80211_mgmt, u.action.u.chan_switch) + - sizeof(mgmt->u.action.u.chan_switch); + int hdr_len = offsetofend(struct ieee80211_mgmt, + u.action.u.chan_switch); u8 *pos; if (sdata->vif.type != NL80211_IFTYPE_ADHOC && diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index 19ec2189d3ac..b9276ac849fa 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -386,6 +386,16 @@ enum ieee80211_sta_rx_bandwidth ieee80211_sta_cur_vht_bw(struct sta_info *sta) bw = ieee80211_sta_cap_rx_bw(sta); bw = min(bw, sta->cur_max_bandwidth); + + /* Don't consider AP's bandwidth for TDLS peers, section 11.23.1 of + * IEEE80211-2016 specification makes higher bandwidth operation + * possible on the TDLS link if the peers have wider bandwidth + * capability. + */ + if (test_sta_flag(sta, WLAN_STA_TDLS_PEER) && + test_sta_flag(sta, WLAN_STA_TDLS_WIDER_BW)) + return bw; + bw = min(bw, ieee80211_chan_width_to_rx_bw(bss_width)); return bw; diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 0d722ea98a1b..b58722d9de37 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -464,7 +464,7 @@ static int ccmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb, pos += IEEE80211_CCMP_HDR_LEN; ccmp_special_blocks(skb, pn, b_0, aad); return ieee80211_aes_ccm_encrypt(key->u.ccmp.tfm, b_0, aad, pos, len, - skb_put(skb, mic_len), mic_len); + skb_put(skb, mic_len)); } @@ -543,7 +543,7 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx, key->u.ccmp.tfm, b_0, aad, skb->data + hdrlen + IEEE80211_CCMP_HDR_LEN, data_len, - skb->data + skb->len - mic_len, mic_len)) + skb->data + skb->len - mic_len)) return RX_DROP_UNUSABLE; } diff --git a/net/mac802154/cfg.h b/net/mac802154/cfg.h index e2718f981e82..3bb089685500 100644 --- a/net/mac802154/cfg.h +++ b/net/mac802154/cfg.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* mac802154 configuration hooks for cfg802154 */ diff --git a/net/mac802154/driver-ops.h b/net/mac802154/driver-ops.h index fd9daf2ecec9..d23f0db98015 100644 --- a/net/mac802154/driver-ops.h +++ b/net/mac802154/driver-ops.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __MAC802154_DRIVER_OPS #define __MAC802154_DRIVER_OPS diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c index 1e1c9b20bab7..2fb703d70803 100644 --- a/net/mac802154/llsec.c +++ b/net/mac802154/llsec.c @@ -623,13 +623,18 @@ llsec_do_encrypt_unauth(struct sk_buff *skb, const struct mac802154_llsec *sec, u8 iv[16]; struct scatterlist src; SKCIPHER_REQUEST_ON_STACK(req, key->tfm0); - int err; + int err, datalen; + unsigned char *data; llsec_geniv(iv, sec->params.hwaddr, &hdr->sec); - sg_init_one(&src, skb->data, skb->len); + /* Compute data payload offset and data length */ + data = skb_mac_header(skb) + skb->mac_len; + datalen = skb_tail_pointer(skb) - data; + sg_init_one(&src, data, datalen); + skcipher_request_set_tfm(req, key->tfm0); skcipher_request_set_callback(req, 0, NULL, NULL); - skcipher_request_set_crypt(req, &src, &src, skb->len, iv); + skcipher_request_set_crypt(req, &src, &src, datalen, iv); err = crypto_skcipher_encrypt(req); skcipher_request_zero(req); return err; @@ -713,7 +718,8 @@ int mac802154_llsec_encrypt(struct mac802154_llsec *sec, struct sk_buff *skb) if (hlen < 0 || hdr.fc.type != IEEE802154_FC_TYPE_DATA) return -EINVAL; - if (!hdr.fc.security_enabled || hdr.sec.level == 0) { + if (!hdr.fc.security_enabled || + (hdr.sec.level == IEEE802154_SCF_SECLEVEL_NONE)) { skb_push(skb, hlen); return 0; } diff --git a/net/mac802154/trace.c b/net/mac802154/trace.c index 863e5e6b983d..c36e3d541a42 100644 --- a/net/mac802154/trace.c +++ b/net/mac802154/trace.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/module.h> #ifndef __CHECKER__ diff --git a/net/mac802154/trace.h b/net/mac802154/trace.h index 6f30e0c93a16..2c8a43d3607f 100644 --- a/net/mac802154/trace.h +++ b/net/mac802154/trace.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* Based on net/mac80211/trace.h */ #undef TRACE_SYSTEM diff --git a/net/mpls/Kconfig b/net/mpls/Kconfig index 5c467ef97311..801ea9098387 100644 --- a/net/mpls/Kconfig +++ b/net/mpls/Kconfig @@ -24,6 +24,7 @@ config NET_MPLS_GSO config MPLS_ROUTING tristate "MPLS: routing support" + depends on NET_IP_TUNNEL || NET_IP_TUNNEL=n ---help--- Add support for forwarding of mpls packets. diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index ea4f481839dd..8ca9915befc8 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -16,6 +16,7 @@ #include <net/arp.h> #include <net/ip_fib.h> #include <net/netevent.h> +#include <net/ip_tunnels.h> #include <net/netns/generic.h> #if IS_ENABLED(CONFIG_IPV6) #include <net/ipv6.h> @@ -39,6 +40,36 @@ static int one = 1; static int label_limit = (1 << 20) - 1; static int ttl_max = 255; +#if IS_ENABLED(CONFIG_NET_IP_TUNNEL) +static size_t ipgre_mpls_encap_hlen(struct ip_tunnel_encap *e) +{ + return sizeof(struct mpls_shim_hdr); +} + +static const struct ip_tunnel_encap_ops mpls_iptun_ops = { + .encap_hlen = ipgre_mpls_encap_hlen, +}; + +static int ipgre_tunnel_encap_add_mpls_ops(void) +{ + return ip_tunnel_encap_add_ops(&mpls_iptun_ops, TUNNEL_ENCAP_MPLS); +} + +static void ipgre_tunnel_encap_del_mpls_ops(void) +{ + ip_tunnel_encap_del_ops(&mpls_iptun_ops, TUNNEL_ENCAP_MPLS); +} +#else +static int ipgre_tunnel_encap_add_mpls_ops(void) +{ + return 0; +} + +static void ipgre_tunnel_encap_del_mpls_ops(void) +{ +} +#endif + static void rtmsg_lfib(int event, u32 label, struct mpls_route *rt, struct nlmsghdr *nlh, struct net *net, u32 portid, unsigned int nlm_flags); @@ -2479,12 +2510,16 @@ static int __init mpls_init(void) rtnl_af_register(&mpls_af_ops); - rtnl_register(PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL, NULL); - rtnl_register(PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL, NULL); + rtnl_register(PF_MPLS, RTM_NEWROUTE, mpls_rtm_newroute, NULL, 0); + rtnl_register(PF_MPLS, RTM_DELROUTE, mpls_rtm_delroute, NULL, 0); rtnl_register(PF_MPLS, RTM_GETROUTE, mpls_getroute, mpls_dump_routes, - NULL); + 0); rtnl_register(PF_MPLS, RTM_GETNETCONF, mpls_netconf_get_devconf, - mpls_netconf_dump_devconf, NULL); + mpls_netconf_dump_devconf, 0); + err = ipgre_tunnel_encap_add_mpls_ops(); + if (err) + pr_err("Can't add mpls over gre tunnel ops\n"); + err = 0; out: return err; @@ -2502,6 +2537,7 @@ static void __exit mpls_exit(void) dev_remove_pack(&mpls_packet_type); unregister_netdevice_notifier(&mpls_dev_notifier); unregister_pernet_subsys(&mpls_net_ops); + ipgre_tunnel_encap_del_mpls_ops(); } module_exit(mpls_exit); diff --git a/net/mpls/internal.h b/net/mpls/internal.h index cf65aec2e551..768a302879b4 100644 --- a/net/mpls/internal.h +++ b/net/mpls/internal.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef MPLS_INTERNAL_H #define MPLS_INTERNAL_H #include <net/mpls.h> diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h index 1308a56f2591..d30f7bd741d0 100644 --- a/net/ncsi/internal.h +++ b/net/ncsi/internal.h @@ -180,6 +180,7 @@ struct ncsi_channel { #define NCSI_CHANNEL_INACTIVE 1 #define NCSI_CHANNEL_ACTIVE 2 #define NCSI_CHANNEL_INVISIBLE 3 + bool reconfigure_needed; spinlock_t lock; /* Protect filters etc */ struct ncsi_package *package; struct ncsi_channel_version version; @@ -235,6 +236,9 @@ enum { ncsi_dev_state_probe_dp, ncsi_dev_state_config_sp = 0x0301, ncsi_dev_state_config_cis, + ncsi_dev_state_config_clear_vids, + ncsi_dev_state_config_svf, + ncsi_dev_state_config_ev, ncsi_dev_state_config_sma, ncsi_dev_state_config_ebf, #if IS_ENABLED(CONFIG_IPV6) @@ -253,6 +257,12 @@ enum { ncsi_dev_state_suspend_done }; +struct vlan_vid { + struct list_head list; + __be16 proto; + u16 vid; +}; + struct ncsi_dev_priv { struct ncsi_dev ndev; /* Associated NCSI device */ unsigned int flags; /* NCSI device flags */ @@ -276,6 +286,8 @@ struct ncsi_dev_priv { struct work_struct work; /* For channel management */ struct packet_type ptype; /* NCSI packet Rx handler */ struct list_head node; /* Form NCSI device list */ +#define NCSI_MAX_VLAN_VIDS 15 + struct list_head vlan_vids; /* List of active VLAN IDs */ }; struct ncsi_cmd_arg { diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c index 6898e7229285..67e708e98ccf 100644 --- a/net/ncsi/ncsi-aen.c +++ b/net/ncsi/ncsi-aen.c @@ -73,6 +73,9 @@ static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp, ncm->data[2] = data; ncm->data[4] = ntohl(lsc->oem_status); + netdev_info(ndp->ndev.dev, "NCSI: LSC AEN - channel %u state %s\n", + nc->id, data & 0x1 ? "up" : "down"); + chained = !list_empty(&nc->link); state = nc->state; spin_unlock_irqrestore(&nc->lock, flags); @@ -145,6 +148,8 @@ static int ncsi_aen_handler_hncdsc(struct ncsi_dev_priv *ndp, ncm = &nc->modes[NCSI_MODE_LINK]; hncdsc = (struct ncsi_aen_hncdsc_pkt *)h; ncm->data[3] = ntohl(hncdsc->status); + netdev_info(ndp->ndev.dev, "NCSI: HNCDSC AEN - channel %u state %s\n", + nc->id, ncm->data[3] & 0x3 ? "up" : "down"); if (!list_empty(&nc->link) || nc->state != NCSI_CHANNEL_ACTIVE) { spin_unlock_irqrestore(&nc->lock, flags); @@ -187,7 +192,7 @@ static struct ncsi_aen_handler { } ncsi_aen_handlers[] = { { NCSI_PKT_AEN_LSC, 12, ncsi_aen_handler_lsc }, { NCSI_PKT_AEN_CR, 4, ncsi_aen_handler_cr }, - { NCSI_PKT_AEN_HNCDSC, 4, ncsi_aen_handler_hncdsc } + { NCSI_PKT_AEN_HNCDSC, 8, ncsi_aen_handler_hncdsc } }; int ncsi_aen_handler(struct ncsi_dev_priv *ndp, struct sk_buff *skb) @@ -212,10 +217,18 @@ int ncsi_aen_handler(struct ncsi_dev_priv *ndp, struct sk_buff *skb) } ret = ncsi_validate_aen_pkt(h, nah->payload); - if (ret) + if (ret) { + netdev_warn(ndp->ndev.dev, + "NCSI: 'bad' packet ignored for AEN type 0x%x\n", + h->type); goto out; + } ret = nah->handler(ndp, h); + if (ret) + netdev_err(ndp->ndev.dev, + "NCSI: Handler for AEN type 0x%x returned %d\n", + h->type, ret); out: consume_skb(skb); return ret; diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c index 5e03ed190e18..7567ca63aae2 100644 --- a/net/ncsi/ncsi-cmd.c +++ b/net/ncsi/ncsi-cmd.c @@ -139,9 +139,9 @@ static int ncsi_cmd_handler_svf(struct sk_buff *skb, struct ncsi_cmd_svf_pkt *cmd; cmd = skb_put_zero(skb, sizeof(*cmd)); - cmd->vlan = htons(nca->words[0]); - cmd->index = nca->bytes[2]; - cmd->enable = nca->bytes[3]; + cmd->vlan = htons(nca->words[1]); + cmd->index = nca->bytes[6]; + cmd->enable = nca->bytes[7]; ncsi_cmd_build_header(&cmd->cmd.common, nca); return 0; @@ -153,7 +153,7 @@ static int ncsi_cmd_handler_ev(struct sk_buff *skb, struct ncsi_cmd_ev_pkt *cmd; cmd = skb_put_zero(skb, sizeof(*cmd)); - cmd->mode = nca->bytes[0]; + cmd->mode = nca->bytes[3]; ncsi_cmd_build_header(&cmd->cmd.common, nca); return 0; @@ -228,7 +228,7 @@ static struct ncsi_cmd_handler { { NCSI_PKT_CMD_AE, 8, ncsi_cmd_handler_ae }, { NCSI_PKT_CMD_SL, 8, ncsi_cmd_handler_sl }, { NCSI_PKT_CMD_GLS, 0, ncsi_cmd_handler_default }, - { NCSI_PKT_CMD_SVF, 4, ncsi_cmd_handler_svf }, + { NCSI_PKT_CMD_SVF, 8, ncsi_cmd_handler_svf }, { NCSI_PKT_CMD_EV, 4, ncsi_cmd_handler_ev }, { NCSI_PKT_CMD_DV, 0, ncsi_cmd_handler_default }, { NCSI_PKT_CMD_SMA, 8, ncsi_cmd_handler_sma }, diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c index a3bd5fa8ad09..a2b904a718c6 100644 --- a/net/ncsi/ncsi-manage.c +++ b/net/ncsi/ncsi-manage.c @@ -38,6 +38,25 @@ static inline int ncsi_filter_size(int table) return sizes[table]; } +static u32 *ncsi_get_filter(struct ncsi_channel *nc, int table, int index) +{ + struct ncsi_channel_filter *ncf; + int size; + + ncf = nc->filters[table]; + if (!ncf) + return NULL; + + size = ncsi_filter_size(table); + if (size < 0) + return NULL; + + return ncf->data + size * index; +} + +/* Find the first active filter in a filter table that matches the given + * data parameter. If data is NULL, this returns the first active filter. + */ int ncsi_find_filter(struct ncsi_channel *nc, int table, void *data) { struct ncsi_channel_filter *ncf; @@ -58,7 +77,7 @@ int ncsi_find_filter(struct ncsi_channel *nc, int table, void *data) index = -1; while ((index = find_next_bit(bitmap, ncf->total, index + 1)) < ncf->total) { - if (!memcmp(ncf->data + size * index, data, size)) { + if (!data || !memcmp(ncf->data + size * index, data, size)) { spin_unlock_irqrestore(&nc->lock, flags); return index; } @@ -170,6 +189,7 @@ static void ncsi_channel_monitor(unsigned long data) struct ncsi_channel *nc = (struct ncsi_channel *)data; struct ncsi_package *np = nc->package; struct ncsi_dev_priv *ndp = np->ndp; + struct ncsi_channel_mode *ncm; struct ncsi_cmd_arg nca; bool enabled, chained; unsigned int monitor_state; @@ -183,11 +203,15 @@ static void ncsi_channel_monitor(unsigned long data) monitor_state = nc->monitor.state; spin_unlock_irqrestore(&nc->lock, flags); - if (!enabled || chained) + if (!enabled || chained) { + ncsi_stop_channel_monitor(nc); return; + } if (state != NCSI_CHANNEL_INACTIVE && - state != NCSI_CHANNEL_ACTIVE) + state != NCSI_CHANNEL_ACTIVE) { + ncsi_stop_channel_monitor(nc); return; + } switch (monitor_state) { case NCSI_CHANNEL_MONITOR_START: @@ -198,28 +222,30 @@ static void ncsi_channel_monitor(unsigned long data) nca.type = NCSI_PKT_CMD_GLS; nca.req_flags = 0; ret = ncsi_xmit_cmd(&nca); - if (ret) { + if (ret) netdev_err(ndp->ndev.dev, "Error %d sending GLS\n", ret); - return; - } - break; case NCSI_CHANNEL_MONITOR_WAIT ... NCSI_CHANNEL_MONITOR_WAIT_MAX: break; default: - if (!(ndp->flags & NCSI_DEV_HWA) && - state == NCSI_CHANNEL_ACTIVE) { + netdev_err(ndp->ndev.dev, "NCSI Channel %d timed out!\n", + nc->id); + if (!(ndp->flags & NCSI_DEV_HWA)) { ncsi_report_link(ndp, true); ndp->flags |= NCSI_DEV_RESHUFFLE; } + ncsi_stop_channel_monitor(nc); + + ncm = &nc->modes[NCSI_MODE_LINK]; spin_lock_irqsave(&nc->lock, flags); nc->state = NCSI_CHANNEL_INVISIBLE; + ncm->data[2] &= ~0x1; spin_unlock_irqrestore(&nc->lock, flags); spin_lock_irqsave(&ndp->lock, flags); - nc->state = NCSI_CHANNEL_INACTIVE; + nc->state = NCSI_CHANNEL_ACTIVE; list_add_tail_rcu(&nc->link, &ndp->channel_queue); spin_unlock_irqrestore(&ndp->lock, flags); ncsi_process_next_channel(ndp); @@ -639,6 +665,99 @@ error: nd->state = ncsi_dev_state_functional; } +/* Check the VLAN filter bitmap for a set filter, and construct a + * "Set VLAN Filter - Disable" packet if found. + */ +static int clear_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc, + struct ncsi_cmd_arg *nca) +{ + int index; + u32 *data; + u16 vid; + + index = ncsi_find_filter(nc, NCSI_FILTER_VLAN, NULL); + if (index < 0) { + /* Filter table empty */ + return -1; + } + + data = ncsi_get_filter(nc, NCSI_FILTER_VLAN, index); + if (!data) { + netdev_err(ndp->ndev.dev, + "NCSI: failed to retrieve filter %d\n", index); + /* Set the VLAN id to 0 - this will still disable the entry in + * the filter table, but we won't know what it was. + */ + vid = 0; + } else { + vid = *(u16 *)data; + } + + netdev_printk(KERN_DEBUG, ndp->ndev.dev, + "NCSI: removed vlan tag %u at index %d\n", + vid, index + 1); + ncsi_remove_filter(nc, NCSI_FILTER_VLAN, index); + + nca->type = NCSI_PKT_CMD_SVF; + nca->words[1] = vid; + /* HW filter index starts at 1 */ + nca->bytes[6] = index + 1; + nca->bytes[7] = 0x00; + return 0; +} + +/* Find an outstanding VLAN tag and constuct a "Set VLAN Filter - Enable" + * packet. + */ +static int set_one_vid(struct ncsi_dev_priv *ndp, struct ncsi_channel *nc, + struct ncsi_cmd_arg *nca) +{ + struct vlan_vid *vlan = NULL; + int index = 0; + + list_for_each_entry_rcu(vlan, &ndp->vlan_vids, list) { + index = ncsi_find_filter(nc, NCSI_FILTER_VLAN, &vlan->vid); + if (index < 0) { + /* New tag to add */ + netdev_printk(KERN_DEBUG, ndp->ndev.dev, + "NCSI: new vlan id to set: %u\n", + vlan->vid); + break; + } + netdev_printk(KERN_DEBUG, ndp->ndev.dev, + "vid %u already at filter pos %d\n", + vlan->vid, index); + } + + if (!vlan || index >= 0) { + netdev_printk(KERN_DEBUG, ndp->ndev.dev, + "no vlan ids left to set\n"); + return -1; + } + + index = ncsi_add_filter(nc, NCSI_FILTER_VLAN, &vlan->vid); + if (index < 0) { + netdev_err(ndp->ndev.dev, + "Failed to add new VLAN tag, error %d\n", index); + if (index == -ENOSPC) + netdev_err(ndp->ndev.dev, + "Channel %u already has all VLAN filters set\n", + nc->id); + return -1; + } + + netdev_printk(KERN_DEBUG, ndp->ndev.dev, + "NCSI: set vid %u in packet, index %u\n", + vlan->vid, index + 1); + nca->type = NCSI_PKT_CMD_SVF; + nca->words[1] = vlan->vid; + /* HW filter index starts at 1 */ + nca->bytes[6] = index + 1; + nca->bytes[7] = 0x01; + + return 0; +} + static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) { struct ncsi_dev *nd = &ndp->ndev; @@ -667,8 +786,11 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) nca.package = np->id; nca.channel = NCSI_RESERVED_CHANNEL; ret = ncsi_xmit_cmd(&nca); - if (ret) + if (ret) { + netdev_err(ndp->ndev.dev, + "NCSI: Failed to transmit CMD_SP\n"); goto error; + } nd->state = ncsi_dev_state_config_cis; break; @@ -680,11 +802,17 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) nca.package = np->id; nca.channel = nc->id; ret = ncsi_xmit_cmd(&nca); - if (ret) + if (ret) { + netdev_err(ndp->ndev.dev, + "NCSI: Failed to transmit CMD_CIS\n"); goto error; + } - nd->state = ncsi_dev_state_config_sma; + nd->state = ncsi_dev_state_config_clear_vids; break; + case ncsi_dev_state_config_clear_vids: + case ncsi_dev_state_config_svf: + case ncsi_dev_state_config_ev: case ncsi_dev_state_config_sma: case ncsi_dev_state_config_ebf: #if IS_ENABLED(CONFIG_IPV6) @@ -699,11 +827,40 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) nca.package = np->id; nca.channel = nc->id; + /* Clear any active filters on the channel before setting */ + if (nd->state == ncsi_dev_state_config_clear_vids) { + ret = clear_one_vid(ndp, nc, &nca); + if (ret) { + nd->state = ncsi_dev_state_config_svf; + schedule_work(&ndp->work); + break; + } + /* Repeat */ + nd->state = ncsi_dev_state_config_clear_vids; + /* Add known VLAN tags to the filter */ + } else if (nd->state == ncsi_dev_state_config_svf) { + ret = set_one_vid(ndp, nc, &nca); + if (ret) { + nd->state = ncsi_dev_state_config_ev; + schedule_work(&ndp->work); + break; + } + /* Repeat */ + nd->state = ncsi_dev_state_config_svf; + /* Enable/Disable the VLAN filter */ + } else if (nd->state == ncsi_dev_state_config_ev) { + if (list_empty(&ndp->vlan_vids)) { + nca.type = NCSI_PKT_CMD_DV; + } else { + nca.type = NCSI_PKT_CMD_EV; + nca.bytes[3] = NCSI_CAP_VLAN_NO; + } + nd->state = ncsi_dev_state_config_sma; + } else if (nd->state == ncsi_dev_state_config_sma) { /* Use first entry in unicast filter table. Note that * the MAC filter table starts from entry 1 instead of * 0. */ - if (nd->state == ncsi_dev_state_config_sma) { nca.type = NCSI_PKT_CMD_SMA; for (index = 0; index < 6; index++) nca.bytes[index] = dev->dev_addr[index]; @@ -746,17 +903,45 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) } ret = ncsi_xmit_cmd(&nca); - if (ret) + if (ret) { + netdev_err(ndp->ndev.dev, + "NCSI: Failed to transmit CMD %x\n", + nca.type); goto error; + } break; case ncsi_dev_state_config_done: + netdev_printk(KERN_DEBUG, ndp->ndev.dev, + "NCSI: channel %u config done\n", nc->id); spin_lock_irqsave(&nc->lock, flags); + if (nc->reconfigure_needed) { + /* This channel's configuration has been updated + * part-way during the config state - start the + * channel configuration over + */ + nc->reconfigure_needed = false; + nc->state = NCSI_CHANNEL_INACTIVE; + spin_unlock_irqrestore(&nc->lock, flags); + + spin_lock_irqsave(&ndp->lock, flags); + list_add_tail_rcu(&nc->link, &ndp->channel_queue); + spin_unlock_irqrestore(&ndp->lock, flags); + + netdev_printk(KERN_DEBUG, dev, + "Dirty NCSI channel state reset\n"); + ncsi_process_next_channel(ndp); + break; + } + if (nc->modes[NCSI_MODE_LINK].data[2] & 0x1) { hot_nc = nc; nc->state = NCSI_CHANNEL_ACTIVE; } else { hot_nc = NULL; nc->state = NCSI_CHANNEL_INACTIVE; + netdev_warn(ndp->ndev.dev, + "NCSI: channel %u link down after config\n", + nc->id); } spin_unlock_irqrestore(&nc->lock, flags); @@ -769,8 +954,8 @@ static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) ncsi_process_next_channel(ndp); break; default: - netdev_warn(dev, "Wrong NCSI state 0x%x in config\n", - nd->state); + netdev_alert(dev, "Wrong NCSI state 0x%x in config\n", + nd->state); } return; @@ -822,10 +1007,17 @@ static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp) } if (!found) { + netdev_warn(ndp->ndev.dev, + "NCSI: No channel found with link\n"); ncsi_report_link(ndp, true); return -ENODEV; } + ncm = &found->modes[NCSI_MODE_LINK]; + netdev_printk(KERN_DEBUG, ndp->ndev.dev, + "NCSI: Channel %u added to queue (link %s)\n", + found->id, ncm->data[2] & 0x1 ? "up" : "down"); + out: spin_lock_irqsave(&ndp->lock, flags); list_add_tail_rcu(&found->link, &ndp->channel_queue); @@ -839,12 +1031,15 @@ static bool ncsi_check_hwa(struct ncsi_dev_priv *ndp) struct ncsi_package *np; struct ncsi_channel *nc; unsigned int cap; + bool has_channel = false; /* The hardware arbitration is disabled if any one channel * doesn't support explicitly. */ NCSI_FOR_EACH_PACKAGE(ndp, np) { NCSI_FOR_EACH_CHANNEL(np, nc) { + has_channel = true; + cap = nc->caps[NCSI_CAP_GENERIC].cap; if (!(cap & NCSI_CAP_GENERIC_HWA) || (cap & NCSI_CAP_GENERIC_HWA_MASK) != @@ -855,8 +1050,13 @@ static bool ncsi_check_hwa(struct ncsi_dev_priv *ndp) } } - ndp->flags |= NCSI_DEV_HWA; - return true; + if (has_channel) { + ndp->flags |= NCSI_DEV_HWA; + return true; + } + + ndp->flags &= ~NCSI_DEV_HWA; + return false; } static int ncsi_enable_hwa(struct ncsi_dev_priv *ndp) @@ -879,6 +1079,8 @@ static int ncsi_enable_hwa(struct ncsi_dev_priv *ndp) /* We can have no channels in extremely case */ if (list_empty(&ndp->channel_queue)) { + netdev_err(ndp->ndev.dev, + "NCSI: No available channels for HWA\n"); ncsi_report_link(ndp, false); return -ENOENT; } @@ -1047,6 +1249,9 @@ static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) return; error: + netdev_err(ndp->ndev.dev, + "NCSI: Failed to transmit cmd 0x%x during probe\n", + nca.type); ncsi_report_link(ndp, true); } @@ -1100,10 +1305,14 @@ int ncsi_process_next_channel(struct ncsi_dev_priv *ndp) switch (old_state) { case NCSI_CHANNEL_INACTIVE: ndp->ndev.state = ncsi_dev_state_config; + netdev_info(ndp->ndev.dev, "NCSI: configuring channel %u\n", + nc->id); ncsi_configure_channel(ndp); break; case NCSI_CHANNEL_ACTIVE: ndp->ndev.state = ncsi_dev_state_suspend; + netdev_info(ndp->ndev.dev, "NCSI: suspending channel %u\n", + nc->id); ncsi_suspend_channel(ndp); break; default: @@ -1123,6 +1332,8 @@ out: return ncsi_choose_active_channel(ndp); } + netdev_printk(KERN_DEBUG, ndp->ndev.dev, + "NCSI: No more channels to process\n"); ncsi_report_link(ndp, false); return -ENODEV; } @@ -1191,6 +1402,147 @@ static struct notifier_block ncsi_inet6addr_notifier = { }; #endif /* CONFIG_IPV6 */ +static int ncsi_kick_channels(struct ncsi_dev_priv *ndp) +{ + struct ncsi_dev *nd = &ndp->ndev; + struct ncsi_channel *nc; + struct ncsi_package *np; + unsigned long flags; + unsigned int n = 0; + + NCSI_FOR_EACH_PACKAGE(ndp, np) { + NCSI_FOR_EACH_CHANNEL(np, nc) { + spin_lock_irqsave(&nc->lock, flags); + + /* Channels may be busy, mark dirty instead of + * kicking if; + * a) not ACTIVE (configured) + * b) in the channel_queue (to be configured) + * c) it's ndev is in the config state + */ + if (nc->state != NCSI_CHANNEL_ACTIVE) { + if ((ndp->ndev.state & 0xff00) == + ncsi_dev_state_config || + !list_empty(&nc->link)) { + netdev_printk(KERN_DEBUG, nd->dev, + "NCSI: channel %p marked dirty\n", + nc); + nc->reconfigure_needed = true; + } + spin_unlock_irqrestore(&nc->lock, flags); + continue; + } + + spin_unlock_irqrestore(&nc->lock, flags); + + ncsi_stop_channel_monitor(nc); + spin_lock_irqsave(&nc->lock, flags); + nc->state = NCSI_CHANNEL_INACTIVE; + spin_unlock_irqrestore(&nc->lock, flags); + + spin_lock_irqsave(&ndp->lock, flags); + list_add_tail_rcu(&nc->link, &ndp->channel_queue); + spin_unlock_irqrestore(&ndp->lock, flags); + + netdev_printk(KERN_DEBUG, nd->dev, + "NCSI: kicked channel %p\n", nc); + n++; + } + } + + return n; +} + +int ncsi_vlan_rx_add_vid(struct net_device *dev, __be16 proto, u16 vid) +{ + struct ncsi_dev_priv *ndp; + unsigned int n_vids = 0; + struct vlan_vid *vlan; + struct ncsi_dev *nd; + bool found = false; + + if (vid == 0) + return 0; + + nd = ncsi_find_dev(dev); + if (!nd) { + netdev_warn(dev, "NCSI: No net_device?\n"); + return 0; + } + + ndp = TO_NCSI_DEV_PRIV(nd); + + /* Add the VLAN id to our internal list */ + list_for_each_entry_rcu(vlan, &ndp->vlan_vids, list) { + n_vids++; + if (vlan->vid == vid) { + netdev_printk(KERN_DEBUG, dev, + "NCSI: vid %u already registered\n", vid); + return 0; + } + } + if (n_vids >= NCSI_MAX_VLAN_VIDS) { + netdev_warn(dev, + "tried to add vlan id %u but NCSI max already registered (%u)\n", + vid, NCSI_MAX_VLAN_VIDS); + return -ENOSPC; + } + + vlan = kzalloc(sizeof(*vlan), GFP_KERNEL); + if (!vlan) + return -ENOMEM; + + vlan->proto = proto; + vlan->vid = vid; + list_add_rcu(&vlan->list, &ndp->vlan_vids); + + netdev_printk(KERN_DEBUG, dev, "NCSI: Added new vid %u\n", vid); + + found = ncsi_kick_channels(ndp) != 0; + + return found ? ncsi_process_next_channel(ndp) : 0; +} +EXPORT_SYMBOL_GPL(ncsi_vlan_rx_add_vid); + +int ncsi_vlan_rx_kill_vid(struct net_device *dev, __be16 proto, u16 vid) +{ + struct vlan_vid *vlan, *tmp; + struct ncsi_dev_priv *ndp; + struct ncsi_dev *nd; + bool found = false; + + if (vid == 0) + return 0; + + nd = ncsi_find_dev(dev); + if (!nd) { + netdev_warn(dev, "NCSI: no net_device?\n"); + return 0; + } + + ndp = TO_NCSI_DEV_PRIV(nd); + + /* Remove the VLAN id from our internal list */ + list_for_each_entry_safe(vlan, tmp, &ndp->vlan_vids, list) + if (vlan->vid == vid) { + netdev_printk(KERN_DEBUG, dev, + "NCSI: vid %u found, removing\n", vid); + list_del_rcu(&vlan->list); + found = true; + kfree(vlan); + } + + if (!found) { + netdev_err(dev, "NCSI: vid %u wasn't registered!\n", vid); + return -EINVAL; + } + + found = ncsi_kick_channels(ndp) != 0; + + return found ? ncsi_process_next_channel(ndp) : 0; +} +EXPORT_SYMBOL_GPL(ncsi_vlan_rx_kill_vid); + struct ncsi_dev *ncsi_register_dev(struct net_device *dev, void (*handler)(struct ncsi_dev *ndev)) { @@ -1215,6 +1567,7 @@ struct ncsi_dev *ncsi_register_dev(struct net_device *dev, nd->handler = handler; ndp->pending_req_num = 0; INIT_LIST_HEAD(&ndp->channel_queue); + INIT_LIST_HEAD(&ndp->vlan_vids); INIT_WORK(&ndp->work, ncsi_dev_work); /* Initialize private NCSI device */ @@ -1263,10 +1616,12 @@ int ncsi_start_dev(struct ncsi_dev *nd) return 0; } - if (ndp->flags & NCSI_DEV_HWA) + if (ndp->flags & NCSI_DEV_HWA) { + netdev_info(ndp->ndev.dev, "NCSI: Enabling HWA mode\n"); ret = ncsi_enable_hwa(ndp); - else + } else { ret = ncsi_choose_active_channel(ndp); + } return ret; } @@ -1297,6 +1652,7 @@ void ncsi_stop_dev(struct ncsi_dev *nd) } } + netdev_printk(KERN_DEBUG, ndp->ndev.dev, "NCSI: Stopping device\n"); ncsi_report_link(ndp, true); } EXPORT_SYMBOL_GPL(ncsi_stop_dev); diff --git a/net/ncsi/ncsi-pkt.h b/net/ncsi/ncsi-pkt.h index 3ea49ed0a935..91b4b66438df 100644 --- a/net/ncsi/ncsi-pkt.h +++ b/net/ncsi/ncsi-pkt.h @@ -104,7 +104,7 @@ struct ncsi_cmd_svf_pkt { unsigned char index; /* VLAN table index */ unsigned char enable; /* Enable or disable */ __be32 checksum; /* Checksum */ - unsigned char pad[14]; + unsigned char pad[18]; }; /* Enable VLAN */ diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index 087db775b3dc..efd933ff5570 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -146,7 +146,7 @@ static int ncsi_rsp_handler_ec(struct ncsi_request *nr) ncm = &nc->modes[NCSI_MODE_ENABLE]; if (ncm->enable) - return -EBUSY; + return 0; ncm->enable = 1; return 0; @@ -173,7 +173,7 @@ static int ncsi_rsp_handler_dc(struct ncsi_request *nr) ncm = &nc->modes[NCSI_MODE_ENABLE]; if (!ncm->enable) - return -EBUSY; + return 0; ncm->enable = 0; return 0; @@ -217,7 +217,7 @@ static int ncsi_rsp_handler_ecnt(struct ncsi_request *nr) ncm = &nc->modes[NCSI_MODE_TX_ENABLE]; if (ncm->enable) - return -EBUSY; + return 0; ncm->enable = 1; return 0; @@ -239,7 +239,7 @@ static int ncsi_rsp_handler_dcnt(struct ncsi_request *nr) ncm = &nc->modes[NCSI_MODE_TX_ENABLE]; if (!ncm->enable) - return -EBUSY; + return 0; ncm->enable = 1; return 0; @@ -263,7 +263,7 @@ static int ncsi_rsp_handler_ae(struct ncsi_request *nr) /* Check if the AEN has been enabled */ ncm = &nc->modes[NCSI_MODE_AEN]; if (ncm->enable) - return -EBUSY; + return 0; /* Update to AEN configuration */ cmd = (struct ncsi_cmd_ae_pkt *)skb_network_header(nr->cmd); @@ -354,7 +354,8 @@ static int ncsi_rsp_handler_svf(struct ncsi_request *nr) /* Add or remove the VLAN filter */ if (!(cmd->enable & 0x1)) { - ret = ncsi_remove_filter(nc, NCSI_FILTER_VLAN, cmd->index); + /* HW indexes from 1 */ + ret = ncsi_remove_filter(nc, NCSI_FILTER_VLAN, cmd->index - 1); } else { vlan = ntohs(cmd->vlan); ret = ncsi_add_filter(nc, NCSI_FILTER_VLAN, &vlan); @@ -381,7 +382,7 @@ static int ncsi_rsp_handler_ev(struct ncsi_request *nr) /* Check if VLAN mode has been enabled */ ncm = &nc->modes[NCSI_MODE_VLAN]; if (ncm->enable) - return -EBUSY; + return 0; /* Update to VLAN mode */ cmd = (struct ncsi_cmd_ev_pkt *)skb_network_header(nr->cmd); @@ -408,7 +409,7 @@ static int ncsi_rsp_handler_dv(struct ncsi_request *nr) /* Check if VLAN mode has been enabled */ ncm = &nc->modes[NCSI_MODE_VLAN]; if (!ncm->enable) - return -EBUSY; + return 0; /* Update to VLAN mode */ ncm->enable = 0; @@ -454,13 +455,10 @@ static int ncsi_rsp_handler_sma(struct ncsi_request *nr) bitmap = &ncf->bitmap; if (cmd->at_e & 0x1) { - if (test_and_set_bit(cmd->index, bitmap)) - return -EBUSY; + set_bit(cmd->index, bitmap); memcpy(ncf->data + 6 * cmd->index, cmd->mac, 6); } else { - if (!test_and_clear_bit(cmd->index, bitmap)) - return -EBUSY; - + clear_bit(cmd->index, bitmap); memset(ncf->data + 6 * cmd->index, 0, 6); } @@ -484,7 +482,7 @@ static int ncsi_rsp_handler_ebf(struct ncsi_request *nr) /* Check if broadcast filter has been enabled */ ncm = &nc->modes[NCSI_MODE_BC]; if (ncm->enable) - return -EBUSY; + return 0; /* Update to broadcast filter mode */ cmd = (struct ncsi_cmd_ebf_pkt *)skb_network_header(nr->cmd); @@ -510,7 +508,7 @@ static int ncsi_rsp_handler_dbf(struct ncsi_request *nr) /* Check if broadcast filter isn't enabled */ ncm = &nc->modes[NCSI_MODE_BC]; if (!ncm->enable) - return -EBUSY; + return 0; /* Update to broadcast filter mode */ ncm->enable = 0; @@ -537,7 +535,7 @@ static int ncsi_rsp_handler_egmf(struct ncsi_request *nr) /* Check if multicast filter has been enabled */ ncm = &nc->modes[NCSI_MODE_MC]; if (ncm->enable) - return -EBUSY; + return 0; /* Update to multicast filter mode */ cmd = (struct ncsi_cmd_egmf_pkt *)skb_network_header(nr->cmd); @@ -563,7 +561,7 @@ static int ncsi_rsp_handler_dgmf(struct ncsi_request *nr) /* Check if multicast filter has been enabled */ ncm = &nc->modes[NCSI_MODE_MC]; if (!ncm->enable) - return -EBUSY; + return 0; /* Update to multicast filter mode */ ncm->enable = 0; @@ -590,7 +588,7 @@ static int ncsi_rsp_handler_snfc(struct ncsi_request *nr) /* Check if flow control has been enabled */ ncm = &nc->modes[NCSI_MODE_FC]; if (ncm->enable) - return -EBUSY; + return 0; /* Update to flow control mode */ cmd = (struct ncsi_cmd_snfc_pkt *)skb_network_header(nr->cmd); @@ -693,7 +691,14 @@ static int ncsi_rsp_handler_gc(struct ncsi_request *nr) ncf->index = i; ncf->total = cnt; - ncf->bitmap = 0x0ul; + if (i == NCSI_FILTER_VLAN) { + /* Set VLAN filters active so they are cleared in + * first configuration state + */ + ncf->bitmap = U64_MAX; + } else { + ncf->bitmap = 0x0ul; + } nc->filters[i] = ncf; } @@ -951,7 +956,7 @@ static struct ncsi_rsp_handler { { NCSI_PKT_RSP_EGMF, 4, ncsi_rsp_handler_egmf }, { NCSI_PKT_RSP_DGMF, 4, ncsi_rsp_handler_dgmf }, { NCSI_PKT_RSP_SNFC, 4, ncsi_rsp_handler_snfc }, - { NCSI_PKT_RSP_GVI, 36, ncsi_rsp_handler_gvi }, + { NCSI_PKT_RSP_GVI, 40, ncsi_rsp_handler_gvi }, { NCSI_PKT_RSP_GC, 32, ncsi_rsp_handler_gc }, { NCSI_PKT_RSP_GP, -1, ncsi_rsp_handler_gp }, { NCSI_PKT_RSP_GCPS, 172, ncsi_rsp_handler_gcps }, @@ -1024,11 +1029,19 @@ int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev, if (payload < 0) payload = ntohs(hdr->length); ret = ncsi_validate_rsp_pkt(nr, payload); - if (ret) + if (ret) { + netdev_warn(ndp->ndev.dev, + "NCSI: 'bad' packet ignored for type 0x%x\n", + hdr->type); goto out; + } /* Process the packet */ ret = nrh->handler(nr); + if (ret) + netdev_err(ndp->ndev.dev, + "NCSI: Handler for packet type 0x%x returned %d\n", + hdr->type, ret); out: ncsi_free_request(nr); return ret; diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 9b28864cc36a..e4a13cc8a2e7 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -636,6 +636,15 @@ config NFT_FWD_NETDEV help This option enables packet forwarding for the "netdev" family. +config NFT_FIB_NETDEV + depends on NFT_FIB_IPV4 + depends on NFT_FIB_IPV6 + tristate "Netfilter nf_tables netdev fib lookups support" + help + This option allows using the FIB expression from the netdev table. + The lookup will be delegated to the IPv4 or IPv6 FIB depending + on the protocol of the packet. + endif # NF_TABLES_NETDEV endif # NF_TABLES diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 913380919301..f78ed2470831 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 netfilter-objs := core.o nf_log.o nf_queue.o nf_sockopt.o nf_conntrack-y := nf_conntrack_core.o nf_conntrack_standalone.o nf_conntrack_expect.o nf_conntrack_helper.o nf_conntrack_proto.o nf_conntrack_l3proto_generic.o nf_conntrack_proto_generic.o nf_conntrack_proto_tcp.o nf_conntrack_proto_udp.o nf_conntrack_extend.o nf_conntrack_acct.o nf_conntrack_seqadj.o @@ -100,6 +101,7 @@ obj-$(CONFIG_NFT_REDIR) += nft_redir.o obj-$(CONFIG_NFT_HASH) += nft_hash.o obj-$(CONFIG_NFT_FIB) += nft_fib.o obj-$(CONFIG_NFT_FIB_INET) += nft_fib_inet.o +obj-$(CONFIG_NFT_FIB_NETDEV) += nft_fib_netdev.o # nf_tables netdev obj-$(CONFIG_NFT_DUP_NETDEV) += nft_dup_netdev.o diff --git a/net/netfilter/core.c b/net/netfilter/core.c index 974cf2a3795a..52cd2901a097 100644 --- a/net/netfilter/core.c +++ b/net/netfilter/core.c @@ -21,7 +21,7 @@ #include <linux/inetdevice.h> #include <linux/proc_fs.h> #include <linux/mutex.h> -#include <linux/slab.h> +#include <linux/mm.h> #include <linux/rcupdate.h> #include <net/net_namespace.h> #include <net/sock.h> @@ -62,10 +62,182 @@ EXPORT_SYMBOL(nf_hooks_needed); #endif static DEFINE_MUTEX(nf_hook_mutex); + +/* max hooks per family/hooknum */ +#define MAX_HOOK_COUNT 1024 + #define nf_entry_dereference(e) \ rcu_dereference_protected(e, lockdep_is_held(&nf_hook_mutex)) -static struct nf_hook_entry __rcu **nf_hook_entry_head(struct net *net, const struct nf_hook_ops *reg) +static struct nf_hook_entries *allocate_hook_entries_size(u16 num) +{ + struct nf_hook_entries *e; + size_t alloc = sizeof(*e) + + sizeof(struct nf_hook_entry) * num + + sizeof(struct nf_hook_ops *) * num; + + if (num == 0) + return NULL; + + e = kvzalloc(alloc, GFP_KERNEL); + if (e) + e->num_hook_entries = num; + return e; +} + +static unsigned int accept_all(void *priv, + struct sk_buff *skb, + const struct nf_hook_state *state) +{ + return NF_ACCEPT; /* ACCEPT makes nf_hook_slow call next hook */ +} + +static const struct nf_hook_ops dummy_ops = { + .hook = accept_all, + .priority = INT_MIN, +}; + +static struct nf_hook_entries * +nf_hook_entries_grow(const struct nf_hook_entries *old, + const struct nf_hook_ops *reg) +{ + unsigned int i, alloc_entries, nhooks, old_entries; + struct nf_hook_ops **orig_ops = NULL; + struct nf_hook_ops **new_ops; + struct nf_hook_entries *new; + bool inserted = false; + + alloc_entries = 1; + old_entries = old ? old->num_hook_entries : 0; + + if (old) { + orig_ops = nf_hook_entries_get_hook_ops(old); + + for (i = 0; i < old_entries; i++) { + if (orig_ops[i] != &dummy_ops) + alloc_entries++; + } + } + + if (alloc_entries > MAX_HOOK_COUNT) + return ERR_PTR(-E2BIG); + + new = allocate_hook_entries_size(alloc_entries); + if (!new) + return ERR_PTR(-ENOMEM); + + new_ops = nf_hook_entries_get_hook_ops(new); + + i = 0; + nhooks = 0; + while (i < old_entries) { + if (orig_ops[i] == &dummy_ops) { + ++i; + continue; + } + if (inserted || reg->priority > orig_ops[i]->priority) { + new_ops[nhooks] = (void *)orig_ops[i]; + new->hooks[nhooks] = old->hooks[i]; + i++; + } else { + new_ops[nhooks] = (void *)reg; + new->hooks[nhooks].hook = reg->hook; + new->hooks[nhooks].priv = reg->priv; + inserted = true; + } + nhooks++; + } + + if (!inserted) { + new_ops[nhooks] = (void *)reg; + new->hooks[nhooks].hook = reg->hook; + new->hooks[nhooks].priv = reg->priv; + } + + return new; +} + +static void hooks_validate(const struct nf_hook_entries *hooks) +{ +#ifdef CONFIG_DEBUG_KERNEL + struct nf_hook_ops **orig_ops; + int prio = INT_MIN; + size_t i = 0; + + orig_ops = nf_hook_entries_get_hook_ops(hooks); + + for (i = 0; i < hooks->num_hook_entries; i++) { + if (orig_ops[i] == &dummy_ops) + continue; + + WARN_ON(orig_ops[i]->priority < prio); + + if (orig_ops[i]->priority > prio) + prio = orig_ops[i]->priority; + } +#endif +} + +/* + * __nf_hook_entries_try_shrink - try to shrink hook array + * + * @pp -- location of hook blob + * + * Hook unregistration must always succeed, so to-be-removed hooks + * are replaced by a dummy one that will just move to next hook. + * + * This counts the current dummy hooks, attempts to allocate new blob, + * copies the live hooks, then replaces and discards old one. + * + * return values: + * + * Returns address to free, or NULL. + */ +static void *__nf_hook_entries_try_shrink(struct nf_hook_entries __rcu **pp) +{ + struct nf_hook_entries *old, *new = NULL; + unsigned int i, j, skip = 0, hook_entries; + struct nf_hook_ops **orig_ops; + struct nf_hook_ops **new_ops; + + old = nf_entry_dereference(*pp); + if (WARN_ON_ONCE(!old)) + return NULL; + + orig_ops = nf_hook_entries_get_hook_ops(old); + for (i = 0; i < old->num_hook_entries; i++) { + if (orig_ops[i] == &dummy_ops) + skip++; + } + + /* if skip == hook_entries all hooks have been removed */ + hook_entries = old->num_hook_entries; + if (skip == hook_entries) + goto out_assign; + + if (skip == 0) + return NULL; + + hook_entries -= skip; + new = allocate_hook_entries_size(hook_entries); + if (!new) + return NULL; + + new_ops = nf_hook_entries_get_hook_ops(new); + for (i = 0, j = 0; i < old->num_hook_entries; i++) { + if (orig_ops[i] == &dummy_ops) + continue; + new->hooks[j] = old->hooks[i]; + new_ops[j] = (void *)orig_ops[i]; + j++; + } + hooks_validate(new); +out_assign: + rcu_assign_pointer(*pp, new); + return old; +} + +static struct nf_hook_entries __rcu **nf_hook_entry_head(struct net *net, const struct nf_hook_ops *reg) { if (reg->pf != NFPROTO_NETDEV) return net->nf.hooks[reg->pf]+reg->hooknum; @@ -76,13 +248,14 @@ static struct nf_hook_entry __rcu **nf_hook_entry_head(struct net *net, const st return ®->dev->nf_hooks_ingress; } #endif + WARN_ON_ONCE(1); return NULL; } int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) { - struct nf_hook_entry __rcu **pp; - struct nf_hook_entry *entry, *p; + struct nf_hook_entries *p, *new_hooks; + struct nf_hook_entries __rcu **pp; if (reg->pf == NFPROTO_NETDEV) { #ifndef CONFIG_NETFILTER_INGRESS @@ -98,23 +271,19 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) if (!pp) return -EINVAL; - entry = kmalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - return -ENOMEM; - - nf_hook_entry_init(entry, reg); - mutex_lock(&nf_hook_mutex); - /* Find the spot in the list */ - for (; (p = nf_entry_dereference(*pp)) != NULL; pp = &p->next) { - if (reg->priority < nf_hook_entry_priority(p)) - break; - } - rcu_assign_pointer(entry->next, p); - rcu_assign_pointer(*pp, entry); + p = nf_entry_dereference(*pp); + new_hooks = nf_hook_entries_grow(p, reg); + + if (!IS_ERR(new_hooks)) + rcu_assign_pointer(*pp, new_hooks); mutex_unlock(&nf_hook_mutex); + if (IS_ERR(new_hooks)) + return PTR_ERR(new_hooks); + + hooks_validate(new_hooks); #ifdef CONFIG_NETFILTER_INGRESS if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS) net_inc_ingress_queue(); @@ -122,48 +291,74 @@ int nf_register_net_hook(struct net *net, const struct nf_hook_ops *reg) #ifdef HAVE_JUMP_LABEL static_key_slow_inc(&nf_hooks_needed[reg->pf][reg->hooknum]); #endif + synchronize_net(); + BUG_ON(p == new_hooks); + kvfree(p); return 0; } EXPORT_SYMBOL(nf_register_net_hook); -static struct nf_hook_entry * -__nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) +/* + * __nf_unregister_net_hook - remove a hook from blob + * + * @oldp: current address of hook blob + * @unreg: hook to unregister + * + * This cannot fail, hook unregistration must always succeed. + * Therefore replace the to-be-removed hook with a dummy hook. + */ +static void __nf_unregister_net_hook(struct nf_hook_entries *old, + const struct nf_hook_ops *unreg) { - struct nf_hook_entry __rcu **pp; - struct nf_hook_entry *p; - - pp = nf_hook_entry_head(net, reg); - if (WARN_ON_ONCE(!pp)) - return NULL; + struct nf_hook_ops **orig_ops; + bool found = false; + unsigned int i; - mutex_lock(&nf_hook_mutex); - for (; (p = nf_entry_dereference(*pp)) != NULL; pp = &p->next) { - if (nf_hook_entry_ops(p) == reg) { - rcu_assign_pointer(*pp, p->next); - break; - } - } - mutex_unlock(&nf_hook_mutex); - if (!p) { - WARN(1, "nf_unregister_net_hook: hook not found!\n"); - return NULL; + orig_ops = nf_hook_entries_get_hook_ops(old); + for (i = 0; i < old->num_hook_entries; i++) { + if (orig_ops[i] != unreg) + continue; + WRITE_ONCE(old->hooks[i].hook, accept_all); + WRITE_ONCE(orig_ops[i], &dummy_ops); + found = true; + break; } + + if (found) { #ifdef CONFIG_NETFILTER_INGRESS - if (reg->pf == NFPROTO_NETDEV && reg->hooknum == NF_NETDEV_INGRESS) - net_dec_ingress_queue(); + if (unreg->pf == NFPROTO_NETDEV && unreg->hooknum == NF_NETDEV_INGRESS) + net_dec_ingress_queue(); #endif #ifdef HAVE_JUMP_LABEL - static_key_slow_dec(&nf_hooks_needed[reg->pf][reg->hooknum]); + static_key_slow_dec(&nf_hooks_needed[unreg->pf][unreg->hooknum]); #endif - - return p; + } else { + WARN_ONCE(1, "hook not found, pf %d num %d", unreg->pf, unreg->hooknum); + } } void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) { - struct nf_hook_entry *p = __nf_unregister_net_hook(net, reg); + struct nf_hook_entries __rcu **pp; + struct nf_hook_entries *p; unsigned int nfq; + pp = nf_hook_entry_head(net, reg); + if (!pp) + return; + + mutex_lock(&nf_hook_mutex); + + p = nf_entry_dereference(*pp); + if (WARN_ON_ONCE(!p)) { + mutex_unlock(&nf_hook_mutex); + return; + } + + __nf_unregister_net_hook(p, reg); + + p = __nf_hook_entries_try_shrink(pp); + mutex_unlock(&nf_hook_mutex); if (!p) return; @@ -173,7 +368,7 @@ void nf_unregister_net_hook(struct net *net, const struct nf_hook_ops *reg) nfq = nf_queue_nf_hook_drop(net); if (nfq) synchronize_net(); - kfree(p); + kvfree(p); } EXPORT_SYMBOL(nf_unregister_net_hook); @@ -200,26 +395,59 @@ EXPORT_SYMBOL(nf_register_net_hooks); void nf_unregister_net_hooks(struct net *net, const struct nf_hook_ops *reg, unsigned int hookcount) { - struct nf_hook_entry *to_free[16]; - unsigned int i, n, nfq; + struct nf_hook_entries *to_free[16], *p; + struct nf_hook_entries __rcu **pp; + unsigned int i, j, n; + + mutex_lock(&nf_hook_mutex); + for (i = 0; i < hookcount; i++) { + pp = nf_hook_entry_head(net, ®[i]); + if (!pp) + continue; + + p = nf_entry_dereference(*pp); + if (WARN_ON_ONCE(!p)) + continue; + __nf_unregister_net_hook(p, ®[i]); + } + mutex_unlock(&nf_hook_mutex); do { n = min_t(unsigned int, hookcount, ARRAY_SIZE(to_free)); - for (i = 0; i < n; i++) - to_free[i] = __nf_unregister_net_hook(net, ®[i]); + mutex_lock(&nf_hook_mutex); - synchronize_net(); + for (i = 0, j = 0; i < hookcount && j < n; i++) { + pp = nf_hook_entry_head(net, ®[i]); + if (!pp) + continue; + + p = nf_entry_dereference(*pp); + if (!p) + continue; + + to_free[j] = __nf_hook_entries_try_shrink(pp); + if (to_free[j]) + ++j; + } + + mutex_unlock(&nf_hook_mutex); + + if (j) { + unsigned int nfq; - /* need 2nd synchronize_net() if nfqueue is used, skb - * can get reinjected right before nf_queue_hook_drop() - */ - nfq = nf_queue_nf_hook_drop(net); - if (nfq) synchronize_net(); - for (i = 0; i < n; i++) - kfree(to_free[i]); + /* need 2nd synchronize_net() if nfqueue is used, skb + * can get reinjected right before nf_queue_hook_drop() + */ + nfq = nf_queue_nf_hook_drop(net); + if (nfq) + synchronize_net(); + + for (i = 0; i < j; i++) + kvfree(to_free[i]); + } reg += n; hookcount -= n; @@ -230,16 +458,15 @@ EXPORT_SYMBOL(nf_unregister_net_hooks); /* Returns 1 if okfn() needs to be executed by the caller, * -EPERM for NF_DROP, 0 otherwise. Caller must hold rcu_read_lock. */ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state, - struct nf_hook_entry *entry) + const struct nf_hook_entries *e, unsigned int s) { unsigned int verdict; int ret; - do { - verdict = nf_hook_entry_hookfn(entry, skb, state); + for (; s < e->num_hook_entries; s++) { + verdict = nf_hook_entry_hookfn(&e->hooks[s], skb, state); switch (verdict & NF_VERDICT_MASK) { case NF_ACCEPT: - entry = rcu_dereference(entry->next); break; case NF_DROP: kfree_skb(skb); @@ -248,8 +475,8 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state, ret = -EPERM; return ret; case NF_QUEUE: - ret = nf_queue(skb, state, &entry, verdict); - if (ret == 1 && entry) + ret = nf_queue(skb, state, e, s, verdict); + if (ret == 1) continue; return ret; default: @@ -258,7 +485,7 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state, */ return 0; } - } while (entry); + } return 1; } diff --git a/net/netfilter/ipset/Makefile b/net/netfilter/ipset/Makefile index 28ec148df02d..a445a6bf4f11 100644 --- a/net/netfilter/ipset/Makefile +++ b/net/netfilter/ipset/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the ipset modules # diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h index 8ad2b52a0b32..5ca18f07683b 100644 --- a/net/netfilter/ipset/ip_set_bitmap_gen.h +++ b/net/netfilter/ipset/ip_set_bitmap_gen.h @@ -37,11 +37,11 @@ #define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id))) static void -mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) +mtype_gc_init(struct ip_set *set, void (*gc)(struct timer_list *t)) { struct mtype *map = set->data; - setup_timer(&map->gc, gc, (unsigned long)set); + timer_setup(&map->gc, gc, 0); mod_timer(&map->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ); } @@ -272,10 +272,10 @@ out: } static void -mtype_gc(unsigned long ul_set) +mtype_gc(struct timer_list *t) { - struct ip_set *set = (struct ip_set *)ul_set; - struct mtype *map = set->data; + struct mtype *map = from_timer(map, t, gc); + struct ip_set *set = map->set; void *x; u32 id; diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c index 4783efff0bde..d8975a0b4282 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ip.c +++ b/net/netfilter/ipset/ip_set_bitmap_ip.c @@ -48,6 +48,7 @@ struct bitmap_ip { size_t memsize; /* members size */ u8 netmask; /* subnet netmask */ struct timer_list gc; /* garbage collection */ + struct ip_set *set; /* attached to this ip_set */ unsigned char extensions[0] /* data extensions */ __aligned(__alignof__(u64)); }; @@ -232,6 +233,7 @@ init_map_ip(struct ip_set *set, struct bitmap_ip *map, map->netmask = netmask; set->timeout = IPSET_NO_TIMEOUT; + map->set = set; set->data = map; set->family = NFPROTO_IPV4; diff --git a/net/netfilter/ipset/ip_set_bitmap_ipmac.c b/net/netfilter/ipset/ip_set_bitmap_ipmac.c index 9a065f672d3a..4c279fbd2d5d 100644 --- a/net/netfilter/ipset/ip_set_bitmap_ipmac.c +++ b/net/netfilter/ipset/ip_set_bitmap_ipmac.c @@ -52,6 +52,7 @@ struct bitmap_ipmac { u32 elements; /* number of max elements in the set */ size_t memsize; /* members size */ struct timer_list gc; /* garbage collector */ + struct ip_set *set; /* attached to this ip_set */ unsigned char extensions[0] /* MAC + data extensions */ __aligned(__alignof__(u64)); }; @@ -307,6 +308,7 @@ init_map_ipmac(struct ip_set *set, struct bitmap_ipmac *map, map->elements = elements; set->timeout = IPSET_NO_TIMEOUT; + map->set = set; set->data = map; set->family = NFPROTO_IPV4; diff --git a/net/netfilter/ipset/ip_set_bitmap_port.c b/net/netfilter/ipset/ip_set_bitmap_port.c index 7f0c733358a4..7f9bbd7c98b5 100644 --- a/net/netfilter/ipset/ip_set_bitmap_port.c +++ b/net/netfilter/ipset/ip_set_bitmap_port.c @@ -40,6 +40,7 @@ struct bitmap_port { u32 elements; /* number of max elements in the set */ size_t memsize; /* members size */ struct timer_list gc; /* garbage collection */ + struct ip_set *set; /* attached to this ip_set */ unsigned char extensions[0] /* data extensions */ __aligned(__alignof__(u64)); }; @@ -214,6 +215,7 @@ init_map_port(struct ip_set *set, struct bitmap_port *map, map->last_port = last_port; set->timeout = IPSET_NO_TIMEOUT; + map->set = set; set->data = map; set->family = NFPROTO_UNSPEC; diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index e495b5e484b1..cf84f7b37cd9 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -1191,14 +1191,17 @@ static int ip_set_swap(struct net *net, struct sock *ctnl, struct sk_buff *skb, from->family == to->family)) return -IPSET_ERR_TYPE_MISMATCH; - if (from->ref_netlink || to->ref_netlink) + write_lock_bh(&ip_set_ref_lock); + + if (from->ref_netlink || to->ref_netlink) { + write_unlock_bh(&ip_set_ref_lock); return -EBUSY; + } strncpy(from_name, from->name, IPSET_MAXNAMELEN); strncpy(from->name, to->name, IPSET_MAXNAMELEN); strncpy(to->name, from_name, IPSET_MAXNAMELEN); - write_lock_bh(&ip_set_ref_lock); swap(from->ref, to->ref); ip_set(inst, from_id) = to; ip_set(inst, to_id) = from; @@ -2072,25 +2075,28 @@ static struct pernet_operations ip_set_net_ops = { static int __init ip_set_init(void) { - int ret = nfnetlink_subsys_register(&ip_set_netlink_subsys); + int ret = register_pernet_subsys(&ip_set_net_ops); + + if (ret) { + pr_err("ip_set: cannot register pernet_subsys.\n"); + return ret; + } + ret = nfnetlink_subsys_register(&ip_set_netlink_subsys); if (ret != 0) { pr_err("ip_set: cannot register with nfnetlink.\n"); + unregister_pernet_subsys(&ip_set_net_ops); return ret; } + ret = nf_register_sockopt(&so_set); if (ret != 0) { pr_err("SO_SET registry failed: %d\n", ret); nfnetlink_subsys_unregister(&ip_set_netlink_subsys); + unregister_pernet_subsys(&ip_set_net_ops); return ret; } - ret = register_pernet_subsys(&ip_set_net_ops); - if (ret) { - pr_err("ip_set: cannot register pernet_subsys.\n"); - nf_unregister_sockopt(&so_set); - nfnetlink_subsys_unregister(&ip_set_netlink_subsys); - return ret; - } + pr_info("ip_set: protocol %u\n", IPSET_PROTOCOL); return 0; } @@ -2098,9 +2104,10 @@ ip_set_init(void) static void __exit ip_set_fini(void) { - unregister_pernet_subsys(&ip_set_net_ops); nf_unregister_sockopt(&so_set); nfnetlink_subsys_unregister(&ip_set_netlink_subsys); + + unregister_pernet_subsys(&ip_set_net_ops); pr_debug("these are the famous last words\n"); } diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h index f236c0bc7b3f..efffc8eabafe 100644 --- a/net/netfilter/ipset/ip_set_hash_gen.h +++ b/net/netfilter/ipset/ip_set_hash_gen.h @@ -280,6 +280,7 @@ htable_bits(u32 hashsize) struct htype { struct htable __rcu *table; /* the hash table */ struct timer_list gc; /* garbage collection when timeout enabled */ + struct ip_set *set; /* attached to this ip_set */ u32 maxelem; /* max elements in the hash */ u32 initval; /* random jhash init value */ #ifdef IP_SET_HASH_WITH_MARKMASK @@ -429,11 +430,11 @@ mtype_destroy(struct ip_set *set) } static void -mtype_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) +mtype_gc_init(struct ip_set *set, void (*gc)(struct timer_list *t)) { struct htype *h = set->data; - setup_timer(&h->gc, gc, (unsigned long)set); + timer_setup(&h->gc, gc, 0); mod_timer(&h->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ); pr_debug("gc initialized, run in every %u\n", IPSET_GC_PERIOD(set->timeout)); @@ -526,10 +527,10 @@ mtype_expire(struct ip_set *set, struct htype *h) } static void -mtype_gc(unsigned long ul_set) +mtype_gc(struct timer_list *t) { - struct ip_set *set = (struct ip_set *)ul_set; - struct htype *h = set->data; + struct htype *h = from_timer(h, t, gc); + struct ip_set *set = h->set; pr_debug("called\n"); spin_lock_bh(&set->lock); @@ -1041,12 +1042,24 @@ out: static int mtype_head(struct ip_set *set, struct sk_buff *skb) { - const struct htype *h = set->data; + struct htype *h = set->data; const struct htable *t; struct nlattr *nested; size_t memsize; u8 htable_bits; + /* If any members have expired, set->elements will be wrong + * mytype_expire function will update it with the right count. + * we do not hold set->lock here, so grab it first. + * set->elements can still be incorrect in the case of a huge set, + * because elements might time out during the listing. + */ + if (SET_WITH_TIMEOUT(set)) { + spin_lock_bh(&set->lock); + mtype_expire(set, h); + spin_unlock_bh(&set->lock); + } + rcu_read_lock_bh(); t = rcu_dereference_bh_nfnl(h->table); memsize = mtype_ahash_memsize(h, t) + set->ext_size; @@ -1302,6 +1315,7 @@ IPSET_TOKEN(HTYPE, _create)(struct net *net, struct ip_set *set, t->htable_bits = hbits; RCU_INIT_POINTER(h->table, t); + h->set = set; set->data = h; #ifndef IP_SET_PROTO_UNDEF if (set->family == NFPROTO_IPV4) { diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c index 20bfbd315f61..613eb212cb48 100644 --- a/net/netfilter/ipset/ip_set_hash_ip.c +++ b/net/netfilter/ipset/ip_set_hash_ip.c @@ -123,13 +123,12 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], return ret; ip &= ip_set_hostmask(h->netmask); + e.ip = htonl(ip); + if (e.ip == 0) + return -IPSET_ERR_HASH_ELEM; - if (adt == IPSET_TEST) { - e.ip = htonl(ip); - if (e.ip == 0) - return -IPSET_ERR_HASH_ELEM; + if (adt == IPSET_TEST) return adtfn(set, &e, &ext, &ext, flags); - } ip_to = ip; if (tb[IPSET_ATTR_IP_TO]) { @@ -148,17 +147,20 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[], hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1); - if (retried) + if (retried) { ip = ntohl(h->next.ip); - for (; !before(ip_to, ip); ip += hosts) { e.ip = htonl(ip); - if (e.ip == 0) - return -IPSET_ERR_HASH_ELEM; + } + for (; ip <= ip_to;) { ret = adtfn(set, &e, &ext, &ext, flags); - if (ret && !ip_set_eexist(ret, flags)) return ret; + ip += hosts; + e.ip = htonl(ip); + if (e.ip == 0) + return 0; + ret = 0; } return ret; diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c index b64cf14e8352..f3ba8348cf9d 100644 --- a/net/netfilter/ipset/ip_set_hash_ipmark.c +++ b/net/netfilter/ipset/ip_set_hash_ipmark.c @@ -149,7 +149,7 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip); - for (; !before(ip_to, ip); ip++) { + for (; ip <= ip_to; ip++) { e.ip = htonl(ip); ret = adtfn(set, &e, &ext, &ext, flags); diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c index f438740e6c6a..ddb8039ec1d2 100644 --- a/net/netfilter/ipset/ip_set_hash_ipport.c +++ b/net/netfilter/ipset/ip_set_hash_ipport.c @@ -178,7 +178,7 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip); - for (; !before(ip_to, ip); ip++) { + for (; ip <= ip_to; ip++) { p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) : port; for (; p <= port_to; p++) { diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c index 6215fb898c50..a7f4d7a85420 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportip.c +++ b/net/netfilter/ipset/ip_set_hash_ipportip.c @@ -185,7 +185,7 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip); - for (; !before(ip_to, ip); ip++) { + for (; ip <= ip_to; ip++) { p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) : port; for (; p <= port_to; p++) { diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c index 5ab1b99a53c2..0f164e986bf1 100644 --- a/net/netfilter/ipset/ip_set_hash_ipportnet.c +++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c @@ -271,7 +271,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip); - for (; !before(ip_to, ip); ip++) { + for (; ip <= ip_to; ip++) { e.ip = htonl(ip); p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port) : port; @@ -281,7 +281,7 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[], ip == ntohl(h->next.ip) && p == ntohs(h->next.port) ? ntohl(h->next.ip2) : ip2_from; - while (!after(ip2, ip2_to)) { + while (ip2 <= ip2_to) { e.ip2 = htonl(ip2); ip2_last = ip_set_range_to_cidr(ip2, ip2_to, &cidr); @@ -434,7 +434,7 @@ hash_ipportnet6_uadt(struct ip_set *set, struct nlattr *tb[], if (unlikely(tb[IPSET_ATTR_IP_TO])) return -IPSET_ERR_HASH_RANGE_UNSUPPORTED; if (unlikely(tb[IPSET_ATTR_CIDR])) { - u8 cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); + cidr = nla_get_u8(tb[IPSET_ATTR_CIDR]); if (cidr != HOST_MASK) return -IPSET_ERR_INVALID_CIDR; diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c index 5d9e895452e7..1c67a1761e45 100644 --- a/net/netfilter/ipset/ip_set_hash_net.c +++ b/net/netfilter/ipset/ip_set_hash_net.c @@ -193,7 +193,7 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[], } if (retried) ip = ntohl(h->next.ip); - while (!after(ip, ip_to)) { + while (ip <= ip_to) { e.ip = htonl(ip); last = ip_set_range_to_cidr(ip, ip_to, &e.cidr); ret = adtfn(set, &e, &ext, &ext, flags); diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c index 44cf11939c91..d417074f1c1a 100644 --- a/net/netfilter/ipset/ip_set_hash_netiface.c +++ b/net/netfilter/ipset/ip_set_hash_netiface.c @@ -255,7 +255,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip); - while (!after(ip, ip_to)) { + while (ip <= ip_to) { e.ip = htonl(ip); last = ip_set_range_to_cidr(ip, ip_to, &e.cidr); ret = adtfn(set, &e, &ext, &ext, flags); diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c index db614e13b193..7f9ae2e9645b 100644 --- a/net/netfilter/ipset/ip_set_hash_netnet.c +++ b/net/netfilter/ipset/ip_set_hash_netnet.c @@ -250,13 +250,13 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip[0]); - while (!after(ip, ip_to)) { + while (ip <= ip_to) { e.ip[0] = htonl(ip); last = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]); ip2 = (retried && ip == ntohl(h->next.ip[0])) ? ntohl(h->next.ip[1]) : ip2_from; - while (!after(ip2, ip2_to)) { + while (ip2 <= ip2_to) { e.ip[1] = htonl(ip2); last2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]); ret = adtfn(set, &e, &ext, &ext, flags); diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c index 54b64b6cd0cd..e6ef382febe4 100644 --- a/net/netfilter/ipset/ip_set_hash_netport.c +++ b/net/netfilter/ipset/ip_set_hash_netport.c @@ -241,7 +241,7 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip); - while (!after(ip, ip_to)) { + while (ip <= ip_to) { e.ip = htonl(ip); last = ip_set_range_to_cidr(ip, ip_to, &cidr); e.cidr = cidr - 1; diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c index aff846960ac4..8602f2595a1a 100644 --- a/net/netfilter/ipset/ip_set_hash_netportnet.c +++ b/net/netfilter/ipset/ip_set_hash_netportnet.c @@ -291,7 +291,7 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], if (retried) ip = ntohl(h->next.ip[0]); - while (!after(ip, ip_to)) { + while (ip <= ip_to) { e.ip[0] = htonl(ip); ip_last = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]); p = retried && ip == ntohl(h->next.ip[0]) ? ntohs(h->next.port) @@ -301,7 +301,7 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[], ip2 = (retried && ip == ntohl(h->next.ip[0]) && p == ntohs(h->next.port)) ? ntohl(h->next.ip[1]) : ip2_from; - while (!after(ip2, ip2_to)) { + while (ip2 <= ip2_to) { e.ip[1] = htonl(ip2); ip2_last = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]); diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 178d4eba013b..e864681b8dc5 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -44,6 +44,7 @@ struct set_adt_elem { struct list_set { u32 size; /* size of set list array */ struct timer_list gc; /* garbage collection */ + struct ip_set *set; /* attached to this ip_set */ struct net *net; /* namespace */ struct list_head members; /* the set members */ }; @@ -453,7 +454,6 @@ static size_t list_set_memsize(const struct list_set *map, size_t dsize) { struct set_elem *e; - size_t memsize; u32 n = 0; rcu_read_lock(); @@ -461,9 +461,7 @@ list_set_memsize(const struct list_set *map, size_t dsize) n++; rcu_read_unlock(); - memsize = sizeof(*map) + n * dsize; - - return memsize; + return (sizeof(*map) + n * dsize); } static int @@ -571,10 +569,10 @@ static const struct ip_set_type_variant set_variant = { }; static void -list_set_gc(unsigned long ul_set) +list_set_gc(struct timer_list *t) { - struct ip_set *set = (struct ip_set *)ul_set; - struct list_set *map = set->data; + struct list_set *map = from_timer(map, t, gc); + struct ip_set *set = map->set; spin_lock_bh(&set->lock); set_cleanup_entries(set); @@ -585,11 +583,11 @@ list_set_gc(unsigned long ul_set) } static void -list_set_gc_init(struct ip_set *set, void (*gc)(unsigned long ul_set)) +list_set_gc_init(struct ip_set *set, void (*gc)(struct timer_list *t)) { struct list_set *map = set->data; - setup_timer(&map->gc, gc, (unsigned long)set); + timer_setup(&map->gc, gc, 0); mod_timer(&map->gc, jiffies + IPSET_GC_PERIOD(set->timeout) * HZ); } @@ -606,6 +604,7 @@ init_list_set(struct net *net, struct ip_set *set, u32 size) map->size = size; map->net = net; + map->set = set; INIT_LIST_HEAD(&map->members); set->data = map; diff --git a/net/netfilter/ipset/pfxlen.c b/net/netfilter/ipset/pfxlen.c index 1c8a42c1056c..d5be9c25fad6 100644 --- a/net/netfilter/ipset/pfxlen.c +++ b/net/netfilter/ipset/pfxlen.c @@ -3,6 +3,141 @@ /* Prefixlen maps for fast conversions, by Jan Engelhardt. */ +#ifdef E +#undef E +#endif + +#define PREFIXES_MAP \ + E(0x00000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0x80000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xC0000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xE0000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xF0000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xF8000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFC000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFE000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFF000000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFF800000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xF0000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE), \ + E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF), + #define E(a, b, c, d) \ {.ip6 = { \ htonl(a), htonl(b), \ @@ -13,135 +148,7 @@ * just use prefixlen_netmask_map[prefixlength].ip. */ const union nf_inet_addr ip_set_netmask_map[] = { - E(0x00000000, 0x00000000, 0x00000000, 0x00000000), - E(0x80000000, 0x00000000, 0x00000000, 0x00000000), - E(0xC0000000, 0x00000000, 0x00000000, 0x00000000), - E(0xE0000000, 0x00000000, 0x00000000, 0x00000000), - E(0xF0000000, 0x00000000, 0x00000000, 0x00000000), - E(0xF8000000, 0x00000000, 0x00000000, 0x00000000), - E(0xFC000000, 0x00000000, 0x00000000, 0x00000000), - E(0xFE000000, 0x00000000, 0x00000000, 0x00000000), - E(0xFF000000, 0x00000000, 0x00000000, 0x00000000), - E(0xFF800000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xF0000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF), + PREFIXES_MAP }; EXPORT_SYMBOL_GPL(ip_set_netmask_map); @@ -155,135 +162,7 @@ EXPORT_SYMBOL_GPL(ip_set_netmask_map); * just use prefixlen_hostmask_map[prefixlength].ip. */ const union nf_inet_addr ip_set_hostmask_map[] = { - E(0x00000000, 0x00000000, 0x00000000, 0x00000000), - E(0x80000000, 0x00000000, 0x00000000, 0x00000000), - E(0xC0000000, 0x00000000, 0x00000000, 0x00000000), - E(0xE0000000, 0x00000000, 0x00000000, 0x00000000), - E(0xF0000000, 0x00000000, 0x00000000, 0x00000000), - E(0xF8000000, 0x00000000, 0x00000000, 0x00000000), - E(0xFC000000, 0x00000000, 0x00000000, 0x00000000), - E(0xFE000000, 0x00000000, 0x00000000, 0x00000000), - E(0xFF000000, 0x00000000, 0x00000000, 0x00000000), - E(0xFF800000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFC00000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFE00000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFF00000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFF80000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFC0000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFE0000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFF0000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFF8000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFC000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFE000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFF000, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFF800, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFC00, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFE00, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFF00, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFF80, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFC0, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFE0, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFF0, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFF8, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFFC, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFFE, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0x00000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0x80000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xC0000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xE0000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xF0000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xF8000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFC000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFE000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFF000000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFF800000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFC00000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFE00000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFF00000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFF80000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFC0000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFE0000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFF0000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFF8000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFC000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFE000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFF000, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFF800, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFC00, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFE00, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFF00, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFF80, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFC0, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFE0, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFF0, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFF8, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFC, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFE, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0x00000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0x80000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x80000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xC0000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xE0000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF0000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xF8000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFC000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFE000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF000000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFF800000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFC00000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFE00000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF00000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFF80000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFC0000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFE0000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF0000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFF8000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFC000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFE000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF000), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFF800), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFC00), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFE00), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF00), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFF80), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFC0), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFE0), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF0), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFF8), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFC), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFE), - E(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF), + PREFIXES_MAP }; EXPORT_SYMBOL_GPL(ip_set_hostmask_map); diff --git a/net/netfilter/ipvs/Makefile b/net/netfilter/ipvs/Makefile index 67f3f4389602..c552993fa4b9 100644 --- a/net/netfilter/ipvs/Makefile +++ b/net/netfilter/ipvs/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the IPVS modules on top of IPv4. # diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 3d2ac71a83ec..3e053cb30070 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -104,7 +104,7 @@ static inline void ct_write_unlock_bh(unsigned int key) spin_unlock_bh(&__ip_vs_conntbl_lock_array[key&CT_LOCKARRAY_MASK].l); } -static void ip_vs_conn_expire(unsigned long data); +static void ip_vs_conn_expire(struct timer_list *t); /* * Returns hash value for IPVS connection entry @@ -185,7 +185,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) hlist_add_head_rcu(&cp->c_list, &ip_vs_conn_tab[hash]); ret = 1; } else { - pr_err("%s(): request for already hashed, called from %pF\n", + pr_err("%s(): request for already hashed, called from %pS\n", __func__, __builtin_return_address(0)); ret = 0; } @@ -457,7 +457,7 @@ EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_proto); static void __ip_vs_conn_put_notimer(struct ip_vs_conn *cp) { __ip_vs_conn_put(cp); - ip_vs_conn_expire((unsigned long)cp); + ip_vs_conn_expire(&cp->timer); } /* @@ -817,9 +817,9 @@ static void ip_vs_conn_rcu_free(struct rcu_head *head) kmem_cache_free(ip_vs_conn_cachep, cp); } -static void ip_vs_conn_expire(unsigned long data) +static void ip_vs_conn_expire(struct timer_list *t) { - struct ip_vs_conn *cp = (struct ip_vs_conn *)data; + struct ip_vs_conn *cp = from_timer(cp, t, timer); struct netns_ipvs *ipvs = cp->ipvs; /* @@ -909,7 +909,7 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, int dest_af, } INIT_HLIST_NODE(&cp->c_list); - setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp); + timer_setup(&cp->timer, ip_vs_conn_expire, 0); cp->ipvs = ipvs; cp->af = p->af; cp->daf = dest_af; diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index e31956b58aba..5cb7cac9177d 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -125,14 +125,12 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb) s->cnt.inbytes += skb->len; u64_stats_update_end(&s->syncp); - rcu_read_lock(); svc = rcu_dereference(dest->svc); s = this_cpu_ptr(svc->stats.cpustats); u64_stats_update_begin(&s->syncp); s->cnt.inpkts++; s->cnt.inbytes += skb->len; u64_stats_update_end(&s->syncp); - rcu_read_unlock(); s = this_cpu_ptr(ipvs->tot_stats.cpustats); u64_stats_update_begin(&s->syncp); @@ -159,14 +157,12 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb) s->cnt.outbytes += skb->len; u64_stats_update_end(&s->syncp); - rcu_read_lock(); svc = rcu_dereference(dest->svc); s = this_cpu_ptr(svc->stats.cpustats); u64_stats_update_begin(&s->syncp); s->cnt.outpkts++; s->cnt.outbytes += skb->len; u64_stats_update_end(&s->syncp); - rcu_read_unlock(); s = this_cpu_ptr(ipvs->tot_stats.cpustats); u64_stats_update_begin(&s->syncp); @@ -1222,7 +1218,6 @@ static struct ip_vs_conn *__ip_vs_rs_conn_out(unsigned int hooknum, if (!pptr) return NULL; - rcu_read_lock(); dest = ip_vs_find_real_service(ipvs, af, iph->protocol, &iph->saddr, pptr[0]); if (dest) { @@ -1237,7 +1232,6 @@ static struct ip_vs_conn *__ip_vs_rs_conn_out(unsigned int hooknum, pptr[0], pptr[1]); } } - rcu_read_unlock(); return cp; } @@ -1689,11 +1683,9 @@ ip_vs_in_icmp(struct netns_ipvs *ipvs, struct sk_buff *skb, int *related, if (dest) { struct ip_vs_dest_dst *dest_dst; - rcu_read_lock(); dest_dst = rcu_dereference(dest->dest_dst); if (dest_dst) mtu = dst_mtu(dest_dst->dst_cache); - rcu_read_unlock(); } if (mtu > 68 + sizeof(struct iphdr)) mtu -= sizeof(struct iphdr); @@ -2109,7 +2101,7 @@ ip_vs_forward_icmp_v6(void *priv, struct sk_buff *skb, #endif -static struct nf_hook_ops ip_vs_ops[] __read_mostly = { +static const struct nf_hook_ops ip_vs_ops[] = { /* After packet filtering, change source only for VS/NAT */ { .hook = ip_vs_reply4, diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 1fa3c2307b6e..fff213eacf2a 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -300,7 +300,7 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) unsigned int hash; if (svc->flags & IP_VS_SVC_F_HASHED) { - pr_err("%s(): request for already hashed, called from %pF\n", + pr_err("%s(): request for already hashed, called from %pS\n", __func__, __builtin_return_address(0)); return 0; } @@ -334,7 +334,7 @@ static int ip_vs_svc_hash(struct ip_vs_service *svc) static int ip_vs_svc_unhash(struct ip_vs_service *svc) { if (!(svc->flags & IP_VS_SVC_F_HASHED)) { - pr_err("%s(): request for unhash flagged, called from %pF\n", + pr_err("%s(): request for unhash flagged, called from %pS\n", __func__, __builtin_return_address(0)); return 0; } @@ -550,18 +550,15 @@ bool ip_vs_has_real_service(struct netns_ipvs *ipvs, int af, __u16 protocol, /* Check for "full" addressed entries */ hash = ip_vs_rs_hashkey(af, daddr, dport); - rcu_read_lock(); hlist_for_each_entry_rcu(dest, &ipvs->rs_table[hash], d_list) { if (dest->port == dport && dest->af == af && ip_vs_addr_equal(af, &dest->addr, daddr) && (dest->protocol == protocol || dest->vfwmark)) { /* HIT */ - rcu_read_unlock(); return true; } } - rcu_read_unlock(); return false; } @@ -1149,9 +1146,9 @@ ip_vs_del_dest(struct ip_vs_service *svc, struct ip_vs_dest_user_kern *udest) return 0; } -static void ip_vs_dest_trash_expire(unsigned long data) +static void ip_vs_dest_trash_expire(struct timer_list *t) { - struct netns_ipvs *ipvs = (struct netns_ipvs *)data; + struct netns_ipvs *ipvs = from_timer(ipvs, t, dest_trash_timer); struct ip_vs_dest *dest, *next; unsigned long now = jiffies; @@ -2037,12 +2034,16 @@ static int ip_vs_info_seq_show(struct seq_file *seq, void *v) seq_puts(seq, " -> RemoteAddress:Port Forward Weight ActiveConn InActConn\n"); } else { + struct net *net = seq_file_net(seq); + struct netns_ipvs *ipvs = net_ipvs(net); const struct ip_vs_service *svc = v; const struct ip_vs_iter *iter = seq->private; const struct ip_vs_dest *dest; struct ip_vs_scheduler *sched = rcu_dereference(svc->scheduler); char *sched_name = sched ? sched->name : "none"; + if (svc->ipvs != ipvs) + return 0; if (iter->table == ip_vs_svc_table) { #ifdef CONFIG_IP_VS_IPV6 if (svc->af == AF_INET6) @@ -4022,8 +4023,7 @@ int __net_init ip_vs_control_net_init(struct netns_ipvs *ipvs) INIT_LIST_HEAD(&ipvs->dest_trash); spin_lock_init(&ipvs->dest_trash_lock); - setup_timer(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire, - (unsigned long) ipvs); + timer_setup(&ipvs->dest_trash_timer, ip_vs_dest_trash_expire, 0); atomic_set(&ipvs->ftpsvc_counter, 0); atomic_set(&ipvs->nullsvc_counter, 0); atomic_set(&ipvs->conn_out_counter, 0); diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c index 457c6c193e13..489055091a9b 100644 --- a/net/netfilter/ipvs/ip_vs_est.c +++ b/net/netfilter/ipvs/ip_vs_est.c @@ -97,12 +97,12 @@ static void ip_vs_read_cpu_stats(struct ip_vs_kstats *sum, } -static void estimation_timer(unsigned long arg) +static void estimation_timer(struct timer_list *t) { struct ip_vs_estimator *e; struct ip_vs_stats *s; u64 rate; - struct netns_ipvs *ipvs = (struct netns_ipvs *)arg; + struct netns_ipvs *ipvs = from_timer(ipvs, t, est_timer); spin_lock(&ipvs->est_lock); list_for_each_entry(e, &ipvs->est_list, list) { @@ -192,7 +192,7 @@ int __net_init ip_vs_estimator_net_init(struct netns_ipvs *ipvs) { INIT_LIST_HEAD(&ipvs->est_list); spin_lock_init(&ipvs->est_lock); - setup_timer(&ipvs->est_timer, estimation_timer, (unsigned long)ipvs); + timer_setup(&ipvs->est_timer, estimation_timer, 0); mod_timer(&ipvs->est_timer, jiffies + 2 * HZ); return 0; } diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index fb780be76d15..3e17d32b629d 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -269,13 +269,11 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, * hopefully it will succeed on the retransmitted * packet. */ - rcu_read_lock(); mangled = nf_nat_mangle_tcp_packet(skb, ct, ctinfo, iph->ihl * 4, start - data, end - start, buf, buf_len); - rcu_read_unlock(); if (mangled) { ip_vs_nfct_expect_related(skb, ct, n_cp, IPPROTO_TCP, 0, 0); diff --git a/net/netfilter/ipvs/ip_vs_lblc.c b/net/netfilter/ipvs/ip_vs_lblc.c index b6aa4a970c6e..d625179de485 100644 --- a/net/netfilter/ipvs/ip_vs_lblc.c +++ b/net/netfilter/ipvs/ip_vs_lblc.c @@ -106,6 +106,7 @@ struct ip_vs_lblc_table { struct rcu_head rcu_head; struct hlist_head bucket[IP_VS_LBLC_TAB_SIZE]; /* hash bucket */ struct timer_list periodic_timer; /* collect stale entries */ + struct ip_vs_service *svc; /* pointer back to service */ atomic_t entries; /* number of entries */ int max_size; /* maximum size of entries */ int rover; /* rover for expire check */ @@ -294,10 +295,10 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc) * of the table. * The full expiration check is for this purpose now. */ -static void ip_vs_lblc_check_expire(unsigned long data) +static void ip_vs_lblc_check_expire(struct timer_list *t) { - struct ip_vs_service *svc = (struct ip_vs_service *) data; - struct ip_vs_lblc_table *tbl = svc->sched_data; + struct ip_vs_lblc_table *tbl = from_timer(tbl, t, periodic_timer); + struct ip_vs_service *svc = tbl->svc; unsigned long now = jiffies; int goal; int i, j; @@ -369,12 +370,12 @@ static int ip_vs_lblc_init_svc(struct ip_vs_service *svc) tbl->rover = 0; tbl->counter = 1; tbl->dead = 0; + tbl->svc = svc; /* * Hook periodic timer for garbage collection */ - setup_timer(&tbl->periodic_timer, ip_vs_lblc_check_expire, - (unsigned long)svc); + timer_setup(&tbl->periodic_timer, ip_vs_lblc_check_expire, 0); mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL); return 0; diff --git a/net/netfilter/ipvs/ip_vs_lblcr.c b/net/netfilter/ipvs/ip_vs_lblcr.c index c13ff575f9f7..84c57b62a588 100644 --- a/net/netfilter/ipvs/ip_vs_lblcr.c +++ b/net/netfilter/ipvs/ip_vs_lblcr.c @@ -278,6 +278,7 @@ struct ip_vs_lblcr_table { atomic_t entries; /* number of entries */ int max_size; /* maximum size of entries */ struct timer_list periodic_timer; /* collect stale entries */ + struct ip_vs_service *svc; /* pointer back to service */ int rover; /* rover for expire check */ int counter; /* counter for no expire */ bool dead; @@ -458,10 +459,10 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc) * of the table. * The full expiration check is for this purpose now. */ -static void ip_vs_lblcr_check_expire(unsigned long data) +static void ip_vs_lblcr_check_expire(struct timer_list *t) { - struct ip_vs_service *svc = (struct ip_vs_service *) data; - struct ip_vs_lblcr_table *tbl = svc->sched_data; + struct ip_vs_lblcr_table *tbl = from_timer(tbl, t, periodic_timer); + struct ip_vs_service *svc = tbl->svc; unsigned long now = jiffies; int goal; int i, j; @@ -532,12 +533,12 @@ static int ip_vs_lblcr_init_svc(struct ip_vs_service *svc) tbl->rover = 0; tbl->counter = 1; tbl->dead = 0; + tbl->svc = svc; /* * Hook periodic timer for garbage collection */ - setup_timer(&tbl->periodic_timer, ip_vs_lblcr_check_expire, - (unsigned long)svc); + timer_setup(&tbl->periodic_timer, ip_vs_lblcr_check_expire, 0); mod_timer(&tbl->periodic_timer, jiffies + CHECK_EXPIRE_INTERVAL); return 0; diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 3ffad4adaddf..eff7569824e5 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/ip.h> #include <linux/sctp.h> @@ -24,9 +25,13 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, if (sh) { sch = skb_header_pointer(skb, iph->len + sizeof(_sctph), sizeof(_schunkh), &_schunkh); - if (sch && (sch->type == SCTP_CID_INIT || - sysctl_sloppy_sctp(ipvs))) + if (sch) { + if (sch->type == SCTP_CID_ABORT || + !(sysctl_sloppy_sctp(ipvs) || + sch->type == SCTP_CID_INIT)) + return 1; ports = &sh->source; + } } } else { ports = skb_header_pointer( @@ -38,7 +43,6 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, return 0; } - rcu_read_lock(); if (likely(!ip_vs_iph_inverse(iph))) svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol, &iph->daddr, ports[1]); @@ -53,7 +57,6 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, * It seems that we are very loaded. * We have to drop this packet :( */ - rcu_read_unlock(); *verdict = NF_DROP; return 0; } @@ -67,11 +70,9 @@ sctp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, *verdict = ip_vs_leave(svc, skb, pd, iph); else *verdict = NF_DROP; - rcu_read_unlock(); return 0; } } - rcu_read_unlock(); /* NF_ACCEPT */ return 1; } @@ -526,12 +527,10 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp) /* Lookup application incarnations and bind the right one */ hash = sctp_app_hashkey(cp->vport); - rcu_read_lock(); list_for_each_entry_rcu(inc, &ipvs->sctp_apps[hash], p_list) { if (inc->port == cp->vport) { if (unlikely(!ip_vs_app_inc_get(inc))) break; - rcu_read_unlock(); IP_VS_DBG_BUF(9, "%s: Binding conn %s:%u->" "%s:%u to app %s on port %u\n", @@ -544,11 +543,10 @@ static int sctp_app_conn_bind(struct ip_vs_conn *cp) cp->app = inc; if (inc->init_conn) result = inc->init_conn(inc, cp); - goto out; + break; } } - rcu_read_unlock(); -out: + return result; } diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 12dc8d5bc37d..121a321b91be 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -63,7 +63,6 @@ tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, } /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ - rcu_read_lock(); if (likely(!ip_vs_iph_inverse(iph))) svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol, @@ -80,7 +79,6 @@ tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, * It seems that we are very loaded. * We have to drop this packet :( */ - rcu_read_unlock(); *verdict = NF_DROP; return 0; } @@ -95,11 +93,9 @@ tcp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, *verdict = ip_vs_leave(svc, skb, pd, iph); else *verdict = NF_DROP; - rcu_read_unlock(); return 0; } } - rcu_read_unlock(); /* NF_ACCEPT */ return 1; } @@ -661,12 +657,10 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) /* Lookup application incarnations and bind the right one */ hash = tcp_app_hashkey(cp->vport); - rcu_read_lock(); list_for_each_entry_rcu(inc, &ipvs->tcp_apps[hash], p_list) { if (inc->port == cp->vport) { if (unlikely(!ip_vs_app_inc_get(inc))) break; - rcu_read_unlock(); IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" "%s:%u to app %s on port %u\n", @@ -680,12 +674,10 @@ tcp_app_conn_bind(struct ip_vs_conn *cp) cp->app = inc; if (inc->init_conn) result = inc->init_conn(inc, cp); - goto out; + break; } } - rcu_read_unlock(); - out: return result; } diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index e494e9a88c7f..30e11cd6aa8a 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -53,7 +53,6 @@ udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, return 0; } - rcu_read_lock(); if (likely(!ip_vs_iph_inverse(iph))) svc = ip_vs_service_find(ipvs, af, skb->mark, iph->protocol, &iph->daddr, ports[1]); @@ -69,7 +68,6 @@ udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, * It seems that we are very loaded. * We have to drop this packet :( */ - rcu_read_unlock(); *verdict = NF_DROP; return 0; } @@ -84,11 +82,9 @@ udp_conn_schedule(struct netns_ipvs *ipvs, int af, struct sk_buff *skb, *verdict = ip_vs_leave(svc, skb, pd, iph); else *verdict = NF_DROP; - rcu_read_unlock(); return 0; } } - rcu_read_unlock(); /* NF_ACCEPT */ return 1; } @@ -410,12 +406,10 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp) /* Lookup application incarnations and bind the right one */ hash = udp_app_hashkey(cp->vport); - rcu_read_lock(); list_for_each_entry_rcu(inc, &ipvs->udp_apps[hash], p_list) { if (inc->port == cp->vport) { if (unlikely(!ip_vs_app_inc_get(inc))) break; - rcu_read_unlock(); IP_VS_DBG_BUF(9, "%s(): Binding conn %s:%u->" "%s:%u to app %s on port %u\n", @@ -429,12 +423,10 @@ static int udp_app_conn_bind(struct ip_vs_conn *cp) cp->app = inc; if (inc->init_conn) result = inc->init_conn(inc, cp); - goto out; + break; } } - rcu_read_unlock(); - out: return result; } diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 0e5b64a75da0..9ee71cb276d7 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * IPVS An implementation of the IP virtual server support for the * LINUX operating system. IPVS is now implemented as a module @@ -457,7 +458,7 @@ static inline bool in_persistence(struct ip_vs_conn *cp) static int ip_vs_sync_conn_needed(struct netns_ipvs *ipvs, struct ip_vs_conn *cp, int pkts) { - unsigned long orig = ACCESS_ONCE(cp->sync_endtime); + unsigned long orig = READ_ONCE(cp->sync_endtime); unsigned long now = jiffies; unsigned long n = (now + cp->timeout) & ~3UL; unsigned int sync_refresh_period; diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 2eab1e0400f4..4527921b1c3a 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -678,7 +678,6 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); - rcu_read_lock(); if (__ip_vs_get_out_rt(cp->ipvs, cp->af, skb, NULL, iph->daddr, IP_VS_RT_MODE_NON_LOCAL, NULL, ipvsh) < 0) goto tx_error; @@ -689,14 +688,12 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, skb->ignore_df = 1; ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0); - rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; tx_error: kfree_skb(skb); - rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; } @@ -710,7 +707,6 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); - rcu_read_lock(); if (__ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, NULL, &iph->daddr, NULL, ipvsh, 0, IP_VS_RT_MODE_NON_LOCAL) < 0) @@ -720,14 +716,12 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, skb->ignore_df = 1; ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0); - rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; tx_error: kfree_skb(skb); - rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; } @@ -746,7 +740,6 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); - rcu_read_lock(); /* check if it is a connection of no-client-port */ if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) { __be16 _pt, *p; @@ -815,14 +808,12 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, skb->ignore_df = 1; rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local); - rcu_read_unlock(); LeaveFunction(10); return rc; tx_error: kfree_skb(skb); - rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; } @@ -837,7 +828,6 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); - rcu_read_lock(); /* check if it is a connection of no-client-port */ if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT && !ipvsh->fragoffs)) { __be16 _pt, *p; @@ -906,7 +896,6 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, skb->ignore_df = 1; rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local); - rcu_read_unlock(); LeaveFunction(10); return rc; @@ -914,7 +903,6 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, tx_error: LeaveFunction(10); kfree_skb(skb); - rcu_read_unlock(); return NF_STOLEN; } #endif @@ -933,6 +921,7 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af, { struct sk_buff *new_skb = NULL; struct iphdr *old_iph = NULL; + __u8 old_dsfield; #ifdef CONFIG_IP_VS_IPV6 struct ipv6hdr *old_ipv6h = NULL; #endif @@ -957,7 +946,7 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af, *payload_len = ntohs(old_ipv6h->payload_len) + sizeof(*old_ipv6h); - *dsfield = ipv6_get_dsfield(old_ipv6h); + old_dsfield = ipv6_get_dsfield(old_ipv6h); *ttl = old_ipv6h->hop_limit; if (df) *df = 0; @@ -972,12 +961,15 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af, /* fix old IP header checksum */ ip_send_check(old_iph); - *dsfield = ipv4_get_dsfield(old_iph); + old_dsfield = ipv4_get_dsfield(old_iph); *ttl = old_iph->ttl; if (payload_len) *payload_len = ntohs(old_iph->tot_len); } + /* Implement full-functionality option for ECN encapsulation */ + *dsfield = INET_ECN_encapsulate(old_dsfield, old_dsfield); + return skb; error: kfree_skb(skb); @@ -1035,7 +1027,6 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); - rcu_read_lock(); local = __ip_vs_get_out_rt(ipvs, cp->af, skb, cp->dest, cp->daddr.ip, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | @@ -1043,10 +1034,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_RT_MODE_TUNNEL, &saddr, ipvsh); if (local < 0) goto tx_error; - if (local) { - rcu_read_unlock(); + if (local) return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1); - } rt = skb_rtable(skb); tdev = rt->dst.dev; @@ -1095,7 +1084,6 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, ip_local_out(net, skb->sk, skb); else if (ret == NF_DROP) kfree_skb(skb); - rcu_read_unlock(); LeaveFunction(10); @@ -1104,7 +1092,6 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, tx_error: if (!IS_ERR(skb)) kfree_skb(skb); - rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; } @@ -1127,7 +1114,6 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); - rcu_read_lock(); local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest, &cp->daddr.in6, &saddr, ipvsh, 1, @@ -1136,10 +1122,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_RT_MODE_TUNNEL); if (local < 0) goto tx_error; - if (local) { - rcu_read_unlock(); + if (local) return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1); - } rt = (struct rt6_info *) skb_dst(skb); tdev = rt->dst.dev; @@ -1185,7 +1169,6 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, ip6_local_out(cp->ipvs->net, skb->sk, skb); else if (ret == NF_DROP) kfree_skb(skb); - rcu_read_unlock(); LeaveFunction(10); @@ -1194,7 +1177,6 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, tx_error: if (!IS_ERR(skb)) kfree_skb(skb); - rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; } @@ -1213,17 +1195,14 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); - rcu_read_lock(); local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip, IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_KNOWN_NH, NULL, ipvsh); if (local < 0) goto tx_error; - if (local) { - rcu_read_unlock(); + if (local) return ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 1); - } ip_send_check(ip_hdr(skb)); @@ -1231,14 +1210,12 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, skb->ignore_df = 1; ip_vs_send_or_cont(NFPROTO_IPV4, skb, cp, 0); - rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; tx_error: kfree_skb(skb); - rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; } @@ -1252,7 +1229,6 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); - rcu_read_lock(); local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest, &cp->daddr.in6, NULL, ipvsh, 0, @@ -1261,23 +1237,19 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_RT_MODE_KNOWN_NH); if (local < 0) goto tx_error; - if (local) { - rcu_read_unlock(); + if (local) return ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 1); - } /* Another hack: avoid icmp_send in ip_fragment */ skb->ignore_df = 1; ip_vs_send_or_cont(NFPROTO_IPV6, skb, cp, 0); - rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; tx_error: kfree_skb(skb); - rcu_read_unlock(); LeaveFunction(10); return NF_STOLEN; } @@ -1322,7 +1294,6 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, rt_mode = (hooknum != NF_INET_FORWARD) ? IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; - rcu_read_lock(); local = __ip_vs_get_out_rt(cp->ipvs, cp->af, skb, cp->dest, cp->daddr.ip, rt_mode, NULL, iph); if (local < 0) @@ -1368,12 +1339,10 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, skb->ignore_df = 1; rc = ip_vs_nat_send_or_cont(NFPROTO_IPV4, skb, cp, local); - rcu_read_unlock(); goto out; tx_error: kfree_skb(skb); - rcu_read_unlock(); rc = NF_STOLEN; out: LeaveFunction(10); @@ -1414,7 +1383,6 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, rt_mode = (hooknum != NF_INET_FORWARD) ? IP_VS_RT_MODE_LOCAL | IP_VS_RT_MODE_NON_LOCAL | IP_VS_RT_MODE_RDR : IP_VS_RT_MODE_NON_LOCAL; - rcu_read_lock(); local = __ip_vs_get_out_rt_v6(cp->ipvs, cp->af, skb, cp->dest, &cp->daddr.in6, NULL, ipvsh, 0, rt_mode); if (local < 0) @@ -1460,12 +1428,10 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, skb->ignore_df = 1; rc = ip_vs_nat_send_or_cont(NFPROTO_IPV6, skb, cp, local); - rcu_read_unlock(); goto out; tx_error: kfree_skb(skb); - rcu_read_unlock(); rc = NF_STOLEN; out: LeaveFunction(10); diff --git a/net/netfilter/nf_conntrack_broadcast.c b/net/netfilter/nf_conntrack_broadcast.c index 4e99cca61612..ecc3ab784633 100644 --- a/net/netfilter/nf_conntrack_broadcast.c +++ b/net/netfilter/nf_conntrack_broadcast.c @@ -40,7 +40,6 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb, if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) goto out; - rcu_read_lock(); in_dev = __in_dev_get_rcu(rt->dst.dev); if (in_dev != NULL) { for_primary_ifa(in_dev) { @@ -50,7 +49,6 @@ int nf_conntrack_broadcast_help(struct sk_buff *skb, } } endfor_ifa(in_dev); } - rcu_read_unlock(); if (mask == 0) goto out; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 9979f46c81dc..85f643c1e227 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -56,6 +56,8 @@ #include <net/netfilter/nf_nat_helper.h> #include <net/netns/hash.h> +#include "nf_internals.h" + #define NF_CONNTRACK_VERSION "0.5.0" int (*nfnetlink_parse_nat_setup_hook)(struct nf_conn *ct, @@ -96,19 +98,26 @@ static struct conntrack_gc_work conntrack_gc_work; void nf_conntrack_lock(spinlock_t *lock) __acquires(lock) { + /* 1) Acquire the lock */ spin_lock(lock); - while (unlikely(nf_conntrack_locks_all)) { - spin_unlock(lock); - /* - * Order the 'nf_conntrack_locks_all' load vs. the - * spin_unlock_wait() loads below, to ensure - * that 'nf_conntrack_locks_all_lock' is indeed held: - */ - smp_rmb(); /* spin_lock(&nf_conntrack_locks_all_lock) */ - spin_unlock_wait(&nf_conntrack_locks_all_lock); - spin_lock(lock); - } + /* 2) read nf_conntrack_locks_all, with ACQUIRE semantics + * It pairs with the smp_store_release() in nf_conntrack_all_unlock() + */ + if (likely(smp_load_acquire(&nf_conntrack_locks_all) == false)) + return; + + /* fast path failed, unlock */ + spin_unlock(lock); + + /* Slow path 1) get global lock */ + spin_lock(&nf_conntrack_locks_all_lock); + + /* Slow path 2) get the lock we want */ + spin_lock(lock); + + /* Slow path 3) release the global lock */ + spin_unlock(&nf_conntrack_locks_all_lock); } EXPORT_SYMBOL_GPL(nf_conntrack_lock); @@ -149,28 +158,27 @@ static void nf_conntrack_all_lock(void) int i; spin_lock(&nf_conntrack_locks_all_lock); - nf_conntrack_locks_all = true; - /* - * Order the above store of 'nf_conntrack_locks_all' against - * the spin_unlock_wait() loads below, such that if - * nf_conntrack_lock() observes 'nf_conntrack_locks_all' - * we must observe nf_conntrack_locks[] held: - */ - smp_mb(); /* spin_lock(&nf_conntrack_locks_all_lock) */ + nf_conntrack_locks_all = true; for (i = 0; i < CONNTRACK_LOCKS; i++) { - spin_unlock_wait(&nf_conntrack_locks[i]); + spin_lock(&nf_conntrack_locks[i]); + + /* This spin_unlock provides the "release" to ensure that + * nf_conntrack_locks_all==true is visible to everyone that + * acquired spin_lock(&nf_conntrack_locks[]). + */ + spin_unlock(&nf_conntrack_locks[i]); } } static void nf_conntrack_all_unlock(void) { - /* - * All prior stores must be complete before we clear + /* All prior stores must be complete before we clear * 'nf_conntrack_locks_all'. Otherwise nf_conntrack_lock() * might observe the false value but not the entire - * critical section: + * critical section. + * It pairs with the smp_load_acquire() in nf_conntrack_lock() */ smp_store_release(&nf_conntrack_locks_all, false); spin_unlock(&nf_conntrack_locks_all_lock); @@ -248,8 +256,8 @@ bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff, u_int16_t l3num, struct net *net, struct nf_conntrack_tuple *tuple) { - struct nf_conntrack_l3proto *l3proto; - struct nf_conntrack_l4proto *l4proto; + const struct nf_conntrack_l3proto *l3proto; + const struct nf_conntrack_l4proto *l4proto; unsigned int protoff; u_int8_t protonum; int ret; @@ -398,22 +406,19 @@ static void destroy_conntrack(struct nf_conntrack *nfct) { struct nf_conn *ct = (struct nf_conn *)nfct; - struct nf_conntrack_l4proto *l4proto; + const struct nf_conntrack_l4proto *l4proto; pr_debug("destroy_conntrack(%p)\n", ct); - NF_CT_ASSERT(atomic_read(&nfct->use) == 0); + WARN_ON(atomic_read(&nfct->use) != 0); if (unlikely(nf_ct_is_template(ct))) { nf_ct_tmpl_free(ct); return; } - rcu_read_lock(); l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); if (l4proto->destroy) l4proto->destroy(ct); - rcu_read_unlock(); - local_bh_disable(); /* Expectations will have been removed in clean_from_lists, * except TFTP can create an expectation on the first packet, @@ -695,7 +700,7 @@ static int nf_ct_resolve_clash(struct net *net, struct sk_buff *skb, { /* This is the conntrack entry already in hashes that won race. */ struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h); - struct nf_conntrack_l4proto *l4proto; + const struct nf_conntrack_l4proto *l4proto; l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); if (l4proto->allow_clash && @@ -757,12 +762,11 @@ __nf_conntrack_confirm(struct sk_buff *skb) * connections for unconfirmed conns. But packet copies and * REJECT will give spurious warnings here. */ - /* NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 1); */ /* No external references means no one else could have * confirmed us. */ - NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); + WARN_ON(nf_ct_is_confirmed(ct)); pr_debug("Confirming conntrack %p\n", ct); /* We have to check the DYING flag after unlink to prevent * a race against nf_ct_get_next_corpse() possibly called from @@ -1079,12 +1083,12 @@ static void gc_worker(struct work_struct *work) next_run = gc_work->next_gc_run; gc_work->last_bucket = i; gc_work->early_drop = false; - queue_delayed_work(system_long_wq, &gc_work->dwork, next_run); + queue_delayed_work(system_power_efficient_wq, &gc_work->dwork, next_run); } static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work) { - INIT_DELAYED_WORK(&gc_work->dwork, gc_worker); + INIT_DEFERRABLE_WORK(&gc_work->dwork, gc_worker); gc_work->next_gc_run = HZ; gc_work->exiting = false; } @@ -1161,7 +1165,7 @@ void nf_conntrack_free(struct nf_conn *ct) /* A freed object has refcnt == 0, that's * the golden rule for SLAB_TYPESAFE_BY_RCU */ - NF_CT_ASSERT(atomic_read(&ct->ct_general.use) == 0); + WARN_ON(atomic_read(&ct->ct_general.use) != 0); nf_ct_ext_destroy(ct); nf_ct_ext_free(ct); @@ -1177,8 +1181,8 @@ EXPORT_SYMBOL_GPL(nf_conntrack_free); static noinline struct nf_conntrack_tuple_hash * init_conntrack(struct net *net, struct nf_conn *tmpl, const struct nf_conntrack_tuple *tuple, - struct nf_conntrack_l3proto *l3proto, - struct nf_conntrack_l4proto *l4proto, + const struct nf_conntrack_l3proto *l3proto, + const struct nf_conntrack_l4proto *l4proto, struct sk_buff *skb, unsigned int dataoff, u32 hash) { @@ -1289,8 +1293,8 @@ resolve_normal_ct(struct net *net, struct nf_conn *tmpl, unsigned int dataoff, u_int16_t l3num, u_int8_t protonum, - struct nf_conntrack_l3proto *l3proto, - struct nf_conntrack_l4proto *l4proto) + const struct nf_conntrack_l3proto *l3proto, + const struct nf_conntrack_l4proto *l4proto) { const struct nf_conntrack_zone *zone; struct nf_conntrack_tuple tuple; @@ -1345,10 +1349,10 @@ unsigned int nf_conntrack_in(struct net *net, u_int8_t pf, unsigned int hooknum, struct sk_buff *skb) { + const struct nf_conntrack_l3proto *l3proto; + const struct nf_conntrack_l4proto *l4proto; struct nf_conn *ct, *tmpl; enum ip_conntrack_info ctinfo; - struct nf_conntrack_l3proto *l3proto; - struct nf_conntrack_l4proto *l4proto; unsigned int *timeouts; unsigned int dataoff; u_int8_t protonum; @@ -1415,7 +1419,7 @@ repeat: /* Decide what timeout policy we want to apply to this flow. */ timeouts = nf_ct_timeout_lookup(net, ct, l4proto); - ret = l4proto->packet(ct, skb, dataoff, ctinfo, pf, hooknum, timeouts); + ret = l4proto->packet(ct, skb, dataoff, ctinfo, timeouts); if (ret <= 0) { /* Invalid: inverse of the return code tells * the netfilter core what to do */ @@ -1469,7 +1473,7 @@ void nf_conntrack_alter_reply(struct nf_conn *ct, struct nf_conn_help *help = nfct_help(ct); /* Should be unconfirmed, so not in hash table yet */ - NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); + WARN_ON(nf_ct_is_confirmed(ct)); pr_debug("Altering reply tuple of %p to ", ct); nf_ct_dump_tuple(newreply); @@ -1491,7 +1495,7 @@ void __nf_ct_refresh_acct(struct nf_conn *ct, unsigned long extra_jiffies, int do_acct) { - NF_CT_ASSERT(skb); + WARN_ON(!skb); /* Only update if this is not a fixed timeout */ if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) @@ -1559,9 +1563,14 @@ int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[], } EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_to_tuple); -int nf_ct_port_nlattr_tuple_size(void) +unsigned int nf_ct_port_nlattr_tuple_size(void) { - return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1); + static unsigned int size __read_mostly; + + if (!size) + size = nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1); + + return size; } EXPORT_SYMBOL_GPL(nf_ct_port_nlattr_tuple_size); #endif @@ -1689,6 +1698,18 @@ __nf_ct_unconfirmed_destroy(struct net *net) } } +void nf_ct_unconfirmed_destroy(struct net *net) +{ + might_sleep(); + + if (atomic_read(&net->ct.count) > 0) { + __nf_ct_unconfirmed_destroy(net); + nf_queue_nf_hook_drop(net); + synchronize_net(); + } +} +EXPORT_SYMBOL_GPL(nf_ct_unconfirmed_destroy); + void nf_ct_iterate_cleanup_net(struct net *net, int (*iter)(struct nf_conn *i, void *data), void *data, u32 portid, int report) @@ -1700,14 +1721,10 @@ void nf_ct_iterate_cleanup_net(struct net *net, if (atomic_read(&net->ct.count) == 0) return; - __nf_ct_unconfirmed_destroy(net); - d.iter = iter; d.data = data; d.net = net; - synchronize_net(); - nf_ct_iterate_cleanup(iter_net_only, &d, portid, report); } EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup_net); @@ -1733,6 +1750,7 @@ nf_ct_iterate_destroy(int (*iter)(struct nf_conn *i, void *data), void *data) if (atomic_read(&net->ct.count) == 0) continue; __nf_ct_unconfirmed_destroy(net); + nf_queue_nf_hook_drop(net); } rtnl_unlock(); @@ -1927,7 +1945,7 @@ int nf_conntrack_hash_resize(unsigned int hashsize) return 0; } -int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) +int nf_conntrack_set_hashsize(const char *val, const struct kernel_param *kp) { unsigned int hashsize; int rc; @@ -2071,7 +2089,7 @@ int nf_conntrack_init_start(void) goto err_proto; conntrack_gc_work_init(&conntrack_gc_work); - queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, HZ); + queue_delayed_work(system_power_efficient_wq, &conntrack_gc_work.dwork, HZ); return 0; diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 899c2c36da13..64778f9a8548 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -51,8 +51,8 @@ void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, struct nf_conn_help *master_help = nfct_help(exp->master); struct net *net = nf_ct_exp_net(exp); - NF_CT_ASSERT(master_help); - NF_CT_ASSERT(!timer_pending(&exp->timeout)); + WARN_ON(!master_help); + WARN_ON(timer_pending(&exp->timeout)); hlist_del_rcu(&exp->hnode); net->ct.expect_count--; @@ -368,12 +368,6 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) /* two references : one for hash insert, one for the timer */ refcount_add(2, &exp->use); - hlist_add_head_rcu(&exp->lnode, &master_help->expectations); - master_help->expecting[exp->class]++; - - hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]); - net->ct.expect_count++; - setup_timer(&exp->timeout, nf_ct_expectation_timed_out, (unsigned long)exp); helper = rcu_dereference_protected(master_help->helper, @@ -384,6 +378,12 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) } add_timer(&exp->timeout); + hlist_add_head_rcu(&exp->lnode, &master_help->expectations); + master_help->expecting[exp->class]++; + + hlist_add_head_rcu(&exp->hnode, &nf_ct_expect_hash[h]); + net->ct.expect_count++; + NF_CT_STAT_INC(net, expect_create); } @@ -474,6 +474,60 @@ out: } EXPORT_SYMBOL_GPL(nf_ct_expect_related_report); +void nf_ct_expect_iterate_destroy(bool (*iter)(struct nf_conntrack_expect *e, void *data), + void *data) +{ + struct nf_conntrack_expect *exp; + const struct hlist_node *next; + unsigned int i; + + spin_lock_bh(&nf_conntrack_expect_lock); + + for (i = 0; i < nf_ct_expect_hsize; i++) { + hlist_for_each_entry_safe(exp, next, + &nf_ct_expect_hash[i], + hnode) { + if (iter(exp, data) && del_timer(&exp->timeout)) { + nf_ct_unlink_expect(exp); + nf_ct_expect_put(exp); + } + } + } + + spin_unlock_bh(&nf_conntrack_expect_lock); +} +EXPORT_SYMBOL_GPL(nf_ct_expect_iterate_destroy); + +void nf_ct_expect_iterate_net(struct net *net, + bool (*iter)(struct nf_conntrack_expect *e, void *data), + void *data, + u32 portid, int report) +{ + struct nf_conntrack_expect *exp; + const struct hlist_node *next; + unsigned int i; + + spin_lock_bh(&nf_conntrack_expect_lock); + + for (i = 0; i < nf_ct_expect_hsize; i++) { + hlist_for_each_entry_safe(exp, next, + &nf_ct_expect_hash[i], + hnode) { + + if (!net_eq(nf_ct_exp_net(exp), net)) + continue; + + if (iter(exp, data) && del_timer(&exp->timeout)) { + nf_ct_unlink_expect_report(exp, portid, report); + nf_ct_expect_put(exp); + } + } + } + + spin_unlock_bh(&nf_conntrack_expect_lock); +} +EXPORT_SYMBOL_GPL(nf_ct_expect_iterate_net); + #ifdef CONFIG_NF_CONNTRACK_PROCFS struct ct_expect_iter_state { struct seq_net_private p; diff --git a/net/netfilter/nf_conntrack_extend.c b/net/netfilter/nf_conntrack_extend.c index 6c605e88ebae..9fe0ddc333fb 100644 --- a/net/netfilter/nf_conntrack_extend.c +++ b/net/netfilter/nf_conntrack_extend.c @@ -47,7 +47,7 @@ void *nf_ct_ext_add(struct nf_conn *ct, enum nf_ct_ext_id id, gfp_t gfp) struct nf_ct_ext_type *t; /* Conntrack must not be confirmed to avoid races on reallocation. */ - NF_CT_ASSERT(!nf_ct_is_confirmed(ct)); + WARN_ON(nf_ct_is_confirmed(ct)); old = ct->ext; diff --git a/net/netfilter/nf_conntrack_h323_asn1.c b/net/netfilter/nf_conntrack_h323_asn1.c index 89b2e46925c4..cf1bf2605c10 100644 --- a/net/netfilter/nf_conntrack_h323_asn1.c +++ b/net/netfilter/nf_conntrack_h323_asn1.c @@ -91,41 +91,41 @@ typedef struct field_t { } field_t; /* Bit Stream */ -typedef struct { +struct bitstr { unsigned char *buf; unsigned char *beg; unsigned char *end; unsigned char *cur; unsigned int bit; -} bitstr_t; +}; /* Tool Functions */ #define INC_BIT(bs) if((++(bs)->bit)>7){(bs)->cur++;(bs)->bit=0;} #define INC_BITS(bs,b) if(((bs)->bit+=(b))>7){(bs)->cur+=(bs)->bit>>3;(bs)->bit&=7;} #define BYTE_ALIGN(bs) if((bs)->bit){(bs)->cur++;(bs)->bit=0;} #define CHECK_BOUND(bs,n) if((bs)->cur+(n)>(bs)->end)return(H323_ERROR_BOUND) -static unsigned int get_len(bitstr_t *bs); -static unsigned int get_bit(bitstr_t *bs); -static unsigned int get_bits(bitstr_t *bs, unsigned int b); -static unsigned int get_bitmap(bitstr_t *bs, unsigned int b); -static unsigned int get_uint(bitstr_t *bs, int b); +static unsigned int get_len(struct bitstr *bs); +static unsigned int get_bit(struct bitstr *bs); +static unsigned int get_bits(struct bitstr *bs, unsigned int b); +static unsigned int get_bitmap(struct bitstr *bs, unsigned int b); +static unsigned int get_uint(struct bitstr *bs, int b); /* Decoder Functions */ -static int decode_nul(bitstr_t *bs, const struct field_t *f, char *base, int level); -static int decode_bool(bitstr_t *bs, const struct field_t *f, char *base, int level); -static int decode_oid(bitstr_t *bs, const struct field_t *f, char *base, int level); -static int decode_int(bitstr_t *bs, const struct field_t *f, char *base, int level); -static int decode_enum(bitstr_t *bs, const struct field_t *f, char *base, int level); -static int decode_bitstr(bitstr_t *bs, const struct field_t *f, char *base, int level); -static int decode_numstr(bitstr_t *bs, const struct field_t *f, char *base, int level); -static int decode_octstr(bitstr_t *bs, const struct field_t *f, char *base, int level); -static int decode_bmpstr(bitstr_t *bs, const struct field_t *f, char *base, int level); -static int decode_seq(bitstr_t *bs, const struct field_t *f, char *base, int level); -static int decode_seqof(bitstr_t *bs, const struct field_t *f, char *base, int level); -static int decode_choice(bitstr_t *bs, const struct field_t *f, char *base, int level); +static int decode_nul(struct bitstr *bs, const struct field_t *f, char *base, int level); +static int decode_bool(struct bitstr *bs, const struct field_t *f, char *base, int level); +static int decode_oid(struct bitstr *bs, const struct field_t *f, char *base, int level); +static int decode_int(struct bitstr *bs, const struct field_t *f, char *base, int level); +static int decode_enum(struct bitstr *bs, const struct field_t *f, char *base, int level); +static int decode_bitstr(struct bitstr *bs, const struct field_t *f, char *base, int level); +static int decode_numstr(struct bitstr *bs, const struct field_t *f, char *base, int level); +static int decode_octstr(struct bitstr *bs, const struct field_t *f, char *base, int level); +static int decode_bmpstr(struct bitstr *bs, const struct field_t *f, char *base, int level); +static int decode_seq(struct bitstr *bs, const struct field_t *f, char *base, int level); +static int decode_seqof(struct bitstr *bs, const struct field_t *f, char *base, int level); +static int decode_choice(struct bitstr *bs, const struct field_t *f, char *base, int level); /* Decoder Functions Vector */ -typedef int (*decoder_t)(bitstr_t *, const struct field_t *, char *, int); +typedef int (*decoder_t)(struct bitstr *, const struct field_t *, char *, int); static const decoder_t Decoders[] = { decode_nul, decode_bool, @@ -150,7 +150,7 @@ static const decoder_t Decoders[] = { * Functions ****************************************************************************/ /* Assume bs is aligned && v < 16384 */ -static unsigned int get_len(bitstr_t *bs) +static unsigned int get_len(struct bitstr *bs) { unsigned int v; @@ -166,7 +166,7 @@ static unsigned int get_len(bitstr_t *bs) } /****************************************************************************/ -static unsigned int get_bit(bitstr_t *bs) +static unsigned int get_bit(struct bitstr *bs) { unsigned int b = (*bs->cur) & (0x80 >> bs->bit); @@ -177,7 +177,7 @@ static unsigned int get_bit(bitstr_t *bs) /****************************************************************************/ /* Assume b <= 8 */ -static unsigned int get_bits(bitstr_t *bs, unsigned int b) +static unsigned int get_bits(struct bitstr *bs, unsigned int b) { unsigned int v, l; @@ -203,7 +203,7 @@ static unsigned int get_bits(bitstr_t *bs, unsigned int b) /****************************************************************************/ /* Assume b <= 32 */ -static unsigned int get_bitmap(bitstr_t *bs, unsigned int b) +static unsigned int get_bitmap(struct bitstr *bs, unsigned int b) { unsigned int v, l, shift, bytes; @@ -242,7 +242,7 @@ static unsigned int get_bitmap(bitstr_t *bs, unsigned int b) /**************************************************************************** * Assume bs is aligned and sizeof(unsigned int) == 4 ****************************************************************************/ -static unsigned int get_uint(bitstr_t *bs, int b) +static unsigned int get_uint(struct bitstr *bs, int b) { unsigned int v = 0; @@ -264,7 +264,7 @@ static unsigned int get_uint(bitstr_t *bs, int b) } /****************************************************************************/ -static int decode_nul(bitstr_t *bs, const struct field_t *f, +static int decode_nul(struct bitstr *bs, const struct field_t *f, char *base, int level) { PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); @@ -273,7 +273,7 @@ static int decode_nul(bitstr_t *bs, const struct field_t *f, } /****************************************************************************/ -static int decode_bool(bitstr_t *bs, const struct field_t *f, +static int decode_bool(struct bitstr *bs, const struct field_t *f, char *base, int level) { PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); @@ -285,7 +285,7 @@ static int decode_bool(bitstr_t *bs, const struct field_t *f, } /****************************************************************************/ -static int decode_oid(bitstr_t *bs, const struct field_t *f, +static int decode_oid(struct bitstr *bs, const struct field_t *f, char *base, int level) { int len; @@ -302,7 +302,7 @@ static int decode_oid(bitstr_t *bs, const struct field_t *f, } /****************************************************************************/ -static int decode_int(bitstr_t *bs, const struct field_t *f, +static int decode_int(struct bitstr *bs, const struct field_t *f, char *base, int level) { unsigned int len; @@ -346,7 +346,7 @@ static int decode_int(bitstr_t *bs, const struct field_t *f, } /****************************************************************************/ -static int decode_enum(bitstr_t *bs, const struct field_t *f, +static int decode_enum(struct bitstr *bs, const struct field_t *f, char *base, int level) { PRINT("%*.s%s\n", level * TAB_SIZE, " ", f->name); @@ -362,7 +362,7 @@ static int decode_enum(bitstr_t *bs, const struct field_t *f, } /****************************************************************************/ -static int decode_bitstr(bitstr_t *bs, const struct field_t *f, +static int decode_bitstr(struct bitstr *bs, const struct field_t *f, char *base, int level) { unsigned int len; @@ -396,7 +396,7 @@ static int decode_bitstr(bitstr_t *bs, const struct field_t *f, } /****************************************************************************/ -static int decode_numstr(bitstr_t *bs, const struct field_t *f, +static int decode_numstr(struct bitstr *bs, const struct field_t *f, char *base, int level) { unsigned int len; @@ -414,7 +414,7 @@ static int decode_numstr(bitstr_t *bs, const struct field_t *f, } /****************************************************************************/ -static int decode_octstr(bitstr_t *bs, const struct field_t *f, +static int decode_octstr(struct bitstr *bs, const struct field_t *f, char *base, int level) { unsigned int len; @@ -463,7 +463,7 @@ static int decode_octstr(bitstr_t *bs, const struct field_t *f, } /****************************************************************************/ -static int decode_bmpstr(bitstr_t *bs, const struct field_t *f, +static int decode_bmpstr(struct bitstr *bs, const struct field_t *f, char *base, int level) { unsigned int len; @@ -489,7 +489,7 @@ static int decode_bmpstr(bitstr_t *bs, const struct field_t *f, } /****************************************************************************/ -static int decode_seq(bitstr_t *bs, const struct field_t *f, +static int decode_seq(struct bitstr *bs, const struct field_t *f, char *base, int level) { unsigned int ext, bmp, i, opt, len = 0, bmp2, bmp2_len; @@ -606,7 +606,7 @@ static int decode_seq(bitstr_t *bs, const struct field_t *f, } /****************************************************************************/ -static int decode_seqof(bitstr_t *bs, const struct field_t *f, +static int decode_seqof(struct bitstr *bs, const struct field_t *f, char *base, int level) { unsigned int count, effective_count = 0, i, len = 0; @@ -696,7 +696,7 @@ static int decode_seqof(bitstr_t *bs, const struct field_t *f, /****************************************************************************/ -static int decode_choice(bitstr_t *bs, const struct field_t *f, +static int decode_choice(struct bitstr *bs, const struct field_t *f, char *base, int level) { unsigned int type, ext, len = 0; @@ -772,7 +772,7 @@ int DecodeRasMessage(unsigned char *buf, size_t sz, RasMessage *ras) FNAME("RasMessage") CHOICE, 5, 24, 32, DECODE | EXT, 0, _RasMessage }; - bitstr_t bs; + struct bitstr bs; bs.buf = bs.beg = bs.cur = buf; bs.end = buf + sz; @@ -789,7 +789,7 @@ static int DecodeH323_UserInformation(unsigned char *buf, unsigned char *beg, FNAME("H323-UserInformation") SEQ, 1, 2, 2, DECODE | EXT, 0, _H323_UserInformation }; - bitstr_t bs; + struct bitstr bs; bs.buf = buf; bs.beg = bs.cur = beg; @@ -808,7 +808,7 @@ int DecodeMultimediaSystemControlMessage(unsigned char *buf, size_t sz, FNAME("MultimediaSystemControlMessage") CHOICE, 2, 4, 4, DECODE | EXT, 0, _MultimediaSystemControlMessage }; - bitstr_t bs; + struct bitstr bs; bs.buf = bs.beg = bs.cur = buf; bs.end = buf + sz; @@ -877,6 +877,7 @@ int DecodeQ931(unsigned char *buf, size_t sz, Q931 *q931) if (sz < 1) break; len = *p++; + sz--; if (sz < len) break; p += len; diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 9129bb3b5153..551a1eddf0fa 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -437,12 +437,22 @@ out: } EXPORT_SYMBOL_GPL(nf_conntrack_helper_register); -void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) +static bool expect_iter_me(struct nf_conntrack_expect *exp, void *data) { - struct nf_conntrack_expect *exp; - const struct hlist_node *next; - unsigned int i; + struct nf_conn_help *help = nfct_help(exp->master); + const struct nf_conntrack_helper *me = data; + const struct nf_conntrack_helper *this; + + if (exp->helper == me) + return true; + this = rcu_dereference_protected(help->helper, + lockdep_is_held(&nf_conntrack_expect_lock)); + return this == me; +} + +void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) +{ mutex_lock(&nf_ct_helper_mutex); hlist_del_rcu(&me->hnode); nf_ct_helper_count--; @@ -453,21 +463,7 @@ void nf_conntrack_helper_unregister(struct nf_conntrack_helper *me) */ synchronize_rcu(); - /* Get rid of expectations */ - spin_lock_bh(&nf_conntrack_expect_lock); - for (i = 0; i < nf_ct_expect_hsize; i++) { - hlist_for_each_entry_safe(exp, next, - &nf_ct_expect_hash[i], hnode) { - struct nf_conn_help *help = nfct_help(exp->master); - if ((rcu_dereference_protected( - help->helper, - lockdep_is_held(&nf_conntrack_expect_lock) - ) == me || exp->helper == me)) - nf_ct_remove_expect(exp); - } - } - spin_unlock_bh(&nf_conntrack_expect_lock); - + nf_ct_expect_iterate_destroy(expect_iter_me, NULL); nf_ct_iterate_destroy(unhelp, me); } EXPORT_SYMBOL_GPL(nf_conntrack_helper_unregister); diff --git a/net/netfilter/nf_conntrack_l3proto_generic.c b/net/netfilter/nf_conntrack_l3proto_generic.c index cf9ace70bece..397e6911214f 100644 --- a/net/netfilter/nf_conntrack_l3proto_generic.c +++ b/net/netfilter/nf_conntrack_l3proto_generic.c @@ -49,11 +49,6 @@ static bool generic_invert_tuple(struct nf_conntrack_tuple *tuple, return true; } -static void generic_print_tuple(struct seq_file *s, - const struct nf_conntrack_tuple *tuple) -{ -} - static int generic_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, unsigned int *dataoff, u_int8_t *protonum) { @@ -64,10 +59,8 @@ static int generic_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, struct nf_conntrack_l3proto nf_conntrack_l3proto_generic __read_mostly = { .l3proto = PF_UNSPEC, - .name = "unknown", .pkt_to_tuple = generic_pkt_to_tuple, .invert_tuple = generic_invert_tuple, - .print_tuple = generic_print_tuple, .get_l4proto = generic_get_l4proto, }; EXPORT_SYMBOL_GPL(nf_conntrack_l3proto_generic); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 7999e70c3bfb..59c08997bfdf 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -61,8 +61,8 @@ MODULE_LICENSE("GPL"); static char __initdata version[] = "0.93"; static int ctnetlink_dump_tuples_proto(struct sk_buff *skb, - const struct nf_conntrack_tuple *tuple, - struct nf_conntrack_l4proto *l4proto) + const struct nf_conntrack_tuple *tuple, + const struct nf_conntrack_l4proto *l4proto) { int ret = 0; struct nlattr *nest_parms; @@ -86,7 +86,7 @@ nla_put_failure: static int ctnetlink_dump_tuples_ip(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple, - struct nf_conntrack_l3proto *l3proto) + const struct nf_conntrack_l3proto *l3proto) { int ret = 0; struct nlattr *nest_parms; @@ -109,9 +109,9 @@ nla_put_failure: static int ctnetlink_dump_tuples(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple) { + const struct nf_conntrack_l3proto *l3proto; + const struct nf_conntrack_l4proto *l4proto; int ret; - struct nf_conntrack_l3proto *l3proto; - struct nf_conntrack_l4proto *l4proto; rcu_read_lock(); l3proto = __nf_ct_l3proto_find(tuple->src.l3num); @@ -163,7 +163,7 @@ nla_put_failure: static int ctnetlink_dump_protoinfo(struct sk_buff *skb, struct nf_conn *ct) { - struct nf_conntrack_l4proto *l4proto; + const struct nf_conntrack_l4proto *l4proto; struct nlattr *nest_proto; int ret; @@ -533,22 +533,27 @@ nla_put_failure: return -1; } -static inline size_t ctnetlink_proto_size(const struct nf_conn *ct) +#if defined(CONFIG_NETFILTER_NETLINK_GLUE_CT) || defined(CONFIG_NF_CONNTRACK_EVENTS) +static size_t ctnetlink_proto_size(const struct nf_conn *ct) { - struct nf_conntrack_l3proto *l3proto; - struct nf_conntrack_l4proto *l4proto; - size_t len = 0; + const struct nf_conntrack_l3proto *l3proto; + const struct nf_conntrack_l4proto *l4proto; + size_t len, len4 = 0; - rcu_read_lock(); l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct)); - len += l3proto->nla_size; + len = l3proto->nla_size; + len *= 3u; /* ORIG, REPLY, MASTER */ l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); len += l4proto->nla_size; - rcu_read_unlock(); + if (l4proto->nlattr_tuple_size) { + len4 = l4proto->nlattr_tuple_size(); + len4 *= 3u; /* ORIG, REPLY, MASTER */ + } - return len; + return len + len4; } +#endif static inline size_t ctnetlink_acct_size(const struct nf_conn *ct) { @@ -664,7 +669,6 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) nfmsg->version = NFNETLINK_V0; nfmsg->res_id = 0; - rcu_read_lock(); zone = nf_ct_zone(ct); nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED); @@ -736,8 +740,6 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) && ctnetlink_dump_mark(skb, ct) < 0) goto nla_put_failure; #endif - rcu_read_unlock(); - nlmsg_end(skb, nlh); err = nfnetlink_send(skb, net, item->portid, group, item->report, GFP_ATOMIC); @@ -747,7 +749,6 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) return 0; nla_put_failure: - rcu_read_unlock(); nlmsg_cancel(skb, nlh); nlmsg_failure: kfree_skb(skb); @@ -941,8 +942,8 @@ static const struct nla_policy proto_nla_policy[CTA_PROTO_MAX+1] = { static int ctnetlink_parse_tuple_proto(struct nlattr *attr, struct nf_conntrack_tuple *tuple) { + const struct nf_conntrack_l4proto *l4proto; struct nlattr *tb[CTA_PROTO_MAX+1]; - struct nf_conntrack_l4proto *l4proto; int ret = 0; ret = nla_parse_nested(tb, CTA_PROTO_MAX, attr, proto_nla_policy, @@ -1585,8 +1586,8 @@ static int ctnetlink_change_protoinfo(struct nf_conn *ct, const struct nlattr * const cda[]) { const struct nlattr *attr = cda[CTA_PROTOINFO]; + const struct nf_conntrack_l4proto *l4proto; struct nlattr *tb[CTA_PROTOINFO_MAX+1]; - struct nf_conntrack_l4proto *l4proto; int err = 0; err = nla_parse_nested(tb, CTA_PROTOINFO_MAX, attr, protoinfo_policy, @@ -2213,7 +2214,6 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct) const struct nf_conntrack_zone *zone; struct nlattr *nest_parms; - rcu_read_lock(); zone = nf_ct_zone(ct); nest_parms = nla_nest_start(skb, CTA_TUPLE_ORIG | NLA_F_NESTED); @@ -2272,11 +2272,9 @@ static int __ctnetlink_glue_build(struct sk_buff *skb, struct nf_conn *ct) #endif if (ctnetlink_dump_labels(skb, ct) < 0) goto nla_put_failure; - rcu_read_unlock(); return 0; nla_put_failure: - rcu_read_unlock(); return -ENOSPC; } @@ -2483,11 +2481,11 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_tuple_mask *mask) { - int ret; - struct nf_conntrack_l3proto *l3proto; - struct nf_conntrack_l4proto *l4proto; + const struct nf_conntrack_l3proto *l3proto; + const struct nf_conntrack_l4proto *l4proto; struct nf_conntrack_tuple m; struct nlattr *nest_parms; + int ret; memset(&m, 0xFF, sizeof(m)); memcpy(&m.src.u3, &mask->src.u3, sizeof(m.src.u3)); @@ -2661,17 +2659,14 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) nfmsg->version = NFNETLINK_V0; nfmsg->res_id = 0; - rcu_read_lock(); if (ctnetlink_exp_dump_expect(skb, exp) < 0) goto nla_put_failure; - rcu_read_unlock(); nlmsg_end(skb, nlh); nfnetlink_send(skb, net, item->portid, group, item->report, GFP_ATOMIC); return 0; nla_put_failure: - rcu_read_unlock(); nlmsg_cancel(skb, nlh); nlmsg_failure: kfree_skb(skb); @@ -2910,6 +2905,21 @@ out: return err == -EAGAIN ? -ENOBUFS : err; } +static bool expect_iter_name(struct nf_conntrack_expect *exp, void *data) +{ + const struct nf_conn_help *m_help; + const char *name = data; + + m_help = nfct_help(exp->master); + + return strcmp(m_help->helper->name, name) == 0; +} + +static bool expect_iter_all(struct nf_conntrack_expect *exp, void *data) +{ + return true; +} + static int ctnetlink_del_expect(struct net *net, struct sock *ctnl, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const cda[], @@ -2918,10 +2928,8 @@ static int ctnetlink_del_expect(struct net *net, struct sock *ctnl, struct nf_conntrack_expect *exp; struct nf_conntrack_tuple tuple; struct nfgenmsg *nfmsg = nlmsg_data(nlh); - struct hlist_node *next; u_int8_t u3 = nfmsg->nfgen_family; struct nf_conntrack_zone zone; - unsigned int i; int err; if (cda[CTA_EXPECT_TUPLE]) { @@ -2961,49 +2969,15 @@ static int ctnetlink_del_expect(struct net *net, struct sock *ctnl, nf_ct_expect_put(exp); } else if (cda[CTA_EXPECT_HELP_NAME]) { char *name = nla_data(cda[CTA_EXPECT_HELP_NAME]); - struct nf_conn_help *m_help; - - /* delete all expectations for this helper */ - spin_lock_bh(&nf_conntrack_expect_lock); - for (i = 0; i < nf_ct_expect_hsize; i++) { - hlist_for_each_entry_safe(exp, next, - &nf_ct_expect_hash[i], - hnode) { - - if (!net_eq(nf_ct_exp_net(exp), net)) - continue; - m_help = nfct_help(exp->master); - if (!strcmp(m_help->helper->name, name) && - del_timer(&exp->timeout)) { - nf_ct_unlink_expect_report(exp, - NETLINK_CB(skb).portid, - nlmsg_report(nlh)); - nf_ct_expect_put(exp); - } - } - } - spin_unlock_bh(&nf_conntrack_expect_lock); + nf_ct_expect_iterate_net(net, expect_iter_name, name, + NETLINK_CB(skb).portid, + nlmsg_report(nlh)); } else { /* This basically means we have to flush everything*/ - spin_lock_bh(&nf_conntrack_expect_lock); - for (i = 0; i < nf_ct_expect_hsize; i++) { - hlist_for_each_entry_safe(exp, next, - &nf_ct_expect_hash[i], - hnode) { - - if (!net_eq(nf_ct_exp_net(exp), net)) - continue; - - if (del_timer(&exp->timeout)) { - nf_ct_unlink_expect_report(exp, - NETLINK_CB(skb).portid, - nlmsg_report(nlh)); - nf_ct_expect_put(exp); - } - } - } - spin_unlock_bh(&nf_conntrack_expect_lock); + nf_ct_expect_iterate_net(net, expect_iter_all, NULL, + NETLINK_CB(skb).portid, + nlmsg_report(nlh)); } return 0; diff --git a/net/netfilter/nf_conntrack_pptp.c b/net/netfilter/nf_conntrack_pptp.c index 6959e93063d4..11562f2a08bb 100644 --- a/net/netfilter/nf_conntrack_pptp.c +++ b/net/netfilter/nf_conntrack_pptp.c @@ -113,7 +113,6 @@ static void pptp_expectfn(struct nf_conn *ct, /* Can you see how rusty this code is, compared with the pre-2.6.11 * one? That's what happened to my shiny newnat of 2002 ;( -HW */ - rcu_read_lock(); nf_nat_pptp_expectfn = rcu_dereference(nf_nat_pptp_hook_expectfn); if (nf_nat_pptp_expectfn && ct->master->status & IPS_NAT_MASK) nf_nat_pptp_expectfn(ct, exp); @@ -136,7 +135,6 @@ static void pptp_expectfn(struct nf_conn *ct, pr_debug("not found\n"); } } - rcu_read_unlock(); } static int destroy_sibling_or_exp(struct net *net, struct nf_conn *ct, diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c index 1dcad229c3cc..c8e9c9503a08 100644 --- a/net/netfilter/nf_conntrack_proto.c +++ b/net/netfilter/nf_conntrack_proto.c @@ -27,6 +27,7 @@ #include <net/netfilter/nf_conntrack_l3proto.h> #include <net/netfilter/nf_conntrack_l4proto.h> #include <net/netfilter/nf_conntrack_core.h> +#include <net/netfilter/nf_log.h> static struct nf_conntrack_l4proto __rcu **nf_ct_protos[NFPROTO_NUMPROTO] __read_mostly; struct nf_conntrack_l3proto __rcu *nf_ct_l3protos[NFPROTO_NUMPROTO] __read_mostly; @@ -63,9 +64,55 @@ nf_ct_unregister_sysctl(struct ctl_table_header **header, *header = NULL; *table = NULL; } + +__printf(5, 6) +void nf_l4proto_log_invalid(const struct sk_buff *skb, + struct net *net, + u16 pf, u8 protonum, + const char *fmt, ...) +{ + struct va_format vaf; + va_list args; + + if (net->ct.sysctl_log_invalid != protonum || + net->ct.sysctl_log_invalid != IPPROTO_RAW) + return; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + + nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, + "nf_ct_proto_%d: %pV ", protonum, &vaf); + va_end(args); +} +EXPORT_SYMBOL_GPL(nf_l4proto_log_invalid); + +__printf(3, 4) +void nf_ct_l4proto_log_invalid(const struct sk_buff *skb, + const struct nf_conn *ct, + const char *fmt, ...) +{ + struct va_format vaf; + struct net *net; + va_list args; + + net = nf_ct_net(ct); + if (likely(net->ct.sysctl_log_invalid == 0)) + return; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + + nf_l4proto_log_invalid(skb, net, nf_ct_l3num(ct), + nf_ct_protonum(ct), "%pV", &vaf); + va_end(args); +} +EXPORT_SYMBOL_GPL(nf_ct_l4proto_log_invalid); #endif -struct nf_conntrack_l4proto * +const struct nf_conntrack_l4proto * __nf_ct_l4proto_find(u_int16_t l3proto, u_int8_t l4proto) { if (unlikely(l3proto >= NFPROTO_NUMPROTO || nf_ct_protos[l3proto] == NULL)) @@ -77,7 +124,7 @@ EXPORT_SYMBOL_GPL(__nf_ct_l4proto_find); /* this is guaranteed to always return a valid protocol helper, since * it falls back to generic_protocol */ -struct nf_conntrack_l3proto * +const struct nf_conntrack_l3proto * nf_ct_l3proto_find_get(u_int16_t l3proto) { struct nf_conntrack_l3proto *p; @@ -95,8 +142,8 @@ EXPORT_SYMBOL_GPL(nf_ct_l3proto_find_get); int nf_ct_l3proto_try_module_get(unsigned short l3proto) { + const struct nf_conntrack_l3proto *p; int ret; - struct nf_conntrack_l3proto *p; retry: p = nf_ct_l3proto_find_get(l3proto); if (p == &nf_conntrack_l3proto_generic) { @@ -125,7 +172,7 @@ void nf_ct_l3proto_module_put(unsigned short l3proto) } EXPORT_SYMBOL_GPL(nf_ct_l3proto_module_put); -int nf_ct_netns_get(struct net *net, u8 nfproto) +static int nf_ct_netns_do_get(struct net *net, u8 nfproto) { const struct nf_conntrack_l3proto *l3proto; int ret; @@ -150,9 +197,33 @@ int nf_ct_netns_get(struct net *net, u8 nfproto) return ret; } + +int nf_ct_netns_get(struct net *net, u8 nfproto) +{ + int err; + + if (nfproto == NFPROTO_INET) { + err = nf_ct_netns_do_get(net, NFPROTO_IPV4); + if (err < 0) + goto err1; + err = nf_ct_netns_do_get(net, NFPROTO_IPV6); + if (err < 0) + goto err2; + } else { + err = nf_ct_netns_do_get(net, nfproto); + if (err < 0) + goto err1; + } + return 0; + +err2: + nf_ct_netns_put(net, NFPROTO_IPV4); +err1: + return err; +} EXPORT_SYMBOL_GPL(nf_ct_netns_get); -void nf_ct_netns_put(struct net *net, u8 nfproto) +static void nf_ct_netns_do_put(struct net *net, u8 nfproto) { const struct nf_conntrack_l3proto *l3proto; @@ -171,12 +242,21 @@ void nf_ct_netns_put(struct net *net, u8 nfproto) nf_ct_l3proto_module_put(nfproto); } + +void nf_ct_netns_put(struct net *net, uint8_t nfproto) +{ + if (nfproto == NFPROTO_INET) { + nf_ct_netns_do_put(net, NFPROTO_IPV4); + nf_ct_netns_do_put(net, NFPROTO_IPV6); + } else + nf_ct_netns_do_put(net, nfproto); +} EXPORT_SYMBOL_GPL(nf_ct_netns_put); -struct nf_conntrack_l4proto * +const struct nf_conntrack_l4proto * nf_ct_l4proto_find_get(u_int16_t l3num, u_int8_t l4num) { - struct nf_conntrack_l4proto *p; + const struct nf_conntrack_l4proto *p; rcu_read_lock(); p = __nf_ct_l4proto_find(l3num, l4num); @@ -188,7 +268,7 @@ nf_ct_l4proto_find_get(u_int16_t l3num, u_int8_t l4num) } EXPORT_SYMBOL_GPL(nf_ct_l4proto_find_get); -void nf_ct_l4proto_put(struct nf_conntrack_l4proto *p) +void nf_ct_l4proto_put(const struct nf_conntrack_l4proto *p) { module_put(p->me); } @@ -196,28 +276,28 @@ EXPORT_SYMBOL_GPL(nf_ct_l4proto_put); static int kill_l3proto(struct nf_conn *i, void *data) { - return nf_ct_l3num(i) == ((struct nf_conntrack_l3proto *)data)->l3proto; + return nf_ct_l3num(i) == ((const struct nf_conntrack_l3proto *)data)->l3proto; } static int kill_l4proto(struct nf_conn *i, void *data) { - struct nf_conntrack_l4proto *l4proto; + const struct nf_conntrack_l4proto *l4proto; l4proto = data; return nf_ct_protonum(i) == l4proto->l4proto && nf_ct_l3num(i) == l4proto->l3proto; } -int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto) +int nf_ct_l3proto_register(const struct nf_conntrack_l3proto *proto) { int ret = 0; struct nf_conntrack_l3proto *old; if (proto->l3proto >= NFPROTO_NUMPROTO) return -EBUSY; - - if (proto->tuple_to_nlattr && !proto->nlattr_tuple_size) +#if IS_ENABLED(CONFIG_NF_CT_NETLINK) + if (proto->tuple_to_nlattr && proto->nla_size == 0) return -EINVAL; - +#endif mutex_lock(&nf_ct_proto_mutex); old = rcu_dereference_protected(nf_ct_l3protos[proto->l3proto], lockdep_is_held(&nf_ct_proto_mutex)); @@ -226,9 +306,6 @@ int nf_ct_l3proto_register(struct nf_conntrack_l3proto *proto) goto out_unlock; } - if (proto->nlattr_tuple_size) - proto->nla_size = 3 * proto->nlattr_tuple_size(); - rcu_assign_pointer(nf_ct_l3protos[proto->l3proto], proto); out_unlock: @@ -238,21 +315,7 @@ out_unlock: } EXPORT_SYMBOL_GPL(nf_ct_l3proto_register); -#ifdef CONFIG_SYSCTL -extern unsigned int nf_conntrack_default_on; - -int nf_ct_l3proto_pernet_register(struct net *net, - struct nf_conntrack_l3proto *proto) -{ - if (nf_conntrack_default_on == 0) - return 0; - - return proto->net_ns_get ? proto->net_ns_get(net) : 0; -} -EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_register); -#endif - -void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto) +void nf_ct_l3proto_unregister(const struct nf_conntrack_l3proto *proto) { BUG_ON(proto->l3proto >= NFPROTO_NUMPROTO); @@ -266,27 +329,12 @@ void nf_ct_l3proto_unregister(struct nf_conntrack_l3proto *proto) synchronize_rcu(); /* Remove all contrack entries for this protocol */ - nf_ct_iterate_destroy(kill_l3proto, proto); + nf_ct_iterate_destroy(kill_l3proto, (void*)proto); } EXPORT_SYMBOL_GPL(nf_ct_l3proto_unregister); -void nf_ct_l3proto_pernet_unregister(struct net *net, - struct nf_conntrack_l3proto *proto) -{ - /* - * nf_conntrack_default_on *might* have registered hooks. - * ->net_ns_put must cope with more puts() than get(), i.e. - * if nf_conntrack_default_on was 0 at time of - * nf_ct_l3proto_pernet_register invocation this net_ns_put() - * should be a noop. - */ - if (proto->net_ns_put) - proto->net_ns_put(net); -} -EXPORT_SYMBOL_GPL(nf_ct_l3proto_pernet_unregister); - static struct nf_proto_net *nf_ct_l4proto_net(struct net *net, - struct nf_conntrack_l4proto *l4proto) + const struct nf_conntrack_l4proto *l4proto) { if (l4proto->get_net_proto) { /* statically built-in protocols use static per-net */ @@ -301,7 +349,7 @@ static struct nf_proto_net *nf_ct_l4proto_net(struct net *net, static int nf_ct_l4proto_register_sysctl(struct net *net, struct nf_proto_net *pn, - struct nf_conntrack_l4proto *l4proto) + const struct nf_conntrack_l4proto *l4proto) { int err = 0; @@ -324,8 +372,8 @@ int nf_ct_l4proto_register_sysctl(struct net *net, static void nf_ct_l4proto_unregister_sysctl(struct net *net, - struct nf_proto_net *pn, - struct nf_conntrack_l4proto *l4proto) + struct nf_proto_net *pn, + const struct nf_conntrack_l4proto *l4proto) { #ifdef CONFIG_SYSCTL if (pn->ctl_table_header != NULL) @@ -383,8 +431,6 @@ int nf_ct_l4proto_register_one(struct nf_conntrack_l4proto *l4proto) l4proto->nla_size = 0; if (l4proto->nlattr_size) l4proto->nla_size += l4proto->nlattr_size(); - if (l4proto->nlattr_tuple_size) - l4proto->nla_size += 3 * l4proto->nlattr_tuple_size(); rcu_assign_pointer(nf_ct_protos[l4proto->l3proto][l4proto->l4proto], l4proto); @@ -395,7 +441,7 @@ out_unlock: EXPORT_SYMBOL_GPL(nf_ct_l4proto_register_one); int nf_ct_l4proto_pernet_register_one(struct net *net, - struct nf_conntrack_l4proto *l4proto) + const struct nf_conntrack_l4proto *l4proto) { int ret = 0; struct nf_proto_net *pn = NULL; @@ -420,7 +466,7 @@ out: } EXPORT_SYMBOL_GPL(nf_ct_l4proto_pernet_register_one); -static void __nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto) +static void __nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto) { BUG_ON(l4proto->l3proto >= ARRAY_SIZE(nf_ct_protos)); @@ -433,7 +479,7 @@ static void __nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto) &nf_conntrack_l4proto_generic); } -void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto) +void nf_ct_l4proto_unregister_one(const struct nf_conntrack_l4proto *l4proto) { mutex_lock(&nf_ct_proto_mutex); __nf_ct_l4proto_unregister_one(l4proto); @@ -444,7 +490,7 @@ void nf_ct_l4proto_unregister_one(struct nf_conntrack_l4proto *l4proto) EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister_one); void nf_ct_l4proto_pernet_unregister_one(struct net *net, - struct nf_conntrack_l4proto *l4proto) + const struct nf_conntrack_l4proto *l4proto) { struct nf_proto_net *pn = nf_ct_l4proto_net(net, l4proto); @@ -469,8 +515,8 @@ int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto[], } if (i != num_proto) { ver = l4proto[i]->l3proto == PF_INET6 ? 6 : 4; - pr_err("nf_conntrack_ipv%d: can't register %s%d proto.\n", - ver, l4proto[i]->name, ver); + pr_err("nf_conntrack_ipv%d: can't register l4 %d proto.\n", + ver, l4proto[i]->l4proto); nf_ct_l4proto_unregister(l4proto, i); } return ret; @@ -478,7 +524,7 @@ int nf_ct_l4proto_register(struct nf_conntrack_l4proto *l4proto[], EXPORT_SYMBOL_GPL(nf_ct_l4proto_register); int nf_ct_l4proto_pernet_register(struct net *net, - struct nf_conntrack_l4proto *l4proto[], + struct nf_conntrack_l4proto *const l4proto[], unsigned int num_proto) { int ret = -EINVAL; @@ -490,8 +536,8 @@ int nf_ct_l4proto_pernet_register(struct net *net, break; } if (i != num_proto) { - pr_err("nf_conntrack_%s%d: pernet registration failed\n", - l4proto[i]->name, + pr_err("nf_conntrack_proto_%d %d: pernet registration failed\n", + l4proto[i]->l4proto, l4proto[i]->l3proto == PF_INET6 ? 6 : 4); nf_ct_l4proto_pernet_unregister(net, l4proto, i); } @@ -514,8 +560,8 @@ void nf_ct_l4proto_unregister(struct nf_conntrack_l4proto *l4proto[], EXPORT_SYMBOL_GPL(nf_ct_l4proto_unregister); void nf_ct_l4proto_pernet_unregister(struct net *net, - struct nf_conntrack_l4proto *l4proto[], - unsigned int num_proto) + struct nf_conntrack_l4proto *const l4proto[], + unsigned int num_proto) { while (num_proto-- != 0) nf_ct_l4proto_pernet_unregister_one(net, l4proto[num_proto]); diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c index 4707d997558a..2a446f4a554c 100644 --- a/net/netfilter/nf_conntrack_proto_dccp.c +++ b/net/netfilter/nf_conntrack_proto_dccp.c @@ -428,13 +428,13 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, default: dn = dccp_pernet(net); if (dn->dccp_loose == 0) { - msg = "nf_ct_dccp: not picking up existing connection "; + msg = "not picking up existing connection "; goto out_invalid; } case CT_DCCP_REQUEST: break; case CT_DCCP_INVALID: - msg = "nf_ct_dccp: invalid state transition "; + msg = "invalid state transition "; goto out_invalid; } @@ -447,9 +447,7 @@ static bool dccp_new(struct nf_conn *ct, const struct sk_buff *skb, return true; out_invalid: - if (LOG_INVALID(net, IPPROTO_DCCP)) - nf_log_packet(net, nf_ct_l3num(ct), 0, skb, NULL, NULL, - NULL, "%s", msg); + nf_ct_l4proto_log_invalid(skb, ct, "%s", msg); return false; } @@ -469,10 +467,8 @@ static unsigned int *dccp_get_timeouts(struct net *net) static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - u_int8_t pf, unsigned int hooknum, unsigned int *timeouts) { - struct net *net = nf_ct_net(ct); enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); struct dccp_hdr _dh, *dh; u_int8_t type, old_state, new_state; @@ -534,15 +530,11 @@ static int dccp_packet(struct nf_conn *ct, const struct sk_buff *skb, ct->proto.dccp.last_pkt = type; spin_unlock_bh(&ct->lock); - if (LOG_INVALID(net, IPPROTO_DCCP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_dccp: invalid packet ignored "); + nf_ct_l4proto_log_invalid(skb, ct, "%s", "invalid packet"); return NF_ACCEPT; case CT_DCCP_INVALID: spin_unlock_bh(&ct->lock); - if (LOG_INVALID(net, IPPROTO_DCCP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_dccp: invalid state transition "); + nf_ct_l4proto_log_invalid(skb, ct, "%s", "invalid state transition"); return -NF_ACCEPT; } @@ -604,8 +596,7 @@ static int dccp_error(struct net *net, struct nf_conn *tmpl, return NF_ACCEPT; out_invalid: - if (LOG_INVALID(net, IPPROTO_DCCP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "%s", msg); + nf_l4proto_log_invalid(skb, net, pf, IPPROTO_DCCP, "%s", msg); return -NF_ACCEPT; } @@ -623,18 +614,12 @@ static bool dccp_can_early_drop(const struct nf_conn *ct) return false; } -static void dccp_print_tuple(struct seq_file *s, - const struct nf_conntrack_tuple *tuple) -{ - seq_printf(s, "sport=%hu dport=%hu ", - ntohs(tuple->src.u.dccp.port), - ntohs(tuple->dst.u.dccp.port)); -} - +#ifdef CONFIG_NF_CONNTRACK_PROCFS static void dccp_print_conntrack(struct seq_file *s, struct nf_conn *ct) { seq_printf(s, "%s ", dccp_state_names[ct->proto.dccp.state]); } +#endif #if IS_ENABLED(CONFIG_NF_CT_NETLINK) static int dccp_to_nlattr(struct sk_buff *skb, struct nlattr *nla, @@ -880,7 +865,6 @@ static struct nf_proto_net *dccp_get_net_proto(struct net *net) struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = { .l3proto = AF_INET, .l4proto = IPPROTO_DCCP, - .name = "dccp", .pkt_to_tuple = dccp_pkt_to_tuple, .invert_tuple = dccp_invert_tuple, .new = dccp_new, @@ -888,8 +872,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp4 __read_mostly = { .get_timeouts = dccp_get_timeouts, .error = dccp_error, .can_early_drop = dccp_can_early_drop, - .print_tuple = dccp_print_tuple, +#ifdef CONFIG_NF_CONNTRACK_PROCFS .print_conntrack = dccp_print_conntrack, +#endif #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .to_nlattr = dccp_to_nlattr, .nlattr_size = dccp_nlattr_size, @@ -916,7 +901,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_dccp4); struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = { .l3proto = AF_INET6, .l4proto = IPPROTO_DCCP, - .name = "dccp", .pkt_to_tuple = dccp_pkt_to_tuple, .invert_tuple = dccp_invert_tuple, .new = dccp_new, @@ -924,8 +908,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_dccp6 __read_mostly = { .get_timeouts = dccp_get_timeouts, .error = dccp_error, .can_early_drop = dccp_can_early_drop, - .print_tuple = dccp_print_tuple, +#ifdef CONFIG_NF_CONNTRACK_PROCFS .print_conntrack = dccp_print_conntrack, +#endif #if IS_ENABLED(CONFIG_NF_CT_NETLINK) .to_nlattr = dccp_to_nlattr, .nlattr_size = dccp_nlattr_size, diff --git a/net/netfilter/nf_conntrack_proto_generic.c b/net/netfilter/nf_conntrack_proto_generic.c index d5868bad33a7..1f86ddf6649a 100644 --- a/net/netfilter/nf_conntrack_proto_generic.c +++ b/net/netfilter/nf_conntrack_proto_generic.c @@ -17,22 +17,10 @@ static unsigned int nf_ct_generic_timeout __read_mostly = 600*HZ; static bool nf_generic_should_process(u8 proto) { switch (proto) { -#ifdef CONFIG_NF_CT_PROTO_SCTP_MODULE - case IPPROTO_SCTP: - return false; -#endif -#ifdef CONFIG_NF_CT_PROTO_DCCP_MODULE - case IPPROTO_DCCP: - return false; -#endif #ifdef CONFIG_NF_CT_PROTO_GRE_MODULE case IPPROTO_GRE: return false; #endif -#ifdef CONFIG_NF_CT_PROTO_UDPLITE_MODULE - case IPPROTO_UDPLITE: - return false; -#endif default: return true; } @@ -62,12 +50,6 @@ static bool generic_invert_tuple(struct nf_conntrack_tuple *tuple, return true; } -/* Print out the per-protocol part of the tuple. */ -static void generic_print_tuple(struct seq_file *s, - const struct nf_conntrack_tuple *tuple) -{ -} - static unsigned int *generic_get_timeouts(struct net *net) { return &(generic_pernet(net)->timeout); @@ -78,8 +60,6 @@ static int generic_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - u_int8_t pf, - unsigned int hooknum, unsigned int *timeout) { nf_ct_refresh_acct(ct, ctinfo, skb, *timeout); @@ -187,10 +167,8 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_generic __read_mostly = { .l3proto = PF_UNSPEC, .l4proto = 255, - .name = "unknown", .pkt_to_tuple = generic_pkt_to_tuple, .invert_tuple = generic_invert_tuple, - .print_tuple = generic_print_tuple, .packet = generic_packet, .get_timeouts = generic_get_timeouts, .new = generic_new, diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c index 87bb40a3feb5..a2503005d80b 100644 --- a/net/netfilter/nf_conntrack_proto_gre.c +++ b/net/netfilter/nf_conntrack_proto_gre.c @@ -224,15 +224,7 @@ static bool gre_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff, return true; } -/* print gre part of tuple */ -static void gre_print_tuple(struct seq_file *s, - const struct nf_conntrack_tuple *tuple) -{ - seq_printf(s, "srckey=0x%x dstkey=0x%x ", - ntohs(tuple->src.u.gre.key), - ntohs(tuple->dst.u.gre.key)); -} - +#ifdef CONFIG_NF_CONNTRACK_PROCFS /* print private data for conntrack */ static void gre_print_conntrack(struct seq_file *s, struct nf_conn *ct) { @@ -240,6 +232,7 @@ static void gre_print_conntrack(struct seq_file *s, struct nf_conn *ct) (ct->proto.gre.timeout / HZ), (ct->proto.gre.stream_timeout / HZ)); } +#endif static unsigned int *gre_get_timeouts(struct net *net) { @@ -251,8 +244,6 @@ static int gre_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - u_int8_t pf, - unsigned int hooknum, unsigned int *timeouts) { /* If we've seen traffic both ways, this is a GRE connection. @@ -364,11 +355,11 @@ static int gre_init_net(struct net *net, u_int16_t proto) static struct nf_conntrack_l4proto nf_conntrack_l4proto_gre4 __read_mostly = { .l3proto = AF_INET, .l4proto = IPPROTO_GRE, - .name = "gre", .pkt_to_tuple = gre_pkt_to_tuple, .invert_tuple = gre_invert_tuple, - .print_tuple = gre_print_tuple, +#ifdef CONFIG_NF_CONNTRACK_PROCFS .print_conntrack = gre_print_conntrack, +#endif .get_timeouts = gre_get_timeouts, .packet = gre_packet, .new = gre_new, diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index 6eef29d2eec4..80faf04ddf15 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -174,20 +174,13 @@ static bool sctp_invert_tuple(struct nf_conntrack_tuple *tuple, return true; } -/* Print out the per-protocol part of the tuple. */ -static void sctp_print_tuple(struct seq_file *s, - const struct nf_conntrack_tuple *tuple) -{ - seq_printf(s, "sport=%hu dport=%hu ", - ntohs(tuple->src.u.sctp.port), - ntohs(tuple->dst.u.sctp.port)); -} - +#ifdef CONFIG_NF_CONNTRACK_PROCFS /* Print out the private part of the conntrack. */ static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct) { seq_printf(s, "%s ", sctp_conntrack_names[ct->proto.sctp.state]); } +#endif #define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count) \ for ((offset) = (dataoff) + sizeof(struct sctphdr), (count) = 0; \ @@ -313,8 +306,6 @@ static int sctp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - u_int8_t pf, - unsigned int hooknum, unsigned int *timeouts) { enum sctp_conntrack new_state, old_state; @@ -530,8 +521,7 @@ static int sctp_error(struct net *net, struct nf_conn *tpl, struct sk_buff *skb, } return NF_ACCEPT; out_invalid: - if (LOG_INVALID(net, IPPROTO_SCTP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, "%s", logmsg); + nf_l4proto_log_invalid(skb, net, pf, IPPROTO_SCTP, "%s", logmsg); return -NF_ACCEPT; } @@ -791,11 +781,11 @@ static struct nf_proto_net *sctp_get_net_proto(struct net *net) struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp4 __read_mostly = { .l3proto = PF_INET, .l4proto = IPPROTO_SCTP, - .name = "sctp", .pkt_to_tuple = sctp_pkt_to_tuple, .invert_tuple = sctp_invert_tuple, - .print_tuple = sctp_print_tuple, +#ifdef CONFIG_NF_CONNTRACK_PROCFS .print_conntrack = sctp_print_conntrack, +#endif .packet = sctp_packet, .get_timeouts = sctp_get_timeouts, .new = sctp_new, @@ -828,11 +818,11 @@ EXPORT_SYMBOL_GPL(nf_conntrack_l4proto_sctp4); struct nf_conntrack_l4proto nf_conntrack_l4proto_sctp6 __read_mostly = { .l3proto = PF_INET6, .l4proto = IPPROTO_SCTP, - .name = "sctp", .pkt_to_tuple = sctp_pkt_to_tuple, .invert_tuple = sctp_invert_tuple, - .print_tuple = sctp_print_tuple, +#ifdef CONFIG_NF_CONNTRACK_PROCFS .print_conntrack = sctp_print_conntrack, +#endif .packet = sctp_packet, .get_timeouts = sctp_get_timeouts, .new = sctp_new, diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index 9758a7dfd83e..b12fc07111d0 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -301,20 +301,13 @@ static bool tcp_invert_tuple(struct nf_conntrack_tuple *tuple, return true; } -/* Print out the per-protocol part of the tuple. */ -static void tcp_print_tuple(struct seq_file *s, - const struct nf_conntrack_tuple *tuple) -{ - seq_printf(s, "sport=%hu dport=%hu ", - ntohs(tuple->src.u.tcp.port), - ntohs(tuple->dst.u.tcp.port)); -} - +#ifdef CONFIG_NF_CONNTRACK_PROCFS /* Print out the private part of the conntrack. */ static void tcp_print_conntrack(struct seq_file *s, struct nf_conn *ct) { seq_printf(s, "%s ", tcp_conntrack_names[ct->proto.tcp.state]); } +#endif static unsigned int get_conntrack_index(const struct tcphdr *tcph) { @@ -500,8 +493,7 @@ static bool tcp_in_window(const struct nf_conn *ct, unsigned int index, const struct sk_buff *skb, unsigned int dataoff, - const struct tcphdr *tcph, - u_int8_t pf) + const struct tcphdr *tcph) { struct net *net = nf_ct_net(ct); struct nf_tcp_net *tn = tcp_pernet(net); @@ -709,9 +701,9 @@ static bool tcp_in_window(const struct nf_conn *ct, if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL || tn->tcp_be_liberal) res = true; - if (!res && LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_tcp: %s ", + if (!res) { + nf_ct_l4proto_log_invalid(skb, ct, + "%s", before(seq, sender->td_maxend + 1) ? in_recv_win ? before(sack, receiver->td_end + 1) ? @@ -720,6 +712,7 @@ static bool tcp_in_window(const struct nf_conn *ct, : "ACK is over the upper bound (ACKed data not seen yet)" : "SEQ is under the lower bound (already ACKed data retransmitted)" : "SEQ is over the upper bound (over the window of the receiver)"); + } } pr_debug("tcp_in_window: res=%u sender end=%u maxend=%u maxwin=%u " @@ -745,6 +738,12 @@ static const u8 tcp_valid_flags[(TCPHDR_FIN|TCPHDR_SYN|TCPHDR_RST|TCPHDR_ACK| [TCPHDR_ACK|TCPHDR_URG] = 1, }; +static void tcp_error_log(const struct sk_buff *skb, struct net *net, + u8 pf, const char *msg) +{ + nf_l4proto_log_invalid(skb, net, pf, IPPROTO_TCP, "%s", msg); +} + /* Protect conntrack agaist broken packets. Code taken from ipt_unclean.c. */ static int tcp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, @@ -760,17 +759,13 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl, /* Smaller that minimal TCP header? */ th = skb_header_pointer(skb, dataoff, sizeof(_tcph), &_tcph); if (th == NULL) { - if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_tcp: short packet "); + tcp_error_log(skb, net, pf, "short packet"); return -NF_ACCEPT; } /* Not whole TCP header or malformed packet */ if (th->doff*4 < sizeof(struct tcphdr) || tcplen < th->doff*4) { - if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_tcp: truncated/malformed packet "); + tcp_error_log(skb, net, pf, "truncated packet"); return -NF_ACCEPT; } @@ -781,18 +776,14 @@ static int tcp_error(struct net *net, struct nf_conn *tmpl, /* FIXME: Source route IP option packets --RR */ if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum(skb, hooknum, dataoff, IPPROTO_TCP, pf)) { - if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_tcp: bad TCP checksum "); + tcp_error_log(skb, net, pf, "bad checksum"); return -NF_ACCEPT; } /* Check TCP flags. */ tcpflags = (tcp_flag_byte(th) & ~(TCPHDR_ECE|TCPHDR_CWR|TCPHDR_PSH)); if (!tcp_valid_flags[tcpflags]) { - if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_tcp: invalid TCP flag combination "); + tcp_error_log(skb, net, pf, "invalid tcp flag combination"); return -NF_ACCEPT; } @@ -809,8 +800,6 @@ static int tcp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - u_int8_t pf, - unsigned int hooknum, unsigned int *timeouts) { struct net *net = nf_ct_net(ct); @@ -947,10 +936,8 @@ static int tcp_packet(struct nf_conn *ct, IP_CT_EXP_CHALLENGE_ACK; } spin_unlock_bh(&ct->lock); - if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_tcp: invalid packet ignored in " - "state %s ", tcp_conntrack_names[old_state]); + nf_ct_l4proto_log_invalid(skb, ct, "invalid packet ignored in " + "state %s ", tcp_conntrack_names[old_state]); return NF_ACCEPT; case TCP_CONNTRACK_MAX: /* Special case for SYN proxy: when the SYN to the server or @@ -972,9 +959,7 @@ static int tcp_packet(struct nf_conn *ct, pr_debug("nf_ct_tcp: Invalid dir=%i index=%u ostate=%u\n", dir, get_conntrack_index(th), old_state); spin_unlock_bh(&ct->lock); - if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_tcp: invalid state "); + nf_ct_l4proto_log_invalid(skb, ct, "invalid state"); return -NF_ACCEPT; case TCP_CONNTRACK_TIME_WAIT: /* RFC5961 compliance cause stack to send "challenge-ACK" @@ -989,9 +974,7 @@ static int tcp_packet(struct nf_conn *ct, /* Detected RFC5961 challenge ACK */ ct->proto.tcp.last_flags &= ~IP_CT_EXP_CHALLENGE_ACK; spin_unlock_bh(&ct->lock); - if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_tcp: challenge-ACK ignored "); + nf_ct_l4proto_log_invalid(skb, ct, "challenge-ack ignored"); return NF_ACCEPT; /* Don't change state */ } break; @@ -1001,9 +984,7 @@ static int tcp_packet(struct nf_conn *ct, && before(ntohl(th->seq), ct->proto.tcp.seen[!dir].td_maxack)) { /* Invalid RST */ spin_unlock_bh(&ct->lock); - if (LOG_INVALID(net, IPPROTO_TCP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, - NULL, "nf_ct_tcp: invalid RST "); + nf_ct_l4proto_log_invalid(skb, ct, "invalid rst"); return -NF_ACCEPT; } if (index == TCP_RST_SET @@ -1030,7 +1011,7 @@ static int tcp_packet(struct nf_conn *ct, } if (!tcp_in_window(ct, &ct->proto.tcp, dir, index, - skb, dataoff, th, pf)) { + skb, dataoff, th)) { spin_unlock_bh(&ct->lock); return -NF_ACCEPT; } @@ -1296,9 +1277,14 @@ static int tcp_nlattr_size(void) + nla_policy_len(tcp_nla_policy, CTA_PROTOINFO_TCP_MAX + 1); } -static int tcp_nlattr_tuple_size(void) +static unsigned int tcp_nlattr_tuple_size(void) { - return nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1); + static unsigned int size __read_mostly; + + if (!size) + size = nla_policy_len(nf_ct_port_nla_policy, CTA_PROTO_MAX + 1); + + return size; } #endif @@ -1556,11 +1542,11 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp4 __read_mostly = { .l3proto = PF_INET, .l4proto = IPPROTO_TCP, - .name = "tcp", .pkt_to_tuple = tcp_pkt_to_tuple, .invert_tuple = tcp_invert_tuple, - .print_tuple = tcp_print_tuple, +#ifdef CONFIG_NF_CONNTRACK_PROCFS .print_conntrack = tcp_print_conntrack, +#endif .packet = tcp_packet, .get_timeouts = tcp_get_timeouts, .new = tcp_new, @@ -1594,11 +1580,11 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_tcp6 __read_mostly = { .l3proto = PF_INET6, .l4proto = IPPROTO_TCP, - .name = "tcp", .pkt_to_tuple = tcp_pkt_to_tuple, .invert_tuple = tcp_invert_tuple, - .print_tuple = tcp_print_tuple, +#ifdef CONFIG_NF_CONNTRACK_PROCFS .print_conntrack = tcp_print_conntrack, +#endif .packet = tcp_packet, .get_timeouts = tcp_get_timeouts, .new = tcp_new, diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c index f6ebce6178ca..3a5f727103af 100644 --- a/net/netfilter/nf_conntrack_proto_udp.c +++ b/net/netfilter/nf_conntrack_proto_udp.c @@ -63,15 +63,6 @@ static bool udp_invert_tuple(struct nf_conntrack_tuple *tuple, return true; } -/* Print out the per-protocol part of the tuple. */ -static void udp_print_tuple(struct seq_file *s, - const struct nf_conntrack_tuple *tuple) -{ - seq_printf(s, "sport=%hu dport=%hu ", - ntohs(tuple->src.u.udp.port), - ntohs(tuple->dst.u.udp.port)); -} - static unsigned int *udp_get_timeouts(struct net *net) { return udp_pernet(net)->timeouts; @@ -82,8 +73,6 @@ static int udp_packet(struct nf_conn *ct, const struct sk_buff *skb, unsigned int dataoff, enum ip_conntrack_info ctinfo, - u_int8_t pf, - unsigned int hooknum, unsigned int *timeouts) { /* If we've seen traffic both ways, this is some kind of UDP @@ -109,6 +98,12 @@ static bool udp_new(struct nf_conn *ct, const struct sk_buff *skb, } #ifdef CONFIG_NF_CT_PROTO_UDPLITE +static void udplite_error_log(const struct sk_buff *skb, struct net *net, + u8 pf, const char *msg) +{ + nf_l4proto_log_invalid(skb, net, pf, IPPROTO_UDPLITE, "%s", msg); +} + static int udplite_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, @@ -122,9 +117,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl, /* Header is too small? */ hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); if (!hdr) { - if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_udplite: short packet "); + udplite_error_log(skb, net, pf, "short packet"); return -NF_ACCEPT; } @@ -132,17 +125,13 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl, if (cscov == 0) { cscov = udplen; } else if (cscov < sizeof(*hdr) || cscov > udplen) { - if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_udplite: invalid checksum coverage "); + udplite_error_log(skb, net, pf, "invalid checksum coverage"); return -NF_ACCEPT; } /* UDPLITE mandates checksums */ if (!hdr->check) { - if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_udplite: checksum missing "); + udplite_error_log(skb, net, pf, "checksum missing"); return -NF_ACCEPT; } @@ -150,9 +139,7 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl, if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum_partial(skb, hooknum, dataoff, cscov, IPPROTO_UDP, pf)) { - if (LOG_INVALID(net, IPPROTO_UDPLITE)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_udplite: bad UDPLite checksum "); + udplite_error_log(skb, net, pf, "bad checksum"); return -NF_ACCEPT; } @@ -160,6 +147,12 @@ static int udplite_error(struct net *net, struct nf_conn *tmpl, } #endif +static void udp_error_log(const struct sk_buff *skb, struct net *net, + u8 pf, const char *msg) +{ + nf_l4proto_log_invalid(skb, net, pf, IPPROTO_UDP, "%s", msg); +} + static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, unsigned int dataoff, u_int8_t pf, @@ -172,17 +165,13 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, /* Header is too small? */ hdr = skb_header_pointer(skb, dataoff, sizeof(_hdr), &_hdr); if (hdr == NULL) { - if (LOG_INVALID(net, IPPROTO_UDP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_udp: short packet "); + udp_error_log(skb, net, pf, "short packet"); return -NF_ACCEPT; } /* Truncated/malformed packets */ if (ntohs(hdr->len) > udplen || ntohs(hdr->len) < sizeof(*hdr)) { - if (LOG_INVALID(net, IPPROTO_UDP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_udp: truncated/malformed packet "); + udp_error_log(skb, net, pf, "truncated/malformed packet"); return -NF_ACCEPT; } @@ -196,9 +185,7 @@ static int udp_error(struct net *net, struct nf_conn *tmpl, struct sk_buff *skb, * FIXME: Source route IP option packets --RR */ if (net->ct.sysctl_checksum && hooknum == NF_INET_PRE_ROUTING && nf_checksum(skb, hooknum, dataoff, IPPROTO_UDP, pf)) { - if (LOG_INVALID(net, IPPROTO_UDP)) - nf_log_packet(net, pf, 0, skb, NULL, NULL, NULL, - "nf_ct_udp: bad UDP checksum "); + udp_error_log(skb, net, pf, "bad checksum"); return -NF_ACCEPT; } @@ -313,11 +300,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp4 __read_mostly = { .l3proto = PF_INET, .l4proto = IPPROTO_UDP, - .name = "udp", .allow_clash = true, .pkt_to_tuple = udp_pkt_to_tuple, .invert_tuple = udp_invert_tuple, - .print_tuple = udp_print_tuple, .packet = udp_packet, .get_timeouts = udp_get_timeouts, .new = udp_new, @@ -347,11 +332,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite4 __read_mostly = { .l3proto = PF_INET, .l4proto = IPPROTO_UDPLITE, - .name = "udplite", .allow_clash = true, .pkt_to_tuple = udp_pkt_to_tuple, .invert_tuple = udp_invert_tuple, - .print_tuple = udp_print_tuple, .packet = udp_packet, .get_timeouts = udp_get_timeouts, .new = udp_new, @@ -381,11 +364,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udp6 __read_mostly = { .l3proto = PF_INET6, .l4proto = IPPROTO_UDP, - .name = "udp", .allow_clash = true, .pkt_to_tuple = udp_pkt_to_tuple, .invert_tuple = udp_invert_tuple, - .print_tuple = udp_print_tuple, .packet = udp_packet, .get_timeouts = udp_get_timeouts, .new = udp_new, @@ -415,11 +396,9 @@ struct nf_conntrack_l4proto nf_conntrack_l4proto_udplite6 __read_mostly = { .l3proto = PF_INET6, .l4proto = IPPROTO_UDPLITE, - .name = "udplite", .allow_clash = true, .pkt_to_tuple = udp_pkt_to_tuple, .invert_tuple = udp_invert_tuple, - .print_tuple = udp_print_tuple, .packet = udp_packet, .get_timeouts = udp_get_timeouts, .new = udp_new, diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index d38af4274335..4dbb5bad4363 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -884,7 +884,6 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, tuple.dst.u3 = *daddr; tuple.dst.u.udp.port = port; - rcu_read_lock(); do { exp = __nf_ct_expect_find(net, nf_ct_zone(ct), &tuple); @@ -918,10 +917,8 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, goto err1; } - if (skip_expect) { - rcu_read_unlock(); + if (skip_expect) return NF_ACCEPT; - } rtp_exp = nf_ct_expect_alloc(ct); if (rtp_exp == NULL) @@ -952,7 +949,6 @@ static int set_expected_rtp_rtcp(struct sk_buff *skb, unsigned int protoff, err2: nf_ct_expect_put(rtp_exp); err1: - rcu_read_unlock(); return ret; } diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index ccb5cb9043e0..5a101caa3e12 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -41,8 +41,62 @@ print_tuple(struct seq_file *s, const struct nf_conntrack_tuple *tuple, const struct nf_conntrack_l3proto *l3proto, const struct nf_conntrack_l4proto *l4proto) { - l3proto->print_tuple(s, tuple); - l4proto->print_tuple(s, tuple); + switch (l3proto->l3proto) { + case NFPROTO_IPV4: + seq_printf(s, "src=%pI4 dst=%pI4 ", + &tuple->src.u3.ip, &tuple->dst.u3.ip); + break; + case NFPROTO_IPV6: + seq_printf(s, "src=%pI6 dst=%pI6 ", + tuple->src.u3.ip6, tuple->dst.u3.ip6); + break; + default: + break; + } + + switch (l4proto->l4proto) { + case IPPROTO_ICMP: + seq_printf(s, "type=%u code=%u id=%u ", + tuple->dst.u.icmp.type, + tuple->dst.u.icmp.code, + ntohs(tuple->src.u.icmp.id)); + break; + case IPPROTO_TCP: + seq_printf(s, "sport=%hu dport=%hu ", + ntohs(tuple->src.u.tcp.port), + ntohs(tuple->dst.u.tcp.port)); + break; + case IPPROTO_UDPLITE: /* fallthrough */ + case IPPROTO_UDP: + seq_printf(s, "sport=%hu dport=%hu ", + ntohs(tuple->src.u.udp.port), + ntohs(tuple->dst.u.udp.port)); + + break; + case IPPROTO_DCCP: + seq_printf(s, "sport=%hu dport=%hu ", + ntohs(tuple->src.u.dccp.port), + ntohs(tuple->dst.u.dccp.port)); + break; + case IPPROTO_SCTP: + seq_printf(s, "sport=%hu dport=%hu ", + ntohs(tuple->src.u.sctp.port), + ntohs(tuple->dst.u.sctp.port)); + break; + case IPPROTO_ICMPV6: + seq_printf(s, "type=%u code=%u id=%u ", + tuple->dst.u.icmp.type, + tuple->dst.u.icmp.code, + ntohs(tuple->src.u.icmp.id)); + break; + case IPPROTO_GRE: + seq_printf(s, "srckey=0x%x dstkey=0x%x ", + ntohs(tuple->src.u.gre.key), + ntohs(tuple->dst.u.gre.key)); + break; + default: + break; + } } EXPORT_SYMBOL_GPL(print_tuple); @@ -198,6 +252,31 @@ ct_show_delta_time(struct seq_file *s, const struct nf_conn *ct) } #endif +static const char* l3proto_name(u16 proto) +{ + switch (proto) { + case AF_INET: return "ipv4"; + case AF_INET6: return "ipv6"; + } + + return "unknown"; +} + +static const char* l4proto_name(u16 proto) +{ + switch (proto) { + case IPPROTO_ICMP: return "icmp"; + case IPPROTO_TCP: return "tcp"; + case IPPROTO_UDP: return "udp"; + case IPPROTO_DCCP: return "dccp"; + case IPPROTO_GRE: return "gre"; + case IPPROTO_SCTP: return "sctp"; + case IPPROTO_UDPLITE: return "udplite"; + } + + return "unknown"; +} + /* return 0 on success, 1 in case of error */ static int ct_seq_show(struct seq_file *s, void *v) { @@ -208,7 +287,7 @@ static int ct_seq_show(struct seq_file *s, void *v) struct net *net = seq_file_net(s); int ret = 0; - NF_CT_ASSERT(ct); + WARN_ON(!ct); if (unlikely(!atomic_inc_not_zero(&ct->ct_general.use))) return 0; @@ -225,14 +304,14 @@ static int ct_seq_show(struct seq_file *s, void *v) goto release; l3proto = __nf_ct_l3proto_find(nf_ct_l3num(ct)); - NF_CT_ASSERT(l3proto); + WARN_ON(!l3proto); l4proto = __nf_ct_l4proto_find(nf_ct_l3num(ct), nf_ct_protonum(ct)); - NF_CT_ASSERT(l4proto); + WARN_ON(!l4proto); ret = -ENOSPC; seq_printf(s, "%-8s %u %-8s %u %ld ", - l3proto->name, nf_ct_l3num(ct), - l4proto->name, nf_ct_protonum(ct), + l3proto_name(l3proto->l3proto), nf_ct_l3num(ct), + l4proto_name(l4proto->l4proto), nf_ct_protonum(ct), nf_ct_expires(ct) / HZ); if (l4proto->print_conntrack) @@ -452,9 +531,6 @@ static int log_invalid_proto_max __read_mostly = 255; /* size the user *wants to set */ static unsigned int nf_conntrack_htable_size_user __read_mostly; -extern unsigned int nf_conntrack_default_on; -unsigned int nf_conntrack_default_on __read_mostly = 1; - static int nf_conntrack_hash_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) @@ -520,13 +596,6 @@ static struct ctl_table nf_ct_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, - { - .procname = "nf_conntrack_default_on", - .data = &nf_conntrack_default_on, - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, { } }; diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h index bfa742da83af..44284cd2528d 100644 --- a/net/netfilter/nf_internals.h +++ b/net/netfilter/nf_internals.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _NF_INTERNALS_H #define _NF_INTERNALS_H @@ -5,17 +6,11 @@ #include <linux/skbuff.h> #include <linux/netdevice.h> -#ifdef CONFIG_NETFILTER_DEBUG -#define NFDEBUG(format, args...) printk(KERN_DEBUG format , ## args) -#else -#define NFDEBUG(format, args...) -#endif - /* nf_queue.c */ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, - struct nf_hook_entry **entryp, unsigned int verdict); + const struct nf_hook_entries *entries, unsigned int index, + unsigned int verdict); unsigned int nf_queue_nf_hook_drop(struct net *net); -int __init netfilter_queue_init(void); /* nf_log.c */ int __init netfilter_log_init(void); diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index eb541786ccb7..6c38421e31f9 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -30,19 +30,17 @@ #include <net/netfilter/nf_conntrack_zones.h> #include <linux/netfilter/nf_nat.h> +static spinlock_t nf_nat_locks[CONNTRACK_LOCKS]; + static DEFINE_MUTEX(nf_nat_proto_mutex); static const struct nf_nat_l3proto __rcu *nf_nat_l3protos[NFPROTO_NUMPROTO] __read_mostly; static const struct nf_nat_l4proto __rcu **nf_nat_l4protos[NFPROTO_NUMPROTO] __read_mostly; -struct nf_nat_conn_key { - const struct net *net; - const struct nf_conntrack_tuple *tuple; - const struct nf_conntrack_zone *zone; -}; - -static struct rhltable nf_nat_bysource_table; +static struct hlist_head *nf_nat_bysource __read_mostly; +static unsigned int nf_nat_htable_size __read_mostly; +static unsigned int nf_nat_hash_rnd __read_mostly; inline const struct nf_nat_l3proto * __nf_nat_l3proto_find(u8 family) @@ -118,17 +116,19 @@ int nf_xfrm_me_harder(struct net *net, struct sk_buff *skb, unsigned int family) EXPORT_SYMBOL(nf_xfrm_me_harder); #endif /* CONFIG_XFRM */ -static u32 nf_nat_bysource_hash(const void *data, u32 len, u32 seed) +/* We keep an extra hash for each conntrack, for fast searching. */ +static unsigned int +hash_by_src(const struct net *n, const struct nf_conntrack_tuple *tuple) { - const struct nf_conntrack_tuple *t; - const struct nf_conn *ct = data; + unsigned int hash; + + get_random_once(&nf_nat_hash_rnd, sizeof(nf_nat_hash_rnd)); - t = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; /* Original src, to ensure we map it consistently if poss. */ + hash = jhash2((u32 *)&tuple->src, sizeof(tuple->src) / sizeof(u32), + tuple->dst.protonum ^ nf_nat_hash_rnd ^ net_hash_mix(n)); - seed ^= net_hash_mix(nf_ct_net(ct)); - return jhash2((const u32 *)&t->src, sizeof(t->src) / sizeof(u32), - t->dst.protonum ^ seed); + return reciprocal_scale(hash, nf_nat_htable_size); } /* Is this tuple already taken? (not by us) */ @@ -184,28 +184,6 @@ same_src(const struct nf_conn *ct, t->src.u.all == tuple->src.u.all); } -static int nf_nat_bysource_cmp(struct rhashtable_compare_arg *arg, - const void *obj) -{ - const struct nf_nat_conn_key *key = arg->key; - const struct nf_conn *ct = obj; - - if (!same_src(ct, key->tuple) || - !net_eq(nf_ct_net(ct), key->net) || - !nf_ct_zone_equal(ct, key->zone, IP_CT_DIR_ORIGINAL)) - return 1; - - return 0; -} - -static struct rhashtable_params nf_nat_bysource_params = { - .head_offset = offsetof(struct nf_conn, nat_bysource), - .obj_hashfn = nf_nat_bysource_hash, - .obj_cmpfn = nf_nat_bysource_cmp, - .nelem_hint = 256, - .min_size = 1024, -}; - /* Only called for SRC manip */ static int find_appropriate_src(struct net *net, @@ -216,26 +194,22 @@ find_appropriate_src(struct net *net, struct nf_conntrack_tuple *result, const struct nf_nat_range *range) { + unsigned int h = hash_by_src(net, tuple); const struct nf_conn *ct; - struct nf_nat_conn_key key = { - .net = net, - .tuple = tuple, - .zone = zone - }; - struct rhlist_head *hl, *h; - hl = rhltable_lookup(&nf_nat_bysource_table, &key, - nf_nat_bysource_params); - - rhl_for_each_entry_rcu(ct, h, hl, nat_bysource) { - nf_ct_invert_tuplepr(result, - &ct->tuplehash[IP_CT_DIR_REPLY].tuple); - result->dst = tuple->dst; - - if (in_range(l3proto, l4proto, result, range)) - return 1; + hlist_for_each_entry_rcu(ct, &nf_nat_bysource[h], nat_bysource) { + if (same_src(ct, tuple) && + net_eq(net, nf_ct_net(ct)) && + nf_ct_zone_equal(ct, zone, IP_CT_DIR_ORIGINAL)) { + /* Copy source part from reply tuple. */ + nf_ct_invert_tuplepr(result, + &ct->tuplehash[IP_CT_DIR_REPLY].tuple); + result->dst = tuple->dst; + + if (in_range(l3proto, l4proto, result, range)) + return 1; + } } - return 0; } @@ -408,15 +382,18 @@ nf_nat_setup_info(struct nf_conn *ct, const struct nf_nat_range *range, enum nf_nat_manip_type maniptype) { + struct net *net = nf_ct_net(ct); struct nf_conntrack_tuple curr_tuple, new_tuple; /* Can't setup nat info for confirmed ct. */ if (nf_ct_is_confirmed(ct)) return NF_ACCEPT; - NF_CT_ASSERT(maniptype == NF_NAT_MANIP_SRC || - maniptype == NF_NAT_MANIP_DST); - BUG_ON(nf_nat_initialized(ct, maniptype)); + WARN_ON(maniptype != NF_NAT_MANIP_SRC && + maniptype != NF_NAT_MANIP_DST); + + if (WARN_ON(nf_nat_initialized(ct, maniptype))) + return NF_DROP; /* What we've got will look like inverse of reply. Normally * this is what is in the conntrack, except for prior @@ -441,25 +418,22 @@ nf_nat_setup_info(struct nf_conn *ct, else ct->status |= IPS_DST_NAT; - if (nfct_help(ct)) + if (nfct_help(ct) && !nfct_seqadj(ct)) if (!nfct_seqadj_ext_add(ct)) return NF_DROP; } if (maniptype == NF_NAT_MANIP_SRC) { - struct nf_nat_conn_key key = { - .net = nf_ct_net(ct), - .tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple, - .zone = nf_ct_zone(ct), - }; - int err; - - err = rhltable_insert_key(&nf_nat_bysource_table, - &key, - &ct->nat_bysource, - nf_nat_bysource_params); - if (err) - return NF_DROP; + unsigned int srchash; + spinlock_t *lock; + + srchash = hash_by_src(net, + &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + lock = &nf_nat_locks[srchash % CONNTRACK_LOCKS]; + spin_lock_bh(lock); + hlist_add_head_rcu(&ct->nat_bysource, + &nf_nat_bysource[srchash]); + spin_unlock_bh(lock); } /* It's done. */ @@ -553,23 +527,29 @@ static int nf_nat_proto_remove(struct nf_conn *i, void *data) return i->status & IPS_NAT_MASK ? 1 : 0; } +static void __nf_nat_cleanup_conntrack(struct nf_conn *ct) +{ + unsigned int h; + + h = hash_by_src(nf_ct_net(ct), &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + spin_lock_bh(&nf_nat_locks[h % CONNTRACK_LOCKS]); + hlist_del_rcu(&ct->nat_bysource); + spin_unlock_bh(&nf_nat_locks[h % CONNTRACK_LOCKS]); +} + static int nf_nat_proto_clean(struct nf_conn *ct, void *data) { if (nf_nat_proto_remove(ct, data)) return 1; - if ((ct->status & IPS_SRC_NAT_DONE) == 0) - return 0; - - /* This netns is being destroyed, and conntrack has nat null binding. + /* This module is being removed and conntrack has nat null binding. * Remove it from bysource hash, as the table will be freed soon. * * Else, when the conntrack is destoyed, nf_nat_cleanup_conntrack() * will delete entry from already-freed table. */ - clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status); - rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource, - nf_nat_bysource_params); + if (test_and_clear_bit(IPS_SRC_NAT_DONE_BIT, &ct->status)) + __nf_nat_cleanup_conntrack(ct); /* don't delete conntrack. Although that would make things a lot * simpler, we'd end up flushing all conntracks on nat rmmod. @@ -698,8 +678,7 @@ EXPORT_SYMBOL_GPL(nf_nat_l3proto_unregister); static void nf_nat_cleanup_conntrack(struct nf_conn *ct) { if (ct->status & IPS_SRC_NAT_DONE) - rhltable_remove(&nf_nat_bysource_table, &ct->nat_bysource, - nf_nat_bysource_params); + __nf_nat_cleanup_conntrack(ct); } static struct nf_ct_ext_type nat_extend __read_mostly = { @@ -821,19 +800,27 @@ static struct nf_ct_helper_expectfn follow_master_nat = { static int __init nf_nat_init(void) { - int ret; + int ret, i; - ret = rhltable_init(&nf_nat_bysource_table, &nf_nat_bysource_params); - if (ret) - return ret; + /* Leave them the same for the moment. */ + nf_nat_htable_size = nf_conntrack_htable_size; + if (nf_nat_htable_size < CONNTRACK_LOCKS) + nf_nat_htable_size = CONNTRACK_LOCKS; + + nf_nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, 0); + if (!nf_nat_bysource) + return -ENOMEM; ret = nf_ct_extend_register(&nat_extend); if (ret < 0) { - rhltable_destroy(&nf_nat_bysource_table); + nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size); printk(KERN_ERR "nf_nat_core: Unable to register extension\n"); return ret; } + for (i = 0; i < CONNTRACK_LOCKS; i++) + spin_lock_init(&nf_nat_locks[i]); + nf_ct_helper_expectfn_register(&follow_master_nat); BUG_ON(nfnetlink_parse_nat_setup_hook != NULL); @@ -863,8 +850,8 @@ static void __exit nf_nat_cleanup(void) for (i = 0; i < NFPROTO_NUMPROTO; i++) kfree(nf_nat_l4protos[i]); - - rhltable_destroy(&nf_nat_bysource_table); + synchronize_net(); + nf_ct_free_hashtable(nf_nat_bysource, nf_nat_htable_size); } MODULE_LICENSE("GPL"); diff --git a/net/netfilter/nf_nat_ftp.c b/net/netfilter/nf_nat_ftp.c index e84a578dbe35..d76afafdc699 100644 --- a/net/netfilter/nf_nat_ftp.c +++ b/net/netfilter/nf_nat_ftp.c @@ -134,7 +134,7 @@ static int __init nf_nat_ftp_init(void) } /* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */ -static int warn_set(const char *val, struct kernel_param *kp) +static int warn_set(const char *val, const struct kernel_param *kp) { printk(KERN_INFO KBUILD_MODNAME ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n"); diff --git a/net/netfilter/nf_nat_irc.c b/net/netfilter/nf_nat_irc.c index 0648cb096bd8..dcb5f6375d9d 100644 --- a/net/netfilter/nf_nat_irc.c +++ b/net/netfilter/nf_nat_irc.c @@ -106,7 +106,7 @@ static int __init nf_nat_irc_init(void) } /* Prior to 2.6.11, we had a ports param. No longer, but don't break users. */ -static int warn_set(const char *val, struct kernel_param *kp) +static int warn_set(const char *val, const struct kernel_param *kp) { printk(KERN_INFO KBUILD_MODNAME ": kernel >= 2.6.10 only uses 'ports' for conntrack modules\n"); diff --git a/net/netfilter/nf_nat_redirect.c b/net/netfilter/nf_nat_redirect.c index 86067560a318..25b06b959118 100644 --- a/net/netfilter/nf_nat_redirect.c +++ b/net/netfilter/nf_nat_redirect.c @@ -38,11 +38,11 @@ nf_nat_redirect_ipv4(struct sk_buff *skb, __be32 newdst; struct nf_nat_range newrange; - NF_CT_ASSERT(hooknum == NF_INET_PRE_ROUTING || - hooknum == NF_INET_LOCAL_OUT); + WARN_ON(hooknum != NF_INET_PRE_ROUTING && + hooknum != NF_INET_LOCAL_OUT); ct = nf_ct_get(skb, &ctinfo); - NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); + WARN_ON(!(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED))); /* Local packets: make them go to loopback */ if (hooknum == NF_INET_LOCAL_OUT) { diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 043850c9d154..f7e21953b1de 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -109,9 +109,11 @@ unsigned int nf_queue_nf_hook_drop(struct net *net) return count; } +EXPORT_SYMBOL_GPL(nf_queue_nf_hook_drop); static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, - struct nf_hook_entry *hook_entry, unsigned int queuenum) + const struct nf_hook_entries *entries, + unsigned int index, unsigned int queuenum) { int status = -ENOENT; struct nf_queue_entry *entry = NULL; @@ -139,7 +141,7 @@ static int __nf_queue(struct sk_buff *skb, const struct nf_hook_state *state, *entry = (struct nf_queue_entry) { .skb = skb, .state = *state, - .hook = hook_entry, + .hook_index = index, .size = sizeof(*entry) + afinfo->route_key_size, }; @@ -162,18 +164,16 @@ err: /* Packets leaving via this function must come back through nf_reinject(). */ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, - struct nf_hook_entry **entryp, unsigned int verdict) + const struct nf_hook_entries *entries, unsigned int index, + unsigned int verdict) { - struct nf_hook_entry *entry = *entryp; int ret; - ret = __nf_queue(skb, state, entry, verdict >> NF_VERDICT_QBITS); + ret = __nf_queue(skb, state, entries, index, verdict >> NF_VERDICT_QBITS); if (ret < 0) { if (ret == -ESRCH && - (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) { - *entryp = rcu_dereference(entry->next); + (verdict & NF_VERDICT_FLAG_QUEUE_BYPASS)) return 1; - } kfree_skb(skb); } @@ -182,33 +182,56 @@ int nf_queue(struct sk_buff *skb, struct nf_hook_state *state, static unsigned int nf_iterate(struct sk_buff *skb, struct nf_hook_state *state, - struct nf_hook_entry **entryp) + const struct nf_hook_entries *hooks, + unsigned int *index) { - unsigned int verdict; + const struct nf_hook_entry *hook; + unsigned int verdict, i = *index; - do { + while (i < hooks->num_hook_entries) { + hook = &hooks->hooks[i]; repeat: - verdict = nf_hook_entry_hookfn((*entryp), skb, state); + verdict = nf_hook_entry_hookfn(hook, skb, state); if (verdict != NF_ACCEPT) { if (verdict != NF_REPEAT) return verdict; goto repeat; } - *entryp = rcu_dereference((*entryp)->next); - } while (*entryp); + i++; + } + *index = i; return NF_ACCEPT; } +/* Caller must hold rcu read-side lock */ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) { - struct nf_hook_entry *hook_entry = entry->hook; + const struct nf_hook_entry *hook_entry; + const struct nf_hook_entries *hooks; struct sk_buff *skb = entry->skb; const struct nf_afinfo *afinfo; + const struct net *net; + unsigned int i; int err; + u8 pf; + + net = entry->state.net; + pf = entry->state.pf; + + hooks = rcu_dereference(net->nf.hooks[pf][entry->state.hook]); nf_queue_entry_release_refs(entry); + i = entry->hook_index; + if (WARN_ON_ONCE(i >= hooks->num_hook_entries)) { + kfree_skb(skb); + kfree(entry); + return; + } + + hook_entry = &hooks->hooks[i]; + /* Continue traversal iff userspace said ok... */ if (verdict == NF_REPEAT) verdict = nf_hook_entry_hookfn(hook_entry, skb, &entry->state); @@ -220,27 +243,22 @@ void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict) } if (verdict == NF_ACCEPT) { - hook_entry = rcu_dereference(hook_entry->next); - if (hook_entry) next_hook: - verdict = nf_iterate(skb, &entry->state, &hook_entry); + ++i; + verdict = nf_iterate(skb, &entry->state, hooks, &i); } switch (verdict & NF_VERDICT_MASK) { case NF_ACCEPT: case NF_STOP: -okfn: local_bh_disable(); entry->state.okfn(entry->state.net, entry->state.sk, skb); local_bh_enable(); break; case NF_QUEUE: - err = nf_queue(skb, &entry->state, &hook_entry, verdict); - if (err == 1) { - if (hook_entry) - goto next_hook; - goto okfn; - } + err = nf_queue(skb, &entry->state, hooks, i, verdict); + if (err == 1) + goto next_hook; break; case NF_STOLEN: break; diff --git a/net/netfilter/nf_sockopt.c b/net/netfilter/nf_sockopt.c index c68c1e58b362..46cb3786e0ec 100644 --- a/net/netfilter/nf_sockopt.c +++ b/net/netfilter/nf_sockopt.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> @@ -33,7 +34,7 @@ int nf_register_sockopt(struct nf_sockopt_ops *reg) reg->set_optmin, reg->set_optmax) || overlap(ops->get_optmin, ops->get_optmax, reg->get_optmin, reg->get_optmax))) { - NFDEBUG("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n", + pr_debug("nf_sock overlap: %u-%u/%u-%u v %u-%u/%u-%u\n", ops->set_optmin, ops->set_optmax, ops->get_optmin, ops->get_optmax, reg->set_optmin, reg->set_optmax, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 7843efa33c59..d8327b43e4dc 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -726,7 +726,10 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, if (table == NULL) goto err2; - nla_strlcpy(table->name, name, NFT_TABLE_MAXNAMELEN); + table->name = nla_strdup(name, GFP_KERNEL); + if (table->name == NULL) + goto err3; + INIT_LIST_HEAD(&table->chains); INIT_LIST_HEAD(&table->sets); INIT_LIST_HEAD(&table->objects); @@ -735,10 +738,12 @@ static int nf_tables_newtable(struct net *net, struct sock *nlsk, nft_ctx_init(&ctx, net, skb, nlh, afi, table, NULL, nla); err = nft_trans_table_add(&ctx, NFT_MSG_NEWTABLE); if (err < 0) - goto err3; + goto err4; list_add_tail_rcu(&table->list, &afi->tables); return 0; +err4: + kfree(table->name); err3: kfree(table); err2: @@ -855,6 +860,10 @@ static int nf_tables_deltable(struct net *net, struct sock *nlsk, if (IS_ERR(table)) return PTR_ERR(table); + if (nlh->nlmsg_flags & NLM_F_NONREC && + table->use > 0) + return -EBUSY; + ctx.afi = afi; ctx.table = table; @@ -865,6 +874,7 @@ static void nf_tables_table_destroy(struct nft_ctx *ctx) { BUG_ON(ctx->table->use > 0); + kfree(ctx->table->name); kfree(ctx->table); module_put(ctx->afi->owner); } @@ -1038,7 +1048,7 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, if (nla_put_string(skb, NFTA_CHAIN_TYPE, basechain->type->name)) goto nla_put_failure; - if (nft_dump_stats(skb, nft_base_chain(chain)->stats)) + if (basechain->stats && nft_dump_stats(skb, basechain->stats)) goto nla_put_failure; } @@ -1240,10 +1250,14 @@ static void nf_tables_chain_destroy(struct nft_chain *chain) module_put(basechain->type->owner); free_percpu(basechain->stats); + if (basechain->stats) + static_branch_dec(&nft_counters_enabled); if (basechain->ops[0].dev != NULL) dev_put(basechain->ops[0].dev); + kfree(chain->name); kfree(basechain); } else { + kfree(chain->name); kfree(chain); } } @@ -1325,155 +1339,18 @@ static void nft_chain_release_hook(struct nft_chain_hook *hook) dev_put(hook->dev); } -static int nf_tables_newchain(struct net *net, struct sock *nlsk, - struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[], - struct netlink_ext_ack *extack) +static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, + u8 policy, bool create) { - const struct nfgenmsg *nfmsg = nlmsg_data(nlh); - const struct nlattr * uninitialized_var(name); - struct nft_af_info *afi; - struct nft_table *table; + const struct nlattr * const *nla = ctx->nla; + struct nft_table *table = ctx->table; + struct nft_af_info *afi = ctx->afi; + struct nft_base_chain *basechain; + struct nft_stats __percpu *stats; + struct net *net = ctx->net; struct nft_chain *chain; - struct nft_base_chain *basechain = NULL; - u8 genmask = nft_genmask_next(net); - int family = nfmsg->nfgen_family; - u8 policy = NF_ACCEPT; - u64 handle = 0; unsigned int i; - struct nft_stats __percpu *stats; int err; - bool create; - struct nft_ctx ctx; - - create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; - - afi = nf_tables_afinfo_lookup(net, family, true); - if (IS_ERR(afi)) - return PTR_ERR(afi); - - table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask); - if (IS_ERR(table)) - return PTR_ERR(table); - - chain = NULL; - name = nla[NFTA_CHAIN_NAME]; - - if (nla[NFTA_CHAIN_HANDLE]) { - handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE])); - chain = nf_tables_chain_lookup_byhandle(table, handle, genmask); - if (IS_ERR(chain)) - return PTR_ERR(chain); - } else { - chain = nf_tables_chain_lookup(table, name, genmask); - if (IS_ERR(chain)) { - if (PTR_ERR(chain) != -ENOENT) - return PTR_ERR(chain); - chain = NULL; - } - } - - if (nla[NFTA_CHAIN_POLICY]) { - if (chain != NULL && - !nft_is_base_chain(chain)) - return -EOPNOTSUPP; - - if (chain == NULL && - nla[NFTA_CHAIN_HOOK] == NULL) - return -EOPNOTSUPP; - - policy = ntohl(nla_get_be32(nla[NFTA_CHAIN_POLICY])); - switch (policy) { - case NF_DROP: - case NF_ACCEPT: - break; - default: - return -EINVAL; - } - } - - if (chain != NULL) { - struct nft_stats *stats = NULL; - struct nft_trans *trans; - - if (nlh->nlmsg_flags & NLM_F_EXCL) - return -EEXIST; - if (nlh->nlmsg_flags & NLM_F_REPLACE) - return -EOPNOTSUPP; - - if (nla[NFTA_CHAIN_HOOK]) { - struct nft_base_chain *basechain; - struct nft_chain_hook hook; - struct nf_hook_ops *ops; - - if (!nft_is_base_chain(chain)) - return -EBUSY; - - err = nft_chain_parse_hook(net, nla, afi, &hook, - create); - if (err < 0) - return err; - - basechain = nft_base_chain(chain); - if (basechain->type != hook.type) { - nft_chain_release_hook(&hook); - return -EBUSY; - } - - for (i = 0; i < afi->nops; i++) { - ops = &basechain->ops[i]; - if (ops->hooknum != hook.num || - ops->priority != hook.priority || - ops->dev != hook.dev) { - nft_chain_release_hook(&hook); - return -EBUSY; - } - } - nft_chain_release_hook(&hook); - } - - if (nla[NFTA_CHAIN_HANDLE] && name) { - struct nft_chain *chain2; - - chain2 = nf_tables_chain_lookup(table, - nla[NFTA_CHAIN_NAME], - genmask); - if (IS_ERR(chain2)) - return PTR_ERR(chain2); - } - - if (nla[NFTA_CHAIN_COUNTERS]) { - if (!nft_is_base_chain(chain)) - return -EOPNOTSUPP; - - stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]); - if (IS_ERR(stats)) - return PTR_ERR(stats); - } - - nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla); - trans = nft_trans_alloc(&ctx, NFT_MSG_NEWCHAIN, - sizeof(struct nft_trans_chain)); - if (trans == NULL) { - free_percpu(stats); - return -ENOMEM; - } - - nft_trans_chain_stats(trans) = stats; - nft_trans_chain_update(trans) = true; - - if (nla[NFTA_CHAIN_POLICY]) - nft_trans_chain_policy(trans) = policy; - else - nft_trans_chain_policy(trans) = -1; - - if (nla[NFTA_CHAIN_HANDLE] && name) { - nla_strlcpy(nft_trans_chain_name(trans), name, - NFT_CHAIN_MAXNAMELEN); - } - list_add_tail(&trans->list, &net->nft.commit_list); - return 0; - } if (table->use == UINT_MAX) return -EOVERFLOW; @@ -1504,14 +1381,7 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, return PTR_ERR(stats); } basechain->stats = stats; - } else { - stats = netdev_alloc_pcpu_stats(struct nft_stats); - if (stats == NULL) { - nft_chain_release_hook(&hook); - kfree(basechain); - return -ENOMEM; - } - rcu_assign_pointer(basechain->stats, stats); + static_branch_inc(&nft_counters_enabled); } hookfn = hook.type->hooks[hook.num]; @@ -1539,31 +1409,204 @@ static int nf_tables_newchain(struct net *net, struct sock *nlsk, if (chain == NULL) return -ENOMEM; } - INIT_LIST_HEAD(&chain->rules); chain->handle = nf_tables_alloc_handle(table); chain->table = table; - nla_strlcpy(chain->name, name, NFT_CHAIN_MAXNAMELEN); + chain->name = nla_strdup(nla[NFTA_CHAIN_NAME], GFP_KERNEL); + if (!chain->name) { + err = -ENOMEM; + goto err1; + } err = nf_tables_register_hooks(net, table, chain, afi->nops); if (err < 0) goto err1; - nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla); - err = nft_trans_chain_add(&ctx, NFT_MSG_NEWCHAIN); + ctx->chain = chain; + err = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN); if (err < 0) goto err2; table->use++; list_add_tail_rcu(&chain->list, &table->chains); + return 0; err2: nf_tables_unregister_hooks(net, table, chain, afi->nops); err1: nf_tables_chain_destroy(chain); + return err; } +static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, + bool create) +{ + const struct nlattr * const *nla = ctx->nla; + struct nft_table *table = ctx->table; + struct nft_chain *chain = ctx->chain; + struct nft_af_info *afi = ctx->afi; + struct nft_base_chain *basechain; + struct nft_stats *stats = NULL; + struct nft_chain_hook hook; + const struct nlattr *name; + struct nf_hook_ops *ops; + struct nft_trans *trans; + int err, i; + + if (nla[NFTA_CHAIN_HOOK]) { + if (!nft_is_base_chain(chain)) + return -EBUSY; + + err = nft_chain_parse_hook(ctx->net, nla, ctx->afi, &hook, + create); + if (err < 0) + return err; + + basechain = nft_base_chain(chain); + if (basechain->type != hook.type) { + nft_chain_release_hook(&hook); + return -EBUSY; + } + + for (i = 0; i < afi->nops; i++) { + ops = &basechain->ops[i]; + if (ops->hooknum != hook.num || + ops->priority != hook.priority || + ops->dev != hook.dev) { + nft_chain_release_hook(&hook); + return -EBUSY; + } + } + nft_chain_release_hook(&hook); + } + + if (nla[NFTA_CHAIN_HANDLE] && + nla[NFTA_CHAIN_NAME]) { + struct nft_chain *chain2; + + chain2 = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], + genmask); + if (!IS_ERR(chain2)) + return -EEXIST; + } + + if (nla[NFTA_CHAIN_COUNTERS]) { + if (!nft_is_base_chain(chain)) + return -EOPNOTSUPP; + + stats = nft_stats_alloc(nla[NFTA_CHAIN_COUNTERS]); + if (IS_ERR(stats)) + return PTR_ERR(stats); + } + + trans = nft_trans_alloc(ctx, NFT_MSG_NEWCHAIN, + sizeof(struct nft_trans_chain)); + if (trans == NULL) { + free_percpu(stats); + return -ENOMEM; + } + + nft_trans_chain_stats(trans) = stats; + nft_trans_chain_update(trans) = true; + + if (nla[NFTA_CHAIN_POLICY]) + nft_trans_chain_policy(trans) = policy; + else + nft_trans_chain_policy(trans) = -1; + + name = nla[NFTA_CHAIN_NAME]; + if (nla[NFTA_CHAIN_HANDLE] && name) { + nft_trans_chain_name(trans) = + nla_strdup(name, GFP_KERNEL); + if (!nft_trans_chain_name(trans)) { + kfree(trans); + free_percpu(stats); + return -ENOMEM; + } + } + list_add_tail(&trans->list, &ctx->net->nft.commit_list); + + return 0; +} + +static int nf_tables_newchain(struct net *net, struct sock *nlsk, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) +{ + const struct nfgenmsg *nfmsg = nlmsg_data(nlh); + const struct nlattr * uninitialized_var(name); + u8 genmask = nft_genmask_next(net); + int family = nfmsg->nfgen_family; + struct nft_af_info *afi; + struct nft_table *table; + struct nft_chain *chain; + u8 policy = NF_ACCEPT; + struct nft_ctx ctx; + u64 handle = 0; + bool create; + + create = nlh->nlmsg_flags & NLM_F_CREATE ? true : false; + + afi = nf_tables_afinfo_lookup(net, family, true); + if (IS_ERR(afi)) + return PTR_ERR(afi); + + table = nf_tables_table_lookup(afi, nla[NFTA_CHAIN_TABLE], genmask); + if (IS_ERR(table)) + return PTR_ERR(table); + + chain = NULL; + name = nla[NFTA_CHAIN_NAME]; + + if (nla[NFTA_CHAIN_HANDLE]) { + handle = be64_to_cpu(nla_get_be64(nla[NFTA_CHAIN_HANDLE])); + chain = nf_tables_chain_lookup_byhandle(table, handle, genmask); + if (IS_ERR(chain)) + return PTR_ERR(chain); + } else { + chain = nf_tables_chain_lookup(table, name, genmask); + if (IS_ERR(chain)) { + if (PTR_ERR(chain) != -ENOENT) + return PTR_ERR(chain); + chain = NULL; + } + } + + if (nla[NFTA_CHAIN_POLICY]) { + if (chain != NULL && + !nft_is_base_chain(chain)) + return -EOPNOTSUPP; + + if (chain == NULL && + nla[NFTA_CHAIN_HOOK] == NULL) + return -EOPNOTSUPP; + + policy = ntohl(nla_get_be32(nla[NFTA_CHAIN_POLICY])); + switch (policy) { + case NF_DROP: + case NF_ACCEPT: + break; + default: + return -EINVAL; + } + } + + nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla); + + if (chain != NULL) { + if (nlh->nlmsg_flags & NLM_F_EXCL) + return -EEXIST; + if (nlh->nlmsg_flags & NLM_F_REPLACE) + return -EOPNOTSUPP; + + return nf_tables_updchain(&ctx, genmask, policy, create); + } + + return nf_tables_addchain(&ctx, family, genmask, policy, create); +} + static int nf_tables_delchain(struct net *net, struct sock *nlsk, struct sk_buff *skb, const struct nlmsghdr *nlh, const struct nlattr * const nla[], @@ -1574,8 +1617,11 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk, struct nft_af_info *afi; struct nft_table *table; struct nft_chain *chain; + struct nft_rule *rule; int family = nfmsg->nfgen_family; struct nft_ctx ctx; + u32 use; + int err; afi = nf_tables_afinfo_lookup(net, family, false); if (IS_ERR(afi)) @@ -1588,11 +1634,30 @@ static int nf_tables_delchain(struct net *net, struct sock *nlsk, chain = nf_tables_chain_lookup(table, nla[NFTA_CHAIN_NAME], genmask); if (IS_ERR(chain)) return PTR_ERR(chain); - if (chain->use > 0) + + if (nlh->nlmsg_flags & NLM_F_NONREC && + chain->use > 0) return -EBUSY; nft_ctx_init(&ctx, net, skb, nlh, afi, table, chain, nla); + use = chain->use; + list_for_each_entry(rule, &chain->rules, list) { + if (!nft_is_active_next(net, rule)) + continue; + use--; + + err = nft_delrule(&ctx, rule); + if (err < 0) + return err; + } + + /* There are rules and elements that are still holding references to us, + * we cannot do a recursive removal in this case. + */ + if (use > 0) + return -EBUSY; + return nft_delchain(&ctx); } @@ -1977,8 +2042,8 @@ err: } struct nft_rule_dump_ctx { - char table[NFT_TABLE_MAXNAMELEN]; - char chain[NFT_CHAIN_MAXNAMELEN]; + char *table; + char *chain; }; static int nf_tables_dump_rules(struct sk_buff *skb, @@ -2002,7 +2067,7 @@ static int nf_tables_dump_rules(struct sk_buff *skb, continue; list_for_each_entry_rcu(table, &afi->tables, list) { - if (ctx && ctx->table[0] && + if (ctx && ctx->table && strcmp(ctx->table, table->name) != 0) continue; @@ -2042,7 +2107,13 @@ done: static int nf_tables_dump_rules_done(struct netlink_callback *cb) { - kfree(cb->data); + struct nft_rule_dump_ctx *ctx = cb->data; + + if (ctx) { + kfree(ctx->table); + kfree(ctx->chain); + kfree(ctx); + } return 0; } @@ -2074,12 +2145,23 @@ static int nf_tables_getrule(struct net *net, struct sock *nlsk, if (!ctx) return -ENOMEM; - if (nla[NFTA_RULE_TABLE]) - nla_strlcpy(ctx->table, nla[NFTA_RULE_TABLE], - sizeof(ctx->table)); - if (nla[NFTA_RULE_CHAIN]) - nla_strlcpy(ctx->chain, nla[NFTA_RULE_CHAIN], - sizeof(ctx->chain)); + if (nla[NFTA_RULE_TABLE]) { + ctx->table = nla_strdup(nla[NFTA_RULE_TABLE], + GFP_KERNEL); + if (!ctx->table) { + kfree(ctx); + return -ENOMEM; + } + } + if (nla[NFTA_RULE_CHAIN]) { + ctx->chain = nla_strdup(nla[NFTA_RULE_CHAIN], + GFP_KERNEL); + if (!ctx->chain) { + kfree(ctx->table); + kfree(ctx); + return -ENOMEM; + } + } c.data = ctx; } @@ -2467,14 +2549,9 @@ nft_select_set_ops(const struct nft_ctx *ctx, case NFT_SET_POL_PERFORMANCE: if (est.lookup < best.lookup) break; - if (est.lookup == best.lookup) { - if (!desc->size) { - if (est.space < best.space) - break; - } else if (est.size < best.size) { - break; - } - } + if (est.lookup == best.lookup && + est.space < best.space) + break; continue; case NFT_SET_POL_MEMORY: if (!desc->size) { @@ -2621,7 +2698,7 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set, unsigned long *inuse; unsigned int n = 0, min = 0; - p = strnchr(name, NFT_SET_MAXNAMELEN, '%'); + p = strchr(name, '%'); if (p != NULL) { if (p[1] != 'd' || strchr(p + 2, '%')) return -EINVAL; @@ -2652,12 +2729,17 @@ cont: free_page((unsigned long)inuse); } - snprintf(set->name, sizeof(set->name), name, min + n); + set->name = kasprintf(GFP_KERNEL, name, min + n); + if (!set->name) + return -ENOMEM; + list_for_each_entry(i, &ctx->table->sets, list) { if (!nft_is_active_next(ctx->net, i)) continue; - if (!strcmp(set->name, i->name)) + if (!strcmp(set->name, i->name)) { + kfree(set->name); return -ENFILE; + } } return 0; } @@ -2929,7 +3011,7 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, struct nft_table *table; struct nft_set *set; struct nft_ctx ctx; - char name[NFT_SET_MAXNAMELEN]; + char *name; unsigned int size; bool create; u64 timeout; @@ -3075,8 +3157,14 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk, goto err1; } - nla_strlcpy(name, nla[NFTA_SET_NAME], sizeof(set->name)); + name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL); + if (!name) { + err = -ENOMEM; + goto err2; + } + err = nf_tables_set_alloc_name(&ctx, set, name); + kfree(name); if (err < 0) goto err2; @@ -3126,6 +3214,7 @@ static void nft_set_destroy(struct nft_set *set) { set->ops->destroy(set); module_put(set->ops->type->owner); + kfree(set->name); kvfree(set); } @@ -3159,7 +3248,9 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk, set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME], genmask); if (IS_ERR(set)) return PTR_ERR(set); - if (!list_empty(&set->bindings)) + + if (!list_empty(&set->bindings) || + (nlh->nlmsg_flags & NLM_F_NONREC && atomic_read(&set->nelems) > 0)) return -EBUSY; return nft_delset(&ctx, set); @@ -3497,45 +3588,6 @@ static int nf_tables_dump_set_done(struct netlink_callback *cb) return 0; } -static int nf_tables_getsetelem(struct net *net, struct sock *nlsk, - struct sk_buff *skb, const struct nlmsghdr *nlh, - const struct nlattr * const nla[], - struct netlink_ext_ack *extack) -{ - u8 genmask = nft_genmask_cur(net); - const struct nft_set *set; - struct nft_ctx ctx; - int err; - - err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask); - if (err < 0) - return err; - - set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], - genmask); - if (IS_ERR(set)) - return PTR_ERR(set); - - if (nlh->nlmsg_flags & NLM_F_DUMP) { - struct netlink_dump_control c = { - .dump = nf_tables_dump_set, - .done = nf_tables_dump_set_done, - }; - struct nft_set_dump_ctx *dump_ctx; - - dump_ctx = kmalloc(sizeof(*dump_ctx), GFP_KERNEL); - if (!dump_ctx) - return -ENOMEM; - - dump_ctx->set = set; - dump_ctx->ctx = ctx; - - c.data = dump_ctx; - return netlink_dump_start(nlsk, skb, nlh, &c); - } - return -EOPNOTSUPP; -} - static int nf_tables_fill_setelem_info(struct sk_buff *skb, const struct nft_ctx *ctx, u32 seq, u32 portid, int event, u16 flags, @@ -3581,6 +3633,135 @@ nla_put_failure: return -1; } +static int nft_setelem_parse_flags(const struct nft_set *set, + const struct nlattr *attr, u32 *flags) +{ + if (attr == NULL) + return 0; + + *flags = ntohl(nla_get_be32(attr)); + if (*flags & ~NFT_SET_ELEM_INTERVAL_END) + return -EINVAL; + if (!(set->flags & NFT_SET_INTERVAL) && + *flags & NFT_SET_ELEM_INTERVAL_END) + return -EINVAL; + + return 0; +} + +static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set, + const struct nlattr *attr) +{ + struct nlattr *nla[NFTA_SET_ELEM_MAX + 1]; + const struct nft_set_ext *ext; + struct nft_data_desc desc; + struct nft_set_elem elem; + struct sk_buff *skb; + uint32_t flags = 0; + void *priv; + int err; + + err = nla_parse_nested(nla, NFTA_SET_ELEM_MAX, attr, + nft_set_elem_policy, NULL); + if (err < 0) + return err; + + if (!nla[NFTA_SET_ELEM_KEY]) + return -EINVAL; + + err = nft_setelem_parse_flags(set, nla[NFTA_SET_ELEM_FLAGS], &flags); + if (err < 0) + return err; + + err = nft_data_init(ctx, &elem.key.val, sizeof(elem.key), &desc, + nla[NFTA_SET_ELEM_KEY]); + if (err < 0) + return err; + + err = -EINVAL; + if (desc.type != NFT_DATA_VALUE || desc.len != set->klen) + return err; + + priv = set->ops->get(ctx->net, set, &elem, flags); + if (IS_ERR(priv)) + return PTR_ERR(priv); + + elem.priv = priv; + ext = nft_set_elem_ext(set, &elem); + + err = -ENOMEM; + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (skb == NULL) + goto err1; + + err = nf_tables_fill_setelem_info(skb, ctx, ctx->seq, ctx->portid, + NFT_MSG_NEWSETELEM, 0, set, &elem); + if (err < 0) + goto err2; + + err = nfnetlink_unicast(skb, ctx->net, ctx->portid, MSG_DONTWAIT); + /* This avoids a loop in nfnetlink. */ + if (err < 0) + goto err1; + + return 0; +err2: + kfree_skb(skb); +err1: + /* this avoids a loop in nfnetlink. */ + return err == -EAGAIN ? -ENOBUFS : err; +} + +static int nf_tables_getsetelem(struct net *net, struct sock *nlsk, + struct sk_buff *skb, const struct nlmsghdr *nlh, + const struct nlattr * const nla[], + struct netlink_ext_ack *extack) +{ + u8 genmask = nft_genmask_cur(net); + struct nft_set *set; + struct nlattr *attr; + struct nft_ctx ctx; + int rem, err = 0; + + err = nft_ctx_init_from_elemattr(&ctx, net, skb, nlh, nla, genmask); + if (err < 0) + return err; + + set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_ELEM_LIST_SET], + genmask); + if (IS_ERR(set)) + return PTR_ERR(set); + + if (nlh->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = nf_tables_dump_set, + .done = nf_tables_dump_set_done, + }; + struct nft_set_dump_ctx *dump_ctx; + + dump_ctx = kmalloc(sizeof(*dump_ctx), GFP_KERNEL); + if (!dump_ctx) + return -ENOMEM; + + dump_ctx->set = set; + dump_ctx->ctx = ctx; + + c.data = dump_ctx; + return netlink_dump_start(nlsk, skb, nlh, &c); + } + + if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS]) + return -EINVAL; + + nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) { + err = nft_get_set_elem(&ctx, set, attr); + if (err < 0) + break; + } + + return err; +} + static void nf_tables_setelem_notify(const struct nft_ctx *ctx, const struct nft_set *set, const struct nft_set_elem *elem, @@ -3681,22 +3862,6 @@ static void nf_tables_set_elem_destroy(const struct nft_set *set, void *elem) kfree(elem); } -static int nft_setelem_parse_flags(const struct nft_set *set, - const struct nlattr *attr, u32 *flags) -{ - if (attr == NULL) - return 0; - - *flags = ntohl(nla_get_be32(attr)); - if (*flags & ~NFT_SET_ELEM_INTERVAL_END) - return -EINVAL; - if (!(set->flags & NFT_SET_INTERVAL) && - *flags & NFT_SET_ELEM_INTERVAL_END) - return -EINVAL; - - return 0; -} - static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set, const struct nlattr *attr, u32 nlmsg_flags) { @@ -4209,7 +4374,7 @@ struct nft_object *nf_tables_obj_lookup(const struct nft_table *table, list_for_each_entry(obj, &table->objects, list) { if (!nla_strcmp(nla, obj->name) && - objtype == obj->type->type && + objtype == obj->ops->type->type && nft_active_genmask(obj, genmask)) return obj; } @@ -4231,6 +4396,7 @@ static struct nft_object *nft_obj_init(const struct nft_ctx *ctx, const struct nlattr *attr) { struct nlattr *tb[type->maxattr + 1]; + const struct nft_object_ops *ops; struct nft_object *obj; int err; @@ -4243,16 +4409,27 @@ static struct nft_object *nft_obj_init(const struct nft_ctx *ctx, memset(tb, 0, sizeof(tb[0]) * (type->maxattr + 1)); } + if (type->select_ops) { + ops = type->select_ops(ctx, (const struct nlattr * const *)tb); + if (IS_ERR(ops)) { + err = PTR_ERR(ops); + goto err1; + } + } else { + ops = type->ops; + } + err = -ENOMEM; - obj = kzalloc(sizeof(struct nft_object) + type->size, GFP_KERNEL); + obj = kzalloc(sizeof(*obj) + ops->size, GFP_KERNEL); if (obj == NULL) goto err1; - err = type->init(ctx, (const struct nlattr * const *)tb, obj); + err = ops->init(ctx, (const struct nlattr * const *)tb, obj); if (err < 0) goto err2; - obj->type = type; + obj->ops = ops; + return obj; err2: kfree(obj); @@ -4268,7 +4445,7 @@ static int nft_object_dump(struct sk_buff *skb, unsigned int attr, nest = nla_nest_start(skb, attr); if (!nest) goto nla_put_failure; - if (obj->type->dump(skb, obj, reset) < 0) + if (obj->ops->dump(skb, obj, reset) < 0) goto nla_put_failure; nla_nest_end(skb, nest); return 0; @@ -4363,18 +4540,24 @@ static int nf_tables_newobj(struct net *net, struct sock *nlsk, goto err1; } obj->table = table; - nla_strlcpy(obj->name, nla[NFTA_OBJ_NAME], NFT_OBJ_MAXNAMELEN); + obj->name = nla_strdup(nla[NFTA_OBJ_NAME], GFP_KERNEL); + if (!obj->name) { + err = -ENOMEM; + goto err2; + } err = nft_trans_obj_add(&ctx, NFT_MSG_NEWOBJ, obj); if (err < 0) - goto err2; + goto err3; list_add_tail_rcu(&obj->list, &table->objects); table->use++; return 0; +err3: + kfree(obj->name); err2: - if (obj->type->destroy) - obj->type->destroy(obj); + if (obj->ops->destroy) + obj->ops->destroy(obj); kfree(obj); err1: module_put(type->owner); @@ -4401,7 +4584,7 @@ static int nf_tables_fill_obj_info(struct sk_buff *skb, struct net *net, if (nla_put_string(skb, NFTA_OBJ_TABLE, table->name) || nla_put_string(skb, NFTA_OBJ_NAME, obj->name) || - nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->type->type)) || + nla_put_be32(skb, NFTA_OBJ_TYPE, htonl(obj->ops->type->type)) || nla_put_be32(skb, NFTA_OBJ_USE, htonl(obj->use)) || nft_object_dump(skb, NFTA_OBJ_DATA, obj, reset)) goto nla_put_failure; @@ -4415,7 +4598,7 @@ nla_put_failure: } struct nft_obj_filter { - char table[NFT_OBJ_MAXNAMELEN]; + char *table; u32 type; }; @@ -4455,7 +4638,7 @@ static int nf_tables_dump_obj(struct sk_buff *skb, struct netlink_callback *cb) goto cont; if (filter && filter->type != NFT_OBJECT_UNSPEC && - obj->type->type != filter->type) + obj->ops->type->type != filter->type) goto cont; if (nf_tables_fill_obj_info(skb, net, NETLINK_CB(cb->skb).portid, @@ -4480,7 +4663,10 @@ done: static int nf_tables_dump_obj_done(struct netlink_callback *cb) { - kfree(cb->data); + struct nft_obj_filter *filter = cb->data; + + kfree(filter->table); + kfree(filter); return 0; } @@ -4494,9 +4680,13 @@ nft_obj_filter_alloc(const struct nlattr * const nla[]) if (!filter) return ERR_PTR(-ENOMEM); - if (nla[NFTA_OBJ_TABLE]) - nla_strlcpy(filter->table, nla[NFTA_OBJ_TABLE], - NFT_TABLE_MAXNAMELEN); + if (nla[NFTA_OBJ_TABLE]) { + filter->table = nla_strdup(nla[NFTA_OBJ_TABLE], GFP_KERNEL); + if (!filter->table) { + kfree(filter); + return ERR_PTR(-ENOMEM); + } + } if (nla[NFTA_OBJ_TYPE]) filter->type = ntohl(nla_get_be32(nla[NFTA_OBJ_TYPE])); @@ -4576,10 +4766,11 @@ err: static void nft_obj_destroy(struct nft_object *obj) { - if (obj->type->destroy) - obj->type->destroy(obj); + if (obj->ops->destroy) + obj->ops->destroy(obj); - module_put(obj->type->owner); + module_put(obj->ops->type->owner); + kfree(obj->name); kfree(obj); } @@ -4662,6 +4853,7 @@ static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net, { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; + char buf[TASK_COMM_LEN]; int event = nfnl_msg_type(NFNL_SUBSYS_NFTABLES, NFT_MSG_NEWGEN); nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct nfgenmsg), 0); @@ -4673,7 +4865,9 @@ static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net, nfmsg->version = NFNETLINK_V0; nfmsg->res_id = htons(net->nft.base_seq & 0xffff); - if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq))) + if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq)) || + nla_put_be32(skb, NFTA_GEN_PROC_PID, htonl(task_pid_nr(current))) || + nla_put_string(skb, NFTA_GEN_PROC_NAME, get_task_comm(buf, current))) goto nla_put_failure; nlmsg_end(skb, nlh); @@ -4842,7 +5036,7 @@ static void nft_chain_commit_update(struct nft_trans *trans) { struct nft_base_chain *basechain; - if (nft_trans_chain_name(trans)[0]) + if (nft_trans_chain_name(trans)) strcpy(trans->ctx.chain->name, nft_trans_chain_name(trans)); if (!nft_is_base_chain(trans->ctx.chain)) diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index 65dbeadcb118..dfd0bf3810d2 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -29,7 +29,7 @@ static const char *const comments[__NFT_TRACETYPE_MAX] = { [NFT_TRACETYPE_RULE] = "rule", }; -static struct nf_loginfo trace_loginfo = { +static const struct nf_loginfo trace_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { .log = { @@ -114,6 +114,22 @@ static bool nft_payload_fast_eval(const struct nft_expr *expr, return true; } +DEFINE_STATIC_KEY_FALSE(nft_counters_enabled); + +static noinline void nft_update_chain_stats(const struct nft_chain *chain, + const struct nft_pktinfo *pkt) +{ + struct nft_stats *stats; + + local_bh_disable(); + stats = this_cpu_ptr(rcu_dereference(nft_base_chain(chain)->stats)); + u64_stats_update_begin(&stats->syncp); + stats->pkts++; + stats->bytes += pkt->skb->len; + u64_stats_update_end(&stats->syncp); + local_bh_enable(); +} + struct nft_jumpstack { const struct nft_chain *chain; const struct nft_rule *rule; @@ -130,7 +146,6 @@ nft_do_chain(struct nft_pktinfo *pkt, void *priv) struct nft_regs regs; unsigned int stackptr = 0; struct nft_jumpstack jumpstack[NFT_JUMP_STACK_SIZE]; - struct nft_stats *stats; int rulenum; unsigned int gencursor = nft_genmask_cur(net); struct nft_traceinfo info; @@ -220,13 +235,8 @@ next_rule: nft_trace_packet(&info, basechain, NULL, -1, NFT_TRACETYPE_POLICY); - rcu_read_lock_bh(); - stats = this_cpu_ptr(rcu_dereference(nft_base_chain(basechain)->stats)); - u64_stats_update_begin(&stats->syncp); - stats->pkts++; - stats->bytes += pkt->skb->len; - u64_stats_update_end(&stats->syncp); - rcu_read_unlock_bh(); + if (static_branch_unlikely(&nft_counters_enabled)) + nft_update_chain_stats(basechain, pkt); return nft_base_chain(basechain)->policy; } diff --git a/net/netfilter/nf_tables_trace.c b/net/netfilter/nf_tables_trace.c index e1b15e7a5793..e1dc527a493b 100644 --- a/net/netfilter/nf_tables_trace.c +++ b/net/netfilter/nf_tables_trace.c @@ -162,6 +162,27 @@ static int nf_trace_fill_rule_info(struct sk_buff *nlskb, NFTA_TRACE_PAD); } +static bool nft_trace_have_verdict_chain(struct nft_traceinfo *info) +{ + switch (info->type) { + case NFT_TRACETYPE_RETURN: + case NFT_TRACETYPE_RULE: + break; + default: + return false; + } + + switch (info->verdict->code) { + case NFT_JUMP: + case NFT_GOTO: + break; + default: + return false; + } + + return true; +} + void nft_trace_notify(struct nft_traceinfo *info) { const struct nft_pktinfo *pkt = info->pkt; @@ -175,13 +196,12 @@ void nft_trace_notify(struct nft_traceinfo *info) return; size = nlmsg_total_size(sizeof(struct nfgenmsg)) + - nla_total_size(NFT_TABLE_MAXNAMELEN) + - nla_total_size(NFT_CHAIN_MAXNAMELEN) + + nla_total_size(strlen(info->chain->table->name)) + + nla_total_size(strlen(info->chain->name)) + nla_total_size_64bit(sizeof(__be64)) + /* rule handle */ nla_total_size(sizeof(__be32)) + /* trace type */ nla_total_size(0) + /* VERDICT, nested */ nla_total_size(sizeof(u32)) + /* verdict code */ - nla_total_size(NFT_CHAIN_MAXNAMELEN) + /* jump target */ nla_total_size(sizeof(u32)) + /* id */ nla_total_size(NFT_TRACETYPE_LL_HSIZE) + nla_total_size(NFT_TRACETYPE_NETWORK_HSIZE) + @@ -194,6 +214,9 @@ void nft_trace_notify(struct nft_traceinfo *info) nla_total_size(sizeof(u32)) + /* nfproto */ nla_total_size(sizeof(u32)); /* policy */ + if (nft_trace_have_verdict_chain(info)) + size += nla_total_size(strlen(info->verdict->chain->name)); /* jump target */ + skb = nlmsg_new(size, GFP_ATOMIC); if (!skb) return; @@ -217,14 +240,11 @@ void nft_trace_notify(struct nft_traceinfo *info) if (trace_fill_id(skb, pkt->skb)) goto nla_put_failure; - if (info->chain) { - if (nla_put_string(skb, NFTA_TRACE_CHAIN, - info->chain->name)) - goto nla_put_failure; - if (nla_put_string(skb, NFTA_TRACE_TABLE, - info->chain->table->name)) - goto nla_put_failure; - } + if (nla_put_string(skb, NFTA_TRACE_CHAIN, info->chain->name)) + goto nla_put_failure; + + if (nla_put_string(skb, NFTA_TRACE_TABLE, info->chain->table->name)) + goto nla_put_failure; if (nf_trace_fill_rule_info(skb, info)) goto nla_put_failure; diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index 400e9ae97153..32b1c0b44e79 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -47,7 +47,8 @@ static const struct nla_policy cttimeout_nla_policy[CTA_TIMEOUT_MAX+1] = { }; static int -ctnl_timeout_parse_policy(void *timeouts, struct nf_conntrack_l4proto *l4proto, +ctnl_timeout_parse_policy(void *timeouts, + const struct nf_conntrack_l4proto *l4proto, struct net *net, const struct nlattr *attr) { int ret = 0; @@ -74,7 +75,7 @@ static int cttimeout_new_timeout(struct net *net, struct sock *ctnl, { __u16 l3num; __u8 l4num; - struct nf_conntrack_l4proto *l4proto; + const struct nf_conntrack_l4proto *l4proto; struct ctnl_timeout *timeout, *matching = NULL; char *name; int ret; @@ -158,7 +159,7 @@ ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; unsigned int flags = portid ? NLM_F_MULTI : 0; - struct nf_conntrack_l4proto *l4proto = timeout->l4proto; + const struct nf_conntrack_l4proto *l4proto = timeout->l4proto; event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK_TIMEOUT, event); nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); @@ -363,10 +364,10 @@ static int cttimeout_default_set(struct net *net, struct sock *ctnl, const struct nlattr * const cda[], struct netlink_ext_ack *extack) { + const struct nf_conntrack_l4proto *l4proto; + unsigned int *timeouts; __u16 l3num; __u8 l4num; - struct nf_conntrack_l4proto *l4proto; - unsigned int *timeouts; int ret; if (!cda[CTA_TIMEOUT_L3PROTO] || @@ -401,7 +402,7 @@ err: static int cttimeout_default_fill_info(struct net *net, struct sk_buff *skb, u32 portid, u32 seq, u32 type, int event, - struct nf_conntrack_l4proto *l4proto) + const struct nf_conntrack_l4proto *l4proto) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; @@ -453,11 +454,11 @@ static int cttimeout_default_get(struct net *net, struct sock *ctnl, const struct nlattr * const cda[], struct netlink_ext_ack *extack) { - __u16 l3num; - __u8 l4num; - struct nf_conntrack_l4proto *l4proto; + const struct nf_conntrack_l4proto *l4proto; struct sk_buff *skb2; int ret, err; + __u16 l3num; + __u8 l4num; if (!cda[CTA_TIMEOUT_L3PROTO] || !cda[CTA_TIMEOUT_L4PROTO]) return -EINVAL; @@ -505,7 +506,6 @@ ctnl_timeout_find_get(struct net *net, const char *name) { struct ctnl_timeout *timeout, *matching = NULL; - rcu_read_lock(); list_for_each_entry_rcu(timeout, &net->nfct_timeout_list, head) { if (strncmp(timeout->name, name, CTNL_TIMEOUT_NAME_MAX) != 0) continue; @@ -521,7 +521,6 @@ ctnl_timeout_find_get(struct net *net, const char *name) break; } err: - rcu_read_unlock(); return matching; } @@ -572,6 +571,7 @@ static void __net_exit cttimeout_net_exit(struct net *net) { struct ctnl_timeout *cur, *tmp; + nf_ct_unconfirmed_destroy(net); ctnl_untimeout(net, NULL); list_for_each_entry_safe(cur, tmp, &net->nfct_timeout_list, head) { diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index c684ba95dbb4..cad6498f10b0 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -606,7 +606,7 @@ nla_put_failure: return -1; } -static struct nf_loginfo default_loginfo = { +static const struct nf_loginfo default_loginfo = { .type = NF_LOG_TYPE_ULOG, .u = { .ulog = { diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index 16fa04086880..a16356cacec3 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -41,6 +41,10 @@ #include "../bridge/br_private.h" #endif +#if IS_ENABLED(CONFIG_NF_CONNTRACK) +#include <net/netfilter/nf_conntrack.h> +#endif + #define NFQNL_QMAX_DEFAULT 1024 /* We're using struct nlattr which has 16bit nla_len. Note that nla_len @@ -397,7 +401,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, outdev = entry->state.out; - switch ((enum nfqnl_config_mode)ACCESS_ONCE(queue->copy_mode)) { + switch ((enum nfqnl_config_mode)READ_ONCE(queue->copy_mode)) { case NFQNL_COPY_META: case NFQNL_COPY_NONE: break; @@ -408,7 +412,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, skb_checksum_help(entskb)) return NULL; - data_len = ACCESS_ONCE(queue->copy_range); + data_len = READ_ONCE(queue->copy_range); if (data_len > entskb->len) data_len = entskb->len; @@ -612,6 +616,18 @@ nlmsg_failure: return NULL; } +static bool nf_ct_drop_unconfirmed(const struct nf_queue_entry *entry) +{ +#if IS_ENABLED(CONFIG_NF_CONNTRACK) + static const unsigned long flags = IPS_CONFIRMED | IPS_DYING; + const struct nf_conn *ct = (void *)skb_nfct(entry->skb); + + if (ct && ((ct->status & flags) == IPS_DYING)) + return true; +#endif + return false; +} + static int __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue, struct nf_queue_entry *entry) @@ -628,6 +644,9 @@ __nfqnl_enqueue_packet(struct net *net, struct nfqnl_instance *queue, } spin_lock_bh(&queue->lock); + if (nf_ct_drop_unconfirmed(entry)) + goto err_out_free_nskb; + if (queue->queue_total >= queue->queue_maxlen) { if (queue->flags & NFQA_CFG_F_FAIL_OPEN) { failopen = 1; @@ -928,7 +947,6 @@ static unsigned int nfqnl_nf_hook_drop(struct net *net) unsigned int instances = 0; int i; - rcu_read_lock(); for (i = 0; i < INSTANCE_BUCKETS; i++) { struct nfqnl_instance *inst; struct hlist_head *head = &q->instance_table[i]; @@ -938,7 +956,6 @@ static unsigned int nfqnl_nf_hook_drop(struct net *net) instances++; } } - rcu_read_unlock(); return instances; } diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index f5a7cb68694e..b89f4f65b2a0 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -305,7 +305,7 @@ static int nft_target_validate(const struct nft_ctx *ctx, const struct nf_hook_ops *ops = &basechain->ops[0]; hook_mask = 1 << ops->hooknum; - if (!(hook_mask & target->hooks)) + if (target->hooks && !(hook_mask & target->hooks)) return -EINVAL; ret = nft_compat_chain_validate_dependency(target->table, @@ -484,7 +484,7 @@ static int nft_match_validate(const struct nft_ctx *ctx, const struct nf_hook_ops *ops = &basechain->ops[0]; hook_mask = 1 << ops->hooknum; - if (!(hook_mask & match->hooks)) + if (match->hooks && !(hook_mask & match->hooks)) return -EINVAL; ret = nft_compat_chain_validate_dependency(match->table, diff --git a/net/netfilter/nft_counter.c b/net/netfilter/nft_counter.c index 67a710ebde09..eefe3b409925 100644 --- a/net/netfilter/nft_counter.c +++ b/net/netfilter/nft_counter.c @@ -175,15 +175,21 @@ static const struct nla_policy nft_counter_policy[NFTA_COUNTER_MAX + 1] = { [NFTA_COUNTER_BYTES] = { .type = NLA_U64 }, }; -static struct nft_object_type nft_counter_obj __read_mostly = { - .type = NFT_OBJECT_COUNTER, +static struct nft_object_type nft_counter_obj_type; +static const struct nft_object_ops nft_counter_obj_ops = { + .type = &nft_counter_obj_type, .size = sizeof(struct nft_counter_percpu_priv), - .maxattr = NFTA_COUNTER_MAX, - .policy = nft_counter_policy, .eval = nft_counter_obj_eval, .init = nft_counter_obj_init, .destroy = nft_counter_obj_destroy, .dump = nft_counter_obj_dump, +}; + +static struct nft_object_type nft_counter_obj_type __read_mostly = { + .type = NFT_OBJECT_COUNTER, + .ops = &nft_counter_obj_ops, + .maxattr = NFTA_COUNTER_MAX, + .policy = nft_counter_policy, .owner = THIS_MODULE, }; @@ -271,7 +277,7 @@ static int __init nft_counter_module_init(void) for_each_possible_cpu(cpu) seqcount_init(per_cpu_ptr(&nft_counter_seq, cpu)); - err = nft_register_obj(&nft_counter_obj); + err = nft_register_obj(&nft_counter_obj_type); if (err < 0) return err; @@ -281,14 +287,14 @@ static int __init nft_counter_module_init(void) return 0; err1: - nft_unregister_obj(&nft_counter_obj); + nft_unregister_obj(&nft_counter_obj_type); return err; } static void __exit nft_counter_module_exit(void) { nft_unregister_expr(&nft_counter_type); - nft_unregister_obj(&nft_counter_obj); + nft_unregister_obj(&nft_counter_obj_type); } module_init(nft_counter_module_init); diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 1678e9e75e8e..2647b895f4b0 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -312,39 +312,6 @@ static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = { [NFTA_CT_SREG] = { .type = NLA_U32 }, }; -static int nft_ct_netns_get(struct net *net, uint8_t family) -{ - int err; - - if (family == NFPROTO_INET) { - err = nf_ct_netns_get(net, NFPROTO_IPV4); - if (err < 0) - goto err1; - err = nf_ct_netns_get(net, NFPROTO_IPV6); - if (err < 0) - goto err2; - } else { - err = nf_ct_netns_get(net, family); - if (err < 0) - goto err1; - } - return 0; - -err2: - nf_ct_netns_put(net, NFPROTO_IPV4); -err1: - return err; -} - -static void nft_ct_netns_put(struct net *net, uint8_t family) -{ - if (family == NFPROTO_INET) { - nf_ct_netns_put(net, NFPROTO_IPV4); - nf_ct_netns_put(net, NFPROTO_IPV6); - } else - nf_ct_netns_put(net, family); -} - #ifdef CONFIG_NF_CONNTRACK_ZONES static void nft_ct_tmpl_put_pcpu(void) { @@ -489,7 +456,7 @@ static int nft_ct_get_init(const struct nft_ctx *ctx, if (err < 0) return err; - err = nft_ct_netns_get(ctx->net, ctx->afi->family); + err = nf_ct_netns_get(ctx->net, ctx->afi->family); if (err < 0) return err; @@ -583,7 +550,7 @@ static int nft_ct_set_init(const struct nft_ctx *ctx, if (err < 0) goto err1; - err = nft_ct_netns_get(ctx->net, ctx->afi->family); + err = nf_ct_netns_get(ctx->net, ctx->afi->family); if (err < 0) goto err1; @@ -606,7 +573,7 @@ static void nft_ct_set_destroy(const struct nft_ctx *ctx, struct nft_ct *priv = nft_expr_priv(expr); __nft_ct_set_destroy(ctx, priv); - nft_ct_netns_put(ctx->net, ctx->afi->family); + nf_ct_netns_put(ctx->net, ctx->afi->family); } static int nft_ct_get_dump(struct sk_buff *skb, const struct nft_expr *expr) @@ -904,15 +871,21 @@ static const struct nla_policy nft_ct_helper_policy[NFTA_CT_HELPER_MAX + 1] = { [NFTA_CT_HELPER_L4PROTO] = { .type = NLA_U8 }, }; -static struct nft_object_type nft_ct_helper_obj __read_mostly = { - .type = NFT_OBJECT_CT_HELPER, +static struct nft_object_type nft_ct_helper_obj_type; +static const struct nft_object_ops nft_ct_helper_obj_ops = { + .type = &nft_ct_helper_obj_type, .size = sizeof(struct nft_ct_helper_obj), - .maxattr = NFTA_CT_HELPER_MAX, - .policy = nft_ct_helper_policy, .eval = nft_ct_helper_obj_eval, .init = nft_ct_helper_obj_init, .destroy = nft_ct_helper_obj_destroy, .dump = nft_ct_helper_obj_dump, +}; + +static struct nft_object_type nft_ct_helper_obj_type __read_mostly = { + .type = NFT_OBJECT_CT_HELPER, + .ops = &nft_ct_helper_obj_ops, + .maxattr = NFTA_CT_HELPER_MAX, + .policy = nft_ct_helper_policy, .owner = THIS_MODULE, }; @@ -930,7 +903,7 @@ static int __init nft_ct_module_init(void) if (err < 0) goto err1; - err = nft_register_obj(&nft_ct_helper_obj); + err = nft_register_obj(&nft_ct_helper_obj_type); if (err < 0) goto err2; @@ -945,7 +918,7 @@ err1: static void __exit nft_ct_module_exit(void) { - nft_unregister_obj(&nft_ct_helper_obj); + nft_unregister_obj(&nft_ct_helper_obj_type); nft_unregister_expr(&nft_notrack_type); nft_unregister_expr(&nft_ct_type); } diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c index 1ec49fe5845f..a0a93d987a3b 100644 --- a/net/netfilter/nft_exthdr.c +++ b/net/netfilter/nft_exthdr.c @@ -8,6 +8,7 @@ * Development of this code funded by Astaro AG (http://www.astaro.com/) */ +#include <asm/unaligned.h> #include <linux/kernel.h> #include <linux/init.h> #include <linux/module.h> @@ -23,6 +24,7 @@ struct nft_exthdr { u8 len; u8 op; enum nft_registers dreg:8; + enum nft_registers sreg:8; u8 flags; }; @@ -61,6 +63,26 @@ err: regs->verdict.code = NFT_BREAK; } +static void * +nft_tcp_header_pointer(const struct nft_pktinfo *pkt, + unsigned int len, void *buffer, unsigned int *tcphdr_len) +{ + struct tcphdr *tcph; + + if (!pkt->tprot_set || pkt->tprot != IPPROTO_TCP) + return NULL; + + tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, sizeof(*tcph), buffer); + if (!tcph) + return NULL; + + *tcphdr_len = __tcp_hdrlen(tcph); + if (*tcphdr_len < sizeof(*tcph) || *tcphdr_len > len) + return NULL; + + return skb_header_pointer(pkt->skb, pkt->xt.thoff, *tcphdr_len, buffer); +} + static void nft_exthdr_tcp_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -72,18 +94,7 @@ static void nft_exthdr_tcp_eval(const struct nft_expr *expr, struct tcphdr *tcph; u8 *opt; - if (!pkt->tprot_set || pkt->tprot != IPPROTO_TCP) - goto err; - - tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, sizeof(*tcph), buff); - if (!tcph) - goto err; - - tcphdr_len = __tcp_hdrlen(tcph); - if (tcphdr_len < sizeof(*tcph)) - goto err; - - tcph = skb_header_pointer(pkt->skb, pkt->xt.thoff, tcphdr_len, buff); + tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len); if (!tcph) goto err; @@ -115,6 +126,88 @@ err: regs->verdict.code = NFT_BREAK; } +static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + u8 buff[sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE]; + struct nft_exthdr *priv = nft_expr_priv(expr); + unsigned int i, optl, tcphdr_len, offset; + struct tcphdr *tcph; + u8 *opt; + u32 src; + + tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len); + if (!tcph) + return; + + opt = (u8 *)tcph; + for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) { + union { + u8 octet; + __be16 v16; + __be32 v32; + } old, new; + + optl = optlen(opt, i); + + if (priv->type != opt[i]) + continue; + + if (i + optl > tcphdr_len || priv->len + priv->offset > optl) + return; + + if (!skb_make_writable(pkt->skb, pkt->xt.thoff + i + priv->len)) + return; + + tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, + &tcphdr_len); + if (!tcph) + return; + + src = regs->data[priv->sreg]; + offset = i + priv->offset; + + switch (priv->len) { + case 2: + old.v16 = get_unaligned((u16 *)(opt + offset)); + new.v16 = src; + + switch (priv->type) { + case TCPOPT_MSS: + /* increase can cause connection to stall */ + if (ntohs(old.v16) <= ntohs(new.v16)) + return; + break; + } + + if (old.v16 == new.v16) + return; + + put_unaligned(new.v16, (u16*)(opt + offset)); + inet_proto_csum_replace2(&tcph->check, pkt->skb, + old.v16, new.v16, false); + break; + case 4: + new.v32 = src; + old.v32 = get_unaligned((u32 *)(opt + offset)); + + if (old.v32 == new.v32) + return; + + put_unaligned(new.v32, (u32*)(opt + offset)); + inet_proto_csum_replace4(&tcph->check, pkt->skb, + old.v32, new.v32, false); + break; + default: + WARN_ON_ONCE(1); + break; + } + + return; + } +} + static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = { [NFTA_EXTHDR_DREG] = { .type = NLA_U32 }, [NFTA_EXTHDR_TYPE] = { .type = NLA_U8 }, @@ -171,12 +264,57 @@ static int nft_exthdr_init(const struct nft_ctx *ctx, NFT_DATA_VALUE, priv->len); } -static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr) +static int nft_exthdr_tcp_set_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) { - const struct nft_exthdr *priv = nft_expr_priv(expr); + struct nft_exthdr *priv = nft_expr_priv(expr); + u32 offset, len, flags = 0, op = NFT_EXTHDR_OP_IPV6; + int err; - if (nft_dump_register(skb, NFTA_EXTHDR_DREG, priv->dreg)) - goto nla_put_failure; + if (!tb[NFTA_EXTHDR_SREG] || + !tb[NFTA_EXTHDR_TYPE] || + !tb[NFTA_EXTHDR_OFFSET] || + !tb[NFTA_EXTHDR_LEN]) + return -EINVAL; + + if (tb[NFTA_EXTHDR_DREG] || tb[NFTA_EXTHDR_FLAGS]) + return -EINVAL; + + err = nft_parse_u32_check(tb[NFTA_EXTHDR_OFFSET], U8_MAX, &offset); + if (err < 0) + return err; + + err = nft_parse_u32_check(tb[NFTA_EXTHDR_LEN], U8_MAX, &len); + if (err < 0) + return err; + + if (offset < 2) + return -EOPNOTSUPP; + + switch (len) { + case 2: break; + case 4: break; + default: + return -EOPNOTSUPP; + } + + err = nft_parse_u32_check(tb[NFTA_EXTHDR_OP], U8_MAX, &op); + if (err < 0) + return err; + + priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]); + priv->offset = offset; + priv->len = len; + priv->sreg = nft_parse_register(tb[NFTA_EXTHDR_SREG]); + priv->flags = flags; + priv->op = op; + + return nft_validate_register_load(priv->sreg, priv->len); +} + +static int nft_exthdr_dump_common(struct sk_buff *skb, const struct nft_exthdr *priv) +{ if (nla_put_u8(skb, NFTA_EXTHDR_TYPE, priv->type)) goto nla_put_failure; if (nla_put_be32(skb, NFTA_EXTHDR_OFFSET, htonl(priv->offset))) @@ -193,6 +331,26 @@ nla_put_failure: return -1; } +static int nft_exthdr_dump(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_exthdr *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_EXTHDR_DREG, priv->dreg)) + return -1; + + return nft_exthdr_dump_common(skb, priv); +} + +static int nft_exthdr_dump_set(struct sk_buff *skb, const struct nft_expr *expr) +{ + const struct nft_exthdr *priv = nft_expr_priv(expr); + + if (nft_dump_register(skb, NFTA_EXTHDR_SREG, priv->sreg)) + return -1; + + return nft_exthdr_dump_common(skb, priv); +} + static struct nft_expr_type nft_exthdr_type; static const struct nft_expr_ops nft_exthdr_ipv6_ops = { .type = &nft_exthdr_type, @@ -210,6 +368,14 @@ static const struct nft_expr_ops nft_exthdr_tcp_ops = { .dump = nft_exthdr_dump, }; +static const struct nft_expr_ops nft_exthdr_tcp_set_ops = { + .type = &nft_exthdr_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)), + .eval = nft_exthdr_tcp_set_eval, + .init = nft_exthdr_tcp_set_init, + .dump = nft_exthdr_dump_set, +}; + static const struct nft_expr_ops * nft_exthdr_select_ops(const struct nft_ctx *ctx, const struct nlattr * const tb[]) @@ -219,12 +385,21 @@ nft_exthdr_select_ops(const struct nft_ctx *ctx, if (!tb[NFTA_EXTHDR_OP]) return &nft_exthdr_ipv6_ops; - op = ntohl(nla_get_u32(tb[NFTA_EXTHDR_OP])); + if (tb[NFTA_EXTHDR_SREG] && tb[NFTA_EXTHDR_DREG]) + return ERR_PTR(-EOPNOTSUPP); + + op = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OP])); switch (op) { case NFT_EXTHDR_OP_TCPOPT: - return &nft_exthdr_tcp_ops; + if (tb[NFTA_EXTHDR_SREG]) + return &nft_exthdr_tcp_set_ops; + if (tb[NFTA_EXTHDR_DREG]) + return &nft_exthdr_tcp_ops; + break; case NFT_EXTHDR_OP_IPV6: - return &nft_exthdr_ipv6_ops; + if (tb[NFTA_EXTHDR_DREG]) + return &nft_exthdr_ipv6_ops; + break; } return ERR_PTR(-EOPNOTSUPP); diff --git a/net/netfilter/nft_fib_netdev.c b/net/netfilter/nft_fib_netdev.c new file mode 100644 index 000000000000..3997ee36cfbd --- /dev/null +++ b/net/netfilter/nft_fib_netdev.c @@ -0,0 +1,87 @@ +/* + * Copyright (c) 2017 Pablo M. Bermudo Garay <pablombg@gmail.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This code is based on net/netfilter/nft_fib_inet.c, written by + * Florian Westphal <fw@strlen.de>. + */ + +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/netlink.h> +#include <linux/netfilter.h> +#include <linux/netfilter/nf_tables.h> +#include <net/netfilter/nf_tables_core.h> +#include <net/netfilter/nf_tables.h> + +#include <net/netfilter/nft_fib.h> + +static void nft_fib_netdev_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + const struct nft_fib *priv = nft_expr_priv(expr); + + switch (ntohs(pkt->skb->protocol)) { + case ETH_P_IP: + switch (priv->result) { + case NFT_FIB_RESULT_OIF: + case NFT_FIB_RESULT_OIFNAME: + return nft_fib4_eval(expr, regs, pkt); + case NFT_FIB_RESULT_ADDRTYPE: + return nft_fib4_eval_type(expr, regs, pkt); + } + break; + case ETH_P_IPV6: + switch (priv->result) { + case NFT_FIB_RESULT_OIF: + case NFT_FIB_RESULT_OIFNAME: + return nft_fib6_eval(expr, regs, pkt); + case NFT_FIB_RESULT_ADDRTYPE: + return nft_fib6_eval_type(expr, regs, pkt); + } + break; + } + + regs->verdict.code = NFT_BREAK; +} + +static struct nft_expr_type nft_fib_netdev_type; +static const struct nft_expr_ops nft_fib_netdev_ops = { + .type = &nft_fib_netdev_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_fib)), + .eval = nft_fib_netdev_eval, + .init = nft_fib_init, + .dump = nft_fib_dump, + .validate = nft_fib_validate, +}; + +static struct nft_expr_type nft_fib_netdev_type __read_mostly = { + .family = NFPROTO_NETDEV, + .name = "fib", + .ops = &nft_fib_netdev_ops, + .policy = nft_fib_policy, + .maxattr = NFTA_FIB_MAX, + .owner = THIS_MODULE, +}; + +static int __init nft_fib_netdev_module_init(void) +{ + return nft_register_expr(&nft_fib_netdev_type); +} + +static void __exit nft_fib_netdev_module_exit(void) +{ + nft_unregister_expr(&nft_fib_netdev_type); +} + +module_init(nft_fib_netdev_module_init); +module_exit(nft_fib_netdev_module_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Pablo M. Bermudo Garay <pablombg@gmail.com>"); +MODULE_ALIAS_NFT_AF_EXPR(5, "fib"); diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index 18dd57a52651..a9fc298ef4c3 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -65,19 +65,23 @@ static int nft_limit_init(struct nft_limit *limit, limit->nsecs = unit * NSEC_PER_SEC; if (limit->rate == 0 || limit->nsecs < unit) return -EOVERFLOW; - limit->tokens = limit->tokens_max = limit->nsecs; - - if (tb[NFTA_LIMIT_BURST]) { - u64 rate; + if (tb[NFTA_LIMIT_BURST]) limit->burst = ntohl(nla_get_be32(tb[NFTA_LIMIT_BURST])); + else + limit->burst = 0; - rate = limit->rate + limit->burst; - if (rate < limit->rate) - return -EOVERFLOW; + if (limit->rate + limit->burst < limit->rate) + return -EOVERFLOW; + + /* The token bucket size limits the number of tokens can be + * accumulated. tokens_max specifies the bucket size. + * tokens_max = unit * (rate + burst) / rate. + */ + limit->tokens = div_u64(limit->nsecs * (limit->rate + limit->burst), + limit->rate); + limit->tokens_max = limit->tokens; - limit->rate = rate; - } if (tb[NFTA_LIMIT_FLAGS]) { u32 flags = ntohl(nla_get_be32(tb[NFTA_LIMIT_FLAGS])); @@ -95,9 +99,8 @@ static int nft_limit_dump(struct sk_buff *skb, const struct nft_limit *limit, { u32 flags = limit->invert ? NFT_LIMIT_F_INV : 0; u64 secs = div_u64(limit->nsecs, NSEC_PER_SEC); - u64 rate = limit->rate - limit->burst; - if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(rate), + if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(limit->rate), NFTA_LIMIT_PAD) || nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(secs), NFTA_LIMIT_PAD) || @@ -165,9 +168,9 @@ static const struct nft_expr_ops nft_limit_pkts_ops = { .dump = nft_limit_pkts_dump, }; -static void nft_limit_pkt_bytes_eval(const struct nft_expr *expr, - struct nft_regs *regs, - const struct nft_pktinfo *pkt) +static void nft_limit_bytes_eval(const struct nft_expr *expr, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) { struct nft_limit *priv = nft_expr_priv(expr); u64 cost = div64_u64(priv->nsecs * pkt->skb->len, priv->rate); @@ -176,29 +179,29 @@ static void nft_limit_pkt_bytes_eval(const struct nft_expr *expr, regs->verdict.code = NFT_BREAK; } -static int nft_limit_pkt_bytes_init(const struct nft_ctx *ctx, - const struct nft_expr *expr, - const struct nlattr * const tb[]) +static int nft_limit_bytes_init(const struct nft_ctx *ctx, + const struct nft_expr *expr, + const struct nlattr * const tb[]) { struct nft_limit *priv = nft_expr_priv(expr); return nft_limit_init(priv, tb); } -static int nft_limit_pkt_bytes_dump(struct sk_buff *skb, - const struct nft_expr *expr) +static int nft_limit_bytes_dump(struct sk_buff *skb, + const struct nft_expr *expr) { const struct nft_limit *priv = nft_expr_priv(expr); return nft_limit_dump(skb, priv, NFT_LIMIT_PKT_BYTES); } -static const struct nft_expr_ops nft_limit_pkt_bytes_ops = { +static const struct nft_expr_ops nft_limit_bytes_ops = { .type = &nft_limit_type, .size = NFT_EXPR_SIZE(sizeof(struct nft_limit)), - .eval = nft_limit_pkt_bytes_eval, - .init = nft_limit_pkt_bytes_init, - .dump = nft_limit_pkt_bytes_dump, + .eval = nft_limit_bytes_eval, + .init = nft_limit_bytes_init, + .dump = nft_limit_bytes_dump, }; static const struct nft_expr_ops * @@ -212,7 +215,7 @@ nft_limit_select_ops(const struct nft_ctx *ctx, case NFT_LIMIT_PKTS: return &nft_limit_pkts_ops; case NFT_LIMIT_PKT_BYTES: - return &nft_limit_pkt_bytes_ops; + return &nft_limit_bytes_ops; } return ERR_PTR(-EOPNOTSUPP); } @@ -226,14 +229,133 @@ static struct nft_expr_type nft_limit_type __read_mostly = { .owner = THIS_MODULE, }; +static void nft_limit_obj_pkts_eval(struct nft_object *obj, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_limit_pkts *priv = nft_obj_data(obj); + + if (nft_limit_eval(&priv->limit, priv->cost)) + regs->verdict.code = NFT_BREAK; +} + +static int nft_limit_obj_pkts_init(const struct nft_ctx *ctx, + const struct nlattr * const tb[], + struct nft_object *obj) +{ + struct nft_limit_pkts *priv = nft_obj_data(obj); + int err; + + err = nft_limit_init(&priv->limit, tb); + if (err < 0) + return err; + + priv->cost = div64_u64(priv->limit.nsecs, priv->limit.rate); + return 0; +} + +static int nft_limit_obj_pkts_dump(struct sk_buff *skb, + struct nft_object *obj, + bool reset) +{ + const struct nft_limit_pkts *priv = nft_obj_data(obj); + + return nft_limit_dump(skb, &priv->limit, NFT_LIMIT_PKTS); +} + +static struct nft_object_type nft_limit_obj_type; +static const struct nft_object_ops nft_limit_obj_pkts_ops = { + .type = &nft_limit_obj_type, + .size = NFT_EXPR_SIZE(sizeof(struct nft_limit_pkts)), + .init = nft_limit_obj_pkts_init, + .eval = nft_limit_obj_pkts_eval, + .dump = nft_limit_obj_pkts_dump, +}; + +static void nft_limit_obj_bytes_eval(struct nft_object *obj, + struct nft_regs *regs, + const struct nft_pktinfo *pkt) +{ + struct nft_limit *priv = nft_obj_data(obj); + u64 cost = div64_u64(priv->nsecs * pkt->skb->len, priv->rate); + + if (nft_limit_eval(priv, cost)) + regs->verdict.code = NFT_BREAK; +} + +static int nft_limit_obj_bytes_init(const struct nft_ctx *ctx, + const struct nlattr * const tb[], + struct nft_object *obj) +{ + struct nft_limit *priv = nft_obj_data(obj); + + return nft_limit_init(priv, tb); +} + +static int nft_limit_obj_bytes_dump(struct sk_buff *skb, + struct nft_object *obj, + bool reset) +{ + const struct nft_limit *priv = nft_obj_data(obj); + + return nft_limit_dump(skb, priv, NFT_LIMIT_PKT_BYTES); +} + +static struct nft_object_type nft_limit_obj_type; +static const struct nft_object_ops nft_limit_obj_bytes_ops = { + .type = &nft_limit_obj_type, + .size = sizeof(struct nft_limit), + .init = nft_limit_obj_bytes_init, + .eval = nft_limit_obj_bytes_eval, + .dump = nft_limit_obj_bytes_dump, +}; + +static const struct nft_object_ops * +nft_limit_obj_select_ops(const struct nft_ctx *ctx, + const struct nlattr * const tb[]) +{ + if (!tb[NFTA_LIMIT_TYPE]) + return &nft_limit_obj_pkts_ops; + + switch (ntohl(nla_get_be32(tb[NFTA_LIMIT_TYPE]))) { + case NFT_LIMIT_PKTS: + return &nft_limit_obj_pkts_ops; + case NFT_LIMIT_PKT_BYTES: + return &nft_limit_obj_bytes_ops; + } + return ERR_PTR(-EOPNOTSUPP); +} + +static struct nft_object_type nft_limit_obj_type __read_mostly = { + .select_ops = nft_limit_obj_select_ops, + .type = NFT_OBJECT_LIMIT, + .maxattr = NFTA_LIMIT_MAX, + .policy = nft_limit_policy, + .owner = THIS_MODULE, +}; + static int __init nft_limit_module_init(void) { - return nft_register_expr(&nft_limit_type); + int err; + + err = nft_register_obj(&nft_limit_obj_type); + if (err < 0) + return err; + + err = nft_register_expr(&nft_limit_type); + if (err < 0) + goto err1; + + return 0; +err1: + nft_unregister_obj(&nft_limit_obj_type); + return err; } static void __exit nft_limit_module_exit(void) { nft_unregister_expr(&nft_limit_type); + nft_unregister_obj(&nft_limit_obj_type); } module_init(nft_limit_module_init); @@ -242,3 +364,4 @@ module_exit(nft_limit_module_exit); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>"); MODULE_ALIAS_NFT_EXPR("limit"); +MODULE_ALIAS_NFT_OBJ(NFT_OBJECT_LIMIT); diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index 1dd428fbaaa3..7bcdc48f3d73 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -22,7 +22,7 @@ static void nft_objref_eval(const struct nft_expr *expr, { struct nft_object *obj = nft_objref_priv(expr); - obj->type->eval(obj, regs, pkt); + obj->ops->eval(obj, regs, pkt); } static int nft_objref_init(const struct nft_ctx *ctx, @@ -54,7 +54,8 @@ static int nft_objref_dump(struct sk_buff *skb, const struct nft_expr *expr) const struct nft_object *obj = nft_objref_priv(expr); if (nla_put_string(skb, NFTA_OBJREF_IMM_NAME, obj->name) || - nla_put_be32(skb, NFTA_OBJREF_IMM_TYPE, htonl(obj->type->type))) + nla_put_be32(skb, NFTA_OBJREF_IMM_TYPE, + htonl(obj->ops->type->type))) goto nla_put_failure; return 0; @@ -104,7 +105,7 @@ static void nft_objref_map_eval(const struct nft_expr *expr, return; } obj = *nft_set_ext_obj(ext); - obj->type->eval(obj, regs, pkt); + obj->ops->eval(obj, regs, pkt); } static int nft_objref_map_init(const struct nft_ctx *ctx, diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index 7d699bbd45b0..e110b0ebbf58 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -184,7 +184,7 @@ static bool nft_payload_udp_checksum(struct sk_buff *skb, unsigned int thoff) if (!uh) return false; - return uh->check; + return (__force bool)uh->check; } static int nft_payload_l4csum_offset(const struct nft_pktinfo *pkt, diff --git a/net/netfilter/nft_quota.c b/net/netfilter/nft_quota.c index 25e33159be57..0ed124a93fcf 100644 --- a/net/netfilter/nft_quota.c +++ b/net/netfilter/nft_quota.c @@ -151,14 +151,20 @@ static int nft_quota_obj_dump(struct sk_buff *skb, struct nft_object *obj, return nft_quota_do_dump(skb, priv, reset); } -static struct nft_object_type nft_quota_obj __read_mostly = { - .type = NFT_OBJECT_QUOTA, +static struct nft_object_type nft_quota_obj_type; +static const struct nft_object_ops nft_quota_obj_ops = { + .type = &nft_quota_obj_type, .size = sizeof(struct nft_quota), - .maxattr = NFTA_QUOTA_MAX, - .policy = nft_quota_policy, .init = nft_quota_obj_init, .eval = nft_quota_obj_eval, .dump = nft_quota_obj_dump, +}; + +static struct nft_object_type nft_quota_obj_type __read_mostly = { + .type = NFT_OBJECT_QUOTA, + .ops = &nft_quota_obj_ops, + .maxattr = NFTA_QUOTA_MAX, + .policy = nft_quota_policy, .owner = THIS_MODULE, }; @@ -209,7 +215,7 @@ static int __init nft_quota_module_init(void) { int err; - err = nft_register_obj(&nft_quota_obj); + err = nft_register_obj(&nft_quota_obj_type); if (err < 0) return err; @@ -219,14 +225,14 @@ static int __init nft_quota_module_init(void) return 0; err1: - nft_unregister_obj(&nft_quota_obj); + nft_unregister_obj(&nft_quota_obj_type); return err; } static void __exit nft_quota_module_exit(void) { nft_unregister_expr(&nft_quota_type); - nft_unregister_obj(&nft_quota_obj); + nft_unregister_obj(&nft_quota_obj_type); } module_init(nft_quota_module_init); diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c index c7383d8f88d0..a6b7d05aeacf 100644 --- a/net/netfilter/nft_rt.c +++ b/net/netfilter/nft_rt.c @@ -23,6 +23,43 @@ struct nft_rt { enum nft_registers dreg:8; }; +static u16 get_tcpmss(const struct nft_pktinfo *pkt, const struct dst_entry *skbdst) +{ + u32 minlen = sizeof(struct ipv6hdr), mtu = dst_mtu(skbdst); + const struct sk_buff *skb = pkt->skb; + const struct nf_afinfo *ai; + struct flowi fl; + + memset(&fl, 0, sizeof(fl)); + + switch (nft_pf(pkt)) { + case NFPROTO_IPV4: + fl.u.ip4.daddr = ip_hdr(skb)->saddr; + minlen = sizeof(struct iphdr) + sizeof(struct tcphdr); + break; + case NFPROTO_IPV6: + fl.u.ip6.daddr = ipv6_hdr(skb)->saddr; + minlen = sizeof(struct ipv6hdr) + sizeof(struct tcphdr); + break; + } + + ai = nf_get_afinfo(nft_pf(pkt)); + if (ai) { + struct dst_entry *dst = NULL; + + ai->route(nft_net(pkt), &dst, &fl, false); + if (dst) { + mtu = min(mtu, dst_mtu(dst)); + dst_release(dst); + } + } + + if (mtu <= minlen || mtu > 0xffff) + return TCP_MSS_DEFAULT; + + return mtu - minlen; +} + static void nft_rt_get_eval(const struct nft_expr *expr, struct nft_regs *regs, const struct nft_pktinfo *pkt) @@ -46,8 +83,8 @@ static void nft_rt_get_eval(const struct nft_expr *expr, if (nft_pf(pkt) != NFPROTO_IPV4) goto err; - *dest = rt_nexthop((const struct rtable *)dst, - ip_hdr(skb)->daddr); + *dest = (__force u32)rt_nexthop((const struct rtable *)dst, + ip_hdr(skb)->daddr); break; case NFT_RT_NEXTHOP6: if (nft_pf(pkt) != NFPROTO_IPV6) @@ -57,6 +94,9 @@ static void nft_rt_get_eval(const struct nft_expr *expr, &ipv6_hdr(skb)->daddr), sizeof(struct in6_addr)); break; + case NFT_RT_TCPMSS: + nft_reg_store16(dest, get_tcpmss(pkt, dst)); + break; default: WARN_ON(1); goto err; @@ -67,7 +107,7 @@ err: regs->verdict.code = NFT_BREAK; } -const struct nla_policy nft_rt_policy[NFTA_RT_MAX + 1] = { +static const struct nla_policy nft_rt_policy[NFTA_RT_MAX + 1] = { [NFTA_RT_DREG] = { .type = NLA_U32 }, [NFTA_RT_KEY] = { .type = NLA_U32 }, }; @@ -94,6 +134,9 @@ static int nft_rt_get_init(const struct nft_ctx *ctx, case NFT_RT_NEXTHOP6: len = sizeof(struct in6_addr); break; + case NFT_RT_TCPMSS: + len = sizeof(u16); + break; default: return -EOPNOTSUPP; } @@ -118,6 +161,29 @@ nla_put_failure: return -1; } +static int nft_rt_validate(const struct nft_ctx *ctx, const struct nft_expr *expr, + const struct nft_data **data) +{ + const struct nft_rt *priv = nft_expr_priv(expr); + unsigned int hooks; + + switch (priv->key) { + case NFT_RT_NEXTHOP4: + case NFT_RT_NEXTHOP6: + case NFT_RT_CLASSID: + return 0; + case NFT_RT_TCPMSS: + hooks = (1 << NF_INET_FORWARD) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_POST_ROUTING); + break; + default: + return -EINVAL; + } + + return nft_chain_validate_hooks(ctx->chain, hooks); +} + static struct nft_expr_type nft_rt_type; static const struct nft_expr_ops nft_rt_get_ops = { .type = &nft_rt_type, @@ -125,6 +191,7 @@ static const struct nft_expr_ops nft_rt_get_ops = { .eval = nft_rt_get_eval, .init = nft_rt_get_init, .dump = nft_rt_get_dump, + .validate = nft_rt_validate, }; static struct nft_expr_type nft_rt_type __read_mostly = { diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c index 734989c40579..45fb2752fb63 100644 --- a/net/netfilter/nft_set_bitmap.c +++ b/net/netfilter/nft_set_bitmap.c @@ -106,6 +106,23 @@ nft_bitmap_elem_find(const struct nft_set *set, struct nft_bitmap_elem *this, return NULL; } +static void *nft_bitmap_get(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem, unsigned int flags) +{ + const struct nft_bitmap *priv = nft_set_priv(set); + u8 genmask = nft_genmask_cur(net); + struct nft_bitmap_elem *be; + + list_for_each_entry_rcu(be, &priv->list, head) { + if (memcmp(nft_set_ext_key(&be->ext), elem->key.val.data, set->klen) || + !nft_set_elem_active(&be->ext, genmask)) + continue; + + return be; + } + return ERR_PTR(-ENOENT); +} + static int nft_bitmap_insert(const struct net *net, const struct nft_set *set, const struct nft_set_elem *elem, struct nft_set_ext **ext) @@ -294,6 +311,7 @@ static struct nft_set_ops nft_bitmap_ops __read_mostly = { .activate = nft_bitmap_activate, .lookup = nft_bitmap_lookup, .walk = nft_bitmap_walk, + .get = nft_bitmap_get, }; static struct nft_set_type nft_bitmap_type __read_mostly = { diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c index 0fa01d772c5e..f8166c1d5430 100644 --- a/net/netfilter/nft_set_hash.c +++ b/net/netfilter/nft_set_hash.c @@ -95,6 +95,24 @@ static bool nft_rhash_lookup(const struct net *net, const struct nft_set *set, return !!he; } +static void *nft_rhash_get(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem, unsigned int flags) +{ + struct nft_rhash *priv = nft_set_priv(set); + struct nft_rhash_elem *he; + struct nft_rhash_cmp_arg arg = { + .genmask = nft_genmask_cur(net), + .set = set, + .key = elem->key.val.data, + }; + + he = rhashtable_lookup_fast(&priv->ht, &arg, nft_rhash_params); + if (he != NULL) + return he; + + return ERR_PTR(-ENOENT); +} + static bool nft_rhash_update(struct nft_set *set, const u32 *key, void *(*new)(struct nft_set *, const struct nft_expr *, @@ -409,6 +427,24 @@ static bool nft_hash_lookup(const struct net *net, const struct nft_set *set, return false; } +static void *nft_hash_get(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem, unsigned int flags) +{ + struct nft_hash *priv = nft_set_priv(set); + u8 genmask = nft_genmask_cur(net); + struct nft_hash_elem *he; + u32 hash; + + hash = jhash(elem->key.val.data, set->klen, priv->seed); + hash = reciprocal_scale(hash, priv->buckets); + hlist_for_each_entry_rcu(he, &priv->table[hash], node) { + if (!memcmp(nft_set_ext_key(&he->ext), elem->key.val.data, set->klen) && + nft_set_elem_active(&he->ext, genmask)) + return he; + } + return ERR_PTR(-ENOENT); +} + /* nft_hash_select_ops() makes sure key size can be either 2 or 4 bytes . */ static inline u32 nft_hash_key(const u32 *key, u32 klen) { @@ -494,7 +530,7 @@ static void *nft_hash_deactivate(const struct net *net, hash = reciprocal_scale(hash, priv->buckets); hlist_for_each_entry(he, &priv->table[hash], node) { if (!memcmp(nft_set_ext_key(&this->ext), &elem->key.val, - set->klen) || + set->klen) && nft_set_elem_active(&he->ext, genmask)) { nft_set_elem_change_active(net, set, &he->ext); return he; @@ -600,6 +636,7 @@ static struct nft_set_ops nft_rhash_ops __read_mostly = { .lookup = nft_rhash_lookup, .update = nft_rhash_update, .walk = nft_rhash_walk, + .get = nft_rhash_get, .features = NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT, }; @@ -617,6 +654,7 @@ static struct nft_set_ops nft_hash_ops __read_mostly = { .remove = nft_hash_remove, .lookup = nft_hash_lookup, .walk = nft_hash_walk, + .get = nft_hash_get, .features = NFT_SET_MAP | NFT_SET_OBJECT, }; @@ -634,6 +672,7 @@ static struct nft_set_ops nft_hash_fast_ops __read_mostly = { .remove = nft_hash_remove, .lookup = nft_hash_lookup_fast, .walk = nft_hash_walk, + .get = nft_hash_get, .features = NFT_SET_MAP | NFT_SET_OBJECT, }; @@ -643,7 +682,6 @@ nft_hash_select_ops(const struct nft_ctx *ctx, const struct nft_set_desc *desc, { if (desc->size) { switch (desc->klen) { - case 2: case 4: return &nft_hash_fast_ops; default: diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index bce5382f1d49..e6f08bc5f359 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -19,8 +19,9 @@ #include <net/netfilter/nf_tables.h> struct nft_rbtree { - rwlock_t lock; struct rb_root root; + rwlock_t lock; + seqcount_t count; }; struct nft_rbtree_elem { @@ -40,8 +41,9 @@ static bool nft_rbtree_equal(const struct nft_set *set, const void *this, return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0; } -static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set, - const u32 *key, const struct nft_set_ext **ext) +static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set, + const u32 *key, const struct nft_set_ext **ext, + unsigned int seq) { struct nft_rbtree *priv = nft_set_priv(set); const struct nft_rbtree_elem *rbe, *interval = NULL; @@ -50,15 +52,17 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set, const void *this; int d; - read_lock_bh(&priv->lock); - parent = priv->root.rb_node; + parent = rcu_dereference_raw(priv->root.rb_node); while (parent != NULL) { + if (read_seqcount_retry(&priv->count, seq)) + return false; + rbe = rb_entry(parent, struct nft_rbtree_elem, node); this = nft_set_ext_key(&rbe->ext); d = memcmp(this, key, set->klen); if (d < 0) { - parent = parent->rb_left; + parent = rcu_dereference_raw(parent->rb_left); if (interval && nft_rbtree_equal(set, this, interval) && nft_rbtree_interval_end(this) && @@ -66,15 +70,14 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set, continue; interval = rbe; } else if (d > 0) - parent = parent->rb_right; + parent = rcu_dereference_raw(parent->rb_right); else { if (!nft_set_elem_active(&rbe->ext, genmask)) { - parent = parent->rb_left; + parent = rcu_dereference_raw(parent->rb_left); continue; } if (nft_rbtree_interval_end(rbe)) goto out; - read_unlock_bh(&priv->lock); *ext = &rbe->ext; return true; @@ -84,15 +87,104 @@ static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set, if (set->flags & NFT_SET_INTERVAL && interval != NULL && nft_set_elem_active(&interval->ext, genmask) && !nft_rbtree_interval_end(interval)) { - read_unlock_bh(&priv->lock); *ext = &interval->ext; return true; } out: + return false; +} + +static bool nft_rbtree_lookup(const struct net *net, const struct nft_set *set, + const u32 *key, const struct nft_set_ext **ext) +{ + struct nft_rbtree *priv = nft_set_priv(set); + unsigned int seq = read_seqcount_begin(&priv->count); + bool ret; + + ret = __nft_rbtree_lookup(net, set, key, ext, seq); + if (ret || !read_seqcount_retry(&priv->count, seq)) + return ret; + + read_lock_bh(&priv->lock); + seq = read_seqcount_begin(&priv->count); + ret = __nft_rbtree_lookup(net, set, key, ext, seq); read_unlock_bh(&priv->lock); + + return ret; +} + +static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set, + const u32 *key, struct nft_rbtree_elem **elem, + unsigned int seq, unsigned int flags, u8 genmask) +{ + struct nft_rbtree_elem *rbe, *interval = NULL; + struct nft_rbtree *priv = nft_set_priv(set); + const struct rb_node *parent; + const void *this; + int d; + + parent = rcu_dereference_raw(priv->root.rb_node); + while (parent != NULL) { + if (read_seqcount_retry(&priv->count, seq)) + return false; + + rbe = rb_entry(parent, struct nft_rbtree_elem, node); + + this = nft_set_ext_key(&rbe->ext); + d = memcmp(this, key, set->klen); + if (d < 0) { + parent = rcu_dereference_raw(parent->rb_left); + interval = rbe; + } else if (d > 0) { + parent = rcu_dereference_raw(parent->rb_right); + } else { + if (!nft_set_elem_active(&rbe->ext, genmask)) + parent = rcu_dereference_raw(parent->rb_left); + + if (!nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) || + (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END) == + (flags & NFT_SET_ELEM_INTERVAL_END)) { + *elem = rbe; + return true; + } + return false; + } + } + + if (set->flags & NFT_SET_INTERVAL && interval != NULL && + nft_set_elem_active(&interval->ext, genmask) && + !nft_rbtree_interval_end(interval)) { + *elem = interval; + return true; + } + return false; } +static void *nft_rbtree_get(const struct net *net, const struct nft_set *set, + const struct nft_set_elem *elem, unsigned int flags) +{ + struct nft_rbtree *priv = nft_set_priv(set); + unsigned int seq = read_seqcount_begin(&priv->count); + struct nft_rbtree_elem *rbe = ERR_PTR(-ENOENT); + const u32 *key = (const u32 *)&elem->key.val; + u8 genmask = nft_genmask_cur(net); + bool ret; + + ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask); + if (ret || !read_seqcount_retry(&priv->count, seq)) + return rbe; + + read_lock_bh(&priv->lock); + seq = read_seqcount_begin(&priv->count); + ret = __nft_rbtree_get(net, set, key, &rbe, seq, flags, genmask); + if (!ret) + rbe = ERR_PTR(-ENOENT); + read_unlock_bh(&priv->lock); + + return rbe; +} + static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, struct nft_rbtree_elem *new, struct nft_set_ext **ext) @@ -130,7 +222,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, } } } - rb_link_node(&new->node, parent, p); + rb_link_node_rcu(&new->node, parent, p); rb_insert_color(&new->node, &priv->root); return 0; } @@ -144,7 +236,9 @@ static int nft_rbtree_insert(const struct net *net, const struct nft_set *set, int err; write_lock_bh(&priv->lock); + write_seqcount_begin(&priv->count); err = __nft_rbtree_insert(net, set, rbe, ext); + write_seqcount_end(&priv->count); write_unlock_bh(&priv->lock); return err; @@ -158,7 +252,9 @@ static void nft_rbtree_remove(const struct net *net, struct nft_rbtree_elem *rbe = elem->priv; write_lock_bh(&priv->lock); + write_seqcount_begin(&priv->count); rb_erase(&rbe->node, &priv->root); + write_seqcount_end(&priv->count); write_unlock_bh(&priv->lock); } @@ -264,6 +360,7 @@ static int nft_rbtree_init(const struct nft_set *set, struct nft_rbtree *priv = nft_set_priv(set); rwlock_init(&priv->lock); + seqcount_init(&priv->count); priv->root = RB_ROOT; return 0; } @@ -311,6 +408,7 @@ static struct nft_set_ops nft_rbtree_ops __read_mostly = { .activate = nft_rbtree_activate, .lookup = nft_rbtree_lookup, .walk = nft_rbtree_walk, + .get = nft_rbtree_get, .features = NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT, }; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index e1648238a9c9..a77dd514297c 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -892,7 +892,7 @@ void *xt_copy_counters_from_user(const void __user *user, unsigned int len, if (copy_from_user(&compat_tmp, user, sizeof(compat_tmp)) != 0) return ERR_PTR(-EFAULT); - strlcpy(info->name, compat_tmp.name, sizeof(info->name)); + memcpy(info->name, compat_tmp.name, sizeof(info->name) - 1); info->num_counters = compat_tmp.num_counters; user += sizeof(compat_tmp); } else @@ -905,9 +905,9 @@ void *xt_copy_counters_from_user(const void __user *user, unsigned int len, if (copy_from_user(info, user, sizeof(*info)) != 0) return ERR_PTR(-EFAULT); - info->name[sizeof(info->name) - 1] = '\0'; user += sizeof(*info); } + info->name[sizeof(info->name) - 1] = '\0'; size = sizeof(struct xt_counters); size *= info->num_counters; @@ -1153,6 +1153,7 @@ xt_replace_table(struct xt_table *table, int *error) { struct xt_table_info *private; + unsigned int cpu; int ret; ret = xt_jumpstack_alloc(newinfo); @@ -1182,26 +1183,34 @@ xt_replace_table(struct xt_table *table, smp_wmb(); table->private = newinfo; + /* make sure all cpus see new ->private value */ + smp_wmb(); + /* * Even though table entries have now been swapped, other CPU's - * may still be using the old entries. This is okay, because - * resynchronization happens because of the locking done - * during the get_counters() routine. + * may still be using the old entries... */ local_bh_enable(); + /* ... so wait for even xt_recseq on all cpus */ + for_each_possible_cpu(cpu) { + seqcount_t *s = &per_cpu(xt_recseq, cpu); + u32 seq = raw_read_seqcount(s); + + if (seq & 1) { + do { + cond_resched(); + cpu_relax(); + } while (seq == raw_read_seqcount(s)); + } + } + #ifdef CONFIG_AUDIT if (audit_enabled) { - struct audit_buffer *ab; - - ab = audit_log_start(current->audit_context, GFP_KERNEL, - AUDIT_NETFILTER_CFG); - if (ab) { - audit_log_format(ab, "table=%s family=%u entries=%u", - table->name, table->af, - private->number); - audit_log_end(ab); - } + audit_log(current->audit_context, GFP_KERNEL, + AUDIT_NETFILTER_CFG, + "table=%s family=%u entries=%u", + table->name, table->af, private->number); } #endif diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 623ef37de886..5a152e2acfd5 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -121,9 +121,9 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par, { #ifdef CONFIG_NF_CONNTRACK_TIMEOUT typeof(nf_ct_timeout_find_get_hook) timeout_find_get; + const struct nf_conntrack_l4proto *l4proto; struct ctnl_timeout *timeout; struct nf_conn_timeout *timeout_ext; - struct nf_conntrack_l4proto *l4proto; int ret = 0; u8 proto; diff --git a/net/netfilter/xt_NETMAP.c b/net/netfilter/xt_NETMAP.c index e45a01255e70..58aa9dd3c5b7 100644 --- a/net/netfilter/xt_NETMAP.c +++ b/net/netfilter/xt_NETMAP.c @@ -77,10 +77,10 @@ netmap_tg4(struct sk_buff *skb, const struct xt_action_param *par) const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; struct nf_nat_range newrange; - NF_CT_ASSERT(xt_hooknum(par) == NF_INET_PRE_ROUTING || - xt_hooknum(par) == NF_INET_POST_ROUTING || - xt_hooknum(par) == NF_INET_LOCAL_OUT || - xt_hooknum(par) == NF_INET_LOCAL_IN); + WARN_ON(xt_hooknum(par) != NF_INET_PRE_ROUTING && + xt_hooknum(par) != NF_INET_POST_ROUTING && + xt_hooknum(par) != NF_INET_LOCAL_OUT && + xt_hooknum(par) != NF_INET_LOCAL_IN); ct = nf_ct_get(skb, &ctinfo); netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index c64aca611ac5..9dae4d665965 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -62,11 +62,9 @@ static u_int32_t tcpmss_reverse_mtu(struct net *net, memset(fl6, 0, sizeof(*fl6)); fl6->daddr = ipv6_hdr(skb)->saddr; } - rcu_read_lock(); ai = nf_get_afinfo(family); if (ai != NULL) ai->route(net, (struct dst_entry **)&rt, &fl, false); - rcu_read_unlock(); if (rt != NULL) { mtu = dst_mtu(&rt->dst); diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index d767e35fff6b..17d7705e3bd4 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -70,13 +70,11 @@ tproxy_laddr4(struct sk_buff *skb, __be32 user_laddr, __be32 daddr) return user_laddr; laddr = 0; - rcu_read_lock(); indev = __in_dev_get_rcu(skb->dev); for_primary_ifa(indev) { laddr = ifa->ifa_local; break; } endfor_ifa(indev); - rcu_read_unlock(); return laddr ? laddr : daddr; } @@ -125,7 +123,7 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, void *hp, __tcp_hdrlen(tcph), saddr, sport, daddr, dport, - in->ifindex); + in->ifindex, 0); if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; @@ -195,7 +193,7 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp, thoff + __tcp_hdrlen(tcph), saddr, sport, daddr, ntohs(dport), - in->ifindex); + in->ifindex, 0); if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; @@ -208,7 +206,7 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, void *hp, case NFT_LOOKUP_ESTABLISHED: sk = __inet6_lookup_established(net, &tcp_hashinfo, saddr, sport, daddr, ntohs(dport), - in->ifindex); + in->ifindex, 0); break; default: BUG(); @@ -391,7 +389,6 @@ tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr, return user_laddr; laddr = NULL; - rcu_read_lock(); indev = __in6_dev_get(skb->dev); if (indev) { read_lock_bh(&indev->lock); @@ -404,7 +401,6 @@ tproxy_laddr6(struct sk_buff *skb, const struct in6_addr *user_laddr, } read_unlock_bh(&indev->lock); } - rcu_read_unlock(); return laddr ? laddr : daddr; } diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c index e329dabde35f..3b2be2ae6987 100644 --- a/net/netfilter/xt_addrtype.c +++ b/net/netfilter/xt_addrtype.c @@ -47,8 +47,6 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev, if (dev) flow.flowi6_oif = dev->ifindex; - rcu_read_lock(); - afinfo = nf_get_afinfo(NFPROTO_IPV6); if (afinfo != NULL) { const struct nf_ipv6_ops *v6ops; @@ -63,7 +61,6 @@ static u32 match_lookup_rt6(struct net *net, const struct net_device *dev, } else { route_err = 1; } - rcu_read_unlock(); if (route_err) return XT_ADDRTYPE_UNREACHABLE; diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c index 38986a95216c..041da0d9c06f 100644 --- a/net/netfilter/xt_bpf.c +++ b/net/netfilter/xt_bpf.c @@ -8,6 +8,7 @@ */ #include <linux/module.h> +#include <linux/syscalls.h> #include <linux/skbuff.h> #include <linux/filter.h> #include <linux/bpf.h> @@ -49,6 +50,22 @@ static int __bpf_mt_check_fd(int fd, struct bpf_prog **ret) return 0; } +static int __bpf_mt_check_path(const char *path, struct bpf_prog **ret) +{ + mm_segment_t oldfs = get_fs(); + int retval, fd; + + set_fs(KERNEL_DS); + fd = bpf_obj_get_user(path, 0); + set_fs(oldfs); + if (fd < 0) + return fd; + + retval = __bpf_mt_check_fd(fd, ret); + sys_close(fd); + return retval; +} + static int bpf_mt_check(const struct xt_mtchk_param *par) { struct xt_bpf_info *info = par->matchinfo; @@ -66,9 +83,10 @@ static int bpf_mt_check_v1(const struct xt_mtchk_param *par) return __bpf_mt_check_bytecode(info->bpf_program, info->bpf_program_num_elem, &info->filter); - else if (info->mode == XT_BPF_MODE_FD_PINNED || - info->mode == XT_BPF_MODE_FD_ELF) + else if (info->mode == XT_BPF_MODE_FD_ELF) return __bpf_mt_check_fd(info->fd, &info->filter); + else if (info->mode == XT_BPF_MODE_PATH_PINNED) + return __bpf_mt_check_path(info->path, &info->filter); else return -EINVAL; } diff --git a/net/netfilter/xt_connlimit.c b/net/netfilter/xt_connlimit.c index b8fd4ab762ed..a6214f235333 100644 --- a/net/netfilter/xt_connlimit.c +++ b/net/netfilter/xt_connlimit.c @@ -46,7 +46,6 @@ struct xt_connlimit_conn { struct hlist_node node; struct nf_conntrack_tuple tuple; - union nf_inet_addr addr; }; struct xt_connlimit_rb { @@ -58,8 +57,7 @@ struct xt_connlimit_rb { static spinlock_t xt_connlimit_locks[CONNLIMIT_LOCK_SLOTS] __cacheline_aligned_in_smp; struct xt_connlimit_data { - struct rb_root climit_root4[CONNLIMIT_SLOTS]; - struct rb_root climit_root6[CONNLIMIT_SLOTS]; + struct rb_root climit_root[CONNLIMIT_SLOTS]; }; static u_int32_t connlimit_rnd __read_mostly; @@ -73,16 +71,9 @@ static inline unsigned int connlimit_iphash(__be32 addr) } static inline unsigned int -connlimit_iphash6(const union nf_inet_addr *addr, - const union nf_inet_addr *mask) +connlimit_iphash6(const union nf_inet_addr *addr) { - union nf_inet_addr res; - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i) - res.ip6[i] = addr->ip6[i] & mask->ip6[i]; - - return jhash2((u32 *)res.ip6, ARRAY_SIZE(res.ip6), + return jhash2((u32 *)addr->ip6, ARRAY_SIZE(addr->ip6), connlimit_rnd) % CONNLIMIT_SLOTS; } @@ -96,24 +87,13 @@ static inline bool already_closed(const struct nf_conn *conn) } static int -same_source_net(const union nf_inet_addr *addr, - const union nf_inet_addr *mask, - const union nf_inet_addr *u3, u_int8_t family) +same_source(const union nf_inet_addr *addr, + const union nf_inet_addr *u3, u_int8_t family) { - if (family == NFPROTO_IPV4) { - return ntohl(addr->ip & mask->ip) - - ntohl(u3->ip & mask->ip); - } else { - union nf_inet_addr lh, rh; - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(addr->ip6); ++i) { - lh.ip6[i] = addr->ip6[i] & mask->ip6[i]; - rh.ip6[i] = u3->ip6[i] & mask->ip6[i]; - } + if (family == NFPROTO_IPV4) + return ntohl(addr->ip) - ntohl(u3->ip); - return memcmp(&lh.ip6, &rh.ip6, sizeof(lh.ip6)); - } + return memcmp(addr->ip6, u3->ip6, sizeof(addr->ip6)); } static bool add_hlist(struct hlist_head *head, @@ -126,7 +106,6 @@ static bool add_hlist(struct hlist_head *head, if (conn == NULL) return false; conn->tuple = *tuple; - conn->addr = *addr; hlist_add_head(&conn->node, head); return true; } @@ -144,7 +123,6 @@ static unsigned int check_hlist(struct net *net, unsigned int length = 0; *addit = true; - rcu_read_lock(); /* check the saved connections */ hlist_for_each_entry_safe(conn, n, head, node) { @@ -179,8 +157,6 @@ static unsigned int check_hlist(struct net *net, length++; } - rcu_read_unlock(); - return length; } @@ -200,7 +176,7 @@ static void tree_nodes_free(struct rb_root *root, static unsigned int count_tree(struct net *net, struct rb_root *root, const struct nf_conntrack_tuple *tuple, - const union nf_inet_addr *addr, const union nf_inet_addr *mask, + const union nf_inet_addr *addr, u8 family, const struct nf_conntrack_zone *zone) { struct xt_connlimit_rb *gc_nodes[CONNLIMIT_GC_MAX_NODES]; @@ -221,7 +197,7 @@ count_tree(struct net *net, struct rb_root *root, rbconn = rb_entry(*rbnode, struct xt_connlimit_rb, node); parent = *rbnode; - diff = same_source_net(addr, mask, &rbconn->addr, family); + diff = same_source(addr, &rbconn->addr, family); if (diff < 0) { rbnode = &((*rbnode)->rb_left); } else if (diff > 0) { @@ -274,7 +250,6 @@ count_tree(struct net *net, struct rb_root *root, } conn->tuple = *tuple; - conn->addr = *addr; rbconn->addr = *addr; INIT_HLIST_HEAD(&rbconn->hhead); @@ -289,7 +264,6 @@ static int count_them(struct net *net, struct xt_connlimit_data *data, const struct nf_conntrack_tuple *tuple, const union nf_inet_addr *addr, - const union nf_inet_addr *mask, u_int8_t family, const struct nf_conntrack_zone *zone) { @@ -297,17 +271,15 @@ static int count_them(struct net *net, int count; u32 hash; - if (family == NFPROTO_IPV6) { - hash = connlimit_iphash6(addr, mask); - root = &data->climit_root6[hash]; - } else { - hash = connlimit_iphash(addr->ip & mask->ip); - root = &data->climit_root4[hash]; - } + if (family == NFPROTO_IPV6) + hash = connlimit_iphash6(addr); + else + hash = connlimit_iphash(addr->ip); + root = &data->climit_root[hash]; spin_lock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]); - count = count_tree(net, root, tuple, addr, mask, family, zone); + count = count_tree(net, root, tuple, addr, family, zone); spin_unlock_bh(&xt_connlimit_locks[hash % CONNLIMIT_LOCK_SLOTS]); @@ -338,16 +310,23 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) if (xt_family(par) == NFPROTO_IPV6) { const struct ipv6hdr *iph = ipv6_hdr(skb); + unsigned int i; + memcpy(&addr.ip6, (info->flags & XT_CONNLIMIT_DADDR) ? &iph->daddr : &iph->saddr, sizeof(addr.ip6)); + + for (i = 0; i < ARRAY_SIZE(addr.ip6); ++i) + addr.ip6[i] &= info->mask.ip6[i]; } else { const struct iphdr *iph = ip_hdr(skb); addr.ip = (info->flags & XT_CONNLIMIT_DADDR) ? iph->daddr : iph->saddr; + + addr.ip &= info->mask.ip; } connections = count_them(net, info->data, tuple_ptr, &addr, - &info->mask, xt_family(par), zone); + xt_family(par), zone); if (connections == 0) /* kmalloc failed, drop it entirely */ goto hotdrop; @@ -382,10 +361,8 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par) return -ENOMEM; } - for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i) - info->data->climit_root4[i] = RB_ROOT; - for (i = 0; i < ARRAY_SIZE(info->data->climit_root6); ++i) - info->data->climit_root6[i] = RB_ROOT; + for (i = 0; i < ARRAY_SIZE(info->data->climit_root); ++i) + info->data->climit_root[i] = RB_ROOT; return 0; } @@ -416,10 +393,8 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par) nf_ct_netns_put(par->net, par->family); - for (i = 0; i < ARRAY_SIZE(info->data->climit_root4); ++i) - destroy_tree(&info->data->climit_root4[i]); - for (i = 0; i < ARRAY_SIZE(info->data->climit_root6); ++i) - destroy_tree(&info->data->climit_root6[i]); + for (i = 0; i < ARRAY_SIZE(info->data->climit_root); ++i) + destroy_tree(&info->data->climit_root[i]); kfree(info->data); } diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 762e1874f28b..5da8746f7b88 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -35,6 +35,7 @@ #include <linux/netfilter_ipv6/ip6_tables.h> #include <linux/netfilter/xt_hashlimit.h> #include <linux/mutex.h> +#include <linux/kernel.h> MODULE_LICENSE("GPL"); MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>"); @@ -56,6 +57,7 @@ static inline struct hashlimit_net *hashlimit_pernet(struct net *net) } /* need to declare this at the top */ +static const struct file_operations dl_file_ops_v2; static const struct file_operations dl_file_ops_v1; static const struct file_operations dl_file_ops; @@ -87,8 +89,19 @@ struct dsthash_ent { unsigned long expires; /* precalculated expiry time */ struct { unsigned long prev; /* last modification */ - u_int64_t credit; - u_int64_t credit_cap, cost; + union { + struct { + u_int64_t credit; + u_int64_t credit_cap; + u_int64_t cost; + }; + struct { + u_int32_t interval, prev_window; + u_int64_t current_rate; + u_int64_t rate; + int64_t burst; + }; + }; } rateinfo; struct rcu_head rcu; }; @@ -99,7 +112,7 @@ struct xt_hashlimit_htable { u_int8_t family; bool rnd_initialized; - struct hashlimit_cfg2 cfg; /* config */ + struct hashlimit_cfg3 cfg; /* config */ /* used internally */ spinlock_t lock; /* lock for list_head */ @@ -116,10 +129,10 @@ struct xt_hashlimit_htable { }; static int -cfg_copy(struct hashlimit_cfg2 *to, void *from, int revision) +cfg_copy(struct hashlimit_cfg3 *to, const void *from, int revision) { if (revision == 1) { - struct hashlimit_cfg1 *cfg = from; + struct hashlimit_cfg1 *cfg = (struct hashlimit_cfg1 *)from; to->mode = cfg->mode; to->avg = cfg->avg; @@ -131,7 +144,19 @@ cfg_copy(struct hashlimit_cfg2 *to, void *from, int revision) to->srcmask = cfg->srcmask; to->dstmask = cfg->dstmask; } else if (revision == 2) { - memcpy(to, from, sizeof(struct hashlimit_cfg2)); + struct hashlimit_cfg2 *cfg = (struct hashlimit_cfg2 *)from; + + to->mode = cfg->mode; + to->avg = cfg->avg; + to->burst = cfg->burst; + to->size = cfg->size; + to->max = cfg->max; + to->gc_interval = cfg->gc_interval; + to->expire = cfg->expire; + to->srcmask = cfg->srcmask; + to->dstmask = cfg->dstmask; + } else if (revision == 3) { + memcpy(to, from, sizeof(struct hashlimit_cfg3)); } else { return -EINVAL; } @@ -240,13 +265,14 @@ dsthash_free(struct xt_hashlimit_htable *ht, struct dsthash_ent *ent) } static void htable_gc(struct work_struct *work); -static int htable_create(struct net *net, struct hashlimit_cfg2 *cfg, +static int htable_create(struct net *net, struct hashlimit_cfg3 *cfg, const char *name, u_int8_t family, struct xt_hashlimit_htable **out_hinfo, int revision) { struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); struct xt_hashlimit_htable *hinfo; + const struct file_operations *fops; unsigned int size, i; int ret; @@ -254,7 +280,7 @@ static int htable_create(struct net *net, struct hashlimit_cfg2 *cfg, size = cfg->size; } else { size = (totalram_pages << PAGE_SHIFT) / 16384 / - sizeof(struct list_head); + sizeof(struct hlist_head); if (totalram_pages > 1024 * 1024 * 1024 / PAGE_SIZE) size = 8192; if (size < 16) @@ -262,13 +288,13 @@ static int htable_create(struct net *net, struct hashlimit_cfg2 *cfg, } /* FIXME: don't use vmalloc() here or anywhere else -HW */ hinfo = vmalloc(sizeof(struct xt_hashlimit_htable) + - sizeof(struct list_head) * size); + sizeof(struct hlist_head) * size); if (hinfo == NULL) return -ENOMEM; *out_hinfo = hinfo; /* copy match config into hashtable config */ - ret = cfg_copy(&hinfo->cfg, (void *)cfg, 2); + ret = cfg_copy(&hinfo->cfg, (void *)cfg, 3); if (ret) return ret; @@ -293,11 +319,21 @@ static int htable_create(struct net *net, struct hashlimit_cfg2 *cfg, } spin_lock_init(&hinfo->lock); + switch (revision) { + case 1: + fops = &dl_file_ops_v1; + break; + case 2: + fops = &dl_file_ops_v2; + break; + default: + fops = &dl_file_ops; + } + hinfo->pde = proc_create_data(name, 0, (family == NFPROTO_IPV4) ? hashlimit_net->ipt_hashlimit : hashlimit_net->ip6t_hashlimit, - (revision == 1) ? &dl_file_ops_v1 : &dl_file_ops, - hinfo); + fops, hinfo); if (hinfo->pde == NULL) { kfree(hinfo->name); vfree(hinfo); @@ -482,6 +518,25 @@ static u32 user2credits_byte(u32 user) return (u32) (us >> 32); } +static u64 user2rate(u64 user) +{ + if (user != 0) { + return div64_u64(XT_HASHLIMIT_SCALE_v2, user); + } else { + pr_warn("invalid rate from userspace: %llu\n", user); + return 0; + } +} + +static u64 user2rate_bytes(u32 user) +{ + u64 r; + + r = user ? U32_MAX / user : U32_MAX; + r = (r - 1) << XT_HASHLIMIT_BYTE_SHIFT; + return r; +} + static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now, u32 mode, int revision) { @@ -491,6 +546,21 @@ static void rateinfo_recalc(struct dsthash_ent *dh, unsigned long now, if (delta == 0) return; + if (revision >= 3 && mode & XT_HASHLIMIT_RATE_MATCH) { + u64 interval = dh->rateinfo.interval * HZ; + + if (delta < interval) + return; + + dh->rateinfo.prev = now; + dh->rateinfo.prev_window = + ((dh->rateinfo.current_rate * interval) > + (delta * dh->rateinfo.rate)); + dh->rateinfo.current_rate = 0; + + return; + } + dh->rateinfo.prev = now; if (mode & XT_HASHLIMIT_BYTES) { @@ -515,7 +585,24 @@ static void rateinfo_init(struct dsthash_ent *dh, struct xt_hashlimit_htable *hinfo, int revision) { dh->rateinfo.prev = jiffies; - if (hinfo->cfg.mode & XT_HASHLIMIT_BYTES) { + if (revision >= 3 && hinfo->cfg.mode & XT_HASHLIMIT_RATE_MATCH) { + dh->rateinfo.prev_window = 0; + dh->rateinfo.current_rate = 0; + if (hinfo->cfg.mode & XT_HASHLIMIT_BYTES) { + dh->rateinfo.rate = + user2rate_bytes((u32)hinfo->cfg.avg); + if (hinfo->cfg.burst) + dh->rateinfo.burst = + hinfo->cfg.burst * dh->rateinfo.rate; + else + dh->rateinfo.burst = dh->rateinfo.rate; + } else { + dh->rateinfo.rate = user2rate(hinfo->cfg.avg); + dh->rateinfo.burst = + hinfo->cfg.burst + dh->rateinfo.rate; + } + dh->rateinfo.interval = hinfo->cfg.interval; + } else if (hinfo->cfg.mode & XT_HASHLIMIT_BYTES) { dh->rateinfo.credit = CREDITS_PER_JIFFY_BYTES * HZ; dh->rateinfo.cost = user2credits_byte(hinfo->cfg.avg); dh->rateinfo.credit_cap = hinfo->cfg.burst; @@ -648,7 +735,7 @@ static u32 hashlimit_byte_cost(unsigned int len, struct dsthash_ent *dh) static bool hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par, struct xt_hashlimit_htable *hinfo, - const struct hashlimit_cfg2 *cfg, int revision) + const struct hashlimit_cfg3 *cfg, int revision) { unsigned long now = jiffies; struct dsthash_ent *dh; @@ -659,12 +746,12 @@ hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par, if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0) goto hotdrop; - rcu_read_lock_bh(); + local_bh_disable(); dh = dsthash_find(hinfo, &dst); if (dh == NULL) { dh = dsthash_alloc_init(hinfo, &dst, &race); if (dh == NULL) { - rcu_read_unlock_bh(); + local_bh_enable(); goto hotdrop; } else if (race) { /* Already got an entry, update expiration timeout */ @@ -680,6 +767,20 @@ hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par, rateinfo_recalc(dh, now, hinfo->cfg.mode, revision); } + if (cfg->mode & XT_HASHLIMIT_RATE_MATCH) { + cost = (cfg->mode & XT_HASHLIMIT_BYTES) ? skb->len : 1; + dh->rateinfo.current_rate += cost; + + if (!dh->rateinfo.prev_window && + (dh->rateinfo.current_rate <= dh->rateinfo.burst)) { + spin_unlock(&dh->lock); + rcu_read_unlock_bh(); + return !(cfg->mode & XT_HASHLIMIT_INVERT); + } else { + goto overlimit; + } + } + if (cfg->mode & XT_HASHLIMIT_BYTES) cost = hashlimit_byte_cost(skb->len, dh); else @@ -689,12 +790,13 @@ hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par, /* below the limit */ dh->rateinfo.credit -= cost; spin_unlock(&dh->lock); - rcu_read_unlock_bh(); + local_bh_enable(); return !(cfg->mode & XT_HASHLIMIT_INVERT); } +overlimit: spin_unlock(&dh->lock); - rcu_read_unlock_bh(); + local_bh_enable(); /* default match is underlimit - so over the limit, we need to invert */ return cfg->mode & XT_HASHLIMIT_INVERT; @@ -708,7 +810,7 @@ hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_hashlimit_mtinfo1 *info = par->matchinfo; struct xt_hashlimit_htable *hinfo = info->hinfo; - struct hashlimit_cfg2 cfg = {}; + struct hashlimit_cfg3 cfg = {}; int ret; ret = cfg_copy(&cfg, (void *)&info->cfg, 1); @@ -720,17 +822,33 @@ hashlimit_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) } static bool -hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) +hashlimit_mt_v2(const struct sk_buff *skb, struct xt_action_param *par) { const struct xt_hashlimit_mtinfo2 *info = par->matchinfo; struct xt_hashlimit_htable *hinfo = info->hinfo; + struct hashlimit_cfg3 cfg = {}; + int ret; + + ret = cfg_copy(&cfg, (void *)&info->cfg, 2); + + if (ret) + return ret; + + return hashlimit_mt_common(skb, par, hinfo, &cfg, 2); +} + +static bool +hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) +{ + const struct xt_hashlimit_mtinfo3 *info = par->matchinfo; + struct xt_hashlimit_htable *hinfo = info->hinfo; - return hashlimit_mt_common(skb, par, hinfo, &info->cfg, 2); + return hashlimit_mt_common(skb, par, hinfo, &info->cfg, 3); } static int hashlimit_mt_check_common(const struct xt_mtchk_param *par, struct xt_hashlimit_htable **hinfo, - struct hashlimit_cfg2 *cfg, + struct hashlimit_cfg3 *cfg, const char *name, int revision) { struct net *net = par->net; @@ -753,7 +871,17 @@ static int hashlimit_mt_check_common(const struct xt_mtchk_param *par, } /* Check for overflow. */ - if (cfg->mode & XT_HASHLIMIT_BYTES) { + if (revision >= 3 && cfg->mode & XT_HASHLIMIT_RATE_MATCH) { + if (cfg->avg == 0 || cfg->avg > U32_MAX) { + pr_info("hashlimit invalid rate\n"); + return -ERANGE; + } + + if (cfg->interval == 0) { + pr_info("hashlimit invalid interval\n"); + return -EINVAL; + } + } else if (cfg->mode & XT_HASHLIMIT_BYTES) { if (user2credits_byte(cfg->avg) == 0) { pr_info("overflow, rate too high: %llu\n", cfg->avg); return -EINVAL; @@ -784,7 +912,7 @@ static int hashlimit_mt_check_common(const struct xt_mtchk_param *par, static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par) { struct xt_hashlimit_mtinfo1 *info = par->matchinfo; - struct hashlimit_cfg2 cfg = {}; + struct hashlimit_cfg3 cfg = {}; int ret; if (info->name[sizeof(info->name) - 1] != '\0') @@ -799,15 +927,40 @@ static int hashlimit_mt_check_v1(const struct xt_mtchk_param *par) &cfg, info->name, 1); } -static int hashlimit_mt_check(const struct xt_mtchk_param *par) +static int hashlimit_mt_check_v2(const struct xt_mtchk_param *par) { struct xt_hashlimit_mtinfo2 *info = par->matchinfo; + struct hashlimit_cfg3 cfg = {}; + int ret; + + if (info->name[sizeof(info->name) - 1] != '\0') + return -EINVAL; + + ret = cfg_copy(&cfg, (void *)&info->cfg, 2); + + if (ret) + return ret; + + return hashlimit_mt_check_common(par, &info->hinfo, + &cfg, info->name, 2); +} + +static int hashlimit_mt_check(const struct xt_mtchk_param *par) +{ + struct xt_hashlimit_mtinfo3 *info = par->matchinfo; if (info->name[sizeof(info->name) - 1] != '\0') return -EINVAL; return hashlimit_mt_check_common(par, &info->hinfo, &info->cfg, - info->name, 2); + info->name, 3); +} + +static void hashlimit_mt_destroy_v2(const struct xt_mtdtor_param *par) +{ + const struct xt_hashlimit_mtinfo2 *info = par->matchinfo; + + htable_put(info->hinfo); } static void hashlimit_mt_destroy_v1(const struct xt_mtdtor_param *par) @@ -819,7 +972,7 @@ static void hashlimit_mt_destroy_v1(const struct xt_mtdtor_param *par) static void hashlimit_mt_destroy(const struct xt_mtdtor_param *par) { - const struct xt_hashlimit_mtinfo2 *info = par->matchinfo; + const struct xt_hashlimit_mtinfo3 *info = par->matchinfo; htable_put(info->hinfo); } @@ -840,9 +993,20 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = { .name = "hashlimit", .revision = 2, .family = NFPROTO_IPV4, - .match = hashlimit_mt, + .match = hashlimit_mt_v2, .matchsize = sizeof(struct xt_hashlimit_mtinfo2), .usersize = offsetof(struct xt_hashlimit_mtinfo2, hinfo), + .checkentry = hashlimit_mt_check_v2, + .destroy = hashlimit_mt_destroy_v2, + .me = THIS_MODULE, + }, + { + .name = "hashlimit", + .revision = 3, + .family = NFPROTO_IPV4, + .match = hashlimit_mt, + .matchsize = sizeof(struct xt_hashlimit_mtinfo3), + .usersize = offsetof(struct xt_hashlimit_mtinfo3, hinfo), .checkentry = hashlimit_mt_check, .destroy = hashlimit_mt_destroy, .me = THIS_MODULE, @@ -863,9 +1027,20 @@ static struct xt_match hashlimit_mt_reg[] __read_mostly = { .name = "hashlimit", .revision = 2, .family = NFPROTO_IPV6, - .match = hashlimit_mt, + .match = hashlimit_mt_v2, .matchsize = sizeof(struct xt_hashlimit_mtinfo2), .usersize = offsetof(struct xt_hashlimit_mtinfo2, hinfo), + .checkentry = hashlimit_mt_check_v2, + .destroy = hashlimit_mt_destroy_v2, + .me = THIS_MODULE, + }, + { + .name = "hashlimit", + .revision = 3, + .family = NFPROTO_IPV6, + .match = hashlimit_mt, + .matchsize = sizeof(struct xt_hashlimit_mtinfo3), + .usersize = offsetof(struct xt_hashlimit_mtinfo3, hinfo), .checkentry = hashlimit_mt_check, .destroy = hashlimit_mt_destroy, .me = THIS_MODULE, @@ -947,6 +1122,21 @@ static void dl_seq_print(struct dsthash_ent *ent, u_int8_t family, } } +static int dl_seq_real_show_v2(struct dsthash_ent *ent, u_int8_t family, + struct seq_file *s) +{ + const struct xt_hashlimit_htable *ht = s->private; + + spin_lock(&ent->lock); + /* recalculate to show accurate numbers */ + rateinfo_recalc(ent, jiffies, ht->cfg.mode, 2); + + dl_seq_print(ent, family, s); + + spin_unlock(&ent->lock); + return seq_has_overflowed(s); +} + static int dl_seq_real_show_v1(struct dsthash_ent *ent, u_int8_t family, struct seq_file *s) { @@ -969,7 +1159,7 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, spin_lock(&ent->lock); /* recalculate to show accurate numbers */ - rateinfo_recalc(ent, jiffies, ht->cfg.mode, 2); + rateinfo_recalc(ent, jiffies, ht->cfg.mode, 3); dl_seq_print(ent, family, s); @@ -977,6 +1167,20 @@ static int dl_seq_real_show(struct dsthash_ent *ent, u_int8_t family, return seq_has_overflowed(s); } +static int dl_seq_show_v2(struct seq_file *s, void *v) +{ + struct xt_hashlimit_htable *htable = s->private; + unsigned int *bucket = (unsigned int *)v; + struct dsthash_ent *ent; + + if (!hlist_empty(&htable->hash[*bucket])) { + hlist_for_each_entry(ent, &htable->hash[*bucket], node) + if (dl_seq_real_show_v2(ent, htable->family, s)) + return -1; + } + return 0; +} + static int dl_seq_show_v1(struct seq_file *s, void *v) { struct xt_hashlimit_htable *htable = s->private; @@ -1012,6 +1216,13 @@ static const struct seq_operations dl_seq_ops_v1 = { .show = dl_seq_show_v1 }; +static const struct seq_operations dl_seq_ops_v2 = { + .start = dl_seq_start, + .next = dl_seq_next, + .stop = dl_seq_stop, + .show = dl_seq_show_v2 +}; + static const struct seq_operations dl_seq_ops = { .start = dl_seq_start, .next = dl_seq_next, @@ -1019,6 +1230,18 @@ static const struct seq_operations dl_seq_ops = { .show = dl_seq_show }; +static int dl_proc_open_v2(struct inode *inode, struct file *file) +{ + int ret = seq_open(file, &dl_seq_ops_v2); + + if (!ret) { + struct seq_file *sf = file->private_data; + + sf->private = PDE_DATA(inode); + } + return ret; +} + static int dl_proc_open_v1(struct inode *inode, struct file *file) { int ret = seq_open(file, &dl_seq_ops_v1); @@ -1042,6 +1265,14 @@ static int dl_proc_open(struct inode *inode, struct file *file) return ret; } +static const struct file_operations dl_file_ops_v2 = { + .owner = THIS_MODULE, + .open = dl_proc_open_v2, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release +}; + static const struct file_operations dl_file_ops_v1 = { .owner = THIS_MODULE, .open = dl_proc_open_v1, diff --git a/net/netfilter/xt_nat.c b/net/netfilter/xt_nat.c index 8107b3eb865f..0fd14d1eb09d 100644 --- a/net/netfilter/xt_nat.c +++ b/net/netfilter/xt_nat.c @@ -58,9 +58,9 @@ xt_snat_target_v0(struct sk_buff *skb, const struct xt_action_param *par) struct nf_conn *ct; ct = nf_ct_get(skb, &ctinfo); - NF_CT_ASSERT(ct != NULL && - (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || - ctinfo == IP_CT_RELATED_REPLY)); + WARN_ON(!(ct != NULL && + (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || + ctinfo == IP_CT_RELATED_REPLY))); xt_nat_convert_range(&range, &mr->range[0]); return nf_nat_setup_info(ct, &range, NF_NAT_MANIP_SRC); @@ -75,8 +75,8 @@ xt_dnat_target_v0(struct sk_buff *skb, const struct xt_action_param *par) struct nf_conn *ct; ct = nf_ct_get(skb, &ctinfo); - NF_CT_ASSERT(ct != NULL && - (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); + WARN_ON(!(ct != NULL && + (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED))); xt_nat_convert_range(&range, &mr->range[0]); return nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST); @@ -90,9 +90,9 @@ xt_snat_target_v1(struct sk_buff *skb, const struct xt_action_param *par) struct nf_conn *ct; ct = nf_ct_get(skb, &ctinfo); - NF_CT_ASSERT(ct != NULL && - (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || - ctinfo == IP_CT_RELATED_REPLY)); + WARN_ON(!(ct != NULL && + (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || + ctinfo == IP_CT_RELATED_REPLY))); return nf_nat_setup_info(ct, range, NF_NAT_MANIP_SRC); } @@ -105,8 +105,8 @@ xt_dnat_target_v1(struct sk_buff *skb, const struct xt_action_param *par) struct nf_conn *ct; ct = nf_ct_get(skb, &ctinfo); - NF_CT_ASSERT(ct != NULL && - (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); + WARN_ON(!(ct != NULL && + (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED))); return nf_nat_setup_info(ct, range, NF_NAT_MANIP_DST); } diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c index 71cfa9551d08..36e14b1f061d 100644 --- a/net/netfilter/xt_osf.c +++ b/net/netfilter/xt_osf.c @@ -226,7 +226,6 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) sizeof(struct tcphdr), optsize, opts); } - rcu_read_lock(); list_for_each_entry_rcu(kf, &xt_osf_fingers[df], finger_entry) { int foptsize, optnum; @@ -340,7 +339,6 @@ xt_osf_match_packet(const struct sk_buff *skb, struct xt_action_param *p) info->loglevel == XT_OSF_LOGLEVEL_FIRST) break; } - rcu_read_unlock(); if (!fcount && (info->flags & XT_OSF_LOG)) nf_log_packet(net, xt_family(p), xt_hooknum(p), skb, xt_in(p), diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 3f6c4fa78bdb..245fa350a7a8 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -106,7 +106,7 @@ static DEFINE_SPINLOCK(recent_lock); static DEFINE_MUTEX(recent_mutex); #ifdef CONFIG_PROC_FS -static const struct file_operations recent_old_fops, recent_mt_fops; +static const struct file_operations recent_mt_fops; #endif static u_int32_t hash_rnd __read_mostly; diff --git a/net/netfilter/xt_repldata.h b/net/netfilter/xt_repldata.h index 8fd324116e6f..68ccbe50bb1e 100644 --- a/net/netfilter/xt_repldata.h +++ b/net/netfilter/xt_repldata.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Today's hack: quantum tunneling in structs * diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index e75ef39669c5..575d2153e3b8 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -76,7 +76,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, transparent = nf_sk_is_transparent(sk); if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard && - transparent) + transparent && sk_fullsock(sk)) pskb->mark = sk->sk_mark; if (sk != skb->sk) @@ -133,7 +133,7 @@ socket_mt6_v1_v2_v3(const struct sk_buff *skb, struct xt_action_param *par) transparent = nf_sk_is_transparent(sk); if (info->flags & XT_SOCKET_RESTORESKMARK && !wildcard && - transparent) + transparent && sk_fullsock(sk)) pskb->mark = sk->sk_mark; if (sk != skb->sk) diff --git a/net/netlabel/Makefile b/net/netlabel/Makefile index d341ede0dca5..5a46381a64e7 100644 --- a/net/netlabel/Makefile +++ b/net/netlabel/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the NetLabel subsystem. # diff --git a/net/netlabel/netlabel_addrlist.h b/net/netlabel/netlabel_addrlist.h index d0f38bc9af6d..ac709f0f197b 100644 --- a/net/netlabel/netlabel_addrlist.h +++ b/net/netlabel/netlabel_addrlist.h @@ -87,7 +87,7 @@ static inline struct netlbl_af4list *__af4list_valid_rcu(struct list_head *s, struct list_head *i = s; struct netlbl_af4list *n = __af4list_entry(s); while (i != h && !n->valid) { - i = rcu_dereference(i->next); + i = rcu_dereference(list_next_rcu(i)); n = __af4list_entry(i); } return n; @@ -154,7 +154,7 @@ static inline struct netlbl_af6list *__af6list_valid_rcu(struct list_head *s, struct list_head *i = s; struct netlbl_af6list *n = __af6list_entry(s); while (i != h && !n->valid) { - i = rcu_dereference(i->next); + i = rcu_dereference(list_next_rcu(i)); n = __af6list_entry(i); } return n; diff --git a/net/netlabel/netlabel_calipso.c b/net/netlabel/netlabel_calipso.c index d177dd066504..4d748975117d 100644 --- a/net/netlabel/netlabel_calipso.c +++ b/net/netlabel/netlabel_calipso.c @@ -393,7 +393,7 @@ EXPORT_SYMBOL(netlbl_calipso_ops_register); static const struct netlbl_calipso_ops *netlbl_calipso_ops_get(void) { - return ACCESS_ONCE(calipso_ops); + return READ_ONCE(calipso_ops); } /** diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 5acee49db90b..b9e0ee4e22f5 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -128,7 +128,6 @@ static const char *const nlk_cb_mutex_key_strings[MAX_LINKS + 1] = { }; static int netlink_dump(struct sock *sk); -static void netlink_skb_destructor(struct sk_buff *skb); /* nl_table locking explained: * Lookup and traversal are protected with an RCU read-side lock. Insertion @@ -691,6 +690,9 @@ static void deferred_put_nlk_sk(struct rcu_head *head) struct netlink_sock *nlk = container_of(head, struct netlink_sock, rcu); struct sock *sk = &nlk->sk; + kfree(nlk->groups); + nlk->groups = NULL; + if (!refcount_dec_and_test(&sk->sk_refcnt)) return; @@ -769,9 +771,6 @@ static int netlink_release(struct socket *sock) netlink_table_ungrab(); } - kfree(nlk->groups); - nlk->groups = NULL; - local_bh_disable(); sock_prot_inuse_add(sock_net(sk), &netlink_proto, -1); local_bh_enable(); @@ -955,7 +954,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, struct net *net = sock_net(sk); struct netlink_sock *nlk = nlk_sk(sk); struct sockaddr_nl *nladdr = (struct sockaddr_nl *)addr; - int err; + int err = 0; long unsigned int groups = nladdr->nl_groups; bool bound; @@ -983,6 +982,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, return -EINVAL; } + netlink_lock_table(); if (nlk->netlink_bind && groups) { int group; @@ -993,7 +993,7 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, if (!err) continue; netlink_undo_bind(group, groups, sk); - return err; + goto unlock; } } @@ -1006,12 +1006,13 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, netlink_autobind(sock); if (err) { netlink_undo_bind(nlk->ngroups, groups, sk); - return err; + goto unlock; } } if (!groups && (nlk->groups == NULL || !(u32)nlk->groups[0])) - return 0; + goto unlock; + netlink_unlock_table(); netlink_table_grab(); netlink_update_subscriptions(sk, nlk->subscriptions + @@ -1022,6 +1023,10 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, netlink_table_ungrab(); return 0; + +unlock: + netlink_unlock_table(); + return err; } static int netlink_connect(struct socket *sock, struct sockaddr *addr, @@ -1079,7 +1084,9 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr, nladdr->nl_groups = netlink_group_mask(nlk->dst_group); } else { nladdr->nl_pid = nlk->portid; + netlink_lock_table(); nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0; + netlink_unlock_table(); } return 0; } @@ -2128,7 +2135,7 @@ static int netlink_dump(struct sock *sk) struct sk_buff *skb = NULL; struct nlmsghdr *nlh; struct module *module; - int len, err = -ENOBUFS; + int err = -ENOBUFS; int alloc_min_size; int alloc_size; @@ -2175,9 +2182,11 @@ static int netlink_dump(struct sock *sk) skb_reserve(skb, skb_tailroom(skb) - alloc_size); netlink_skb_set_owner_r(skb, sk); - len = cb->dump(skb, cb); + if (nlk->dump_done_errno > 0) + nlk->dump_done_errno = cb->dump(skb, cb); - if (len > 0) { + if (nlk->dump_done_errno > 0 || + skb_tailroom(skb) < nlmsg_total_size(sizeof(nlk->dump_done_errno))) { mutex_unlock(nlk->cb_mutex); if (sk_filter(sk, skb)) @@ -2187,13 +2196,15 @@ static int netlink_dump(struct sock *sk) return 0; } - nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, sizeof(len), NLM_F_MULTI); - if (!nlh) + nlh = nlmsg_put_answer(skb, cb, NLMSG_DONE, + sizeof(nlk->dump_done_errno), NLM_F_MULTI); + if (WARN_ON(!nlh)) goto errout_skb; nl_dump_check_consistent(cb, nlh); - memcpy(nlmsg_data(nlh), &len, sizeof(len)); + memcpy(nlmsg_data(nlh), &nlk->dump_done_errno, + sizeof(nlk->dump_done_errno)); if (sk_filter(sk, skb)) kfree_skb(skb); @@ -2258,14 +2269,19 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, cb->min_dump_alloc = control->min_dump_alloc; cb->skb = skb; + if (cb->start) { + ret = cb->start(cb); + if (ret) + goto error_unlock; + } + nlk->cb_running = true; + nlk->dump_done_errno = INT_MAX; mutex_unlock(nlk->cb_mutex); - if (cb->start) - cb->start(cb); - ret = netlink_dump(sk); + sock_put(sk); if (ret) @@ -2295,27 +2311,26 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, size_t tlvlen = 0; struct netlink_sock *nlk = nlk_sk(NETLINK_CB(in_skb).sk); unsigned int flags = 0; + bool nlk_has_extack = nlk->flags & NETLINK_F_EXT_ACK; /* Error messages get the original request appened, unless the user * requests to cap the error message, and get extra error data if * requested. */ + if (nlk_has_extack && extack && extack->_msg) + tlvlen += nla_total_size(strlen(extack->_msg) + 1); + if (err) { if (!(nlk->flags & NETLINK_F_CAP_ACK)) payload += nlmsg_len(nlh); else flags |= NLM_F_CAPPED; - if (nlk->flags & NETLINK_F_EXT_ACK && extack) { - if (extack->_msg) - tlvlen += nla_total_size(strlen(extack->_msg) + 1); - if (extack->bad_attr) - tlvlen += nla_total_size(sizeof(u32)); - } + if (nlk_has_extack && extack && extack->bad_attr) + tlvlen += nla_total_size(sizeof(u32)); } else { flags |= NLM_F_CAPPED; - if (nlk->flags & NETLINK_F_EXT_ACK && - extack && extack->cookie_len) + if (nlk_has_extack && extack && extack->cookie_len) tlvlen += nla_total_size(extack->cookie_len); } @@ -2324,16 +2339,8 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, skb = nlmsg_new(payload + tlvlen, GFP_KERNEL); if (!skb) { - struct sock *sk; - - sk = netlink_lookup(sock_net(in_skb->sk), - in_skb->sk->sk_protocol, - NETLINK_CB(in_skb).portid); - if (sk) { - sk->sk_err = ENOBUFS; - sk->sk_error_report(sk); - sock_put(sk); - } + NETLINK_CB(in_skb).sk->sk_err = ENOBUFS; + NETLINK_CB(in_skb).sk->sk_error_report(NETLINK_CB(in_skb).sk); return; } @@ -2343,11 +2350,12 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, errmsg->error = err; memcpy(&errmsg->msg, nlh, payload > sizeof(*errmsg) ? nlh->nlmsg_len : sizeof(*nlh)); - if (nlk->flags & NETLINK_F_EXT_ACK && extack) { + if (nlk_has_extack && extack) { + if (extack->_msg) { + WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG, + extack->_msg)); + } if (err) { - if (extack->_msg) - WARN_ON(nla_put_string(skb, NLMSGERR_ATTR_MSG, - extack->_msg)); if (extack->bad_attr && !WARN_ON((u8 *)extack->bad_attr < in_skb->data || (u8 *)extack->bad_attr >= in_skb->data + diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h index 3490f2430532..962de7b3c023 100644 --- a/net/netlink/af_netlink.h +++ b/net/netlink/af_netlink.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _AF_NETLINK_H #define _AF_NETLINK_H @@ -33,6 +34,7 @@ struct netlink_sock { wait_queue_head_t wait; bool bound; bool cb_running; + int dump_done_errno; struct netlink_callback cb; struct mutex *cb_mutex; struct mutex cb_def_mutex; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 10f8b4cff40a..d444daf1ac04 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * NETLINK Generic Netlink Family * diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index ebf16f7f9089..2dec3583c97d 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -241,9 +241,9 @@ void nr_destroy_socket(struct sock *); /* * Handler for deferred kills. */ -static void nr_destroy_timer(unsigned long data) +static void nr_destroy_timer(struct timer_list *t) { - struct sock *sk=(struct sock *)data; + struct sock *sk = from_timer(sk, t, sk_timer); bh_lock_sock(sk); sock_hold(sk); nr_destroy_socket(sk); @@ -284,7 +284,7 @@ void nr_destroy_socket(struct sock *sk) if (sk_has_allocations(sk)) { /* Defer: outstanding buffers */ - sk->sk_timer.function = nr_destroy_timer; + sk->sk_timer.function = (TIMER_FUNC_TYPE)nr_destroy_timer; sk->sk_timer.expires = jiffies + 2 * HZ; add_timer(&sk->sk_timer); } else diff --git a/net/netrom/nr_in.c b/net/netrom/nr_in.c index 80dbd0beb516..fbfdae452ff9 100644 --- a/net/netrom/nr_in.c +++ b/net/netrom/nr_in.c @@ -125,7 +125,7 @@ static int nr_state2_machine(struct sock *sk, struct sk_buff *skb, case NR_DISCREQ: nr_write_internal(sk, NR_DISCACK); - + /* fall through */ case NR_DISCACK: nr_disconnect(sk, 0); break; diff --git a/net/netrom/nr_loopback.c b/net/netrom/nr_loopback.c index 94d4e922af53..989ae647825e 100644 --- a/net/netrom/nr_loopback.c +++ b/net/netrom/nr_loopback.c @@ -18,7 +18,7 @@ static void nr_loopback_timer(unsigned long); static struct sk_buff_head loopback_queue; -static DEFINE_TIMER(loopback_timer, nr_loopback_timer, 0, 0); +static DEFINE_TIMER(loopback_timer, nr_loopback_timer); void __init nr_loopback_init(void) { diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index 0c59354e280e..75e6ba970fde 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -80,6 +80,19 @@ static struct nr_neigh *nr_neigh_get_dev(ax25_address *callsign, static void nr_remove_neigh(struct nr_neigh *); +/* re-sort the routes in quality order. */ +static void re_sort_routes(struct nr_node *nr_node, int x, int y) +{ + if (nr_node->routes[y].quality > nr_node->routes[x].quality) { + if (nr_node->which == x) + nr_node->which = y; + else if (nr_node->which == y) + nr_node->which = x; + + swap(nr_node->routes[x], nr_node->routes[y]); + } +} + /* * Add a new route to a node, and in the process add the node and the * neighbour if it is new. @@ -90,7 +103,6 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic, { struct nr_node *nr_node; struct nr_neigh *nr_neigh; - struct nr_route nr_route; int i, found; struct net_device *odev; @@ -251,49 +263,11 @@ static int __must_check nr_add_node(ax25_address *nr, const char *mnemonic, /* Now re-sort the routes in quality order */ switch (nr_node->count) { case 3: - if (nr_node->routes[1].quality > nr_node->routes[0].quality) { - switch (nr_node->which) { - case 0: - nr_node->which = 1; - break; - case 1: - nr_node->which = 0; - break; - } - nr_route = nr_node->routes[0]; - nr_node->routes[0] = nr_node->routes[1]; - nr_node->routes[1] = nr_route; - } - if (nr_node->routes[2].quality > nr_node->routes[1].quality) { - switch (nr_node->which) { - case 1: nr_node->which = 2; - break; - - case 2: nr_node->which = 1; - break; - - default: - break; - } - nr_route = nr_node->routes[1]; - nr_node->routes[1] = nr_node->routes[2]; - nr_node->routes[2] = nr_route; - } + re_sort_routes(nr_node, 0, 1); + re_sort_routes(nr_node, 1, 2); + /* fall through */ case 2: - if (nr_node->routes[1].quality > nr_node->routes[0].quality) { - switch (nr_node->which) { - case 0: nr_node->which = 1; - break; - - case 1: nr_node->which = 0; - break; - - default: break; - } - nr_route = nr_node->routes[0]; - nr_node->routes[0] = nr_node->routes[1]; - nr_node->routes[1] = nr_route; - } + re_sort_routes(nr_node, 0, 1); case 1: break; } @@ -384,6 +358,7 @@ static int nr_del_node(ax25_address *callsign, ax25_address *neighbour, struct n switch (i) { case 0: nr_node->routes[0] = nr_node->routes[1]; + /* fall through */ case 1: nr_node->routes[1] = nr_node->routes[2]; case 2: @@ -553,6 +528,7 @@ void nr_rt_device_down(struct net_device *dev) switch (i) { case 0: t->routes[0] = t->routes[1]; + /* fall through */ case 1: t->routes[1] = t->routes[2]; case 2: diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c index 94d05806a9a2..43569aea0f5e 100644 --- a/net/netrom/nr_timer.c +++ b/net/netrom/nr_timer.c @@ -29,24 +29,23 @@ #include <linux/interrupt.h> #include <net/netrom.h> -static void nr_heartbeat_expiry(unsigned long); -static void nr_t1timer_expiry(unsigned long); -static void nr_t2timer_expiry(unsigned long); -static void nr_t4timer_expiry(unsigned long); -static void nr_idletimer_expiry(unsigned long); +static void nr_heartbeat_expiry(struct timer_list *); +static void nr_t1timer_expiry(struct timer_list *); +static void nr_t2timer_expiry(struct timer_list *); +static void nr_t4timer_expiry(struct timer_list *); +static void nr_idletimer_expiry(struct timer_list *); void nr_init_timers(struct sock *sk) { struct nr_sock *nr = nr_sk(sk); - setup_timer(&nr->t1timer, nr_t1timer_expiry, (unsigned long)sk); - setup_timer(&nr->t2timer, nr_t2timer_expiry, (unsigned long)sk); - setup_timer(&nr->t4timer, nr_t4timer_expiry, (unsigned long)sk); - setup_timer(&nr->idletimer, nr_idletimer_expiry, (unsigned long)sk); + timer_setup(&nr->t1timer, nr_t1timer_expiry, 0); + timer_setup(&nr->t2timer, nr_t2timer_expiry, 0); + timer_setup(&nr->t4timer, nr_t4timer_expiry, 0); + timer_setup(&nr->idletimer, nr_idletimer_expiry, 0); /* initialized by sock_init_data */ - sk->sk_timer.data = (unsigned long)sk; - sk->sk_timer.function = &nr_heartbeat_expiry; + sk->sk_timer.function = (TIMER_FUNC_TYPE)nr_heartbeat_expiry; } void nr_start_t1timer(struct sock *sk) @@ -113,9 +112,9 @@ int nr_t1timer_running(struct sock *sk) return timer_pending(&nr_sk(sk)->t1timer); } -static void nr_heartbeat_expiry(unsigned long param) +static void nr_heartbeat_expiry(struct timer_list *t) { - struct sock *sk = (struct sock *)param; + struct sock *sk = from_timer(sk, t, sk_timer); struct nr_sock *nr = nr_sk(sk); bh_lock_sock(sk); @@ -152,10 +151,10 @@ static void nr_heartbeat_expiry(unsigned long param) bh_unlock_sock(sk); } -static void nr_t2timer_expiry(unsigned long param) +static void nr_t2timer_expiry(struct timer_list *t) { - struct sock *sk = (struct sock *)param; - struct nr_sock *nr = nr_sk(sk); + struct nr_sock *nr = from_timer(nr, t, t2timer); + struct sock *sk = &nr->sock; bh_lock_sock(sk); if (nr->condition & NR_COND_ACK_PENDING) { @@ -165,19 +164,20 @@ static void nr_t2timer_expiry(unsigned long param) bh_unlock_sock(sk); } -static void nr_t4timer_expiry(unsigned long param) +static void nr_t4timer_expiry(struct timer_list *t) { - struct sock *sk = (struct sock *)param; + struct nr_sock *nr = from_timer(nr, t, t4timer); + struct sock *sk = &nr->sock; bh_lock_sock(sk); nr_sk(sk)->condition &= ~NR_COND_PEER_RX_BUSY; bh_unlock_sock(sk); } -static void nr_idletimer_expiry(unsigned long param) +static void nr_idletimer_expiry(struct timer_list *t) { - struct sock *sk = (struct sock *)param; - struct nr_sock *nr = nr_sk(sk); + struct nr_sock *nr = from_timer(nr, t, idletimer); + struct sock *sk = &nr->sock; bh_lock_sock(sk); @@ -202,10 +202,10 @@ static void nr_idletimer_expiry(unsigned long param) bh_unlock_sock(sk); } -static void nr_t1timer_expiry(unsigned long param) +static void nr_t1timer_expiry(struct timer_list *t) { - struct sock *sk = (struct sock *)param; - struct nr_sock *nr = nr_sk(sk); + struct nr_sock *nr = from_timer(nr, t, t1timer); + struct sock *sk = &nr->sock; bh_lock_sock(sk); switch (nr->state) { diff --git a/net/nfc/Makefile b/net/nfc/Makefile index 2555ff8e7219..2ffc69b473fc 100644 --- a/net/nfc/Makefile +++ b/net/nfc/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the Linux NFC subsystem. # diff --git a/net/nfc/core.c b/net/nfc/core.c index 5cf33df888c3..947a470f929d 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -1015,9 +1015,9 @@ exit: device_unlock(&dev->dev); } -static void nfc_check_pres_timeout(unsigned long data) +static void nfc_check_pres_timeout(struct timer_list *t) { - struct nfc_dev *dev = (struct nfc_dev *)data; + struct nfc_dev *dev = from_timer(dev, t, check_pres_timer); schedule_work(&dev->check_pres_work); } @@ -1094,10 +1094,7 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, dev->targets_generation = 1; if (ops->check_presence) { - init_timer(&dev->check_pres_timer); - dev->check_pres_timer.data = (unsigned long)dev; - dev->check_pres_timer.function = nfc_check_pres_timeout; - + timer_setup(&dev->check_pres_timer, nfc_check_pres_timeout, 0); INIT_WORK(&dev->check_pres_work, nfc_check_pres_work); } @@ -1106,7 +1103,7 @@ struct nfc_dev *nfc_allocate_device(struct nfc_ops *ops, err_free_dev: kfree(dev); - return ERR_PTR(rc); + return NULL; } EXPORT_SYMBOL(nfc_allocate_device); diff --git a/net/nfc/digital_core.c b/net/nfc/digital_core.c index de6dd37d04c7..ec0a8998e52d 100644 --- a/net/nfc/digital_core.c +++ b/net/nfc/digital_core.c @@ -650,6 +650,7 @@ static void digital_deactivate_target(struct nfc_dev *nfc_dev, return; } + digital_abort_cmd(ddev); ddev->curr_protocol = 0; } diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index b740fef0acc5..ac8030c4bcf8 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -428,9 +428,9 @@ exit_noskb: nfc_hci_driver_failure(hdev, r); } -static void nfc_hci_cmd_timeout(unsigned long data) +static void nfc_hci_cmd_timeout(struct timer_list *t) { - struct nfc_hci_dev *hdev = (struct nfc_hci_dev *)data; + struct nfc_hci_dev *hdev = from_timer(hdev, t, cmd_timer); schedule_work(&hdev->msg_tx_work); } @@ -1004,9 +1004,7 @@ int nfc_hci_register_device(struct nfc_hci_dev *hdev) INIT_WORK(&hdev->msg_tx_work, nfc_hci_msg_tx_work); - init_timer(&hdev->cmd_timer); - hdev->cmd_timer.data = (unsigned long)hdev; - hdev->cmd_timer.function = nfc_hci_cmd_timeout; + timer_setup(&hdev->cmd_timer, nfc_hci_cmd_timeout, 0); skb_queue_head_init(&hdev->rx_hcp_frags); diff --git a/net/nfc/hci/llc_shdlc.c b/net/nfc/hci/llc_shdlc.c index 17e59a009ce6..fe988936ad92 100644 --- a/net/nfc/hci/llc_shdlc.c +++ b/net/nfc/hci/llc_shdlc.c @@ -580,27 +580,27 @@ static void llc_shdlc_handle_send_queue(struct llc_shdlc *shdlc) } } -static void llc_shdlc_connect_timeout(unsigned long data) +static void llc_shdlc_connect_timeout(struct timer_list *t) { - struct llc_shdlc *shdlc = (struct llc_shdlc *)data; + struct llc_shdlc *shdlc = from_timer(shdlc, t, connect_timer); pr_debug("\n"); schedule_work(&shdlc->sm_work); } -static void llc_shdlc_t1_timeout(unsigned long data) +static void llc_shdlc_t1_timeout(struct timer_list *t) { - struct llc_shdlc *shdlc = (struct llc_shdlc *)data; + struct llc_shdlc *shdlc = from_timer(shdlc, t, t1_timer); pr_debug("SoftIRQ: need to send ack\n"); schedule_work(&shdlc->sm_work); } -static void llc_shdlc_t2_timeout(unsigned long data) +static void llc_shdlc_t2_timeout(struct timer_list *t) { - struct llc_shdlc *shdlc = (struct llc_shdlc *)data; + struct llc_shdlc *shdlc = from_timer(shdlc, t, t2_timer); pr_debug("SoftIRQ: need to retransmit\n"); @@ -763,17 +763,9 @@ static void *llc_shdlc_init(struct nfc_hci_dev *hdev, xmit_to_drv_t xmit_to_drv, mutex_init(&shdlc->state_mutex); shdlc->state = SHDLC_DISCONNECTED; - init_timer(&shdlc->connect_timer); - shdlc->connect_timer.data = (unsigned long)shdlc; - shdlc->connect_timer.function = llc_shdlc_connect_timeout; - - init_timer(&shdlc->t1_timer); - shdlc->t1_timer.data = (unsigned long)shdlc; - shdlc->t1_timer.function = llc_shdlc_t1_timeout; - - init_timer(&shdlc->t2_timer); - shdlc->t2_timer.data = (unsigned long)shdlc; - shdlc->t2_timer.function = llc_shdlc_t2_timeout; + timer_setup(&shdlc->connect_timer, llc_shdlc_connect_timeout, 0); + timer_setup(&shdlc->t1_timer, llc_shdlc_t1_timeout, 0); + timer_setup(&shdlc->t2_timer, llc_shdlc_t2_timeout, 0); shdlc->w = SHDLC_MAX_WINDOW; shdlc->srej_support = SHDLC_SREJ_SUPPORT; diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c index 02eef5cf3cce..ef4026a23e80 100644 --- a/net/nfc/llcp_core.c +++ b/net/nfc/llcp_core.c @@ -242,9 +242,9 @@ static void nfc_llcp_timeout_work(struct work_struct *work) nfc_dep_link_down(local->dev); } -static void nfc_llcp_symm_timer(unsigned long data) +static void nfc_llcp_symm_timer(struct timer_list *t) { - struct nfc_llcp_local *local = (struct nfc_llcp_local *) data; + struct nfc_llcp_local *local = from_timer(local, t, link_timer); pr_err("SYMM timeout\n"); @@ -285,9 +285,9 @@ static void nfc_llcp_sdreq_timeout_work(struct work_struct *work) nfc_genl_llc_send_sdres(local->dev, &nl_sdres_list); } -static void nfc_llcp_sdreq_timer(unsigned long data) +static void nfc_llcp_sdreq_timer(struct timer_list *t) { - struct nfc_llcp_local *local = (struct nfc_llcp_local *) data; + struct nfc_llcp_local *local = from_timer(local, t, sdreq_timer); schedule_work(&local->sdreq_timeout_work); } @@ -1573,9 +1573,7 @@ int nfc_llcp_register_device(struct nfc_dev *ndev) INIT_LIST_HEAD(&local->list); kref_init(&local->ref); mutex_init(&local->sdp_lock); - init_timer(&local->link_timer); - local->link_timer.data = (unsigned long) local; - local->link_timer.function = nfc_llcp_symm_timer; + timer_setup(&local->link_timer, nfc_llcp_symm_timer, 0); skb_queue_head_init(&local->tx_queue); INIT_WORK(&local->tx_work, nfc_llcp_tx_work); @@ -1601,9 +1599,7 @@ int nfc_llcp_register_device(struct nfc_dev *ndev) mutex_init(&local->sdreq_lock); INIT_HLIST_HEAD(&local->pending_sdreqs); - init_timer(&local->sdreq_timer); - local->sdreq_timer.data = (unsigned long) local; - local->sdreq_timer.function = nfc_llcp_sdreq_timer; + timer_setup(&local->sdreq_timer, nfc_llcp_sdreq_timer, 0); INIT_WORK(&local->sdreq_timeout_work, nfc_llcp_sdreq_timeout_work); list_add(&local->list, &llcp_devices); diff --git a/net/nfc/nci/Makefile b/net/nfc/nci/Makefile index 0ca31d9bf741..c3362c499281 100644 --- a/net/nfc/nci/Makefile +++ b/net/nfc/nci/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the Linux NFC NCI layer. # diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index b251fb936a27..c0b83dc9d993 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -75,7 +75,7 @@ static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target, if (!hdr) return -EMSGSIZE; - genl_dump_check_consistent(cb, hdr, &nfc_genl_family); + genl_dump_check_consistent(cb, hdr); if (nla_put_u32(msg, NFC_ATTR_TARGET_INDEX, target->idx) || nla_put_u32(msg, NFC_ATTR_PROTOCOLS, target->supported_protocols) || @@ -603,7 +603,7 @@ static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev, return -EMSGSIZE; if (cb) - genl_dump_check_consistent(cb, hdr, &nfc_genl_family); + genl_dump_check_consistent(cb, hdr); if (nfc_genl_setup_device_added(dev, msg)) goto nla_put_failure; @@ -928,6 +928,30 @@ static int nfc_genl_activate_target(struct sk_buff *skb, struct genl_info *info) return rc; } +static int nfc_genl_deactivate_target(struct sk_buff *skb, + struct genl_info *info) +{ + struct nfc_dev *dev; + u32 device_idx, target_idx; + int rc; + + if (!info->attrs[NFC_ATTR_DEVICE_INDEX]) + return -EINVAL; + + device_idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); + + dev = nfc_get_device(device_idx); + if (!dev) + return -ENODEV; + + target_idx = nla_get_u32(info->attrs[NFC_ATTR_TARGET_INDEX]); + + rc = nfc_deactivate_target(dev, target_idx, NFC_TARGET_MODE_SLEEP); + + nfc_put_device(dev); + return rc; +} + static int nfc_genl_dep_link_up(struct sk_buff *skb, struct genl_info *info) { struct nfc_dev *dev; @@ -1332,7 +1356,7 @@ static int nfc_genl_send_se(struct sk_buff *msg, struct nfc_dev *dev, goto nla_put_failure; if (cb) - genl_dump_check_consistent(cb, hdr, &nfc_genl_family); + genl_dump_check_consistent(cb, hdr); if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || nla_put_u32(msg, NFC_ATTR_SE_INDEX, se->idx) || @@ -1751,6 +1775,11 @@ static const struct genl_ops nfc_genl_ops[] = { .doit = nfc_genl_vendor_cmd, .policy = nfc_genl_policy, }, + { + .cmd = NFC_CMD_DEACTIVATE_TARGET, + .doit = nfc_genl_deactivate_target, + .policy = nfc_genl_policy, + }, }; static struct genl_family nfc_genl_family __ro_after_init = { diff --git a/net/nsh/Kconfig b/net/nsh/Kconfig new file mode 100644 index 000000000000..bafc3dd60c2c --- /dev/null +++ b/net/nsh/Kconfig @@ -0,0 +1,9 @@ +menuconfig NET_NSH + tristate "Network Service Header (NSH) protocol" + default n + ---help--- + Network Service Header is an implementation of Service Function + Chaining (RFC 7665). The current implementation in Linux supports + only MD type 1 and only with the openvswitch module. + + If unsure, say N. diff --git a/net/nsh/Makefile b/net/nsh/Makefile new file mode 100644 index 000000000000..c93c787385ca --- /dev/null +++ b/net/nsh/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_NET_NSH) += nsh.o diff --git a/net/nsh/nsh.c b/net/nsh/nsh.c new file mode 100644 index 000000000000..d7da99a0b0b8 --- /dev/null +++ b/net/nsh/nsh.c @@ -0,0 +1,151 @@ +/* + * Network Service Header + * + * Copyright (c) 2017 Red Hat, Inc. -- Jiri Benc <jbenc@redhat.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/module.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <net/nsh.h> +#include <net/tun_proto.h> + +int nsh_push(struct sk_buff *skb, const struct nshhdr *pushed_nh) +{ + struct nshhdr *nh; + size_t length = nsh_hdr_len(pushed_nh); + u8 next_proto; + + if (skb->mac_len) { + next_proto = TUN_P_ETHERNET; + } else { + next_proto = tun_p_from_eth_p(skb->protocol); + if (!next_proto) + return -EAFNOSUPPORT; + } + + /* Add the NSH header */ + if (skb_cow_head(skb, length) < 0) + return -ENOMEM; + + skb_push(skb, length); + nh = (struct nshhdr *)(skb->data); + memcpy(nh, pushed_nh, length); + nh->np = next_proto; + skb_postpush_rcsum(skb, nh, length); + + skb->protocol = htons(ETH_P_NSH); + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + skb_reset_mac_len(skb); + + return 0; +} +EXPORT_SYMBOL_GPL(nsh_push); + +int nsh_pop(struct sk_buff *skb) +{ + struct nshhdr *nh; + size_t length; + __be16 inner_proto; + + if (!pskb_may_pull(skb, NSH_BASE_HDR_LEN)) + return -ENOMEM; + nh = (struct nshhdr *)(skb->data); + length = nsh_hdr_len(nh); + inner_proto = tun_p_to_eth_p(nh->np); + if (!pskb_may_pull(skb, length)) + return -ENOMEM; + + if (!inner_proto) + return -EAFNOSUPPORT; + + skb_pull_rcsum(skb, length); + skb_reset_mac_header(skb); + skb_reset_network_header(skb); + skb_reset_mac_len(skb); + skb->protocol = inner_proto; + + return 0; +} +EXPORT_SYMBOL_GPL(nsh_pop); + +static struct sk_buff *nsh_gso_segment(struct sk_buff *skb, + netdev_features_t features) +{ + struct sk_buff *segs = ERR_PTR(-EINVAL); + unsigned int nsh_len, mac_len; + __be16 proto; + int nhoff; + + skb_reset_network_header(skb); + + nhoff = skb->network_header - skb->mac_header; + mac_len = skb->mac_len; + + if (unlikely(!pskb_may_pull(skb, NSH_BASE_HDR_LEN))) + goto out; + nsh_len = nsh_hdr_len(nsh_hdr(skb)); + if (unlikely(!pskb_may_pull(skb, nsh_len))) + goto out; + + proto = tun_p_to_eth_p(nsh_hdr(skb)->np); + if (!proto) + goto out; + + __skb_pull(skb, nsh_len); + + skb_reset_mac_header(skb); + skb_reset_mac_len(skb); + skb->protocol = proto; + + features &= NETIF_F_SG; + segs = skb_mac_gso_segment(skb, features); + if (IS_ERR_OR_NULL(segs)) { + skb_gso_error_unwind(skb, htons(ETH_P_NSH), nsh_len, + skb->network_header - nhoff, + mac_len); + goto out; + } + + for (skb = segs; skb; skb = skb->next) { + skb->protocol = htons(ETH_P_NSH); + __skb_push(skb, nsh_len); + skb_set_mac_header(skb, -nhoff); + skb->network_header = skb->mac_header + mac_len; + skb->mac_len = mac_len; + } + +out: + return segs; +} + +static struct packet_offload nsh_packet_offload __read_mostly = { + .type = htons(ETH_P_NSH), + .priority = 15, + .callbacks = { + .gso_segment = nsh_gso_segment, + }, +}; + +static int __init nsh_init_module(void) +{ + dev_add_offload(&nsh_packet_offload); + return 0; +} + +static void __exit nsh_cleanup_module(void) +{ + dev_remove_offload(&nsh_packet_offload); +} + +module_init(nsh_init_module); +module_exit(nsh_cleanup_module); + +MODULE_AUTHOR("Jiri Benc <jbenc@redhat.com>"); +MODULE_DESCRIPTION("NSH protocol"); +MODULE_LICENSE("GPL v2"); diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index ce947292ae77..2650205cdaf9 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -14,6 +14,7 @@ config OPENVSWITCH select MPLS select NET_MPLS_GSO select DST_CACHE + select NET_NSH ---help--- Open vSwitch is a multilayer Ethernet switch targeted at virtualized environments. In addition to supporting a variety of features diff --git a/net/openvswitch/Makefile b/net/openvswitch/Makefile index 60f809085b92..41109c326f3a 100644 --- a/net/openvswitch/Makefile +++ b/net/openvswitch/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for Open vSwitch. # @@ -11,6 +12,7 @@ openvswitch-y := \ flow.o \ flow_netlink.o \ flow_table.o \ + meter.o \ vport.o \ vport-internal_dev.o \ vport-netdev.o diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index e4610676299b..30a5df27116e 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -43,6 +43,7 @@ #include "flow.h" #include "conntrack.h" #include "vport.h" +#include "flow_netlink.h" struct deferred_action { struct sk_buff *skb; @@ -380,6 +381,38 @@ static int push_eth(struct sk_buff *skb, struct sw_flow_key *key, return 0; } +static int push_nsh(struct sk_buff *skb, struct sw_flow_key *key, + const struct nshhdr *nh) +{ + int err; + + err = nsh_push(skb, nh); + if (err) + return err; + + /* safe right before invalidate_flow_key */ + key->mac_proto = MAC_PROTO_NONE; + invalidate_flow_key(key); + return 0; +} + +static int pop_nsh(struct sk_buff *skb, struct sw_flow_key *key) +{ + int err; + + err = nsh_pop(skb); + if (err) + return err; + + /* safe right before invalidate_flow_key */ + if (skb->protocol == htons(ETH_P_TEB)) + key->mac_proto = MAC_PROTO_ETHERNET; + else + key->mac_proto = MAC_PROTO_NONE; + invalidate_flow_key(key); + return 0; +} + static void update_ip_l4_checksum(struct sk_buff *skb, struct iphdr *nh, __be32 addr, __be32 new_addr) { @@ -602,6 +635,69 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key, return 0; } +static int set_nsh(struct sk_buff *skb, struct sw_flow_key *flow_key, + const struct nlattr *a) +{ + struct nshhdr *nh; + size_t length; + int err; + u8 flags; + u8 ttl; + int i; + + struct ovs_key_nsh key; + struct ovs_key_nsh mask; + + err = nsh_key_from_nlattr(a, &key, &mask); + if (err) + return err; + + /* Make sure the NSH base header is there */ + if (!pskb_may_pull(skb, skb_network_offset(skb) + NSH_BASE_HDR_LEN)) + return -ENOMEM; + + nh = nsh_hdr(skb); + length = nsh_hdr_len(nh); + + /* Make sure the whole NSH header is there */ + err = skb_ensure_writable(skb, skb_network_offset(skb) + + length); + if (unlikely(err)) + return err; + + nh = nsh_hdr(skb); + skb_postpull_rcsum(skb, nh, length); + flags = nsh_get_flags(nh); + flags = OVS_MASKED(flags, key.base.flags, mask.base.flags); + flow_key->nsh.base.flags = flags; + ttl = nsh_get_ttl(nh); + ttl = OVS_MASKED(ttl, key.base.ttl, mask.base.ttl); + flow_key->nsh.base.ttl = ttl; + nsh_set_flags_and_ttl(nh, flags, ttl); + nh->path_hdr = OVS_MASKED(nh->path_hdr, key.base.path_hdr, + mask.base.path_hdr); + flow_key->nsh.base.path_hdr = nh->path_hdr; + switch (nh->mdtype) { + case NSH_M_TYPE1: + for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++) { + nh->md1.context[i] = + OVS_MASKED(nh->md1.context[i], key.context[i], + mask.context[i]); + } + memcpy(flow_key->nsh.context, nh->md1.context, + sizeof(nh->md1.context)); + break; + case NSH_M_TYPE2: + memset(flow_key->nsh.context, 0, + sizeof(flow_key->nsh.context)); + break; + default: + return -EINVAL; + } + skb_postpush_rcsum(skb, nh, length); + return 0; +} + /* Must follow skb_ensure_writable() since that can move the skb data. */ static void set_tp_port(struct sk_buff *skb, __be16 *port, __be16 new_port, __sum16 *check) @@ -1024,6 +1120,10 @@ static int execute_masked_set_action(struct sk_buff *skb, get_mask(a, struct ovs_key_ethernet *)); break; + case OVS_KEY_ATTR_NSH: + err = set_nsh(skb, flow_key, a); + break; + case OVS_KEY_ATTR_IPV4: err = set_ipv4(skb, flow_key, nla_data(a), get_mask(a, struct ovs_key_ipv4 *)); @@ -1203,6 +1303,10 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, return err == -EINPROGRESS ? 0 : err; break; + case OVS_ACTION_ATTR_CT_CLEAR: + err = ovs_ct_clear(skb, key); + break; + case OVS_ACTION_ATTR_PUSH_ETH: err = push_eth(skb, key, nla_data(a)); break; @@ -1210,6 +1314,28 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, case OVS_ACTION_ATTR_POP_ETH: err = pop_eth(skb, key); break; + + case OVS_ACTION_ATTR_PUSH_NSH: { + u8 buffer[NSH_HDR_MAX_LEN]; + struct nshhdr *nh = (struct nshhdr *)buffer; + + err = nsh_hdr_from_nlattr(nla_data(a), nh, + NSH_HDR_MAX_LEN); + if (unlikely(err)) + break; + err = push_nsh(skb, key, nh); + break; + } + + case OVS_ACTION_ATTR_POP_NSH: + err = pop_nsh(skb, key); + break; + + case OVS_ACTION_ATTR_METER: + if (ovs_meter_execute(dp, skb, key, nla_get_u32(a))) { + consume_skb(skb); + return 0; + } } if (unlikely(err)) { @@ -1337,6 +1463,7 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, goto out; } + OVS_CB(skb)->acts_origlen = acts->orig_len; err = do_execute_actions(dp, skb, key, acts->actions, acts->actions_len); diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 03859e386b47..b27c5c6d9cab 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -579,8 +579,8 @@ static struct nf_conn * ovs_ct_find_existing(struct net *net, const struct nf_conntrack_zone *zone, u8 l3num, struct sk_buff *skb, bool natted) { - struct nf_conntrack_l3proto *l3proto; - struct nf_conntrack_l4proto *l4proto; + const struct nf_conntrack_l3proto *l3proto; + const struct nf_conntrack_l4proto *l4proto; struct nf_conntrack_tuple tuple; struct nf_conntrack_tuple_hash *h; struct nf_conn *ct; @@ -752,6 +752,7 @@ static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, } } /* Non-ICMP, fall thru to initialize if needed. */ + /* fall through */ case IP_CT_NEW: /* Seen it before? This can happen for loopback, retrans, * or local packets. @@ -1129,6 +1130,17 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb, return err; } +int ovs_ct_clear(struct sk_buff *skb, struct sw_flow_key *key) +{ + if (skb_nfct(skb)) { + nf_conntrack_put(skb_nfct(skb)); + nf_ct_set(skb, NULL, IP_CT_UNTRACKED); + ovs_ct_fill_key(skb, key); + } + + return 0; +} + static int ovs_ct_add_helper(struct ovs_conntrack_info *info, const char *name, const struct sw_flow_key *key, bool log) { @@ -1180,15 +1192,13 @@ static int parse_nat(const struct nlattr *attr, int type = nla_type(a); if (type > OVS_NAT_ATTR_MAX) { - OVS_NLERR(log, - "Unknown NAT attribute (type=%d, max=%d).\n", + OVS_NLERR(log, "Unknown NAT attribute (type=%d, max=%d)", type, OVS_NAT_ATTR_MAX); return -EINVAL; } if (nla_len(a) != ovs_nat_attr_lens[type][ip_vers]) { - OVS_NLERR(log, - "NAT attribute type %d has unexpected length (%d != %d).\n", + OVS_NLERR(log, "NAT attribute type %d has unexpected length (%d != %d)", type, nla_len(a), ovs_nat_attr_lens[type][ip_vers]); return -EINVAL; @@ -1198,9 +1208,7 @@ static int parse_nat(const struct nlattr *attr, case OVS_NAT_ATTR_SRC: case OVS_NAT_ATTR_DST: if (info->nat) { - OVS_NLERR(log, - "Only one type of NAT may be specified.\n" - ); + OVS_NLERR(log, "Only one type of NAT may be specified"); return -ERANGE; } info->nat |= OVS_CT_NAT; @@ -1245,13 +1253,13 @@ static int parse_nat(const struct nlattr *attr, break; default: - OVS_NLERR(log, "Unknown nat attribute (%d).\n", type); + OVS_NLERR(log, "Unknown nat attribute (%d)", type); return -EINVAL; } } if (rem > 0) { - OVS_NLERR(log, "NAT attribute has %d unknown bytes.\n", rem); + OVS_NLERR(log, "NAT attribute has %d unknown bytes", rem); return -EINVAL; } if (!info->nat) { diff --git a/net/openvswitch/conntrack.h b/net/openvswitch/conntrack.h index bc7efd1867ab..399dfdd2c4f9 100644 --- a/net/openvswitch/conntrack.h +++ b/net/openvswitch/conntrack.h @@ -30,6 +30,7 @@ int ovs_ct_action_to_attr(const struct ovs_conntrack_info *, struct sk_buff *); int ovs_ct_execute(struct net *, struct sk_buff *, struct sw_flow_key *, const struct ovs_conntrack_info *); +int ovs_ct_clear(struct sk_buff *skb, struct sw_flow_key *key); void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key); int ovs_ct_put_key(const struct sw_flow_key *swkey, @@ -73,6 +74,12 @@ static inline int ovs_ct_execute(struct net *net, struct sk_buff *skb, return -ENOTSUPP; } +static inline int ovs_ct_clear(struct sk_buff *skb, + struct sw_flow_key *key) +{ + return -ENOTSUPP; +} + static inline void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key) { diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 45fe8c8a884d..0dab33fb9844 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -55,6 +55,7 @@ #include "flow.h" #include "flow_table.h" #include "flow_netlink.h" +#include "meter.h" #include "vport-internal_dev.h" #include "vport-netdev.h" @@ -142,35 +143,6 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *, const struct dp_upcall_info *, uint32_t cutlen); -/* Must be called with rcu_read_lock. */ -static struct datapath *get_dp_rcu(struct net *net, int dp_ifindex) -{ - struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex); - - if (dev) { - struct vport *vport = ovs_internal_dev_get_vport(dev); - if (vport) - return vport->dp; - } - - return NULL; -} - -/* The caller must hold either ovs_mutex or rcu_read_lock to keep the - * returned dp pointer valid. - */ -static inline struct datapath *get_dp(struct net *net, int dp_ifindex) -{ - struct datapath *dp; - - WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held()); - rcu_read_lock(); - dp = get_dp_rcu(net, dp_ifindex); - rcu_read_unlock(); - - return dp; -} - /* Must be called with rcu_read_lock or ovs_mutex. */ const char *ovs_dp_name(const struct datapath *dp) { @@ -203,6 +175,7 @@ static void destroy_dp_rcu(struct rcu_head *rcu) ovs_flow_tbl_destroy(&dp->table); free_percpu(dp->stats_percpu); kfree(dp->ports); + ovs_meters_exit(dp); kfree(dp); } @@ -335,8 +308,6 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, const struct dp_upcall_info *upcall_info, uint32_t cutlen) { - unsigned short gso_type = skb_shinfo(skb)->gso_type; - struct sw_flow_key later_key; struct sk_buff *segs, *nskb; int err; @@ -347,21 +318,9 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, if (segs == NULL) return -EINVAL; - if (gso_type & SKB_GSO_UDP) { - /* The initial flow key extracted by ovs_flow_key_extract() - * in this case is for a first fragment, so we need to - * properly mark later fragments. - */ - later_key = *key; - later_key.ip.frag = OVS_FRAG_TYPE_LATER; - } - /* Queue all of the segments. */ skb = segs; do { - if (gso_type & SKB_GSO_UDP && skb != segs) - key = &later_key; - err = queue_userspace_packet(dp, skb, key, upcall_info, cutlen); if (err) break; @@ -381,7 +340,7 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, } static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info, - unsigned int hdrlen) + unsigned int hdrlen, int actions_attrlen) { size_t size = NLMSG_ALIGN(sizeof(struct ovs_header)) + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */ @@ -398,7 +357,7 @@ static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info, /* OVS_PACKET_ATTR_ACTIONS */ if (upcall_info->actions_len) - size += nla_total_size(upcall_info->actions_len); + size += nla_total_size(actions_attrlen); /* OVS_PACKET_ATTR_MRU */ if (upcall_info->mru) @@ -465,7 +424,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, else hlen = skb->len; - len = upcall_msg_size(upcall_info, hlen - cutlen); + len = upcall_msg_size(upcall_info, hlen - cutlen, + OVS_CB(skb)->acts_origlen); user_skb = genlmsg_new(len, GFP_ATOMIC); if (!user_skb) { err = -ENOMEM; @@ -1125,7 +1085,8 @@ static int ovs_nla_init_match_and_action(struct net *net, if (!a[OVS_FLOW_ATTR_KEY]) { OVS_NLERR(log, "Flow key attribute not present in set flow."); - return -EINVAL; + error = -EINVAL; + goto error; } *acts = get_flow_actions(net, a[OVS_FLOW_ATTR_ACTIONS], key, @@ -1613,6 +1574,10 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) for (i = 0; i < DP_VPORT_HASH_BUCKETS; i++) INIT_HLIST_HEAD(&dp->ports[i]); + err = ovs_meters_init(dp); + if (err) + goto err_destroy_ports_array; + /* Set up our datapath device. */ parms.name = nla_data(a[OVS_DP_ATTR_NAME]); parms.type = OVS_VPORT_TYPE_INTERNAL; @@ -1641,7 +1606,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_dp_reset_user_features(skb, info); } - goto err_destroy_ports_array; + goto err_destroy_meters; } err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid, @@ -1656,8 +1621,10 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_notify(&dp_datapath_genl_family, reply, info); return 0; -err_destroy_ports_array: +err_destroy_meters: ovs_unlock(); + ovs_meters_exit(dp); +err_destroy_ports_array: kfree(dp->ports); err_destroy_percpu: free_percpu(dp->stats_percpu); @@ -1860,7 +1827,8 @@ static struct genl_family dp_datapath_genl_family __ro_after_init = { /* Called with ovs_mutex or RCU read lock. */ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, - u32 portid, u32 seq, u32 flags, u8 cmd) + struct net *net, u32 portid, u32 seq, + u32 flags, u8 cmd) { struct ovs_header *ovs_header; struct ovs_vport_stats vport_stats; @@ -1876,9 +1844,17 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) || nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) || nla_put_string(skb, OVS_VPORT_ATTR_NAME, - ovs_vport_name(vport))) + ovs_vport_name(vport)) || + nla_put_u32(skb, OVS_VPORT_ATTR_IFINDEX, vport->dev->ifindex)) goto nla_put_failure; + if (!net_eq(net, dev_net(vport->dev))) { + int id = peernet2id_alloc(net, dev_net(vport->dev)); + + if (nla_put_s32(skb, OVS_VPORT_ATTR_NETNSID, id)) + goto nla_put_failure; + } + ovs_vport_get_stats(vport, &vport_stats); if (nla_put_64bit(skb, OVS_VPORT_ATTR_STATS, sizeof(struct ovs_vport_stats), &vport_stats, @@ -1908,8 +1884,8 @@ static struct sk_buff *ovs_vport_cmd_alloc_info(void) } /* Called with ovs_mutex, only via ovs_dp_notify_wq(). */ -struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid, - u32 seq, u8 cmd) +struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net, + u32 portid, u32 seq, u8 cmd) { struct sk_buff *skb; int retval; @@ -1918,7 +1894,7 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid, if (!skb) return ERR_PTR(-ENOMEM); - retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd); + retval = ovs_vport_cmd_fill_info(vport, skb, net, portid, seq, 0, cmd); BUG_ON(retval < 0); return skb; @@ -1932,6 +1908,8 @@ static struct vport *lookup_vport(struct net *net, struct datapath *dp; struct vport *vport; + if (a[OVS_VPORT_ATTR_IFINDEX]) + return ERR_PTR(-EOPNOTSUPP); if (a[OVS_VPORT_ATTR_NAME]) { vport = ovs_vport_locate(net, nla_data(a[OVS_VPORT_ATTR_NAME])); if (!vport) @@ -1956,6 +1934,7 @@ static struct vport *lookup_vport(struct net *net, return vport; } else return ERR_PTR(-EINVAL); + } /* Called with ovs_mutex */ @@ -1995,6 +1974,8 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) if (!a[OVS_VPORT_ATTR_NAME] || !a[OVS_VPORT_ATTR_TYPE] || !a[OVS_VPORT_ATTR_UPCALL_PID]) return -EINVAL; + if (a[OVS_VPORT_ATTR_IFINDEX]) + return -EOPNOTSUPP; port_no = a[OVS_VPORT_ATTR_PORT_NO] ? nla_get_u32(a[OVS_VPORT_ATTR_PORT_NO]) : 0; @@ -2044,8 +2025,9 @@ restart: goto exit_unlock_free; } - err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, - info->snd_seq, 0, OVS_VPORT_CMD_NEW); + err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), + info->snd_portid, info->snd_seq, 0, + OVS_VPORT_CMD_NEW); if (netdev_get_fwd_headroom(vport->dev) > dp->max_headroom) update_headroom(dp); @@ -2102,8 +2084,9 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) goto exit_unlock_free; } - err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, - info->snd_seq, 0, OVS_VPORT_CMD_NEW); + err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), + info->snd_portid, info->snd_seq, 0, + OVS_VPORT_CMD_NEW); BUG_ON(err < 0); ovs_unlock(); @@ -2140,8 +2123,9 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) goto exit_unlock_free; } - err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, - info->snd_seq, 0, OVS_VPORT_CMD_DEL); + err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), + info->snd_portid, info->snd_seq, 0, + OVS_VPORT_CMD_DEL); BUG_ON(err < 0); /* the vport deletion may trigger dp headroom update */ @@ -2181,8 +2165,9 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info) err = PTR_ERR(vport); if (IS_ERR(vport)) goto exit_unlock_free; - err = ovs_vport_cmd_fill_info(vport, reply, info->snd_portid, - info->snd_seq, 0, OVS_VPORT_CMD_NEW); + err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info), + info->snd_portid, info->snd_seq, 0, + OVS_VPORT_CMD_NEW); BUG_ON(err < 0); rcu_read_unlock(); @@ -2214,6 +2199,7 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) hlist_for_each_entry_rcu(vport, &dp->ports[i], dp_hash_node) { if (j >= skip && ovs_vport_cmd_fill_info(vport, skb, + sock_net(skb->sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, @@ -2240,6 +2226,8 @@ static const struct nla_policy vport_policy[OVS_VPORT_ATTR_MAX + 1] = { [OVS_VPORT_ATTR_TYPE] = { .type = NLA_U32 }, [OVS_VPORT_ATTR_UPCALL_PID] = { .type = NLA_U32 }, [OVS_VPORT_ATTR_OPTIONS] = { .type = NLA_NESTED }, + [OVS_VPORT_ATTR_IFINDEX] = { .type = NLA_U32 }, + [OVS_VPORT_ATTR_NETNSID] = { .type = NLA_S32 }, }; static const struct genl_ops dp_vport_genl_ops[] = { @@ -2285,6 +2273,7 @@ static struct genl_family * const dp_genl_families[] = { &dp_vport_genl_family, &dp_flow_genl_family, &dp_packet_genl_family, + &dp_meter_genl_family, }; static void dp_unregister_genl(int n_families) @@ -2465,3 +2454,4 @@ MODULE_ALIAS_GENL_FAMILY(OVS_DATAPATH_FAMILY); MODULE_ALIAS_GENL_FAMILY(OVS_VPORT_FAMILY); MODULE_ALIAS_GENL_FAMILY(OVS_FLOW_FAMILY); MODULE_ALIAS_GENL_FAMILY(OVS_PACKET_FAMILY); +MODULE_ALIAS_GENL_FAMILY(OVS_METER_FAMILY); diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 5d8dcd88815f..523d65526766 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -30,6 +30,8 @@ #include "conntrack.h" #include "flow.h" #include "flow_table.h" +#include "meter.h" +#include "vport-internal_dev.h" #define DP_MAX_PORTS USHRT_MAX #define DP_VPORT_HASH_BUCKETS 1024 @@ -91,6 +93,9 @@ struct datapath { u32 user_features; u32 max_headroom; + + /* Switch meters. */ + struct hlist_head *meters; }; /** @@ -99,11 +104,13 @@ struct datapath { * when a packet is received by OVS. * @mru: The maximum received fragement size; 0 if the packet is not * fragmented. + * @acts_origlen: The netlink size of the flow actions applied to this skb. * @cutlen: The number of bytes from the packet end to be removed. */ struct ovs_skb_cb { struct vport *input_vport; u16 mru; + u16 acts_origlen; u32 cutlen; }; #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb) @@ -188,6 +195,36 @@ static inline struct vport *ovs_vport_ovsl(const struct datapath *dp, int port_n return ovs_lookup_vport(dp, port_no); } +/* Must be called with rcu_read_lock. */ +static inline struct datapath *get_dp_rcu(struct net *net, int dp_ifindex) +{ + struct net_device *dev = dev_get_by_index_rcu(net, dp_ifindex); + + if (dev) { + struct vport *vport = ovs_internal_dev_get_vport(dev); + + if (vport) + return vport->dp; + } + + return NULL; +} + +/* The caller must hold either ovs_mutex or rcu_read_lock to keep the + * returned dp pointer valid. + */ +static inline struct datapath *get_dp(struct net *net, int dp_ifindex) +{ + struct datapath *dp; + + WARN_ON_ONCE(!rcu_read_lock_held() && !lockdep_ovsl_is_held()); + rcu_read_lock(); + dp = get_dp_rcu(net, dp_ifindex); + rcu_read_unlock(); + + return dp; +} + extern struct notifier_block ovs_dp_device_notifier; extern struct genl_family dp_vport_genl_family; @@ -198,8 +235,8 @@ int ovs_dp_upcall(struct datapath *, struct sk_buff *, uint32_t cutlen); const char *ovs_dp_name(const struct datapath *dp); -struct sk_buff *ovs_vport_cmd_build_info(struct vport *, u32 pid, u32 seq, - u8 cmd); +struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, struct net *net, + u32 portid, u32 seq, u8 cmd); int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, const struct sw_flow_actions *, struct sw_flow_key *); diff --git a/net/openvswitch/dp_notify.c b/net/openvswitch/dp_notify.c index 653d073bae45..f3ee2f2825c0 100644 --- a/net/openvswitch/dp_notify.c +++ b/net/openvswitch/dp_notify.c @@ -30,8 +30,8 @@ static void dp_detach_port_notify(struct vport *vport) struct datapath *dp; dp = vport->dp; - notify = ovs_vport_cmd_build_info(vport, 0, 0, - OVS_VPORT_CMD_DEL); + notify = ovs_vport_cmd_build_info(vport, ovs_dp_get_net(dp), + 0, 0, OVS_VPORT_CMD_DEL); ovs_dp_detach_port(vport); if (IS_ERR(notify)) { genl_set_err(&dp_vport_genl_family, ovs_dp_get_net(dp), 0, diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index 3f76cb765e5b..864ddb1e3642 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -46,6 +46,7 @@ #include <net/ipv6.h> #include <net/mpls.h> #include <net/ndisc.h> +#include <net/nsh.h> #include "conntrack.h" #include "datapath.h" @@ -72,8 +73,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, const struct sk_buff *skb) { struct flow_stats *stats; - int node = numa_node_id(); - int cpu = smp_processor_id(); + unsigned int cpu = smp_processor_id(); int len = skb->len + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); stats = rcu_dereference(flow->stats[cpu]); @@ -108,7 +108,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, __GFP_THISNODE | __GFP_NOWARN | __GFP_NOMEMALLOC, - node); + numa_node_id()); if (likely(new_stats)) { new_stats->used = jiffies; new_stats->packet_count = 1; @@ -118,6 +118,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, rcu_assign_pointer(flow->stats[cpu], new_stats); + cpumask_set_cpu(cpu, &flow->cpu_used_mask); goto unlock; } } @@ -145,7 +146,7 @@ void ovs_flow_stats_get(const struct sw_flow *flow, memset(ovs_stats, 0, sizeof(*ovs_stats)); /* We open code this to make sure cpu 0 is always considered */ - for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) { + for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) { struct flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]); if (stats) { @@ -169,7 +170,7 @@ void ovs_flow_stats_clear(struct sw_flow *flow) int cpu; /* We open code this to make sure cpu 0 is always considered */ - for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) { + for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) { struct flow_stats *stats = ovsl_dereference(flow->stats[cpu]); if (stats) { @@ -490,6 +491,52 @@ invalid: return 0; } +static int parse_nsh(struct sk_buff *skb, struct sw_flow_key *key) +{ + struct nshhdr *nh; + unsigned int nh_ofs = skb_network_offset(skb); + u8 version, length; + int err; + + err = check_header(skb, nh_ofs + NSH_BASE_HDR_LEN); + if (unlikely(err)) + return err; + + nh = nsh_hdr(skb); + version = nsh_get_ver(nh); + length = nsh_hdr_len(nh); + + if (version != 0) + return -EINVAL; + + err = check_header(skb, nh_ofs + length); + if (unlikely(err)) + return err; + + nh = nsh_hdr(skb); + key->nsh.base.flags = nsh_get_flags(nh); + key->nsh.base.ttl = nsh_get_ttl(nh); + key->nsh.base.mdtype = nh->mdtype; + key->nsh.base.np = nh->np; + key->nsh.base.path_hdr = nh->path_hdr; + switch (key->nsh.base.mdtype) { + case NSH_M_TYPE1: + if (length != NSH_M_TYPE1_LEN) + return -EINVAL; + memcpy(key->nsh.context, nh->md1.context, + sizeof(nh->md1)); + break; + case NSH_M_TYPE2: + memset(key->nsh.context, 0, + sizeof(nh->md1)); + break; + default: + return -EINVAL; + } + + return 0; +} + /** * key_extract - extracts a flow key from an Ethernet frame. * @skb: sk_buff that contains the frame, with skb->data pointing to the @@ -584,8 +631,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) key->ip.frag = OVS_FRAG_TYPE_LATER; return 0; } - if (nh->frag_off & htons(IP_MF) || - skb_shinfo(skb)->gso_type & SKB_GSO_UDP) + if (nh->frag_off & htons(IP_MF)) key->ip.frag = OVS_FRAG_TYPE_FIRST; else key->ip.frag = OVS_FRAG_TYPE_NONE; @@ -701,9 +747,6 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) if (key->ip.frag == OVS_FRAG_TYPE_LATER) return 0; - if (skb_shinfo(skb)->gso_type & SKB_GSO_UDP) - key->ip.frag = OVS_FRAG_TYPE_FIRST; - /* Transport layer. */ if (key->ip.proto == NEXTHDR_TCP) { if (tcphdr_ok(skb)) { @@ -739,6 +782,10 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) memset(&key->tp, 0, sizeof(key->tp)); } } + } else if (key->eth.type == htons(ETH_P_NSH)) { + error = parse_nsh(skb, key); + if (error) + return error; } return 0; } diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index a9bc1c875965..c670dd24b8b7 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -31,9 +31,11 @@ #include <linux/jiffies.h> #include <linux/time.h> #include <linux/flex_array.h> +#include <linux/cpumask.h> #include <net/inet_ecn.h> #include <net/ip_tunnels.h> #include <net/dst_metadata.h> +#include <net/nsh.h> struct sk_buff; @@ -65,6 +67,11 @@ struct vlan_head { (offsetof(struct sw_flow_key, recirc_id) + \ FIELD_SIZEOF(struct sw_flow_key, recirc_id)) +struct ovs_key_nsh { + struct ovs_nsh_key_base base; + __be32 context[NSH_MD1_CONTEXT_SIZE]; +}; + struct sw_flow_key { u8 tun_opts[IP_TUNNEL_OPTS_MAX]; u8 tun_opts_len; @@ -142,6 +149,7 @@ struct sw_flow_key { } nd; }; } ipv6; + struct ovs_key_nsh nsh; /* network service header */ }; struct { /* Connection tracking fields not packed above. */ @@ -219,6 +227,7 @@ struct sw_flow { */ struct sw_flow_key key; struct sw_flow_id id; + struct cpumask cpu_used_mask; struct sw_flow_mask *mask; struct sw_flow_actions __rcu *sf_acts; struct flow_stats __rcu *stats[]; /* One for each CPU. First one diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index f07d10ac35d8..dc424798ba6f 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -48,6 +48,8 @@ #include <net/ndisc.h> #include <net/mpls.h> #include <net/vxlan.h> +#include <net/tun_proto.h> +#include <net/erspan.h> #include "flow_netlink.h" @@ -75,16 +77,20 @@ static bool actions_may_change_flow(const struct nlattr *actions) break; case OVS_ACTION_ATTR_CT: + case OVS_ACTION_ATTR_CT_CLEAR: case OVS_ACTION_ATTR_HASH: case OVS_ACTION_ATTR_POP_ETH: case OVS_ACTION_ATTR_POP_MPLS: + case OVS_ACTION_ATTR_POP_NSH: case OVS_ACTION_ATTR_POP_VLAN: case OVS_ACTION_ATTR_PUSH_ETH: case OVS_ACTION_ATTR_PUSH_MPLS: + case OVS_ACTION_ATTR_PUSH_NSH: case OVS_ACTION_ATTR_PUSH_VLAN: case OVS_ACTION_ATTR_SAMPLE: case OVS_ACTION_ATTR_SET: case OVS_ACTION_ATTR_SET_MASKED: + case OVS_ACTION_ATTR_METER: default: return true; } @@ -173,7 +179,8 @@ static bool match_validate(const struct sw_flow_match *match, | (1 << OVS_KEY_ATTR_ICMPV6) | (1 << OVS_KEY_ATTR_ARP) | (1 << OVS_KEY_ATTR_ND) - | (1 << OVS_KEY_ATTR_MPLS)); + | (1 << OVS_KEY_ATTR_MPLS) + | (1 << OVS_KEY_ATTR_NSH)); /* Always allowed mask fields. */ mask_allowed |= ((1 << OVS_KEY_ATTR_TUNNEL) @@ -282,6 +289,14 @@ static bool match_validate(const struct sw_flow_match *match, } } + if (match->key->eth.type == htons(ETH_P_NSH)) { + key_expected |= 1 << OVS_KEY_ATTR_NSH; + if (match->mask && + match->mask->key.eth.type == htons(0xffff)) { + mask_allowed |= 1 << OVS_KEY_ATTR_NSH; + } + } + if ((key_attrs & key_expected) != key_expected) { /* Key attributes check failed. */ OVS_NLERR(log, "Missing key (keys=%llx, expected=%llx)", @@ -319,7 +334,21 @@ size_t ovs_tun_key_attr_size(void) * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it. */ + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */ - + nla_total_size(2); /* OVS_TUNNEL_KEY_ATTR_TP_DST */ + + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_DST */ + + nla_total_size(4); /* OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS */ +} + +static size_t ovs_nsh_key_attr_size(void) +{ + /* Whenever adding new OVS_NSH_KEY_ FIELDS, we should consider + * updating this function. + */ + return nla_total_size(NSH_BASE_HDR_LEN) /* OVS_NSH_KEY_ATTR_BASE */ + /* OVS_NSH_KEY_ATTR_MD1 and OVS_NSH_KEY_ATTR_MD2 are + * mutually exclusive, so the bigger one can cover + * the small one. + */ + + nla_total_size(NSH_CTX_HDRS_MAX_LEN); } size_t ovs_key_attr_size(void) @@ -327,7 +356,7 @@ size_t ovs_key_attr_size(void) /* Whenever adding new OVS_KEY_ FIELDS, we should consider * updating this function. */ - BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 28); + BUILD_BUG_ON(OVS_KEY_ATTR_TUNNEL_INFO != 29); return nla_total_size(4) /* OVS_KEY_ATTR_PRIORITY */ + nla_total_size(0) /* OVS_KEY_ATTR_TUNNEL */ @@ -341,6 +370,8 @@ size_t ovs_key_attr_size(void) + nla_total_size(4) /* OVS_KEY_ATTR_CT_MARK */ + nla_total_size(16) /* OVS_KEY_ATTR_CT_LABELS */ + nla_total_size(40) /* OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6 */ + + nla_total_size(0) /* OVS_KEY_ATTR_NSH */ + + ovs_nsh_key_attr_size() + nla_total_size(12) /* OVS_KEY_ATTR_ETHERNET */ + nla_total_size(2) /* OVS_KEY_ATTR_ETHERTYPE */ + nla_total_size(4) /* OVS_KEY_ATTR_VLAN */ @@ -371,6 +402,14 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] .next = ovs_vxlan_ext_key_lens }, [OVS_TUNNEL_KEY_ATTR_IPV6_SRC] = { .len = sizeof(struct in6_addr) }, [OVS_TUNNEL_KEY_ATTR_IPV6_DST] = { .len = sizeof(struct in6_addr) }, + [OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS] = { .len = sizeof(u32) }, +}; + +static const struct ovs_len_tbl +ovs_nsh_key_attr_lens[OVS_NSH_KEY_ATTR_MAX + 1] = { + [OVS_NSH_KEY_ATTR_BASE] = { .len = sizeof(struct ovs_nsh_key_base) }, + [OVS_NSH_KEY_ATTR_MD1] = { .len = sizeof(struct ovs_nsh_key_md1) }, + [OVS_NSH_KEY_ATTR_MD2] = { .len = OVS_ATTR_VARIABLE }, }; /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ @@ -405,6 +444,8 @@ static const struct ovs_len_tbl ovs_key_lens[OVS_KEY_ATTR_MAX + 1] = { .len = sizeof(struct ovs_key_ct_tuple_ipv4) }, [OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6] = { .len = sizeof(struct ovs_key_ct_tuple_ipv6) }, + [OVS_KEY_ATTR_NSH] = { .len = OVS_ATTR_NESTED, + .next = ovs_nsh_key_attr_lens, }, }; static bool check_attr_len(unsigned int attr_len, unsigned int expected_len) @@ -593,6 +634,33 @@ static int vxlan_tun_opt_from_nlattr(const struct nlattr *attr, return 0; } +static int erspan_tun_opt_from_nlattr(const struct nlattr *attr, + struct sw_flow_match *match, bool is_mask, + bool log) +{ + unsigned long opt_key_offset; + struct erspan_metadata opts; + + BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts)); + + memset(&opts, 0, sizeof(opts)); + opts.index = nla_get_be32(attr); + + /* Index has only 20-bit */ + if (ntohl(opts.index) & ~INDEX_MASK) { + OVS_NLERR(log, "ERSPAN index number %x too large.", + ntohl(opts.index)); + return -EINVAL; + } + + SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), is_mask); + opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts)); + SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts), + is_mask); + + return 0; +} + static int ip_tun_from_nlattr(const struct nlattr *attr, struct sw_flow_match *match, bool is_mask, bool log) @@ -700,6 +768,19 @@ static int ip_tun_from_nlattr(const struct nlattr *attr, break; case OVS_TUNNEL_KEY_ATTR_PAD: break; + case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS: + if (opts_type) { + OVS_NLERR(log, "Multiple metadata blocks provided"); + return -EINVAL; + } + + err = erspan_tun_opt_from_nlattr(a, match, is_mask, log); + if (err) + return err; + + tun_flags |= TUNNEL_ERSPAN_OPT; + opts_type = type; + break; default: OVS_NLERR(log, "Unknown IP tunnel attribute %d", type); @@ -824,6 +905,10 @@ static int __ip_tun_to_nlattr(struct sk_buff *skb, else if (output->tun_flags & TUNNEL_VXLAN_OPT && vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len)) return -EMSGSIZE; + else if (output->tun_flags & TUNNEL_ERSPAN_OPT && + nla_put_be32(skb, OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS, + ((struct erspan_metadata *)tun_opts)->index)) + return -EMSGSIZE; } return 0; @@ -1179,6 +1264,221 @@ static int metadata_from_nlattrs(struct net *net, struct sw_flow_match *match, return 0; } +int nsh_hdr_from_nlattr(const struct nlattr *attr, + struct nshhdr *nh, size_t size) +{ + struct nlattr *a; + int rem; + u8 flags = 0; + u8 ttl = 0; + int mdlen = 0; + + /* validate_nsh has check this, so we needn't do duplicate check here + */ + if (size < NSH_BASE_HDR_LEN) + return -ENOBUFS; + + nla_for_each_nested(a, attr, rem) { + int type = nla_type(a); + + switch (type) { + case OVS_NSH_KEY_ATTR_BASE: { + const struct ovs_nsh_key_base *base = nla_data(a); + + flags = base->flags; + ttl = base->ttl; + nh->np = base->np; + nh->mdtype = base->mdtype; + nh->path_hdr = base->path_hdr; + break; + } + case OVS_NSH_KEY_ATTR_MD1: + mdlen = nla_len(a); + if (mdlen > size - NSH_BASE_HDR_LEN) + return -ENOBUFS; + memcpy(&nh->md1, nla_data(a), mdlen); + break; + + case OVS_NSH_KEY_ATTR_MD2: + mdlen = nla_len(a); + if (mdlen > size - NSH_BASE_HDR_LEN) + return -ENOBUFS; + memcpy(&nh->md2, nla_data(a), mdlen); + break; + + default: + return -EINVAL; + } + } + + /* nsh header length = NSH_BASE_HDR_LEN + mdlen */ + nh->ver_flags_ttl_len = 0; + nsh_set_flags_ttl_len(nh, flags, ttl, NSH_BASE_HDR_LEN + mdlen); + + return 0; +} + +int nsh_key_from_nlattr(const struct nlattr *attr, + struct ovs_key_nsh *nsh, struct ovs_key_nsh *nsh_mask) +{ + struct nlattr *a; + int rem; + + /* validate_nsh has check this, so we needn't do duplicate check here + */ + nla_for_each_nested(a, attr, rem) { + int type = nla_type(a); + + switch (type) { + case OVS_NSH_KEY_ATTR_BASE: { + const struct ovs_nsh_key_base *base = nla_data(a); + const struct ovs_nsh_key_base *base_mask = base + 1; + + nsh->base = *base; + nsh_mask->base = *base_mask; + break; + } + case OVS_NSH_KEY_ATTR_MD1: { + const struct ovs_nsh_key_md1 *md1 = nla_data(a); + const struct ovs_nsh_key_md1 *md1_mask = md1 + 1; + + memcpy(nsh->context, md1->context, sizeof(*md1)); + memcpy(nsh_mask->context, md1_mask->context, + sizeof(*md1_mask)); + break; + } + case OVS_NSH_KEY_ATTR_MD2: + /* Not supported yet */ + return -ENOTSUPP; + default: + return -EINVAL; + } + } + + return 0; +} + +static int nsh_key_put_from_nlattr(const struct nlattr *attr, + struct sw_flow_match *match, bool is_mask, + bool is_push_nsh, bool log) +{ + struct nlattr *a; + int rem; + bool has_base = false; + bool has_md1 = false; + bool has_md2 = false; + u8 mdtype = 0; + int mdlen = 0; + + if (WARN_ON(is_push_nsh && is_mask)) + return -EINVAL; + + nla_for_each_nested(a, attr, rem) { + int type = nla_type(a); + int i; + + if (type > OVS_NSH_KEY_ATTR_MAX) { + OVS_NLERR(log, "nsh attr %d is out of range max %d", + type, OVS_NSH_KEY_ATTR_MAX); + return -EINVAL; + } + + if (!check_attr_len(nla_len(a), + ovs_nsh_key_attr_lens[type].len)) { + OVS_NLERR( + log, + "nsh attr %d has unexpected len %d expected %d", + type, + nla_len(a), + ovs_nsh_key_attr_lens[type].len + ); + return -EINVAL; + } + + switch (type) { + case OVS_NSH_KEY_ATTR_BASE: { + const struct ovs_nsh_key_base *base = nla_data(a); + + has_base = true; + mdtype = base->mdtype; + SW_FLOW_KEY_PUT(match, nsh.base.flags, + base->flags, is_mask); + SW_FLOW_KEY_PUT(match, nsh.base.ttl, + base->ttl, is_mask); + SW_FLOW_KEY_PUT(match, nsh.base.mdtype, + base->mdtype, is_mask); + SW_FLOW_KEY_PUT(match, nsh.base.np, + base->np, is_mask); + SW_FLOW_KEY_PUT(match, nsh.base.path_hdr, + base->path_hdr, is_mask); + break; + } + case OVS_NSH_KEY_ATTR_MD1: { + const struct ovs_nsh_key_md1 *md1 = nla_data(a); + + has_md1 = true; + for (i = 0; i < NSH_MD1_CONTEXT_SIZE; i++) + SW_FLOW_KEY_PUT(match, nsh.context[i], + md1->context[i], is_mask); + break; + } + case OVS_NSH_KEY_ATTR_MD2: + if (!is_push_nsh) /* Not supported MD type 2 yet */ + return -ENOTSUPP; + + has_md2 = true; + mdlen = nla_len(a); + if (mdlen > NSH_CTX_HDRS_MAX_LEN || mdlen <= 0) { + OVS_NLERR( + log, + "Invalid MD length %d for MD type %d", + mdlen, + mdtype + ); + return -EINVAL; + } + break; + default: + OVS_NLERR(log, "Unknown nsh attribute %d", + type); + return -EINVAL; + } + } + + if (rem > 0) { + OVS_NLERR(log, "nsh attribute has %d unknown bytes.", rem); + return -EINVAL; + } + + if (has_md1 && has_md2) { + OVS_NLERR( + 1, + "invalid nsh attribute: md1 and md2 are exclusive." + ); + return -EINVAL; + } + + if (!is_mask) { + if ((has_md1 && mdtype != NSH_M_TYPE1) || + (has_md2 && mdtype != NSH_M_TYPE2)) { + OVS_NLERR(1, "nsh attribute has unmatched MD type %d.", + mdtype); + return -EINVAL; + } + + if (is_push_nsh && + (!has_base || (!has_md1 && !has_md2))) { + OVS_NLERR( + 1, + "push_nsh: missing base or metadata attributes" + ); + return -EINVAL; + } + } + + return 0; +} + static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match, u64 attrs, const struct nlattr **a, bool is_mask, bool log) @@ -1255,7 +1555,7 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match, } if (!is_mask && ipv6_key->ipv6_label & htonl(0xFFF00000)) { - OVS_NLERR(log, "IPv6 flow label %x is out of range (max=%x).\n", + OVS_NLERR(log, "IPv6 flow label %x is out of range (max=%x)", ntohl(ipv6_key->ipv6_label), (1 << 20) - 1); return -EINVAL; } @@ -1306,6 +1606,13 @@ static int ovs_key_from_nlattrs(struct net *net, struct sw_flow_match *match, attrs &= ~(1 << OVS_KEY_ATTR_ARP); } + if (attrs & (1 << OVS_KEY_ATTR_NSH)) { + if (nsh_key_put_from_nlattr(a[OVS_KEY_ATTR_NSH], match, + is_mask, false, log) < 0) + return -EINVAL; + attrs &= ~(1 << OVS_KEY_ATTR_NSH); + } + if (attrs & (1 << OVS_KEY_ATTR_MPLS)) { const struct ovs_key_mpls *mpls_key; @@ -1622,6 +1929,34 @@ static int ovs_nla_put_vlan(struct sk_buff *skb, const struct vlan_head *vh, return 0; } +static int nsh_key_to_nlattr(const struct ovs_key_nsh *nsh, bool is_mask, + struct sk_buff *skb) +{ + struct nlattr *start; + + start = nla_nest_start(skb, OVS_KEY_ATTR_NSH); + if (!start) + return -EMSGSIZE; + + if (nla_put(skb, OVS_NSH_KEY_ATTR_BASE, sizeof(nsh->base), &nsh->base)) + goto nla_put_failure; + + if (is_mask || nsh->base.mdtype == NSH_M_TYPE1) { + if (nla_put(skb, OVS_NSH_KEY_ATTR_MD1, + sizeof(nsh->context), nsh->context)) + goto nla_put_failure; + } + + /* Don't support MD type 2 yet */ + + nla_nest_end(skb, start); + + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + static int __ovs_nla_put_key(const struct sw_flow_key *swkey, const struct sw_flow_key *output, bool is_mask, struct sk_buff *skb) @@ -1750,6 +2085,9 @@ static int __ovs_nla_put_key(const struct sw_flow_key *swkey, ipv6_key->ipv6_tclass = output->ip.tos; ipv6_key->ipv6_hlimit = output->ip.ttl; ipv6_key->ipv6_frag = output->ip.frag; + } else if (swkey->eth.type == htons(ETH_P_NSH)) { + if (nsh_key_to_nlattr(&output->nsh, is_mask, skb)) + goto nla_put_failure; } else if (swkey->eth.type == htons(ETH_P_ARP) || swkey->eth.type == htons(ETH_P_RARP)) { struct ovs_key_arp *arp_key; @@ -2195,6 +2533,8 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, break; case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: break; + case OVS_TUNNEL_KEY_ATTR_ERSPAN_OPTS: + break; } }; @@ -2242,6 +2582,19 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, return err; } +static bool validate_nsh(const struct nlattr *attr, bool is_mask, + bool is_push_nsh, bool log) +{ + struct sw_flow_match match; + struct sw_flow_key key; + int ret = 0; + + ovs_match_init(&match, &key, true, NULL); + ret = nsh_key_put_from_nlattr(attr, &match, is_mask, + is_push_nsh, log); + return !ret; +} + /* Return false if there are any non-masked bits set. * Mask follows data immediately, before any netlink padding. */ @@ -2384,6 +2737,13 @@ static int validate_set(const struct nlattr *a, break; + case OVS_KEY_ATTR_NSH: + if (eth_type != htons(ETH_P_NSH)) + return -EINVAL; + if (!validate_nsh(nla_data(a), masked, false, log)) + return -EINVAL; + break; + default: return -EINVAL; } @@ -2479,9 +2839,13 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, [OVS_ACTION_ATTR_SAMPLE] = (u32)-1, [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash), [OVS_ACTION_ATTR_CT] = (u32)-1, + [OVS_ACTION_ATTR_CT_CLEAR] = 0, [OVS_ACTION_ATTR_TRUNC] = sizeof(struct ovs_action_trunc), [OVS_ACTION_ATTR_PUSH_ETH] = sizeof(struct ovs_action_push_eth), [OVS_ACTION_ATTR_POP_ETH] = 0, + [OVS_ACTION_ATTR_PUSH_NSH] = (u32)-1, + [OVS_ACTION_ATTR_POP_NSH] = 0, + [OVS_ACTION_ATTR_METER] = sizeof(u32), }; const struct ovs_action_push_vlan *vlan; int type = nla_type(a); @@ -2620,6 +2984,9 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, skip_copy = true; break; + case OVS_ACTION_ATTR_CT_CLEAR: + break; + case OVS_ACTION_ATTR_PUSH_ETH: /* Disallow pushing an Ethernet header if one * is already present */ @@ -2636,6 +3003,38 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, mac_proto = MAC_PROTO_ETHERNET; break; + case OVS_ACTION_ATTR_PUSH_NSH: + if (mac_proto != MAC_PROTO_ETHERNET) { + u8 next_proto; + + next_proto = tun_p_from_eth_p(eth_type); + if (!next_proto) + return -EINVAL; + } + mac_proto = MAC_PROTO_NONE; + if (!validate_nsh(nla_data(a), false, true, true)) + return -EINVAL; + break; + + case OVS_ACTION_ATTR_POP_NSH: { + __be16 inner_proto; + + if (eth_type != htons(ETH_P_NSH)) + return -EINVAL; + inner_proto = tun_p_to_eth_p(key->nsh.base.np); + if (!inner_proto) + return -EINVAL; + if (key->nsh.base.np == TUN_P_ETHERNET) + mac_proto = MAC_PROTO_ETHERNET; + else + mac_proto = MAC_PROTO_NONE; + break; + } + + case OVS_ACTION_ATTR_METER: + /* Non-existent meters are simply ignored. */ + break; + default: OVS_NLERR(log, "Unknown Action type %d", type); return -EINVAL; diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index 929c665ac3aa..6657606b2b47 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -79,4 +79,9 @@ int ovs_nla_put_actions(const struct nlattr *attr, void ovs_nla_free_flow_actions(struct sw_flow_actions *); void ovs_nla_free_flow_actions_rcu(struct sw_flow_actions *); +int nsh_key_from_nlattr(const struct nlattr *attr, struct ovs_key_nsh *nsh, + struct ovs_key_nsh *nsh_mask); +int nsh_hdr_from_nlattr(const struct nlattr *attr, struct nshhdr *nh, + size_t size); + #endif /* flow_netlink.h */ diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index ea7a8073fa02..80ea2a71852e 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -98,6 +98,8 @@ struct sw_flow *ovs_flow_alloc(void) RCU_INIT_POINTER(flow->stats[0], stats); + cpumask_set_cpu(0, &flow->cpu_used_mask); + return flow; err: kmem_cache_free(flow_cache, flow); @@ -141,7 +143,7 @@ static void flow_free(struct sw_flow *flow) if (flow->sf_acts) ovs_nla_free_flow_actions((struct sw_flow_actions __force *)flow->sf_acts); /* We open code this to make sure cpu 0 is always considered */ - for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, cpu_possible_mask)) + for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) if (flow->stats[cpu]) kmem_cache_free(flow_stats_cache, (struct flow_stats __force *)flow->stats[cpu]); diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c new file mode 100644 index 000000000000..3fbfc78991ac --- /dev/null +++ b/net/openvswitch/meter.c @@ -0,0 +1,597 @@ +/* + * Copyright (c) 2017 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/if.h> +#include <linux/skbuff.h> +#include <linux/ip.h> +#include <linux/kernel.h> +#include <linux/openvswitch.h> +#include <linux/netlink.h> +#include <linux/rculist.h> + +#include <net/netlink.h> +#include <net/genetlink.h> + +#include "datapath.h" +#include "meter.h" + +#define METER_HASH_BUCKETS 1024 + +static const struct nla_policy meter_policy[OVS_METER_ATTR_MAX + 1] = { + [OVS_METER_ATTR_ID] = { .type = NLA_U32, }, + [OVS_METER_ATTR_KBPS] = { .type = NLA_FLAG }, + [OVS_METER_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) }, + [OVS_METER_ATTR_BANDS] = { .type = NLA_NESTED }, + [OVS_METER_ATTR_USED] = { .type = NLA_U64 }, + [OVS_METER_ATTR_CLEAR] = { .type = NLA_FLAG }, + [OVS_METER_ATTR_MAX_METERS] = { .type = NLA_U32 }, + [OVS_METER_ATTR_MAX_BANDS] = { .type = NLA_U32 }, +}; + +static const struct nla_policy band_policy[OVS_BAND_ATTR_MAX + 1] = { + [OVS_BAND_ATTR_TYPE] = { .type = NLA_U32, }, + [OVS_BAND_ATTR_RATE] = { .type = NLA_U32, }, + [OVS_BAND_ATTR_BURST] = { .type = NLA_U32, }, + [OVS_BAND_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) }, +}; + +static void ovs_meter_free(struct dp_meter *meter) +{ + if (!meter) + return; + + kfree_rcu(meter, rcu); +} + +static struct hlist_head *meter_hash_bucket(const struct datapath *dp, + u32 meter_id) +{ + return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)]; +} + +/* Call with ovs_mutex or RCU read lock. */ +static struct dp_meter *lookup_meter(const struct datapath *dp, + u32 meter_id) +{ + struct dp_meter *meter; + struct hlist_head *head; + + head = meter_hash_bucket(dp, meter_id); + hlist_for_each_entry_rcu(meter, head, dp_hash_node) { + if (meter->id == meter_id) + return meter; + } + return NULL; +} + +static void attach_meter(struct datapath *dp, struct dp_meter *meter) +{ + struct hlist_head *head = meter_hash_bucket(dp, meter->id); + + hlist_add_head_rcu(&meter->dp_hash_node, head); +} + +static void detach_meter(struct dp_meter *meter) +{ + ASSERT_OVSL(); + if (meter) + hlist_del_rcu(&meter->dp_hash_node); +} + +static struct sk_buff * +ovs_meter_cmd_reply_start(struct genl_info *info, u8 cmd, + struct ovs_header **ovs_reply_header) +{ + struct sk_buff *skb; + struct ovs_header *ovs_header = info->userhdr; + + skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); + if (!skb) + return ERR_PTR(-ENOMEM); + + *ovs_reply_header = genlmsg_put(skb, info->snd_portid, + info->snd_seq, + &dp_meter_genl_family, 0, cmd); + if (!*ovs_reply_header) { + nlmsg_free(skb); + return ERR_PTR(-EMSGSIZE); + } + (*ovs_reply_header)->dp_ifindex = ovs_header->dp_ifindex; + + return skb; +} + +static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id, + struct dp_meter *meter) +{ + struct nlattr *nla; + struct dp_meter_band *band; + u16 i; + + if (nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id)) + goto error; + + if (!meter) + return 0; + + if (nla_put(reply, OVS_METER_ATTR_STATS, + sizeof(struct ovs_flow_stats), &meter->stats) || + nla_put_u64_64bit(reply, OVS_METER_ATTR_USED, meter->used, + OVS_METER_ATTR_PAD)) + goto error; + + nla = nla_nest_start(reply, OVS_METER_ATTR_BANDS); + if (!nla) + goto error; + + band = meter->bands; + + for (i = 0; i < meter->n_bands; ++i, ++band) { + struct nlattr *band_nla; + + band_nla = nla_nest_start(reply, OVS_BAND_ATTR_UNSPEC); + if (!band_nla || nla_put(reply, OVS_BAND_ATTR_STATS, + sizeof(struct ovs_flow_stats), + &band->stats)) + goto error; + nla_nest_end(reply, band_nla); + } + nla_nest_end(reply, nla); + + return 0; +error: + return -EMSGSIZE; +} + +static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info) +{ + struct sk_buff *reply; + struct ovs_header *ovs_reply_header; + struct nlattr *nla, *band_nla; + int err; + + reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_FEATURES, + &ovs_reply_header); + if (IS_ERR(reply)) + return PTR_ERR(reply); + + if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS, U32_MAX) || + nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS)) + goto nla_put_failure; + + nla = nla_nest_start(reply, OVS_METER_ATTR_BANDS); + if (!nla) + goto nla_put_failure; + + band_nla = nla_nest_start(reply, OVS_BAND_ATTR_UNSPEC); + if (!band_nla) + goto nla_put_failure; + /* Currently only DROP band type is supported. */ + if (nla_put_u32(reply, OVS_BAND_ATTR_TYPE, OVS_METER_BAND_TYPE_DROP)) + goto nla_put_failure; + nla_nest_end(reply, band_nla); + nla_nest_end(reply, nla); + + genlmsg_end(reply, ovs_reply_header); + return genlmsg_reply(reply, info); + +nla_put_failure: + nlmsg_free(reply); + err = -EMSGSIZE; + return err; +} + +static struct dp_meter *dp_meter_create(struct nlattr **a) +{ + struct nlattr *nla; + int rem; + u16 n_bands = 0; + struct dp_meter *meter; + struct dp_meter_band *band; + int err; + + /* Validate attributes, count the bands. */ + if (!a[OVS_METER_ATTR_BANDS]) + return ERR_PTR(-EINVAL); + + nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem) + if (++n_bands > DP_MAX_BANDS) + return ERR_PTR(-EINVAL); + + /* Allocate and set up the meter before locking anything. */ + meter = kzalloc(n_bands * sizeof(struct dp_meter_band) + + sizeof(*meter), GFP_KERNEL); + if (!meter) + return ERR_PTR(-ENOMEM); + + meter->used = div_u64(ktime_get_ns(), 1000 * 1000); + meter->kbps = a[OVS_METER_ATTR_KBPS] ? 1 : 0; + meter->keep_stats = !a[OVS_METER_ATTR_CLEAR]; + spin_lock_init(&meter->lock); + if (meter->keep_stats && a[OVS_METER_ATTR_STATS]) { + meter->stats = *(struct ovs_flow_stats *) + nla_data(a[OVS_METER_ATTR_STATS]); + } + meter->n_bands = n_bands; + + /* Set up meter bands. */ + band = meter->bands; + nla_for_each_nested(nla, a[OVS_METER_ATTR_BANDS], rem) { + struct nlattr *attr[OVS_BAND_ATTR_MAX + 1]; + u32 band_max_delta_t; + + err = nla_parse((struct nlattr **)&attr, OVS_BAND_ATTR_MAX, + nla_data(nla), nla_len(nla), band_policy, + NULL); + if (err) + goto exit_free_meter; + + if (!attr[OVS_BAND_ATTR_TYPE] || + !attr[OVS_BAND_ATTR_RATE] || + !attr[OVS_BAND_ATTR_BURST]) { + err = -EINVAL; + goto exit_free_meter; + } + + band->type = nla_get_u32(attr[OVS_BAND_ATTR_TYPE]); + band->rate = nla_get_u32(attr[OVS_BAND_ATTR_RATE]); + band->burst_size = nla_get_u32(attr[OVS_BAND_ATTR_BURST]); + /* Figure out max delta_t that is enough to fill any bucket. + * Keep max_delta_t size to the bucket units: + * pkts => 1/1000 packets, kilobits => bits. + */ + band_max_delta_t = (band->burst_size + band->rate) * 1000; + /* Start with a full bucket. */ + band->bucket = band_max_delta_t; + if (band_max_delta_t > meter->max_delta_t) + meter->max_delta_t = band_max_delta_t; + band++; + } + + return meter; + +exit_free_meter: + kfree(meter); + return ERR_PTR(err); +} + +static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr **a = info->attrs; + struct dp_meter *meter, *old_meter; + struct sk_buff *reply; + struct ovs_header *ovs_reply_header; + struct ovs_header *ovs_header = info->userhdr; + struct datapath *dp; + int err; + u32 meter_id; + bool failed; + + meter = dp_meter_create(a); + if (IS_ERR_OR_NULL(meter)) + return PTR_ERR(meter); + + reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_SET, + &ovs_reply_header); + if (IS_ERR(reply)) { + err = PTR_ERR(reply); + goto exit_free_meter; + } + + ovs_lock(); + dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); + if (!dp) { + err = -ENODEV; + goto exit_unlock; + } + + if (!a[OVS_METER_ATTR_ID]) { + err = -ENODEV; + goto exit_unlock; + } + + meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]); + + /* Cannot fail after this. */ + old_meter = lookup_meter(dp, meter_id); + detach_meter(old_meter); + attach_meter(dp, meter); + ovs_unlock(); + + /* Build response with the meter_id and stats from + * the old meter, if any. + */ + failed = nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id); + WARN_ON(failed); + if (old_meter) { + spin_lock_bh(&old_meter->lock); + if (old_meter->keep_stats) { + err = ovs_meter_cmd_reply_stats(reply, meter_id, + old_meter); + WARN_ON(err); + } + spin_unlock_bh(&old_meter->lock); + ovs_meter_free(old_meter); + } + + genlmsg_end(reply, ovs_reply_header); + return genlmsg_reply(reply, info); + +exit_unlock: + ovs_unlock(); + nlmsg_free(reply); +exit_free_meter: + kfree(meter); + return err; +} + +static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr **a = info->attrs; + u32 meter_id; + struct ovs_header *ovs_header = info->userhdr; + struct ovs_header *ovs_reply_header; + struct datapath *dp; + int err; + struct sk_buff *reply; + struct dp_meter *meter; + + if (!a[OVS_METER_ATTR_ID]) + return -EINVAL; + + meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]); + + reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_GET, + &ovs_reply_header); + if (IS_ERR(reply)) + return PTR_ERR(reply); + + ovs_lock(); + + dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); + if (!dp) { + err = -ENODEV; + goto exit_unlock; + } + + /* Locate meter, copy stats. */ + meter = lookup_meter(dp, meter_id); + if (!meter) { + err = -ENOENT; + goto exit_unlock; + } + + spin_lock_bh(&meter->lock); + err = ovs_meter_cmd_reply_stats(reply, meter_id, meter); + spin_unlock_bh(&meter->lock); + if (err) + goto exit_unlock; + + ovs_unlock(); + + genlmsg_end(reply, ovs_reply_header); + return genlmsg_reply(reply, info); + +exit_unlock: + ovs_unlock(); + nlmsg_free(reply); + return err; +} + +static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info) +{ + struct nlattr **a = info->attrs; + u32 meter_id; + struct ovs_header *ovs_header = info->userhdr; + struct ovs_header *ovs_reply_header; + struct datapath *dp; + int err; + struct sk_buff *reply; + struct dp_meter *old_meter; + + if (!a[OVS_METER_ATTR_ID]) + return -EINVAL; + meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]); + + reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_DEL, + &ovs_reply_header); + if (IS_ERR(reply)) + return PTR_ERR(reply); + + ovs_lock(); + + dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex); + if (!dp) { + err = -ENODEV; + goto exit_unlock; + } + + old_meter = lookup_meter(dp, meter_id); + if (old_meter) { + spin_lock_bh(&old_meter->lock); + err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter); + WARN_ON(err); + spin_unlock_bh(&old_meter->lock); + detach_meter(old_meter); + } + ovs_unlock(); + ovs_meter_free(old_meter); + genlmsg_end(reply, ovs_reply_header); + return genlmsg_reply(reply, info); + +exit_unlock: + ovs_unlock(); + nlmsg_free(reply); + return err; +} + +/* Meter action execution. + * + * Return true 'meter_id' drop band is triggered. The 'skb' should be + * dropped by the caller'. + */ +bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb, + struct sw_flow_key *key, u32 meter_id) +{ + struct dp_meter *meter; + struct dp_meter_band *band; + long long int now_ms = div_u64(ktime_get_ns(), 1000 * 1000); + long long int long_delta_ms; + u32 delta_ms; + u32 cost; + int i, band_exceeded_max = -1; + u32 band_exceeded_rate = 0; + + meter = lookup_meter(dp, meter_id); + /* Do not drop the packet when there is no meter. */ + if (!meter) + return false; + + /* Lock the meter while using it. */ + spin_lock(&meter->lock); + + long_delta_ms = (now_ms - meter->used); /* ms */ + + /* Make sure delta_ms will not be too large, so that bucket will not + * wrap around below. + */ + delta_ms = (long_delta_ms > (long long int)meter->max_delta_t) + ? meter->max_delta_t : (u32)long_delta_ms; + + /* Update meter statistics. + */ + meter->used = now_ms; + meter->stats.n_packets += 1; + meter->stats.n_bytes += skb->len; + + /* Bucket rate is either in kilobits per second, or in packets per + * second. We maintain the bucket in the units of either bits or + * 1/1000th of a packet, correspondingly. + * Then, when rate is multiplied with milliseconds, we get the + * bucket units: + * msec * kbps = bits, and + * msec * packets/sec = 1/1000 packets. + * + * 'cost' is the number of bucket units in this packet. + */ + cost = (meter->kbps) ? skb->len * 8 : 1000; + + /* Update all bands and find the one hit with the highest rate. */ + for (i = 0; i < meter->n_bands; ++i) { + long long int max_bucket_size; + + band = &meter->bands[i]; + max_bucket_size = (band->burst_size + band->rate) * 1000; + + band->bucket += delta_ms * band->rate; + if (band->bucket > max_bucket_size) + band->bucket = max_bucket_size; + + if (band->bucket >= cost) { + band->bucket -= cost; + } else if (band->rate > band_exceeded_rate) { + band_exceeded_rate = band->rate; + band_exceeded_max = i; + } + } + + if (band_exceeded_max >= 0) { + /* Update band statistics. */ + band = &meter->bands[band_exceeded_max]; + band->stats.n_packets += 1; + band->stats.n_bytes += skb->len; + + /* Drop band triggered, let the caller drop the 'skb'. */ + if (band->type == OVS_METER_BAND_TYPE_DROP) { + spin_unlock(&meter->lock); + return true; + } + } + + spin_unlock(&meter->lock); + return false; +} + +static struct genl_ops dp_meter_genl_ops[] = { + { .cmd = OVS_METER_CMD_FEATURES, + .flags = 0, /* OK for unprivileged users. */ + .policy = meter_policy, + .doit = ovs_meter_cmd_features + }, + { .cmd = OVS_METER_CMD_SET, + .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN + * privilege. + */ + .policy = meter_policy, + .doit = ovs_meter_cmd_set, + }, + { .cmd = OVS_METER_CMD_GET, + .flags = 0, /* OK for unprivileged users. */ + .policy = meter_policy, + .doit = ovs_meter_cmd_get, + }, + { .cmd = OVS_METER_CMD_DEL, + .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN + * privilege. + */ + .policy = meter_policy, + .doit = ovs_meter_cmd_del + }, +}; + +static const struct genl_multicast_group ovs_meter_multicast_group = { + .name = OVS_METER_MCGROUP, +}; + +struct genl_family dp_meter_genl_family __ro_after_init = { + .hdrsize = sizeof(struct ovs_header), + .name = OVS_METER_FAMILY, + .version = OVS_METER_VERSION, + .maxattr = OVS_METER_ATTR_MAX, + .netnsok = true, + .parallel_ops = true, + .ops = dp_meter_genl_ops, + .n_ops = ARRAY_SIZE(dp_meter_genl_ops), + .mcgrps = &ovs_meter_multicast_group, + .n_mcgrps = 1, + .module = THIS_MODULE, +}; + +int ovs_meters_init(struct datapath *dp) +{ + int i; + + dp->meters = kmalloc_array(METER_HASH_BUCKETS, + sizeof(struct hlist_head), GFP_KERNEL); + + if (!dp->meters) + return -ENOMEM; + + for (i = 0; i < METER_HASH_BUCKETS; i++) + INIT_HLIST_HEAD(&dp->meters[i]); + + return 0; +} + +void ovs_meters_exit(struct datapath *dp) +{ + int i; + + for (i = 0; i < METER_HASH_BUCKETS; i++) { + struct hlist_head *head = &dp->meters[i]; + struct dp_meter *meter; + struct hlist_node *n; + + hlist_for_each_entry_safe(meter, n, head, dp_hash_node) + kfree(meter); + } + + kfree(dp->meters); +} diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h new file mode 100644 index 000000000000..964ace2650f8 --- /dev/null +++ b/net/openvswitch/meter.h @@ -0,0 +1,54 @@ +/* + * Copyright (c) 2017 Nicira, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ + +#ifndef METER_H +#define METER_H 1 + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/netlink.h> +#include <linux/openvswitch.h> +#include <linux/genetlink.h> +#include <linux/skbuff.h> + +#include "flow.h" +struct datapath; + +#define DP_MAX_BANDS 1 + +struct dp_meter_band { + u32 type; + u32 rate; + u32 burst_size; + u32 bucket; /* 1/1000 packets, or in bits */ + struct ovs_flow_stats stats; +}; + +struct dp_meter { + spinlock_t lock; /* Per meter lock */ + struct rcu_head rcu; + struct hlist_node dp_hash_node; /*Element in datapath->meters + * hash table. + */ + u32 id; + u16 kbps:1, keep_stats:1; + u16 n_bands; + u32 max_delta_t; + u64 used; + struct ovs_flow_stats stats; + struct dp_meter_band bands[]; +}; + +extern struct genl_family dp_meter_genl_family; +int ovs_meters_init(struct datapath *dp); +void ovs_meters_exit(struct datapath *dp); +bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb, + struct sw_flow_key *key, u32 meter_id); + +#endif /* meter.h */ diff --git a/net/openvswitch/vport-netdev.c b/net/openvswitch/vport-netdev.c index 0389398fa4ab..2e5e7a41d8ef 100644 --- a/net/openvswitch/vport-netdev.c +++ b/net/openvswitch/vport-netdev.c @@ -108,7 +108,8 @@ struct vport *ovs_netdev_link(struct vport *vport, const char *name) rtnl_lock(); err = netdev_master_upper_dev_link(vport->dev, - get_dpdev(vport->dp), NULL, NULL); + get_dpdev(vport->dp), + NULL, NULL, NULL); if (err) goto error_unlock; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 008a45ca3112..737092ca9b4e 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -177,8 +177,6 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, #define BLK_PLUS_PRIV(sz_of_priv) \ (BLK_HDR_LEN + ALIGN((sz_of_priv), V3_ALIGNMENT)) -#define PGV_FROM_VMALLOC 1 - #define BLOCK_STATUS(x) ((x)->hdr.bh1.block_status) #define BLOCK_NUM_PKTS(x) ((x)->hdr.bh1.num_pkts) #define BLOCK_O2FP(x) ((x)->hdr.bh1.offset_to_first_pkt) @@ -203,11 +201,8 @@ static void prb_retire_current_block(struct tpacket_kbdq_core *, static int prb_queue_frozen(struct tpacket_kbdq_core *); static void prb_open_block(struct tpacket_kbdq_core *, struct tpacket_block_desc *); -static void prb_retire_rx_blk_timer_expired(unsigned long); +static void prb_retire_rx_blk_timer_expired(struct timer_list *); static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *); -static void prb_init_blk_timer(struct packet_sock *, - struct tpacket_kbdq_core *, - void (*func) (unsigned long)); static void prb_fill_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *); static void prb_clear_rxhash(struct tpacket_kbdq_core *, struct tpacket3_hdr *); @@ -542,22 +537,14 @@ static void prb_shutdown_retire_blk_timer(struct packet_sock *po, prb_del_retire_blk_timer(pkc); } -static void prb_init_blk_timer(struct packet_sock *po, - struct tpacket_kbdq_core *pkc, - void (*func) (unsigned long)) -{ - init_timer(&pkc->retire_blk_timer); - pkc->retire_blk_timer.data = (long)po; - pkc->retire_blk_timer.function = func; - pkc->retire_blk_timer.expires = jiffies; -} - static void prb_setup_retire_blk_timer(struct packet_sock *po) { struct tpacket_kbdq_core *pkc; pkc = GET_PBDQC_FROM_RB(&po->rx_ring); - prb_init_blk_timer(po, pkc, prb_retire_rx_blk_timer_expired); + timer_setup(&pkc->retire_blk_timer, prb_retire_rx_blk_timer_expired, + 0); + pkc->retire_blk_timer.expires = jiffies; } static int prb_calc_retire_blk_tmo(struct packet_sock *po, @@ -675,9 +662,10 @@ static void _prb_refresh_rx_retire_blk_timer(struct tpacket_kbdq_core *pkc) * prb_calc_retire_blk_tmo() calculates the tmo. * */ -static void prb_retire_rx_blk_timer_expired(unsigned long data) +static void prb_retire_rx_blk_timer_expired(struct timer_list *t) { - struct packet_sock *po = (struct packet_sock *)data; + struct packet_sock *po = + from_timer(po, t, rx_ring.prb_bdqc.retire_blk_timer); struct tpacket_kbdq_core *pkc = GET_PBDQC_FROM_RB(&po->rx_ring); unsigned int frozen; struct tpacket_block_desc *pbd; @@ -1686,10 +1674,6 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) mutex_lock(&fanout_mutex); - err = -EINVAL; - if (!po->running) - goto out; - err = -EALREADY; if (po->fanout) goto out; @@ -1751,7 +1735,10 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) list_add(&match->list, &fanout_list); } err = -EINVAL; - if (match->type == type && + + spin_lock(&po->bind_lock); + if (po->running && + match->type == type && match->prot_hook.type == po->prot_hook.type && match->prot_hook.dev == po->prot_hook.dev) { err = -ENOSPC; @@ -1763,9 +1750,16 @@ static int fanout_add(struct sock *sk, u16 id, u16 type_flags) err = 0; } } + spin_unlock(&po->bind_lock); + + if (err && !refcount_read(&match->sk_ref)) { + list_del(&match->list); + kfree(match); + } + out: if (err && rollover) { - kfree(rollover); + kfree_rcu(rollover, rcu); po->rollover = NULL; } mutex_unlock(&fanout_mutex); @@ -1792,8 +1786,10 @@ static struct packet_fanout *fanout_release(struct sock *sk) else f = NULL; - if (po->rollover) + if (po->rollover) { kfree_rcu(po->rollover, rcu); + po->rollover = NULL; + } } mutex_unlock(&fanout_mutex); @@ -2191,6 +2187,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct timespec ts; __u32 ts_status; bool is_drop_n_account = false; + bool do_vnet = false; /* struct tpacket{2,3}_hdr is aligned to a multiple of TPACKET_ALIGNMENT. * We may add members to them until current aligned size without forcing @@ -2241,8 +2238,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, netoff = TPACKET_ALIGN(po->tp_hdrlen + (maclen < 16 ? 16 : maclen)) + po->tp_reserve; - if (po->has_vnet_hdr) + if (po->has_vnet_hdr) { netoff += sizeof(struct virtio_net_hdr); + do_vnet = true; + } macoff = netoff - maclen; } if (po->tp_version <= TPACKET_V2) { @@ -2259,8 +2258,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, skb_set_owner_r(copy_skb, sk); } snaplen = po->rx_ring.frame_size - macoff; - if ((int)snaplen < 0) + if ((int)snaplen < 0) { snaplen = 0; + do_vnet = false; + } } } else if (unlikely(macoff + snaplen > GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len)) { @@ -2273,6 +2274,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, if (unlikely((int)snaplen < 0)) { snaplen = 0; macoff = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len; + do_vnet = false; } } spin_lock(&sk->sk_receive_queue.lock); @@ -2298,7 +2300,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, } spin_unlock(&sk->sk_receive_queue.lock); - if (po->has_vnet_hdr) { + if (do_vnet) { if (virtio_net_hdr_from_skb(skb, h.raw + macoff - sizeof(struct virtio_net_hdr), vio_le(), true)) { @@ -2830,6 +2832,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) struct virtio_net_hdr vnet_hdr = { 0 }; int offset = 0; struct packet_sock *po = pkt_sk(sk); + bool has_vnet_hdr = false; int hlen, tlen, linear; int extra_len = 0; @@ -2873,6 +2876,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) err = packet_snd_vnet_parse(msg, &len, &vnet_hdr); if (err) goto out_unlock; + has_vnet_hdr = true; } if (unlikely(sock_flag(sk, SOCK_NOFCS))) { @@ -2931,7 +2935,7 @@ static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) skb->priority = sk->sk_priority; skb->mark = sockc.mark; - if (po->has_vnet_hdr) { + if (has_vnet_hdr) { err = virtio_net_hdr_to_skb(skb, &vnet_hdr, vio_le()); if (err) goto out_free; @@ -3059,13 +3063,15 @@ static int packet_do_bind(struct sock *sk, const char *name, int ifindex, int ret = 0; bool unlisted = false; - if (po->fanout) - return -EINVAL; - lock_sock(sk); spin_lock(&po->bind_lock); rcu_read_lock(); + if (po->fanout) { + ret = -EINVAL; + goto out_unlock; + } + if (name) { dev = dev_get_by_name_rcu(sock_net(sk), name); if (!dev) { @@ -3837,6 +3843,7 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, void *data = &val; union tpacket_stats_u st; struct tpacket_rollover_stats rstats; + struct packet_rollover *rollover; if (level != SOL_PACKET) return -ENOPROTOOPT; @@ -3915,13 +3922,18 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, 0); break; case PACKET_ROLLOVER_STATS: - if (!po->rollover) + rcu_read_lock(); + rollover = rcu_dereference(po->rollover); + if (rollover) { + rstats.tp_all = atomic_long_read(&rollover->num); + rstats.tp_huge = atomic_long_read(&rollover->num_huge); + rstats.tp_failed = atomic_long_read(&rollover->num_failed); + data = &rstats; + lv = sizeof(rstats); + } + rcu_read_unlock(); + if (!rollover) return -EINVAL; - rstats.tp_all = atomic_long_read(&po->rollover->num); - rstats.tp_huge = atomic_long_read(&po->rollover->num_huge); - rstats.tp_failed = atomic_long_read(&po->rollover->num_failed); - data = &rstats; - lv = sizeof(rstats); break; case PACKET_TX_HAS_OFF: val = po->tp_tx_has_off; @@ -4548,6 +4560,7 @@ static int __net_init packet_net_init(struct net *net) static void __net_exit packet_net_exit(struct net *net) { remove_proc_entry("packet", net->proc_net); + WARN_ON_ONCE(!hlist_empty(&net->packet.sklist)); } static struct pernet_operations packet_net_ops = { diff --git a/net/packet/internal.h b/net/packet/internal.h index 94d1d405a116..562fbc155006 100644 --- a/net/packet/internal.h +++ b/net/packet/internal.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __PACKET_INTERNAL_H__ #define __PACKET_INTERNAL_H__ diff --git a/net/phonet/Makefile b/net/phonet/Makefile index e10b1b182ce3..444f875932b9 100644 --- a/net/phonet/Makefile +++ b/net/phonet/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_PHONET) += phonet.o pn_pep.o phonet-y := \ diff --git a/net/phonet/af_phonet.c b/net/phonet/af_phonet.c index f925753668a7..3b0ef691f5b1 100644 --- a/net/phonet/af_phonet.c +++ b/net/phonet/af_phonet.c @@ -35,11 +35,11 @@ #include <net/phonet/pn_dev.h> /* Transport protocol registration */ -static struct phonet_protocol *proto_tab[PHONET_NPROTO] __read_mostly; +static const struct phonet_protocol *proto_tab[PHONET_NPROTO] __read_mostly; -static struct phonet_protocol *phonet_proto_get(unsigned int protocol) +static const struct phonet_protocol *phonet_proto_get(unsigned int protocol) { - struct phonet_protocol *pp; + const struct phonet_protocol *pp; if (protocol >= PHONET_NPROTO) return NULL; @@ -53,7 +53,7 @@ static struct phonet_protocol *phonet_proto_get(unsigned int protocol) return pp; } -static inline void phonet_proto_put(struct phonet_protocol *pp) +static inline void phonet_proto_put(const struct phonet_protocol *pp) { module_put(pp->prot->owner); } @@ -65,7 +65,7 @@ static int pn_socket_create(struct net *net, struct socket *sock, int protocol, { struct sock *sk; struct pn_sock *pn; - struct phonet_protocol *pnp; + const struct phonet_protocol *pnp; int err; if (!capable(CAP_SYS_ADMIN)) @@ -149,7 +149,7 @@ static int pn_header_parse(const struct sk_buff *skb, unsigned char *haddr) return 1; } -struct header_ops phonet_header_ops = { +const struct header_ops phonet_header_ops = { .create = pn_header_create, .parse = pn_header_parse, }; @@ -470,7 +470,7 @@ static struct packet_type phonet_packet_type __read_mostly = { static DEFINE_MUTEX(proto_tab_lock); int __init_or_module phonet_proto_register(unsigned int protocol, - struct phonet_protocol *pp) + const struct phonet_protocol *pp) { int err = 0; @@ -492,7 +492,8 @@ int __init_or_module phonet_proto_register(unsigned int protocol, } EXPORT_SYMBOL(phonet_proto_register); -void phonet_proto_unregister(unsigned int protocol, struct phonet_protocol *pp) +void phonet_proto_unregister(unsigned int protocol, + const struct phonet_protocol *pp) { mutex_lock(&proto_tab_lock); BUG_ON(proto_tab[protocol] != pp); diff --git a/net/phonet/datagram.c b/net/phonet/datagram.c index 5e710435ffa9..b44fb9018fb8 100644 --- a/net/phonet/datagram.c +++ b/net/phonet/datagram.c @@ -195,7 +195,7 @@ static struct proto pn_proto = { .name = "PHONET", }; -static struct phonet_protocol pn_dgram_proto = { +static const struct phonet_protocol pn_dgram_proto = { .ops = &phonet_dgram_ops, .prot = &pn_proto, .sock_type = SOCK_DGRAM, diff --git a/net/phonet/pep.c b/net/phonet/pep.c index e81537991ddf..9fc76b19cd3c 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -1351,7 +1351,7 @@ static struct proto pep_proto = { .name = "PNPIPE", }; -static struct phonet_protocol pep_pn_proto = { +static const struct phonet_protocol pep_pn_proto = { .ops = &phonet_stream_ops, .prot = &pep_proto, .sock_type = SOCK_SEQPACKET, diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index 2cb4c5dfad6f..77787512fc32 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -331,7 +331,10 @@ static int __net_init phonet_init_net(struct net *net) static void __net_exit phonet_exit_net(struct net *net) { + struct phonet_net *pnn = phonet_pernet(net); + remove_proc_entry("phonet", net->proc_net); + WARN_ON_ONCE(!list_empty(&pnn->pndevs.list)); } static struct pernet_operations phonet_net_ops = { diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index 45b3af3080d8..da754fc926e7 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -300,15 +300,15 @@ out: int __init phonet_netlink_register(void) { int err = __rtnl_register(PF_PHONET, RTM_NEWADDR, addr_doit, - NULL, NULL); + NULL, 0); if (err) return err; /* Further __rtnl_register() cannot fail */ - __rtnl_register(PF_PHONET, RTM_DELADDR, addr_doit, NULL, NULL); - __rtnl_register(PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit, NULL); - __rtnl_register(PF_PHONET, RTM_NEWROUTE, route_doit, NULL, NULL); - __rtnl_register(PF_PHONET, RTM_DELROUTE, route_doit, NULL, NULL); - __rtnl_register(PF_PHONET, RTM_GETROUTE, NULL, route_dumpit, NULL); + __rtnl_register(PF_PHONET, RTM_DELADDR, addr_doit, NULL, 0); + __rtnl_register(PF_PHONET, RTM_GETADDR, NULL, getaddr_dumpit, 0); + __rtnl_register(PF_PHONET, RTM_NEWROUTE, route_doit, NULL, 0); + __rtnl_register(PF_PHONET, RTM_DELROUTE, route_doit, NULL, 0); + __rtnl_register(PF_PHONET, RTM_GETROUTE, NULL, route_dumpit, 0); return 0; } diff --git a/net/psample/psample.c b/net/psample/psample.c index 3a6ad0f438dc..64f95624f219 100644 --- a/net/psample/psample.c +++ b/net/psample/psample.c @@ -296,6 +296,6 @@ static void __exit psample_module_exit(void) module_init(psample_module_init); module_exit(psample_module_exit); -MODULE_AUTHOR("Yotam Gigi <yotamg@mellanox.com>"); +MODULE_AUTHOR("Yotam Gigi <yotam.gi@gmail.com>"); MODULE_DESCRIPTION("netlink channel for packet sampling"); MODULE_LICENSE("GPL v2"); diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 5586609afa27..77ab05e23001 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -20,26 +20,15 @@ #include "qrtr.h" -#define QRTR_PROTO_VER 1 +#define QRTR_PROTO_VER_1 1 +#define QRTR_PROTO_VER_2 3 /* auto-bind range */ #define QRTR_MIN_EPH_SOCKET 0x4000 #define QRTR_MAX_EPH_SOCKET 0x7fff -enum qrtr_pkt_type { - QRTR_TYPE_DATA = 1, - QRTR_TYPE_HELLO = 2, - QRTR_TYPE_BYE = 3, - QRTR_TYPE_NEW_SERVER = 4, - QRTR_TYPE_DEL_SERVER = 5, - QRTR_TYPE_DEL_CLIENT = 6, - QRTR_TYPE_RESUME_TX = 7, - QRTR_TYPE_EXIT = 8, - QRTR_TYPE_PING = 9, -}; - /** - * struct qrtr_hdr - (I|R)PCrouter packet header + * struct qrtr_hdr_v1 - (I|R)PCrouter packet header version 1 * @version: protocol version * @type: packet type; one of QRTR_TYPE_* * @src_node_id: source node @@ -49,7 +38,7 @@ enum qrtr_pkt_type { * @dst_node_id: destination node * @dst_port_id: destination port */ -struct qrtr_hdr { +struct qrtr_hdr_v1 { __le32 version; __le32 type; __le32 src_node_id; @@ -60,9 +49,44 @@ struct qrtr_hdr { __le32 dst_port_id; } __packed; -#define QRTR_HDR_SIZE sizeof(struct qrtr_hdr) -#define QRTR_NODE_BCAST ((unsigned int)-1) -#define QRTR_PORT_CTRL ((unsigned int)-2) +/** + * struct qrtr_hdr_v2 - (I|R)PCrouter packet header later versions + * @version: protocol version + * @type: packet type; one of QRTR_TYPE_* + * @flags: bitmask of QRTR_FLAGS_* + * @optlen: length of optional header data + * @size: length of packet, excluding this header and optlen + * @src_node_id: source node + * @src_port_id: source port + * @dst_node_id: destination node + * @dst_port_id: destination port + */ +struct qrtr_hdr_v2 { + u8 version; + u8 type; + u8 flags; + u8 optlen; + __le32 size; + __le16 src_node_id; + __le16 src_port_id; + __le16 dst_node_id; + __le16 dst_port_id; +}; + +#define QRTR_FLAGS_CONFIRM_RX BIT(0) + +struct qrtr_cb { + u32 src_node; + u32 src_port; + u32 dst_node; + u32 dst_port; + + u8 type; + u8 confirm_rx; +}; + +#define QRTR_HDR_MAX_SIZE max_t(size_t, sizeof(struct qrtr_hdr_v1), \ + sizeof(struct qrtr_hdr_v2)) struct qrtr_sock { /* WARNING: sk must be the first member */ @@ -111,8 +135,12 @@ struct qrtr_node { struct list_head item; }; -static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb); -static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb); +static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb, + int type, struct sockaddr_qrtr *from, + struct sockaddr_qrtr *to); +static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb, + int type, struct sockaddr_qrtr *from, + struct sockaddr_qrtr *to); /* Release node resources and free the node. * @@ -150,10 +178,27 @@ static void qrtr_node_release(struct qrtr_node *node) } /* Pass an outgoing packet socket buffer to the endpoint driver. */ -static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb) +static int qrtr_node_enqueue(struct qrtr_node *node, struct sk_buff *skb, + int type, struct sockaddr_qrtr *from, + struct sockaddr_qrtr *to) { + struct qrtr_hdr_v1 *hdr; + size_t len = skb->len; int rc = -ENODEV; + hdr = skb_push(skb, sizeof(*hdr)); + hdr->version = cpu_to_le32(QRTR_PROTO_VER_1); + hdr->type = cpu_to_le32(type); + hdr->src_node_id = cpu_to_le32(from->sq_node); + hdr->src_port_id = cpu_to_le32(from->sq_port); + hdr->dst_node_id = cpu_to_le32(to->sq_node); + hdr->dst_port_id = cpu_to_le32(to->sq_port); + + hdr->size = cpu_to_le32(len); + hdr->confirm_rx = 0; + + skb_put_padto(skb, ALIGN(len, 4)); + mutex_lock(&node->ep_lock); if (node->ep) rc = node->ep->xmit(node->ep, skb); @@ -207,125 +252,103 @@ static void qrtr_node_assign(struct qrtr_node *node, unsigned int nid) int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len) { struct qrtr_node *node = ep->node; - const struct qrtr_hdr *phdr = data; + const struct qrtr_hdr_v1 *v1; + const struct qrtr_hdr_v2 *v2; struct sk_buff *skb; - unsigned int psize; + struct qrtr_cb *cb; unsigned int size; - unsigned int type; unsigned int ver; - unsigned int dst; - - if (len < QRTR_HDR_SIZE || len & 3) - return -EINVAL; - - ver = le32_to_cpu(phdr->version); - size = le32_to_cpu(phdr->size); - type = le32_to_cpu(phdr->type); - dst = le32_to_cpu(phdr->dst_port_id); - - psize = (size + 3) & ~3; + size_t hdrlen; - if (ver != QRTR_PROTO_VER) - return -EINVAL; - - if (len != psize + QRTR_HDR_SIZE) - return -EINVAL; - - if (dst != QRTR_PORT_CTRL && type != QRTR_TYPE_DATA) + if (len & 3) return -EINVAL; skb = netdev_alloc_skb(NULL, len); if (!skb) return -ENOMEM; - skb_reset_transport_header(skb); - skb_put_data(skb, data, len); - - skb_queue_tail(&node->rx_queue, skb); - schedule_work(&node->work); - - return 0; -} -EXPORT_SYMBOL_GPL(qrtr_endpoint_post); + cb = (struct qrtr_cb *)skb->cb; -static struct sk_buff *qrtr_alloc_ctrl_packet(u32 type, size_t pkt_len, - u32 src_node, u32 dst_node) -{ - struct qrtr_hdr *hdr; - struct sk_buff *skb; - - skb = alloc_skb(QRTR_HDR_SIZE + pkt_len, GFP_KERNEL); - if (!skb) - return NULL; - skb_reset_transport_header(skb); + /* Version field in v1 is little endian, so this works for both cases */ + ver = *(u8*)data; - hdr = skb_put(skb, QRTR_HDR_SIZE); - hdr->version = cpu_to_le32(QRTR_PROTO_VER); - hdr->type = cpu_to_le32(type); - hdr->src_node_id = cpu_to_le32(src_node); - hdr->src_port_id = cpu_to_le32(QRTR_PORT_CTRL); - hdr->confirm_rx = cpu_to_le32(0); - hdr->size = cpu_to_le32(pkt_len); - hdr->dst_node_id = cpu_to_le32(dst_node); - hdr->dst_port_id = cpu_to_le32(QRTR_PORT_CTRL); + switch (ver) { + case QRTR_PROTO_VER_1: + v1 = data; + hdrlen = sizeof(*v1); - return skb; -} + cb->type = le32_to_cpu(v1->type); + cb->src_node = le32_to_cpu(v1->src_node_id); + cb->src_port = le32_to_cpu(v1->src_port_id); + cb->confirm_rx = !!v1->confirm_rx; + cb->dst_node = le32_to_cpu(v1->dst_node_id); + cb->dst_port = le32_to_cpu(v1->dst_port_id); -/* Allocate and construct a resume-tx packet. */ -static struct sk_buff *qrtr_alloc_resume_tx(u32 src_node, - u32 dst_node, u32 port) -{ - const int pkt_len = 20; - struct sk_buff *skb; - __le32 *buf; + size = le32_to_cpu(v1->size); + break; + case QRTR_PROTO_VER_2: + v2 = data; + hdrlen = sizeof(*v2) + v2->optlen; + + cb->type = v2->type; + cb->confirm_rx = !!(v2->flags & QRTR_FLAGS_CONFIRM_RX); + cb->src_node = le16_to_cpu(v2->src_node_id); + cb->src_port = le16_to_cpu(v2->src_port_id); + cb->dst_node = le16_to_cpu(v2->dst_node_id); + cb->dst_port = le16_to_cpu(v2->dst_port_id); + + if (cb->src_port == (u16)QRTR_PORT_CTRL) + cb->src_port = QRTR_PORT_CTRL; + if (cb->dst_port == (u16)QRTR_PORT_CTRL) + cb->dst_port = QRTR_PORT_CTRL; + + size = le32_to_cpu(v2->size); + break; + default: + pr_err("qrtr: Invalid version %d\n", ver); + goto err; + } - skb = qrtr_alloc_ctrl_packet(QRTR_TYPE_RESUME_TX, pkt_len, - src_node, dst_node); - if (!skb) - return NULL; + if (len != ALIGN(size, 4) + hdrlen) + goto err; - buf = skb_put_zero(skb, pkt_len); - buf[0] = cpu_to_le32(QRTR_TYPE_RESUME_TX); - buf[1] = cpu_to_le32(src_node); - buf[2] = cpu_to_le32(port); + if (cb->dst_port != QRTR_PORT_CTRL && cb->type != QRTR_TYPE_DATA) + goto err; - return skb; -} + skb_put_data(skb, data + hdrlen, size); -/* Allocate and construct a BYE message to signal remote termination */ -static struct sk_buff *qrtr_alloc_local_bye(u32 src_node) -{ - const int pkt_len = 20; - struct sk_buff *skb; - __le32 *buf; + skb_queue_tail(&node->rx_queue, skb); + schedule_work(&node->work); - skb = qrtr_alloc_ctrl_packet(QRTR_TYPE_BYE, pkt_len, - src_node, qrtr_local_nid); - if (!skb) - return NULL; + return 0; - buf = skb_put_zero(skb, pkt_len); - buf[0] = cpu_to_le32(QRTR_TYPE_BYE); +err: + kfree_skb(skb); + return -EINVAL; - return skb; } +EXPORT_SYMBOL_GPL(qrtr_endpoint_post); -static struct sk_buff *qrtr_alloc_del_client(struct sockaddr_qrtr *sq) +/** + * qrtr_alloc_ctrl_packet() - allocate control packet skb + * @pkt: reference to qrtr_ctrl_pkt pointer + * + * Returns newly allocated sk_buff, or NULL on failure + * + * This function allocates a sk_buff large enough to carry a qrtr_ctrl_pkt and + * on success returns a reference to the control packet in @pkt. + */ +static struct sk_buff *qrtr_alloc_ctrl_packet(struct qrtr_ctrl_pkt **pkt) { - const int pkt_len = 20; + const int pkt_len = sizeof(struct qrtr_ctrl_pkt); struct sk_buff *skb; - __le32 *buf; - skb = qrtr_alloc_ctrl_packet(QRTR_TYPE_DEL_CLIENT, pkt_len, - sq->sq_node, QRTR_NODE_BCAST); + skb = alloc_skb(QRTR_HDR_MAX_SIZE + pkt_len, GFP_KERNEL); if (!skb) return NULL; - buf = skb_put_zero(skb, pkt_len); - buf[0] = cpu_to_le32(QRTR_TYPE_DEL_CLIENT); - buf[1] = cpu_to_le32(sq->sq_node); - buf[2] = cpu_to_le32(sq->sq_port); + skb_reserve(skb, QRTR_HDR_MAX_SIZE); + *pkt = skb_put_zero(skb, pkt_len); return skb; } @@ -340,24 +363,26 @@ static void qrtr_port_put(struct qrtr_sock *ipc); static void qrtr_node_rx_work(struct work_struct *work) { struct qrtr_node *node = container_of(work, struct qrtr_node, work); + struct qrtr_ctrl_pkt *pkt; + struct sockaddr_qrtr dst; + struct sockaddr_qrtr src; struct sk_buff *skb; while ((skb = skb_dequeue(&node->rx_queue)) != NULL) { - const struct qrtr_hdr *phdr; - u32 dst_node, dst_port; struct qrtr_sock *ipc; - u32 src_node; + struct qrtr_cb *cb; int confirm; - phdr = (const struct qrtr_hdr *)skb_transport_header(skb); - src_node = le32_to_cpu(phdr->src_node_id); - dst_node = le32_to_cpu(phdr->dst_node_id); - dst_port = le32_to_cpu(phdr->dst_port_id); - confirm = !!phdr->confirm_rx; + cb = (struct qrtr_cb *)skb->cb; + src.sq_node = cb->src_node; + src.sq_port = cb->src_port; + dst.sq_node = cb->dst_node; + dst.sq_port = cb->dst_port; + confirm = !!cb->confirm_rx; - qrtr_node_assign(node, src_node); + qrtr_node_assign(node, cb->src_node); - ipc = qrtr_port_lookup(dst_port); + ipc = qrtr_port_lookup(cb->dst_port); if (!ipc) { kfree_skb(skb); } else { @@ -368,10 +393,16 @@ static void qrtr_node_rx_work(struct work_struct *work) } if (confirm) { - skb = qrtr_alloc_resume_tx(dst_node, node->nid, dst_port); + skb = qrtr_alloc_ctrl_packet(&pkt); if (!skb) break; - if (qrtr_node_enqueue(node, skb)) + + pkt->cmd = cpu_to_le32(QRTR_TYPE_RESUME_TX); + pkt->client.node = cpu_to_le32(dst.sq_node); + pkt->client.port = cpu_to_le32(dst.sq_port); + + if (qrtr_node_enqueue(node, skb, QRTR_TYPE_RESUME_TX, + &dst, &src)) break; } } @@ -421,6 +452,9 @@ EXPORT_SYMBOL_GPL(qrtr_endpoint_register); void qrtr_endpoint_unregister(struct qrtr_endpoint *ep) { struct qrtr_node *node = ep->node; + struct sockaddr_qrtr src = {AF_QIPCRTR, node->nid, QRTR_PORT_CTRL}; + struct sockaddr_qrtr dst = {AF_QIPCRTR, qrtr_local_nid, QRTR_PORT_CTRL}; + struct qrtr_ctrl_pkt *pkt; struct sk_buff *skb; mutex_lock(&node->ep_lock); @@ -428,9 +462,11 @@ void qrtr_endpoint_unregister(struct qrtr_endpoint *ep) mutex_unlock(&node->ep_lock); /* Notify the local controller about the event */ - skb = qrtr_alloc_local_bye(node->nid); - if (skb) - qrtr_local_enqueue(NULL, skb); + skb = qrtr_alloc_ctrl_packet(&pkt); + if (skb) { + pkt->cmd = cpu_to_le32(QRTR_TYPE_BYE); + qrtr_local_enqueue(NULL, skb, QRTR_TYPE_BYE, &src, &dst); + } qrtr_node_release(node); ep->node = NULL; @@ -466,13 +502,24 @@ static void qrtr_port_put(struct qrtr_sock *ipc) /* Remove port assignment. */ static void qrtr_port_remove(struct qrtr_sock *ipc) { + struct qrtr_ctrl_pkt *pkt; struct sk_buff *skb; int port = ipc->us.sq_port; + struct sockaddr_qrtr to; - skb = qrtr_alloc_del_client(&ipc->us); + to.sq_family = AF_QIPCRTR; + to.sq_node = QRTR_NODE_BCAST; + to.sq_port = QRTR_PORT_CTRL; + + skb = qrtr_alloc_ctrl_packet(&pkt); if (skb) { + pkt->cmd = cpu_to_le32(QRTR_TYPE_DEL_CLIENT); + pkt->client.node = cpu_to_le32(ipc->us.sq_node); + pkt->client.port = cpu_to_le32(ipc->us.sq_port); + skb_set_owner_w(skb, &ipc->sk); - qrtr_bcast_enqueue(NULL, skb); + qrtr_bcast_enqueue(NULL, skb, QRTR_TYPE_DEL_CLIENT, &ipc->us, + &to); } if (port == QRTR_PORT_CTRL) @@ -541,7 +588,7 @@ static void qrtr_reset_ports(void) sock_hold(&ipc->sk); ipc->sk.sk_err = ENETRESET; - wake_up_interruptible(sk_sleep(&ipc->sk)); + ipc->sk.sk_error_report(&ipc->sk); sock_put(&ipc->sk); } mutex_unlock(&qrtr_port_lock); @@ -620,19 +667,23 @@ static int qrtr_bind(struct socket *sock, struct sockaddr *saddr, int len) } /* Queue packet to local peer socket. */ -static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb) +static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb, + int type, struct sockaddr_qrtr *from, + struct sockaddr_qrtr *to) { - const struct qrtr_hdr *phdr; struct qrtr_sock *ipc; + struct qrtr_cb *cb; - phdr = (const struct qrtr_hdr *)skb_transport_header(skb); - - ipc = qrtr_port_lookup(le32_to_cpu(phdr->dst_port_id)); + ipc = qrtr_port_lookup(to->sq_port); if (!ipc || &ipc->sk == skb->sk) { /* do not send to self */ kfree_skb(skb); return -ENODEV; } + cb = (struct qrtr_cb *)skb->cb; + cb->src_node = from->sq_node; + cb->src_port = from->sq_port; + if (sock_queue_rcv_skb(&ipc->sk, skb)) { qrtr_port_put(ipc); kfree_skb(skb); @@ -645,7 +696,9 @@ static int qrtr_local_enqueue(struct qrtr_node *node, struct sk_buff *skb) } /* Queue packet for broadcast. */ -static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb) +static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb, + int type, struct sockaddr_qrtr *from, + struct sockaddr_qrtr *to) { struct sk_buff *skbn; @@ -655,11 +708,11 @@ static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb) if (!skbn) break; skb_set_owner_w(skbn, skb->sk); - qrtr_node_enqueue(node, skbn); + qrtr_node_enqueue(node, skbn, type, from, to); } mutex_unlock(&qrtr_node_lock); - qrtr_local_enqueue(node, skb); + qrtr_local_enqueue(node, skb, type, from, to); return 0; } @@ -667,13 +720,14 @@ static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb) static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name); - int (*enqueue_fn)(struct qrtr_node *, struct sk_buff *); + int (*enqueue_fn)(struct qrtr_node *, struct sk_buff *, int, + struct sockaddr_qrtr *, struct sockaddr_qrtr *); struct qrtr_sock *ipc = qrtr_sk(sock->sk); struct sock *sk = sock->sk; struct qrtr_node *node; - struct qrtr_hdr *hdr; struct sk_buff *skb; size_t plen; + u32 type = QRTR_TYPE_DATA; int rc; if (msg->msg_flags & ~(MSG_DONTWAIT)) @@ -722,37 +776,19 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) } plen = (len + 3) & ~3; - skb = sock_alloc_send_skb(sk, plen + QRTR_HDR_SIZE, + skb = sock_alloc_send_skb(sk, plen + QRTR_HDR_MAX_SIZE, msg->msg_flags & MSG_DONTWAIT, &rc); if (!skb) goto out_node; - skb_reset_transport_header(skb); - skb_put(skb, len + QRTR_HDR_SIZE); - - hdr = (struct qrtr_hdr *)skb_transport_header(skb); - hdr->version = cpu_to_le32(QRTR_PROTO_VER); - hdr->src_node_id = cpu_to_le32(ipc->us.sq_node); - hdr->src_port_id = cpu_to_le32(ipc->us.sq_port); - hdr->confirm_rx = cpu_to_le32(0); - hdr->size = cpu_to_le32(len); - hdr->dst_node_id = cpu_to_le32(addr->sq_node); - hdr->dst_port_id = cpu_to_le32(addr->sq_port); + skb_reserve(skb, QRTR_HDR_MAX_SIZE); - rc = skb_copy_datagram_from_iter(skb, QRTR_HDR_SIZE, - &msg->msg_iter, len); + rc = memcpy_from_msg(skb_put(skb, len), msg, len); if (rc) { kfree_skb(skb); goto out_node; } - if (plen != len) { - rc = skb_pad(skb, plen - len); - if (rc) - goto out_node; - skb_put(skb, plen - len); - } - if (ipc->us.sq_port == QRTR_PORT_CTRL) { if (len < 4) { rc = -EINVAL; @@ -761,12 +797,11 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) } /* control messages already require the type as 'command' */ - skb_copy_bits(skb, QRTR_HDR_SIZE, &hdr->type, 4); - } else { - hdr->type = cpu_to_le32(QRTR_TYPE_DATA); + skb_copy_bits(skb, 0, &type, 4); + type = le32_to_cpu(type); } - rc = enqueue_fn(node, skb); + rc = enqueue_fn(node, skb, type, &ipc->us, addr); if (rc >= 0) rc = len; @@ -781,9 +816,9 @@ static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { DECLARE_SOCKADDR(struct sockaddr_qrtr *, addr, msg->msg_name); - const struct qrtr_hdr *phdr; struct sock *sk = sock->sk; struct sk_buff *skb; + struct qrtr_cb *cb; int copied, rc; lock_sock(sk); @@ -800,22 +835,22 @@ static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg, return rc; } - phdr = (const struct qrtr_hdr *)skb_transport_header(skb); - copied = le32_to_cpu(phdr->size); + copied = skb->len; if (copied > size) { copied = size; msg->msg_flags |= MSG_TRUNC; } - rc = skb_copy_datagram_msg(skb, QRTR_HDR_SIZE, msg, copied); + rc = skb_copy_datagram_msg(skb, 0, msg, copied); if (rc < 0) goto out; rc = copied; if (addr) { + cb = (struct qrtr_cb *)skb->cb; addr->sq_family = AF_QIPCRTR; - addr->sq_node = le32_to_cpu(phdr->src_node_id); - addr->sq_port = le32_to_cpu(phdr->src_port_id); + addr->sq_node = cb->src_node; + addr->sq_port = cb->src_port; msg->msg_namelen = sizeof(*addr); } @@ -908,7 +943,7 @@ static int qrtr_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case TIOCINQ: skb = skb_peek(&sk->sk_receive_queue); if (skb) - len = skb->len - QRTR_HDR_SIZE; + len = skb->len; rc = put_user(len, (int __user *)argp); break; case SIOCGIFADDR: @@ -1081,11 +1116,11 @@ static int __init qrtr_proto_init(void) return rc; } - rtnl_register(PF_QIPCRTR, RTM_NEWADDR, qrtr_addr_doit, NULL, NULL); + rtnl_register(PF_QIPCRTR, RTM_NEWADDR, qrtr_addr_doit, NULL, 0); return 0; } -module_init(qrtr_proto_init); +postcore_initcall(qrtr_proto_init); static void __exit qrtr_proto_fini(void) { diff --git a/net/qrtr/qrtr.h b/net/qrtr/qrtr.h index 2b848718f8fe..b81e6953c04b 100644 --- a/net/qrtr/qrtr.h +++ b/net/qrtr/qrtr.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __QRTR_H_ #define __QRTR_H_ diff --git a/net/rds/Makefile b/net/rds/Makefile index 56c7d27eefee..b5d568bd479c 100644 --- a/net/rds/Makefile +++ b/net/rds/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_RDS) += rds.o rds-y := af_rds.o bind.o cong.o connection.o info.o message.o \ recv.o send.o stats.o sysctl.o threads.o transport.o \ diff --git a/net/rds/bind.c b/net/rds/bind.c index 3a915bedb76c..75d43dc8e96b 100644 --- a/net/rds/bind.c +++ b/net/rds/bind.c @@ -40,7 +40,7 @@ static struct rhashtable bind_hash_table; -static struct rhashtable_params ht_parms = { +static const struct rhashtable_params ht_parms = { .nelem_hint = 768, .key_len = sizeof(u64), .key_offset = offsetof(struct rds_sock, rs_bound_key), diff --git a/net/rds/connection.c b/net/rds/connection.c index 50a3789ac23e..7ee2d5d68b78 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -151,6 +151,7 @@ static struct rds_connection *__rds_conn_create(struct net *net, struct rds_transport *loop_trans; unsigned long flags; int ret, i; + int npaths = (trans->t_mp_capable ? RDS_MPATH_WORKERS : 1); rcu_read_lock(); conn = rds_conn_lookup(net, head, laddr, faddr, trans); @@ -172,6 +173,12 @@ static struct rds_connection *__rds_conn_create(struct net *net, conn = ERR_PTR(-ENOMEM); goto out; } + conn->c_path = kcalloc(npaths, sizeof(struct rds_conn_path), gfp); + if (!conn->c_path) { + kmem_cache_free(rds_conn_slab, conn); + conn = ERR_PTR(-ENOMEM); + goto out; + } INIT_HLIST_NODE(&conn->c_hash_node); conn->c_laddr = laddr; @@ -181,6 +188,7 @@ static struct rds_connection *__rds_conn_create(struct net *net, ret = rds_cong_get_maps(conn); if (ret) { + kfree(conn->c_path); kmem_cache_free(rds_conn_slab, conn); conn = ERR_PTR(ret); goto out; @@ -207,13 +215,14 @@ static struct rds_connection *__rds_conn_create(struct net *net, conn->c_trans = trans; init_waitqueue_head(&conn->c_hs_waitq); - for (i = 0; i < RDS_MPATH_WORKERS; i++) { + for (i = 0; i < npaths; i++) { __rds_conn_path_init(conn, &conn->c_path[i], is_outgoing); conn->c_path[i].cp_index = i; } ret = trans->conn_alloc(conn, gfp); if (ret) { + kfree(conn->c_path); kmem_cache_free(rds_conn_slab, conn); conn = ERR_PTR(ret); goto out; @@ -236,6 +245,7 @@ static struct rds_connection *__rds_conn_create(struct net *net, /* Creating passive conn */ if (parent->c_passive) { trans->conn_free(conn->c_path[0].cp_transport_data); + kfree(conn->c_path); kmem_cache_free(rds_conn_slab, conn); conn = parent->c_passive; } else { @@ -252,7 +262,7 @@ static struct rds_connection *__rds_conn_create(struct net *net, struct rds_conn_path *cp; int i; - for (i = 0; i < RDS_MPATH_WORKERS; i++) { + for (i = 0; i < npaths; i++) { cp = &conn->c_path[i]; /* The ->conn_alloc invocation may have * allocated resource for all paths, so all @@ -261,6 +271,7 @@ static struct rds_connection *__rds_conn_create(struct net *net, if (cp->cp_transport_data) trans->conn_free(cp->cp_transport_data); } + kfree(conn->c_path); kmem_cache_free(rds_conn_slab, conn); conn = found; } else { @@ -374,13 +385,13 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp) if (!cp->cp_transport_data) return; - rds_conn_path_drop(cp); - flush_work(&cp->cp_down_w); - /* make sure lingering queued work won't try to ref the conn */ cancel_delayed_work_sync(&cp->cp_send_w); cancel_delayed_work_sync(&cp->cp_recv_w); + rds_conn_path_drop(cp, true); + flush_work(&cp->cp_down_w); + /* tear down queued messages */ list_for_each_entry_safe(rm, rtmp, &cp->cp_send_queue, @@ -407,6 +418,7 @@ void rds_conn_destroy(struct rds_connection *conn) unsigned long flags; int i; struct rds_conn_path *cp; + int npaths = (conn->c_trans->t_mp_capable ? RDS_MPATH_WORKERS : 1); rdsdebug("freeing conn %p for %pI4 -> " "%pI4\n", conn, &conn->c_laddr, @@ -420,7 +432,7 @@ void rds_conn_destroy(struct rds_connection *conn) synchronize_rcu(); /* shut the connection down */ - for (i = 0; i < RDS_MPATH_WORKERS; i++) { + for (i = 0; i < npaths; i++) { cp = &conn->c_path[i]; rds_conn_path_destroy(cp); BUG_ON(!list_empty(&cp->cp_retrans)); @@ -434,6 +446,7 @@ void rds_conn_destroy(struct rds_connection *conn) rds_cong_remove_conn(conn); put_net(conn->c_net); + kfree(conn->c_path); kmem_cache_free(rds_conn_slab, conn); spin_lock_irqsave(&rds_conn_lock, flags); @@ -464,8 +477,12 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len, i++, head++) { hlist_for_each_entry_rcu(conn, head, c_hash_node) { struct rds_conn_path *cp; + int npaths; + + npaths = (conn->c_trans->t_mp_capable ? + RDS_MPATH_WORKERS : 1); - for (j = 0; j < RDS_MPATH_WORKERS; j++) { + for (j = 0; j < npaths; j++) { cp = &conn->c_path[j]; if (want_send) list = &cp->cp_send_queue; @@ -486,8 +503,6 @@ static void rds_conn_message_info(struct socket *sock, unsigned int len, } spin_unlock_irqrestore(&cp->cp_lock, flags); - if (!conn->c_trans->t_mp_capable) - break; } } } @@ -571,15 +586,16 @@ static void rds_walk_conn_path_info(struct socket *sock, unsigned int len, i++, head++) { hlist_for_each_entry_rcu(conn, head, c_hash_node) { struct rds_conn_path *cp; + int npaths; - for (j = 0; j < RDS_MPATH_WORKERS; j++) { + npaths = (conn->c_trans->t_mp_capable ? + RDS_MPATH_WORKERS : 1); + for (j = 0; j < npaths; j++) { cp = &conn->c_path[j]; /* XXX no cp_lock usage.. */ if (!visitor(cp, buffer)) continue; - if (!conn->c_trans->t_mp_capable) - break; } /* We copy as much as we can fit in the buffer, @@ -664,9 +680,13 @@ void rds_conn_exit(void) /* * Force a disconnect */ -void rds_conn_path_drop(struct rds_conn_path *cp) +void rds_conn_path_drop(struct rds_conn_path *cp, bool destroy) { atomic_set(&cp->cp_state, RDS_CONN_ERROR); + + if (!destroy && cp->cp_conn->c_destroy_in_prog) + return; + queue_work(rds_wq, &cp->cp_down_w); } EXPORT_SYMBOL_GPL(rds_conn_path_drop); @@ -674,7 +694,7 @@ EXPORT_SYMBOL_GPL(rds_conn_path_drop); void rds_conn_drop(struct rds_connection *conn) { WARN_ON(conn->c_trans->t_mp_capable); - rds_conn_path_drop(&conn->c_path[0]); + rds_conn_path_drop(&conn->c_path[0], false); } EXPORT_SYMBOL_GPL(rds_conn_drop); @@ -706,5 +726,5 @@ __rds_conn_path_error(struct rds_conn_path *cp, const char *fmt, ...) vprintk(fmt, ap); va_end(ap); - rds_conn_path_drop(cp); + rds_conn_path_drop(cp, false); } diff --git a/net/rds/ib.c b/net/rds/ib.c index a0954ace3774..36dd2099048a 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -126,6 +126,7 @@ void rds_ib_dev_put(struct rds_ib_device *rds_ibdev) static void rds_ib_add_one(struct ib_device *device) { struct rds_ib_device *rds_ibdev; + bool has_fr, has_fmr; /* Only handle IB (no iWARP) devices */ if (device->node_type != RDMA_NODE_IB_CA) @@ -143,11 +144,11 @@ static void rds_ib_add_one(struct ib_device *device) rds_ibdev->max_wrs = device->attrs.max_qp_wr; rds_ibdev->max_sge = min(device->attrs.max_sge, RDS_IB_MAX_SGE); - rds_ibdev->has_fr = (device->attrs.device_cap_flags & - IB_DEVICE_MEM_MGT_EXTENSIONS); - rds_ibdev->has_fmr = (device->alloc_fmr && device->dealloc_fmr && - device->map_phys_fmr && device->unmap_fmr); - rds_ibdev->use_fastreg = (rds_ibdev->has_fr && !rds_ibdev->has_fmr); + has_fr = (device->attrs.device_cap_flags & + IB_DEVICE_MEM_MGT_EXTENSIONS); + has_fmr = (device->alloc_fmr && device->dealloc_fmr && + device->map_phys_fmr && device->unmap_fmr); + rds_ibdev->use_fastreg = (has_fr && !has_fmr); rds_ibdev->fmr_max_remaps = device->attrs.max_map_per_fmr?: 32; rds_ibdev->max_1m_mrs = device->attrs.max_mr ? diff --git a/net/rds/ib.h b/net/rds/ib.h index bf4822407567..a6f4d7d68e95 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _RDS_IB_H #define _RDS_IB_H @@ -215,8 +216,6 @@ struct rds_ib_device { struct list_head conn_list; struct ib_device *dev; struct ib_pd *pd; - bool has_fmr; - bool has_fr; bool use_fastreg; unsigned int max_mrs; diff --git a/net/rds/ib_fmr.c b/net/rds/ib_fmr.c index 86ef907067bb..e0f70c4051b6 100644 --- a/net/rds/ib_fmr.c +++ b/net/rds/ib_fmr.c @@ -139,8 +139,8 @@ static int rds_ib_map_fmr(struct rds_ib_device *rds_ibdev, return -EINVAL; } - dma_pages = kmalloc_node(sizeof(u64) * page_cnt, GFP_ATOMIC, - rdsibdev_to_node(rds_ibdev)); + dma_pages = kmalloc_array_node(sizeof(u64), page_cnt, GFP_ATOMIC, + rdsibdev_to_node(rds_ibdev)); if (!dma_pages) { ib_dma_unmap_sg(dev, sg, nents, DMA_BIDIRECTIONAL); return -ENOMEM; diff --git a/net/rds/ib_rdma.c b/net/rds/ib_rdma.c index 9a3c54e659e9..e678699268a2 100644 --- a/net/rds/ib_rdma.c +++ b/net/rds/ib_rdma.c @@ -601,11 +601,11 @@ struct rds_ib_mr_pool *rds_ib_create_mr_pool(struct rds_ib_device *rds_ibdev, if (pool_type == RDS_IB_MR_1M_POOL) { /* +1 allows for unaligned MRs */ pool->fmr_attr.max_pages = RDS_MR_1M_MSG_SIZE + 1; - pool->max_items = RDS_MR_1M_POOL_SIZE; + pool->max_items = rds_ibdev->max_1m_mrs; } else { /* pool_type == RDS_IB_MR_8K_POOL */ pool->fmr_attr.max_pages = RDS_MR_8K_MSG_SIZE + 1; - pool->max_items = RDS_MR_8K_POOL_SIZE; + pool->max_items = rds_ibdev->max_8k_mrs; } pool->max_free_pinned = pool->max_items * pool->fmr_attr.max_pages / 4; diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index 9722bf839d9d..b4e421aa9727 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -410,14 +410,14 @@ void rds_ib_recv_refill(struct rds_connection *conn, int prefill, gfp_t gfp) break; } - /* XXX when can this fail? */ - ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr); - rdsdebug("recv %p ibinc %p page %p addr %lu ret %d\n", recv, + rdsdebug("recv %p ibinc %p page %p addr %lu\n", recv, recv->r_ibinc, sg_page(&recv->r_frag->f_sg), (long) ib_sg_dma_address( ic->i_cm_id->device, - &recv->r_frag->f_sg), - ret); + &recv->r_frag->f_sg)); + + /* XXX when can this fail? */ + ret = ib_post_recv(ic->i_cm_id->qp, &recv->r_wr, &failed_wr); if (ret) { rds_ib_conn_error(conn, "recv post on " "%pI4 returned %d, disconnecting and " diff --git a/net/rds/ib_send.c b/net/rds/ib_send.c index 6ab39dbcca01..8557a1cae041 100644 --- a/net/rds/ib_send.c +++ b/net/rds/ib_send.c @@ -661,13 +661,15 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, } } - rds_ib_set_wr_signal_state(ic, send, 0); + rds_ib_set_wr_signal_state(ic, send, false); /* * Always signal the last one if we're stopping due to flow control. */ - if (ic->i_flowctl && flow_controlled && i == (work_alloc-1)) - send->s_wr.send_flags |= IB_SEND_SIGNALED | IB_SEND_SOLICITED; + if (ic->i_flowctl && flow_controlled && i == (work_alloc - 1)) { + rds_ib_set_wr_signal_state(ic, send, true); + send->s_wr.send_flags |= IB_SEND_SOLICITED; + } if (send->s_wr.send_flags & IB_SEND_SIGNALED) nr_sig++; @@ -705,11 +707,8 @@ int rds_ib_xmit(struct rds_connection *conn, struct rds_message *rm, if (scat == &rm->data.op_sg[rm->data.op_count]) { prev->s_op = ic->i_data_op; prev->s_wr.send_flags |= IB_SEND_SOLICITED; - if (!(prev->s_wr.send_flags & IB_SEND_SIGNALED)) { - ic->i_unsignaled_wrs = rds_ib_sysctl_max_unsig_wrs; - prev->s_wr.send_flags |= IB_SEND_SIGNALED; - nr_sig++; - } + if (!(prev->s_wr.send_flags & IB_SEND_SIGNALED)) + nr_sig += rds_ib_set_wr_signal_state(ic, prev, true); ic->i_data_op = NULL; } @@ -792,6 +791,7 @@ int rds_ib_xmit_atomic(struct rds_connection *conn, struct rm_atomic_op *op) send->s_atomic_wr.compare_add_mask = op->op_m_fadd.nocarry_mask; send->s_atomic_wr.swap_mask = 0; } + send->s_wr.send_flags = 0; nr_sig = rds_ib_set_wr_signal_state(ic, send, op->op_notify); send->s_atomic_wr.wr.num_sge = 1; send->s_atomic_wr.wr.next = NULL; diff --git a/net/rds/info.h b/net/rds/info.h index b6c052ca7d22..a069b51c4679 100644 --- a/net/rds/info.h +++ b/net/rds/info.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _RDS_INFO_H #define _RDS_INFO_H diff --git a/net/rds/loop.h b/net/rds/loop.h index f32b0939a04d..469fa4b2da4f 100644 --- a/net/rds/loop.h +++ b/net/rds/loop.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _RDS_LOOP_H #define _RDS_LOOP_H diff --git a/net/rds/rdma_transport.h b/net/rds/rdma_transport.h index ff2010e9d20c..d309c4430124 100644 --- a/net/rds/rdma_transport.h +++ b/net/rds/rdma_transport.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _RDMA_TRANSPORT_H #define _RDMA_TRANSPORT_H diff --git a/net/rds/rds.h b/net/rds/rds.h index 516bcc89b46f..c349c71babff 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _RDS_RDS_H #define _RDS_RDS_H @@ -154,7 +155,7 @@ struct rds_connection { struct list_head c_map_item; unsigned long c_map_queued; - struct rds_conn_path c_path[RDS_MPATH_WORKERS]; + struct rds_conn_path *c_path; wait_queue_head_t c_hs_waitq; /* handshake waitq */ u32 c_my_gen_num; @@ -700,7 +701,7 @@ struct rds_connection *rds_conn_create_outgoing(struct net *net, void rds_conn_shutdown(struct rds_conn_path *cpath); void rds_conn_destroy(struct rds_connection *conn); void rds_conn_drop(struct rds_connection *conn); -void rds_conn_path_drop(struct rds_conn_path *cpath); +void rds_conn_path_drop(struct rds_conn_path *cpath, bool destroy); void rds_conn_connect_if_down(struct rds_connection *conn); void rds_conn_path_connect_if_down(struct rds_conn_path *cp); void rds_for_each_conn_info(struct socket *sock, unsigned int len, diff --git a/net/rds/rds_single_path.h b/net/rds/rds_single_path.h index e1241af7c1ad..9521f6e99bef 100644 --- a/net/rds/rds_single_path.h +++ b/net/rds/rds_single_path.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _RDS_RDS_SINGLE_H #define _RDS_RDS_SINGLE_H diff --git a/net/rds/send.c b/net/rds/send.c index 41b9f0f5bb9c..b52cdc8ae428 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -273,7 +273,7 @@ restart: len = ntohl(rm->m_inc.i_hdr.h_len); if (cp->cp_unacked_packets == 0 || cp->cp_unacked_bytes < len) { - __set_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags); + set_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags); cp->cp_unacked_packets = rds_sysctl_max_unacked_packets; @@ -428,14 +428,18 @@ over_batch: * some work and we will skip our goto */ if (ret == 0) { + bool raced; + smp_mb(); + raced = send_gen != READ_ONCE(cp->cp_send_gen); + if ((test_bit(0, &conn->c_map_queued) || - !list_empty(&cp->cp_send_queue)) && - send_gen == READ_ONCE(cp->cp_send_gen)) { - rds_stats_inc(s_send_lock_queue_raced); + !list_empty(&cp->cp_send_queue)) && !raced) { if (batch_count < send_batch_count) goto restart; queue_delayed_work(rds_wq, &cp->cp_send_w, 1); + } else if (raced) { + rds_stats_inc(s_send_lock_queue_raced); } } out: @@ -829,7 +833,7 @@ static int rds_send_queue_rm(struct rds_sock *rs, struct rds_connection *conn, * throughput hits a certain threshold. */ if (rs->rs_snd_bytes >= rds_sk_sndbuf(rs) / 2) - __set_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags); + set_bit(RDS_MSG_ACK_REQUIRED, &rm->m_flags); list_add_tail(&rm->m_sock_item, &rs->rs_send_queue); set_bit(RDS_MSG_ON_SOCK, &rm->m_flags); diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 431404dbdad1..6b7ee71f40c6 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -592,7 +592,7 @@ static void rds_tcp_sysctl_reset(struct net *net) continue; /* reconnect with new parameters */ - rds_conn_path_drop(tc->t_cpath); + rds_conn_path_drop(tc->t_cpath, false); } spin_unlock_irq(&rds_tcp_conn_lock); } diff --git a/net/rds/tcp.h b/net/rds/tcp.h index f8800b7ce79c..1aafbf7c3011 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _RDS_TCP_H #define _RDS_TCP_H diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index cbe08a1fa4c7..46f74dad0e16 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -69,14 +69,14 @@ void rds_tcp_state_change(struct sock *sk) if (!IS_CANONICAL(cp->cp_conn->c_laddr, cp->cp_conn->c_faddr) && rds_conn_path_transition(cp, RDS_CONN_CONNECTING, RDS_CONN_ERROR)) { - rds_conn_path_drop(cp); + rds_conn_path_drop(cp, false); } else { rds_connect_path_complete(cp, RDS_CONN_CONNECTING); } break; case TCP_CLOSE_WAIT: case TCP_CLOSE: - rds_conn_path_drop(cp); + rds_conn_path_drop(cp, false); default: break; } diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index 0d8616aa5bad..dc860d1bb608 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -157,7 +157,7 @@ out: "returned %d, " "disconnecting and reconnecting\n", &conn->c_faddr, cp->cp_index, ret); - rds_conn_path_drop(cp); + rds_conn_path_drop(cp, false); } } } diff --git a/net/rds/threads.c b/net/rds/threads.c index 2852bc1d37d4..f121daa402c8 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -78,7 +78,7 @@ void rds_connect_path_complete(struct rds_conn_path *cp, int curr) "current state is %d\n", __func__, atomic_read(&cp->cp_state)); - rds_conn_path_drop(cp); + rds_conn_path_drop(cp, false); return; } diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 4a9729257023..6a5c4992cf61 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -318,9 +318,11 @@ void rose_destroy_socket(struct sock *); /* * Handler for deferred kills. */ -static void rose_destroy_timer(unsigned long data) +static void rose_destroy_timer(struct timer_list *t) { - rose_destroy_socket((struct sock *)data); + struct sock *sk = from_timer(sk, t, sk_timer); + + rose_destroy_socket(sk); } /* @@ -353,8 +355,7 @@ void rose_destroy_socket(struct sock *sk) if (sk_has_allocations(sk)) { /* Defer: outstanding buffers */ - setup_timer(&sk->sk_timer, rose_destroy_timer, - (unsigned long)sk); + timer_setup(&sk->sk_timer, rose_destroy_timer, 0); sk->sk_timer.expires = jiffies + 10 * HZ; add_timer(&sk->sk_timer); } else @@ -538,8 +539,8 @@ static int rose_create(struct net *net, struct socket *sock, int protocol, sock->ops = &rose_proto_ops; sk->sk_protocol = protocol; - init_timer(&rose->timer); - init_timer(&rose->idletimer); + timer_setup(&rose->timer, NULL, 0); + timer_setup(&rose->idletimer, NULL, 0); rose->t1 = msecs_to_jiffies(sysctl_rose_call_request_timeout); rose->t2 = msecs_to_jiffies(sysctl_rose_reset_request_timeout); @@ -582,8 +583,8 @@ static struct sock *rose_make_new(struct sock *osk) sk->sk_state = TCP_ESTABLISHED; sock_copy_flags(sk, osk); - init_timer(&rose->timer); - init_timer(&rose->idletimer); + timer_setup(&rose->timer, NULL, 0); + timer_setup(&rose->idletimer, NULL, 0); orose = rose_sk(osk); rose->t1 = orose->t1; diff --git a/net/rose/rose_in.c b/net/rose/rose_in.c index 0a6394754e81..9bbbfe325c5a 100644 --- a/net/rose/rose_in.c +++ b/net/rose/rose_in.c @@ -219,6 +219,7 @@ static int rose_state4_machine(struct sock *sk, struct sk_buff *skb, int framety switch (frametype) { case ROSE_RESET_REQUEST: rose_write_internal(sk, ROSE_RESET_CONFIRMATION); + /* fall through */ case ROSE_RESET_CONFIRMATION: rose_stop_timer(sk); rose_start_idletimer(sk); diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c index c76638cc2cd5..cda4c6678ef1 100644 --- a/net/rose/rose_link.c +++ b/net/rose/rose_link.c @@ -27,8 +27,8 @@ #include <linux/interrupt.h> #include <net/rose.h> -static void rose_ftimer_expiry(unsigned long); -static void rose_t0timer_expiry(unsigned long); +static void rose_ftimer_expiry(struct timer_list *); +static void rose_t0timer_expiry(struct timer_list *); static void rose_transmit_restart_confirmation(struct rose_neigh *neigh); static void rose_transmit_restart_request(struct rose_neigh *neigh); @@ -37,8 +37,7 @@ void rose_start_ftimer(struct rose_neigh *neigh) { del_timer(&neigh->ftimer); - neigh->ftimer.data = (unsigned long)neigh; - neigh->ftimer.function = &rose_ftimer_expiry; + neigh->ftimer.function = (TIMER_FUNC_TYPE)rose_ftimer_expiry; neigh->ftimer.expires = jiffies + msecs_to_jiffies(sysctl_rose_link_fail_timeout); @@ -49,8 +48,7 @@ static void rose_start_t0timer(struct rose_neigh *neigh) { del_timer(&neigh->t0timer); - neigh->t0timer.data = (unsigned long)neigh; - neigh->t0timer.function = &rose_t0timer_expiry; + neigh->t0timer.function = (TIMER_FUNC_TYPE)rose_t0timer_expiry; neigh->t0timer.expires = jiffies + msecs_to_jiffies(sysctl_rose_restart_request_timeout); @@ -77,13 +75,13 @@ static int rose_t0timer_running(struct rose_neigh *neigh) return timer_pending(&neigh->t0timer); } -static void rose_ftimer_expiry(unsigned long param) +static void rose_ftimer_expiry(struct timer_list *t) { } -static void rose_t0timer_expiry(unsigned long param) +static void rose_t0timer_expiry(struct timer_list *t) { - struct rose_neigh *neigh = (struct rose_neigh *)param; + struct rose_neigh *neigh = from_timer(neigh, t, t0timer); rose_transmit_restart_request(neigh); diff --git a/net/rose/rose_loopback.c b/net/rose/rose_loopback.c index 344456206b70..7af4f99c4a93 100644 --- a/net/rose/rose_loopback.c +++ b/net/rose/rose_loopback.c @@ -19,12 +19,13 @@ static struct sk_buff_head loopback_queue; static struct timer_list loopback_timer; static void rose_set_loopback_timer(void); +static void rose_loopback_timer(struct timer_list *unused); void rose_loopback_init(void) { skb_queue_head_init(&loopback_queue); - init_timer(&loopback_timer); + timer_setup(&loopback_timer, rose_loopback_timer, 0); } static int rose_loopback_running(void) @@ -50,20 +51,16 @@ int rose_loopback_queue(struct sk_buff *skb, struct rose_neigh *neigh) return 1; } -static void rose_loopback_timer(unsigned long); static void rose_set_loopback_timer(void) { del_timer(&loopback_timer); - loopback_timer.data = 0; - loopback_timer.function = &rose_loopback_timer; loopback_timer.expires = jiffies + 10; - add_timer(&loopback_timer); } -static void rose_loopback_timer(unsigned long param) +static void rose_loopback_timer(struct timer_list *unused) { struct sk_buff *skb; struct net_device *dev; diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 452bbb38d943..8ca3124df83f 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -104,8 +104,8 @@ static int __must_check rose_add_node(struct rose_route_struct *rose_route, skb_queue_head_init(&rose_neigh->queue); - init_timer(&rose_neigh->ftimer); - init_timer(&rose_neigh->t0timer); + timer_setup(&rose_neigh->ftimer, NULL, 0); + timer_setup(&rose_neigh->t0timer, NULL, 0); if (rose_route->ndigis != 0) { rose_neigh->digipeat = @@ -346,6 +346,7 @@ static int rose_del_node(struct rose_route_struct *rose_route, case 0: rose_node->neighbour[0] = rose_node->neighbour[1]; + /* fall through */ case 1: rose_node->neighbour[1] = rose_node->neighbour[2]; @@ -390,8 +391,8 @@ void rose_add_loopback_neigh(void) skb_queue_head_init(&sn->queue); - init_timer(&sn->ftimer); - init_timer(&sn->t0timer); + timer_setup(&sn->ftimer, NULL, 0); + timer_setup(&sn->t0timer, NULL, 0); spin_lock_bh(&rose_neigh_list_lock); sn->next = rose_neigh_list; @@ -507,6 +508,7 @@ void rose_rt_device_down(struct net_device *dev) switch (i) { case 0: t->neighbour[0] = t->neighbour[1]; + /* fall through */ case 1: t->neighbour[1] = t->neighbour[2]; case 2: diff --git a/net/rose/rose_timer.c b/net/rose/rose_timer.c index bc5469d6d9cb..ea613b2a9735 100644 --- a/net/rose/rose_timer.c +++ b/net/rose/rose_timer.c @@ -28,16 +28,15 @@ #include <linux/interrupt.h> #include <net/rose.h> -static void rose_heartbeat_expiry(unsigned long); -static void rose_timer_expiry(unsigned long); -static void rose_idletimer_expiry(unsigned long); +static void rose_heartbeat_expiry(struct timer_list *t); +static void rose_timer_expiry(struct timer_list *); +static void rose_idletimer_expiry(struct timer_list *); void rose_start_heartbeat(struct sock *sk) { del_timer(&sk->sk_timer); - sk->sk_timer.data = (unsigned long)sk; - sk->sk_timer.function = &rose_heartbeat_expiry; + sk->sk_timer.function = (TIMER_FUNC_TYPE)rose_heartbeat_expiry; sk->sk_timer.expires = jiffies + 5 * HZ; add_timer(&sk->sk_timer); @@ -49,8 +48,7 @@ void rose_start_t1timer(struct sock *sk) del_timer(&rose->timer); - rose->timer.data = (unsigned long)sk; - rose->timer.function = &rose_timer_expiry; + rose->timer.function = (TIMER_FUNC_TYPE)rose_timer_expiry; rose->timer.expires = jiffies + rose->t1; add_timer(&rose->timer); @@ -62,8 +60,7 @@ void rose_start_t2timer(struct sock *sk) del_timer(&rose->timer); - rose->timer.data = (unsigned long)sk; - rose->timer.function = &rose_timer_expiry; + rose->timer.function = (TIMER_FUNC_TYPE)rose_timer_expiry; rose->timer.expires = jiffies + rose->t2; add_timer(&rose->timer); @@ -75,8 +72,7 @@ void rose_start_t3timer(struct sock *sk) del_timer(&rose->timer); - rose->timer.data = (unsigned long)sk; - rose->timer.function = &rose_timer_expiry; + rose->timer.function = (TIMER_FUNC_TYPE)rose_timer_expiry; rose->timer.expires = jiffies + rose->t3; add_timer(&rose->timer); @@ -88,8 +84,7 @@ void rose_start_hbtimer(struct sock *sk) del_timer(&rose->timer); - rose->timer.data = (unsigned long)sk; - rose->timer.function = &rose_timer_expiry; + rose->timer.function = (TIMER_FUNC_TYPE)rose_timer_expiry; rose->timer.expires = jiffies + rose->hb; add_timer(&rose->timer); @@ -102,8 +97,7 @@ void rose_start_idletimer(struct sock *sk) del_timer(&rose->idletimer); if (rose->idle > 0) { - rose->idletimer.data = (unsigned long)sk; - rose->idletimer.function = &rose_idletimer_expiry; + rose->idletimer.function = (TIMER_FUNC_TYPE)rose_idletimer_expiry; rose->idletimer.expires = jiffies + rose->idle; add_timer(&rose->idletimer); @@ -125,9 +119,9 @@ void rose_stop_idletimer(struct sock *sk) del_timer(&rose_sk(sk)->idletimer); } -static void rose_heartbeat_expiry(unsigned long param) +static void rose_heartbeat_expiry(struct timer_list *t) { - struct sock *sk = (struct sock *)param; + struct sock *sk = from_timer(sk, t, sk_timer); struct rose_sock *rose = rose_sk(sk); bh_lock_sock(sk); @@ -163,10 +157,10 @@ static void rose_heartbeat_expiry(unsigned long param) bh_unlock_sock(sk); } -static void rose_timer_expiry(unsigned long param) +static void rose_timer_expiry(struct timer_list *t) { - struct sock *sk = (struct sock *)param; - struct rose_sock *rose = rose_sk(sk); + struct rose_sock *rose = from_timer(rose, t, timer); + struct sock *sk = &rose->sock; bh_lock_sock(sk); switch (rose->state) { @@ -192,9 +186,10 @@ static void rose_timer_expiry(unsigned long param) bh_unlock_sock(sk); } -static void rose_idletimer_expiry(unsigned long param) +static void rose_idletimer_expiry(struct timer_list *t) { - struct sock *sk = (struct sock *)param; + struct rose_sock *rose = from_timer(rose, t, idletimer); + struct sock *sk = &rose->sock; bh_lock_sock(sk); rose_clear_queues(sk); diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile index 9c68d2f8ba39..6ffb7e9887ce 100644 --- a/net/rxrpc/Makefile +++ b/net/rxrpc/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for Linux kernel RxRPC # diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c index a2ad4482376f..9b5c46b052fd 100644 --- a/net/rxrpc/af_rxrpc.c +++ b/net/rxrpc/af_rxrpc.c @@ -246,6 +246,7 @@ static int rxrpc_listen(struct socket *sock, int backlog) ret = 0; break; } + /* Fall through */ default: ret = -EBUSY; break; @@ -265,6 +266,7 @@ static int rxrpc_listen(struct socket *sock, int backlog) * @tx_total_len: Total length of data to transmit during the call (or -1) * @gfp: The allocation constraints * @notify_rx: Where to send notifications instead of socket queue + * @upgrade: Request service upgrade for call * * Allow a kernel service to begin a call on the nominated socket. This just * sets up all the internal tracking structures and allocates connection and @@ -279,7 +281,8 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, unsigned long user_call_ID, s64 tx_total_len, gfp_t gfp, - rxrpc_notify_rx_t notify_rx) + rxrpc_notify_rx_t notify_rx, + bool upgrade) { struct rxrpc_conn_parameters cp; struct rxrpc_call *call; @@ -304,19 +307,29 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock, cp.key = key; cp.security_level = 0; cp.exclusive = false; + cp.upgrade = upgrade; cp.service_id = srx->srx_service; call = rxrpc_new_client_call(rx, &cp, srx, user_call_ID, tx_total_len, gfp); /* The socket has been unlocked. */ - if (!IS_ERR(call)) + if (!IS_ERR(call)) { call->notify_rx = notify_rx; + mutex_unlock(&call->user_mutex); + } - mutex_unlock(&call->user_mutex); _leave(" = %p", call); return call; } EXPORT_SYMBOL(rxrpc_kernel_begin_call); +/* + * Dummy function used to stop the notifier talking to recvmsg(). + */ +static void rxrpc_dummy_notify_rx(struct sock *sk, struct rxrpc_call *rxcall, + unsigned long call_user_ID) +{ +} + /** * rxrpc_kernel_end_call - Allow a kernel service to end a call it was using * @sock: The socket the call is on @@ -331,12 +344,108 @@ void rxrpc_kernel_end_call(struct socket *sock, struct rxrpc_call *call) mutex_lock(&call->user_mutex); rxrpc_release_call(rxrpc_sk(sock->sk), call); + + /* Make sure we're not going to call back into a kernel service */ + if (call->notify_rx) { + spin_lock_bh(&call->notify_lock); + call->notify_rx = rxrpc_dummy_notify_rx; + spin_unlock_bh(&call->notify_lock); + } + mutex_unlock(&call->user_mutex); rxrpc_put_call(call, rxrpc_call_put_kernel); } EXPORT_SYMBOL(rxrpc_kernel_end_call); /** + * rxrpc_kernel_check_life - Check to see whether a call is still alive + * @sock: The socket the call is on + * @call: The call to check + * + * Allow a kernel service to find out whether a call is still alive - ie. we're + * getting ACKs from the server. Returns a number representing the life state + * which can be compared to that returned by a previous call. + * + * If this is a client call, ping ACKs will be sent to the server to find out + * whether it's still responsive and whether the call is still alive on the + * server. + */ +u32 rxrpc_kernel_check_life(struct socket *sock, struct rxrpc_call *call) +{ + return call->acks_latest; +} +EXPORT_SYMBOL(rxrpc_kernel_check_life); + +/** + * rxrpc_kernel_check_call - Check a call's state + * @sock: The socket the call is on + * @call: The call to check + * @_compl: Where to store the completion state + * @_abort_code: Where to store any abort code + * + * Allow a kernel service to query the state of a call and find out the manner + * of its termination if it has completed. Returns -EINPROGRESS if the call is + * still going, 0 if the call finished successfully, -ECONNABORTED if the call + * was aborted and an appropriate error if the call failed in some other way. + */ +int rxrpc_kernel_check_call(struct socket *sock, struct rxrpc_call *call, + enum rxrpc_call_completion *_compl, u32 *_abort_code) +{ + if (call->state != RXRPC_CALL_COMPLETE) + return -EINPROGRESS; + smp_rmb(); + *_compl = call->completion; + *_abort_code = call->abort_code; + return call->error; +} +EXPORT_SYMBOL(rxrpc_kernel_check_call); + +/** + * rxrpc_kernel_retry_call - Allow a kernel service to retry a call + * @sock: The socket the call is on + * @call: The call to retry + * @srx: The address of the peer to contact + * @key: The security context to use (defaults to socket setting) + * + * Allow a kernel service to try resending a client call that failed due to a + * network error to a new address. The Tx queue is maintained intact, thereby + * relieving the need to re-encrypt any request data that has already been + * buffered. + */ +int rxrpc_kernel_retry_call(struct socket *sock, struct rxrpc_call *call, + struct sockaddr_rxrpc *srx, struct key *key) +{ + struct rxrpc_conn_parameters cp; + struct rxrpc_sock *rx = rxrpc_sk(sock->sk); + int ret; + + _enter("%d{%d}", call->debug_id, atomic_read(&call->usage)); + + if (!key) + key = rx->key; + if (key && !key->payload.data[0]) + key = NULL; /* a no-security key */ + + memset(&cp, 0, sizeof(cp)); + cp.local = rx->local; + cp.key = key; + cp.security_level = 0; + cp.exclusive = false; + cp.service_id = srx->srx_service; + + mutex_lock(&call->user_mutex); + + ret = rxrpc_prepare_call_for_retry(rx, call); + if (ret == 0) + ret = rxrpc_retry_client_call(rx, call, &cp, srx, GFP_KERNEL); + + mutex_unlock(&call->user_mutex); + _leave(" = %d", ret); + return ret; +} +EXPORT_SYMBOL(rxrpc_kernel_retry_call); + +/** * rxrpc_kernel_new_call_notification - Get notifications of new calls * @sock: The socket to intercept received messages on * @notify_new_call: Function to be called when new calls appear @@ -468,6 +577,7 @@ static int rxrpc_sendmsg(struct socket *sock, struct msghdr *m, size_t len) m->msg_name = &rx->connect_srx; m->msg_namelen = sizeof(rx->connect_srx); } + /* Fall through */ case RXRPC_SERVER_BOUND: case RXRPC_SERVER_LISTENING: ret = rxrpc_do_sendmsg(rx, m, len); @@ -591,13 +701,13 @@ static int rxrpc_getsockopt(struct socket *sock, int level, int optname, char __user *optval, int __user *_optlen) { int optlen; - + if (level != SOL_RXRPC) return -EOPNOTSUPP; if (get_user(optlen, _optlen)) return -EFAULT; - + switch (optname) { case RXRPC_SUPPORTED_CMSG: if (optlen < sizeof(int)) @@ -606,7 +716,7 @@ static int rxrpc_getsockopt(struct socket *sock, int level, int optname, put_user(sizeof(int), _optlen)) return -EFAULT; return 0; - + default: return -EOPNOTSUPP; } diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 69b97339ff9d..b2151993d384 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -15,7 +15,7 @@ #include <net/netns/generic.h> #include <net/sock.h> #include <net/af_rxrpc.h> -#include <rxrpc/packet.h> +#include "protocol.h" #if 0 #define CHECK_SLAB_OKAY(X) \ @@ -445,6 +445,7 @@ enum rxrpc_call_flag { RXRPC_CALL_EXPOSED, /* The call was exposed to the world */ RXRPC_CALL_RX_LAST, /* Received the last packet (at rxtx_top) */ RXRPC_CALL_TX_LAST, /* Last packet in Tx buffer (at rxtx_top) */ + RXRPC_CALL_TX_LASTQ, /* Last packet has been queued */ RXRPC_CALL_SEND_PING, /* A ping will need to be sent */ RXRPC_CALL_PINGING, /* Ping in process */ RXRPC_CALL_RETRANS_TIMEOUT, /* Retransmission due to timeout occurred */ @@ -482,18 +483,6 @@ enum rxrpc_call_state { }; /* - * Call completion condition (state == RXRPC_CALL_COMPLETE). - */ -enum rxrpc_call_completion { - RXRPC_CALL_SUCCEEDED, /* - Normal termination */ - RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */ - RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */ - RXRPC_CALL_LOCAL_ERROR, /* - call failed due to local error */ - RXRPC_CALL_NETWORK_ERROR, /* - call terminated by network error */ - NR__RXRPC_CALL_COMPLETIONS -}; - -/* * Call Tx congestion management modes. */ enum rxrpc_congest_mode { @@ -536,6 +525,7 @@ struct rxrpc_call { unsigned long flags; unsigned long events; spinlock_t lock; + spinlock_t notify_lock; /* Kernel notification lock */ rwlock_t state_lock; /* lock for state transition */ u32 abort_code; /* Local/remote abort code */ int error; /* Local error incurred */ @@ -687,9 +677,15 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *, struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *, unsigned long, s64, gfp_t); +int rxrpc_retry_client_call(struct rxrpc_sock *, + struct rxrpc_call *, + struct rxrpc_conn_parameters *, + struct sockaddr_rxrpc *, + gfp_t); void rxrpc_incoming_call(struct rxrpc_sock *, struct rxrpc_call *, struct sk_buff *); void rxrpc_release_call(struct rxrpc_sock *, struct rxrpc_call *); +int rxrpc_prepare_call_for_retry(struct rxrpc_sock *, struct rxrpc_call *); void rxrpc_release_calls_on_socket(struct rxrpc_sock *); bool __rxrpc_queue_call(struct rxrpc_call *); bool rxrpc_queue_call(struct rxrpc_call *); @@ -830,7 +826,6 @@ void rxrpc_process_connection(struct work_struct *); */ extern unsigned int rxrpc_connection_expiry; -int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *); struct rxrpc_connection *rxrpc_alloc_connection(gfp_t); struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *, struct sk_buff *); @@ -894,7 +889,7 @@ extern struct key_type key_type_rxrpc_s; int rxrpc_request_key(struct rxrpc_sock *, char __user *, int); int rxrpc_server_keyring(struct rxrpc_sock *, char __user *, int); -int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, time_t, +int rxrpc_get_server_data_key(struct rxrpc_connection *, const void *, time64_t, u32); /* @@ -1060,7 +1055,8 @@ static inline void rxrpc_sysctl_exit(void) {} /* * utils.c */ -int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *, struct sk_buff *); +int rxrpc_extract_addr_from_skb(struct rxrpc_local *, struct sockaddr_rxrpc *, + struct sk_buff *); static inline bool before(u32 seq1, u32 seq2) { diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index dd30d74824b0..cbd1701e813a 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -223,6 +223,7 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx) tail = b->call_backlog_tail; while (CIRC_CNT(head, tail, size) > 0) { struct rxrpc_call *call = b->call_backlog[tail]; + call->socket = rx; if (rx->discard_new_call) { _debug("discard %lx", call->user_call_ID); rx->discard_new_call(call, call->user_call_ID); @@ -276,7 +277,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx, * anticipation - and to save on stack space. */ xpeer = b->peer_backlog[peer_tail]; - if (rxrpc_extract_addr_from_skb(&xpeer->srx, skb) < 0) + if (rxrpc_extract_addr_from_skb(local, &xpeer->srx, skb) < 0) return NULL; peer = rxrpc_lookup_incoming_peer(local, xpeer); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index 7a77844aab16..3574508baf9a 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -386,7 +386,7 @@ recheck_state: now = ktime_get_real(); if (ktime_before(call->expire_at, now)) { - rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, -ETIME); + rxrpc_abort_call("EXP", call, 0, RX_USER_ABORT, -ETIME); set_bit(RXRPC_CALL_EV_ABORT, &call->events); goto recheck_state; } diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c index d7809a0620b4..4c7fbc6dcce7 100644 --- a/net/rxrpc/call_object.c +++ b/net/rxrpc/call_object.c @@ -124,6 +124,7 @@ struct rxrpc_call *rxrpc_alloc_call(gfp_t gfp) INIT_LIST_HEAD(&call->sock_link); init_waitqueue_head(&call->waitq); spin_lock_init(&call->lock); + spin_lock_init(&call->notify_lock); rwlock_init(&call->state_lock); atomic_set(&call->usage, 1); call->debug_id = atomic_inc_return(&rxrpc_debug_id); @@ -269,11 +270,6 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx, trace_rxrpc_call(call, rxrpc_call_connected, atomic_read(&call->usage), here, NULL); - spin_lock_bh(&call->conn->params.peer->lock); - hlist_add_head(&call->error_link, - &call->conn->params.peer->error_targets); - spin_unlock_bh(&call->conn->params.peer->lock); - rxrpc_start_call_timer(call); _net("CALL new %d on CONN %d", call->debug_id, call->conn->debug_id); @@ -304,6 +300,48 @@ error: } /* + * Retry a call to a new address. It is expected that the Tx queue of the call + * will contain data previously packaged for an old call. + */ +int rxrpc_retry_client_call(struct rxrpc_sock *rx, + struct rxrpc_call *call, + struct rxrpc_conn_parameters *cp, + struct sockaddr_rxrpc *srx, + gfp_t gfp) +{ + const void *here = __builtin_return_address(0); + int ret; + + /* Set up or get a connection record and set the protocol parameters, + * including channel number and call ID. + */ + ret = rxrpc_connect_call(call, cp, srx, gfp); + if (ret < 0) + goto error; + + trace_rxrpc_call(call, rxrpc_call_connected, atomic_read(&call->usage), + here, NULL); + + rxrpc_start_call_timer(call); + + _net("CALL new %d on CONN %d", call->debug_id, call->conn->debug_id); + + if (!test_and_set_bit(RXRPC_CALL_EV_RESEND, &call->events)) + rxrpc_queue_call(call); + + _leave(" = 0"); + return 0; + +error: + rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, + RX_CALL_DEAD, ret); + trace_rxrpc_call(call, rxrpc_call_error, atomic_read(&call->usage), + here, ERR_PTR(ret)); + _leave(" = %d", ret); + return ret; +} + +/* * Set up an incoming call. call->conn points to the connection. * This is called in BH context and isn't allowed to fail. */ @@ -471,6 +509,61 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call) } /* + * Prepare a kernel service call for retry. + */ +int rxrpc_prepare_call_for_retry(struct rxrpc_sock *rx, struct rxrpc_call *call) +{ + const void *here = __builtin_return_address(0); + int i; + u8 last = 0; + + _enter("{%d,%d}", call->debug_id, atomic_read(&call->usage)); + + trace_rxrpc_call(call, rxrpc_call_release, atomic_read(&call->usage), + here, (const void *)call->flags); + + ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE); + ASSERTCMP(call->completion, !=, RXRPC_CALL_REMOTELY_ABORTED); + ASSERTCMP(call->completion, !=, RXRPC_CALL_LOCALLY_ABORTED); + ASSERT(list_empty(&call->recvmsg_link)); + + del_timer_sync(&call->timer); + + _debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, call->conn); + + if (call->conn) + rxrpc_disconnect_call(call); + + if (rxrpc_is_service_call(call) || + !call->tx_phase || + call->tx_hard_ack != 0 || + call->rx_hard_ack != 0 || + call->rx_top != 0) + return -EINVAL; + + call->state = RXRPC_CALL_UNINITIALISED; + call->completion = RXRPC_CALL_SUCCEEDED; + call->call_id = 0; + call->cid = 0; + call->cong_cwnd = 0; + call->cong_extra = 0; + call->cong_ssthresh = 0; + call->cong_mode = 0; + call->cong_dup_acks = 0; + call->cong_cumul_acks = 0; + call->acks_lowest_nak = 0; + + for (i = 0; i < RXRPC_RXTX_BUFF_SIZE; i++) { + last |= call->rxtx_annotations[i]; + call->rxtx_annotations[i] &= RXRPC_TX_ANNO_LAST; + call->rxtx_annotations[i] |= RXRPC_TX_ANNO_RETRANS; + } + + _leave(" = 0"); + return 0; +} + +/* * release all the calls associated with a socket */ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx) diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index eb2157680399..5f9624bd311c 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -555,7 +555,10 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn, trace_rxrpc_client(conn, channel, rxrpc_client_chan_activate); write_lock_bh(&call->state_lock); - call->state = RXRPC_CALL_CLIENT_SEND_REQUEST; + if (!test_bit(RXRPC_CALL_TX_LASTQ, &call->flags)) + call->state = RXRPC_CALL_CLIENT_SEND_REQUEST; + else + call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY; write_unlock_bh(&call->state_lock); rxrpc_see_call(call); @@ -688,15 +691,23 @@ int rxrpc_connect_call(struct rxrpc_call *call, ret = rxrpc_get_client_conn(call, cp, srx, gfp); if (ret < 0) - return ret; + goto out; rxrpc_animate_client_conn(rxnet, call->conn); rxrpc_activate_channels(call->conn); ret = rxrpc_wait_for_channel(call, gfp); - if (ret < 0) + if (ret < 0) { rxrpc_disconnect_client_call(call); + goto out; + } + + spin_lock_bh(&call->conn->params.peer->lock); + hlist_add_head(&call->error_link, + &call->conn->params.peer->error_targets); + spin_unlock_bh(&call->conn->params.peer->lock); +out: _leave(" = %d", ret); return ret; } diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c index 929b50d5afe8..fe575798592f 100644 --- a/net/rxrpc/conn_object.c +++ b/net/rxrpc/conn_object.c @@ -72,7 +72,7 @@ struct rxrpc_connection *rxrpc_find_connection_rcu(struct rxrpc_local *local, _enter(",%x", sp->hdr.cid & RXRPC_CIDMASK); - if (rxrpc_extract_addr_from_skb(&srx, skb) < 0) + if (rxrpc_extract_addr_from_skb(local, &srx, skb) < 0) goto not_found; k.epoch = sp->hdr.epoch; diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c index e60fcd2a4a02..f6fcdb3130a1 100644 --- a/net/rxrpc/conn_service.c +++ b/net/rxrpc/conn_service.c @@ -50,12 +50,11 @@ struct rxrpc_connection *rxrpc_find_service_conn_rcu(struct rxrpc_peer *peer, else if (conn->proto.index_key > k.index_key) p = rcu_dereference_raw(p->rb_right); else - goto done; + break; conn = NULL; } } while (need_seqretry(&peer->service_conn_lock, seq)); -done: done_seqretry(&peer->service_conn_lock, seq); _leave(" = %d", conn ? conn->debug_id : -1); return conn; diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index e56e23ed2229..1b592073ec96 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -298,8 +298,6 @@ static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun, write_unlock(&call->state_lock); if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY) { - rxrpc_propose_ACK(call, RXRPC_ACK_IDLE, 0, 0, false, true, - rxrpc_propose_ack_client_tx_end); trace_rxrpc_transmit(call, rxrpc_transmit_await_reply); } else { trace_rxrpc_transmit(call, rxrpc_transmit_end); @@ -1125,6 +1123,7 @@ void rxrpc_data_ready(struct sock *udp_sk) case RXRPC_PACKET_TYPE_BUSY: if (sp->hdr.flags & RXRPC_CLIENT_INITIATED) goto discard; + /* Fall through */ case RXRPC_PACKET_TYPE_DATA: if (sp->hdr.callNumber == 0) diff --git a/net/rxrpc/key.c b/net/rxrpc/key.c index 54369225766e..e7f6b8823eb6 100644 --- a/net/rxrpc/key.c +++ b/net/rxrpc/key.c @@ -92,6 +92,7 @@ static int rxrpc_preparse_xdr_rxkad(struct key_preparsed_payload *prep, const __be32 *xdr, unsigned int toklen) { struct rxrpc_key_token *token, **pptoken; + time64_t expiry; size_t plen; u32 tktlen; @@ -158,8 +159,9 @@ static int rxrpc_preparse_xdr_rxkad(struct key_preparsed_payload *prep, pptoken = &(*pptoken)->next) continue; *pptoken = token; - if (token->kad->expiry < prep->expiry) - prep->expiry = token->kad->expiry; + expiry = rxrpc_u32_to_time64(token->kad->expiry); + if (expiry < prep->expiry) + prep->expiry = expiry; _leave(" = 0"); return 0; @@ -433,6 +435,7 @@ static int rxrpc_preparse_xdr_rxk5(struct key_preparsed_payload *prep, struct rxrpc_key_token *token, **pptoken; struct rxk5_key *rxk5; const __be32 *end_xdr = xdr + (toklen >> 2); + time64_t expiry; int ret; _enter(",{%x,%x,%x,%x},%u", @@ -533,8 +536,9 @@ static int rxrpc_preparse_xdr_rxk5(struct key_preparsed_payload *prep, pptoken = &(*pptoken)->next) continue; *pptoken = token; - if (token->kad->expiry < prep->expiry) - prep->expiry = token->kad->expiry; + expiry = rxrpc_u32_to_time64(token->k5->endtime); + if (expiry < prep->expiry) + prep->expiry = expiry; _leave(" = 0"); return 0; @@ -691,6 +695,7 @@ static int rxrpc_preparse(struct key_preparsed_payload *prep) { const struct rxrpc_key_data_v1 *v1; struct rxrpc_key_token *token, **pp; + time64_t expiry; size_t plen; u32 kver; int ret; @@ -777,8 +782,9 @@ static int rxrpc_preparse(struct key_preparsed_payload *prep) while (*pp) pp = &(*pp)->next; *pp = token; - if (token->kad->expiry < prep->expiry) - prep->expiry = token->kad->expiry; + expiry = rxrpc_u32_to_time64(token->kad->expiry); + if (expiry < prep->expiry) + prep->expiry = expiry; token = NULL; ret = 0; @@ -955,7 +961,7 @@ int rxrpc_server_keyring(struct rxrpc_sock *rx, char __user *optval, */ int rxrpc_get_server_data_key(struct rxrpc_connection *conn, const void *session_key, - time_t expiry, + time64_t expiry, u32 kvno) { const struct cred *cred = current_cred(); @@ -982,7 +988,7 @@ int rxrpc_get_server_data_key(struct rxrpc_connection *conn, data.kver = 1; data.v1.security_index = RXRPC_SECURITY_RXKAD; data.v1.ticket_length = 0; - data.v1.expiry = expiry; + data.v1.expiry = rxrpc_time64_to_u32(expiry); data.v1.kvno = 0; memcpy(&data.v1.session_key, session_key, sizeof(data.v1.session_key)); diff --git a/net/rxrpc/local_event.c b/net/rxrpc/local_event.c index 540d3955c1bc..93b5d910b4a1 100644 --- a/net/rxrpc/local_event.c +++ b/net/rxrpc/local_event.c @@ -39,7 +39,7 @@ static void rxrpc_send_version_request(struct rxrpc_local *local, _enter(""); - if (rxrpc_extract_addr_from_skb(&srx, skb) < 0) + if (rxrpc_extract_addr_from_skb(local, &srx, skb) < 0) return; msg.msg_name = &srx.transport; diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 5bd2d0fa4a03..f47659c7b224 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -35,7 +35,8 @@ struct rxrpc_abort_buffer { /* * Fill out an ACK packet. */ -static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, +static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn, + struct rxrpc_call *call, struct rxrpc_ack_buffer *pkt, rxrpc_seq_t *_hard_ack, rxrpc_seq_t *_top, @@ -77,8 +78,8 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_call *call, } while (before_eq(seq, top)); } - mtu = call->conn->params.peer->if_mtu; - mtu -= call->conn->params.peer->hdrsize; + mtu = conn->params.peer->if_mtu; + mtu -= conn->params.peer->hdrsize; jmax = (call->nr_jumbo_bad > 3) ? 1 : rxrpc_rx_jumbo_max; pkt->ackinfo.rxMTU = htonl(rxrpc_rx_mtu); pkt->ackinfo.maxMTU = htonl(mtu); @@ -148,7 +149,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, bool ping) } call->ackr_reason = 0; } - n = rxrpc_fill_out_ack(call, pkt, &hard_ack, &top, reason); + n = rxrpc_fill_out_ack(conn, call, pkt, &hard_ack, &top, reason); spin_unlock_bh(&call->lock); @@ -221,6 +222,16 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call) rxrpc_serial_t serial; int ret; + /* Don't bother sending aborts for a client call once the server has + * hard-ACK'd all of its request data. After that point, we're not + * going to stop the operation proceeding, and whilst we might limit + * the reply, it's not worth it if we can send a new call on the same + * channel instead, thereby closing off this call. + */ + if (rxrpc_is_client_call(call) && + test_bit(RXRPC_CALL_TX_LAST, &call->flags)) + return 0; + spin_lock_bh(&call->lock); if (call->conn) conn = rxrpc_get_connection_maybe(call->conn); @@ -444,7 +455,7 @@ void rxrpc_reject_packets(struct rxrpc_local *local) rxrpc_see_skb(skb, rxrpc_skb_rx_seen); sp = rxrpc_skb(skb); - if (rxrpc_extract_addr_from_skb(&srx, skb) == 0) { + if (rxrpc_extract_addr_from_skb(local, &srx, skb) == 0) { msg.msg_namelen = srx.transport_len; code = htonl(skb->priority); diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 1ed9c0c2e94f..7f749505e699 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -37,6 +37,7 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local, memset(&srx, 0, sizeof(srx)); srx.transport_type = local->srx.transport_type; + srx.transport_len = local->srx.transport_len; srx.transport.family = local->srx.transport.family; /* Can we see an ICMP4 packet on an ICMP6 listening socket? and vice @@ -45,7 +46,6 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local, switch (srx.transport.family) { case AF_INET: srx.transport.sin.sin_port = serr->port; - srx.transport_len = sizeof(struct sockaddr_in); switch (serr->ee.ee_origin) { case SO_EE_ORIGIN_ICMP: _net("Rx ICMP"); @@ -69,7 +69,6 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local, #ifdef CONFIG_AF_RXRPC_IPV6 case AF_INET6: srx.transport.sin6.sin6_port = serr->port; - srx.transport_len = sizeof(struct sockaddr_in6); switch (serr->ee.ee_origin) { case SO_EE_ORIGIN_ICMP6: _net("Rx ICMP6"); @@ -79,6 +78,9 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local, break; case SO_EE_ORIGIN_ICMP: _net("Rx ICMP on v6 sock"); + srx.transport.sin6.sin6_addr.s6_addr32[0] = 0; + srx.transport.sin6.sin6_addr.s6_addr32[1] = 0; + srx.transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff); memcpy(srx.transport.sin6.sin6_addr.s6_addr + 12, skb_network_header(skb) + serr->addr_offset, sizeof(struct in_addr)); diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 5787f97f5330..d02a99f37f5f 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -411,3 +411,16 @@ void rxrpc_kernel_get_peer(struct socket *sock, struct rxrpc_call *call, *_srx = call->peer->srx; } EXPORT_SYMBOL(rxrpc_kernel_get_peer); + +/** + * rxrpc_kernel_get_rtt - Get a call's peer RTT + * @sock: The socket on which the call is in progress. + * @call: The call to query + * + * Get the call's peer RTT. + */ +u64 rxrpc_kernel_get_rtt(struct socket *sock, struct rxrpc_call *call) +{ + return call->peer->rtt; +} +EXPORT_SYMBOL(rxrpc_kernel_get_rtt); diff --git a/net/rxrpc/protocol.h b/net/rxrpc/protocol.h new file mode 100644 index 000000000000..4bddcf3face3 --- /dev/null +++ b/net/rxrpc/protocol.h @@ -0,0 +1,190 @@ +/* packet.h: Rx packet layout and definitions + * + * Copyright (C) 2002, 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#ifndef _LINUX_RXRPC_PACKET_H +#define _LINUX_RXRPC_PACKET_H + +typedef u32 rxrpc_seq_t; /* Rx message sequence number */ +typedef u32 rxrpc_serial_t; /* Rx message serial number */ +typedef __be32 rxrpc_seq_net_t; /* on-the-wire Rx message sequence number */ +typedef __be32 rxrpc_serial_net_t; /* on-the-wire Rx message serial number */ + +/*****************************************************************************/ +/* + * on-the-wire Rx packet header + * - all multibyte fields should be in network byte order + */ +struct rxrpc_wire_header { + __be32 epoch; /* client boot timestamp */ +#define RXRPC_RANDOM_EPOCH 0x80000000 /* Random if set, date-based if not */ + + __be32 cid; /* connection and channel ID */ +#define RXRPC_MAXCALLS 4 /* max active calls per conn */ +#define RXRPC_CHANNELMASK (RXRPC_MAXCALLS-1) /* mask for channel ID */ +#define RXRPC_CIDMASK (~RXRPC_CHANNELMASK) /* mask for connection ID */ +#define RXRPC_CIDSHIFT ilog2(RXRPC_MAXCALLS) /* shift for connection ID */ +#define RXRPC_CID_INC (1 << RXRPC_CIDSHIFT) /* connection ID increment */ + + __be32 callNumber; /* call ID (0 for connection-level packets) */ + __be32 seq; /* sequence number of pkt in call stream */ + __be32 serial; /* serial number of pkt sent to network */ + + uint8_t type; /* packet type */ +#define RXRPC_PACKET_TYPE_DATA 1 /* data */ +#define RXRPC_PACKET_TYPE_ACK 2 /* ACK */ +#define RXRPC_PACKET_TYPE_BUSY 3 /* call reject */ +#define RXRPC_PACKET_TYPE_ABORT 4 /* call/connection abort */ +#define RXRPC_PACKET_TYPE_ACKALL 5 /* ACK all outstanding packets on call */ +#define RXRPC_PACKET_TYPE_CHALLENGE 6 /* connection security challenge (SRVR->CLNT) */ +#define RXRPC_PACKET_TYPE_RESPONSE 7 /* connection secutity response (CLNT->SRVR) */ +#define RXRPC_PACKET_TYPE_DEBUG 8 /* debug info request */ +#define RXRPC_PACKET_TYPE_VERSION 13 /* version string request */ +#define RXRPC_N_PACKET_TYPES 14 /* number of packet types (incl type 0) */ + + uint8_t flags; /* packet flags */ +#define RXRPC_CLIENT_INITIATED 0x01 /* signifies a packet generated by a client */ +#define RXRPC_REQUEST_ACK 0x02 /* request an unconditional ACK of this packet */ +#define RXRPC_LAST_PACKET 0x04 /* the last packet from this side for this call */ +#define RXRPC_MORE_PACKETS 0x08 /* more packets to come */ +#define RXRPC_JUMBO_PACKET 0x20 /* [DATA] this is a jumbo packet */ +#define RXRPC_SLOW_START_OK 0x20 /* [ACK] slow start supported */ + + uint8_t userStatus; /* app-layer defined status */ +#define RXRPC_USERSTATUS_SERVICE_UPGRADE 0x01 /* AuriStor service upgrade request */ + + uint8_t securityIndex; /* security protocol ID */ + union { + __be16 _rsvd; /* reserved */ + __be16 cksum; /* kerberos security checksum */ + }; + __be16 serviceId; /* service ID */ + +} __packed; + +#define RXRPC_SUPPORTED_PACKET_TYPES ( \ + (1 << RXRPC_PACKET_TYPE_DATA) | \ + (1 << RXRPC_PACKET_TYPE_ACK) | \ + (1 << RXRPC_PACKET_TYPE_BUSY) | \ + (1 << RXRPC_PACKET_TYPE_ABORT) | \ + (1 << RXRPC_PACKET_TYPE_ACKALL) | \ + (1 << RXRPC_PACKET_TYPE_CHALLENGE) | \ + (1 << RXRPC_PACKET_TYPE_RESPONSE) | \ + /*(1 << RXRPC_PACKET_TYPE_DEBUG) | */ \ + (1 << RXRPC_PACKET_TYPE_VERSION)) + +/*****************************************************************************/ +/* + * jumbo packet secondary header + * - can be mapped to read header by: + * - new_serial = serial + 1 + * - new_seq = seq + 1 + * - new_flags = j_flags + * - new__rsvd = j__rsvd + * - duplicating all other fields + */ +struct rxrpc_jumbo_header { + uint8_t flags; /* packet flags (as per rxrpc_header) */ + uint8_t pad; + union { + __be16 _rsvd; /* reserved */ + __be16 cksum; /* kerberos security checksum */ + }; +}; + +#define RXRPC_JUMBO_DATALEN 1412 /* non-terminal jumbo packet data length */ +#define RXRPC_JUMBO_SUBPKTLEN (RXRPC_JUMBO_DATALEN + sizeof(struct rxrpc_jumbo_header)) + +/*****************************************************************************/ +/* + * on-the-wire Rx ACK packet data payload + * - all multibyte fields should be in network byte order + */ +struct rxrpc_ackpacket { + __be16 bufferSpace; /* number of packet buffers available */ + __be16 maxSkew; /* diff between serno being ACK'd and highest serial no + * received */ + __be32 firstPacket; /* sequence no of first ACK'd packet in attached list */ + __be32 previousPacket; /* sequence no of previous packet received */ + __be32 serial; /* serial no of packet that prompted this ACK */ + + uint8_t reason; /* reason for ACK */ +#define RXRPC_ACK_REQUESTED 1 /* ACK was requested on packet */ +#define RXRPC_ACK_DUPLICATE 2 /* duplicate packet received */ +#define RXRPC_ACK_OUT_OF_SEQUENCE 3 /* out of sequence packet received */ +#define RXRPC_ACK_EXCEEDS_WINDOW 4 /* packet received beyond end of ACK window */ +#define RXRPC_ACK_NOSPACE 5 /* packet discarded due to lack of buffer space */ +#define RXRPC_ACK_PING 6 /* keep alive ACK */ +#define RXRPC_ACK_PING_RESPONSE 7 /* response to RXRPC_ACK_PING */ +#define RXRPC_ACK_DELAY 8 /* nothing happened since received packet */ +#define RXRPC_ACK_IDLE 9 /* ACK due to fully received ACK window */ +#define RXRPC_ACK__INVALID 10 /* Representation of invalid ACK reason */ + + uint8_t nAcks; /* number of ACKs */ +#define RXRPC_MAXACKS 255 + + uint8_t acks[0]; /* list of ACK/NAKs */ +#define RXRPC_ACK_TYPE_NACK 0 +#define RXRPC_ACK_TYPE_ACK 1 + +} __packed; + +/* Some ACKs refer to specific packets and some are general and can be updated. */ +#define RXRPC_ACK_UPDATEABLE ((1 << RXRPC_ACK_REQUESTED) | \ + (1 << RXRPC_ACK_PING_RESPONSE) | \ + (1 << RXRPC_ACK_DELAY) | \ + (1 << RXRPC_ACK_IDLE)) + + +/* + * ACK packets can have a further piece of information tagged on the end + */ +struct rxrpc_ackinfo { + __be32 rxMTU; /* maximum Rx MTU size (bytes) [AFS 3.3] */ + __be32 maxMTU; /* maximum interface MTU size (bytes) [AFS 3.3] */ + __be32 rwind; /* Rx window size (packets) [AFS 3.4] */ + __be32 jumbo_max; /* max packets to stick into a jumbo packet [AFS 3.5] */ +}; + +/*****************************************************************************/ +/* + * Kerberos security type-2 challenge packet + */ +struct rxkad_challenge { + __be32 version; /* version of this challenge type */ + __be32 nonce; /* encrypted random number */ + __be32 min_level; /* minimum security level */ + __be32 __padding; /* padding to 8-byte boundary */ +} __packed; + +/*****************************************************************************/ +/* + * Kerberos security type-2 response packet + */ +struct rxkad_response { + __be32 version; /* version of this response type */ + __be32 __pad; + + /* encrypted bit of the response */ + struct { + __be32 epoch; /* current epoch */ + __be32 cid; /* parent connection ID */ + __be32 checksum; /* checksum */ + __be32 securityIndex; /* security type */ + __be32 call_id[4]; /* encrypted call IDs */ + __be32 inc_nonce; /* challenge nonce + 1 */ + __be32 level; /* desired level */ + } encrypted; + + __be32 kvno; /* Kerberos key version number */ + __be32 ticket_len; /* Kerberos ticket length */ +} __packed; + +#endif /* _LINUX_RXRPC_PACKET_H */ diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index bdece21f313d..8510a98b87e1 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -40,7 +40,9 @@ void rxrpc_notify_socket(struct rxrpc_call *call) sk = &rx->sk; if (rx && sk->sk_state < RXRPC_CLOSE) { if (call->notify_rx) { + spin_lock_bh(&call->notify_lock); call->notify_rx(sk, call, call->user_call_ID); + spin_unlock_bh(&call->notify_lock); } else { write_lock_bh(&rx->recvmsg_lock); if (list_empty(&call->recvmsg_link)) { @@ -607,6 +609,7 @@ wait_error: * @_offset: The running offset into the buffer. * @want_more: True if more data is expected to be read * @_abort: Where the abort code is stored if -ECONNABORTED is returned + * @_service: Where to store the actual service ID (may be upgraded) * * Allow a kernel service to receive data and pick up information about the * state of a call. Returns 0 if got what was asked for and there's more @@ -624,7 +627,7 @@ wait_error: */ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, void *buf, size_t size, size_t *_offset, - bool want_more, u32 *_abort) + bool want_more, u32 *_abort, u16 *_service) { struct iov_iter iter; struct kvec iov; @@ -680,6 +683,8 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call, read_phase_complete: ret = 1; out: + if (_service) + *_service = call->service_id; mutex_unlock(&call->user_mutex); _leave(" = %d [%zu,%d]", ret, *_offset, *_abort); return ret; diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 46d1a1f0b55b..c38b3a1de56c 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -634,8 +634,8 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn) challenge.min_level = htonl(0); challenge.__padding = 0; - msg.msg_name = &conn->params.peer->srx.transport.sin; - msg.msg_namelen = sizeof(conn->params.peer->srx.transport.sin); + msg.msg_name = &conn->params.peer->srx.transport; + msg.msg_namelen = conn->params.peer->srx.transport_len; msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; @@ -689,8 +689,8 @@ static int rxkad_send_response(struct rxrpc_connection *conn, _enter(""); - msg.msg_name = &conn->params.peer->srx.transport.sin; - msg.msg_namelen = sizeof(conn->params.peer->srx.transport.sin); + msg.msg_name = &conn->params.peer->srx.transport; + msg.msg_namelen = conn->params.peer->srx.transport_len; msg.msg_control = NULL; msg.msg_controllen = 0; msg.msg_flags = 0; @@ -854,7 +854,7 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, struct sk_buff *skb, void *ticket, size_t ticket_len, struct rxrpc_crypt *_session_key, - time_t *_expiry, + time64_t *_expiry, u32 *_abort_code) { struct skcipher_request *req; @@ -864,7 +864,7 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, struct in_addr addr; unsigned int life; const char *eproto; - time_t issue, now; + time64_t issue, now; bool little_endian; int ret; u32 abort_code; @@ -960,15 +960,15 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn, if (little_endian) { __le32 stamp; memcpy(&stamp, p, 4); - issue = le32_to_cpu(stamp); + issue = rxrpc_u32_to_time64(le32_to_cpu(stamp)); } else { __be32 stamp; memcpy(&stamp, p, 4); - issue = be32_to_cpu(stamp); + issue = rxrpc_u32_to_time64(be32_to_cpu(stamp)); } p += 4; - now = get_seconds(); - _debug("KIV ISSUE: %lx [%lx]", issue, now); + now = ktime_get_real_seconds(); + _debug("KIV ISSUE: %llx [%llx]", issue, now); /* check the ticket is in date */ if (issue > now) { @@ -1053,7 +1053,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, struct rxrpc_skb_priv *sp = rxrpc_skb(skb); struct rxrpc_crypt session_key; const char *eproto; - time_t expiry; + time64_t expiry; void *ticket; u32 abort_code, version, kvno, ticket_len, level; __be32 csum; diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index b0d2cda6ec0a..7d2595582c09 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -38,12 +38,86 @@ struct rxrpc_send_params { }; /* + * Wait for space to appear in the Tx queue or a signal to occur. + */ +static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx, + struct rxrpc_call *call, + long *timeo) +{ + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + if (call->tx_top - call->tx_hard_ack < + min_t(unsigned int, call->tx_winsize, + call->cong_cwnd + call->cong_extra)) + return 0; + + if (call->state >= RXRPC_CALL_COMPLETE) + return call->error; + + if (signal_pending(current)) + return sock_intr_errno(*timeo); + + trace_rxrpc_transmit(call, rxrpc_transmit_wait); + mutex_unlock(&call->user_mutex); + *timeo = schedule_timeout(*timeo); + if (mutex_lock_interruptible(&call->user_mutex) < 0) + return sock_intr_errno(*timeo); + } +} + +/* + * Wait for space to appear in the Tx queue uninterruptibly, but with + * a timeout of 2*RTT if no progress was made and a signal occurred. + */ +static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx, + struct rxrpc_call *call) +{ + rxrpc_seq_t tx_start, tx_win; + signed long rtt2, timeout; + u64 rtt; + + rtt = READ_ONCE(call->peer->rtt); + rtt2 = nsecs_to_jiffies64(rtt) * 2; + if (rtt2 < 1) + rtt2 = 1; + + timeout = rtt2; + tx_start = READ_ONCE(call->tx_hard_ack); + + for (;;) { + set_current_state(TASK_UNINTERRUPTIBLE); + + tx_win = READ_ONCE(call->tx_hard_ack); + if (call->tx_top - tx_win < + min_t(unsigned int, call->tx_winsize, + call->cong_cwnd + call->cong_extra)) + return 0; + + if (call->state >= RXRPC_CALL_COMPLETE) + return call->error; + + if (timeout == 0 && + tx_win == tx_start && signal_pending(current)) + return -EINTR; + + if (tx_win != tx_start) { + timeout = rtt2; + tx_start = tx_win; + } + + trace_rxrpc_transmit(call, rxrpc_transmit_wait); + timeout = schedule_timeout(timeout); + } +} + +/* * wait for space to appear in the transmit/ACK window * - caller holds the socket locked */ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx, struct rxrpc_call *call, - long *timeo) + long *timeo, + bool waitall) { DECLARE_WAITQUEUE(myself, current); int ret; @@ -53,30 +127,10 @@ static int rxrpc_wait_for_tx_window(struct rxrpc_sock *rx, add_wait_queue(&call->waitq, &myself); - for (;;) { - set_current_state(TASK_INTERRUPTIBLE); - ret = 0; - if (call->tx_top - call->tx_hard_ack < - min_t(unsigned int, call->tx_winsize, - call->cong_cwnd + call->cong_extra)) - break; - if (call->state >= RXRPC_CALL_COMPLETE) { - ret = -call->error; - break; - } - if (signal_pending(current)) { - ret = sock_intr_errno(*timeo); - break; - } - - trace_rxrpc_transmit(call, rxrpc_transmit_wait); - mutex_unlock(&call->user_mutex); - *timeo = schedule_timeout(*timeo); - if (mutex_lock_interruptible(&call->user_mutex) < 0) { - ret = sock_intr_errno(*timeo); - break; - } - } + if (waitall) + ret = rxrpc_wait_for_tx_window_nonintr(rx, call); + else + ret = rxrpc_wait_for_tx_window_intr(rx, call, timeo); remove_wait_queue(&call->waitq, &myself); set_current_state(TASK_RUNNING); @@ -101,11 +155,23 @@ static inline void rxrpc_instant_resend(struct rxrpc_call *call, int ix) } /* + * Notify the owner of the call that the transmit phase is ended and the last + * packet has been queued. + */ +static void rxrpc_notify_end_tx(struct rxrpc_sock *rx, struct rxrpc_call *call, + rxrpc_notify_end_tx_t notify_end_tx) +{ + if (notify_end_tx) + notify_end_tx(&rx->sk, call, call->user_call_ID); +} + +/* * Queue a DATA packet for transmission, set the resend timeout and send the * packet immediately */ -static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, - bool last) +static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, + struct sk_buff *skb, bool last, + rxrpc_notify_end_tx_t notify_end_tx) { struct rxrpc_skb_priv *sp = rxrpc_skb(skb); rxrpc_seq_t seq = sp->hdr.seq; @@ -116,8 +182,10 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, ASSERTCMP(seq, ==, call->tx_top + 1); - if (last) + if (last) { annotation |= RXRPC_TX_ANNO_LAST; + set_bit(RXRPC_CALL_TX_LASTQ, &call->flags); + } /* We have to set the timestamp before queueing as the retransmit * algorithm can see the packet as soon as we queue it. @@ -141,6 +209,7 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, switch (call->state) { case RXRPC_CALL_CLIENT_SEND_REQUEST: call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY; + rxrpc_notify_end_tx(rx, call, notify_end_tx); break; case RXRPC_CALL_SERVER_ACK_REQUEST: call->state = RXRPC_CALL_SERVER_SEND_REPLY; @@ -151,8 +220,10 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, ktime_get_real()); if (!last) break; + /* Fall through */ case RXRPC_CALL_SERVER_SEND_REPLY: call->state = RXRPC_CALL_SERVER_AWAIT_ACK; + rxrpc_notify_end_tx(rx, call, notify_end_tx); break; default: break; @@ -189,7 +260,8 @@ static void rxrpc_queue_packet(struct rxrpc_call *call, struct sk_buff *skb, */ static int rxrpc_send_data(struct rxrpc_sock *rx, struct rxrpc_call *call, - struct msghdr *msg, size_t len) + struct msghdr *msg, size_t len, + rxrpc_notify_end_tx_t notify_end_tx) { struct rxrpc_skb_priv *sp; struct sk_buff *skb; @@ -237,7 +309,8 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, if (msg->msg_flags & MSG_DONTWAIT) goto maybe_error; ret = rxrpc_wait_for_tx_window(rx, call, - &timeo); + &timeo, + msg->msg_flags & MSG_WAITALL); if (ret < 0) goto maybe_error; } @@ -311,11 +384,6 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, call->tx_total_len -= copy; } - /* check for the far side aborting the call or a network error - * occurring */ - if (call->state == RXRPC_CALL_COMPLETE) - goto call_terminated; - /* add the packet to the send queue if it's now full */ if (sp->remain <= 0 || (msg_data_left(msg) == 0 && !more)) { @@ -350,9 +418,21 @@ static int rxrpc_send_data(struct rxrpc_sock *rx, if (ret < 0) goto out; - rxrpc_queue_packet(call, skb, !msg_data_left(msg) && !more); + rxrpc_queue_packet(rx, call, skb, + !msg_data_left(msg) && !more, + notify_end_tx); skb = NULL; } + + /* Check for the far side aborting the call or a network error + * occurring. If this happens, save any packet that was under + * construction so that in the case of a network error, the + * call can be retried or redirected. + */ + if (call->state == RXRPC_CALL_COMPLETE) { + ret = call->error; + goto out; + } } while (msg_data_left(msg) > 0); success: @@ -362,11 +442,6 @@ out: _leave(" = %d", ret); return ret; -call_terminated: - rxrpc_free_skb(skb, rxrpc_skb_tx_freed); - _leave(" = %d", -call->error); - return -call->error; - maybe_error: if (copied) goto success; @@ -611,7 +686,7 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len) /* Reply phase not begun or not complete for service call. */ ret = -EPROTO; } else { - ret = rxrpc_send_data(rx, call, msg, len); + ret = rxrpc_send_data(rx, call, msg, len, NULL); } mutex_unlock(&call->user_mutex); @@ -631,6 +706,7 @@ error_release_sock: * @call: The call to send data through * @msg: The data to send * @len: The amount of data to send + * @notify_end_tx: Notification that the last packet is queued. * * Allow a kernel service to send data on a call. The call must be in an state * appropriate to sending data. No control data should be supplied in @msg, @@ -638,7 +714,8 @@ error_release_sock: * more data to come, otherwise this data will end the transmission phase. */ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, - struct msghdr *msg, size_t len) + struct msghdr *msg, size_t len, + rxrpc_notify_end_tx_t notify_end_tx) { int ret; @@ -656,11 +733,12 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call, case RXRPC_CALL_CLIENT_SEND_REQUEST: case RXRPC_CALL_SERVER_ACK_REQUEST: case RXRPC_CALL_SERVER_SEND_REPLY: - ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len); + ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len, + notify_end_tx); break; case RXRPC_CALL_COMPLETE: read_lock_bh(&call->state_lock); - ret = -call->error; + ret = call->error; read_unlock_bh(&call->state_lock); break; default: diff --git a/net/rxrpc/utils.c b/net/rxrpc/utils.c index ff7af71c4b49..e801171fa351 100644 --- a/net/rxrpc/utils.c +++ b/net/rxrpc/utils.c @@ -17,17 +17,28 @@ /* * Fill out a peer address from a socket buffer containing a packet. */ -int rxrpc_extract_addr_from_skb(struct sockaddr_rxrpc *srx, struct sk_buff *skb) +int rxrpc_extract_addr_from_skb(struct rxrpc_local *local, + struct sockaddr_rxrpc *srx, + struct sk_buff *skb) { memset(srx, 0, sizeof(*srx)); switch (ntohs(skb->protocol)) { case ETH_P_IP: - srx->transport_type = SOCK_DGRAM; - srx->transport_len = sizeof(srx->transport.sin); - srx->transport.sin.sin_family = AF_INET; - srx->transport.sin.sin_port = udp_hdr(skb)->source; - srx->transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr; + if (local->srx.transport.family == AF_INET6) { + srx->transport_type = SOCK_DGRAM; + srx->transport_len = sizeof(srx->transport.sin6); + srx->transport.sin6.sin6_family = AF_INET6; + srx->transport.sin6.sin6_port = udp_hdr(skb)->source; + srx->transport.sin6.sin6_addr.s6_addr32[2] = htonl(0xffff); + srx->transport.sin6.sin6_addr.s6_addr32[3] = ip_hdr(skb)->saddr; + } else { + srx->transport_type = SOCK_DGRAM; + srx->transport_len = sizeof(srx->transport.sin); + srx->transport.sin.sin_family = AF_INET; + srx->transport.sin.sin_port = udp_hdr(skb)->source; + srx->transport.sin.sin_addr.s_addr = ip_hdr(skb)->saddr; + } return 0; #ifdef CONFIG_AF_RXRPC_IPV6 diff --git a/net/sched/Kconfig b/net/sched/Kconfig index e70ed26485a2..c03d86a7775e 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -172,6 +172,17 @@ config NET_SCH_TBF To compile this code as a module, choose M here: the module will be called sch_tbf. +config NET_SCH_CBS + tristate "Credit Based Shaper (CBS)" + ---help--- + Say Y here if you want to use the Credit Based Shaper (CBS) packet + scheduling algorithm. + + See the top of <file:net/sched/sch_cbs.c> for more details. + + To compile this code as a module, choose M here: the + module will be called sch_cbs. + config NET_SCH_GRED tristate "Generic Random Early Detection (GRED)" ---help--- diff --git a/net/sched/Makefile b/net/sched/Makefile index 7b915d226de7..5b635447e3f8 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the Linux Traffic Control Unit. # @@ -52,6 +53,7 @@ obj-$(CONFIG_NET_SCH_FQ_CODEL) += sch_fq_codel.o obj-$(CONFIG_NET_SCH_FQ) += sch_fq.o obj-$(CONFIG_NET_SCH_HHF) += sch_hhf.o obj-$(CONFIG_NET_SCH_PIE) += sch_pie.o +obj-$(CONFIG_NET_SCH_CBS) += sch_cbs.o obj-$(CONFIG_NET_CLS_U32) += cls_u32.o obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o diff --git a/net/sched/act_api.c b/net/sched/act_api.c index f2e9ed34a963..4d33a50a8a6d 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -21,6 +21,8 @@ #include <linux/kmod.h> #include <linux/err.h> #include <linux/module.h> +#include <linux/rhashtable.h> +#include <linux/list.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/sch_generic.h> @@ -53,10 +55,13 @@ static void tcf_action_goto_chain_exec(const struct tc_action *a, res->goto_tp = rcu_dereference_bh(chain->filter_chain); } -static void free_tcf(struct rcu_head *head) +/* XXX: For standalone actions, we don't need a RCU grace period either, because + * actions are always connected to filters and filters are already destroyed in + * RCU callbacks, so after a RCU grace period actions are already disconnected + * from filters. Readers later can not find us. + */ +static void free_tcf(struct tc_action *p) { - struct tc_action *p = container_of(head, struct tc_action, tcfa_rcu); - free_percpu(p->cpu_bstats); free_percpu(p->cpu_qstats); @@ -70,23 +75,21 @@ static void free_tcf(struct rcu_head *head) kfree(p); } -static void tcf_hash_destroy(struct tcf_hashinfo *hinfo, struct tc_action *p) +static void tcf_idr_remove(struct tcf_idrinfo *idrinfo, struct tc_action *p) { - spin_lock_bh(&hinfo->lock); - hlist_del(&p->tcfa_head); - spin_unlock_bh(&hinfo->lock); + spin_lock_bh(&idrinfo->lock); + idr_remove_ext(&idrinfo->action_idr, p->tcfa_index); + spin_unlock_bh(&idrinfo->lock); gen_kill_estimator(&p->tcfa_rate_est); - /* - * gen_estimator est_timer() might access p->tcfa_lock - * or bstats, wait a RCU grace period before freeing p - */ - call_rcu(&p->tcfa_rcu, free_tcf); + free_tcf(p); } -int __tcf_hash_release(struct tc_action *p, bool bind, bool strict) +int __tcf_idr_release(struct tc_action *p, bool bind, bool strict) { int ret = 0; + ASSERT_RTNL(); + if (p) { if (bind) p->tcfa_bindcnt--; @@ -97,55 +100,64 @@ int __tcf_hash_release(struct tc_action *p, bool bind, bool strict) if (p->tcfa_bindcnt <= 0 && p->tcfa_refcnt <= 0) { if (p->ops->cleanup) p->ops->cleanup(p, bind); - tcf_hash_destroy(p->hinfo, p); + tcf_idr_remove(p->idrinfo, p); ret = ACT_P_DELETED; } } return ret; } -EXPORT_SYMBOL(__tcf_hash_release); +EXPORT_SYMBOL(__tcf_idr_release); -static int tcf_dump_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb, +static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, struct netlink_callback *cb) { - int err = 0, index = -1, i = 0, s_i = 0, n_i = 0; + int err = 0, index = -1, s_i = 0, n_i = 0; + u32 act_flags = cb->args[2]; + unsigned long jiffy_since = cb->args[3]; struct nlattr *nest; + struct idr *idr = &idrinfo->action_idr; + struct tc_action *p; + unsigned long id = 1; - spin_lock_bh(&hinfo->lock); + spin_lock_bh(&idrinfo->lock); s_i = cb->args[0]; - for (i = 0; i < (hinfo->hmask + 1); i++) { - struct hlist_head *head; - struct tc_action *p; - - head = &hinfo->htab[tcf_hash(i, hinfo->hmask)]; - - hlist_for_each_entry_rcu(p, head, tcfa_head) { - index++; - if (index < s_i) - continue; - - nest = nla_nest_start(skb, n_i); - if (nest == NULL) - goto nla_put_failure; - err = tcf_action_dump_1(skb, p, 0, 0); - if (err < 0) { - index--; - nlmsg_trim(skb, nest); - goto done; - } - nla_nest_end(skb, nest); - n_i++; - if (n_i >= TCA_ACT_MAX_PRIO) - goto done; + idr_for_each_entry_ext(idr, p, id) { + index++; + if (index < s_i) + continue; + + if (jiffy_since && + time_after(jiffy_since, + (unsigned long)p->tcfa_tm.lastuse)) + continue; + + nest = nla_nest_start(skb, n_i); + if (!nest) + goto nla_put_failure; + err = tcf_action_dump_1(skb, p, 0, 0); + if (err < 0) { + index--; + nlmsg_trim(skb, nest); + goto done; } + nla_nest_end(skb, nest); + n_i++; + if (!(act_flags & TCA_FLAG_LARGE_DUMP_ON) && + n_i >= TCA_ACT_MAX_PRIO) + goto done; } done: - spin_unlock_bh(&hinfo->lock); - if (n_i) - cb->args[0] += n_i; + if (index >= 0) + cb->args[0] = index + 1; + + spin_unlock_bh(&idrinfo->lock); + if (n_i) { + if (act_flags & TCA_FLAG_LARGE_DUMP_ON) + cb->args[1] = n_i; + } return n_i; nla_put_failure: @@ -153,31 +165,29 @@ nla_put_failure: goto done; } -static int tcf_del_walker(struct tcf_hashinfo *hinfo, struct sk_buff *skb, +static int tcf_del_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb, const struct tc_action_ops *ops) { struct nlattr *nest; - int i = 0, n_i = 0; + int n_i = 0; int ret = -EINVAL; + struct idr *idr = &idrinfo->action_idr; + struct tc_action *p; + unsigned long id = 1; nest = nla_nest_start(skb, 0); if (nest == NULL) goto nla_put_failure; if (nla_put_string(skb, TCA_KIND, ops->kind)) goto nla_put_failure; - for (i = 0; i < (hinfo->hmask + 1); i++) { - struct hlist_head *head; - struct hlist_node *n; - struct tc_action *p; - - head = &hinfo->htab[tcf_hash(i, hinfo->hmask)]; - hlist_for_each_entry_safe(p, n, head, tcfa_head) { - ret = __tcf_hash_release(p, false, true); - if (ret == ACT_P_DELETED) { - module_put(p->ops->owner); - n_i++; - } else if (ret < 0) - goto nla_put_failure; + + idr_for_each_entry_ext(idr, p, id) { + ret = __tcf_idr_release(p, false, true); + if (ret == ACT_P_DELETED) { + module_put(ops->owner); + n_i++; + } else if (ret < 0) { + goto nla_put_failure; } } if (nla_put_u32(skb, TCA_FCNT, n_i)) @@ -194,12 +204,12 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, struct netlink_callback *cb, int type, const struct tc_action_ops *ops) { - struct tcf_hashinfo *hinfo = tn->hinfo; + struct tcf_idrinfo *idrinfo = tn->idrinfo; if (type == RTM_DELACTION) { - return tcf_del_walker(hinfo, skb, ops); + return tcf_del_walker(idrinfo, skb, ops); } else if (type == RTM_GETACTION) { - return tcf_dump_walker(hinfo, skb, cb); + return tcf_dump_walker(idrinfo, skb, cb); } else { WARN(1, "tcf_generic_walker: unknown action %d\n", type); return -EINVAL; @@ -207,40 +217,21 @@ int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, } EXPORT_SYMBOL(tcf_generic_walker); -static struct tc_action *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo) +static struct tc_action *tcf_idr_lookup(u32 index, struct tcf_idrinfo *idrinfo) { struct tc_action *p = NULL; - struct hlist_head *head; - spin_lock_bh(&hinfo->lock); - head = &hinfo->htab[tcf_hash(index, hinfo->hmask)]; - hlist_for_each_entry_rcu(p, head, tcfa_head) - if (p->tcfa_index == index) - break; - spin_unlock_bh(&hinfo->lock); + spin_lock_bh(&idrinfo->lock); + p = idr_find_ext(&idrinfo->action_idr, index); + spin_unlock_bh(&idrinfo->lock); return p; } -u32 tcf_hash_new_index(struct tc_action_net *tn) -{ - struct tcf_hashinfo *hinfo = tn->hinfo; - u32 val = hinfo->index; - - do { - if (++val == 0) - val = 1; - } while (tcf_hash_lookup(val, hinfo)); - - hinfo->index = val; - return val; -} -EXPORT_SYMBOL(tcf_hash_new_index); - -int tcf_hash_search(struct tc_action_net *tn, struct tc_action **a, u32 index) +int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index) { - struct tcf_hashinfo *hinfo = tn->hinfo; - struct tc_action *p = tcf_hash_lookup(index, hinfo); + struct tcf_idrinfo *idrinfo = tn->idrinfo; + struct tc_action *p = tcf_idr_lookup(index, idrinfo); if (p) { *a = p; @@ -248,15 +239,15 @@ int tcf_hash_search(struct tc_action_net *tn, struct tc_action **a, u32 index) } return 0; } -EXPORT_SYMBOL(tcf_hash_search); +EXPORT_SYMBOL(tcf_idr_search); -bool tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action **a, - int bind) +bool tcf_idr_check(struct tc_action_net *tn, u32 index, struct tc_action **a, + int bind) { - struct tcf_hashinfo *hinfo = tn->hinfo; - struct tc_action *p = NULL; + struct tcf_idrinfo *idrinfo = tn->idrinfo; + struct tc_action *p = tcf_idr_lookup(index, idrinfo); - if (index && (p = tcf_hash_lookup(index, hinfo)) != NULL) { + if (index && p) { if (bind) p->tcfa_bindcnt++; p->tcfa_refcnt++; @@ -265,23 +256,25 @@ bool tcf_hash_check(struct tc_action_net *tn, u32 index, struct tc_action **a, } return false; } -EXPORT_SYMBOL(tcf_hash_check); +EXPORT_SYMBOL(tcf_idr_check); -void tcf_hash_cleanup(struct tc_action *a, struct nlattr *est) +void tcf_idr_cleanup(struct tc_action *a, struct nlattr *est) { if (est) gen_kill_estimator(&a->tcfa_rate_est); - call_rcu(&a->tcfa_rcu, free_tcf); + free_tcf(a); } -EXPORT_SYMBOL(tcf_hash_cleanup); +EXPORT_SYMBOL(tcf_idr_cleanup); -int tcf_hash_create(struct tc_action_net *tn, u32 index, struct nlattr *est, - struct tc_action **a, const struct tc_action_ops *ops, - int bind, bool cpustats) +int tcf_idr_create(struct tc_action_net *tn, u32 index, struct nlattr *est, + struct tc_action **a, const struct tc_action_ops *ops, + int bind, bool cpustats) { struct tc_action *p = kzalloc(ops->size, GFP_KERNEL); - struct tcf_hashinfo *hinfo = tn->hinfo; + struct tcf_idrinfo *idrinfo = tn->idrinfo; + struct idr *idr = &idrinfo->action_idr; int err = -ENOMEM; + unsigned long idr_index; if (unlikely(!p)) return -ENOMEM; @@ -304,8 +297,32 @@ err2: } } spin_lock_init(&p->tcfa_lock); - INIT_HLIST_NODE(&p->tcfa_head); - p->tcfa_index = index ? index : tcf_hash_new_index(tn); + /* user doesn't specify an index */ + if (!index) { + idr_preload(GFP_KERNEL); + spin_lock_bh(&idrinfo->lock); + err = idr_alloc_ext(idr, NULL, &idr_index, 1, 0, + GFP_ATOMIC); + spin_unlock_bh(&idrinfo->lock); + idr_preload_end(); + if (err) { +err3: + free_percpu(p->cpu_qstats); + goto err2; + } + p->tcfa_index = idr_index; + } else { + idr_preload(GFP_KERNEL); + spin_lock_bh(&idrinfo->lock); + err = idr_alloc_ext(idr, NULL, NULL, index, index + 1, + GFP_ATOMIC); + spin_unlock_bh(&idrinfo->lock); + idr_preload_end(); + if (err) + goto err3; + p->tcfa_index = index; + } + p->tcfa_tm.install = jiffies; p->tcfa_tm.lastuse = jiffies; p->tcfa_tm.firstuse = 0; @@ -314,52 +331,46 @@ err2: &p->tcfa_rate_est, &p->tcfa_lock, NULL, est); if (err) { - free_percpu(p->cpu_qstats); - goto err2; + goto err3; } } - p->hinfo = hinfo; + p->idrinfo = idrinfo; p->ops = ops; INIT_LIST_HEAD(&p->list); *a = p; return 0; } -EXPORT_SYMBOL(tcf_hash_create); +EXPORT_SYMBOL(tcf_idr_create); -void tcf_hash_insert(struct tc_action_net *tn, struct tc_action *a) +void tcf_idr_insert(struct tc_action_net *tn, struct tc_action *a) { - struct tcf_hashinfo *hinfo = tn->hinfo; - unsigned int h = tcf_hash(a->tcfa_index, hinfo->hmask); + struct tcf_idrinfo *idrinfo = tn->idrinfo; - spin_lock_bh(&hinfo->lock); - hlist_add_head(&a->tcfa_head, &hinfo->htab[h]); - spin_unlock_bh(&hinfo->lock); + spin_lock_bh(&idrinfo->lock); + idr_replace_ext(&idrinfo->action_idr, a, a->tcfa_index); + spin_unlock_bh(&idrinfo->lock); } -EXPORT_SYMBOL(tcf_hash_insert); +EXPORT_SYMBOL(tcf_idr_insert); -void tcf_hashinfo_destroy(const struct tc_action_ops *ops, - struct tcf_hashinfo *hinfo) +void tcf_idrinfo_destroy(const struct tc_action_ops *ops, + struct tcf_idrinfo *idrinfo) { - int i; - - for (i = 0; i < hinfo->hmask + 1; i++) { - struct tc_action *p; - struct hlist_node *n; - - hlist_for_each_entry_safe(p, n, &hinfo->htab[i], tcfa_head) { - int ret; + struct idr *idr = &idrinfo->action_idr; + struct tc_action *p; + int ret; + unsigned long id = 1; - ret = __tcf_hash_release(p, false, true); - if (ret == ACT_P_DELETED) - module_put(ops->owner); - else if (ret < 0) - return; - } + idr_for_each_entry_ext(idr, p, id) { + ret = __tcf_idr_release(p, false, true); + if (ret == ACT_P_DELETED) + module_put(ops->owner); + else if (ret < 0) + return; } - kfree(hinfo->htab); + idr_destroy(&idrinfo->action_idr); } -EXPORT_SYMBOL(tcf_hashinfo_destroy); +EXPORT_SYMBOL(tcf_idrinfo_destroy); static LIST_HEAD(act_base); static DEFINE_RWLOCK(act_mod_lock); @@ -460,9 +471,10 @@ static struct tc_action_ops *tc_lookup_action(struct nlattr *kind) int tcf_action_exec(struct sk_buff *skb, struct tc_action **actions, int nr_actions, struct tcf_result *res) { - int ret = -1, i; u32 jmp_prgcnt = 0; u32 jmp_ttl = TCA_ACT_MAX_PRIO; /*matches actions per filter */ + int i; + int ret = TC_ACT_OK; if (skb_skip_tc_classify(skb)) return TC_ACT_OK; @@ -506,13 +518,15 @@ EXPORT_SYMBOL(tcf_action_exec); int tcf_action_destroy(struct list_head *actions, int bind) { + const struct tc_action_ops *ops; struct tc_action *a, *tmp; int ret = 0; list_for_each_entry_safe(a, tmp, actions, list) { - ret = __tcf_hash_release(a, bind, true); + ops = a->ops; + ret = __tcf_idr_release(a, bind, true); if (ret == ACT_P_DELETED) - module_put(a->ops->owner); + module_put(ops->owner); else if (ret < 0) return ret; } @@ -1068,11 +1082,18 @@ static int tcf_action_add(struct net *net, struct nlattr *nla, return tcf_add_notify(net, n, &actions, portid); } +static u32 tcaa_root_flags_allowed = TCA_FLAG_LARGE_DUMP_ON; +static const struct nla_policy tcaa_policy[TCA_ROOT_MAX + 1] = { + [TCA_ROOT_FLAGS] = { .type = NLA_BITFIELD32, + .validation_data = &tcaa_root_flags_allowed }, + [TCA_ROOT_TIME_DELTA] = { .type = NLA_U32 }, +}; + static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); - struct nlattr *tca[TCA_ACT_MAX + 1]; + struct nlattr *tca[TCA_ROOT_MAX + 1]; u32 portid = skb ? NETLINK_CB(skb).portid : 0; int ret = 0, ovr = 0; @@ -1080,7 +1101,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, !netlink_capable(skb, CAP_NET_ADMIN)) return -EPERM; - ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL, + ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ROOT_MAX, NULL, extack); if (ret < 0) return ret; @@ -1121,16 +1142,12 @@ replay: return ret; } -static struct nlattr *find_dump_kind(const struct nlmsghdr *n) +static struct nlattr *find_dump_kind(struct nlattr **nla) { struct nlattr *tb1, *tb2[TCA_ACT_MAX + 1]; struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; - struct nlattr *nla[TCAA_MAX + 1]; struct nlattr *kind; - if (nlmsg_parse(n, sizeof(struct tcamsg), nla, TCAA_MAX, - NULL, NULL) < 0) - return NULL; tb1 = nla[TCA_ACT_TAB]; if (tb1 == NULL) return NULL; @@ -1157,8 +1174,20 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) struct tc_action_ops *a_o; int ret = 0; struct tcamsg *t = (struct tcamsg *) nlmsg_data(cb->nlh); - struct nlattr *kind = find_dump_kind(cb->nlh); + struct nlattr *tb[TCA_ROOT_MAX + 1]; + struct nlattr *count_attr = NULL; + unsigned long jiffy_since = 0; + struct nlattr *kind = NULL; + struct nla_bitfield32 bf; + u32 msecs_since = 0; + u32 act_count = 0; + + ret = nlmsg_parse(cb->nlh, sizeof(struct tcamsg), tb, TCA_ROOT_MAX, + tcaa_policy, NULL); + if (ret < 0) + return ret; + kind = find_dump_kind(tb); if (kind == NULL) { pr_info("tc_dump_action: action bad kind\n"); return 0; @@ -1168,14 +1197,32 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) if (a_o == NULL) return 0; + cb->args[2] = 0; + if (tb[TCA_ROOT_FLAGS]) { + bf = nla_get_bitfield32(tb[TCA_ROOT_FLAGS]); + cb->args[2] = bf.value; + } + + if (tb[TCA_ROOT_TIME_DELTA]) { + msecs_since = nla_get_u32(tb[TCA_ROOT_TIME_DELTA]); + } + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb->nlh->nlmsg_type, sizeof(*t), 0); if (!nlh) goto out_module_put; + + if (msecs_since) + jiffy_since = jiffies - msecs_to_jiffies(msecs_since); + t = nlmsg_data(nlh); t->tca_family = AF_UNSPEC; t->tca__pad1 = 0; t->tca__pad2 = 0; + cb->args[3] = jiffy_since; + count_attr = nla_reserve(skb, TCA_ROOT_COUNT, sizeof(u32)); + if (!count_attr) + goto out_module_put; nest = nla_nest_start(skb, TCA_ACT_TAB); if (nest == NULL) @@ -1188,6 +1235,9 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) if (ret > 0) { nla_nest_end(skb, nest); ret = skb->len; + act_count = cb->args[1]; + memcpy(nla_data(count_attr), &act_count, sizeof(u32)); + cb->args[1] = 0; } else nlmsg_trim(skb, b); @@ -1203,12 +1253,231 @@ out_module_put: return skb->len; } +struct tcf_action_net { + struct rhashtable egdev_ht; +}; + +static unsigned int tcf_action_net_id; + +struct tcf_action_egdev_cb { + struct list_head list; + tc_setup_cb_t *cb; + void *cb_priv; +}; + +struct tcf_action_egdev { + struct rhash_head ht_node; + const struct net_device *dev; + unsigned int refcnt; + struct list_head cb_list; +}; + +static const struct rhashtable_params tcf_action_egdev_ht_params = { + .key_offset = offsetof(struct tcf_action_egdev, dev), + .head_offset = offsetof(struct tcf_action_egdev, ht_node), + .key_len = sizeof(const struct net_device *), +}; + +static struct tcf_action_egdev * +tcf_action_egdev_lookup(const struct net_device *dev) +{ + struct net *net = dev_net(dev); + struct tcf_action_net *tan = net_generic(net, tcf_action_net_id); + + return rhashtable_lookup_fast(&tan->egdev_ht, &dev, + tcf_action_egdev_ht_params); +} + +static struct tcf_action_egdev * +tcf_action_egdev_get(const struct net_device *dev) +{ + struct tcf_action_egdev *egdev; + struct tcf_action_net *tan; + + egdev = tcf_action_egdev_lookup(dev); + if (egdev) + goto inc_ref; + + egdev = kzalloc(sizeof(*egdev), GFP_KERNEL); + if (!egdev) + return NULL; + INIT_LIST_HEAD(&egdev->cb_list); + egdev->dev = dev; + tan = net_generic(dev_net(dev), tcf_action_net_id); + rhashtable_insert_fast(&tan->egdev_ht, &egdev->ht_node, + tcf_action_egdev_ht_params); + +inc_ref: + egdev->refcnt++; + return egdev; +} + +static void tcf_action_egdev_put(struct tcf_action_egdev *egdev) +{ + struct tcf_action_net *tan; + + if (--egdev->refcnt) + return; + tan = net_generic(dev_net(egdev->dev), tcf_action_net_id); + rhashtable_remove_fast(&tan->egdev_ht, &egdev->ht_node, + tcf_action_egdev_ht_params); + kfree(egdev); +} + +static struct tcf_action_egdev_cb * +tcf_action_egdev_cb_lookup(struct tcf_action_egdev *egdev, + tc_setup_cb_t *cb, void *cb_priv) +{ + struct tcf_action_egdev_cb *egdev_cb; + + list_for_each_entry(egdev_cb, &egdev->cb_list, list) + if (egdev_cb->cb == cb && egdev_cb->cb_priv == cb_priv) + return egdev_cb; + return NULL; +} + +static int tcf_action_egdev_cb_call(struct tcf_action_egdev *egdev, + enum tc_setup_type type, + void *type_data, bool err_stop) +{ + struct tcf_action_egdev_cb *egdev_cb; + int ok_count = 0; + int err; + + list_for_each_entry(egdev_cb, &egdev->cb_list, list) { + err = egdev_cb->cb(type, type_data, egdev_cb->cb_priv); + if (err) { + if (err_stop) + return err; + } else { + ok_count++; + } + } + return ok_count; +} + +static int tcf_action_egdev_cb_add(struct tcf_action_egdev *egdev, + tc_setup_cb_t *cb, void *cb_priv) +{ + struct tcf_action_egdev_cb *egdev_cb; + + egdev_cb = tcf_action_egdev_cb_lookup(egdev, cb, cb_priv); + if (WARN_ON(egdev_cb)) + return -EEXIST; + egdev_cb = kzalloc(sizeof(*egdev_cb), GFP_KERNEL); + if (!egdev_cb) + return -ENOMEM; + egdev_cb->cb = cb; + egdev_cb->cb_priv = cb_priv; + list_add(&egdev_cb->list, &egdev->cb_list); + return 0; +} + +static void tcf_action_egdev_cb_del(struct tcf_action_egdev *egdev, + tc_setup_cb_t *cb, void *cb_priv) +{ + struct tcf_action_egdev_cb *egdev_cb; + + egdev_cb = tcf_action_egdev_cb_lookup(egdev, cb, cb_priv); + if (WARN_ON(!egdev_cb)) + return; + list_del(&egdev_cb->list); + kfree(egdev_cb); +} + +static int __tc_setup_cb_egdev_register(const struct net_device *dev, + tc_setup_cb_t *cb, void *cb_priv) +{ + struct tcf_action_egdev *egdev = tcf_action_egdev_get(dev); + int err; + + if (!egdev) + return -ENOMEM; + err = tcf_action_egdev_cb_add(egdev, cb, cb_priv); + if (err) + goto err_cb_add; + return 0; + +err_cb_add: + tcf_action_egdev_put(egdev); + return err; +} +int tc_setup_cb_egdev_register(const struct net_device *dev, + tc_setup_cb_t *cb, void *cb_priv) +{ + int err; + + rtnl_lock(); + err = __tc_setup_cb_egdev_register(dev, cb, cb_priv); + rtnl_unlock(); + return err; +} +EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_register); + +static void __tc_setup_cb_egdev_unregister(const struct net_device *dev, + tc_setup_cb_t *cb, void *cb_priv) +{ + struct tcf_action_egdev *egdev = tcf_action_egdev_lookup(dev); + + if (WARN_ON(!egdev)) + return; + tcf_action_egdev_cb_del(egdev, cb, cb_priv); + tcf_action_egdev_put(egdev); +} +void tc_setup_cb_egdev_unregister(const struct net_device *dev, + tc_setup_cb_t *cb, void *cb_priv) +{ + rtnl_lock(); + __tc_setup_cb_egdev_unregister(dev, cb, cb_priv); + rtnl_unlock(); +} +EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_unregister); + +int tc_setup_cb_egdev_call(const struct net_device *dev, + enum tc_setup_type type, void *type_data, + bool err_stop) +{ + struct tcf_action_egdev *egdev = tcf_action_egdev_lookup(dev); + + if (!egdev) + return 0; + return tcf_action_egdev_cb_call(egdev, type, type_data, err_stop); +} +EXPORT_SYMBOL_GPL(tc_setup_cb_egdev_call); + +static __net_init int tcf_action_net_init(struct net *net) +{ + struct tcf_action_net *tan = net_generic(net, tcf_action_net_id); + + return rhashtable_init(&tan->egdev_ht, &tcf_action_egdev_ht_params); +} + +static void __net_exit tcf_action_net_exit(struct net *net) +{ + struct tcf_action_net *tan = net_generic(net, tcf_action_net_id); + + rhashtable_destroy(&tan->egdev_ht); +} + +static struct pernet_operations tcf_action_net_ops = { + .init = tcf_action_net_init, + .exit = tcf_action_net_exit, + .id = &tcf_action_net_id, + .size = sizeof(struct tcf_action_net), +}; + static int __init tc_action_init(void) { - rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL, NULL); - rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL, NULL); + int err; + + err = register_pernet_subsys(&tcf_action_net_ops); + if (err) + return err; + + rtnl_register(PF_UNSPEC, RTM_NEWACTION, tc_ctl_action, NULL, 0); + rtnl_register(PF_UNSPEC, RTM_DELACTION, tc_ctl_action, NULL, 0); rtnl_register(PF_UNSPEC, RTM_GETACTION, tc_ctl_action, tc_dump_action, - NULL); + 0); return 0; } diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index 9afe1337cfd1..5ef8ce8c83d4 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -21,7 +21,6 @@ #include <linux/tc_act/tc_bpf.h> #include <net/tc_act/tc_bpf.h> -#define BPF_TAB_MASK 15 #define ACT_BPF_NAME_LEN 256 struct tcf_bpf_cfg { @@ -50,11 +49,11 @@ static int tcf_bpf(struct sk_buff *skb, const struct tc_action *act, filter = rcu_dereference(prog->filter); if (at_ingress) { __skb_push(skb, skb->mac_len); - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); filter_res = BPF_PROG_RUN(filter, skb); __skb_pull(skb, skb->mac_len); } else { - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); filter_res = BPF_PROG_RUN(filter, skb); } rcu_read_unlock(); @@ -295,9 +294,9 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, parm = nla_data(tb[TCA_ACT_BPF_PARMS]); - if (!tcf_hash_check(tn, parm->index, act, bind)) { - ret = tcf_hash_create(tn, parm->index, est, act, - &act_bpf_ops, bind, true); + if (!tcf_idr_check(tn, parm->index, act, bind)) { + ret = tcf_idr_create(tn, parm->index, est, act, + &act_bpf_ops, bind, true); if (ret < 0) return ret; @@ -307,7 +306,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, if (bind) return 0; - tcf_hash_release(*act, bind); + tcf_idr_release(*act, bind); if (!replace) return -EEXIST; } @@ -343,7 +342,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, rcu_assign_pointer(prog->filter, cfg.filter); if (res == ACT_P_CREATED) { - tcf_hash_insert(tn, *act); + tcf_idr_insert(tn, *act); } else { /* make sure the program being replaced is no longer executing */ synchronize_rcu(); @@ -353,7 +352,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, return res; out: if (res == ACT_P_CREATED) - tcf_hash_cleanup(*act, est); + tcf_idr_cleanup(*act, est); return ret; } @@ -379,7 +378,7 @@ static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, bpf_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_bpf_ops __read_mostly = { @@ -399,7 +398,7 @@ static __net_init int bpf_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, bpf_net_id); - return tc_action_net_init(tn, &act_bpf_ops, BPF_TAB_MASK); + return tc_action_net_init(tn, &act_bpf_ops); } static void __net_exit bpf_exit_net(struct net *net) diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 2155bc6c6a1e..10b7a8855a6c 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -28,8 +28,6 @@ #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_zones.h> -#define CONNMARK_TAB_MASK 3 - static unsigned int connmark_net_id; static struct tc_action_ops act_connmark_ops; @@ -119,9 +117,9 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, parm = nla_data(tb[TCA_CONNMARK_PARMS]); - if (!tcf_hash_check(tn, parm->index, a, bind)) { - ret = tcf_hash_create(tn, parm->index, est, a, - &act_connmark_ops, bind, false); + if (!tcf_idr_check(tn, parm->index, a, bind)) { + ret = tcf_idr_create(tn, parm->index, est, a, + &act_connmark_ops, bind, false); if (ret) return ret; @@ -130,13 +128,13 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, ci->net = net; ci->zone = parm->zone; - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); ret = ACT_P_CREATED; } else { ci = to_connmark(*a); if (bind) return 0; - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; /* replacing action and zone */ @@ -189,7 +187,7 @@ static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, connmark_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_connmark_ops = { @@ -208,7 +206,7 @@ static __net_init int connmark_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, connmark_net_id); - return tc_action_net_init(tn, &act_connmark_ops, CONNMARK_TAB_MASK); + return tc_action_net_init(tn, &act_connmark_ops); } static void __net_exit connmark_exit_net(struct net *net) diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 3317a2f579da..1c40caadcff9 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -37,8 +37,6 @@ #include <linux/tc_act/tc_csum.h> #include <net/tc_act/tc_csum.h> -#define CSUM_TAB_MASK 15 - static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = { [TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), }, }; @@ -67,16 +65,16 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, return -EINVAL; parm = nla_data(tb[TCA_CSUM_PARMS]); - if (!tcf_hash_check(tn, parm->index, a, bind)) { - ret = tcf_hash_create(tn, parm->index, est, a, - &act_csum_ops, bind, false); + if (!tcf_idr_check(tn, parm->index, a, bind)) { + ret = tcf_idr_create(tn, parm->index, est, a, + &act_csum_ops, bind, false); if (ret) return ret; ret = ACT_P_CREATED; } else { if (bind)/* dont override defaults */ return 0; - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } @@ -88,7 +86,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, spin_unlock_bh(&p->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; } @@ -231,9 +229,6 @@ static int tcf_csum_ipv4_udp(struct sk_buff *skb, unsigned int ihl, const struct iphdr *iph; u16 ul; - if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP) - return 1; - /* * Support both UDP and UDPLITE checksum algorithms, Don't use * udph->len to get the real length without any protocol check, @@ -287,9 +282,6 @@ static int tcf_csum_ipv6_udp(struct sk_buff *skb, unsigned int ihl, const struct ipv6hdr *ip6h; u16 ul; - if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP) - return 1; - /* * Support both UDP and UDPLITE checksum algorithms, Don't use * udph->len to get the real length without any protocol check, @@ -615,7 +607,7 @@ static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, csum_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_csum_ops = { @@ -634,7 +626,7 @@ static __net_init int csum_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, csum_net_id); - return tc_action_net_init(tn, &act_csum_ops, CSUM_TAB_MASK); + return tc_action_net_init(tn, &act_csum_ops); } static void __net_exit csum_exit_net(struct net *net) diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index 99afe8b1f1fb..e29a48ef7fc3 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -23,8 +23,6 @@ #include <linux/tc_act/tc_gact.h> #include <net/tc_act/tc_gact.h> -#define GACT_TAB_MASK 15 - static unsigned int gact_net_id; static struct tc_action_ops act_gact_ops; @@ -92,16 +90,16 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, } #endif - if (!tcf_hash_check(tn, parm->index, a, bind)) { - ret = tcf_hash_create(tn, parm->index, est, a, - &act_gact_ops, bind, true); + if (!tcf_idr_check(tn, parm->index, a, bind)) { + ret = tcf_idr_create(tn, parm->index, est, a, + &act_gact_ops, bind, true); if (ret) return ret; ret = ACT_P_CREATED; } else { if (bind)/* dont override defaults */ return 0; - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } @@ -122,7 +120,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, } #endif if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; } @@ -214,7 +212,7 @@ static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, gact_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_gact_ops = { @@ -234,7 +232,7 @@ static __net_init int gact_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, gact_net_id); - return tc_action_net_init(tn, &act_gact_ops, GACT_TAB_MASK); + return tc_action_net_init(tn, &act_gact_ops); } static void __net_exit gact_exit_net(struct net *net) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index c5dec308b8b1..3007cb1310ea 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -34,8 +34,6 @@ #include <linux/etherdevice.h> #include <net/ife.h> -#define IFE_TAB_MASK 15 - static unsigned int ife_net_id; static int max_metacnt = IFE_META_MAX + 1; static struct tc_action_ops act_ife_ops; @@ -250,6 +248,22 @@ static int ife_validate_metatype(struct tcf_meta_ops *ops, void *val, int len) return ret; } +#ifdef CONFIG_MODULES +static const char *ife_meta_id2name(u32 metaid) +{ + switch (metaid) { + case IFE_META_SKBMARK: + return "skbmark"; + case IFE_META_PRIO: + return "skbprio"; + case IFE_META_TCINDEX: + return "tcindex"; + default: + return "unknown"; + } +} +#endif + /* called when adding new meta information * under ife->tcf_lock for existing action */ @@ -265,7 +279,7 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid, if (exists) spin_unlock_bh(&ife->tcf_lock); rtnl_unlock(); - request_module("ifemeta%u", metaid); + request_module("ife-meta-%s", ife_meta_id2name(metaid)); rtnl_lock(); if (exists) spin_lock_bh(&ife->tcf_lock); @@ -394,10 +408,14 @@ static void _tcf_ife_cleanup(struct tc_action *a, int bind) static void tcf_ife_cleanup(struct tc_action *a, int bind) { struct tcf_ife_info *ife = to_ife(a); + struct tcf_ife_params *p; spin_lock_bh(&ife->tcf_lock); _tcf_ife_cleanup(a, bind); spin_unlock_bh(&ife->tcf_lock); + + p = rcu_dereference_protected(ife->params, 1); + kfree_rcu(p, rcu); } /* under ife->tcf_lock for existing action */ @@ -434,9 +452,10 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, struct tc_action_net *tn = net_generic(net, ife_net_id); struct nlattr *tb[TCA_IFE_MAX + 1]; struct nlattr *tb2[IFE_META_MAX + 1]; + struct tcf_ife_params *p, *p_old; struct tcf_ife_info *ife; + u16 ife_type = ETH_P_IFE; struct tc_ife *parm; - u16 ife_type = 0; u8 *daddr = NULL; u8 *saddr = NULL; bool exists = false; @@ -452,63 +471,70 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, parm = nla_data(tb[TCA_IFE_PARMS]); - exists = tcf_hash_check(tn, parm->index, a, bind); - if (exists && bind) - return 0; + /* IFE_DECODE is 0 and indicates the opposite of IFE_ENCODE because + * they cannot run as the same time. Check on all other values which + * are not supported right now. + */ + if (parm->flags & ~IFE_ENCODE) + return -EINVAL; - if (parm->flags & IFE_ENCODE) { - /* Until we get issued the ethertype, we cant have - * a default.. - **/ - if (!tb[TCA_IFE_TYPE]) { - if (exists) - tcf_hash_release(*a, bind); - pr_info("You MUST pass etherype for encoding\n"); - return -EINVAL; - } + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return -ENOMEM; + + exists = tcf_idr_check(tn, parm->index, a, bind); + if (exists && bind) { + kfree(p); + return 0; } if (!exists) { - ret = tcf_hash_create(tn, parm->index, est, a, &act_ife_ops, - bind, false); - if (ret) + ret = tcf_idr_create(tn, parm->index, est, a, &act_ife_ops, + bind, true); + if (ret) { + kfree(p); return ret; + } ret = ACT_P_CREATED; } else { - tcf_hash_release(*a, bind); - if (!ovr) + tcf_idr_release(*a, bind); + if (!ovr) { + kfree(p); return -EEXIST; + } } ife = to_ife(*a); - ife->flags = parm->flags; + p->flags = parm->flags; if (parm->flags & IFE_ENCODE) { - ife_type = nla_get_u16(tb[TCA_IFE_TYPE]); + if (tb[TCA_IFE_TYPE]) + ife_type = nla_get_u16(tb[TCA_IFE_TYPE]); if (tb[TCA_IFE_DMAC]) daddr = nla_data(tb[TCA_IFE_DMAC]); if (tb[TCA_IFE_SMAC]) saddr = nla_data(tb[TCA_IFE_SMAC]); } - if (exists) - spin_lock_bh(&ife->tcf_lock); ife->tcf_action = parm->action; if (parm->flags & IFE_ENCODE) { if (daddr) - ether_addr_copy(ife->eth_dst, daddr); + ether_addr_copy(p->eth_dst, daddr); else - eth_zero_addr(ife->eth_dst); + eth_zero_addr(p->eth_dst); if (saddr) - ether_addr_copy(ife->eth_src, saddr); + ether_addr_copy(p->eth_src, saddr); else - eth_zero_addr(ife->eth_src); + eth_zero_addr(p->eth_src); - ife->eth_type = ife_type; + p->eth_type = ife_type; } + if (exists) + spin_lock_bh(&ife->tcf_lock); + if (ret == ACT_P_CREATED) INIT_LIST_HEAD(&ife->metalist); @@ -518,12 +544,13 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, if (err) { metadata_parse_err: if (exists) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (ret == ACT_P_CREATED) _tcf_ife_cleanup(*a, bind); if (exists) spin_unlock_bh(&ife->tcf_lock); + kfree(p); return err; } @@ -544,6 +571,7 @@ metadata_parse_err: if (exists) spin_unlock_bh(&ife->tcf_lock); + kfree(p); return err; } } @@ -551,8 +579,13 @@ metadata_parse_err: if (exists) spin_unlock_bh(&ife->tcf_lock); + p_old = rtnl_dereference(ife->params); + rcu_assign_pointer(ife->params, p); + if (p_old) + kfree_rcu(p_old, rcu); + if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; } @@ -562,12 +595,13 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind, { unsigned char *b = skb_tail_pointer(skb); struct tcf_ife_info *ife = to_ife(a); + struct tcf_ife_params *p = rtnl_dereference(ife->params); struct tc_ife opt = { .index = ife->tcf_index, .refcnt = ife->tcf_refcnt - ref, .bindcnt = ife->tcf_bindcnt - bind, .action = ife->tcf_action, - .flags = ife->flags, + .flags = p->flags, }; struct tcf_t t; @@ -578,17 +612,17 @@ static int tcf_ife_dump(struct sk_buff *skb, struct tc_action *a, int bind, if (nla_put_64bit(skb, TCA_IFE_TM, sizeof(t), &t, TCA_IFE_PAD)) goto nla_put_failure; - if (!is_zero_ether_addr(ife->eth_dst)) { - if (nla_put(skb, TCA_IFE_DMAC, ETH_ALEN, ife->eth_dst)) + if (!is_zero_ether_addr(p->eth_dst)) { + if (nla_put(skb, TCA_IFE_DMAC, ETH_ALEN, p->eth_dst)) goto nla_put_failure; } - if (!is_zero_ether_addr(ife->eth_src)) { - if (nla_put(skb, TCA_IFE_SMAC, ETH_ALEN, ife->eth_src)) + if (!is_zero_ether_addr(p->eth_src)) { + if (nla_put(skb, TCA_IFE_SMAC, ETH_ALEN, p->eth_src)) goto nla_put_failure; } - if (nla_put(skb, TCA_IFE_TYPE, 2, &ife->eth_type)) + if (nla_put(skb, TCA_IFE_TYPE, 2, &p->eth_type)) goto nla_put_failure; if (dump_metalist(skb, ife)) { @@ -630,19 +664,15 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a, u8 *tlv_data; u16 metalen; - spin_lock(&ife->tcf_lock); - bstats_update(&ife->tcf_bstats, skb); + bstats_cpu_update(this_cpu_ptr(ife->common.cpu_bstats), skb); tcf_lastuse_update(&ife->tcf_tm); - spin_unlock(&ife->tcf_lock); if (skb_at_tc_ingress(skb)) skb_push(skb, skb->dev->hard_header_len); tlv_data = ife_decode(skb, &metalen); if (unlikely(!tlv_data)) { - spin_lock(&ife->tcf_lock); - ife->tcf_qstats.drops++; - spin_unlock(&ife->tcf_lock); + qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats)); return TC_ACT_SHOT; } @@ -660,14 +690,12 @@ static int tcf_ife_decode(struct sk_buff *skb, const struct tc_action *a, */ pr_info_ratelimited("Unknown metaid %d dlen %d\n", mtype, dlen); - ife->tcf_qstats.overlimits++; + qstats_overlimit_inc(this_cpu_ptr(ife->common.cpu_qstats)); } } if (WARN_ON(tlv_data != ifehdr_end)) { - spin_lock(&ife->tcf_lock); - ife->tcf_qstats.drops++; - spin_unlock(&ife->tcf_lock); + qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats)); return TC_ACT_SHOT; } @@ -696,7 +724,7 @@ static int ife_get_sz(struct sk_buff *skb, struct tcf_ife_info *ife) } static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, - struct tcf_result *res) + struct tcf_result *res, struct tcf_ife_params *p) { struct tcf_ife_info *ife = to_ife(a); int action = ife->tcf_action; @@ -719,23 +747,20 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, exceed_mtu = true; } - spin_lock(&ife->tcf_lock); - bstats_update(&ife->tcf_bstats, skb); + bstats_cpu_update(this_cpu_ptr(ife->common.cpu_bstats), skb); tcf_lastuse_update(&ife->tcf_tm); if (!metalen) { /* no metadata to send */ /* abuse overlimits to count when we allow packet * with no metadata */ - ife->tcf_qstats.overlimits++; - spin_unlock(&ife->tcf_lock); + qstats_overlimit_inc(this_cpu_ptr(ife->common.cpu_qstats)); return action; } /* could be stupid policy setup or mtu config * so lets be conservative.. */ if ((action == TC_ACT_SHOT) || exceed_mtu) { - ife->tcf_qstats.drops++; - spin_unlock(&ife->tcf_lock); + qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats)); return TC_ACT_SHOT; } @@ -744,6 +769,8 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, ife_meta = ife_encode(skb, metalen); + spin_lock(&ife->tcf_lock); + /* XXX: we dont have a clever way of telling encode to * not repeat some of the computations that are done by * ops->presence_check... @@ -755,25 +782,24 @@ static int tcf_ife_encode(struct sk_buff *skb, const struct tc_action *a, } if (err < 0) { /* too corrupt to keep around if overwritten */ - ife->tcf_qstats.drops++; spin_unlock(&ife->tcf_lock); + qstats_drop_inc(this_cpu_ptr(ife->common.cpu_qstats)); return TC_ACT_SHOT; } skboff += err; } + spin_unlock(&ife->tcf_lock); oethh = (struct ethhdr *)skb->data; - if (!is_zero_ether_addr(ife->eth_src)) - ether_addr_copy(oethh->h_source, ife->eth_src); - if (!is_zero_ether_addr(ife->eth_dst)) - ether_addr_copy(oethh->h_dest, ife->eth_dst); - oethh->h_proto = htons(ife->eth_type); + if (!is_zero_ether_addr(p->eth_src)) + ether_addr_copy(oethh->h_source, p->eth_src); + if (!is_zero_ether_addr(p->eth_dst)) + ether_addr_copy(oethh->h_dest, p->eth_dst); + oethh->h_proto = htons(p->eth_type); if (skb_at_tc_ingress(skb)) skb_pull(skb, skb->dev->hard_header_len); - spin_unlock(&ife->tcf_lock); - return action; } @@ -781,21 +807,19 @@ static int tcf_ife_act(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_ife_info *ife = to_ife(a); + struct tcf_ife_params *p; + int ret; + + rcu_read_lock(); + p = rcu_dereference(ife->params); + if (p->flags & IFE_ENCODE) { + ret = tcf_ife_encode(skb, a, res, p); + rcu_read_unlock(); + return ret; + } + rcu_read_unlock(); - if (ife->flags & IFE_ENCODE) - return tcf_ife_encode(skb, a, res); - - if (!(ife->flags & IFE_ENCODE)) - return tcf_ife_decode(skb, a, res); - - pr_info_ratelimited("unknown failure(policy neither de/encode\n"); - spin_lock(&ife->tcf_lock); - bstats_update(&ife->tcf_bstats, skb); - tcf_lastuse_update(&ife->tcf_tm); - ife->tcf_qstats.drops++; - spin_unlock(&ife->tcf_lock); - - return TC_ACT_SHOT; + return tcf_ife_decode(skb, a, res); } static int tcf_ife_walker(struct net *net, struct sk_buff *skb, @@ -811,7 +835,7 @@ static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, ife_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_ife_ops = { @@ -831,7 +855,7 @@ static __net_init int ife_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, ife_net_id); - return tc_action_net_init(tn, &act_ife_ops, IFE_TAB_MASK); + return tc_action_net_init(tn, &act_ife_ops); } static void __net_exit ife_exit_net(struct net *net) diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index d516ba8178b8..d9e399a7e3d5 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -28,8 +28,6 @@ #include <linux/netfilter_ipv4/ip_tables.h> -#define IPT_TAB_MASK 15 - static unsigned int ipt_net_id; static struct tc_action_ops act_ipt_ops; @@ -41,6 +39,7 @@ static int ipt_init_target(struct net *net, struct xt_entry_target *t, { struct xt_tgchk_param par; struct xt_target *target; + struct ipt_entry e = {}; int ret = 0; target = xt_request_find_target(AF_INET, t->u.user.name, @@ -52,6 +51,7 @@ static int ipt_init_target(struct net *net, struct xt_entry_target *t, memset(&par, 0, sizeof(par)); par.net = net; par.table = table; + par.entryinfo = &e; par.target = target; par.targinfo = t->data; par.hook_mask = hook; @@ -116,33 +116,33 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, if (tb[TCA_IPT_INDEX] != NULL) index = nla_get_u32(tb[TCA_IPT_INDEX]); - exists = tcf_hash_check(tn, index, a, bind); + exists = tcf_idr_check(tn, index, a, bind); if (exists && bind) return 0; if (tb[TCA_IPT_HOOK] == NULL || tb[TCA_IPT_TARG] == NULL) { if (exists) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -EINVAL; } td = (struct xt_entry_target *)nla_data(tb[TCA_IPT_TARG]); if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size) { if (exists) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -EINVAL; } if (!exists) { - ret = tcf_hash_create(tn, index, est, a, ops, bind, - false); + ret = tcf_idr_create(tn, index, est, a, ops, bind, + false); if (ret) return ret; ret = ACT_P_CREATED; } else { if (bind)/* dont override defaults */ return 0; - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; @@ -178,7 +178,7 @@ static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, ipt->tcfi_hook = hook; spin_unlock_bh(&ipt->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; err3: @@ -187,7 +187,7 @@ err2: kfree(tname); err1: if (ret == ACT_P_CREATED) - tcf_hash_cleanup(*a, est); + tcf_idr_cleanup(*a, est); return err; } @@ -314,7 +314,7 @@ static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, ipt_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_ipt_ops = { @@ -334,7 +334,7 @@ static __net_init int ipt_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, ipt_net_id); - return tc_action_net_init(tn, &act_ipt_ops, IPT_TAB_MASK); + return tc_action_net_init(tn, &act_ipt_ops); } static void __net_exit ipt_exit_net(struct net *net) @@ -364,7 +364,7 @@ static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, xt_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_xt_ops = { @@ -384,7 +384,7 @@ static __net_init int xt_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, xt_net_id); - return tc_action_net_init(tn, &act_xt_ops, IPT_TAB_MASK); + return tc_action_net_init(tn, &act_xt_ops); } static void __net_exit xt_exit_net(struct net *net) diff --git a/net/sched/act_meta_mark.c b/net/sched/act_meta_mark.c index 82892170ce4f..1e3f10e5da99 100644 --- a/net/sched/act_meta_mark.c +++ b/net/sched/act_meta_mark.c @@ -76,4 +76,4 @@ module_exit(ifemark_cleanup_module); MODULE_AUTHOR("Jamal Hadi Salim(2015)"); MODULE_DESCRIPTION("Inter-FE skb mark metadata module"); MODULE_LICENSE("GPL"); -MODULE_ALIAS_IFE_META(IFE_META_SKBMARK); +MODULE_ALIAS_IFE_META("skbmark"); diff --git a/net/sched/act_meta_skbprio.c b/net/sched/act_meta_skbprio.c index 26bf4d86030b..4033f9fc4d4a 100644 --- a/net/sched/act_meta_skbprio.c +++ b/net/sched/act_meta_skbprio.c @@ -73,4 +73,4 @@ module_exit(ifeprio_cleanup_module); MODULE_AUTHOR("Jamal Hadi Salim(2015)"); MODULE_DESCRIPTION("Inter-FE skb prio metadata action"); MODULE_LICENSE("GPL"); -MODULE_ALIAS_IFE_META(IFE_META_PRIO); +MODULE_ALIAS_IFE_META("skbprio"); diff --git a/net/sched/act_meta_skbtcindex.c b/net/sched/act_meta_skbtcindex.c index 3b35774ce890..2ea1f26c9e96 100644 --- a/net/sched/act_meta_skbtcindex.c +++ b/net/sched/act_meta_skbtcindex.c @@ -76,4 +76,4 @@ module_exit(ifetc_index_cleanup_module); MODULE_AUTHOR("Jamal Hadi Salim(2016)"); MODULE_DESCRIPTION("Inter-FE skb tc_index metadata module"); MODULE_LICENSE("GPL"); -MODULE_ALIAS_IFE_META(IFE_META_SKBTCINDEX); +MODULE_ALIAS_IFE_META("tcindex"); diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index 1b5549ababd4..8b3e59388480 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -28,7 +28,6 @@ #include <linux/tc_act/tc_mirred.h> #include <net/tc_act/tc_mirred.h> -#define MIRRED_TAB_MASK 7 static LIST_HEAD(mirred_list); static DEFINE_SPINLOCK(mirred_list_lock); @@ -94,7 +93,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, return -EINVAL; parm = nla_data(tb[TCA_MIRRED_PARMS]); - exists = tcf_hash_check(tn, parm->index, a, bind); + exists = tcf_idr_check(tn, parm->index, a, bind); if (exists && bind) return 0; @@ -106,14 +105,14 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, break; default: if (exists) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -EINVAL; } if (parm->ifindex) { dev = __dev_get_by_index(net, parm->ifindex); if (dev == NULL) { if (exists) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -ENODEV; } mac_header_xmit = dev_is_mac_header_xmit(dev); @@ -124,13 +123,13 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, if (!exists) { if (dev == NULL) return -EINVAL; - ret = tcf_hash_create(tn, parm->index, est, a, - &act_mirred_ops, bind, true); + ret = tcf_idr_create(tn, parm->index, est, a, + &act_mirred_ops, bind, true); if (ret) return ret; ret = ACT_P_CREATED; } else { - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } @@ -141,6 +140,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, m->tcfm_eaction = parm->eaction; if (dev != NULL) { m->tcfm_ifindex = parm->ifindex; + m->net = net; if (ret != ACT_P_CREATED) dev_put(rcu_dereference_protected(m->tcfm_dev, 1)); dev_hold(dev); @@ -152,7 +152,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, spin_lock_bh(&mirred_list_lock); list_add(&m->tcfm_list, &mirred_list); spin_unlock_bh(&mirred_list_lock); - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); } return ret; @@ -283,7 +283,7 @@ static int tcf_mirred_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, mirred_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static int mirred_device_event(struct notifier_block *unused, @@ -314,15 +314,11 @@ static struct notifier_block mirred_device_notifier = { .notifier_call = mirred_device_event, }; -static int tcf_mirred_device(const struct tc_action *a, struct net *net, - struct net_device **mirred_dev) +static struct net_device *tcf_mirred_get_dev(const struct tc_action *a) { - int ifindex = tcf_mirred_ifindex(a); + struct tcf_mirred *m = to_mirred(a); - *mirred_dev = __dev_get_by_index(net, ifindex); - if (!*mirred_dev) - return -EINVAL; - return 0; + return __dev_get_by_index(m->net, m->tcfm_ifindex); } static struct tc_action_ops act_mirred_ops = { @@ -337,14 +333,14 @@ static struct tc_action_ops act_mirred_ops = { .walk = tcf_mirred_walker, .lookup = tcf_mirred_search, .size = sizeof(struct tcf_mirred), - .get_dev = tcf_mirred_device, + .get_dev = tcf_mirred_get_dev, }; static __net_init int mirred_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, mirred_net_id); - return tc_action_net_init(tn, &act_mirred_ops, MIRRED_TAB_MASK); + return tc_action_net_init(tn, &act_mirred_ops); } static void __net_exit mirred_exit_net(struct net *net) diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 9016ab8a0649..c365d01b99c8 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -29,8 +29,6 @@ #include <net/udp.h> -#define NAT_TAB_MASK 15 - static unsigned int nat_net_id; static struct tc_action_ops act_nat_ops; @@ -58,16 +56,16 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, return -EINVAL; parm = nla_data(tb[TCA_NAT_PARMS]); - if (!tcf_hash_check(tn, parm->index, a, bind)) { - ret = tcf_hash_create(tn, parm->index, est, a, - &act_nat_ops, bind, false); + if (!tcf_idr_check(tn, parm->index, a, bind)) { + ret = tcf_idr_create(tn, parm->index, est, a, + &act_nat_ops, bind, false); if (ret) return ret; ret = ACT_P_CREATED; } else { if (bind) return 0; - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } @@ -83,7 +81,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, spin_unlock_bh(&p->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; } @@ -290,7 +288,7 @@ static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, nat_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_nat_ops = { @@ -309,7 +307,7 @@ static __net_init int nat_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, nat_net_id); - return tc_action_net_init(tn, &act_nat_ops, NAT_TAB_MASK); + return tc_action_net_init(tn, &act_nat_ops); } static void __net_exit nat_exit_net(struct net *net) diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 7dc5892671c8..491fe5deb09e 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -24,8 +24,6 @@ #include <net/tc_act/tc_pedit.h> #include <uapi/linux/tc_act/tc_pedit.h> -#define PEDIT_TAB_MASK 15 - static unsigned int pedit_net_id; static struct tc_action_ops act_pedit_ops; @@ -168,17 +166,17 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, if (IS_ERR(keys_ex)) return PTR_ERR(keys_ex); - if (!tcf_hash_check(tn, parm->index, a, bind)) { + if (!tcf_idr_check(tn, parm->index, a, bind)) { if (!parm->nkeys) return -EINVAL; - ret = tcf_hash_create(tn, parm->index, est, a, - &act_pedit_ops, bind, false); + ret = tcf_idr_create(tn, parm->index, est, a, + &act_pedit_ops, bind, false); if (ret) return ret; p = to_pedit(*a); keys = kmalloc(ksize, GFP_KERNEL); if (keys == NULL) { - tcf_hash_cleanup(*a, est); + tcf_idr_cleanup(*a, est); kfree(keys_ex); return -ENOMEM; } @@ -186,7 +184,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, } else { if (bind) return 0; - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; p = to_pedit(*a); @@ -214,7 +212,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, spin_unlock_bh(&p->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; } @@ -432,7 +430,7 @@ static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, pedit_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_pedit_ops = { @@ -452,7 +450,7 @@ static __net_init int pedit_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, pedit_net_id); - return tc_action_net_init(tn, &act_pedit_ops, PEDIT_TAB_MASK); + return tc_action_net_init(tn, &act_pedit_ops); } static void __net_exit pedit_exit_net(struct net *net) diff --git a/net/sched/act_police.c b/net/sched/act_police.c index b062bc80c7cb..3bb2ebf9e9ae 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -40,8 +40,6 @@ struct tcf_police { #define to_police(pc) ((struct tcf_police *)pc) -#define POL_TAB_MASK 15 - /* old policer structure from before tc actions */ struct tc_police_compat { u32 index; @@ -101,18 +99,18 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla, return -EINVAL; parm = nla_data(tb[TCA_POLICE_TBF]); - exists = tcf_hash_check(tn, parm->index, a, bind); + exists = tcf_idr_check(tn, parm->index, a, bind); if (exists && bind) return 0; if (!exists) { - ret = tcf_hash_create(tn, parm->index, NULL, a, - &act_police_ops, bind, false); + ret = tcf_idr_create(tn, parm->index, NULL, a, + &act_police_ops, bind, false); if (ret) return ret; ret = ACT_P_CREATED; } else { - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } @@ -188,7 +186,7 @@ static int tcf_act_police_init(struct net *net, struct nlattr *nla, return ret; police->tcfp_t_c = ktime_get_ns(); - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; @@ -196,7 +194,7 @@ failure: qdisc_put_rtab(P_tab); qdisc_put_rtab(R_tab); if (ret == ACT_P_CREATED) - tcf_hash_cleanup(*a, est); + tcf_idr_cleanup(*a, est); return err; } @@ -310,7 +308,7 @@ static int tcf_police_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, police_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } MODULE_AUTHOR("Alexey Kuznetsov"); @@ -333,7 +331,7 @@ static __net_init int police_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, police_net_id); - return tc_action_net_init(tn, &act_police_ops, POL_TAB_MASK); + return tc_action_net_init(tn, &act_police_ops); } static void __net_exit police_exit_net(struct net *net) diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 59d6645a4007..8b5abcd2f32f 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -25,7 +25,6 @@ #include <linux/if_arp.h> -#define SAMPLE_TAB_MASK 7 static unsigned int sample_net_id; static struct tc_action_ops act_sample_ops; @@ -59,18 +58,18 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, parm = nla_data(tb[TCA_SAMPLE_PARMS]); - exists = tcf_hash_check(tn, parm->index, a, bind); + exists = tcf_idr_check(tn, parm->index, a, bind); if (exists && bind) return 0; if (!exists) { - ret = tcf_hash_create(tn, parm->index, est, a, - &act_sample_ops, bind, false); + ret = tcf_idr_create(tn, parm->index, est, a, + &act_sample_ops, bind, false); if (ret) return ret; ret = ACT_P_CREATED; } else { - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } @@ -82,7 +81,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, psample_group = psample_group_get(net, s->psample_group_num); if (!psample_group) { if (ret == ACT_P_CREATED) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -ENOMEM; } RCU_INIT_POINTER(s->psample_group, psample_group); @@ -93,7 +92,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, } if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; } @@ -221,7 +220,7 @@ static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, sample_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_sample_ops = { @@ -241,7 +240,7 @@ static __net_init int sample_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, sample_net_id); - return tc_action_net_init(tn, &act_sample_ops, SAMPLE_TAB_MASK); + return tc_action_net_init(tn, &act_sample_ops); } static void __net_exit sample_exit_net(struct net *net) @@ -265,12 +264,13 @@ static int __init sample_init_module(void) static void __exit sample_cleanup_module(void) { + rcu_barrier(); tcf_unregister_action(&act_sample_ops, &sample_net_ops); } module_init(sample_init_module); module_exit(sample_cleanup_module); -MODULE_AUTHOR("Yotam Gigi <yotamg@mellanox.com>"); +MODULE_AUTHOR("Yotam Gigi <yotam.gi@gmail.com>"); MODULE_DESCRIPTION("Packet sampling action"); MODULE_LICENSE("GPL v2"); diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 43605e7ce051..e7b57e5071a3 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -24,8 +24,6 @@ #include <linux/tc_act/tc_defact.h> #include <net/tc_act/tc_defact.h> -#define SIMP_TAB_MASK 7 - static unsigned int simp_net_id; static struct tc_action_ops act_simp_ops; @@ -102,28 +100,28 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, return -EINVAL; parm = nla_data(tb[TCA_DEF_PARMS]); - exists = tcf_hash_check(tn, parm->index, a, bind); + exists = tcf_idr_check(tn, parm->index, a, bind); if (exists && bind) return 0; if (tb[TCA_DEF_DATA] == NULL) { if (exists) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -EINVAL; } defdata = nla_data(tb[TCA_DEF_DATA]); if (!exists) { - ret = tcf_hash_create(tn, parm->index, est, a, - &act_simp_ops, bind, false); + ret = tcf_idr_create(tn, parm->index, est, a, + &act_simp_ops, bind, false); if (ret) return ret; d = to_defact(*a); ret = alloc_defdata(d, defdata); if (ret < 0) { - tcf_hash_cleanup(*a, est); + tcf_idr_cleanup(*a, est); return ret; } d->tcf_action = parm->action; @@ -131,7 +129,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, } else { d = to_defact(*a); - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; @@ -139,7 +137,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, } if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; } @@ -183,7 +181,7 @@ static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, simp_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_simp_ops = { @@ -203,7 +201,7 @@ static __net_init int simp_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, simp_net_id); - return tc_action_net_init(tn, &act_simp_ops, SIMP_TAB_MASK); + return tc_action_net_init(tn, &act_simp_ops); } static void __net_exit simp_exit_net(struct net *net) diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index 6b3e65d7de0c..59949d61f20d 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -27,8 +27,6 @@ #include <linux/tc_act/tc_skbedit.h> #include <net/tc_act/tc_skbedit.h> -#define SKBEDIT_TAB_MASK 15 - static unsigned int skbedit_net_id; static struct tc_action_ops act_skbedit_ops; @@ -118,18 +116,18 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, parm = nla_data(tb[TCA_SKBEDIT_PARMS]); - exists = tcf_hash_check(tn, parm->index, a, bind); + exists = tcf_idr_check(tn, parm->index, a, bind); if (exists && bind) return 0; if (!flags) { - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -EINVAL; } if (!exists) { - ret = tcf_hash_create(tn, parm->index, est, a, - &act_skbedit_ops, bind, false); + ret = tcf_idr_create(tn, parm->index, est, a, + &act_skbedit_ops, bind, false); if (ret) return ret; @@ -137,7 +135,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, ret = ACT_P_CREATED; } else { d = to_skbedit(*a); - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } @@ -163,7 +161,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, spin_unlock_bh(&d->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; } @@ -221,7 +219,7 @@ static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, skbedit_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_skbedit_ops = { @@ -240,7 +238,7 @@ static __net_init int skbedit_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, skbedit_net_id); - return tc_action_net_init(tn, &act_skbedit_ops, SKBEDIT_TAB_MASK); + return tc_action_net_init(tn, &act_skbedit_ops); } static void __net_exit skbedit_exit_net(struct net *net) diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index a73c4bbcada2..b642ad3d39dd 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -20,8 +20,6 @@ #include <linux/tc_act/tc_skbmod.h> #include <net/tc_act/tc_skbmod.h> -#define SKBMOD_TAB_MASK 15 - static unsigned int skbmod_net_id; static struct tc_action_ops act_skbmod_ops; @@ -129,7 +127,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, if (parm->flags & SKBMOD_F_SWAPMAC) lflags = SKBMOD_F_SWAPMAC; - exists = tcf_hash_check(tn, parm->index, a, bind); + exists = tcf_idr_check(tn, parm->index, a, bind); if (exists && bind) return 0; @@ -137,14 +135,14 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, return -EINVAL; if (!exists) { - ret = tcf_hash_create(tn, parm->index, est, a, - &act_skbmod_ops, bind, true); + ret = tcf_idr_create(tn, parm->index, est, a, + &act_skbmod_ops, bind, true); if (ret) return ret; ret = ACT_P_CREATED; } else { - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } @@ -155,7 +153,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, p = kzalloc(sizeof(struct tcf_skbmod_params), GFP_KERNEL); if (unlikely(!p)) { if (ovr) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -ENOMEM; } @@ -182,7 +180,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, kfree_rcu(p_old, rcu); if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; } @@ -245,7 +243,7 @@ static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, skbmod_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_skbmod_ops = { @@ -265,7 +263,7 @@ static __net_init int skbmod_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, skbmod_net_id); - return tc_action_net_init(tn, &act_skbmod_ops, SKBMOD_TAB_MASK); + return tc_action_net_init(tn, &act_skbmod_ops); } static void __net_exit skbmod_exit_net(struct net *net) diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index fd7e75679c69..30c96274c638 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -20,8 +20,6 @@ #include <linux/tc_act/tc_tunnel_key.h> #include <net/tc_act/tc_tunnel_key.h> -#define TUNNEL_KEY_TAB_MASK 15 - static unsigned int tunnel_key_net_id; static struct tc_action_ops act_tunnel_key_ops; @@ -100,7 +98,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, return -EINVAL; parm = nla_data(tb[TCA_TUNNEL_KEY_PARMS]); - exists = tcf_hash_check(tn, parm->index, a, bind); + exists = tcf_idr_check(tn, parm->index, a, bind); if (exists && bind) return 0; @@ -159,14 +157,14 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, } if (!exists) { - ret = tcf_hash_create(tn, parm->index, est, a, - &act_tunnel_key_ops, bind, true); + ret = tcf_idr_create(tn, parm->index, est, a, + &act_tunnel_key_ops, bind, true); if (ret) return ret; ret = ACT_P_CREATED; } else { - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } @@ -177,7 +175,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, params_new = kzalloc(sizeof(*params_new), GFP_KERNEL); if (unlikely(!params_new)) { if (ret == ACT_P_CREATED) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -ENOMEM; } @@ -193,13 +191,13 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, kfree_rcu(params_old, rcu); if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; err_out: if (exists) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return ret; } @@ -304,7 +302,7 @@ static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_tunnel_key_ops = { @@ -324,7 +322,7 @@ static __net_init int tunnel_key_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); - return tc_action_net_init(tn, &act_tunnel_key_ops, TUNNEL_KEY_TAB_MASK); + return tc_action_net_init(tn, &act_tunnel_key_ops); } static void __net_exit tunnel_key_exit_net(struct net *net) diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 13ba3a89f675..97f717a13ad5 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -19,8 +19,6 @@ #include <linux/tc_act/tc_vlan.h> #include <net/tc_act/tc_vlan.h> -#define VLAN_TAB_MASK 15 - static unsigned int vlan_net_id; static struct tc_action_ops act_vlan_ops; @@ -28,14 +26,13 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, struct tcf_result *res) { struct tcf_vlan *v = to_vlan(a); + struct tcf_vlan_params *p; int action; int err; u16 tci; - spin_lock(&v->tcf_lock); tcf_lastuse_update(&v->tcf_tm); - bstats_update(&v->tcf_bstats, skb); - action = v->tcf_action; + bstats_cpu_update(this_cpu_ptr(v->common.cpu_bstats), skb); /* Ensure 'data' points at mac_header prior calling vlan manipulating * functions. @@ -43,15 +40,21 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, if (skb_at_tc_ingress(skb)) skb_push_rcsum(skb, skb->mac_len); - switch (v->tcfv_action) { + rcu_read_lock(); + + action = READ_ONCE(v->tcf_action); + + p = rcu_dereference(v->vlan_p); + + switch (p->tcfv_action) { case TCA_VLAN_ACT_POP: err = skb_vlan_pop(skb); if (err) goto drop; break; case TCA_VLAN_ACT_PUSH: - err = skb_vlan_push(skb, v->tcfv_push_proto, v->tcfv_push_vid | - (v->tcfv_push_prio << VLAN_PRIO_SHIFT)); + err = skb_vlan_push(skb, p->tcfv_push_proto, p->tcfv_push_vid | + (p->tcfv_push_prio << VLAN_PRIO_SHIFT)); if (err) goto drop; break; @@ -70,14 +73,14 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, goto drop; } /* replace the vid */ - tci = (tci & ~VLAN_VID_MASK) | v->tcfv_push_vid; + tci = (tci & ~VLAN_VID_MASK) | p->tcfv_push_vid; /* replace prio bits, if tcfv_push_prio specified */ - if (v->tcfv_push_prio) { + if (p->tcfv_push_prio) { tci &= ~VLAN_PRIO_MASK; - tci |= v->tcfv_push_prio << VLAN_PRIO_SHIFT; + tci |= p->tcfv_push_prio << VLAN_PRIO_SHIFT; } /* put updated tci as hwaccel tag */ - __vlan_hwaccel_put_tag(skb, v->tcfv_push_proto, tci); + __vlan_hwaccel_put_tag(skb, p->tcfv_push_proto, tci); break; default: BUG(); @@ -87,12 +90,13 @@ static int tcf_vlan(struct sk_buff *skb, const struct tc_action *a, drop: action = TC_ACT_SHOT; - v->tcf_qstats.drops++; + qstats_drop_inc(this_cpu_ptr(v->common.cpu_qstats)); + unlock: + rcu_read_unlock(); if (skb_at_tc_ingress(skb)) skb_pull_rcsum(skb, skb->mac_len); - spin_unlock(&v->tcf_lock); return action; } @@ -109,6 +113,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, { struct tc_action_net *tn = net_generic(net, vlan_net_id); struct nlattr *tb[TCA_VLAN_MAX + 1]; + struct tcf_vlan_params *p, *p_old; struct tc_vlan *parm; struct tcf_vlan *v; int action; @@ -128,7 +133,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, if (!tb[TCA_VLAN_PARMS]) return -EINVAL; parm = nla_data(tb[TCA_VLAN_PARMS]); - exists = tcf_hash_check(tn, parm->index, a, bind); + exists = tcf_idr_check(tn, parm->index, a, bind); if (exists && bind) return 0; @@ -139,13 +144,13 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, case TCA_VLAN_ACT_MODIFY: if (!tb[TCA_VLAN_PUSH_VLAN_ID]) { if (exists) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -EINVAL; } push_vid = nla_get_u16(tb[TCA_VLAN_PUSH_VLAN_ID]); if (push_vid >= VLAN_VID_MASK) { if (exists) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -ERANGE; } @@ -167,66 +172,87 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, break; default: if (exists) - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); return -EINVAL; } action = parm->v_action; if (!exists) { - ret = tcf_hash_create(tn, parm->index, est, a, - &act_vlan_ops, bind, false); + ret = tcf_idr_create(tn, parm->index, est, a, + &act_vlan_ops, bind, true); if (ret) return ret; ret = ACT_P_CREATED; } else { - tcf_hash_release(*a, bind); + tcf_idr_release(*a, bind); if (!ovr) return -EEXIST; } v = to_vlan(*a); - spin_lock_bh(&v->tcf_lock); - - v->tcfv_action = action; - v->tcfv_push_vid = push_vid; - v->tcfv_push_prio = push_prio; - v->tcfv_push_proto = push_proto; + ASSERT_RTNL(); + p = kzalloc(sizeof(*p), GFP_KERNEL); + if (!p) { + if (ovr) + tcf_idr_release(*a, bind); + return -ENOMEM; + } v->tcf_action = parm->action; - spin_unlock_bh(&v->tcf_lock); + p_old = rtnl_dereference(v->vlan_p); + + p->tcfv_action = action; + p->tcfv_push_vid = push_vid; + p->tcfv_push_prio = push_prio; + p->tcfv_push_proto = push_proto; + + rcu_assign_pointer(v->vlan_p, p); + + if (p_old) + kfree_rcu(p_old, rcu); if (ret == ACT_P_CREATED) - tcf_hash_insert(tn, *a); + tcf_idr_insert(tn, *a); return ret; } +static void tcf_vlan_cleanup(struct tc_action *a, int bind) +{ + struct tcf_vlan *v = to_vlan(a); + struct tcf_vlan_params *p; + + p = rcu_dereference_protected(v->vlan_p, 1); + kfree_rcu(p, rcu); +} + static int tcf_vlan_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb_tail_pointer(skb); struct tcf_vlan *v = to_vlan(a); + struct tcf_vlan_params *p = rtnl_dereference(v->vlan_p); struct tc_vlan opt = { .index = v->tcf_index, .refcnt = v->tcf_refcnt - ref, .bindcnt = v->tcf_bindcnt - bind, .action = v->tcf_action, - .v_action = v->tcfv_action, + .v_action = p->tcfv_action, }; struct tcf_t t; if (nla_put(skb, TCA_VLAN_PARMS, sizeof(opt), &opt)) goto nla_put_failure; - if ((v->tcfv_action == TCA_VLAN_ACT_PUSH || - v->tcfv_action == TCA_VLAN_ACT_MODIFY) && - (nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, v->tcfv_push_vid) || + if ((p->tcfv_action == TCA_VLAN_ACT_PUSH || + p->tcfv_action == TCA_VLAN_ACT_MODIFY) && + (nla_put_u16(skb, TCA_VLAN_PUSH_VLAN_ID, p->tcfv_push_vid) || nla_put_be16(skb, TCA_VLAN_PUSH_VLAN_PROTOCOL, - v->tcfv_push_proto) || + p->tcfv_push_proto) || (nla_put_u8(skb, TCA_VLAN_PUSH_VLAN_PRIORITY, - v->tcfv_push_prio)))) + p->tcfv_push_prio)))) goto nla_put_failure; tcf_tm_dump(&t, &v->tcf_tm); @@ -252,7 +278,7 @@ static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index) { struct tc_action_net *tn = net_generic(net, vlan_net_id); - return tcf_hash_search(tn, a, index); + return tcf_idr_search(tn, a, index); } static struct tc_action_ops act_vlan_ops = { @@ -262,6 +288,7 @@ static struct tc_action_ops act_vlan_ops = { .act = tcf_vlan, .dump = tcf_vlan_dump, .init = tcf_vlan_init, + .cleanup = tcf_vlan_cleanup, .walk = tcf_vlan_walker, .lookup = tcf_vlan_search, .size = sizeof(struct tcf_vlan), @@ -271,7 +298,7 @@ static __net_init int vlan_init_net(struct net *net) { struct tc_action_net *tn = net_generic(net, vlan_net_id); - return tc_action_net_init(tn, &act_vlan_ops, VLAN_TAB_MASK); + return tc_action_net_init(tn, &act_vlan_ops); } static void __net_exit vlan_exit_net(struct net *net) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 39da0c5801c9..ab255b421781 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -77,6 +77,8 @@ out: } EXPORT_SYMBOL(register_tcf_proto_ops); +static struct workqueue_struct *tc_filter_wq; + int unregister_tcf_proto_ops(struct tcf_proto_ops *ops) { struct tcf_proto_ops *t; @@ -86,6 +88,7 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops) * tcf_proto_ops's destroy() handler. */ rcu_barrier(); + flush_workqueue(tc_filter_wq); write_lock(&cls_mod_lock); list_for_each_entry(t, &tcf_proto_base, head) { @@ -100,20 +103,11 @@ int unregister_tcf_proto_ops(struct tcf_proto_ops *ops) } EXPORT_SYMBOL(unregister_tcf_proto_ops); -static int tfilter_notify(struct net *net, struct sk_buff *oskb, - struct nlmsghdr *n, struct tcf_proto *tp, - unsigned long fh, int event, bool unicast); - -static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb, - struct nlmsghdr *n, - struct tcf_chain *chain, int event) +bool tcf_queue_work(struct work_struct *work) { - struct tcf_proto *tp; - - for (tp = rtnl_dereference(chain->filter_chain); - tp; tp = rtnl_dereference(tp->next)) - tfilter_notify(net, oskb, n, tp, 0, event, false); + return queue_work(tc_filter_wq, work); } +EXPORT_SYMBOL(tcf_queue_work); /* Select new prio value from the range, managed by kernel. */ @@ -201,14 +195,22 @@ static struct tcf_chain *tcf_chain_create(struct tcf_block *block, return chain; } +static void tcf_chain_head_change(struct tcf_chain *chain, + struct tcf_proto *tp_head) +{ + if (chain->chain_head_change) + chain->chain_head_change(tp_head, + chain->chain_head_change_priv); +} + static void tcf_chain_flush(struct tcf_chain *chain) { struct tcf_proto *tp; - if (*chain->p_filter_chain) - RCU_INIT_POINTER(*chain->p_filter_chain, NULL); + tcf_chain_head_change(chain, NULL); while ((tp = rtnl_dereference(chain->filter_chain)) != NULL) { RCU_INIT_POINTER(chain->filter_chain, tp->next); + tcf_chain_put(chain); tcf_proto_destroy(tp); } } @@ -216,10 +218,14 @@ static void tcf_chain_flush(struct tcf_chain *chain) static void tcf_chain_destroy(struct tcf_chain *chain) { list_del(&chain->list); - tcf_chain_flush(chain); kfree(chain); } +static void tcf_chain_hold(struct tcf_chain *chain) +{ + ++chain->refcnt; +} + struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, bool create) { @@ -227,36 +233,51 @@ struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, list_for_each_entry(chain, &block->chain_list, list) { if (chain->index == chain_index) { - chain->refcnt++; + tcf_chain_hold(chain); return chain; } } - if (create) - return tcf_chain_create(block, chain_index); - else - return NULL; + + return create ? tcf_chain_create(block, chain_index) : NULL; } EXPORT_SYMBOL(tcf_chain_get); void tcf_chain_put(struct tcf_chain *chain) { - /* Destroy unused chain, with exception of chain 0, which is the - * default one and has to be always present. - */ - if (--chain->refcnt == 0 && !chain->filter_chain && chain->index != 0) + if (--chain->refcnt == 0) tcf_chain_destroy(chain); } EXPORT_SYMBOL(tcf_chain_put); -static void -tcf_chain_filter_chain_ptr_set(struct tcf_chain *chain, - struct tcf_proto __rcu **p_filter_chain) +static void tcf_block_offload_cmd(struct tcf_block *block, struct Qdisc *q, + struct tcf_block_ext_info *ei, + enum tc_block_command command) { - chain->p_filter_chain = p_filter_chain; + struct net_device *dev = q->dev_queue->dev; + struct tc_block_offload bo = {}; + + if (!dev->netdev_ops->ndo_setup_tc) + return; + bo.command = command; + bo.binder_type = ei->binder_type; + bo.block = block; + dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo); } -int tcf_block_get(struct tcf_block **p_block, - struct tcf_proto __rcu **p_filter_chain) +static void tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q, + struct tcf_block_ext_info *ei) +{ + tcf_block_offload_cmd(block, q, ei, TC_BLOCK_BIND); +} + +static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q, + struct tcf_block_ext_info *ei) +{ + tcf_block_offload_cmd(block, q, ei, TC_BLOCK_UNBIND); +} + +int tcf_block_get_ext(struct tcf_block **p_block, struct Qdisc *q, + struct tcf_block_ext_info *ei) { struct tcf_block *block = kzalloc(sizeof(*block), GFP_KERNEL); struct tcf_chain *chain; @@ -265,13 +286,20 @@ int tcf_block_get(struct tcf_block **p_block, if (!block) return -ENOMEM; INIT_LIST_HEAD(&block->chain_list); + INIT_LIST_HEAD(&block->cb_list); + /* Create chain 0 by default, it has to be always present. */ chain = tcf_chain_create(block, 0); if (!chain) { err = -ENOMEM; goto err_chain_create; } - tcf_chain_filter_chain_ptr_set(chain, p_filter_chain); + WARN_ON(!ei->chain_head_change); + chain->chain_head_change = ei->chain_head_change; + chain->chain_head_change_priv = ei->chain_head_change_priv; + block->net = qdisc_net(q); + block->q = q; + tcf_block_offload_bind(block, q, ei); *p_block = block; return 0; @@ -279,21 +307,179 @@ err_chain_create: kfree(block); return err; } +EXPORT_SYMBOL(tcf_block_get_ext); + +static void tcf_chain_head_change_dflt(struct tcf_proto *tp_head, void *priv) +{ + struct tcf_proto __rcu **p_filter_chain = priv; + + rcu_assign_pointer(*p_filter_chain, tp_head); +} + +int tcf_block_get(struct tcf_block **p_block, + struct tcf_proto __rcu **p_filter_chain, struct Qdisc *q) +{ + struct tcf_block_ext_info ei = { + .chain_head_change = tcf_chain_head_change_dflt, + .chain_head_change_priv = p_filter_chain, + }; + + WARN_ON(!p_filter_chain); + return tcf_block_get_ext(p_block, q, &ei); +} EXPORT_SYMBOL(tcf_block_get); -void tcf_block_put(struct tcf_block *block) +static void tcf_block_put_final(struct work_struct *work) { + struct tcf_block *block = container_of(work, struct tcf_block, work); struct tcf_chain *chain, *tmp; - if (!block) - return; - + rtnl_lock(); + /* Only chain 0 should be still here. */ list_for_each_entry_safe(chain, tmp, &block->chain_list, list) - tcf_chain_destroy(chain); + tcf_chain_put(chain); + rtnl_unlock(); kfree(block); } + +/* XXX: Standalone actions are not allowed to jump to any chain, and bound + * actions should be all removed after flushing. However, filters are now + * destroyed in tc filter workqueue with RTNL lock, they can not race here. + */ +void tcf_block_put_ext(struct tcf_block *block, struct Qdisc *q, + struct tcf_block_ext_info *ei) +{ + struct tcf_chain *chain, *tmp; + + list_for_each_entry_safe(chain, tmp, &block->chain_list, list) + tcf_chain_flush(chain); + + tcf_block_offload_unbind(block, q, ei); + + INIT_WORK(&block->work, tcf_block_put_final); + /* Wait for existing RCU callbacks to cool down, make sure their works + * have been queued before this. We can not flush pending works here + * because we are holding the RTNL lock. + */ + rcu_barrier(); + tcf_queue_work(&block->work); +} +EXPORT_SYMBOL(tcf_block_put_ext); + +void tcf_block_put(struct tcf_block *block) +{ + struct tcf_block_ext_info ei = {0, }; + + if (!block) + return; + tcf_block_put_ext(block, block->q, &ei); +} + EXPORT_SYMBOL(tcf_block_put); +struct tcf_block_cb { + struct list_head list; + tc_setup_cb_t *cb; + void *cb_ident; + void *cb_priv; + unsigned int refcnt; +}; + +void *tcf_block_cb_priv(struct tcf_block_cb *block_cb) +{ + return block_cb->cb_priv; +} +EXPORT_SYMBOL(tcf_block_cb_priv); + +struct tcf_block_cb *tcf_block_cb_lookup(struct tcf_block *block, + tc_setup_cb_t *cb, void *cb_ident) +{ struct tcf_block_cb *block_cb; + + list_for_each_entry(block_cb, &block->cb_list, list) + if (block_cb->cb == cb && block_cb->cb_ident == cb_ident) + return block_cb; + return NULL; +} +EXPORT_SYMBOL(tcf_block_cb_lookup); + +void tcf_block_cb_incref(struct tcf_block_cb *block_cb) +{ + block_cb->refcnt++; +} +EXPORT_SYMBOL(tcf_block_cb_incref); + +unsigned int tcf_block_cb_decref(struct tcf_block_cb *block_cb) +{ + return --block_cb->refcnt; +} +EXPORT_SYMBOL(tcf_block_cb_decref); + +struct tcf_block_cb *__tcf_block_cb_register(struct tcf_block *block, + tc_setup_cb_t *cb, void *cb_ident, + void *cb_priv) +{ + struct tcf_block_cb *block_cb; + + block_cb = kzalloc(sizeof(*block_cb), GFP_KERNEL); + if (!block_cb) + return NULL; + block_cb->cb = cb; + block_cb->cb_ident = cb_ident; + block_cb->cb_priv = cb_priv; + list_add(&block_cb->list, &block->cb_list); + return block_cb; +} +EXPORT_SYMBOL(__tcf_block_cb_register); + +int tcf_block_cb_register(struct tcf_block *block, + tc_setup_cb_t *cb, void *cb_ident, + void *cb_priv) +{ + struct tcf_block_cb *block_cb; + + block_cb = __tcf_block_cb_register(block, cb, cb_ident, cb_priv); + return block_cb ? 0 : -ENOMEM; +} +EXPORT_SYMBOL(tcf_block_cb_register); + +void __tcf_block_cb_unregister(struct tcf_block_cb *block_cb) +{ + list_del(&block_cb->list); + kfree(block_cb); +} +EXPORT_SYMBOL(__tcf_block_cb_unregister); + +void tcf_block_cb_unregister(struct tcf_block *block, + tc_setup_cb_t *cb, void *cb_ident) +{ + struct tcf_block_cb *block_cb; + + block_cb = tcf_block_cb_lookup(block, cb, cb_ident); + if (!block_cb) + return; + __tcf_block_cb_unregister(block_cb); +} +EXPORT_SYMBOL(tcf_block_cb_unregister); + +static int tcf_block_cb_call(struct tcf_block *block, enum tc_setup_type type, + void *type_data, bool err_stop) +{ + struct tcf_block_cb *block_cb; + int ok_count = 0; + int err; + + list_for_each_entry(block_cb, &block->cb_list, list) { + err = block_cb->cb(type, type_data, block_cb->cb_priv); + if (err) { + if (err_stop) + return err; + } else { + ok_count++; + } + } + return ok_count; +} + /* Main classifier routine: scans classifier chain attached * to this qdisc, (optionally) tests for protocol and asks * specific classifiers. @@ -362,11 +548,11 @@ static void tcf_chain_tp_insert(struct tcf_chain *chain, struct tcf_chain_info *chain_info, struct tcf_proto *tp) { - if (chain->p_filter_chain && - *chain_info->pprev == chain->filter_chain) - rcu_assign_pointer(*chain->p_filter_chain, tp); + if (*chain_info->pprev == chain->filter_chain) + tcf_chain_head_change(chain, tp); RCU_INIT_POINTER(tp->next, tcf_chain_tp_prev(chain_info)); rcu_assign_pointer(*chain_info->pprev, tp); + tcf_chain_hold(chain); } static void tcf_chain_tp_remove(struct tcf_chain *chain, @@ -375,9 +561,10 @@ static void tcf_chain_tp_remove(struct tcf_chain *chain, { struct tcf_proto *next = rtnl_dereference(chain_info->next); - if (chain->p_filter_chain && tp == chain->filter_chain) - RCU_INIT_POINTER(*chain->p_filter_chain, next); + if (tp == chain->filter_chain) + tcf_chain_head_change(chain, next); RCU_INIT_POINTER(*chain_info->pprev, next); + tcf_chain_put(chain); } static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, @@ -407,6 +594,112 @@ static struct tcf_proto *tcf_chain_tp_find(struct tcf_chain *chain, return tp; } +static int tcf_fill_node(struct net *net, struct sk_buff *skb, + struct tcf_proto *tp, struct Qdisc *q, u32 parent, + void *fh, u32 portid, u32 seq, u16 flags, int event) +{ + struct tcmsg *tcm; + struct nlmsghdr *nlh; + unsigned char *b = skb_tail_pointer(skb); + + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); + if (!nlh) + goto out_nlmsg_trim; + tcm = nlmsg_data(nlh); + tcm->tcm_family = AF_UNSPEC; + tcm->tcm__pad1 = 0; + tcm->tcm__pad2 = 0; + tcm->tcm_ifindex = qdisc_dev(q)->ifindex; + tcm->tcm_parent = parent; + tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol); + if (nla_put_string(skb, TCA_KIND, tp->ops->kind)) + goto nla_put_failure; + if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index)) + goto nla_put_failure; + if (!fh) { + tcm->tcm_handle = 0; + } else { + if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0) + goto nla_put_failure; + } + nlh->nlmsg_len = skb_tail_pointer(skb) - b; + return skb->len; + +out_nlmsg_trim: +nla_put_failure: + nlmsg_trim(skb, b); + return -1; +} + +static int tfilter_notify(struct net *net, struct sk_buff *oskb, + struct nlmsghdr *n, struct tcf_proto *tp, + struct Qdisc *q, u32 parent, + void *fh, int event, bool unicast) +{ + struct sk_buff *skb; + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + if (tcf_fill_node(net, skb, tp, q, parent, fh, portid, n->nlmsg_seq, + n->nlmsg_flags, event) <= 0) { + kfree_skb(skb); + return -EINVAL; + } + + if (unicast) + return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); + + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); +} + +static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, + struct nlmsghdr *n, struct tcf_proto *tp, + struct Qdisc *q, u32 parent, + void *fh, bool unicast, bool *last) +{ + struct sk_buff *skb; + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; + int err; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + if (tcf_fill_node(net, skb, tp, q, parent, fh, portid, n->nlmsg_seq, + n->nlmsg_flags, RTM_DELTFILTER) <= 0) { + kfree_skb(skb); + return -EINVAL; + } + + err = tp->ops->delete(tp, fh, last); + if (err) { + kfree_skb(skb); + return err; + } + + if (unicast) + return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); + + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); +} + +static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb, + struct Qdisc *q, u32 parent, + struct nlmsghdr *n, + struct tcf_chain *chain, int event) +{ + struct tcf_proto *tp; + + for (tp = rtnl_dereference(chain->filter_chain); + tp; tp = rtnl_dereference(tp->next)) + tfilter_notify(net, oskb, n, tp, q, parent, 0, event, false); +} + /* Add/change/delete/get a filter node */ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, @@ -428,7 +721,7 @@ static int tc_ctl_tfilter(struct sk_buff *skb, struct nlmsghdr *n, struct tcf_proto *tp; const struct Qdisc_class_ops *cops; unsigned long cl; - unsigned long fh; + void *fh; int err; int tp_created; @@ -498,7 +791,7 @@ replay: /* Do we search for filter, attached to class? */ if (TC_H_MIN(parent)) { - cl = cops->get(q, parent); + cl = cops->find(q, parent); if (cl == 0) return -ENOENT; } @@ -523,7 +816,8 @@ replay: } if (n->nlmsg_type == RTM_DELTFILTER && prio == 0) { - tfilter_notify_chain(net, skb, n, chain, RTM_DELTFILTER); + tfilter_notify_chain(net, skb, q, parent, n, + chain, RTM_DELTFILTER); tcf_chain_flush(chain); err = 0; goto errout; @@ -567,10 +861,10 @@ replay: fh = tp->ops->get(tp, t->tcm_handle); - if (fh == 0) { + if (!fh) { if (n->nlmsg_type == RTM_DELTFILTER && t->tcm_handle == 0) { tcf_chain_tp_remove(chain, &chain_info, tp); - tfilter_notify(net, skb, n, tp, fh, + tfilter_notify(net, skb, n, tp, q, parent, fh, RTM_DELTFILTER, false); tcf_proto_destroy(tp); err = 0; @@ -595,18 +889,17 @@ replay: } break; case RTM_DELTFILTER: - err = tp->ops->delete(tp, fh, &last); + err = tfilter_del_notify(net, skb, n, tp, q, parent, + fh, false, &last); if (err) goto errout; - tfilter_notify(net, skb, n, tp, t->tcm_handle, - RTM_DELTFILTER, false); if (last) { tcf_chain_tp_remove(chain, &chain_info, tp); tcf_proto_destroy(tp); } goto errout; case RTM_GETTFILTER: - err = tfilter_notify(net, skb, n, tp, fh, + err = tfilter_notify(net, skb, n, tp, q, parent, fh, RTM_NEWTFILTER, true); goto errout; default: @@ -620,7 +913,8 @@ replay: if (err == 0) { if (tp_created) tcf_chain_tp_insert(chain, &chain_info, tp); - tfilter_notify(net, skb, n, tp, fh, RTM_NEWTFILTER, false); + tfilter_notify(net, skb, n, tp, q, parent, fh, + RTM_NEWTFILTER, false); } else { if (tp_created) tcf_proto_destroy(tp); @@ -629,94 +923,33 @@ replay: errout: if (chain) tcf_chain_put(chain); - if (cl) - cops->put(q, cl); if (err == -EAGAIN) /* Replay the request. */ goto replay; return err; } -static int tcf_fill_node(struct net *net, struct sk_buff *skb, - struct tcf_proto *tp, unsigned long fh, u32 portid, - u32 seq, u16 flags, int event) -{ - struct tcmsg *tcm; - struct nlmsghdr *nlh; - unsigned char *b = skb_tail_pointer(skb); - - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); - if (!nlh) - goto out_nlmsg_trim; - tcm = nlmsg_data(nlh); - tcm->tcm_family = AF_UNSPEC; - tcm->tcm__pad1 = 0; - tcm->tcm__pad2 = 0; - tcm->tcm_ifindex = qdisc_dev(tp->q)->ifindex; - tcm->tcm_parent = tp->classid; - tcm->tcm_info = TC_H_MAKE(tp->prio, tp->protocol); - if (nla_put_string(skb, TCA_KIND, tp->ops->kind)) - goto nla_put_failure; - if (nla_put_u32(skb, TCA_CHAIN, tp->chain->index)) - goto nla_put_failure; - tcm->tcm_handle = fh; - if (RTM_DELTFILTER != event) { - tcm->tcm_handle = 0; - if (tp->ops->dump && tp->ops->dump(net, tp, fh, skb, tcm) < 0) - goto nla_put_failure; - } - nlh->nlmsg_len = skb_tail_pointer(skb) - b; - return skb->len; - -out_nlmsg_trim: -nla_put_failure: - nlmsg_trim(skb, b); - return -1; -} - -static int tfilter_notify(struct net *net, struct sk_buff *oskb, - struct nlmsghdr *n, struct tcf_proto *tp, - unsigned long fh, int event, bool unicast) -{ - struct sk_buff *skb; - u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; - - skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); - if (!skb) - return -ENOBUFS; - - if (tcf_fill_node(net, skb, tp, fh, portid, n->nlmsg_seq, - n->nlmsg_flags, event) <= 0) { - kfree_skb(skb); - return -EINVAL; - } - - if (unicast) - return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); - - return rtnetlink_send(skb, net, portid, RTNLGRP_TC, - n->nlmsg_flags & NLM_F_ECHO); -} - struct tcf_dump_args { struct tcf_walker w; struct sk_buff *skb; struct netlink_callback *cb; + struct Qdisc *q; + u32 parent; }; -static int tcf_node_dump(struct tcf_proto *tp, unsigned long n, - struct tcf_walker *arg) +static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg) { struct tcf_dump_args *a = (void *)arg; struct net *net = sock_net(a->skb->sk); - return tcf_fill_node(net, a->skb, tp, n, NETLINK_CB(a->cb->skb).portid, + return tcf_fill_node(net, a->skb, tp, a->q, a->parent, + n, NETLINK_CB(a->cb->skb).portid, a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER); } -static bool tcf_chain_dump(struct tcf_chain *chain, struct sk_buff *skb, - struct netlink_callback *cb, +static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent, + struct sk_buff *skb, struct netlink_callback *cb, long index_start, long *p_index) { struct net *net = sock_net(skb->sk); @@ -738,7 +971,7 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct sk_buff *skb, memset(&cb->args[1], 0, sizeof(cb->args) - sizeof(cb->args[0])); if (cb->args[1] == 0) { - if (tcf_fill_node(net, skb, tp, 0, + if (tcf_fill_node(net, skb, tp, q, parent, 0, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER) <= 0) @@ -751,6 +984,8 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct sk_buff *skb, arg.w.fn = tcf_node_dump; arg.skb = skb; arg.cb = cb; + arg.q = q; + arg.parent = parent; arg.w.stop = 0; arg.w.skip = cb->args[1] - 1; arg.w.count = 0; @@ -776,6 +1011,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) const struct Qdisc_class_ops *cops; long index_start; long index; + u32 parent; int err; if (nlmsg_len(cb->nlh) < sizeof(*tcm)) @@ -789,25 +1025,28 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) if (!dev) return skb->len; - if (!tcm->tcm_parent) + parent = tcm->tcm_parent; + if (!parent) { q = dev->qdisc; - else + parent = q->handle; + } else { q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); + } if (!q) goto out; cops = q->ops->cl_ops; if (!cops) - goto errout; + goto out; if (!cops->tcf_block) - goto errout; + goto out; if (TC_H_MIN(tcm->tcm_parent)) { - cl = cops->get(q, tcm->tcm_parent); + cl = cops->find(q, tcm->tcm_parent); if (cl == 0) - goto errout; + goto out; } block = cops->tcf_block(q, cl); if (!block) - goto errout; + goto out; index_start = cb->args[0]; index = 0; @@ -816,15 +1055,13 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) if (tca[TCA_CHAIN] && nla_get_u32(tca[TCA_CHAIN]) != chain->index) continue; - if (!tcf_chain_dump(chain, skb, cb, index_start, &index)) + if (!tcf_chain_dump(chain, q, parent, skb, cb, + index_start, &index)) break; } cb->args[0] = index; -errout: - if (cl) - cops->put(q, cl); out: return skb->len; } @@ -834,6 +1071,7 @@ void tcf_exts_destroy(struct tcf_exts *exts) #ifdef CONFIG_NET_CLS_ACT LIST_HEAD(actions); + ASSERT_RTNL(); tcf_exts_to_list(exts, &actions); tcf_action_destroy(&actions, TCA_ACT_UNBIND); kfree(exts->actions); @@ -872,6 +1110,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, exts->actions[i++] = act; exts->nr_actions = i; } + exts->net = net; } #else if ((exts->action && tb[exts->action]) || @@ -883,18 +1122,12 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp, struct nlattr **tb, } EXPORT_SYMBOL(tcf_exts_validate); -void tcf_exts_change(struct tcf_proto *tp, struct tcf_exts *dst, - struct tcf_exts *src) +void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src) { #ifdef CONFIG_NET_CLS_ACT struct tcf_exts old = *dst; - tcf_tree_lock(tp); - dst->nr_actions = src->nr_actions; - dst->actions = src->actions; - dst->type = src->type; - tcf_tree_unlock(tp); - + *dst = *src; tcf_exts_destroy(&old); #endif } @@ -915,7 +1148,7 @@ int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts) #ifdef CONFIG_NET_CLS_ACT struct nlattr *nest; - if (exts->action && exts->nr_actions) { + if (exts->action && tcf_exts_has_actions(exts)) { /* * again for backward compatible mode - we want * to work with both old and new modes of entering @@ -965,36 +1198,67 @@ int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts) } EXPORT_SYMBOL(tcf_exts_dump_stats); -int tcf_exts_get_dev(struct net_device *dev, struct tcf_exts *exts, - struct net_device **hw_dev) +static int tc_exts_setup_cb_egdev_call(struct tcf_exts *exts, + enum tc_setup_type type, + void *type_data, bool err_stop) { + int ok_count = 0; #ifdef CONFIG_NET_CLS_ACT const struct tc_action *a; - LIST_HEAD(actions); + struct net_device *dev; + int i, ret; - if (tc_no_actions(exts)) - return -EINVAL; + if (!tcf_exts_has_actions(exts)) + return 0; - tcf_exts_to_list(exts, &actions); - list_for_each_entry(a, &actions, list) { - if (a->ops->get_dev) { - a->ops->get_dev(a, dev_net(dev), hw_dev); - break; - } + for (i = 0; i < exts->nr_actions; i++) { + a = exts->actions[i]; + if (!a->ops->get_dev) + continue; + dev = a->ops->get_dev(a); + if (!dev) + continue; + ret = tc_setup_cb_egdev_call(dev, type, type_data, err_stop); + if (ret < 0) + return ret; + ok_count += ret; } - if (*hw_dev) - return 0; #endif - return -EOPNOTSUPP; + return ok_count; +} + +int tc_setup_cb_call(struct tcf_block *block, struct tcf_exts *exts, + enum tc_setup_type type, void *type_data, bool err_stop) +{ + int ok_count; + int ret; + + ret = tcf_block_cb_call(block, type, type_data, err_stop); + if (ret < 0) + return ret; + ok_count = ret; + + if (!exts) + return ok_count; + ret = tc_exts_setup_cb_egdev_call(exts, type, type_data, err_stop); + if (ret < 0) + return ret; + ok_count += ret; + + return ok_count; } -EXPORT_SYMBOL(tcf_exts_get_dev); +EXPORT_SYMBOL(tc_setup_cb_call); static int __init tc_filter_init(void) { - rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, NULL); - rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, NULL); + tc_filter_wq = alloc_ordered_workqueue("tc_filter_workqueue", 0); + if (!tc_filter_wq) + return -ENOMEM; + + rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_ctl_tfilter, NULL, 0); + rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_ctl_tfilter, NULL, 0); rtnl_register(PF_UNSPEC, RTM_GETTFILTER, tc_ctl_tfilter, - tc_dump_tfilter, NULL); + tc_dump_tfilter, 0); return 0; } diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index c4fd63a068f9..5f169ded347e 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -17,13 +17,14 @@ #include <linux/errno.h> #include <linux/rtnetlink.h> #include <linux/skbuff.h> +#include <linux/idr.h> #include <net/netlink.h> #include <net/act_api.h> #include <net/pkt_cls.h> struct basic_head { - u32 hgenerator; struct list_head flist; + struct idr handle_idr; struct rcu_head rcu; }; @@ -34,7 +35,10 @@ struct basic_filter { struct tcf_result res; struct tcf_proto *tp; struct list_head link; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp, @@ -56,20 +60,18 @@ static int basic_classify(struct sk_buff *skb, const struct tcf_proto *tp, return -1; } -static unsigned long basic_get(struct tcf_proto *tp, u32 handle) +static void *basic_get(struct tcf_proto *tp, u32 handle) { - unsigned long l = 0UL; struct basic_head *head = rtnl_dereference(tp->root); struct basic_filter *f; list_for_each_entry(f, &head->flist, link) { if (f->handle == handle) { - l = (unsigned long) f; - break; + return f; } } - return l; + return NULL; } static int basic_init(struct tcf_proto *tp) @@ -80,19 +82,36 @@ static int basic_init(struct tcf_proto *tp) if (head == NULL) return -ENOBUFS; INIT_LIST_HEAD(&head->flist); + idr_init(&head->handle_idr); rcu_assign_pointer(tp->root, head); return 0; } -static void basic_delete_filter(struct rcu_head *head) +static void __basic_delete_filter(struct basic_filter *f) { - struct basic_filter *f = container_of(head, struct basic_filter, rcu); - tcf_exts_destroy(&f->exts); tcf_em_tree_destroy(&f->ematches); + tcf_exts_put_net(&f->exts); kfree(f); } +static void basic_delete_filter_work(struct work_struct *work) +{ + struct basic_filter *f = container_of(work, struct basic_filter, work); + + rtnl_lock(); + __basic_delete_filter(f); + rtnl_unlock(); +} + +static void basic_delete_filter(struct rcu_head *head) +{ + struct basic_filter *f = container_of(head, struct basic_filter, rcu); + + INIT_WORK(&f->work, basic_delete_filter_work); + tcf_queue_work(&f->work); +} + static void basic_destroy(struct tcf_proto *tp) { struct basic_head *head = rtnl_dereference(tp->root); @@ -101,18 +120,25 @@ static void basic_destroy(struct tcf_proto *tp) list_for_each_entry_safe(f, n, &head->flist, link) { list_del_rcu(&f->link); tcf_unbind_filter(tp, &f->res); - call_rcu(&f->rcu, basic_delete_filter); + idr_remove_ext(&head->handle_idr, f->handle); + if (tcf_exts_get_net(&f->exts)) + call_rcu(&f->rcu, basic_delete_filter); + else + __basic_delete_filter(f); } + idr_destroy(&head->handle_idr); kfree_rcu(head, rcu); } -static int basic_delete(struct tcf_proto *tp, unsigned long arg, bool *last) +static int basic_delete(struct tcf_proto *tp, void *arg, bool *last) { struct basic_head *head = rtnl_dereference(tp->root); - struct basic_filter *f = (struct basic_filter *) arg; + struct basic_filter *f = arg; list_del_rcu(&f->link); tcf_unbind_filter(tp, &f->res); + idr_remove_ext(&head->handle_idr, f->handle); + tcf_exts_get_net(&f->exts); call_rcu(&f->rcu, basic_delete_filter); *last = list_empty(&head->flist); return 0; @@ -129,44 +155,34 @@ static int basic_set_parms(struct net *net, struct tcf_proto *tp, struct nlattr *est, bool ovr) { int err; - struct tcf_exts e; - struct tcf_ematch_tree t; - err = tcf_exts_init(&e, TCA_BASIC_ACT, TCA_BASIC_POLICE); + err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr); if (err < 0) return err; - err = tcf_exts_validate(net, tp, tb, est, &e, ovr); - if (err < 0) - goto errout; - err = tcf_em_tree_validate(tp, tb[TCA_BASIC_EMATCHES], &t); + err = tcf_em_tree_validate(tp, tb[TCA_BASIC_EMATCHES], &f->ematches); if (err < 0) - goto errout; + return err; if (tb[TCA_BASIC_CLASSID]) { f->res.classid = nla_get_u32(tb[TCA_BASIC_CLASSID]); tcf_bind_filter(tp, &f->res, base); } - tcf_exts_change(tp, &f->exts, &e); - tcf_em_tree_change(tp, &f->ematches, &t); f->tp = tp; - return 0; -errout: - tcf_exts_destroy(&e); - return err; } static int basic_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, unsigned long *arg, bool ovr) + struct nlattr **tca, void **arg, bool ovr) { int err; struct basic_head *head = rtnl_dereference(tp->root); struct nlattr *tb[TCA_BASIC_MAX + 1]; struct basic_filter *fold = (struct basic_filter *) *arg; struct basic_filter *fnew; + unsigned long idr_index; if (tca[TCA_OPTIONS] == NULL) return -EINVAL; @@ -189,35 +205,36 @@ static int basic_change(struct net *net, struct sk_buff *in_skb, if (err < 0) goto errout; - err = -EINVAL; if (handle) { fnew->handle = handle; - } else if (fold) { - fnew->handle = fold->handle; + if (!fold) { + err = idr_alloc_ext(&head->handle_idr, fnew, &idr_index, + handle, handle + 1, GFP_KERNEL); + if (err) + goto errout; + } } else { - unsigned int i = 0x80000000; - do { - if (++head->hgenerator == 0x7FFFFFFF) - head->hgenerator = 1; - } while (--i > 0 && basic_get(tp, head->hgenerator)); - - if (i <= 0) { - pr_err("Insufficient number of handles\n"); + err = idr_alloc_ext(&head->handle_idr, fnew, &idr_index, + 1, 0x7FFFFFFF, GFP_KERNEL); + if (err) goto errout; - } - - fnew->handle = head->hgenerator; + fnew->handle = idr_index; } err = basic_set_parms(net, tp, fnew, base, tb, tca[TCA_RATE], ovr); - if (err < 0) + if (err < 0) { + if (!fold) + idr_remove_ext(&head->handle_idr, fnew->handle); goto errout; + } - *arg = (unsigned long)fnew; + *arg = fnew; if (fold) { + idr_replace_ext(&head->handle_idr, fnew, fnew->handle); list_replace_rcu(&fold->link, &fnew->link); tcf_unbind_filter(tp, &fold->res); + tcf_exts_get_net(&fold->exts); call_rcu(&fold->rcu, basic_delete_filter); } else { list_add_rcu(&fnew->link, &head->flist); @@ -239,7 +256,7 @@ static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg) if (arg->count < arg->skip) goto skip; - if (arg->fn(tp, (unsigned long) f, arg) < 0) { + if (arg->fn(tp, f, arg) < 0) { arg->stop = 1; break; } @@ -248,10 +265,18 @@ skip: } } -static int basic_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, +static void basic_bind_class(void *fh, u32 classid, unsigned long cl) +{ + struct basic_filter *f = fh; + + if (f && f->res.classid == classid) + f->res.class = cl; +} + +static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { - struct basic_filter *f = (struct basic_filter *) fh; + struct basic_filter *f = fh; struct nlattr *nest; if (f == NULL) @@ -293,6 +318,7 @@ static struct tcf_proto_ops cls_basic_ops __read_mostly = { .delete = basic_delete, .walk = basic_walk, .dump = basic_dump, + .bind_class = basic_bind_class, .owner = THIS_MODULE, }; diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index f57bd531ba98..fb680dafac5a 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -17,6 +17,7 @@ #include <linux/skbuff.h> #include <linux/filter.h> #include <linux/bpf.h> +#include <linux/idr.h> #include <net/rtnetlink.h> #include <net/pkt_cls.h> @@ -32,7 +33,7 @@ MODULE_DESCRIPTION("TC BPF based classifier"); struct cls_bpf_head { struct list_head plist; - u32 hgen; + struct idr handle_idr; struct rcu_head rcu; }; @@ -49,7 +50,10 @@ struct cls_bpf_prog { struct sock_filter *bpf_ops; const char *bpf_name; struct tcf_proto *tp; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; static const struct nla_policy bpf_policy[TCA_BPF_MAX + 1] = { @@ -99,11 +103,11 @@ static int cls_bpf_classify(struct sk_buff *skb, const struct tcf_proto *tp, } else if (at_ingress) { /* It is safe to push/pull even if skb_shared() */ __skb_push(skb, skb->mac_len); - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); filter_res = BPF_PROG_RUN(prog->filter, skb); __skb_pull(skb, skb->mac_len); } else { - bpf_compute_data_end(skb); + bpf_compute_data_pointers(skb); filter_res = BPF_PROG_RUN(prog->filter, skb); } @@ -146,35 +150,39 @@ static bool cls_bpf_is_ebpf(const struct cls_bpf_prog *prog) static int cls_bpf_offload_cmd(struct tcf_proto *tp, struct cls_bpf_prog *prog, enum tc_clsbpf_command cmd) { - struct net_device *dev = tp->q->dev_queue->dev; - struct tc_cls_bpf_offload bpf_offload = {}; - struct tc_to_netdev offload; + bool addorrep = cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE; + struct tcf_block *block = tp->chain->block; + bool skip_sw = tc_skip_sw(prog->gen_flags); + struct tc_cls_bpf_offload cls_bpf = {}; int err; - offload.type = TC_SETUP_CLSBPF; - offload.cls_bpf = &bpf_offload; - - bpf_offload.command = cmd; - bpf_offload.exts = &prog->exts; - bpf_offload.prog = prog->filter; - bpf_offload.name = prog->bpf_name; - bpf_offload.exts_integrated = prog->exts_integrated; - bpf_offload.gen_flags = prog->gen_flags; - - err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->chain->index, - tp->protocol, &offload); + tc_cls_common_offload_init(&cls_bpf.common, tp); + cls_bpf.command = cmd; + cls_bpf.exts = &prog->exts; + cls_bpf.prog = prog->filter; + cls_bpf.name = prog->bpf_name; + cls_bpf.exts_integrated = prog->exts_integrated; + cls_bpf.gen_flags = prog->gen_flags; + + err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSBPF, &cls_bpf, skip_sw); + if (addorrep) { + if (err < 0) { + cls_bpf_offload_cmd(tp, prog, TC_CLSBPF_DESTROY); + return err; + } else if (err > 0) { + prog->gen_flags |= TCA_CLS_FLAGS_IN_HW; + } + } - if (!err && (cmd == TC_CLSBPF_ADD || cmd == TC_CLSBPF_REPLACE)) - prog->gen_flags |= TCA_CLS_FLAGS_IN_HW; + if (addorrep && skip_sw && !(prog->gen_flags & TCA_CLS_FLAGS_IN_HW)) + return -EINVAL; - return err; + return 0; } static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, struct cls_bpf_prog *oldprog) { - struct net_device *dev = tp->q->dev_queue->dev; struct cls_bpf_prog *obj = prog; enum tc_clsbpf_command cmd; bool skip_sw; @@ -184,7 +192,7 @@ static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, (oldprog && tc_skip_sw(oldprog->gen_flags)); if (oldprog && oldprog->offloaded) { - if (tc_should_offload(dev, tp, prog->gen_flags)) { + if (!tc_skip_hw(prog->gen_flags)) { cmd = TC_CLSBPF_REPLACE; } else if (!tc_skip_sw(prog->gen_flags)) { obj = oldprog; @@ -193,14 +201,14 @@ static int cls_bpf_offload(struct tcf_proto *tp, struct cls_bpf_prog *prog, return -EINVAL; } } else { - if (!tc_should_offload(dev, tp, prog->gen_flags)) + if (tc_skip_hw(prog->gen_flags)) return skip_sw ? -EINVAL : 0; cmd = TC_CLSBPF_ADD; } ret = cls_bpf_offload_cmd(tp, obj, cmd); if (ret) - return skip_sw ? ret : 0; + return ret; obj->offloaded = true; if (oldprog) @@ -244,6 +252,7 @@ static int cls_bpf_init(struct tcf_proto *tp) return -ENOBUFS; INIT_LIST_HEAD_RCU(&head->plist); + idr_init(&head->handle_idr); rcu_assign_pointer(tp->root, head); return 0; @@ -252,6 +261,7 @@ static int cls_bpf_init(struct tcf_proto *tp) static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog) { tcf_exts_destroy(&prog->exts); + tcf_exts_put_net(&prog->exts); if (cls_bpf_is_ebpf(prog)) bpf_prog_put(prog->filter); @@ -263,24 +273,42 @@ static void __cls_bpf_delete_prog(struct cls_bpf_prog *prog) kfree(prog); } +static void cls_bpf_delete_prog_work(struct work_struct *work) +{ + struct cls_bpf_prog *prog = container_of(work, struct cls_bpf_prog, work); + + rtnl_lock(); + __cls_bpf_delete_prog(prog); + rtnl_unlock(); +} + static void cls_bpf_delete_prog_rcu(struct rcu_head *rcu) { - __cls_bpf_delete_prog(container_of(rcu, struct cls_bpf_prog, rcu)); + struct cls_bpf_prog *prog = container_of(rcu, struct cls_bpf_prog, rcu); + + INIT_WORK(&prog->work, cls_bpf_delete_prog_work); + tcf_queue_work(&prog->work); } static void __cls_bpf_delete(struct tcf_proto *tp, struct cls_bpf_prog *prog) { + struct cls_bpf_head *head = rtnl_dereference(tp->root); + + idr_remove_ext(&head->handle_idr, prog->handle); cls_bpf_stop_offload(tp, prog); list_del_rcu(&prog->link); tcf_unbind_filter(tp, &prog->res); - call_rcu(&prog->rcu, cls_bpf_delete_prog_rcu); + if (tcf_exts_get_net(&prog->exts)) + call_rcu(&prog->rcu, cls_bpf_delete_prog_rcu); + else + __cls_bpf_delete_prog(prog); } -static int cls_bpf_delete(struct tcf_proto *tp, unsigned long arg, bool *last) +static int cls_bpf_delete(struct tcf_proto *tp, void *arg, bool *last) { struct cls_bpf_head *head = rtnl_dereference(tp->root); - __cls_bpf_delete(tp, (struct cls_bpf_prog *) arg); + __cls_bpf_delete(tp, arg); *last = list_empty(&head->plist); return 0; } @@ -293,23 +321,21 @@ static void cls_bpf_destroy(struct tcf_proto *tp) list_for_each_entry_safe(prog, tmp, &head->plist, link) __cls_bpf_delete(tp, prog); + idr_destroy(&head->handle_idr); kfree_rcu(head, rcu); } -static unsigned long cls_bpf_get(struct tcf_proto *tp, u32 handle) +static void *cls_bpf_get(struct tcf_proto *tp, u32 handle) { struct cls_bpf_head *head = rtnl_dereference(tp->root); struct cls_bpf_prog *prog; - unsigned long ret = 0UL; list_for_each_entry(prog, &head->plist, link) { - if (prog->handle == handle) { - ret = (unsigned long) prog; - break; - } + if (prog->handle == handle) + return prog; } - return ret; + return NULL; } static int cls_bpf_prog_from_ops(struct nlattr **tb, struct cls_bpf_prog *prog) @@ -352,7 +378,7 @@ static int cls_bpf_prog_from_ops(struct nlattr **tb, struct cls_bpf_prog *prog) } static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog, - const struct tcf_proto *tp) + u32 gen_flags, const struct tcf_proto *tp) { struct bpf_prog *fp; char *name = NULL; @@ -360,7 +386,11 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog, bpf_fd = nla_get_u32(tb[TCA_BPF_FD]); - fp = bpf_prog_get_type(bpf_fd, BPF_PROG_TYPE_SCHED_CLS); + if (gen_flags & TCA_CLS_FLAGS_SKIP_SW) + fp = bpf_prog_get_type_dev(bpf_fd, BPF_PROG_TYPE_SCHED_CLS, + qdisc_dev(tp->q)); + else + fp = bpf_prog_get_type(bpf_fd, BPF_PROG_TYPE_SCHED_CLS); if (IS_ERR(fp)) return PTR_ERR(fp); @@ -382,13 +412,11 @@ static int cls_bpf_prog_from_efd(struct nlattr **tb, struct cls_bpf_prog *prog, return 0; } -static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, - struct cls_bpf_prog *prog, - unsigned long base, struct nlattr **tb, - struct nlattr *est, bool ovr) +static int cls_bpf_set_parms(struct net *net, struct tcf_proto *tp, + struct cls_bpf_prog *prog, unsigned long base, + struct nlattr **tb, struct nlattr *est, bool ovr) { bool is_bpf, is_ebpf, have_exts = false; - struct tcf_exts exts; u32 gen_flags = 0; int ret; @@ -397,83 +425,51 @@ static int cls_bpf_modify_existing(struct net *net, struct tcf_proto *tp, if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) return -EINVAL; - ret = tcf_exts_init(&exts, TCA_BPF_ACT, TCA_BPF_POLICE); + ret = tcf_exts_validate(net, tp, tb, est, &prog->exts, ovr); if (ret < 0) return ret; - ret = tcf_exts_validate(net, tp, tb, est, &exts, ovr); - if (ret < 0) - goto errout; if (tb[TCA_BPF_FLAGS]) { u32 bpf_flags = nla_get_u32(tb[TCA_BPF_FLAGS]); - if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT) { - ret = -EINVAL; - goto errout; - } + if (bpf_flags & ~TCA_BPF_FLAG_ACT_DIRECT) + return -EINVAL; have_exts = bpf_flags & TCA_BPF_FLAG_ACT_DIRECT; } if (tb[TCA_BPF_FLAGS_GEN]) { gen_flags = nla_get_u32(tb[TCA_BPF_FLAGS_GEN]); if (gen_flags & ~CLS_BPF_SUPPORTED_GEN_FLAGS || - !tc_flags_valid(gen_flags)) { - ret = -EINVAL; - goto errout; - } + !tc_flags_valid(gen_flags)) + return -EINVAL; } prog->exts_integrated = have_exts; prog->gen_flags = gen_flags; ret = is_bpf ? cls_bpf_prog_from_ops(tb, prog) : - cls_bpf_prog_from_efd(tb, prog, tp); + cls_bpf_prog_from_efd(tb, prog, gen_flags, tp); if (ret < 0) - goto errout; + return ret; if (tb[TCA_BPF_CLASSID]) { prog->res.classid = nla_get_u32(tb[TCA_BPF_CLASSID]); tcf_bind_filter(tp, &prog->res, base); } - tcf_exts_change(tp, &prog->exts, &exts); return 0; - -errout: - tcf_exts_destroy(&exts); - return ret; -} - -static u32 cls_bpf_grab_new_handle(struct tcf_proto *tp, - struct cls_bpf_head *head) -{ - unsigned int i = 0x80000000; - u32 handle; - - do { - if (++head->hgen == 0x7FFFFFFF) - head->hgen = 1; - } while (--i > 0 && cls_bpf_get(tp, head->hgen)); - - if (unlikely(i == 0)) { - pr_err("Insufficient number of handles\n"); - handle = 0; - } else { - handle = head->hgen; - } - - return handle; } static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - unsigned long *arg, bool ovr) + void **arg, bool ovr) { struct cls_bpf_head *head = rtnl_dereference(tp->root); - struct cls_bpf_prog *oldprog = (struct cls_bpf_prog *) *arg; + struct cls_bpf_prog *oldprog = *arg; struct nlattr *tb[TCA_BPF_MAX + 1]; struct cls_bpf_prog *prog; + unsigned long idr_index; int ret; if (tca[TCA_OPTIONS] == NULL) @@ -499,22 +495,30 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, } } - if (handle == 0) - prog->handle = cls_bpf_grab_new_handle(tp, head); - else + if (handle == 0) { + ret = idr_alloc_ext(&head->handle_idr, prog, &idr_index, + 1, 0x7FFFFFFF, GFP_KERNEL); + if (ret) + goto errout; + prog->handle = idr_index; + } else { + if (!oldprog) { + ret = idr_alloc_ext(&head->handle_idr, prog, &idr_index, + handle, handle + 1, GFP_KERNEL); + if (ret) + goto errout; + } prog->handle = handle; - if (prog->handle == 0) { - ret = -EINVAL; - goto errout; } - ret = cls_bpf_modify_existing(net, tp, prog, base, tb, tca[TCA_RATE], - ovr); + ret = cls_bpf_set_parms(net, tp, prog, base, tb, tca[TCA_RATE], ovr); if (ret < 0) - goto errout; + goto errout_idr; ret = cls_bpf_offload(tp, prog, oldprog); if (ret) { + if (!oldprog) + idr_remove_ext(&head->handle_idr, prog->handle); __cls_bpf_delete_prog(prog); return ret; } @@ -523,16 +527,21 @@ static int cls_bpf_change(struct net *net, struct sk_buff *in_skb, prog->gen_flags |= TCA_CLS_FLAGS_NOT_IN_HW; if (oldprog) { + idr_replace_ext(&head->handle_idr, prog, handle); list_replace_rcu(&oldprog->link, &prog->link); tcf_unbind_filter(tp, &oldprog->res); + tcf_exts_get_net(&oldprog->exts); call_rcu(&oldprog->rcu, cls_bpf_delete_prog_rcu); } else { list_add_rcu(&prog->link, &head->plist); } - *arg = (unsigned long) prog; + *arg = prog; return 0; +errout_idr: + if (!oldprog) + idr_remove_ext(&head->handle_idr, prog->handle); errout: tcf_exts_destroy(&prog->exts); kfree(prog); @@ -578,10 +587,10 @@ static int cls_bpf_dump_ebpf_info(const struct cls_bpf_prog *prog, return 0; } -static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, +static int cls_bpf_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *tm) { - struct cls_bpf_prog *prog = (struct cls_bpf_prog *) fh; + struct cls_bpf_prog *prog = fh; struct nlattr *nest; u32 bpf_flags = 0; int ret; @@ -631,6 +640,14 @@ nla_put_failure: return -1; } +static void cls_bpf_bind_class(void *fh, u32 classid, unsigned long cl) +{ + struct cls_bpf_prog *prog = fh; + + if (prog && prog->res.classid == classid) + prog->res.class = cl; +} + static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg) { struct cls_bpf_head *head = rtnl_dereference(tp->root); @@ -639,7 +656,7 @@ static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg) list_for_each_entry(prog, &head->plist, link) { if (arg->count < arg->skip) goto skip; - if (arg->fn(tp, (unsigned long) prog, arg) < 0) { + if (arg->fn(tp, prog, arg) < 0) { arg->stop = 1; break; } @@ -659,6 +676,7 @@ static struct tcf_proto_ops cls_bpf_ops __read_mostly = { .delete = cls_bpf_delete, .walk = cls_bpf_walk, .dump = cls_bpf_dump, + .bind_class = cls_bpf_bind_class, }; static int __init cls_bpf_init_mod(void) diff --git a/net/sched/cls_cgroup.c b/net/sched/cls_cgroup.c index 12ce547eea04..309d5899265f 100644 --- a/net/sched/cls_cgroup.c +++ b/net/sched/cls_cgroup.c @@ -23,7 +23,10 @@ struct cls_cgroup_head { struct tcf_exts exts; struct tcf_ematch_tree ematches; struct tcf_proto *tp; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; static int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp, @@ -43,9 +46,9 @@ static int cls_cgroup_classify(struct sk_buff *skb, const struct tcf_proto *tp, return tcf_exts_exec(skb, &head->exts, res); } -static unsigned long cls_cgroup_get(struct tcf_proto *tp, u32 handle) +static void *cls_cgroup_get(struct tcf_proto *tp, u32 handle) { - return 0UL; + return NULL; } static int cls_cgroup_init(struct tcf_proto *tp) @@ -57,27 +60,42 @@ static const struct nla_policy cgroup_policy[TCA_CGROUP_MAX + 1] = { [TCA_CGROUP_EMATCHES] = { .type = NLA_NESTED }, }; +static void __cls_cgroup_destroy(struct cls_cgroup_head *head) +{ + tcf_exts_destroy(&head->exts); + tcf_em_tree_destroy(&head->ematches); + tcf_exts_put_net(&head->exts); + kfree(head); +} + +static void cls_cgroup_destroy_work(struct work_struct *work) +{ + struct cls_cgroup_head *head = container_of(work, + struct cls_cgroup_head, + work); + rtnl_lock(); + __cls_cgroup_destroy(head); + rtnl_unlock(); +} + static void cls_cgroup_destroy_rcu(struct rcu_head *root) { struct cls_cgroup_head *head = container_of(root, struct cls_cgroup_head, rcu); - tcf_exts_destroy(&head->exts); - tcf_em_tree_destroy(&head->ematches); - kfree(head); + INIT_WORK(&head->work, cls_cgroup_destroy_work); + tcf_queue_work(&head->work); } static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - unsigned long *arg, bool ovr) + void **arg, bool ovr) { struct nlattr *tb[TCA_CGROUP_MAX + 1]; struct cls_cgroup_head *head = rtnl_dereference(tp->root); struct cls_cgroup_head *new; - struct tcf_ematch_tree t; - struct tcf_exts e; int err; if (!tca[TCA_OPTIONS]) @@ -103,27 +121,19 @@ static int cls_cgroup_change(struct net *net, struct sk_buff *in_skb, if (err < 0) goto errout; - err = tcf_exts_init(&e, TCA_CGROUP_ACT, TCA_CGROUP_POLICE); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &new->exts, ovr); if (err < 0) goto errout; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); - if (err < 0) { - tcf_exts_destroy(&e); - goto errout; - } - err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &t); - if (err < 0) { - tcf_exts_destroy(&e); + err = tcf_em_tree_validate(tp, tb[TCA_CGROUP_EMATCHES], &new->ematches); + if (err < 0) goto errout; - } - - tcf_exts_change(tp, &new->exts, &e); - tcf_em_tree_change(tp, &new->ematches, &t); rcu_assign_pointer(tp->root, new); - if (head) + if (head) { + tcf_exts_get_net(&head->exts); call_rcu(&head->rcu, cls_cgroup_destroy_rcu); + } return 0; errout: tcf_exts_destroy(&new->exts); @@ -136,11 +146,15 @@ static void cls_cgroup_destroy(struct tcf_proto *tp) struct cls_cgroup_head *head = rtnl_dereference(tp->root); /* Head can still be NULL due to cls_cgroup_init(). */ - if (head) - call_rcu(&head->rcu, cls_cgroup_destroy_rcu); + if (head) { + if (tcf_exts_get_net(&head->exts)) + call_rcu(&head->rcu, cls_cgroup_destroy_rcu); + else + __cls_cgroup_destroy(head); + } } -static int cls_cgroup_delete(struct tcf_proto *tp, unsigned long arg, bool *last) +static int cls_cgroup_delete(struct tcf_proto *tp, void *arg, bool *last) { return -EOPNOTSUPP; } @@ -152,7 +166,7 @@ static void cls_cgroup_walk(struct tcf_proto *tp, struct tcf_walker *arg) if (arg->count < arg->skip) goto skip; - if (arg->fn(tp, (unsigned long) head, arg) < 0) { + if (arg->fn(tp, head, arg) < 0) { arg->stop = 1; return; } @@ -160,7 +174,7 @@ skip: arg->count++; } -static int cls_cgroup_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, +static int cls_cgroup_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { struct cls_cgroup_head *head = rtnl_dereference(tp->root); diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 3065752b9cda..25c2a888e1f0 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -57,7 +57,10 @@ struct flow_filter { u32 divisor; u32 baseclass; u32 hashrnd; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; static inline u32 addr_fold(void *addr) @@ -345,9 +348,9 @@ static int flow_classify(struct sk_buff *skb, const struct tcf_proto *tp, return -1; } -static void flow_perturbation(unsigned long arg) +static void flow_perturbation(struct timer_list *t) { - struct flow_filter *f = (struct flow_filter *)arg; + struct flow_filter *f = from_timer(f, t, perturb_timer); get_random_bytes(&f->hashrnd, 4); if (f->perturb_period) @@ -369,27 +372,41 @@ static const struct nla_policy flow_policy[TCA_FLOW_MAX + 1] = { [TCA_FLOW_PERTURB] = { .type = NLA_U32 }, }; -static void flow_destroy_filter(struct rcu_head *head) +static void __flow_destroy_filter(struct flow_filter *f) { - struct flow_filter *f = container_of(head, struct flow_filter, rcu); - del_timer_sync(&f->perturb_timer); tcf_exts_destroy(&f->exts); tcf_em_tree_destroy(&f->ematches); + tcf_exts_put_net(&f->exts); kfree(f); } +static void flow_destroy_filter_work(struct work_struct *work) +{ + struct flow_filter *f = container_of(work, struct flow_filter, work); + + rtnl_lock(); + __flow_destroy_filter(f); + rtnl_unlock(); +} + +static void flow_destroy_filter(struct rcu_head *head) +{ + struct flow_filter *f = container_of(head, struct flow_filter, rcu); + + INIT_WORK(&f->work, flow_destroy_filter_work); + tcf_queue_work(&f->work); +} + static int flow_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - unsigned long *arg, bool ovr) + void **arg, bool ovr) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *fold, *fnew; struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_FLOW_MAX + 1]; - struct tcf_exts e; - struct tcf_ematch_tree t; unsigned int nkeys = 0; unsigned int perturb_period = 0; u32 baseclass = 0; @@ -425,31 +442,27 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, return -EOPNOTSUPP; } - err = tcf_exts_init(&e, TCA_FLOW_ACT, TCA_FLOW_POLICE); - if (err < 0) - goto err1; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); - if (err < 0) - goto err1; + fnew = kzalloc(sizeof(*fnew), GFP_KERNEL); + if (!fnew) + return -ENOBUFS; - err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &t); + err = tcf_em_tree_validate(tp, tb[TCA_FLOW_EMATCHES], &fnew->ematches); if (err < 0) goto err1; - err = -ENOBUFS; - fnew = kzalloc(sizeof(*fnew), GFP_KERNEL); - if (!fnew) + err = tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE); + if (err < 0) goto err2; - err = tcf_exts_init(&fnew->exts, TCA_FLOW_ACT, TCA_FLOW_POLICE); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &fnew->exts, ovr); if (err < 0) - goto err3; + goto err2; - fold = (struct flow_filter *)*arg; + fold = *arg; if (fold) { err = -EINVAL; if (fold->handle != handle && handle) - goto err3; + goto err2; /* Copy fold into fnew */ fnew->tp = fold->tp; @@ -469,36 +482,39 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, if (tb[TCA_FLOW_MODE]) mode = nla_get_u32(tb[TCA_FLOW_MODE]); if (mode != FLOW_MODE_HASH && nkeys > 1) - goto err3; + goto err2; if (mode == FLOW_MODE_HASH) perturb_period = fold->perturb_period; if (tb[TCA_FLOW_PERTURB]) { if (mode != FLOW_MODE_HASH) - goto err3; + goto err2; perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ; } } else { err = -EINVAL; if (!handle) - goto err3; + goto err2; if (!tb[TCA_FLOW_KEYS]) - goto err3; + goto err2; mode = FLOW_MODE_MAP; if (tb[TCA_FLOW_MODE]) mode = nla_get_u32(tb[TCA_FLOW_MODE]); if (mode != FLOW_MODE_HASH && nkeys > 1) - goto err3; + goto err2; if (tb[TCA_FLOW_PERTURB]) { if (mode != FLOW_MODE_HASH) - goto err3; + goto err2; perturb_period = nla_get_u32(tb[TCA_FLOW_PERTURB]) * HZ; } - if (TC_H_MAJ(baseclass) == 0) - baseclass = TC_H_MAKE(tp->q->handle, baseclass); + if (TC_H_MAJ(baseclass) == 0) { + struct Qdisc *q = tcf_block_q(tp->chain->block); + + baseclass = TC_H_MAKE(q->handle, baseclass); + } if (TC_H_MIN(baseclass) == 0) baseclass = TC_H_MAKE(baseclass, 1); @@ -508,11 +524,7 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, get_random_bytes(&fnew->hashrnd, 4); } - setup_deferrable_timer(&fnew->perturb_timer, flow_perturbation, - (unsigned long)fnew); - - tcf_exts_change(tp, &fnew->exts, &e); - tcf_em_tree_change(tp, &fnew->ematches, &t); + timer_setup(&fnew->perturb_timer, flow_perturbation, TIMER_DEFERRABLE); netif_keep_dst(qdisc_dev(tp->q)); @@ -541,33 +553,34 @@ static int flow_change(struct net *net, struct sk_buff *in_skb, if (perturb_period) mod_timer(&fnew->perturb_timer, jiffies + perturb_period); - if (*arg == 0) + if (!*arg) list_add_tail_rcu(&fnew->list, &head->filters); else list_replace_rcu(&fold->list, &fnew->list); - *arg = (unsigned long)fnew; + *arg = fnew; - if (fold) + if (fold) { + tcf_exts_get_net(&fold->exts); call_rcu(&fold->rcu, flow_destroy_filter); + } return 0; -err3: - tcf_exts_destroy(&fnew->exts); err2: - tcf_em_tree_destroy(&t); - kfree(fnew); + tcf_exts_destroy(&fnew->exts); + tcf_em_tree_destroy(&fnew->ematches); err1: - tcf_exts_destroy(&e); + kfree(fnew); return err; } -static int flow_delete(struct tcf_proto *tp, unsigned long arg, bool *last) +static int flow_delete(struct tcf_proto *tp, void *arg, bool *last) { struct flow_head *head = rtnl_dereference(tp->root); - struct flow_filter *f = (struct flow_filter *)arg; + struct flow_filter *f = arg; list_del_rcu(&f->list); + tcf_exts_get_net(&f->exts); call_rcu(&f->rcu, flow_destroy_filter); *last = list_empty(&head->filters); return 0; @@ -592,26 +605,29 @@ static void flow_destroy(struct tcf_proto *tp) list_for_each_entry_safe(f, next, &head->filters, list) { list_del_rcu(&f->list); - call_rcu(&f->rcu, flow_destroy_filter); + if (tcf_exts_get_net(&f->exts)) + call_rcu(&f->rcu, flow_destroy_filter); + else + __flow_destroy_filter(f); } kfree_rcu(head, rcu); } -static unsigned long flow_get(struct tcf_proto *tp, u32 handle) +static void *flow_get(struct tcf_proto *tp, u32 handle) { struct flow_head *head = rtnl_dereference(tp->root); struct flow_filter *f; list_for_each_entry(f, &head->filters, list) if (f->handle == handle) - return (unsigned long)f; - return 0; + return f; + return NULL; } -static int flow_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, +static int flow_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { - struct flow_filter *f = (struct flow_filter *)fh; + struct flow_filter *f = fh; struct nlattr *nest; if (f == NULL) @@ -677,7 +693,7 @@ static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg) list_for_each_entry(f, &head->filters, list) { if (arg->count < arg->skip) goto skip; - if (arg->fn(tp, (unsigned long)f, arg) < 0) { + if (arg->fn(tp, f, arg) < 0) { arg->stop = 1; break; } diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 7832eb93379b..543a3e875d05 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -68,7 +68,6 @@ struct cls_fl_head { struct rhashtable ht; struct fl_flow_mask mask; struct flow_dissector dissector; - u32 hgen; bool mask_assigned; struct list_head filters; struct rhashtable_params ht_params; @@ -76,6 +75,7 @@ struct cls_fl_head { struct work_struct work; struct rcu_head rcu; }; + struct idr handle_idr; }; struct cls_fl_filter { @@ -87,8 +87,10 @@ struct cls_fl_filter { struct list_head list; u32 handle; u32 flags; - struct rcu_head rcu; - struct tc_to_netdev tc; + union { + struct work_struct work; + struct rcu_head rcu; + }; struct net_device *hw_dev; }; @@ -153,37 +155,12 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct cls_fl_filter *f; struct fl_flow_key skb_key; struct fl_flow_key skb_mkey; - struct ip_tunnel_info *info; if (!atomic_read(&head->ht.nelems)) return -1; fl_clear_masked_range(&skb_key, &head->mask); - info = skb_tunnel_info(skb); - if (info) { - struct ip_tunnel_key *key = &info->key; - - switch (ip_tunnel_info_af(info)) { - case AF_INET: - skb_key.enc_control.addr_type = - FLOW_DISSECTOR_KEY_IPV4_ADDRS; - skb_key.enc_ipv4.src = key->u.ipv4.src; - skb_key.enc_ipv4.dst = key->u.ipv4.dst; - break; - case AF_INET6: - skb_key.enc_control.addr_type = - FLOW_DISSECTOR_KEY_IPV6_ADDRS; - skb_key.enc_ipv6.src = key->u.ipv6.src; - skb_key.enc_ipv6.dst = key->u.ipv6.dst; - break; - } - - skb_key.enc_key_id.keyid = tunnel_id_to_key32(key->tun_id); - skb_key.enc_tp.src = key->tp_src; - skb_key.enc_tp.dst = key->tp_dst; - } - skb_key.indev_ifindex = skb->skb_iif; /* skb_flow_dissect() does not set n_proto in case an unknown protocol, * so do it rather here. @@ -211,36 +188,46 @@ static int fl_init(struct tcf_proto *tp) INIT_LIST_HEAD_RCU(&head->filters); rcu_assign_pointer(tp->root, head); + idr_init(&head->handle_idr); return 0; } -static void fl_destroy_filter(struct rcu_head *head) +static void __fl_destroy_filter(struct cls_fl_filter *f) { - struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu); - tcf_exts_destroy(&f->exts); + tcf_exts_put_net(&f->exts); kfree(f); } -static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f) +static void fl_destroy_filter_work(struct work_struct *work) { - struct tc_cls_flower_offload offload = {0}; - struct net_device *dev = f->hw_dev; - struct tc_to_netdev *tc = &f->tc; + struct cls_fl_filter *f = container_of(work, struct cls_fl_filter, work); - if (!tc_can_offload(dev, tp)) - return; + rtnl_lock(); + __fl_destroy_filter(f); + rtnl_unlock(); +} - offload.command = TC_CLSFLOWER_DESTROY; - offload.prio = tp->prio; - offload.cookie = (unsigned long)f; +static void fl_destroy_filter(struct rcu_head *head) +{ + struct cls_fl_filter *f = container_of(head, struct cls_fl_filter, rcu); + + INIT_WORK(&f->work, fl_destroy_filter_work); + tcf_queue_work(&f->work); +} + +static void fl_hw_destroy_filter(struct tcf_proto *tp, struct cls_fl_filter *f) +{ + struct tc_cls_flower_offload cls_flower = {}; + struct tcf_block *block = tp->chain->block; - tc->type = TC_SETUP_CLSFLOWER; - tc->cls_flower = &offload; + tc_cls_common_offload_init(&cls_flower.common, tp); + cls_flower.command = TC_CLSFLOWER_DESTROY; + cls_flower.cookie = (unsigned long) f; - dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->chain->index, - tp->protocol, tc); + tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER, + &cls_flower, false); } static int fl_hw_replace_filter(struct tcf_proto *tp, @@ -248,72 +235,63 @@ static int fl_hw_replace_filter(struct tcf_proto *tp, struct fl_flow_key *mask, struct cls_fl_filter *f) { - struct net_device *dev = tp->q->dev_queue->dev; - struct tc_cls_flower_offload offload = {0}; - struct tc_to_netdev *tc = &f->tc; + struct tc_cls_flower_offload cls_flower = {}; + struct tcf_block *block = tp->chain->block; + bool skip_sw = tc_skip_sw(f->flags); int err; - if (!tc_can_offload(dev, tp)) { - if (tcf_exts_get_dev(dev, &f->exts, &f->hw_dev) || - (f->hw_dev && !tc_can_offload(f->hw_dev, tp))) { - f->hw_dev = dev; - return tc_skip_sw(f->flags) ? -EINVAL : 0; - } - dev = f->hw_dev; - tc->egress_dev = true; - } else { - f->hw_dev = dev; + tc_cls_common_offload_init(&cls_flower.common, tp); + cls_flower.command = TC_CLSFLOWER_REPLACE; + cls_flower.cookie = (unsigned long) f; + cls_flower.dissector = dissector; + cls_flower.mask = mask; + cls_flower.key = &f->mkey; + cls_flower.exts = &f->exts; + cls_flower.classid = f->res.classid; + + err = tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER, + &cls_flower, skip_sw); + if (err < 0) { + fl_hw_destroy_filter(tp, f); + return err; + } else if (err > 0) { + f->flags |= TCA_CLS_FLAGS_IN_HW; } - offload.command = TC_CLSFLOWER_REPLACE; - offload.prio = tp->prio; - offload.cookie = (unsigned long)f; - offload.dissector = dissector; - offload.mask = mask; - offload.key = &f->mkey; - offload.exts = &f->exts; - - tc->type = TC_SETUP_CLSFLOWER; - tc->cls_flower = &offload; - - err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->chain->index, tp->protocol, tc); - if (!err) - f->flags |= TCA_CLS_FLAGS_IN_HW; + if (skip_sw && !(f->flags & TCA_CLS_FLAGS_IN_HW)) + return -EINVAL; - if (tc_skip_sw(f->flags)) - return err; return 0; } static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) { - struct tc_cls_flower_offload offload = {0}; - struct net_device *dev = f->hw_dev; - struct tc_to_netdev *tc = &f->tc; - - if (!tc_can_offload(dev, tp)) - return; - - offload.command = TC_CLSFLOWER_STATS; - offload.prio = tp->prio; - offload.cookie = (unsigned long)f; - offload.exts = &f->exts; + struct tc_cls_flower_offload cls_flower = {}; + struct tcf_block *block = tp->chain->block; - tc->type = TC_SETUP_CLSFLOWER; - tc->cls_flower = &offload; + tc_cls_common_offload_init(&cls_flower.common, tp); + cls_flower.command = TC_CLSFLOWER_STATS; + cls_flower.cookie = (unsigned long) f; + cls_flower.exts = &f->exts; + cls_flower.classid = f->res.classid; - dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->chain->index, tp->protocol, tc); + tc_setup_cb_call(block, &f->exts, TC_SETUP_CLSFLOWER, + &cls_flower, false); } static void __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f) { + struct cls_fl_head *head = rtnl_dereference(tp->root); + + idr_remove_ext(&head->handle_idr, f->handle); list_del_rcu(&f->list); if (!tc_skip_hw(f->flags)) fl_hw_destroy_filter(tp, f); tcf_unbind_filter(tp, &f->res); - call_rcu(&f->rcu, fl_destroy_filter); + if (tcf_exts_get_net(&f->exts)) + call_rcu(&f->rcu, fl_destroy_filter); + else + __fl_destroy_filter(f); } static void fl_destroy_sleepable(struct work_struct *work) @@ -341,20 +319,17 @@ static void fl_destroy(struct tcf_proto *tp) list_for_each_entry_safe(f, next, &head->filters, list) __fl_delete(tp, f); + idr_destroy(&head->handle_idr); __module_get(THIS_MODULE); call_rcu(&head->rcu, fl_destroy_rcu); } -static unsigned long fl_get(struct tcf_proto *tp, u32 handle) +static void *fl_get(struct tcf_proto *tp, u32 handle) { struct cls_fl_head *head = rtnl_dereference(tp->root); - struct cls_fl_filter *f; - list_for_each_entry(f, &head->filters, list) - if (f->handle == handle) - return (unsigned long) f; - return 0; + return idr_find_ext(&head->handle_idr, handle); } static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { @@ -852,15 +827,11 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct nlattr **tb, struct nlattr *est, bool ovr) { - struct tcf_exts e; int err; - err = tcf_exts_init(&e, TCA_FLOWER_ACT, 0); + err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr); if (err < 0) return err; - err = tcf_exts_validate(net, tp, tb, est, &e, ovr); - if (err < 0) - goto errout; if (tb[TCA_FLOWER_CLASSID]) { f->res.classid = nla_get_u32(tb[TCA_FLOWER_CLASSID]); @@ -869,50 +840,25 @@ static int fl_set_parms(struct net *net, struct tcf_proto *tp, err = fl_set_key(net, tb, &f->key, &mask->key); if (err) - goto errout; + return err; fl_mask_update_range(mask); fl_set_masked_key(&f->mkey, &f->key, mask); - tcf_exts_change(tp, &f->exts, &e); - return 0; -errout: - tcf_exts_destroy(&e); - return err; -} - -static u32 fl_grab_new_handle(struct tcf_proto *tp, - struct cls_fl_head *head) -{ - unsigned int i = 0x80000000; - u32 handle; - - do { - if (++head->hgen == 0x7FFFFFFF) - head->hgen = 1; - } while (--i > 0 && fl_get(tp, head->hgen)); - - if (unlikely(i == 0)) { - pr_err("Insufficient number of handles\n"); - handle = 0; - } else { - handle = head->hgen; - } - - return handle; } static int fl_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - unsigned long *arg, bool ovr) + void **arg, bool ovr) { struct cls_fl_head *head = rtnl_dereference(tp->root); - struct cls_fl_filter *fold = (struct cls_fl_filter *) *arg; + struct cls_fl_filter *fold = *arg; struct cls_fl_filter *fnew; struct nlattr **tb; struct fl_flow_mask mask = {}; + unsigned long idr_index; int err; if (!tca[TCA_OPTIONS]) @@ -943,41 +889,49 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, goto errout; if (!handle) { - handle = fl_grab_new_handle(tp, head); - if (!handle) { - err = -EINVAL; + err = idr_alloc_ext(&head->handle_idr, fnew, &idr_index, + 1, 0x80000000, GFP_KERNEL); + if (err) goto errout; - } + fnew->handle = idr_index; + } + + /* user specifies a handle and it doesn't exist */ + if (handle && !fold) { + err = idr_alloc_ext(&head->handle_idr, fnew, &idr_index, + handle, handle + 1, GFP_KERNEL); + if (err) + goto errout; + fnew->handle = idr_index; } - fnew->handle = handle; if (tb[TCA_FLOWER_FLAGS]) { fnew->flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]); if (!tc_flags_valid(fnew->flags)) { err = -EINVAL; - goto errout; + goto errout_idr; } } err = fl_set_parms(net, tp, fnew, &mask, base, tb, tca[TCA_RATE], ovr); if (err) - goto errout; + goto errout_idr; err = fl_check_assign_mask(head, &mask); if (err) - goto errout; + goto errout_idr; if (!tc_skip_sw(fnew->flags)) { if (!fold && fl_lookup(head, &fnew->mkey)) { err = -EEXIST; - goto errout; + goto errout_idr; } err = rhashtable_insert_fast(&head->ht, &fnew->ht_node, head->ht_params); if (err) - goto errout; + goto errout_idr; } if (!tc_skip_hw(fnew->flags)) { @@ -986,7 +940,7 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, &mask.key, fnew); if (err) - goto errout; + goto errout_idr; } if (!tc_in_hw(fnew->flags)) @@ -1000,11 +954,14 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, fl_hw_destroy_filter(tp, fold); } - *arg = (unsigned long) fnew; + *arg = fnew; if (fold) { + fnew->handle = handle; + idr_replace_ext(&head->handle_idr, fnew, fnew->handle); list_replace_rcu(&fold->list, &fnew->list); tcf_unbind_filter(tp, &fold->res); + tcf_exts_get_net(&fold->exts); call_rcu(&fold->rcu, fl_destroy_filter); } else { list_add_tail_rcu(&fnew->list, &head->filters); @@ -1013,6 +970,9 @@ static int fl_change(struct net *net, struct sk_buff *in_skb, kfree(tb); return 0; +errout_idr: + if (fnew->handle) + idr_remove_ext(&head->handle_idr, fnew->handle); errout: tcf_exts_destroy(&fnew->exts); kfree(fnew); @@ -1021,10 +981,10 @@ errout_tb: return err; } -static int fl_delete(struct tcf_proto *tp, unsigned long arg, bool *last) +static int fl_delete(struct tcf_proto *tp, void *arg, bool *last) { struct cls_fl_head *head = rtnl_dereference(tp->root); - struct cls_fl_filter *f = (struct cls_fl_filter *) arg; + struct cls_fl_filter *f = arg; if (!tc_skip_sw(f->flags)) rhashtable_remove_fast(&head->ht, &f->ht_node, @@ -1042,7 +1002,7 @@ static void fl_walk(struct tcf_proto *tp, struct tcf_walker *arg) list_for_each_entry_rcu(f, &head->filters, list) { if (arg->count < arg->skip) goto skip; - if (arg->fn(tp, (unsigned long) f, arg) < 0) { + if (arg->fn(tp, f, arg) < 0) { arg->stop = 1; break; } @@ -1177,11 +1137,11 @@ static int fl_dump_key_flags(struct sk_buff *skb, u32 flags_key, u32 flags_mask) return nla_put(skb, TCA_FLOWER_KEY_FLAGS_MASK, 4, &_mask); } -static int fl_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, +static int fl_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { struct cls_fl_head *head = rtnl_dereference(tp->root); - struct cls_fl_filter *f = (struct cls_fl_filter *) fh; + struct cls_fl_filter *f = fh; struct nlattr *nest; struct fl_flow_key *key, *mask; @@ -1383,6 +1343,14 @@ nla_put_failure: return -1; } +static void fl_bind_class(void *fh, u32 classid, unsigned long cl) +{ + struct cls_fl_filter *f = fh; + + if (f && f->res.classid == classid) + f->res.class = cl; +} + static struct tcf_proto_ops cls_fl_ops __read_mostly = { .kind = "flower", .classify = fl_classify, @@ -1393,6 +1361,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = { .delete = fl_delete, .walk = fl_walk, .dump = fl_dump, + .bind_class = fl_bind_class, .owner = THIS_MODULE, }; diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index d3885362e017..20f0de1a960a 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -28,6 +28,7 @@ #include <net/netlink.h> #include <net/act_api.h> #include <net/pkt_cls.h> +#include <net/sch_generic.h> #define HTSIZE 256 @@ -46,7 +47,10 @@ struct fw_filter { #endif /* CONFIG_NET_CLS_IND */ struct tcf_exts exts; struct tcf_proto *tp; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; static u32 fw_hash(u32 handle) @@ -83,9 +87,11 @@ static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp, } } } else { + struct Qdisc *q = tcf_block_q(tp->chain->block); + /* Old method: classify the packet using its skb mark. */ if (id && (TC_H_MAJ(id) == 0 || - !(TC_H_MAJ(id ^ tp->q->handle)))) { + !(TC_H_MAJ(id ^ q->handle)))) { res->classid = id; res->class = 0; return 0; @@ -95,20 +101,20 @@ static int fw_classify(struct sk_buff *skb, const struct tcf_proto *tp, return -1; } -static unsigned long fw_get(struct tcf_proto *tp, u32 handle) +static void *fw_get(struct tcf_proto *tp, u32 handle) { struct fw_head *head = rtnl_dereference(tp->root); struct fw_filter *f; if (head == NULL) - return 0; + return NULL; f = rtnl_dereference(head->ht[fw_hash(handle)]); for (; f; f = rtnl_dereference(f->next)) { if (f->id == handle) - return (unsigned long)f; + return f; } - return 0; + return NULL; } static int fw_init(struct tcf_proto *tp) @@ -119,12 +125,28 @@ static int fw_init(struct tcf_proto *tp) return 0; } +static void __fw_delete_filter(struct fw_filter *f) +{ + tcf_exts_destroy(&f->exts); + tcf_exts_put_net(&f->exts); + kfree(f); +} + +static void fw_delete_filter_work(struct work_struct *work) +{ + struct fw_filter *f = container_of(work, struct fw_filter, work); + + rtnl_lock(); + __fw_delete_filter(f); + rtnl_unlock(); +} + static void fw_delete_filter(struct rcu_head *head) { struct fw_filter *f = container_of(head, struct fw_filter, rcu); - tcf_exts_destroy(&f->exts); - kfree(f); + INIT_WORK(&f->work, fw_delete_filter_work); + tcf_queue_work(&f->work); } static void fw_destroy(struct tcf_proto *tp) @@ -141,16 +163,19 @@ static void fw_destroy(struct tcf_proto *tp) RCU_INIT_POINTER(head->ht[h], rtnl_dereference(f->next)); tcf_unbind_filter(tp, &f->res); - call_rcu(&f->rcu, fw_delete_filter); + if (tcf_exts_get_net(&f->exts)) + call_rcu(&f->rcu, fw_delete_filter); + else + __fw_delete_filter(f); } } kfree_rcu(head, rcu); } -static int fw_delete(struct tcf_proto *tp, unsigned long arg, bool *last) +static int fw_delete(struct tcf_proto *tp, void *arg, bool *last) { struct fw_head *head = rtnl_dereference(tp->root); - struct fw_filter *f = (struct fw_filter *)arg; + struct fw_filter *f = arg; struct fw_filter __rcu **fp; struct fw_filter *pfp; int ret = -EINVAL; @@ -166,6 +191,7 @@ static int fw_delete(struct tcf_proto *tp, unsigned long arg, bool *last) if (pfp == f) { RCU_INIT_POINTER(*fp, rtnl_dereference(f->next)); tcf_unbind_filter(tp, &f->res); + tcf_exts_get_net(&f->exts); call_rcu(&f->rcu, fw_delete_filter); ret = 0; break; @@ -190,22 +216,17 @@ static const struct nla_policy fw_policy[TCA_FW_MAX + 1] = { [TCA_FW_MASK] = { .type = NLA_U32 }, }; -static int -fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f, - struct nlattr **tb, struct nlattr **tca, unsigned long base, - bool ovr) +static int fw_set_parms(struct net *net, struct tcf_proto *tp, + struct fw_filter *f, struct nlattr **tb, + struct nlattr **tca, unsigned long base, bool ovr) { struct fw_head *head = rtnl_dereference(tp->root); - struct tcf_exts e; u32 mask; int err; - err = tcf_exts_init(&e, TCA_FW_ACT, TCA_FW_POLICE); + err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &f->exts, ovr); if (err < 0) return err; - err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, ovr); - if (err < 0) - goto errout; if (tb[TCA_FW_CLASSID]) { f->res.classid = nla_get_u32(tb[TCA_FW_CLASSID]); @@ -216,10 +237,8 @@ fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f, if (tb[TCA_FW_INDEV]) { int ret; ret = tcf_change_indev(net, tb[TCA_FW_INDEV]); - if (ret < 0) { - err = ret; - goto errout; - } + if (ret < 0) + return ret; f->ifindex = ret; } #endif /* CONFIG_NET_CLS_IND */ @@ -228,25 +247,20 @@ fw_change_attrs(struct net *net, struct tcf_proto *tp, struct fw_filter *f, if (tb[TCA_FW_MASK]) { mask = nla_get_u32(tb[TCA_FW_MASK]); if (mask != head->mask) - goto errout; + return err; } else if (head->mask != 0xFFFFFFFF) - goto errout; - - tcf_exts_change(tp, &f->exts, &e); + return err; return 0; -errout: - tcf_exts_destroy(&e); - return err; } static int fw_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, - u32 handle, struct nlattr **tca, unsigned long *arg, + u32 handle, struct nlattr **tca, void **arg, bool ovr) { struct fw_head *head = rtnl_dereference(tp->root); - struct fw_filter *f = (struct fw_filter *) *arg; + struct fw_filter *f = *arg; struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_FW_MAX + 1]; int err; @@ -282,7 +296,7 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, return err; } - err = fw_change_attrs(net, tp, fnew, tb, tca, base, ovr); + err = fw_set_parms(net, tp, fnew, tb, tca, base, ovr); if (err < 0) { tcf_exts_destroy(&fnew->exts); kfree(fnew); @@ -298,9 +312,10 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, RCU_INIT_POINTER(fnew->next, rtnl_dereference(pfp->next)); rcu_assign_pointer(*fp, fnew); tcf_unbind_filter(tp, &f->res); + tcf_exts_get_net(&f->exts); call_rcu(&f->rcu, fw_delete_filter); - *arg = (unsigned long)fnew; + *arg = fnew; return err; } @@ -330,14 +345,14 @@ static int fw_change(struct net *net, struct sk_buff *in_skb, f->id = handle; f->tp = tp; - err = fw_change_attrs(net, tp, f, tb, tca, base, ovr); + err = fw_set_parms(net, tp, f, tb, tca, base, ovr); if (err < 0) goto errout; RCU_INIT_POINTER(f->next, head->ht[fw_hash(handle)]); rcu_assign_pointer(head->ht[fw_hash(handle)], f); - *arg = (unsigned long)f; + *arg = f; return 0; errout: @@ -366,7 +381,7 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg) arg->count++; continue; } - if (arg->fn(tp, (unsigned long)f, arg) < 0) { + if (arg->fn(tp, f, arg) < 0) { arg->stop = 1; return; } @@ -375,11 +390,11 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg) } } -static int fw_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, +static int fw_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { struct fw_head *head = rtnl_dereference(tp->root); - struct fw_filter *f = (struct fw_filter *)fh; + struct fw_filter *f = fh; struct nlattr *nest; if (f == NULL) @@ -387,7 +402,7 @@ static int fw_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, t->tcm_handle = f->id; - if (!f->res.classid && !tcf_exts_is_available(&f->exts)) + if (!f->res.classid && !tcf_exts_has_actions(&f->exts)) return skb->len; nest = nla_nest_start(skb, TCA_OPTIONS); @@ -424,6 +439,14 @@ nla_put_failure: return -1; } +static void fw_bind_class(void *fh, u32 classid, unsigned long cl) +{ + struct fw_filter *f = fh; + + if (f && f->res.classid == classid) + f->res.class = cl; +} + static struct tcf_proto_ops cls_fw_ops __read_mostly = { .kind = "fw", .classify = fw_classify, @@ -434,6 +457,7 @@ static struct tcf_proto_ops cls_fw_ops __read_mostly = { .delete = fw_delete, .walk = fw_walk, .dump = fw_dump, + .bind_class = fw_bind_class, .owner = THIS_MODULE, }; diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 9dc26c32cf32..66d4e0099158 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -21,7 +21,10 @@ struct cls_mall_head { struct tcf_result res; u32 handle; u32 flags; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp, @@ -32,6 +35,7 @@ static int mall_classify(struct sk_buff *skb, const struct tcf_proto *tp, if (tc_skip_sw(head->flags)) return -1; + *res = head->res; return tcf_exts_exec(skb, &head->exts, res); } @@ -40,74 +44,93 @@ static int mall_init(struct tcf_proto *tp) return 0; } +static void __mall_destroy(struct cls_mall_head *head) +{ + tcf_exts_destroy(&head->exts); + tcf_exts_put_net(&head->exts); + kfree(head); +} + +static void mall_destroy_work(struct work_struct *work) +{ + struct cls_mall_head *head = container_of(work, struct cls_mall_head, + work); + rtnl_lock(); + __mall_destroy(head); + rtnl_unlock(); +} + static void mall_destroy_rcu(struct rcu_head *rcu) { struct cls_mall_head *head = container_of(rcu, struct cls_mall_head, rcu); - tcf_exts_destroy(&head->exts); - kfree(head); + INIT_WORK(&head->work, mall_destroy_work); + tcf_queue_work(&head->work); +} + +static void mall_destroy_hw_filter(struct tcf_proto *tp, + struct cls_mall_head *head, + unsigned long cookie) +{ + struct tc_cls_matchall_offload cls_mall = {}; + struct tcf_block *block = tp->chain->block; + + tc_cls_common_offload_init(&cls_mall.common, tp); + cls_mall.command = TC_CLSMATCHALL_DESTROY; + cls_mall.cookie = cookie; + + tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL, &cls_mall, false); } static int mall_replace_hw_filter(struct tcf_proto *tp, struct cls_mall_head *head, unsigned long cookie) { - struct net_device *dev = tp->q->dev_queue->dev; - struct tc_to_netdev offload; - struct tc_cls_matchall_offload mall_offload = {0}; + struct tc_cls_matchall_offload cls_mall = {}; + struct tcf_block *block = tp->chain->block; + bool skip_sw = tc_skip_sw(head->flags); int err; - offload.type = TC_SETUP_MATCHALL; - offload.cls_mall = &mall_offload; - offload.cls_mall->command = TC_CLSMATCHALL_REPLACE; - offload.cls_mall->exts = &head->exts; - offload.cls_mall->cookie = cookie; + tc_cls_common_offload_init(&cls_mall.common, tp); + cls_mall.command = TC_CLSMATCHALL_REPLACE; + cls_mall.exts = &head->exts; + cls_mall.cookie = cookie; - err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->chain->index, - tp->protocol, &offload); - if (!err) + err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSMATCHALL, + &cls_mall, skip_sw); + if (err < 0) { + mall_destroy_hw_filter(tp, head, cookie); + return err; + } else if (err > 0) { head->flags |= TCA_CLS_FLAGS_IN_HW; + } - return err; -} + if (skip_sw && !(head->flags & TCA_CLS_FLAGS_IN_HW)) + return -EINVAL; -static void mall_destroy_hw_filter(struct tcf_proto *tp, - struct cls_mall_head *head, - unsigned long cookie) -{ - struct net_device *dev = tp->q->dev_queue->dev; - struct tc_to_netdev offload; - struct tc_cls_matchall_offload mall_offload = {0}; - - offload.type = TC_SETUP_MATCHALL; - offload.cls_mall = &mall_offload; - offload.cls_mall->command = TC_CLSMATCHALL_DESTROY; - offload.cls_mall->exts = NULL; - offload.cls_mall->cookie = cookie; - - dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, tp->chain->index, - tp->protocol, &offload); + return 0; } static void mall_destroy(struct tcf_proto *tp) { struct cls_mall_head *head = rtnl_dereference(tp->root); - struct net_device *dev = tp->q->dev_queue->dev; if (!head) return; - if (tc_should_offload(dev, tp, head->flags)) + if (!tc_skip_hw(head->flags)) mall_destroy_hw_filter(tp, head, (unsigned long) head); - call_rcu(&head->rcu, mall_destroy_rcu); + if (tcf_exts_get_net(&head->exts)) + call_rcu(&head->rcu, mall_destroy_rcu); + else + __mall_destroy(head); } -static unsigned long mall_get(struct tcf_proto *tp, u32 handle) +static void *mall_get(struct tcf_proto *tp, u32 handle) { - return 0UL; + return NULL; } static const struct nla_policy mall_policy[TCA_MATCHALL_MAX + 1] = { @@ -120,36 +143,25 @@ static int mall_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct nlattr **tb, struct nlattr *est, bool ovr) { - struct tcf_exts e; int err; - err = tcf_exts_init(&e, TCA_MATCHALL_ACT, 0); - if (err) - return err; - err = tcf_exts_validate(net, tp, tb, est, &e, ovr); + err = tcf_exts_validate(net, tp, tb, est, &head->exts, ovr); if (err < 0) - goto errout; + return err; if (tb[TCA_MATCHALL_CLASSID]) { head->res.classid = nla_get_u32(tb[TCA_MATCHALL_CLASSID]); tcf_bind_filter(tp, &head->res, base); } - - tcf_exts_change(tp, &head->exts, &e); - return 0; -errout: - tcf_exts_destroy(&e); - return err; } static int mall_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - unsigned long *arg, bool ovr) + void **arg, bool ovr) { struct cls_mall_head *head = rtnl_dereference(tp->root); - struct net_device *dev = tp->q->dev_queue->dev; struct nlattr *tb[TCA_MATCHALL_MAX + 1]; struct cls_mall_head *new; u32 flags = 0; @@ -189,20 +201,16 @@ static int mall_change(struct net *net, struct sk_buff *in_skb, if (err) goto err_set_parms; - if (tc_should_offload(dev, tp, flags)) { + if (!tc_skip_hw(new->flags)) { err = mall_replace_hw_filter(tp, new, (unsigned long) new); - if (err) { - if (tc_skip_sw(flags)) - goto err_replace_hw_filter; - else - err = 0; - } + if (err) + goto err_replace_hw_filter; } if (!tc_in_hw(new->flags)) new->flags |= TCA_CLS_FLAGS_NOT_IN_HW; - *arg = (unsigned long) head; + *arg = head; rcu_assign_pointer(tp->root, new); return 0; @@ -214,7 +222,7 @@ err_exts_init: return err; } -static int mall_delete(struct tcf_proto *tp, unsigned long arg, bool *last) +static int mall_delete(struct tcf_proto *tp, void *arg, bool *last) { return -EOPNOTSUPP; } @@ -225,16 +233,16 @@ static void mall_walk(struct tcf_proto *tp, struct tcf_walker *arg) if (arg->count < arg->skip) goto skip; - if (arg->fn(tp, (unsigned long) head, arg) < 0) + if (arg->fn(tp, head, arg) < 0) arg->stop = 1; skip: arg->count++; } -static int mall_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, +static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { - struct cls_mall_head *head = (struct cls_mall_head *) fh; + struct cls_mall_head *head = fh; struct nlattr *nest; if (!head) @@ -268,6 +276,14 @@ nla_put_failure: return -1; } +static void mall_bind_class(void *fh, u32 classid, unsigned long cl) +{ + struct cls_mall_head *head = fh; + + if (head && head->res.classid == classid) + head->res.class = cl; +} + static struct tcf_proto_ops cls_mall_ops __read_mostly = { .kind = "matchall", .classify = mall_classify, @@ -278,6 +294,7 @@ static struct tcf_proto_ops cls_mall_ops __read_mostly = { .delete = mall_delete, .walk = mall_walk, .dump = mall_dump, + .bind_class = mall_bind_class, .owner = THIS_MODULE, }; diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index d63d5502ee02..ac9a5b8825b9 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -57,7 +57,10 @@ struct route4_filter { u32 handle; struct route4_bucket *bkt; struct tcf_proto *tp; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; #define ROUTE4_FAILURE ((struct route4_filter *)(-1L)) @@ -113,7 +116,7 @@ static inline int route4_hash_wild(void) #define ROUTE4_APPLY_RESULT() \ { \ *res = f->res; \ - if (tcf_exts_is_available(&f->exts)) { \ + if (tcf_exts_has_actions(&f->exts)) { \ int r = tcf_exts_exec(skb, &f->exts, res); \ if (r < 0) { \ dont_cache = 1; \ @@ -216,7 +219,7 @@ static inline u32 from_hash(u32 id) return 16 + (id & 0xF); } -static unsigned long route4_get(struct tcf_proto *tp, u32 handle) +static void *route4_get(struct tcf_proto *tp, u32 handle) { struct route4_head *head = rtnl_dereference(tp->root); struct route4_bucket *b; @@ -225,11 +228,11 @@ static unsigned long route4_get(struct tcf_proto *tp, u32 handle) h1 = to_hash(handle); if (h1 > 256) - return 0; + return NULL; h2 = from_hash(handle >> 16); if (h2 > 32) - return 0; + return NULL; b = rtnl_dereference(head->table[h1]); if (b) { @@ -237,9 +240,9 @@ static unsigned long route4_get(struct tcf_proto *tp, u32 handle) f; f = rtnl_dereference(f->next)) if (f->handle == handle) - return (unsigned long)f; + return f; } - return 0; + return NULL; } static int route4_init(struct tcf_proto *tp) @@ -254,12 +257,28 @@ static int route4_init(struct tcf_proto *tp) return 0; } +static void __route4_delete_filter(struct route4_filter *f) +{ + tcf_exts_destroy(&f->exts); + tcf_exts_put_net(&f->exts); + kfree(f); +} + +static void route4_delete_filter_work(struct work_struct *work) +{ + struct route4_filter *f = container_of(work, struct route4_filter, work); + + rtnl_lock(); + __route4_delete_filter(f); + rtnl_unlock(); +} + static void route4_delete_filter(struct rcu_head *head) { struct route4_filter *f = container_of(head, struct route4_filter, rcu); - tcf_exts_destroy(&f->exts); - kfree(f); + INIT_WORK(&f->work, route4_delete_filter_work); + tcf_queue_work(&f->work); } static void route4_destroy(struct tcf_proto *tp) @@ -284,7 +303,10 @@ static void route4_destroy(struct tcf_proto *tp) next = rtnl_dereference(f->next); RCU_INIT_POINTER(b->ht[h2], next); tcf_unbind_filter(tp, &f->res); - call_rcu(&f->rcu, route4_delete_filter); + if (tcf_exts_get_net(&f->exts)) + call_rcu(&f->rcu, route4_delete_filter); + else + __route4_delete_filter(f); } } RCU_INIT_POINTER(head->table[h1], NULL); @@ -294,10 +316,10 @@ static void route4_destroy(struct tcf_proto *tp) kfree_rcu(head, rcu); } -static int route4_delete(struct tcf_proto *tp, unsigned long arg, bool *last) +static int route4_delete(struct tcf_proto *tp, void *arg, bool *last) { struct route4_head *head = rtnl_dereference(tp->root); - struct route4_filter *f = (struct route4_filter *)arg; + struct route4_filter *f = arg; struct route4_filter __rcu **fp; struct route4_filter *nf; struct route4_bucket *b; @@ -325,6 +347,7 @@ static int route4_delete(struct tcf_proto *tp, unsigned long arg, bool *last) /* Delete it */ tcf_unbind_filter(tp, &f->res); + tcf_exts_get_net(&f->exts); call_rcu(&f->rcu, route4_delete_filter); /* Strip RTNL protected tree */ @@ -372,37 +395,32 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, struct route4_filter *fp; unsigned int h1; struct route4_bucket *b; - struct tcf_exts e; int err; - err = tcf_exts_init(&e, TCA_ROUTE4_ACT, TCA_ROUTE4_POLICE); + err = tcf_exts_validate(net, tp, tb, est, &f->exts, ovr); if (err < 0) return err; - err = tcf_exts_validate(net, tp, tb, est, &e, ovr); - if (err < 0) - goto errout; - err = -EINVAL; if (tb[TCA_ROUTE4_TO]) { if (new && handle & 0x8000) - goto errout; + return -EINVAL; to = nla_get_u32(tb[TCA_ROUTE4_TO]); if (to > 0xFF) - goto errout; + return -EINVAL; nhandle = to; } if (tb[TCA_ROUTE4_FROM]) { if (tb[TCA_ROUTE4_IIF]) - goto errout; + return -EINVAL; id = nla_get_u32(tb[TCA_ROUTE4_FROM]); if (id > 0xFF) - goto errout; + return -EINVAL; nhandle |= id << 16; } else if (tb[TCA_ROUTE4_IIF]) { id = nla_get_u32(tb[TCA_ROUTE4_IIF]); if (id > 0x7FFF) - goto errout; + return -EINVAL; nhandle |= (id | 0x8000) << 16; } else nhandle |= 0xFFFF << 16; @@ -410,27 +428,25 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, if (handle && new) { nhandle |= handle & 0x7F00; if (nhandle != handle) - goto errout; + return -EINVAL; } h1 = to_hash(nhandle); b = rtnl_dereference(head->table[h1]); if (!b) { - err = -ENOBUFS; b = kzalloc(sizeof(struct route4_bucket), GFP_KERNEL); if (b == NULL) - goto errout; + return -ENOBUFS; rcu_assign_pointer(head->table[h1], b); } else { unsigned int h2 = from_hash(nhandle >> 16); - err = -EEXIST; for (fp = rtnl_dereference(b->ht[h2]); fp; fp = rtnl_dereference(fp->next)) if (fp->handle == f->handle) - goto errout; + return -EEXIST; } if (tb[TCA_ROUTE4_TO]) @@ -450,17 +466,12 @@ static int route4_set_parms(struct net *net, struct tcf_proto *tp, tcf_bind_filter(tp, &f->res, base); } - tcf_exts_change(tp, &f->exts, &e); - return 0; -errout: - tcf_exts_destroy(&e); - return err; } static int route4_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, unsigned long *arg, bool ovr) + struct nlattr **tca, void **arg, bool ovr) { struct route4_head *head = rtnl_dereference(tp->root); struct route4_filter __rcu **fp; @@ -479,7 +490,7 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, if (err < 0) return err; - fold = (struct route4_filter *)*arg; + fold = *arg; if (fold && handle && fold->handle != handle) return -EINVAL; @@ -537,9 +548,10 @@ static int route4_change(struct net *net, struct sk_buff *in_skb, } route4_reset_fastmap(head); - *arg = (unsigned long)f; + *arg = f; if (fold) { tcf_unbind_filter(tp, &fold->res); + tcf_exts_get_net(&fold->exts); call_rcu(&fold->rcu, route4_delete_filter); } return 0; @@ -576,7 +588,7 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg) arg->count++; continue; } - if (arg->fn(tp, (unsigned long)f, arg) < 0) { + if (arg->fn(tp, f, arg) < 0) { arg->stop = 1; return; } @@ -587,10 +599,10 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg) } } -static int route4_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, +static int route4_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { - struct route4_filter *f = (struct route4_filter *)fh; + struct route4_filter *f = fh; struct nlattr *nest; u32 id; @@ -636,6 +648,14 @@ nla_put_failure: return -1; } +static void route4_bind_class(void *fh, u32 classid, unsigned long cl) +{ + struct route4_filter *f = fh; + + if (f && f->res.classid == classid) + f->res.class = cl; +} + static struct tcf_proto_ops cls_route4_ops __read_mostly = { .kind = "route", .classify = route4_classify, @@ -646,6 +666,7 @@ static struct tcf_proto_ops cls_route4_ops __read_mostly = { .delete = route4_delete, .walk = route4_walk, .dump = route4_dump, + .bind_class = route4_bind_class, .owner = THIS_MODULE, }; diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 0d9d07798699..cf325625c99d 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -97,7 +97,10 @@ struct rsvp_filter { u32 handle; struct rsvp_session *sess; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid) @@ -248,7 +251,7 @@ static void rsvp_replace(struct tcf_proto *tp, struct rsvp_filter *n, u32 h) BUG_ON(1); } -static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle) +static void *rsvp_get(struct tcf_proto *tp, u32 handle) { struct rsvp_head *head = rtnl_dereference(tp->root); struct rsvp_session *s; @@ -257,17 +260,17 @@ static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle) unsigned int h2 = (handle >> 8) & 0xFF; if (h2 > 16) - return 0; + return NULL; for (s = rtnl_dereference(head->ht[h1]); s; s = rtnl_dereference(s->next)) { for (f = rtnl_dereference(s->ht[h2]); f; f = rtnl_dereference(f->next)) { if (f->handle == handle) - return (unsigned long)f; + return f; } } - return 0; + return NULL; } static int rsvp_init(struct tcf_proto *tp) @@ -282,12 +285,28 @@ static int rsvp_init(struct tcf_proto *tp) return -ENOBUFS; } +static void __rsvp_delete_filter(struct rsvp_filter *f) +{ + tcf_exts_destroy(&f->exts); + tcf_exts_put_net(&f->exts); + kfree(f); +} + +static void rsvp_delete_filter_work(struct work_struct *work) +{ + struct rsvp_filter *f = container_of(work, struct rsvp_filter, work); + + rtnl_lock(); + __rsvp_delete_filter(f); + rtnl_unlock(); +} + static void rsvp_delete_filter_rcu(struct rcu_head *head) { struct rsvp_filter *f = container_of(head, struct rsvp_filter, rcu); - tcf_exts_destroy(&f->exts); - kfree(f); + INIT_WORK(&f->work, rsvp_delete_filter_work); + tcf_queue_work(&f->work); } static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) @@ -297,7 +316,10 @@ static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f) * grace period, since converted-to-rcu actions are relying on that * in cleanup() callback */ - call_rcu(&f->rcu, rsvp_delete_filter_rcu); + if (tcf_exts_get_net(&f->exts)) + call_rcu(&f->rcu, rsvp_delete_filter_rcu); + else + __rsvp_delete_filter(f); } static void rsvp_destroy(struct tcf_proto *tp) @@ -328,10 +350,10 @@ static void rsvp_destroy(struct tcf_proto *tp) kfree_rcu(data, rcu); } -static int rsvp_delete(struct tcf_proto *tp, unsigned long arg, bool *last) +static int rsvp_delete(struct tcf_proto *tp, void *arg, bool *last) { struct rsvp_head *head = rtnl_dereference(tp->root); - struct rsvp_filter *nfp, *f = (struct rsvp_filter *)arg; + struct rsvp_filter *nfp, *f = arg; struct rsvp_filter __rcu **fp; unsigned int h = f->handle; struct rsvp_session __rcu **sp; @@ -389,7 +411,7 @@ static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt) if ((data->hgenerator += 0x10000) == 0) data->hgenerator = 0x10000; h = data->hgenerator|salt; - if (rsvp_get(tp, h) == 0) + if (!rsvp_get(tp, h)) return h; } return 0; @@ -464,7 +486,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, struct nlattr **tca, - unsigned long *arg, bool ovr) + void **arg, bool ovr) { struct rsvp_head *data = rtnl_dereference(tp->root); struct rsvp_filter *f, *nfp; @@ -493,7 +515,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, if (err < 0) goto errout2; - f = (struct rsvp_filter *)*arg; + f = *arg; if (f) { /* Node exists: adjust only classid */ struct rsvp_filter *n; @@ -518,7 +540,7 @@ static int rsvp_change(struct net *net, struct sk_buff *in_skb, tcf_bind_filter(tp, &n->res, base); } - tcf_exts_change(tp, &n->exts, &e); + tcf_exts_change(&n->exts, &e); rsvp_replace(tp, n, handle); return 0; } @@ -591,7 +613,7 @@ insert: if (f->tunnelhdr == 0) tcf_bind_filter(tp, &f->res, base); - tcf_exts_change(tp, &f->exts, &e); + tcf_exts_change(&f->exts, &e); fp = &s->ht[h2]; for (nfp = rtnl_dereference(*fp); nfp; @@ -604,7 +626,7 @@ insert: RCU_INIT_POINTER(f->next, nfp); rcu_assign_pointer(*fp, f); - *arg = (unsigned long)f; + *arg = f; return 0; } } @@ -663,7 +685,7 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg) arg->count++; continue; } - if (arg->fn(tp, (unsigned long)f, arg) < 0) { + if (arg->fn(tp, f, arg) < 0) { arg->stop = 1; return; } @@ -674,10 +696,10 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg) } } -static int rsvp_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, +static int rsvp_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { - struct rsvp_filter *f = (struct rsvp_filter *)fh; + struct rsvp_filter *f = fh; struct rsvp_session *s; struct nlattr *nest; struct tc_rsvp_pinfo pinfo; @@ -723,6 +745,14 @@ nla_put_failure: return -1; } +static void rsvp_bind_class(void *fh, u32 classid, unsigned long cl) +{ + struct rsvp_filter *f = fh; + + if (f && f->res.classid == classid) + f->res.class = cl; +} + static struct tcf_proto_ops RSVP_OPS __read_mostly = { .kind = RSVP_ID, .classify = rsvp_classify, @@ -733,6 +763,7 @@ static struct tcf_proto_ops RSVP_OPS __read_mostly = { .delete = rsvp_delete, .walk = rsvp_walk, .dump = rsvp_dump, + .bind_class = rsvp_bind_class, .owner = THIS_MODULE, }; diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 8a8a58357c39..67467ae24c97 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -13,6 +13,7 @@ #include <net/act_api.h> #include <net/netlink.h> #include <net/pkt_cls.h> +#include <net/sch_generic.h> /* * Passing parameters to the root seems to be done more awkwardly than really @@ -27,14 +28,20 @@ struct tcindex_filter_result { struct tcf_exts exts; struct tcf_result res; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; struct tcindex_filter { u16 key; struct tcindex_filter_result result; struct tcindex_filter __rcu *next; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; }; @@ -52,7 +59,7 @@ struct tcindex_data { static inline int tcindex_filter_is_set(struct tcindex_filter_result *r) { - return tcf_exts_is_predicative(&r->exts) || r->res.classid; + return tcf_exts_has_actions(&r->exts) || r->res.classid; } static struct tcindex_filter_result *tcindex_lookup(struct tcindex_data *p, @@ -90,9 +97,11 @@ static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp, f = tcindex_lookup(p, key); if (!f) { + struct Qdisc *q = tcf_block_q(tp->chain->block); + if (!p->fall_through) return -1; - res->classid = TC_H_MAKE(TC_H_MAJ(tp->q->handle), key); + res->classid = TC_H_MAKE(TC_H_MAJ(q->handle), key); res->class = 0; pr_debug("alg 0x%x\n", res->classid); return 0; @@ -104,16 +113,16 @@ static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp, } -static unsigned long tcindex_get(struct tcf_proto *tp, u32 handle) +static void *tcindex_get(struct tcf_proto *tp, u32 handle) { struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter_result *r; pr_debug("tcindex_get(tp %p,handle 0x%08x)\n", tp, handle); if (p->perfect && handle >= p->alloc_hash) - return 0; + return NULL; r = tcindex_lookup(p, handle); - return r && tcindex_filter_is_set(r) ? (unsigned long) r : 0UL; + return r && tcindex_filter_is_set(r) ? r : NULL; } static int tcindex_init(struct tcf_proto *tp) @@ -133,12 +142,46 @@ static int tcindex_init(struct tcf_proto *tp) return 0; } +static void __tcindex_destroy_rexts(struct tcindex_filter_result *r) +{ + tcf_exts_destroy(&r->exts); + tcf_exts_put_net(&r->exts); +} + +static void tcindex_destroy_rexts_work(struct work_struct *work) +{ + struct tcindex_filter_result *r; + + r = container_of(work, struct tcindex_filter_result, work); + rtnl_lock(); + __tcindex_destroy_rexts(r); + rtnl_unlock(); +} + static void tcindex_destroy_rexts(struct rcu_head *head) { struct tcindex_filter_result *r; r = container_of(head, struct tcindex_filter_result, rcu); - tcf_exts_destroy(&r->exts); + INIT_WORK(&r->work, tcindex_destroy_rexts_work); + tcf_queue_work(&r->work); +} + +static void __tcindex_destroy_fexts(struct tcindex_filter *f) +{ + tcf_exts_destroy(&f->result.exts); + tcf_exts_put_net(&f->result.exts); + kfree(f); +} + +static void tcindex_destroy_fexts_work(struct work_struct *work) +{ + struct tcindex_filter *f = container_of(work, struct tcindex_filter, + work); + + rtnl_lock(); + __tcindex_destroy_fexts(f); + rtnl_unlock(); } static void tcindex_destroy_fexts(struct rcu_head *head) @@ -146,18 +189,18 @@ static void tcindex_destroy_fexts(struct rcu_head *head) struct tcindex_filter *f = container_of(head, struct tcindex_filter, rcu); - tcf_exts_destroy(&f->result.exts); - kfree(f); + INIT_WORK(&f->work, tcindex_destroy_fexts_work); + tcf_queue_work(&f->work); } -static int tcindex_delete(struct tcf_proto *tp, unsigned long arg, bool *last) +static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last) { struct tcindex_data *p = rtnl_dereference(tp->root); - struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg; + struct tcindex_filter_result *r = arg; struct tcindex_filter __rcu **walk; struct tcindex_filter *f = NULL; - pr_debug("tcindex_delete(tp %p,arg 0x%lx),p %p\n", tp, arg, p); + pr_debug("tcindex_delete(tp %p,arg %p),p %p\n", tp, arg, p); if (p->perfect) { if (!r->res.class) return -ENOENT; @@ -182,18 +225,24 @@ found: * grace period, since converted-to-rcu actions are relying on that * in cleanup() callback */ - if (f) - call_rcu(&f->rcu, tcindex_destroy_fexts); - else - call_rcu(&r->rcu, tcindex_destroy_rexts); + if (f) { + if (tcf_exts_get_net(&f->result.exts)) + call_rcu(&f->rcu, tcindex_destroy_fexts); + else + __tcindex_destroy_fexts(f); + } else { + if (tcf_exts_get_net(&r->exts)) + call_rcu(&r->rcu, tcindex_destroy_rexts); + else + __tcindex_destroy_rexts(r); + } *last = false; return 0; } static int tcindex_destroy_element(struct tcf_proto *tp, - unsigned long arg, - struct tcf_walker *walker) + void *arg, struct tcf_walker *walker) { bool last; @@ -419,9 +468,9 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, } if (old_r) - tcf_exts_change(tp, &r->exts, &e); + tcf_exts_change(&r->exts, &e); else - tcf_exts_change(tp, &cr.exts, &e); + tcf_exts_change(&cr.exts, &e); if (old_r && old_r != r) { err = tcindex_filter_result_init(old_r); @@ -439,7 +488,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, struct tcindex_filter *nfp; struct tcindex_filter __rcu **fp; - tcf_exts_change(tp, &f->result.exts, &r->exts); + tcf_exts_change(&f->result.exts, &r->exts); fp = cp->h + (handle % cp->hash); for (nfp = rtnl_dereference(*fp); @@ -471,17 +520,17 @@ errout: static int tcindex_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, unsigned long *arg, bool ovr) + struct nlattr **tca, void **arg, bool ovr) { struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_TCINDEX_MAX + 1]; struct tcindex_data *p = rtnl_dereference(tp->root); - struct tcindex_filter_result *r = (struct tcindex_filter_result *) *arg; + struct tcindex_filter_result *r = *arg; int err; pr_debug("tcindex_change(tp %p,handle 0x%08x,tca %p,arg %p),opt %p," - "p %p,r %p,*arg 0x%lx\n", - tp, handle, tca, arg, opt, p, r, arg ? *arg : 0L); + "p %p,r %p,*arg %p\n", + tp, handle, tca, arg, opt, p, r, arg ? *arg : NULL); if (!opt) return 0; @@ -506,9 +555,7 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker) if (!p->perfect[i].res.class) continue; if (walker->count >= walker->skip) { - if (walker->fn(tp, - (unsigned long) (p->perfect+i), walker) - < 0) { + if (walker->fn(tp, p->perfect + i, walker) < 0) { walker->stop = 1; return; } @@ -522,8 +569,7 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker) for (f = rtnl_dereference(p->h[i]); f; f = next) { next = rtnl_dereference(f->next); if (walker->count >= walker->skip) { - if (walker->fn(tp, (unsigned long) &f->result, - walker) < 0) { + if (walker->fn(tp, &f->result, walker) < 0) { walker->stop = 1; return; } @@ -548,14 +594,14 @@ static void tcindex_destroy(struct tcf_proto *tp) } -static int tcindex_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, +static int tcindex_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { struct tcindex_data *p = rtnl_dereference(tp->root); - struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh; + struct tcindex_filter_result *r = fh; struct nlattr *nest; - pr_debug("tcindex_dump(tp %p,fh 0x%lx,skb %p,t %p),p %p,r %p\n", + pr_debug("tcindex_dump(tp %p,fh %p,skb %p,t %p),p %p,r %p\n", tp, fh, skb, t, p, r); pr_debug("p->perfect %p p->h %p\n", p->perfect, p->h); @@ -610,6 +656,14 @@ nla_put_failure: return -1; } +static void tcindex_bind_class(void *fh, u32 classid, unsigned long cl) +{ + struct tcindex_filter_result *r = fh; + + if (r && r->res.classid == classid) + r->res.class = cl; +} + static struct tcf_proto_ops cls_tcindex_ops __read_mostly = { .kind = "tcindex", .classify = tcindex_classify, @@ -620,6 +674,7 @@ static struct tcf_proto_ops cls_tcindex_ops __read_mostly = { .delete = tcindex_delete, .walk = tcindex_walk, .dump = tcindex_dump, + .bind_class = tcindex_bind_class, .owner = THIS_MODULE, }; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 2d01195153e6..ac152b4f4247 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -40,10 +40,13 @@ #include <linux/rtnetlink.h> #include <linux/skbuff.h> #include <linux/bitmap.h> +#include <linux/netdevice.h> +#include <linux/hash.h> #include <net/netlink.h> #include <net/act_api.h> #include <net/pkt_cls.h> #include <linux/netdevice.h> +#include <linux/idr.h> struct tc_u_knode { struct tc_u_knode __rcu *next; @@ -66,7 +69,10 @@ struct tc_u_knode { u32 __percpu *pcpu_success; #endif struct tcf_proto *tp; - struct rcu_head rcu; + union { + struct work_struct work; + struct rcu_head rcu; + }; /* The 'sel' field MUST be the last field in structure to allow for * tc_u32_keys allocated at end of structure. */ @@ -80,6 +86,7 @@ struct tc_u_hnode { struct tc_u_common *tp_c; int refcnt; unsigned int divisor; + struct idr handle_idr; struct rcu_head rcu; /* The 'ht' field MUST be the last field in structure to allow for * more entries allocated at end of structure. @@ -89,9 +96,10 @@ struct tc_u_hnode { struct tc_u_common { struct tc_u_hnode __rcu *hlist; - struct Qdisc *q; + struct tcf_block *block; int refcnt; - u32 hgenerator; + struct idr handle_idr; + struct hlist_node hnode; struct rcu_head rcu; }; @@ -289,7 +297,7 @@ out: } -static unsigned long u32_get(struct tcf_proto *tp, u32 handle) +static void *u32_get(struct tcf_proto *tp, u32 handle) { struct tc_u_hnode *ht; struct tc_u_common *tp_c = tp->data; @@ -300,43 +308,68 @@ static unsigned long u32_get(struct tcf_proto *tp, u32 handle) ht = u32_lookup_ht(tp_c, TC_U32_HTID(handle)); if (!ht) - return 0; + return NULL; if (TC_U32_KEY(handle) == 0) - return (unsigned long)ht; + return ht; - return (unsigned long)u32_lookup_key(ht, handle); + return u32_lookup_key(ht, handle); } -static u32 gen_new_htid(struct tc_u_common *tp_c) +static u32 gen_new_htid(struct tc_u_common *tp_c, struct tc_u_hnode *ptr) { - int i = 0x800; + unsigned long idr_index; + int err; - /* hgenerator only used inside rtnl lock it is safe to increment + /* This is only used inside rtnl lock it is safe to increment * without read _copy_ update semantics */ - do { - if (++tp_c->hgenerator == 0x7FF) - tp_c->hgenerator = 1; - } while (--i > 0 && u32_lookup_ht(tp_c, (tp_c->hgenerator|0x800)<<20)); + err = idr_alloc_ext(&tp_c->handle_idr, ptr, &idr_index, + 1, 0x7FF, GFP_KERNEL); + if (err) + return 0; + return (u32)(idr_index | 0x800) << 20; +} + +static struct hlist_head *tc_u_common_hash; + +#define U32_HASH_SHIFT 10 +#define U32_HASH_SIZE (1 << U32_HASH_SHIFT) - return i > 0 ? (tp_c->hgenerator|0x800)<<20 : 0; +static unsigned int tc_u_hash(const struct tcf_proto *tp) +{ + return hash_ptr(tp->chain->block, U32_HASH_SHIFT); +} + +static struct tc_u_common *tc_u_common_find(const struct tcf_proto *tp) +{ + struct tc_u_common *tc; + unsigned int h; + + h = tc_u_hash(tp); + hlist_for_each_entry(tc, &tc_u_common_hash[h], hnode) { + if (tc->block == tp->chain->block) + return tc; + } + return NULL; } static int u32_init(struct tcf_proto *tp) { struct tc_u_hnode *root_ht; struct tc_u_common *tp_c; + unsigned int h; - tp_c = tp->q->u32_node; + tp_c = tc_u_common_find(tp); root_ht = kzalloc(sizeof(*root_ht), GFP_KERNEL); if (root_ht == NULL) return -ENOBUFS; root_ht->refcnt++; - root_ht->handle = tp_c ? gen_new_htid(tp_c) : 0x80000000; + root_ht->handle = tp_c ? gen_new_htid(tp_c, root_ht) : 0x80000000; root_ht->prio = tp->prio; + idr_init(&root_ht->handle_idr); if (tp_c == NULL) { tp_c = kzalloc(sizeof(*tp_c), GFP_KERNEL); @@ -344,8 +377,12 @@ static int u32_init(struct tcf_proto *tp) kfree(root_ht); return -ENOBUFS; } - tp_c->q = tp->q; - tp->q->u32_node = tp_c; + tp_c->block = tp->chain->block; + INIT_HLIST_NODE(&tp_c->hnode); + idr_init(&tp_c->handle_idr); + + h = tc_u_hash(tp); + hlist_add_head(&tp_c->hnode, &tc_u_common_hash[h]); } tp_c->refcnt++; @@ -362,6 +399,7 @@ static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n, bool free_pf) { tcf_exts_destroy(&n->exts); + tcf_exts_put_net(&n->exts); if (n->ht_down) n->ht_down->refcnt--; #ifdef CONFIG_CLS_U32_PERF @@ -384,11 +422,21 @@ static int u32_destroy_key(struct tcf_proto *tp, struct tc_u_knode *n, * this the u32_delete_key_rcu variant does not free the percpu * statistics. */ +static void u32_delete_key_work(struct work_struct *work) +{ + struct tc_u_knode *key = container_of(work, struct tc_u_knode, work); + + rtnl_lock(); + u32_destroy_key(key->tp, key, false); + rtnl_unlock(); +} + static void u32_delete_key_rcu(struct rcu_head *rcu) { struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu); - u32_destroy_key(key->tp, key, false); + INIT_WORK(&key->work, u32_delete_key_work); + tcf_queue_work(&key->work); } /* u32_delete_key_freepf_rcu is the rcu callback variant @@ -398,11 +446,21 @@ static void u32_delete_key_rcu(struct rcu_head *rcu) * for the variant that should be used with keys return from * u32_init_knode() */ +static void u32_delete_key_freepf_work(struct work_struct *work) +{ + struct tc_u_knode *key = container_of(work, struct tc_u_knode, work); + + rtnl_lock(); + u32_destroy_key(key->tp, key, true); + rtnl_unlock(); +} + static void u32_delete_key_freepf_rcu(struct rcu_head *rcu) { struct tc_u_knode *key = container_of(rcu, struct tc_u_knode, rcu); - u32_destroy_key(key->tp, key, true); + INIT_WORK(&key->work, u32_delete_key_freepf_work); + tcf_queue_work(&key->work); } static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) @@ -419,6 +477,7 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) RCU_INIT_POINTER(*kp, key->next); tcf_unbind_filter(tp, &key->res); + tcf_exts_get_net(&key->exts); call_rcu(&key->rcu, u32_delete_key_freepf_rcu); return 0; } @@ -428,111 +487,95 @@ static int u32_delete_key(struct tcf_proto *tp, struct tc_u_knode *key) return 0; } -static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle) +static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) { - struct net_device *dev = tp->q->dev_queue->dev; - struct tc_cls_u32_offload u32_offload = {0}; - struct tc_to_netdev offload; - - offload.type = TC_SETUP_CLSU32; - offload.cls_u32 = &u32_offload; - - if (tc_should_offload(dev, tp, 0)) { - offload.cls_u32->command = TC_CLSU32_DELETE_KNODE; - offload.cls_u32->knode.handle = handle; - dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->chain->index, tp->protocol, - &offload); - } + struct tcf_block *block = tp->chain->block; + struct tc_cls_u32_offload cls_u32 = {}; + + tc_cls_common_offload_init(&cls_u32.common, tp); + cls_u32.command = TC_CLSU32_DELETE_HNODE; + cls_u32.hnode.divisor = h->divisor; + cls_u32.hnode.handle = h->handle; + cls_u32.hnode.prio = h->prio; + + tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false); } static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h, u32 flags) { - struct net_device *dev = tp->q->dev_queue->dev; - struct tc_cls_u32_offload u32_offload = {0}; - struct tc_to_netdev offload; + struct tcf_block *block = tp->chain->block; + struct tc_cls_u32_offload cls_u32 = {}; + bool skip_sw = tc_skip_sw(flags); + bool offloaded = false; int err; - if (!tc_should_offload(dev, tp, flags)) - return tc_skip_sw(flags) ? -EINVAL : 0; - - offload.type = TC_SETUP_CLSU32; - offload.cls_u32 = &u32_offload; - - offload.cls_u32->command = TC_CLSU32_NEW_HNODE; - offload.cls_u32->hnode.divisor = h->divisor; - offload.cls_u32->hnode.handle = h->handle; - offload.cls_u32->hnode.prio = h->prio; + tc_cls_common_offload_init(&cls_u32.common, tp); + cls_u32.command = TC_CLSU32_NEW_HNODE; + cls_u32.hnode.divisor = h->divisor; + cls_u32.hnode.handle = h->handle; + cls_u32.hnode.prio = h->prio; - err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->chain->index, tp->protocol, - &offload); - if (tc_skip_sw(flags)) + err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw); + if (err < 0) { + u32_clear_hw_hnode(tp, h); return err; + } else if (err > 0) { + offloaded = true; + } + + if (skip_sw && !offloaded) + return -EINVAL; return 0; } -static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) +static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle) { - struct net_device *dev = tp->q->dev_queue->dev; - struct tc_cls_u32_offload u32_offload = {0}; - struct tc_to_netdev offload; - - offload.type = TC_SETUP_CLSU32; - offload.cls_u32 = &u32_offload; - - if (tc_should_offload(dev, tp, 0)) { - offload.cls_u32->command = TC_CLSU32_DELETE_HNODE; - offload.cls_u32->hnode.divisor = h->divisor; - offload.cls_u32->hnode.handle = h->handle; - offload.cls_u32->hnode.prio = h->prio; - - dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->chain->index, tp->protocol, - &offload); - } + struct tcf_block *block = tp->chain->block; + struct tc_cls_u32_offload cls_u32 = {}; + + tc_cls_common_offload_init(&cls_u32.common, tp); + cls_u32.command = TC_CLSU32_DELETE_KNODE; + cls_u32.knode.handle = handle; + + tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, false); } static int u32_replace_hw_knode(struct tcf_proto *tp, struct tc_u_knode *n, u32 flags) { - struct net_device *dev = tp->q->dev_queue->dev; - struct tc_cls_u32_offload u32_offload = {0}; - struct tc_to_netdev offload; + struct tcf_block *block = tp->chain->block; + struct tc_cls_u32_offload cls_u32 = {}; + bool skip_sw = tc_skip_sw(flags); int err; - offload.type = TC_SETUP_CLSU32; - offload.cls_u32 = &u32_offload; - - if (!tc_should_offload(dev, tp, flags)) - return tc_skip_sw(flags) ? -EINVAL : 0; - - offload.cls_u32->command = TC_CLSU32_REPLACE_KNODE; - offload.cls_u32->knode.handle = n->handle; - offload.cls_u32->knode.fshift = n->fshift; + tc_cls_common_offload_init(&cls_u32.common, tp); + cls_u32.command = TC_CLSU32_REPLACE_KNODE; + cls_u32.knode.handle = n->handle; + cls_u32.knode.fshift = n->fshift; #ifdef CONFIG_CLS_U32_MARK - offload.cls_u32->knode.val = n->val; - offload.cls_u32->knode.mask = n->mask; + cls_u32.knode.val = n->val; + cls_u32.knode.mask = n->mask; #else - offload.cls_u32->knode.val = 0; - offload.cls_u32->knode.mask = 0; + cls_u32.knode.val = 0; + cls_u32.knode.mask = 0; #endif - offload.cls_u32->knode.sel = &n->sel; - offload.cls_u32->knode.exts = &n->exts; + cls_u32.knode.sel = &n->sel; + cls_u32.knode.exts = &n->exts; if (n->ht_down) - offload.cls_u32->knode.link_handle = n->ht_down->handle; - - err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->chain->index, tp->protocol, - &offload); + cls_u32.knode.link_handle = n->ht_down->handle; - if (!err) + err = tc_setup_cb_call(block, NULL, TC_SETUP_CLSU32, &cls_u32, skip_sw); + if (err < 0) { + u32_remove_hw_knode(tp, n->handle); + return err; + } else if (err > 0) { n->flags |= TCA_CLS_FLAGS_IN_HW; + } - if (tc_skip_sw(flags)) - return err; + if (skip_sw && !(n->flags & TCA_CLS_FLAGS_IN_HW)) + return -EINVAL; return 0; } @@ -548,7 +591,11 @@ static void u32_clear_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) rtnl_dereference(n->next)); tcf_unbind_filter(tp, &n->res); u32_remove_hw_knode(tp, n->handle); - call_rcu(&n->rcu, u32_delete_key_freepf_rcu); + idr_remove_ext(&ht->handle_idr, n->handle); + if (tcf_exts_get_net(&n->exts)) + call_rcu(&n->rcu, u32_delete_key_freepf_rcu); + else + u32_destroy_key(n->tp, n, true); } } } @@ -569,6 +616,8 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht) hn = &phn->next, phn = rtnl_dereference(*hn)) { if (phn == ht) { u32_clear_hw_hnode(tp, ht); + idr_destroy(&ht->handle_idr); + idr_remove_ext(&tp_c->handle_idr, ht->handle); RCU_INIT_POINTER(*hn, ht->next); kfree_rcu(ht, rcu); return 0; @@ -602,7 +651,7 @@ static void u32_destroy(struct tcf_proto *tp) if (--tp_c->refcnt == 0) { struct tc_u_hnode *ht; - tp->q->u32_node = NULL; + hlist_del(&tp_c->hnode); for (ht = rtnl_dereference(tp_c->hlist); ht; @@ -616,15 +665,16 @@ static void u32_destroy(struct tcf_proto *tp) kfree_rcu(ht, rcu); } + idr_destroy(&tp_c->handle_idr); kfree(tp_c); } tp->data = NULL; } -static int u32_delete(struct tcf_proto *tp, unsigned long arg, bool *last) +static int u32_delete(struct tcf_proto *tp, void *arg, bool *last) { - struct tc_u_hnode *ht = (struct tc_u_hnode *)arg; + struct tc_u_hnode *ht = arg; struct tc_u_hnode *root_ht = rtnl_dereference(tp->root); struct tc_u_common *tp_c = tp->data; int ret = 0; @@ -684,27 +734,21 @@ ret: return ret; } -#define NR_U32_NODE (1<<12) -static u32 gen_new_kid(struct tc_u_hnode *ht, u32 handle) +static u32 gen_new_kid(struct tc_u_hnode *ht, u32 htid) { - struct tc_u_knode *n; - unsigned long i; - unsigned long *bitmap = kzalloc(BITS_TO_LONGS(NR_U32_NODE) * sizeof(unsigned long), - GFP_KERNEL); - if (!bitmap) - return handle | 0xFFF; - - for (n = rtnl_dereference(ht->ht[TC_U32_HASH(handle)]); - n; - n = rtnl_dereference(n->next)) - set_bit(TC_U32_NODE(n->handle), bitmap); - - i = find_next_zero_bit(bitmap, NR_U32_NODE, 0x800); - if (i >= NR_U32_NODE) - i = find_next_zero_bit(bitmap, NR_U32_NODE, 1); + unsigned long idr_index; + u32 start = htid | 0x800; + u32 max = htid | 0xFFF; + u32 min = htid; + + if (idr_alloc_ext(&ht->handle_idr, NULL, &idr_index, + start, max + 1, GFP_KERNEL)) { + if (idr_alloc_ext(&ht->handle_idr, NULL, &idr_index, + min + 1, max + 1, GFP_KERNEL)) + return max; + } - kfree(bitmap); - return handle | (i >= NR_U32_NODE ? 0xFFF : i); + return (u32)idr_index; } static const struct nla_policy u32_policy[TCA_U32_MAX + 1] = { @@ -723,29 +767,24 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp, struct tc_u_knode *n, struct nlattr **tb, struct nlattr *est, bool ovr) { - struct tcf_exts e; int err; - err = tcf_exts_init(&e, TCA_U32_ACT, TCA_U32_POLICE); + err = tcf_exts_validate(net, tp, tb, est, &n->exts, ovr); if (err < 0) return err; - err = tcf_exts_validate(net, tp, tb, est, &e, ovr); - if (err < 0) - goto errout; - err = -EINVAL; if (tb[TCA_U32_LINK]) { u32 handle = nla_get_u32(tb[TCA_U32_LINK]); struct tc_u_hnode *ht_down = NULL, *ht_old; if (TC_U32_KEY(handle)) - goto errout; + return -EINVAL; if (handle) { ht_down = u32_lookup_ht(ht->tp_c, handle); if (ht_down == NULL) - goto errout; + return -EINVAL; ht_down->refcnt++; } @@ -765,16 +804,11 @@ static int u32_set_parms(struct net *net, struct tcf_proto *tp, int ret; ret = tcf_change_indev(net, tb[TCA_U32_INDEV]); if (ret < 0) - goto errout; + return -EINVAL; n->ifindex = ret; } #endif - tcf_exts_change(tp, &n->exts, &e); - return 0; -errout: - tcf_exts_destroy(&e); - return err; } static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c, @@ -799,6 +833,7 @@ static void u32_replace_knode(struct tcf_proto *tp, struct tc_u_common *tp_c, if (pins->handle == n->handle) break; + idr_replace_ext(&ht->handle_idr, n, n->handle); RCU_INIT_POINTER(n->next, pins->next); rcu_assign_pointer(*ins, n); } @@ -858,7 +893,7 @@ static struct tc_u_knode *u32_init_knode(struct tcf_proto *tp, static int u32_change(struct net *net, struct sk_buff *in_skb, struct tcf_proto *tp, unsigned long base, u32 handle, - struct nlattr **tca, unsigned long *arg, bool ovr) + struct nlattr **tca, void **arg, bool ovr) { struct tc_u_common *tp_c = tp->data; struct tc_u_hnode *ht; @@ -885,7 +920,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, return -EINVAL; } - n = (struct tc_u_knode *)*arg; + n = *arg; if (n) { struct tc_u_knode *new; @@ -919,6 +954,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, u32_replace_knode(tp, tp_c, new); tcf_unbind_filter(tp, &n->res); + tcf_exts_get_net(&n->exts); call_rcu(&n->rcu, u32_delete_key_rcu); return 0; } @@ -930,29 +966,40 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, return -EINVAL; if (TC_U32_KEY(handle)) return -EINVAL; - if (handle == 0) { - handle = gen_new_htid(tp->data); - if (handle == 0) - return -ENOMEM; - } ht = kzalloc(sizeof(*ht) + divisor*sizeof(void *), GFP_KERNEL); if (ht == NULL) return -ENOBUFS; + if (handle == 0) { + handle = gen_new_htid(tp->data, ht); + if (handle == 0) { + kfree(ht); + return -ENOMEM; + } + } else { + err = idr_alloc_ext(&tp_c->handle_idr, ht, NULL, + handle, handle + 1, GFP_KERNEL); + if (err) { + kfree(ht); + return err; + } + } ht->tp_c = tp_c; ht->refcnt = 1; ht->divisor = divisor; ht->handle = handle; ht->prio = tp->prio; + idr_init(&ht->handle_idr); err = u32_replace_hw_hnode(tp, ht, flags); if (err) { + idr_remove_ext(&tp_c->handle_idr, handle); kfree(ht); return err; } RCU_INIT_POINTER(ht->next, tp_c->hlist); rcu_assign_pointer(tp_c->hlist, ht); - *arg = (unsigned long)ht; + *arg = ht; return 0; } @@ -979,24 +1026,33 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, if (TC_U32_HTID(handle) && TC_U32_HTID(handle^htid)) return -EINVAL; handle = htid | TC_U32_NODE(handle); + err = idr_alloc_ext(&ht->handle_idr, NULL, NULL, + handle, handle + 1, + GFP_KERNEL); + if (err) + return err; } else handle = gen_new_kid(ht, htid); - if (tb[TCA_U32_SEL] == NULL) - return -EINVAL; + if (tb[TCA_U32_SEL] == NULL) { + err = -EINVAL; + goto erridr; + } s = nla_data(tb[TCA_U32_SEL]); n = kzalloc(sizeof(*n) + s->nkeys*sizeof(struct tc_u32_key), GFP_KERNEL); - if (n == NULL) - return -ENOBUFS; + if (n == NULL) { + err = -ENOBUFS; + goto erridr; + } #ifdef CONFIG_CLS_U32_PERF size = sizeof(struct tc_u32_pcnt) + s->nkeys * sizeof(u64); n->pf = __alloc_percpu(size, __alignof__(struct tc_u32_pcnt)); if (!n->pf) { - kfree(n); - return -ENOBUFS; + err = -ENOBUFS; + goto errfree; } #endif @@ -1047,7 +1103,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, RCU_INIT_POINTER(n->next, pins); rcu_assign_pointer(*ins, n); - *arg = (unsigned long)n; + *arg = n; return 0; } @@ -1059,9 +1115,12 @@ errhw: errout: tcf_exts_destroy(&n->exts); #ifdef CONFIG_CLS_U32_PERF +errfree: free_percpu(n->pf); #endif kfree(n); +erridr: + idr_remove_ext(&ht->handle_idr, handle); return err; } @@ -1081,7 +1140,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg) if (ht->prio != tp->prio) continue; if (arg->count >= arg->skip) { - if (arg->fn(tp, (unsigned long)ht, arg) < 0) { + if (arg->fn(tp, ht, arg) < 0) { arg->stop = 1; return; } @@ -1095,7 +1154,7 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg) arg->count++; continue; } - if (arg->fn(tp, (unsigned long)n, arg) < 0) { + if (arg->fn(tp, n, arg) < 0) { arg->stop = 1; return; } @@ -1105,10 +1164,18 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg) } } -static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, +static void u32_bind_class(void *fh, u32 classid, unsigned long cl) +{ + struct tc_u_knode *n = fh; + + if (n && n->res.classid == classid) + n->res.class = cl; +} + +static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh, struct sk_buff *skb, struct tcmsg *t) { - struct tc_u_knode *n = (struct tc_u_knode *)fh; + struct tc_u_knode *n = fh; struct tc_u_hnode *ht_up, *ht_down; struct nlattr *nest; @@ -1122,7 +1189,7 @@ static int u32_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, goto nla_put_failure; if (TC_U32_KEY(n->handle) == 0) { - struct tc_u_hnode *ht = (struct tc_u_hnode *)fh; + struct tc_u_hnode *ht = fh; u32 divisor = ht->divisor + 1; if (nla_put_u32(skb, TCA_U32_DIVISOR, divisor)) @@ -1235,11 +1302,14 @@ static struct tcf_proto_ops cls_u32_ops __read_mostly = { .delete = u32_delete, .walk = u32_walk, .dump = u32_dump, + .bind_class = u32_bind_class, .owner = THIS_MODULE, }; static int __init init_u32(void) { + int i, ret; + pr_info("u32 classifier\n"); #ifdef CONFIG_CLS_U32_PERF pr_info(" Performance counters on\n"); @@ -1250,12 +1320,25 @@ static int __init init_u32(void) #ifdef CONFIG_NET_CLS_ACT pr_info(" Actions configured\n"); #endif - return register_tcf_proto_ops(&cls_u32_ops); + tc_u_common_hash = kvmalloc_array(U32_HASH_SIZE, + sizeof(struct hlist_head), + GFP_KERNEL); + if (!tc_u_common_hash) + return -ENOMEM; + + for (i = 0; i < U32_HASH_SIZE; i++) + INIT_HLIST_HEAD(&tc_u_common_hash[i]); + + ret = register_tcf_proto_ops(&cls_u32_ops); + if (ret) + kvfree(tc_u_common_hash); + return ret; } static void __exit exit_u32(void) { unregister_tcf_proto_ops(&cls_u32_ops); + kvfree(tc_u_common_hash); } module_init(init_u32) diff --git a/net/sched/ematch.c b/net/sched/ematch.c index 03b677bc0700..1331a4c2d8ff 100644 --- a/net/sched/ematch.c +++ b/net/sched/ematch.c @@ -178,7 +178,7 @@ static int tcf_em_validate(struct tcf_proto *tp, struct tcf_ematch_hdr *em_hdr = nla_data(nla); int data_len = nla_len(nla) - sizeof(*em_hdr); void *data = (void *) em_hdr + sizeof(*em_hdr); - struct net *net = dev_net(qdisc_dev(tp->q)); + struct net *net = tp->chain->block->net; if (!TCF_EM_REL_VALID(em_hdr->flags)) goto errout; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index a3fa144b8648..b6c4f536876b 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -35,13 +35,7 @@ #include <net/sock.h> #include <net/netlink.h> #include <net/pkt_sched.h> - -static int qdisc_notify(struct net *net, struct sk_buff *oskb, - struct nlmsghdr *n, u32 clid, - struct Qdisc *old, struct Qdisc *new); -static int tclass_notify(struct net *net, struct sk_buff *oskb, - struct nlmsghdr *n, struct Qdisc *q, - unsigned long cl, int event); +#include <net/pkt_cls.h> /* @@ -160,7 +154,7 @@ int register_qdisc(struct Qdisc_ops *qops) if (qops->cl_ops) { const struct Qdisc_class_ops *cops = qops->cl_ops; - if (!(cops->get && cops->put && cops->walk && cops->leaf)) + if (!(cops->find && cops->walk && cops->leaf)) goto out_einval; if (cops->tcf_block && !(cops->bind_tcf && cops->unbind_tcf)) @@ -307,6 +301,8 @@ struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) { struct Qdisc *q; + if (!handle) + return NULL; q = qdisc_match_from_root(dev->qdisc, handle); if (q) goto out; @@ -327,12 +323,11 @@ static struct Qdisc *qdisc_leaf(struct Qdisc *p, u32 classid) if (cops == NULL) return NULL; - cl = cops->get(p, classid); + cl = cops->find(p, classid); if (cl == 0) return NULL; leaf = cops->leaf(p, cl); - cops->put(p, cl); return leaf; } @@ -621,14 +616,10 @@ EXPORT_SYMBOL(qdisc_watchdog_cancel); static struct hlist_head *qdisc_class_hash_alloc(unsigned int n) { - unsigned int size = n * sizeof(struct hlist_head), i; struct hlist_head *h; + unsigned int i; - if (size <= PAGE_SIZE) - h = kmalloc(size, GFP_KERNEL); - else - h = (struct hlist_head *) - __get_free_pages(GFP_KERNEL, get_order(size)); + h = kvmalloc_array(n, sizeof(struct hlist_head), GFP_KERNEL); if (h != NULL) { for (i = 0; i < n; i++) @@ -637,16 +628,6 @@ static struct hlist_head *qdisc_class_hash_alloc(unsigned int n) return h; } -static void qdisc_class_hash_free(struct hlist_head *h, unsigned int n) -{ - unsigned int size = n * sizeof(struct hlist_head); - - if (size <= PAGE_SIZE) - kfree(h); - else - free_pages((unsigned long)h, get_order(size)); -} - void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash) { struct Qdisc_class_common *cl; @@ -679,7 +660,7 @@ void qdisc_class_hash_grow(struct Qdisc *sch, struct Qdisc_class_hash *clhash) clhash->hashmask = nmask; sch_tree_unlock(sch); - qdisc_class_hash_free(ohash, osize); + kvfree(ohash); } EXPORT_SYMBOL(qdisc_class_hash_grow); @@ -699,7 +680,7 @@ EXPORT_SYMBOL(qdisc_class_hash_init); void qdisc_class_hash_destroy(struct Qdisc_class_hash *clhash) { - qdisc_class_hash_free(clhash->hash, clhash->hashsize); + kvfree(clhash->hash); } EXPORT_SYMBOL(qdisc_class_hash_destroy); @@ -749,6 +730,7 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n, const struct Qdisc_class_ops *cops; unsigned long cl; u32 parentid; + bool notify; int drops; if (n == 0 && len == 0) @@ -761,6 +743,13 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n, if (sch->flags & TCQ_F_NOPARENT) break; + /* Notify parent qdisc only if child qdisc becomes empty. + * + * If child was empty even before update then backlog + * counter is screwed and we skip notification because + * parent class is already passive. + */ + notify = !sch->q.qlen && !WARN_ON_ONCE(!n); /* TODO: perform the search on a per txq basis */ sch = qdisc_lookup(qdisc_dev(sch), TC_H_MAJ(parentid)); if (sch == NULL) { @@ -768,10 +757,9 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n, break; } cops = sch->ops->cl_ops; - if (cops->qlen_notify) { - cl = cops->get(sch, parentid); + if (notify && cops->qlen_notify) { + cl = cops->find(sch, parentid); cops->qlen_notify(sch, cl); - cops->put(sch, cl); } sch->q.qlen -= n; sch->qstats.backlog -= len; @@ -781,6 +769,111 @@ void qdisc_tree_reduce_backlog(struct Qdisc *sch, unsigned int n, } EXPORT_SYMBOL(qdisc_tree_reduce_backlog); +static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, + u32 portid, u32 seq, u16 flags, int event) +{ + struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL; + struct gnet_stats_queue __percpu *cpu_qstats = NULL; + struct tcmsg *tcm; + struct nlmsghdr *nlh; + unsigned char *b = skb_tail_pointer(skb); + struct gnet_dump d; + struct qdisc_size_table *stab; + __u32 qlen; + + cond_resched(); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); + if (!nlh) + goto out_nlmsg_trim; + tcm = nlmsg_data(nlh); + tcm->tcm_family = AF_UNSPEC; + tcm->tcm__pad1 = 0; + tcm->tcm__pad2 = 0; + tcm->tcm_ifindex = qdisc_dev(q)->ifindex; + tcm->tcm_parent = clid; + tcm->tcm_handle = q->handle; + tcm->tcm_info = refcount_read(&q->refcnt); + if (nla_put_string(skb, TCA_KIND, q->ops->id)) + goto nla_put_failure; + if (q->ops->dump && q->ops->dump(q, skb) < 0) + goto nla_put_failure; + qlen = q->q.qlen; + + stab = rtnl_dereference(q->stab); + if (stab && qdisc_dump_stab(skb, stab) < 0) + goto nla_put_failure; + + if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, + NULL, &d, TCA_PAD) < 0) + goto nla_put_failure; + + if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0) + goto nla_put_failure; + + if (qdisc_is_percpu_stats(q)) { + cpu_bstats = q->cpu_bstats; + cpu_qstats = q->cpu_qstats; + } + + if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q), + &d, cpu_bstats, &q->bstats) < 0 || + gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 || + gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0) + goto nla_put_failure; + + if (gnet_stats_finish_copy(&d) < 0) + goto nla_put_failure; + + nlh->nlmsg_len = skb_tail_pointer(skb) - b; + return skb->len; + +out_nlmsg_trim: +nla_put_failure: + nlmsg_trim(skb, b); + return -1; +} + +static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible) +{ + if (q->flags & TCQ_F_BUILTIN) + return true; + if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible) + return true; + + return false; +} + +static int qdisc_notify(struct net *net, struct sk_buff *oskb, + struct nlmsghdr *n, u32 clid, + struct Qdisc *old, struct Qdisc *new) +{ + struct sk_buff *skb; + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + if (old && !tc_qdisc_dump_ignore(old, false)) { + if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq, + 0, RTM_DELQDISC) < 0) + goto err_out; + } + if (new && !tc_qdisc_dump_ignore(new, false)) { + if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq, + old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0) + goto err_out; + } + + if (skb->len) + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); + +err_out: + kfree_skb(skb); + return -EINVAL; +} + static void notify_and_destroy(struct net *net, struct sk_buff *skb, struct nlmsghdr *n, u32 clid, struct Qdisc *old, struct Qdisc *new) @@ -836,7 +929,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, old = dev_graft_qdisc(dev_queue, new); if (new && i > 0) - refcount_inc(&new->refcnt); + qdisc_refcount_inc(new); if (!ingress) qdisc_destroy(old); @@ -847,7 +940,7 @@ skip: notify_and_destroy(net, skb, n, classid, dev->qdisc, new); if (new && !new->ops->attach) - refcount_inc(&new->refcnt); + qdisc_refcount_inc(new); dev->qdisc = new ? : &noop_qdisc; if (new && new->ops->attach) @@ -863,11 +956,11 @@ skip: err = -EOPNOTSUPP; if (cops && cops->graft) { - unsigned long cl = cops->get(parent, classid); - if (cl) { + unsigned long cl = cops->find(parent, classid); + + if (cl) err = cops->graft(parent, cl, new, &old); - cops->put(parent, cl); - } else + else err = -ENOENT; } if (!err) @@ -1256,7 +1349,7 @@ replay: if (q == p || (p && check_loop(q, p, 0))) return -ELOOP; - refcount_inc(&q->refcnt); + qdisc_refcount_inc(q); goto graft; } else { if (!q) @@ -1348,111 +1441,6 @@ graft: return 0; } -static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, - u32 portid, u32 seq, u16 flags, int event) -{ - struct gnet_stats_basic_cpu __percpu *cpu_bstats = NULL; - struct gnet_stats_queue __percpu *cpu_qstats = NULL; - struct tcmsg *tcm; - struct nlmsghdr *nlh; - unsigned char *b = skb_tail_pointer(skb); - struct gnet_dump d; - struct qdisc_size_table *stab; - __u32 qlen; - - cond_resched(); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); - if (!nlh) - goto out_nlmsg_trim; - tcm = nlmsg_data(nlh); - tcm->tcm_family = AF_UNSPEC; - tcm->tcm__pad1 = 0; - tcm->tcm__pad2 = 0; - tcm->tcm_ifindex = qdisc_dev(q)->ifindex; - tcm->tcm_parent = clid; - tcm->tcm_handle = q->handle; - tcm->tcm_info = refcount_read(&q->refcnt); - if (nla_put_string(skb, TCA_KIND, q->ops->id)) - goto nla_put_failure; - if (q->ops->dump && q->ops->dump(q, skb) < 0) - goto nla_put_failure; - qlen = q->q.qlen; - - stab = rtnl_dereference(q->stab); - if (stab && qdisc_dump_stab(skb, stab) < 0) - goto nla_put_failure; - - if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, - NULL, &d, TCA_PAD) < 0) - goto nla_put_failure; - - if (q->ops->dump_stats && q->ops->dump_stats(q, &d) < 0) - goto nla_put_failure; - - if (qdisc_is_percpu_stats(q)) { - cpu_bstats = q->cpu_bstats; - cpu_qstats = q->cpu_qstats; - } - - if (gnet_stats_copy_basic(qdisc_root_sleeping_running(q), - &d, cpu_bstats, &q->bstats) < 0 || - gnet_stats_copy_rate_est(&d, &q->rate_est) < 0 || - gnet_stats_copy_queue(&d, cpu_qstats, &q->qstats, qlen) < 0) - goto nla_put_failure; - - if (gnet_stats_finish_copy(&d) < 0) - goto nla_put_failure; - - nlh->nlmsg_len = skb_tail_pointer(skb) - b; - return skb->len; - -out_nlmsg_trim: -nla_put_failure: - nlmsg_trim(skb, b); - return -1; -} - -static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible) -{ - if (q->flags & TCQ_F_BUILTIN) - return true; - if ((q->flags & TCQ_F_INVISIBLE) && !dump_invisible) - return true; - - return false; -} - -static int qdisc_notify(struct net *net, struct sk_buff *oskb, - struct nlmsghdr *n, u32 clid, - struct Qdisc *old, struct Qdisc *new) -{ - struct sk_buff *skb; - u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; - - skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); - if (!skb) - return -ENOBUFS; - - if (old && !tc_qdisc_dump_ignore(old, false)) { - if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq, - 0, RTM_DELQDISC) < 0) - goto err_out; - } - if (new && !tc_qdisc_dump_ignore(new, false)) { - if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq, - old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0) - goto err_out; - } - - if (skb->len) - return rtnetlink_send(skb, net, portid, RTNLGRP_TC, - n->nlmsg_flags & NLM_F_ECHO); - -err_out: - kfree_skb(skb); - return -EINVAL; -} - static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, struct netlink_callback *cb, int *q_idx_p, int s_q_idx, bool recur, @@ -1514,7 +1502,6 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) int s_idx, s_q_idx; struct net_device *dev; const struct nlmsghdr *nlh = cb->nlh; - struct tcmsg *tcm = nlmsg_data(nlh); struct nlattr *tca[TCA_MAX + 1]; int err; @@ -1524,7 +1511,7 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) idx = 0; ASSERT_RTNL(); - err = nlmsg_parse(nlh, sizeof(*tcm), tca, TCA_MAX, NULL, NULL); + err = nlmsg_parse(nlh, sizeof(struct tcmsg), tca, TCA_MAX, NULL, NULL); if (err < 0) return err; @@ -1565,7 +1552,163 @@ done: * Traffic classes manipulation. * ************************************************/ +static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, + unsigned long cl, + u32 portid, u32 seq, u16 flags, int event) +{ + struct tcmsg *tcm; + struct nlmsghdr *nlh; + unsigned char *b = skb_tail_pointer(skb); + struct gnet_dump d; + const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops; + + cond_resched(); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); + if (!nlh) + goto out_nlmsg_trim; + tcm = nlmsg_data(nlh); + tcm->tcm_family = AF_UNSPEC; + tcm->tcm__pad1 = 0; + tcm->tcm__pad2 = 0; + tcm->tcm_ifindex = qdisc_dev(q)->ifindex; + tcm->tcm_parent = q->handle; + tcm->tcm_handle = q->handle; + tcm->tcm_info = 0; + if (nla_put_string(skb, TCA_KIND, q->ops->id)) + goto nla_put_failure; + if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0) + goto nla_put_failure; + + if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, + NULL, &d, TCA_PAD) < 0) + goto nla_put_failure; + + if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0) + goto nla_put_failure; + + if (gnet_stats_finish_copy(&d) < 0) + goto nla_put_failure; + + nlh->nlmsg_len = skb_tail_pointer(skb) - b; + return skb->len; + +out_nlmsg_trim: +nla_put_failure: + nlmsg_trim(skb, b); + return -1; +} + +static int tclass_notify(struct net *net, struct sk_buff *oskb, + struct nlmsghdr *n, struct Qdisc *q, + unsigned long cl, int event) +{ + struct sk_buff *skb; + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb) + return -ENOBUFS; + + if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) { + kfree_skb(skb); + return -EINVAL; + } + + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); +} + +static int tclass_del_notify(struct net *net, + const struct Qdisc_class_ops *cops, + struct sk_buff *oskb, struct nlmsghdr *n, + struct Qdisc *q, unsigned long cl) +{ + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; + struct sk_buff *skb; + int err = 0; + + if (!cops->delete) + return -EOPNOTSUPP; + + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb) + return -ENOBUFS; + if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, + RTM_DELTCLASS) < 0) { + kfree_skb(skb); + return -EINVAL; + } + + err = cops->delete(q, cl); + if (err) { + kfree_skb(skb); + return err; + } + + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); +} + +#ifdef CONFIG_NET_CLS + +struct tcf_bind_args { + struct tcf_walker w; + u32 classid; + unsigned long cl; +}; + +static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg) +{ + struct tcf_bind_args *a = (void *)arg; + + if (tp->ops->bind_class) { + struct Qdisc *q = tcf_block_q(tp->chain->block); + + sch_tree_lock(q); + tp->ops->bind_class(n, a->classid, a->cl); + sch_tree_unlock(q); + } + return 0; +} + +static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid, + unsigned long new_cl) +{ + const struct Qdisc_class_ops *cops = q->ops->cl_ops; + struct tcf_block *block; + struct tcf_chain *chain; + unsigned long cl; + + cl = cops->find(q, portid); + if (!cl) + return; + block = cops->tcf_block(q, cl); + if (!block) + return; + list_for_each_entry(chain, &block->chain_list, list) { + struct tcf_proto *tp; + + for (tp = rtnl_dereference(chain->filter_chain); + tp; tp = rtnl_dereference(tp->next)) { + struct tcf_bind_args arg = {}; + + arg.w.fn = tcf_node_bind; + arg.classid = clid; + arg.cl = new_cl; + tp->ops->walk(tp, &arg.w); + } + } +} + +#else + +static void tc_bind_tclass(struct Qdisc *q, u32 portid, u32 clid, + unsigned long new_cl) +{ +} + +#endif static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, struct netlink_ext_ack *extack) @@ -1656,7 +1799,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, clid = TC_H_MAKE(qid, clid); if (clid) - cl = cops->get(q, clid); + cl = cops->find(q, clid); if (cl == 0) { err = -ENOENT; @@ -1671,12 +1814,9 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, goto out; break; case RTM_DELTCLASS: - err = -EOPNOTSUPP; - if (cops->delete) - err = cops->delete(q, cl); - if (err == 0) - tclass_notify(net, skb, n, q, cl, - RTM_DELTCLASS); + err = tclass_del_notify(net, cops, skb, n, q, cl); + /* Unbind the class with flilters with 0 */ + tc_bind_tclass(q, portid, clid, 0); goto out; case RTM_GETTCLASS: err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS); @@ -1691,83 +1831,16 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, err = -EOPNOTSUPP; if (cops->change) err = cops->change(q, clid, portid, tca, &new_cl); - if (err == 0) + if (err == 0) { tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS); - + /* We just create a new class, need to do reverse binding. */ + if (cl != new_cl) + tc_bind_tclass(q, portid, clid, new_cl); + } out: - if (cl) - cops->put(q, cl); - return err; } - -static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, - unsigned long cl, - u32 portid, u32 seq, u16 flags, int event) -{ - struct tcmsg *tcm; - struct nlmsghdr *nlh; - unsigned char *b = skb_tail_pointer(skb); - struct gnet_dump d; - const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops; - - cond_resched(); - nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); - if (!nlh) - goto out_nlmsg_trim; - tcm = nlmsg_data(nlh); - tcm->tcm_family = AF_UNSPEC; - tcm->tcm__pad1 = 0; - tcm->tcm__pad2 = 0; - tcm->tcm_ifindex = qdisc_dev(q)->ifindex; - tcm->tcm_parent = q->handle; - tcm->tcm_handle = q->handle; - tcm->tcm_info = 0; - if (nla_put_string(skb, TCA_KIND, q->ops->id)) - goto nla_put_failure; - if (cl_ops->dump && cl_ops->dump(q, cl, skb, tcm) < 0) - goto nla_put_failure; - - if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, TCA_XSTATS, - NULL, &d, TCA_PAD) < 0) - goto nla_put_failure; - - if (cl_ops->dump_stats && cl_ops->dump_stats(q, cl, &d) < 0) - goto nla_put_failure; - - if (gnet_stats_finish_copy(&d) < 0) - goto nla_put_failure; - - nlh->nlmsg_len = skb_tail_pointer(skb) - b; - return skb->len; - -out_nlmsg_trim: -nla_put_failure: - nlmsg_trim(skb, b); - return -1; -} - -static int tclass_notify(struct net *net, struct sk_buff *oskb, - struct nlmsghdr *n, struct Qdisc *q, - unsigned long cl, int event) -{ - struct sk_buff *skb; - u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; - - skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); - if (!skb) - return -ENOBUFS; - - if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) { - kfree_skb(skb); - return -EINVAL; - } - - return rtnetlink_send(skb, net, portid, RTNLGRP_TC, - n->nlmsg_flags & NLM_F_ECHO); -} - struct qdisc_dump_args { struct qdisc_walker w; struct sk_buff *skb; @@ -1949,14 +2022,14 @@ static int __init pktsched_init(void) register_qdisc(&mq_qdisc_ops); register_qdisc(&noqueue_qdisc_ops); - rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, NULL); - rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL, 0); + rtnl_register(PF_UNSPEC, RTM_DELQDISC, tc_get_qdisc, NULL, 0); rtnl_register(PF_UNSPEC, RTM_GETQDISC, tc_get_qdisc, tc_dump_qdisc, - NULL); - rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, NULL); - rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, NULL); + 0); + rtnl_register(PF_UNSPEC, RTM_NEWTCLASS, tc_ctl_tclass, NULL, 0); + rtnl_register(PF_UNSPEC, RTM_DELTCLASS, tc_ctl_tclass, NULL, 0); rtnl_register(PF_UNSPEC, RTM_GETTCLASS, tc_ctl_tclass, tc_dump_tclass, - NULL); + 0); return 0; } diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index c403c87aff7a..2dbd249c0b2f 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -41,6 +41,7 @@ #define VCC2FLOW(vcc) ((struct atm_flow_data *) ((vcc)->user_back)) struct atm_flow_data { + struct Qdisc_class_common common; struct Qdisc *q; /* FIFO, TBF, etc. */ struct tcf_proto __rcu *filter_list; struct tcf_block *block; @@ -49,7 +50,6 @@ struct atm_flow_data { struct sk_buff *skb); /* chaining */ struct atm_qdisc_data *parent; /* parent qdisc */ struct socket *sock; /* for closing */ - u32 classid; /* x:y type ID */ int ref; /* reference count */ struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; @@ -75,7 +75,7 @@ static inline struct atm_flow_data *lookup_flow(struct Qdisc *sch, u32 classid) struct atm_flow_data *flow; list_for_each_entry(flow, &p->flows, list) { - if (flow->classid == classid) + if (flow->common.classid == classid) return flow; } return NULL; @@ -108,23 +108,29 @@ static struct Qdisc *atm_tc_leaf(struct Qdisc *sch, unsigned long cl) return flow ? flow->q : NULL; } -static unsigned long atm_tc_get(struct Qdisc *sch, u32 classid) +static unsigned long atm_tc_find(struct Qdisc *sch, u32 classid) { struct atm_qdisc_data *p __maybe_unused = qdisc_priv(sch); struct atm_flow_data *flow; - pr_debug("atm_tc_get(sch %p,[qdisc %p],classid %x)\n", sch, p, classid); + pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", __func__, sch, p, classid); flow = lookup_flow(sch, classid); - if (flow) - flow->ref++; - pr_debug("atm_tc_get: flow %p\n", flow); + pr_debug("%s: flow %p\n", __func__, flow); return (unsigned long)flow; } static unsigned long atm_tc_bind_filter(struct Qdisc *sch, unsigned long parent, u32 classid) { - return atm_tc_get(sch, classid); + struct atm_qdisc_data *p __maybe_unused = qdisc_priv(sch); + struct atm_flow_data *flow; + + pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", __func__, sch, p, classid); + flow = lookup_flow(sch, classid); + if (flow) + flow->ref++; + pr_debug("%s: flow %p\n", __func__, flow); + return (unsigned long)flow; } /* @@ -234,7 +240,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, excess = NULL; else { excess = (struct atm_flow_data *) - atm_tc_get(sch, nla_get_u32(tb[TCA_ATM_EXCESS])); + atm_tc_find(sch, nla_get_u32(tb[TCA_ATM_EXCESS])); if (!excess) return -ENOENT; } @@ -262,10 +268,9 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, for (i = 1; i < 0x8000; i++) { classid = TC_H_MAKE(sch->handle, 0x8000 | i); - cl = atm_tc_get(sch, classid); + cl = atm_tc_find(sch, classid); if (!cl) break; - atm_tc_put(sch, cl); } } pr_debug("atm_tc_change: new id %x\n", classid); @@ -276,7 +281,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, goto err_out; } - error = tcf_block_get(&flow->block, &flow->filter_list); + error = tcf_block_get(&flow->block, &flow->filter_list, sch); if (error) { kfree(flow); goto err_out; @@ -293,7 +298,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, flow->old_pop = flow->vcc->pop; flow->parent = p; flow->vcc->pop = sch_atm_pop; - flow->classid = classid; + flow->common.classid = classid; flow->ref = 1; flow->excess = excess; list_add(&flow->list, &p->link.list); @@ -305,8 +310,6 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, *arg = (unsigned long)flow; return 0; err_out: - if (excess) - atm_tc_put(sch, (unsigned long)excess); sockfd_put(sock); return error; } @@ -377,7 +380,7 @@ static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch, result = TC_ACT_OK; /* be nice to gcc */ flow = NULL; if (TC_H_MAJ(skb->priority) != sch->handle || - !(flow = (struct atm_flow_data *)atm_tc_get(sch, skb->priority))) { + !(flow = (struct atm_flow_data *)atm_tc_find(sch, skb->priority))) { struct tcf_proto *fl; list_for_each_entry(flow, &p->flows, list) { @@ -543,13 +546,13 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt) p->link.q = &noop_qdisc; pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q); - err = tcf_block_get(&p->link.block, &p->link.filter_list); + err = tcf_block_get(&p->link.block, &p->link.filter_list, sch); if (err) return err; p->link.vcc = NULL; p->link.sock = NULL; - p->link.classid = sch->handle; + p->link.common.classid = sch->handle; p->link.ref = 1; tasklet_init(&p->task, sch_atm_dequeue, (unsigned long)sch); return 0; @@ -596,7 +599,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl, sch, p, flow, skb, tcm); if (list_empty(&flow->list)) return -EINVAL; - tcm->tcm_handle = flow->classid; + tcm->tcm_handle = flow->common.classid; tcm->tcm_info = flow->q->handle; nest = nla_nest_start(skb, TCA_OPTIONS); @@ -621,7 +624,7 @@ static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl, goto nla_put_failure; } if (flow->excess) { - if (nla_put_u32(skb, TCA_ATM_EXCESS, flow->classid)) + if (nla_put_u32(skb, TCA_ATM_EXCESS, flow->common.classid)) goto nla_put_failure; } else { if (nla_put_u32(skb, TCA_ATM_EXCESS, 0)) @@ -655,8 +658,7 @@ static int atm_tc_dump(struct Qdisc *sch, struct sk_buff *skb) static const struct Qdisc_class_ops atm_class_ops = { .graft = atm_tc_graft, .leaf = atm_tc_leaf, - .get = atm_tc_get, - .put = atm_tc_put, + .find = atm_tc_find, .change = atm_tc_change, .delete = atm_tc_delete, .walk = atm_tc_walk, diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 780db43300b1..6361be7881f1 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -129,7 +129,6 @@ struct cbq_class { struct tcf_proto __rcu *filter_list; struct tcf_block *block; - int refcnt; int filters; struct cbq_class *defaults[TC_PRIO_MAX + 1]; @@ -256,6 +255,7 @@ cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) case TC_ACT_STOLEN: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + /* fall through */ case TC_ACT_SHOT: return NULL; case TC_ACT_RECLASSIFY: @@ -1139,6 +1139,13 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) struct tc_ratespec *r; int err; + qdisc_watchdog_init(&q->watchdog, sch); + hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); + q->delay_timer.function = cbq_undelay; + + if (!opt) + return -EINVAL; + err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy, NULL); if (err < 0) return err; @@ -1155,7 +1162,6 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) if (err < 0) goto put_rtab; - q->link.refcnt = 1; q->link.sibling = &q->link; q->link.common.classid = sch->handle; q->link.qdisc = sch; @@ -1177,9 +1183,6 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) q->link.avpkt = q->link.allot/2; q->link.minidle = -0x7FFFFFFF; - qdisc_watchdog_init(&q->watchdog, sch); - hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); - q->delay_timer.function = cbq_undelay; q->toplevel = TC_CBQ_MAXLEVEL; q->now = psched_get_time(); @@ -1385,20 +1388,14 @@ static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg) { struct cbq_class *cl = (struct cbq_class *)arg; - if (cl->q->q.qlen == 0) - cbq_deactivate_class(cl); + cbq_deactivate_class(cl); } -static unsigned long cbq_get(struct Qdisc *sch, u32 classid) +static unsigned long cbq_find(struct Qdisc *sch, u32 classid) { struct cbq_sched_data *q = qdisc_priv(sch); - struct cbq_class *cl = cbq_class_lookup(q, classid); - if (cl) { - cl->refcnt++; - return (unsigned long)cl; - } - return 0; + return (unsigned long)cbq_class_lookup(q, classid); } static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl) @@ -1444,25 +1441,6 @@ static void cbq_destroy(struct Qdisc *sch) qdisc_class_hash_destroy(&q->clhash); } -static void cbq_put(struct Qdisc *sch, unsigned long arg) -{ - struct cbq_class *cl = (struct cbq_class *)arg; - - if (--cl->refcnt == 0) { -#ifdef CONFIG_NET_CLS_ACT - spinlock_t *root_lock = qdisc_root_sleeping_lock(sch); - struct cbq_sched_data *q = qdisc_priv(sch); - - spin_lock_bh(root_lock); - if (q->rx_class == cl) - q->rx_class = NULL; - spin_unlock_bh(root_lock); -#endif - - cbq_destroy_class(sch, cl); - } -} - static int cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca, unsigned long *arg) @@ -1589,7 +1567,7 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t if (cl == NULL) goto failure; - err = tcf_block_get(&cl->block, &cl->filter_list); + err = tcf_block_get(&cl->block, &cl->filter_list, sch); if (err) { kfree(cl); return err; @@ -1609,7 +1587,6 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t cl->R_tab = rtab; rtab = NULL; - cl->refcnt = 1; cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid); if (!cl->q) cl->q = &noop_qdisc; @@ -1690,12 +1667,7 @@ static int cbq_delete(struct Qdisc *sch, unsigned long arg) cbq_rmprio(q, cl); sch_tree_unlock(sch); - BUG_ON(--cl->refcnt == 0); - /* - * This shouldn't happen: we "hold" one cops->get() when called - * from tc_ctl_tclass; the destroy method is done from cops->put(). - */ - + cbq_destroy_class(sch, cl); return 0; } @@ -1761,8 +1733,7 @@ static const struct Qdisc_class_ops cbq_class_ops = { .graft = cbq_graft, .leaf = cbq_leaf, .qlen_notify = cbq_qlen_notify, - .get = cbq_get, - .put = cbq_put, + .find = cbq_find, .change = cbq_change_class, .delete = cbq_delete, .walk = cbq_walk, diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c new file mode 100644 index 000000000000..7a72980c1509 --- /dev/null +++ b/net/sched/sch_cbs.c @@ -0,0 +1,373 @@ +/* + * net/sched/sch_cbs.c Credit Based Shaper + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * Authors: Vinicius Costa Gomes <vinicius.gomes@intel.com> + * + */ + +/* Credit Based Shaper (CBS) + * ========================= + * + * This is a simple rate-limiting shaper aimed at TSN applications on + * systems with known traffic workloads. + * + * Its algorithm is defined by the IEEE 802.1Q-2014 Specification, + * Section 8.6.8.2, and explained in more detail in the Annex L of the + * same specification. + * + * There are four tunables to be considered: + * + * 'idleslope': Idleslope is the rate of credits that is + * accumulated (in kilobits per second) when there is at least + * one packet waiting for transmission. Packets are transmitted + * when the current value of credits is equal or greater than + * zero. When there is no packet to be transmitted the amount of + * credits is set to zero. This is the main tunable of the CBS + * algorithm. + * + * 'sendslope': + * Sendslope is the rate of credits that is depleted (it should be a + * negative number of kilobits per second) when a transmission is + * ocurring. It can be calculated as follows, (IEEE 802.1Q-2014 Section + * 8.6.8.2 item g): + * + * sendslope = idleslope - port_transmit_rate + * + * 'hicredit': Hicredit defines the maximum amount of credits (in + * bytes) that can be accumulated. Hicredit depends on the + * characteristics of interfering traffic, + * 'max_interference_size' is the maximum size of any burst of + * traffic that can delay the transmission of a frame that is + * available for transmission for this traffic class, (IEEE + * 802.1Q-2014 Annex L, Equation L-3): + * + * hicredit = max_interference_size * (idleslope / port_transmit_rate) + * + * 'locredit': Locredit is the minimum amount of credits that can + * be reached. It is a function of the traffic flowing through + * this qdisc (IEEE 802.1Q-2014 Annex L, Equation L-2): + * + * locredit = max_frame_size * (sendslope / port_transmit_rate) + */ + +#include <linux/module.h> +#include <linux/types.h> +#include <linux/kernel.h> +#include <linux/string.h> +#include <linux/errno.h> +#include <linux/skbuff.h> +#include <net/netlink.h> +#include <net/sch_generic.h> +#include <net/pkt_sched.h> + +#define BYTES_PER_KBIT (1000LL / 8) + +struct cbs_sched_data { + bool offload; + int queue; + s64 port_rate; /* in bytes/s */ + s64 last; /* timestamp in ns */ + s64 credits; /* in bytes */ + s32 locredit; /* in bytes */ + s32 hicredit; /* in bytes */ + s64 sendslope; /* in bytes/s */ + s64 idleslope; /* in bytes/s */ + struct qdisc_watchdog watchdog; + int (*enqueue)(struct sk_buff *skb, struct Qdisc *sch); + struct sk_buff *(*dequeue)(struct Qdisc *sch); +}; + +static int cbs_enqueue_offload(struct sk_buff *skb, struct Qdisc *sch) +{ + return qdisc_enqueue_tail(skb, sch); +} + +static int cbs_enqueue_soft(struct sk_buff *skb, struct Qdisc *sch) +{ + struct cbs_sched_data *q = qdisc_priv(sch); + + if (sch->q.qlen == 0 && q->credits > 0) { + /* We need to stop accumulating credits when there's + * no enqueued packets and q->credits is positive. + */ + q->credits = 0; + q->last = ktime_get_ns(); + } + + return qdisc_enqueue_tail(skb, sch); +} + +static int cbs_enqueue(struct sk_buff *skb, struct Qdisc *sch, + struct sk_buff **to_free) +{ + struct cbs_sched_data *q = qdisc_priv(sch); + + return q->enqueue(skb, sch); +} + +/* timediff is in ns, slope is in bytes/s */ +static s64 timediff_to_credits(s64 timediff, s64 slope) +{ + return div64_s64(timediff * slope, NSEC_PER_SEC); +} + +static s64 delay_from_credits(s64 credits, s64 slope) +{ + if (unlikely(slope == 0)) + return S64_MAX; + + return div64_s64(-credits * NSEC_PER_SEC, slope); +} + +static s64 credits_from_len(unsigned int len, s64 slope, s64 port_rate) +{ + if (unlikely(port_rate == 0)) + return S64_MAX; + + return div64_s64(len * slope, port_rate); +} + +static struct sk_buff *cbs_dequeue_soft(struct Qdisc *sch) +{ + struct cbs_sched_data *q = qdisc_priv(sch); + s64 now = ktime_get_ns(); + struct sk_buff *skb; + s64 credits; + int len; + + if (q->credits < 0) { + credits = timediff_to_credits(now - q->last, q->idleslope); + + credits = q->credits + credits; + q->credits = min_t(s64, credits, q->hicredit); + + if (q->credits < 0) { + s64 delay; + + delay = delay_from_credits(q->credits, q->idleslope); + qdisc_watchdog_schedule_ns(&q->watchdog, now + delay); + + q->last = now; + + return NULL; + } + } + + skb = qdisc_dequeue_head(sch); + if (!skb) + return NULL; + + len = qdisc_pkt_len(skb); + + /* As sendslope is a negative number, this will decrease the + * amount of q->credits. + */ + credits = credits_from_len(len, q->sendslope, q->port_rate); + credits += q->credits; + + q->credits = max_t(s64, credits, q->locredit); + q->last = now; + + return skb; +} + +static struct sk_buff *cbs_dequeue_offload(struct Qdisc *sch) +{ + return qdisc_dequeue_head(sch); +} + +static struct sk_buff *cbs_dequeue(struct Qdisc *sch) +{ + struct cbs_sched_data *q = qdisc_priv(sch); + + return q->dequeue(sch); +} + +static const struct nla_policy cbs_policy[TCA_CBS_MAX + 1] = { + [TCA_CBS_PARMS] = { .len = sizeof(struct tc_cbs_qopt) }, +}; + +static void cbs_disable_offload(struct net_device *dev, + struct cbs_sched_data *q) +{ + struct tc_cbs_qopt_offload cbs = { }; + const struct net_device_ops *ops; + int err; + + if (!q->offload) + return; + + q->enqueue = cbs_enqueue_soft; + q->dequeue = cbs_dequeue_soft; + + ops = dev->netdev_ops; + if (!ops->ndo_setup_tc) + return; + + cbs.queue = q->queue; + cbs.enable = 0; + + err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_CBS, &cbs); + if (err < 0) + pr_warn("Couldn't disable CBS offload for queue %d\n", + cbs.queue); +} + +static int cbs_enable_offload(struct net_device *dev, struct cbs_sched_data *q, + const struct tc_cbs_qopt *opt) +{ + const struct net_device_ops *ops = dev->netdev_ops; + struct tc_cbs_qopt_offload cbs = { }; + int err; + + if (!ops->ndo_setup_tc) + return -EOPNOTSUPP; + + cbs.queue = q->queue; + + cbs.enable = 1; + cbs.hicredit = opt->hicredit; + cbs.locredit = opt->locredit; + cbs.idleslope = opt->idleslope; + cbs.sendslope = opt->sendslope; + + err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_CBS, &cbs); + if (err < 0) + return err; + + q->enqueue = cbs_enqueue_offload; + q->dequeue = cbs_dequeue_offload; + + return 0; +} + +static int cbs_change(struct Qdisc *sch, struct nlattr *opt) +{ + struct cbs_sched_data *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + struct nlattr *tb[TCA_CBS_MAX + 1]; + struct tc_cbs_qopt *qopt; + int err; + + err = nla_parse_nested(tb, TCA_CBS_MAX, opt, cbs_policy, NULL); + if (err < 0) + return err; + + if (!tb[TCA_CBS_PARMS]) + return -EINVAL; + + qopt = nla_data(tb[TCA_CBS_PARMS]); + + if (!qopt->offload) { + struct ethtool_link_ksettings ecmd; + s64 link_speed; + + if (!__ethtool_get_link_ksettings(dev, &ecmd)) + link_speed = ecmd.base.speed; + else + link_speed = SPEED_1000; + + q->port_rate = link_speed * 1000 * BYTES_PER_KBIT; + + cbs_disable_offload(dev, q); + } else { + err = cbs_enable_offload(dev, q, qopt); + if (err < 0) + return err; + } + + /* Everything went OK, save the parameters used. */ + q->hicredit = qopt->hicredit; + q->locredit = qopt->locredit; + q->idleslope = qopt->idleslope * BYTES_PER_KBIT; + q->sendslope = qopt->sendslope * BYTES_PER_KBIT; + q->offload = qopt->offload; + + return 0; +} + +static int cbs_init(struct Qdisc *sch, struct nlattr *opt) +{ + struct cbs_sched_data *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + + if (!opt) + return -EINVAL; + + q->queue = sch->dev_queue - netdev_get_tx_queue(dev, 0); + + q->enqueue = cbs_enqueue_soft; + q->dequeue = cbs_dequeue_soft; + + qdisc_watchdog_init(&q->watchdog, sch); + + return cbs_change(sch, opt); +} + +static void cbs_destroy(struct Qdisc *sch) +{ + struct cbs_sched_data *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + + qdisc_watchdog_cancel(&q->watchdog); + + cbs_disable_offload(dev, q); +} + +static int cbs_dump(struct Qdisc *sch, struct sk_buff *skb) +{ + struct cbs_sched_data *q = qdisc_priv(sch); + struct tc_cbs_qopt opt = { }; + struct nlattr *nest; + + nest = nla_nest_start(skb, TCA_OPTIONS); + if (!nest) + goto nla_put_failure; + + opt.hicredit = q->hicredit; + opt.locredit = q->locredit; + opt.sendslope = div64_s64(q->sendslope, BYTES_PER_KBIT); + opt.idleslope = div64_s64(q->idleslope, BYTES_PER_KBIT); + opt.offload = q->offload; + + if (nla_put(skb, TCA_CBS_PARMS, sizeof(opt), &opt)) + goto nla_put_failure; + + return nla_nest_end(skb, nest); + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -1; +} + +static struct Qdisc_ops cbs_qdisc_ops __read_mostly = { + .id = "cbs", + .priv_size = sizeof(struct cbs_sched_data), + .enqueue = cbs_enqueue, + .dequeue = cbs_dequeue, + .peek = qdisc_peek_dequeued, + .init = cbs_init, + .reset = qdisc_reset_queue, + .destroy = cbs_destroy, + .change = cbs_change, + .dump = cbs_dump, + .owner = THIS_MODULE, +}; + +static int __init cbs_module_init(void) +{ + return register_qdisc(&cbs_qdisc_ops); +} + +static void __exit cbs_module_exit(void) +{ + unregister_qdisc(&cbs_qdisc_ops); +} +module_init(cbs_module_init) +module_exit(cbs_module_exit) +MODULE_LICENSE("GPL"); diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index a413dc1c2098..5bbcef3dcd8c 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -20,7 +20,6 @@ struct drr_class { struct Qdisc_class_common common; - unsigned int refcnt; unsigned int filter_cnt; struct gnet_stats_basic_packed bstats; @@ -111,7 +110,6 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (cl == NULL) return -ENOBUFS; - cl->refcnt = 1; cl->common.classid = classid; cl->quantum = quantum; cl->qdisc = qdisc_create_dflt(sch->dev_queue, @@ -163,32 +161,15 @@ static int drr_delete_class(struct Qdisc *sch, unsigned long arg) drr_purge_queue(cl); qdisc_class_hash_remove(&q->clhash, &cl->common); - BUG_ON(--cl->refcnt == 0); - /* - * This shouldn't happen: we "hold" one cops->get() when called - * from tc_ctl_tclass; the destroy method is done from cops->put(). - */ - sch_tree_unlock(sch); - return 0; -} - -static unsigned long drr_get_class(struct Qdisc *sch, u32 classid) -{ - struct drr_class *cl = drr_find_class(sch, classid); - if (cl != NULL) - cl->refcnt++; - - return (unsigned long)cl; + drr_destroy_class(sch, cl); + return 0; } -static void drr_put_class(struct Qdisc *sch, unsigned long arg) +static unsigned long drr_search_class(struct Qdisc *sch, u32 classid) { - struct drr_class *cl = (struct drr_class *)arg; - - if (--cl->refcnt == 0) - drr_destroy_class(sch, cl); + return (unsigned long)drr_find_class(sch, classid); } static struct tcf_block *drr_tcf_block(struct Qdisc *sch, unsigned long cl) @@ -246,8 +227,7 @@ static void drr_qlen_notify(struct Qdisc *csh, unsigned long arg) { struct drr_class *cl = (struct drr_class *)arg; - if (cl->qdisc->q.qlen == 0) - list_del(&cl->alist); + list_del(&cl->alist); } static int drr_dump_class(struct Qdisc *sch, unsigned long arg, @@ -341,6 +321,7 @@ static struct drr_class *drr_classify(struct sk_buff *skb, struct Qdisc *sch, case TC_ACT_STOLEN: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + /* fall through */ case TC_ACT_SHOT: return NULL; } @@ -432,7 +413,7 @@ static int drr_init_qdisc(struct Qdisc *sch, struct nlattr *opt) struct drr_sched *q = qdisc_priv(sch); int err; - err = tcf_block_get(&q->block, &q->filter_list); + err = tcf_block_get(&q->block, &q->filter_list, sch); if (err) return err; err = qdisc_class_hash_init(&q->clhash); @@ -479,8 +460,7 @@ static void drr_destroy_qdisc(struct Qdisc *sch) static const struct Qdisc_class_ops drr_class_ops = { .change = drr_change_class, .delete = drr_delete_class, - .get = drr_get_class, - .put = drr_put_class, + .find = drr_search_class, .tcf_block = drr_tcf_block, .bind_tcf = drr_bind_tcf, .unbind_tcf = drr_unbind_tcf, diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 6d94fcc3592a..fb4fb71c68cf 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -85,21 +85,21 @@ static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg) return p->q; } -static unsigned long dsmark_get(struct Qdisc *sch, u32 classid) +static unsigned long dsmark_find(struct Qdisc *sch, u32 classid) { - pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", - __func__, sch, qdisc_priv(sch), classid); - return TC_H_MIN(classid) + 1; } static unsigned long dsmark_bind_filter(struct Qdisc *sch, unsigned long parent, u32 classid) { - return dsmark_get(sch, classid); + pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", + __func__, sch, qdisc_priv(sch), classid); + + return dsmark_find(sch, classid); } -static void dsmark_put(struct Qdisc *sch, unsigned long cl) +static void dsmark_unbind_filter(struct Qdisc *sch, unsigned long cl) { } @@ -344,7 +344,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt) if (!opt) goto errout; - err = tcf_block_get(&p->block, &p->filter_list); + err = tcf_block_get(&p->block, &p->filter_list, sch); if (err) return err; @@ -469,14 +469,13 @@ nla_put_failure: static const struct Qdisc_class_ops dsmark_class_ops = { .graft = dsmark_graft, .leaf = dsmark_leaf, - .get = dsmark_get, - .put = dsmark_put, + .find = dsmark_find, .change = dsmark_change, .delete = dsmark_delete, .walk = dsmark_walk, .tcf_block = dsmark_tcf_block, .bind_tcf = dsmark_bind_filter, - .unbind_tcf = dsmark_put, + .unbind_tcf = dsmark_unbind_filter, .dump = dsmark_dump_class, }; diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 337f2d6d81e4..0305d791ea94 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -105,6 +105,7 @@ static unsigned int fq_codel_classify(struct sk_buff *skb, struct Qdisc *sch, case TC_ACT_QUEUED: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + /* fall through */ case TC_ACT_SHOT: return 0; } @@ -481,7 +482,7 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt) return err; } - err = tcf_block_get(&q->block, &q->filter_list); + err = tcf_block_get(&q->block, &q->filter_list, sch); if (err) return err; @@ -491,10 +492,8 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt) if (!q->flows) return -ENOMEM; q->backlogs = kvzalloc(q->flows_cnt * sizeof(u32), GFP_KERNEL); - if (!q->backlogs) { - kvfree(q->flows); + if (!q->backlogs) return -ENOMEM; - } for (i = 0; i < q->flows_cnt; i++) { struct fq_codel_flow *flow = q->flows + i; @@ -579,7 +578,7 @@ static struct Qdisc *fq_codel_leaf(struct Qdisc *sch, unsigned long arg) return NULL; } -static unsigned long fq_codel_get(struct Qdisc *sch, u32 classid) +static unsigned long fq_codel_find(struct Qdisc *sch, u32 classid) { return 0; } @@ -592,7 +591,7 @@ static unsigned long fq_codel_bind(struct Qdisc *sch, unsigned long parent, return 0; } -static void fq_codel_put(struct Qdisc *q, unsigned long cl) +static void fq_codel_unbind(struct Qdisc *q, unsigned long cl) { } @@ -683,11 +682,10 @@ static void fq_codel_walk(struct Qdisc *sch, struct qdisc_walker *arg) static const struct Qdisc_class_ops fq_codel_class_ops = { .leaf = fq_codel_leaf, - .get = fq_codel_get, - .put = fq_codel_put, + .find = fq_codel_find, .tcf_block = fq_codel_tcf_block, .bind_tcf = fq_codel_bind, - .unbind_tcf = fq_codel_put, + .unbind_tcf = fq_codel_unbind, .dump = fq_codel_dump_class, .dump_stats = fq_codel_dump_class_stats, .walk = fq_codel_walk, diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 57ba406f1437..3839cbbdc32b 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -29,6 +29,7 @@ #include <net/sch_generic.h> #include <net/pkt_sched.h> #include <net/dst.h> +#include <trace/events/qdisc.h> /* Qdisc to use by default */ const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops; @@ -126,7 +127,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate, q->q.qlen--; } else skb = NULL; - return skb; + goto trace; } *validate = true; skb = q->skb_bad_txq; @@ -139,7 +140,8 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate, q->q.qlen--; goto bulk; } - return NULL; + skb = NULL; + goto trace; } if (!(q->flags & TCQ_F_ONETXQUEUE) || !netif_xmit_frozen_or_stopped(txq)) @@ -151,6 +153,8 @@ bulk: else try_bulk_dequeue_skb_slow(q, skb, packets); } +trace: + trace_qdisc_dequeue(q, txq, *packets, skb); return skb; } @@ -284,9 +288,9 @@ unsigned long dev_trans_start(struct net_device *dev) } EXPORT_SYMBOL(dev_trans_start); -static void dev_watchdog(unsigned long arg) +static void dev_watchdog(struct timer_list *t) { - struct net_device *dev = (struct net_device *)arg; + struct net_device *dev = from_timer(dev, t, watchdog_timer); netif_tx_lock(dev); if (!qdisc_tx_is_noop(dev)) { @@ -599,8 +603,14 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, struct Qdisc *sch; unsigned int size = QDISC_ALIGN(sizeof(*sch)) + ops->priv_size; int err = -ENOBUFS; - struct net_device *dev = dev_queue->dev; + struct net_device *dev; + if (!dev_queue) { + err = -EINVAL; + goto errout; + } + + dev = dev_queue->dev; p = kzalloc_node(size, GFP_KERNEL, netdev_queue_numa_node_read(dev_queue)); @@ -681,13 +691,12 @@ void qdisc_reset(struct Qdisc *qdisc) qdisc->gso_skb = NULL; } qdisc->q.qlen = 0; + qdisc->qstats.backlog = 0; } EXPORT_SYMBOL(qdisc_reset); -static void qdisc_rcu_free(struct rcu_head *head) +static void qdisc_free(struct Qdisc *qdisc) { - struct Qdisc *qdisc = container_of(head, struct Qdisc, rcu_head); - if (qdisc_is_percpu_stats(qdisc)) { free_percpu(qdisc->cpu_bstats); free_percpu(qdisc->cpu_qstats); @@ -720,11 +729,7 @@ void qdisc_destroy(struct Qdisc *qdisc) kfree_skb_list(qdisc->gso_skb); kfree_skb(qdisc->skb_bad_txq); - /* - * gen_estimator est_timer() might access qdisc->q.lock, - * wait a RCU grace period before freeing qdisc. - */ - call_rcu(&qdisc->rcu_head, qdisc_rcu_free); + qdisc_free(qdisc); } EXPORT_SYMBOL(qdisc_destroy); @@ -785,7 +790,7 @@ static void attach_default_qdiscs(struct net_device *dev) dev->priv_flags & IFF_NO_QUEUE) { netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL); dev->qdisc = txq->qdisc_sleeping; - refcount_inc(&dev->qdisc->refcnt); + qdisc_refcount_inc(dev->qdisc); } else { qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT); if (qdisc) { @@ -955,7 +960,7 @@ void dev_init_scheduler(struct net_device *dev) if (dev_ingress_queue(dev)) dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc); - setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev); + timer_setup(&dev->watchdog_timer, dev_watchdog, 0); } static void shutdown_scheduler_queue(struct net_device *dev, @@ -1019,3 +1024,49 @@ void psched_ratecfg_precompute(struct psched_ratecfg *r, } } EXPORT_SYMBOL(psched_ratecfg_precompute); + +static void mini_qdisc_rcu_func(struct rcu_head *head) +{ +} + +void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp, + struct tcf_proto *tp_head) +{ + struct mini_Qdisc *miniq_old = rtnl_dereference(*miniqp->p_miniq); + struct mini_Qdisc *miniq; + + if (!tp_head) { + RCU_INIT_POINTER(*miniqp->p_miniq, NULL); + return; + } + + miniq = !miniq_old || miniq_old == &miniqp->miniq2 ? + &miniqp->miniq1 : &miniqp->miniq2; + + /* We need to make sure that readers won't see the miniq + * we are about to modify. So wait until previous call_rcu_bh callback + * is done. + */ + rcu_barrier_bh(); + miniq->filter_list = tp_head; + rcu_assign_pointer(*miniqp->p_miniq, miniq); + + if (miniq_old) + /* This is counterpart of the rcu barrier above. We need to + * block potential new user of miniq_old until all readers + * are not seeing it. + */ + call_rcu_bh(&miniq_old->rcu, mini_qdisc_rcu_func); +} +EXPORT_SYMBOL(mini_qdisc_pair_swap); + +void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc, + struct mini_Qdisc __rcu **p_miniq) +{ + miniqp->miniq1.cpu_bstats = qdisc->cpu_bstats; + miniqp->miniq1.cpu_qstats = qdisc->cpu_qstats; + miniqp->miniq2.cpu_bstats = qdisc->cpu_bstats; + miniqp->miniq2.cpu_qstats = qdisc->cpu_qstats; + miniqp->p_miniq = p_miniq; +} +EXPORT_SYMBOL(mini_qdisc_pair_init); diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index fd15200f8627..d04068a97d81 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -110,7 +110,6 @@ enum hfsc_class_flags { struct hfsc_class { struct Qdisc_class_common cl_common; - unsigned int refcnt; /* usage count */ struct gnet_stats_basic_packed bstats; struct gnet_stats_queue qstats; @@ -829,28 +828,6 @@ update_vf(struct hfsc_class *cl, unsigned int len, u64 cur_time) } } -static void -set_active(struct hfsc_class *cl, unsigned int len) -{ - if (cl->cl_flags & HFSC_RSC) - init_ed(cl, len); - if (cl->cl_flags & HFSC_FSC) - init_vf(cl, len); - -} - -static void -set_passive(struct hfsc_class *cl) -{ - if (cl->cl_flags & HFSC_RSC) - eltree_remove(cl); - - /* - * vttree is now handled in update_vf() so that update_vf(cl, 0, 0) - * needs to be called explicitly to remove a class from vttree. - */ -} - static unsigned int qdisc_peek_len(struct Qdisc *sch) { @@ -981,6 +958,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, } if (cl != NULL) { + int old_flags; + if (parentid) { if (cl->cl_parent && cl->cl_parent->cl_common.classid != parentid) @@ -1001,6 +980,8 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, } sch_tree_lock(sch); + old_flags = cl->cl_flags; + if (rsc != NULL) hfsc_change_rsc(cl, rsc, cur_time); if (fsc != NULL) @@ -1009,10 +990,21 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, hfsc_change_usc(cl, usc, cur_time); if (cl->qdisc->q.qlen != 0) { - if (cl->cl_flags & HFSC_RSC) - update_ed(cl, qdisc_peek_len(cl->qdisc)); - if (cl->cl_flags & HFSC_FSC) - update_vf(cl, 0, cur_time); + int len = qdisc_peek_len(cl->qdisc); + + if (cl->cl_flags & HFSC_RSC) { + if (old_flags & HFSC_RSC) + update_ed(cl, len); + else + init_ed(cl, len); + } + + if (cl->cl_flags & HFSC_FSC) { + if (old_flags & HFSC_FSC) + update_vf(cl, 0, cur_time); + else + init_vf(cl, len); + } } sch_tree_unlock(sch); @@ -1041,7 +1033,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (cl == NULL) return -ENOBUFS; - err = tcf_block_get(&cl->block, &cl->filter_list); + err = tcf_block_get(&cl->block, &cl->filter_list, sch); if (err) { kfree(cl); return err; @@ -1067,7 +1059,6 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, hfsc_change_usc(cl, usc, 0); cl->cl_common.classid = classid; - cl->refcnt = 1; cl->sched = q; cl->cl_parent = parent; cl->qdisc = qdisc_create_dflt(sch->dev_queue, @@ -1123,13 +1114,9 @@ hfsc_delete_class(struct Qdisc *sch, unsigned long arg) hfsc_purge_queue(sch, cl); qdisc_class_hash_remove(&q->clhash, &cl->cl_common); - BUG_ON(--cl->refcnt == 0); - /* - * This shouldn't happen: we "hold" one cops->get() when called - * from tc_ctl_tclass; the destroy method is done from cops->put(). - */ - sch_tree_unlock(sch); + + hfsc_destroy_class(sch, cl); return 0; } @@ -1157,6 +1144,7 @@ hfsc_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) case TC_ACT_STOLEN: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + /* fall through */ case TC_ACT_SHOT: return NULL; } @@ -1221,30 +1209,18 @@ hfsc_qlen_notify(struct Qdisc *sch, unsigned long arg) { struct hfsc_class *cl = (struct hfsc_class *)arg; - if (cl->qdisc->q.qlen == 0) { - update_vf(cl, 0, 0); - set_passive(cl); - } + /* vttree is now handled in update_vf() so that update_vf(cl, 0, 0) + * needs to be called explicitly to remove a class from vttree. + */ + update_vf(cl, 0, 0); + if (cl->cl_flags & HFSC_RSC) + eltree_remove(cl); } static unsigned long -hfsc_get_class(struct Qdisc *sch, u32 classid) -{ - struct hfsc_class *cl = hfsc_find_class(classid, sch); - - if (cl != NULL) - cl->refcnt++; - - return (unsigned long)cl; -} - -static void -hfsc_put_class(struct Qdisc *sch, unsigned long arg) +hfsc_search_class(struct Qdisc *sch, u32 classid) { - struct hfsc_class *cl = (struct hfsc_class *)arg; - - if (--cl->refcnt == 0) - hfsc_destroy_class(sch, cl); + return (unsigned long)hfsc_find_class(classid, sch); } static unsigned long @@ -1418,6 +1394,8 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt) struct tc_hfsc_qopt *qopt; int err; + qdisc_watchdog_init(&q->watchdog, sch); + if (opt == NULL || nla_len(opt) < sizeof(*qopt)) return -EINVAL; qopt = nla_data(opt); @@ -1428,12 +1406,11 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt) return err; q->eligible = RB_ROOT; - err = tcf_block_get(&q->root.block, &q->root.filter_list); + err = tcf_block_get(&q->root.block, &q->root.filter_list, sch); if (err) - goto err_tcf; + return err; q->root.cl_common.classid = sch->handle; - q->root.refcnt = 1; q->root.sched = q; q->root.qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle); @@ -1448,13 +1425,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt) qdisc_class_hash_insert(&q->clhash, &q->root.cl_common); qdisc_class_hash_grow(sch, &q->clhash); - qdisc_watchdog_init(&q->watchdog, sch); - return 0; - -err_tcf: - qdisc_class_hash_destroy(&q->clhash); - return err; } static int @@ -1585,7 +1556,12 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) } if (cl->qdisc->q.qlen == 1) { - set_active(cl, qdisc_pkt_len(skb)); + unsigned int len = qdisc_pkt_len(skb); + + if (cl->cl_flags & HFSC_RSC) + init_ed(cl, len); + if (cl->cl_flags & HFSC_FSC) + init_vf(cl, len); /* * If this is the first packet, isolate the head so an eventual * head drop before the first dequeue operation has no chance @@ -1649,18 +1625,18 @@ hfsc_dequeue(struct Qdisc *sch) if (realtime) cl->cl_cumul += qdisc_pkt_len(skb); - if (cl->qdisc->q.qlen != 0) { - if (cl->cl_flags & HFSC_RSC) { + if (cl->cl_flags & HFSC_RSC) { + if (cl->qdisc->q.qlen != 0) { /* update ed */ next_len = qdisc_peek_len(cl->qdisc); if (realtime) update_ed(cl, next_len); else update_d(cl, next_len); + } else { + /* the class becomes passive */ + eltree_remove(cl); } - } else { - /* the class becomes passive */ - set_passive(cl); } qdisc_bstats_update(sch, skb); @@ -1676,8 +1652,7 @@ static const struct Qdisc_class_ops hfsc_class_ops = { .graft = hfsc_graft_class, .leaf = hfsc_class_leaf, .qlen_notify = hfsc_qlen_notify, - .get = hfsc_get_class, - .put = hfsc_put_class, + .find = hfsc_search_class, .bind_tcf = hfsc_bind_tcf, .unbind_tcf = hfsc_unbind_tcf, .tcf_block = hfsc_tcf_block, diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index 51d3ba682af9..73a53c08091b 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -477,6 +477,9 @@ static void hhf_destroy(struct Qdisc *sch) kvfree(q->hhf_valid_bits[i]); } + if (!q->hh_flows) + return; + for (i = 0; i < HH_FLOWS_CNT; i++) { struct hh_flow_state *flow, *next; struct list_head *head = &q->hh_flows[i]; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 5d65ec5207e9..fa0380730ff0 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -107,7 +107,6 @@ struct htb_class { struct tcf_proto __rcu *filter_list; /* class attached filters */ struct tcf_block *block; int filter_cnt; - int refcnt; /* usage count of this class */ int level; /* our level (see above) */ unsigned int children; @@ -143,6 +142,7 @@ struct htb_class { struct rb_node node[TC_HTB_NUMPRIO]; /* node for self or feed tree */ unsigned int drops ____cacheline_aligned_in_smp; + unsigned int overlimits; }; struct htb_level { @@ -193,6 +193,10 @@ static inline struct htb_class *htb_find(u32 handle, struct Qdisc *sch) return container_of(clc, struct htb_class, common); } +static unsigned long htb_search(struct Qdisc *sch, u32 handle) +{ + return (unsigned long)htb_find(handle, sch); +} /** * htb_classify - classify a packet into class * @@ -240,6 +244,7 @@ static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch, case TC_ACT_STOLEN: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + /* fall through */ case TC_ACT_SHOT: return NULL; } @@ -530,6 +535,9 @@ htb_change_class_mode(struct htb_sched *q, struct htb_class *cl, s64 *diff) if (new_mode == cl->cmode) return; + if (new_mode == HTB_CANT_SEND) + cl->overlimits++; + if (cl->prio_activity) { /* not necessary: speed optimization */ if (cl->cmode != HTB_CANT_SEND) htb_deactivate_prios(q, cl); @@ -1017,10 +1025,13 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) int err; int i; + qdisc_watchdog_init(&q->watchdog, sch); + INIT_WORK(&q->work, htb_work_func); + if (!opt) return -EINVAL; - err = tcf_block_get(&q->block, &q->filter_list); + err = tcf_block_get(&q->block, &q->filter_list, sch); if (err) return err; @@ -1041,8 +1052,6 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) for (i = 0; i < TC_HTB_NUMPRIO; i++) INIT_LIST_HEAD(q->drops + i); - qdisc_watchdog_init(&q->watchdog, sch); - INIT_WORK(&q->work, htb_work_func); qdisc_skb_head_init(&q->direct_queue); if (tb[TCA_HTB_DIRECT_QLEN]) @@ -1139,6 +1148,7 @@ htb_dump_class_stats(struct Qdisc *sch, unsigned long arg, struct gnet_dump *d) struct htb_class *cl = (struct htb_class *)arg; struct gnet_stats_queue qs = { .drops = cl->drops, + .overlimits = cl->overlimits, }; __u32 qlen = 0; @@ -1186,16 +1196,7 @@ static void htb_qlen_notify(struct Qdisc *sch, unsigned long arg) { struct htb_class *cl = (struct htb_class *)arg; - if (cl->un.leaf.q->q.qlen == 0) - htb_deactivate(qdisc_priv(sch), cl); -} - -static unsigned long htb_get(struct Qdisc *sch, u32 classid) -{ - struct htb_class *cl = htb_find(classid, sch); - if (cl) - cl->refcnt++; - return (unsigned long)cl; + htb_deactivate(qdisc_priv(sch), cl); } static inline int htb_parent_last_child(struct htb_class *cl) @@ -1317,22 +1318,10 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) if (last_child) htb_parent_to_leaf(q, cl, new_q); - BUG_ON(--cl->refcnt == 0); - /* - * This shouldn't happen: we "hold" one cops->get() when called - * from tc_ctl_tclass; the destroy method is done from cops->put(). - */ - sch_tree_unlock(sch); - return 0; -} -static void htb_put(struct Qdisc *sch, unsigned long arg) -{ - struct htb_class *cl = (struct htb_class *)arg; - - if (--cl->refcnt == 0) - htb_destroy_class(sch, cl); + htb_destroy_class(sch, cl); + return 0; } static int htb_change_class(struct Qdisc *sch, u32 classid, @@ -1405,7 +1394,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, if (!cl) goto failure; - err = tcf_block_get(&cl->block, &cl->filter_list); + err = tcf_block_get(&cl->block, &cl->filter_list, sch); if (err) { kfree(cl); goto failure; @@ -1423,7 +1412,6 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, } } - cl->refcnt = 1; cl->children = 0; INIT_LIST_HEAD(&cl->un.leaf.drop_list); RB_CLEAR_NODE(&cl->pq_node); @@ -1599,8 +1587,7 @@ static const struct Qdisc_class_ops htb_class_ops = { .graft = htb_graft, .leaf = htb_leaf, .qlen_notify = htb_qlen_notify, - .get = htb_get, - .put = htb_put, + .find = htb_search, .change = htb_change_class, .delete = htb_delete, .walk = htb_walk, diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index d8a9bebcab90..5ecc38f35d47 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -20,6 +20,8 @@ struct ingress_sched_data { struct tcf_block *block; + struct tcf_block_ext_info block_info; + struct mini_Qdisc_pair miniqp; }; static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg) @@ -27,23 +29,18 @@ static struct Qdisc *ingress_leaf(struct Qdisc *sch, unsigned long arg) return NULL; } -static unsigned long ingress_get(struct Qdisc *sch, u32 classid) +static unsigned long ingress_find(struct Qdisc *sch, u32 classid) { return TC_H_MIN(classid) + 1; } -static bool ingress_cl_offload(u32 classid) -{ - return true; -} - static unsigned long ingress_bind_filter(struct Qdisc *sch, unsigned long parent, u32 classid) { - return ingress_get(sch, classid); + return ingress_find(sch, classid); } -static void ingress_put(struct Qdisc *sch, unsigned long cl) +static void ingress_unbind_filter(struct Qdisc *sch, unsigned long cl) { } @@ -58,13 +55,26 @@ static struct tcf_block *ingress_tcf_block(struct Qdisc *sch, unsigned long cl) return q->block; } +static void clsact_chain_head_change(struct tcf_proto *tp_head, void *priv) +{ + struct mini_Qdisc_pair *miniqp = priv; + + mini_qdisc_pair_swap(miniqp, tp_head); +} + static int ingress_init(struct Qdisc *sch, struct nlattr *opt) { struct ingress_sched_data *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); int err; - err = tcf_block_get(&q->block, &dev->ingress_cl_list); + mini_qdisc_pair_init(&q->miniqp, sch, &dev->miniq_ingress); + + q->block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS; + q->block_info.chain_head_change = clsact_chain_head_change; + q->block_info.chain_head_change_priv = &q->miniqp; + + err = tcf_block_get_ext(&q->block, sch, &q->block_info); if (err) return err; @@ -78,7 +88,7 @@ static void ingress_destroy(struct Qdisc *sch) { struct ingress_sched_data *q = qdisc_priv(sch); - tcf_block_put(q->block); + tcf_block_put_ext(q->block, sch, &q->block_info); net_dec_ingress_queue(); } @@ -99,13 +109,11 @@ nla_put_failure: static const struct Qdisc_class_ops ingress_class_ops = { .leaf = ingress_leaf, - .get = ingress_get, - .put = ingress_put, + .find = ingress_find, .walk = ingress_walk, .tcf_block = ingress_tcf_block, - .tcf_cl_offload = ingress_cl_offload, .bind_tcf = ingress_bind_filter, - .unbind_tcf = ingress_put, + .unbind_tcf = ingress_unbind_filter, }; static struct Qdisc_ops ingress_qdisc_ops __read_mostly = { @@ -121,9 +129,13 @@ static struct Qdisc_ops ingress_qdisc_ops __read_mostly = { struct clsact_sched_data { struct tcf_block *ingress_block; struct tcf_block *egress_block; + struct tcf_block_ext_info ingress_block_info; + struct tcf_block_ext_info egress_block_info; + struct mini_Qdisc_pair miniqp_ingress; + struct mini_Qdisc_pair miniqp_egress; }; -static unsigned long clsact_get(struct Qdisc *sch, u32 classid) +static unsigned long clsact_find(struct Qdisc *sch, u32 classid) { switch (TC_H_MIN(classid)) { case TC_H_MIN(TC_H_MIN_INGRESS): @@ -134,15 +146,10 @@ static unsigned long clsact_get(struct Qdisc *sch, u32 classid) } } -static bool clsact_cl_offload(u32 classid) -{ - return TC_H_MIN(classid) == TC_H_MIN(TC_H_MIN_INGRESS); -} - static unsigned long clsact_bind_filter(struct Qdisc *sch, unsigned long parent, u32 classid) { - return clsact_get(sch, classid); + return clsact_find(sch, classid); } static struct tcf_block *clsact_tcf_block(struct Qdisc *sch, unsigned long cl) @@ -165,13 +172,25 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt) struct net_device *dev = qdisc_dev(sch); int err; - err = tcf_block_get(&q->ingress_block, &dev->ingress_cl_list); + mini_qdisc_pair_init(&q->miniqp_ingress, sch, &dev->miniq_ingress); + + q->ingress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS; + q->ingress_block_info.chain_head_change = clsact_chain_head_change; + q->ingress_block_info.chain_head_change_priv = &q->miniqp_ingress; + + err = tcf_block_get_ext(&q->ingress_block, sch, &q->ingress_block_info); if (err) return err; - err = tcf_block_get(&q->egress_block, &dev->egress_cl_list); + mini_qdisc_pair_init(&q->miniqp_egress, sch, &dev->miniq_egress); + + q->egress_block_info.binder_type = TCF_BLOCK_BINDER_TYPE_CLSACT_EGRESS; + q->egress_block_info.chain_head_change = clsact_chain_head_change; + q->egress_block_info.chain_head_change_priv = &q->miniqp_egress; + + err = tcf_block_get_ext(&q->egress_block, sch, &q->egress_block_info); if (err) - return err; + goto err_egress_block_get; net_inc_ingress_queue(); net_inc_egress_queue(); @@ -179,14 +198,18 @@ static int clsact_init(struct Qdisc *sch, struct nlattr *opt) sch->flags |= TCQ_F_CPUSTATS; return 0; + +err_egress_block_get: + tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info); + return err; } static void clsact_destroy(struct Qdisc *sch) { struct clsact_sched_data *q = qdisc_priv(sch); - tcf_block_put(q->egress_block); - tcf_block_put(q->ingress_block); + tcf_block_put_ext(q->egress_block, sch, &q->egress_block_info); + tcf_block_put_ext(q->ingress_block, sch, &q->ingress_block_info); net_dec_ingress_queue(); net_dec_egress_queue(); @@ -194,13 +217,11 @@ static void clsact_destroy(struct Qdisc *sch) static const struct Qdisc_class_ops clsact_class_ops = { .leaf = ingress_leaf, - .get = clsact_get, - .put = ingress_put, + .find = clsact_find, .walk = ingress_walk, .tcf_block = clsact_tcf_block, - .tcf_cl_offload = clsact_cl_offload, .bind_tcf = clsact_bind_filter, - .unbind_tcf = ingress_put, + .unbind_tcf = ingress_unbind_filter, }; static struct Qdisc_ops clsact_qdisc_ops __read_mostly = { diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index cadfdd4f1e52..213b586a06a0 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -130,15 +130,7 @@ static struct netdev_queue *mq_queue_get(struct Qdisc *sch, unsigned long cl) static struct netdev_queue *mq_select_queue(struct Qdisc *sch, struct tcmsg *tcm) { - unsigned int ntx = TC_H_MIN(tcm->tcm_parent); - struct netdev_queue *dev_queue = mq_queue_get(sch, ntx); - - if (!dev_queue) { - struct net_device *dev = qdisc_dev(sch); - - return netdev_get_tx_queue(dev, 0); - } - return dev_queue; + return mq_queue_get(sch, TC_H_MIN(tcm->tcm_parent)); } static int mq_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, @@ -165,7 +157,7 @@ static struct Qdisc *mq_leaf(struct Qdisc *sch, unsigned long cl) return dev_queue->qdisc_sleeping; } -static unsigned long mq_get(struct Qdisc *sch, u32 classid) +static unsigned long mq_find(struct Qdisc *sch, u32 classid) { unsigned int ntx = TC_H_MIN(classid); @@ -174,10 +166,6 @@ static unsigned long mq_get(struct Qdisc *sch, u32 classid) return ntx; } -static void mq_put(struct Qdisc *sch, unsigned long cl) -{ -} - static int mq_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb, struct tcmsg *tcm) { @@ -223,8 +211,7 @@ static const struct Qdisc_class_ops mq_class_ops = { .select_queue = mq_select_queue, .graft = mq_graft, .leaf = mq_leaf, - .get = mq_get, - .put = mq_put, + .find = mq_find, .walk = mq_walk, .dump = mq_dump_class, .dump_stats = mq_dump_class_stats, diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index e0c02725cd48..b85885a9d8a1 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -18,10 +18,16 @@ #include <net/netlink.h> #include <net/pkt_sched.h> #include <net/sch_generic.h> +#include <net/pkt_cls.h> struct mqprio_sched { struct Qdisc **qdiscs; + u16 mode; + u16 shaper; int hw_offload; + u32 flags; + u64 min_rate[TC_QOPT_MAX_QUEUE]; + u64 max_rate[TC_QOPT_MAX_QUEUE]; }; static void mqprio_destroy(struct Qdisc *sch) @@ -39,11 +45,18 @@ static void mqprio_destroy(struct Qdisc *sch) } if (priv->hw_offload && dev->netdev_ops->ndo_setup_tc) { - struct tc_mqprio_qopt offload = { 0 }; - struct tc_to_netdev tc = { .type = TC_SETUP_MQPRIO, - { .mqprio = &offload } }; - - dev->netdev_ops->ndo_setup_tc(dev, sch->handle, 0, 0, &tc); + struct tc_mqprio_qopt_offload mqprio = { { 0 } }; + + switch (priv->mode) { + case TC_MQPRIO_MODE_DCB: + case TC_MQPRIO_MODE_CHANNEL: + dev->netdev_ops->ndo_setup_tc(dev, + TC_SETUP_QDISC_MQPRIO, + &mqprio); + break; + default: + return; + } } else { netdev_set_num_tc(dev, 0); } @@ -99,6 +112,26 @@ static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt) return 0; } +static const struct nla_policy mqprio_policy[TCA_MQPRIO_MAX + 1] = { + [TCA_MQPRIO_MODE] = { .len = sizeof(u16) }, + [TCA_MQPRIO_SHAPER] = { .len = sizeof(u16) }, + [TCA_MQPRIO_MIN_RATE64] = { .type = NLA_NESTED }, + [TCA_MQPRIO_MAX_RATE64] = { .type = NLA_NESTED }, +}; + +static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, + const struct nla_policy *policy, int len) +{ + int nested_len = nla_len(nla) - NLA_ALIGN(len); + + if (nested_len >= nla_attr_size(0)) + return nla_parse(tb, maxtype, nla_data(nla) + NLA_ALIGN(len), + nested_len, policy, NULL); + + memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1)); + return 0; +} + static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) { struct net_device *dev = qdisc_dev(sch); @@ -107,6 +140,10 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) struct Qdisc *qdisc; int i, err = -EOPNOTSUPP; struct tc_mqprio_qopt *qopt = NULL; + struct nlattr *tb[TCA_MQPRIO_MAX + 1]; + struct nlattr *attr; + int rem; + int len; BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE); BUILD_BUG_ON(TC_BITMASK != TC_QOPT_BITMASK); @@ -117,6 +154,10 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) if (!netif_is_multiqueue(dev)) return -EOPNOTSUPP; + /* make certain can allocate enough classids to handle queues */ + if (dev->num_tx_queues >= TC_H_MIN_PRIORITY) + return -ENOMEM; + if (!opt || nla_len(opt) < sizeof(*qopt)) return -EINVAL; @@ -124,6 +165,59 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) if (mqprio_parse_opt(dev, qopt)) return -EINVAL; + len = nla_len(opt) - NLA_ALIGN(sizeof(*qopt)); + if (len > 0) { + err = parse_attr(tb, TCA_MQPRIO_MAX, opt, mqprio_policy, + sizeof(*qopt)); + if (err < 0) + return err; + + if (!qopt->hw) + return -EINVAL; + + if (tb[TCA_MQPRIO_MODE]) { + priv->flags |= TC_MQPRIO_F_MODE; + priv->mode = *(u16 *)nla_data(tb[TCA_MQPRIO_MODE]); + } + + if (tb[TCA_MQPRIO_SHAPER]) { + priv->flags |= TC_MQPRIO_F_SHAPER; + priv->shaper = *(u16 *)nla_data(tb[TCA_MQPRIO_SHAPER]); + } + + if (tb[TCA_MQPRIO_MIN_RATE64]) { + if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE) + return -EINVAL; + i = 0; + nla_for_each_nested(attr, tb[TCA_MQPRIO_MIN_RATE64], + rem) { + if (nla_type(attr) != TCA_MQPRIO_MIN_RATE64) + return -EINVAL; + if (i >= qopt->num_tc) + break; + priv->min_rate[i] = *(u64 *)nla_data(attr); + i++; + } + priv->flags |= TC_MQPRIO_F_MIN_RATE; + } + + if (tb[TCA_MQPRIO_MAX_RATE64]) { + if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE) + return -EINVAL; + i = 0; + nla_for_each_nested(attr, tb[TCA_MQPRIO_MAX_RATE64], + rem) { + if (nla_type(attr) != TCA_MQPRIO_MAX_RATE64) + return -EINVAL; + if (i >= qopt->num_tc) + break; + priv->max_rate[i] = *(u64 *)nla_data(attr); + i++; + } + priv->flags |= TC_MQPRIO_F_MAX_RATE; + } + } + /* pre-allocate qdisc, attachment can't fail */ priv->qdiscs = kcalloc(dev->num_tx_queues, sizeof(priv->qdiscs[0]), GFP_KERNEL); @@ -148,16 +242,36 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt) * supplied and verified mapping */ if (qopt->hw) { - struct tc_mqprio_qopt offload = *qopt; - struct tc_to_netdev tc = { .type = TC_SETUP_MQPRIO, - { .mqprio = &offload } }; + struct tc_mqprio_qopt_offload mqprio = {.qopt = *qopt}; - err = dev->netdev_ops->ndo_setup_tc(dev, sch->handle, - 0, 0, &tc); + switch (priv->mode) { + case TC_MQPRIO_MODE_DCB: + if (priv->shaper != TC_MQPRIO_SHAPER_DCB) + return -EINVAL; + break; + case TC_MQPRIO_MODE_CHANNEL: + mqprio.flags = priv->flags; + if (priv->flags & TC_MQPRIO_F_MODE) + mqprio.mode = priv->mode; + if (priv->flags & TC_MQPRIO_F_SHAPER) + mqprio.shaper = priv->shaper; + if (priv->flags & TC_MQPRIO_F_MIN_RATE) + for (i = 0; i < mqprio.qopt.num_tc; i++) + mqprio.min_rate[i] = priv->min_rate[i]; + if (priv->flags & TC_MQPRIO_F_MAX_RATE) + for (i = 0; i < mqprio.qopt.num_tc; i++) + mqprio.max_rate[i] = priv->max_rate[i]; + break; + default: + return -EINVAL; + } + err = dev->netdev_ops->ndo_setup_tc(dev, + TC_SETUP_QDISC_MQPRIO, + &mqprio); if (err) return err; - priv->hw_offload = offload.hw; + priv->hw_offload = mqprio.qopt.hw; } else { netdev_set_num_tc(dev, qopt->num_tc); for (i = 0; i < qopt->num_tc; i++) @@ -197,7 +311,7 @@ static struct netdev_queue *mqprio_queue_get(struct Qdisc *sch, unsigned long cl) { struct net_device *dev = qdisc_dev(sch); - unsigned long ntx = cl - 1 - netdev_get_num_tc(dev); + unsigned long ntx = cl - 1; if (ntx >= dev->num_tx_queues) return NULL; @@ -227,11 +341,51 @@ static int mqprio_graft(struct Qdisc *sch, unsigned long cl, struct Qdisc *new, return 0; } +static int dump_rates(struct mqprio_sched *priv, + struct tc_mqprio_qopt *opt, struct sk_buff *skb) +{ + struct nlattr *nest; + int i; + + if (priv->flags & TC_MQPRIO_F_MIN_RATE) { + nest = nla_nest_start(skb, TCA_MQPRIO_MIN_RATE64); + if (!nest) + goto nla_put_failure; + + for (i = 0; i < opt->num_tc; i++) { + if (nla_put(skb, TCA_MQPRIO_MIN_RATE64, + sizeof(priv->min_rate[i]), + &priv->min_rate[i])) + goto nla_put_failure; + } + nla_nest_end(skb, nest); + } + + if (priv->flags & TC_MQPRIO_F_MAX_RATE) { + nest = nla_nest_start(skb, TCA_MQPRIO_MAX_RATE64); + if (!nest) + goto nla_put_failure; + + for (i = 0; i < opt->num_tc; i++) { + if (nla_put(skb, TCA_MQPRIO_MAX_RATE64, + sizeof(priv->max_rate[i]), + &priv->max_rate[i])) + goto nla_put_failure; + } + nla_nest_end(skb, nest); + } + return 0; + +nla_put_failure: + nla_nest_cancel(skb, nest); + return -1; +} + static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) { struct net_device *dev = qdisc_dev(sch); struct mqprio_sched *priv = qdisc_priv(sch); - unsigned char *b = skb_tail_pointer(skb); + struct nlattr *nla = (struct nlattr *)skb_tail_pointer(skb); struct tc_mqprio_qopt opt = { 0 }; struct Qdisc *qdisc; unsigned int i; @@ -262,12 +416,25 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) opt.offset[i] = dev->tc_to_txq[i].offset; } - if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt)) + if (nla_put(skb, TCA_OPTIONS, NLA_ALIGN(sizeof(opt)), &opt)) + goto nla_put_failure; + + if ((priv->flags & TC_MQPRIO_F_MODE) && + nla_put_u16(skb, TCA_MQPRIO_MODE, priv->mode)) goto nla_put_failure; - return skb->len; + if ((priv->flags & TC_MQPRIO_F_SHAPER) && + nla_put_u16(skb, TCA_MQPRIO_SHAPER, priv->shaper)) + goto nla_put_failure; + + if ((priv->flags & TC_MQPRIO_F_MIN_RATE || + priv->flags & TC_MQPRIO_F_MAX_RATE) && + (dump_rates(priv, &opt, skb) != 0)) + goto nla_put_failure; + + return nla_nest_end(skb, nla); nla_put_failure: - nlmsg_trim(skb, b); + nlmsg_trim(skb, nla); return -1; } @@ -281,47 +448,40 @@ static struct Qdisc *mqprio_leaf(struct Qdisc *sch, unsigned long cl) return dev_queue->qdisc_sleeping; } -static unsigned long mqprio_get(struct Qdisc *sch, u32 classid) +static unsigned long mqprio_find(struct Qdisc *sch, u32 classid) { struct net_device *dev = qdisc_dev(sch); unsigned int ntx = TC_H_MIN(classid); - if (ntx > dev->num_tx_queues + netdev_get_num_tc(dev)) - return 0; - return ntx; -} + /* There are essentially two regions here that have valid classid + * values. The first region will have a classid value of 1 through + * num_tx_queues. All of these are backed by actual Qdiscs. + */ + if (ntx < TC_H_MIN_PRIORITY) + return (ntx <= dev->num_tx_queues) ? ntx : 0; -static void mqprio_put(struct Qdisc *sch, unsigned long cl) -{ + /* The second region represents the hardware traffic classes. These + * are represented by classid values of TC_H_MIN_PRIORITY through + * TC_H_MIN_PRIORITY + netdev_get_num_tc - 1 + */ + return ((ntx - TC_H_MIN_PRIORITY) < netdev_get_num_tc(dev)) ? ntx : 0; } static int mqprio_dump_class(struct Qdisc *sch, unsigned long cl, struct sk_buff *skb, struct tcmsg *tcm) { - struct net_device *dev = qdisc_dev(sch); + if (cl < TC_H_MIN_PRIORITY) { + struct netdev_queue *dev_queue = mqprio_queue_get(sch, cl); + struct net_device *dev = qdisc_dev(sch); + int tc = netdev_txq_to_tc(dev, cl - 1); - if (cl <= netdev_get_num_tc(dev)) { + tcm->tcm_parent = (tc < 0) ? 0 : + TC_H_MAKE(TC_H_MAJ(sch->handle), + TC_H_MIN(tc + TC_H_MIN_PRIORITY)); + tcm->tcm_info = dev_queue->qdisc_sleeping->handle; + } else { tcm->tcm_parent = TC_H_ROOT; tcm->tcm_info = 0; - } else { - int i; - struct netdev_queue *dev_queue; - - dev_queue = mqprio_queue_get(sch, cl); - tcm->tcm_parent = 0; - for (i = 0; i < netdev_get_num_tc(dev); i++) { - struct netdev_tc_txq tc = dev->tc_to_txq[i]; - int q_idx = cl - netdev_get_num_tc(dev); - - if (q_idx > tc.offset && - q_idx <= tc.offset + tc.count) { - tcm->tcm_parent = - TC_H_MAKE(TC_H_MAJ(sch->handle), - TC_H_MIN(i + 1)); - break; - } - } - tcm->tcm_info = dev_queue->qdisc_sleeping->handle; } tcm->tcm_handle |= TC_H_MIN(cl); return 0; @@ -332,15 +492,14 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, __releases(d->lock) __acquires(d->lock) { - struct net_device *dev = qdisc_dev(sch); - - if (cl <= netdev_get_num_tc(dev)) { + if (cl >= TC_H_MIN_PRIORITY) { int i; __u32 qlen = 0; struct Qdisc *qdisc; struct gnet_stats_queue qstats = {0}; struct gnet_stats_basic_packed bstats = {0}; - struct netdev_tc_txq tc = dev->tc_to_txq[cl - 1]; + struct net_device *dev = qdisc_dev(sch); + struct netdev_tc_txq tc = dev->tc_to_txq[cl & TC_BITMASK]; /* Drop lock here it will be reclaimed before touching * statistics this is required because the d->lock we @@ -393,25 +552,44 @@ static void mqprio_walk(struct Qdisc *sch, struct qdisc_walker *arg) /* Walk hierarchy with a virtual class per tc */ arg->count = arg->skip; - for (ntx = arg->skip; - ntx < dev->num_tx_queues + netdev_get_num_tc(dev); - ntx++) { + for (ntx = arg->skip; ntx < netdev_get_num_tc(dev); ntx++) { + if (arg->fn(sch, ntx + TC_H_MIN_PRIORITY, arg) < 0) { + arg->stop = 1; + return; + } + arg->count++; + } + + /* Pad the values and skip over unused traffic classes */ + if (ntx < TC_MAX_QUEUE) { + arg->count = TC_MAX_QUEUE; + ntx = TC_MAX_QUEUE; + } + + /* Reset offset, sort out remaining per-queue qdiscs */ + for (ntx -= TC_MAX_QUEUE; ntx < dev->num_tx_queues; ntx++) { if (arg->fn(sch, ntx + 1, arg) < 0) { arg->stop = 1; - break; + return; } arg->count++; } } +static struct netdev_queue *mqprio_select_queue(struct Qdisc *sch, + struct tcmsg *tcm) +{ + return mqprio_queue_get(sch, TC_H_MIN(tcm->tcm_parent)); +} + static const struct Qdisc_class_ops mqprio_class_ops = { .graft = mqprio_graft, .leaf = mqprio_leaf, - .get = mqprio_get, - .put = mqprio_put, + .find = mqprio_find, .walk = mqprio_walk, .dump = mqprio_dump_class, .dump_stats = mqprio_dump_class_stats, + .select_queue = mqprio_select_queue, }; static struct Qdisc_ops mqprio_qdisc_ops __read_mostly = { diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index f143b7bbaa0d..012216386c0b 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -54,6 +54,7 @@ multiq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) case TC_ACT_QUEUED: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + /* fall through */ case TC_ACT_SHOT: return NULL; } @@ -245,7 +246,7 @@ static int multiq_init(struct Qdisc *sch, struct nlattr *opt) if (opt == NULL) return -EINVAL; - err = tcf_block_get(&q->block, &q->filter_list); + err = tcf_block_get(&q->block, &q->filter_list, sch); if (err) return err; @@ -257,12 +258,7 @@ static int multiq_init(struct Qdisc *sch, struct nlattr *opt) for (i = 0; i < q->max_bands; i++) q->queues[i] = &noop_qdisc; - err = multiq_tune(sch, opt); - - if (err) - kfree(q->queues); - - return err; + return multiq_tune(sch, opt); } static int multiq_dump(struct Qdisc *sch, struct sk_buff *skb) @@ -306,7 +302,7 @@ multiq_leaf(struct Qdisc *sch, unsigned long arg) return q->queues[band]; } -static unsigned long multiq_get(struct Qdisc *sch, u32 classid) +static unsigned long multiq_find(struct Qdisc *sch, u32 classid) { struct multiq_sched_data *q = qdisc_priv(sch); unsigned long band = TC_H_MIN(classid); @@ -319,11 +315,11 @@ static unsigned long multiq_get(struct Qdisc *sch, u32 classid) static unsigned long multiq_bind(struct Qdisc *sch, unsigned long parent, u32 classid) { - return multiq_get(sch, classid); + return multiq_find(sch, classid); } -static void multiq_put(struct Qdisc *q, unsigned long cl) +static void multiq_unbind(struct Qdisc *q, unsigned long cl) { } @@ -385,12 +381,11 @@ static struct tcf_block *multiq_tcf_block(struct Qdisc *sch, unsigned long cl) static const struct Qdisc_class_ops multiq_class_ops = { .graft = multiq_graft, .leaf = multiq_leaf, - .get = multiq_get, - .put = multiq_put, + .find = multiq_find, .walk = multiq_walk, .tcf_block = multiq_tcf_block, .bind_tcf = multiq_bind, - .unbind_tcf = multiq_put, + .unbind_tcf = multiq_unbind, .dump = multiq_dump_class, .dump_stats = multiq_dump_class_stats, }; diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 1b3dd6190e93..dd70924cbcdf 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -77,8 +77,8 @@ struct netem_sched_data { struct qdisc_watchdog watchdog; - psched_tdiff_t latency; - psched_tdiff_t jitter; + s64 latency; + s64 jitter; u32 loss; u32 ecn; @@ -135,6 +135,13 @@ struct netem_sched_data { u32 a5; /* p23 used only in 4-states */ } clg; + struct tc_netem_slot slot_config; + struct slotstate { + u64 slot_next; + s32 packets_left; + s32 bytes_left; + } slot; + }; /* Time stamp put into socket buffer control block @@ -145,16 +152,9 @@ struct netem_sched_data { * we save skb->tstamp value in skb->cb[] before destroying it. */ struct netem_skb_cb { - psched_time_t time_to_send; - ktime_t tstamp_save; + u64 time_to_send; }; - -static struct sk_buff *netem_rb_to_skb(struct rb_node *rb) -{ - return rb_entry(rb, struct sk_buff, rbnode); -} - static inline struct netem_skb_cb *netem_skb_cb(struct sk_buff *skb) { /* we assume we can use skb next/prev/tstamp as storage for rb_node */ @@ -312,11 +312,11 @@ static bool loss_event(struct netem_sched_data *q) * std deviation sigma. Uses table lookup to approximate the desired * distribution, and a uniformly-distributed pseudo-random source. */ -static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma, - struct crndstate *state, - const struct disttable *dist) +static s64 tabledist(s64 mu, s32 sigma, + struct crndstate *state, + const struct disttable *dist) { - psched_tdiff_t x; + s64 x; long t; u32 rnd; @@ -327,7 +327,7 @@ static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma, /* default uniform distribution */ if (dist == NULL) - return (rnd % (2*sigma)) - sigma + mu; + return (rnd % (2 * sigma)) - sigma + mu; t = dist->table[rnd % dist->size]; x = (sigma % NETEM_DIST_SCALE) * t; @@ -339,10 +339,8 @@ static psched_tdiff_t tabledist(psched_tdiff_t mu, psched_tdiff_t sigma, return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu; } -static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sched_data *q) +static u64 packet_time_ns(u64 len, const struct netem_sched_data *q) { - u64 ticks; - len += q->packet_overhead; if (q->cell_size) { @@ -353,21 +351,19 @@ static psched_time_t packet_len_2_sched_time(unsigned int len, struct netem_sche len = cells * (q->cell_size + q->cell_overhead); } - ticks = (u64)len * NSEC_PER_SEC; - - do_div(ticks, q->rate); - return PSCHED_NS2TICKS(ticks); + return div64_u64(len * NSEC_PER_SEC, q->rate); } static void tfifo_reset(struct Qdisc *sch) { struct netem_sched_data *q = qdisc_priv(sch); - struct rb_node *p; + struct rb_node *p = rb_first(&q->t_root); - while ((p = rb_first(&q->t_root))) { - struct sk_buff *skb = netem_rb_to_skb(p); + while (p) { + struct sk_buff *skb = rb_to_skb(p); - rb_erase(p, &q->t_root); + p = rb_next(p); + rb_erase(&skb->rbnode, &q->t_root); rtnl_kfree_skbs(skb, skb); } } @@ -375,14 +371,14 @@ static void tfifo_reset(struct Qdisc *sch) static void tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) { struct netem_sched_data *q = qdisc_priv(sch); - psched_time_t tnext = netem_skb_cb(nskb)->time_to_send; + u64 tnext = netem_skb_cb(nskb)->time_to_send; struct rb_node **p = &q->t_root.rb_node, *parent = NULL; while (*p) { struct sk_buff *skb; parent = *p; - skb = netem_rb_to_skb(parent); + skb = rb_to_skb(parent); if (tnext >= netem_skb_cb(skb)->time_to_send) p = &parent->rb_right; else @@ -521,13 +517,13 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (q->gap == 0 || /* not doing reordering */ q->counter < q->gap - 1 || /* inside last reordering gap */ q->reorder < get_crandom(&q->reorder_cor)) { - psched_time_t now; - psched_tdiff_t delay; + u64 now; + s64 delay; delay = tabledist(q->latency, q->jitter, &q->delay_cor, q->delay_dist); - now = psched_get_time(); + now = ktime_get_ns(); if (q->rate) { struct netem_skb_cb *last = NULL; @@ -538,7 +534,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff *t_skb; struct netem_skb_cb *t_last; - t_skb = netem_rb_to_skb(rb_last(&q->t_root)); + t_skb = skb_rb_last(&q->t_root); t_last = netem_skb_cb(t_skb); if (!last || t_last->time_to_send > last->time_to_send) { @@ -553,15 +549,14 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, * from delay. */ delay -= last->time_to_send - now; - delay = max_t(psched_tdiff_t, 0, delay); + delay = max_t(s64, 0, delay); now = last->time_to_send; } - delay += packet_len_2_sched_time(qdisc_pkt_len(skb), q); + delay += packet_time_ns(qdisc_pkt_len(skb), q); } cb->time_to_send = now + delay; - cb->tstamp_save = skb->tstamp; ++q->counter; tfifo_enqueue(skb, sch); } else { @@ -569,7 +564,7 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch, * Do re-ordering by putting one out of N packets at the front * of the queue. */ - cb->time_to_send = psched_get_time(); + cb->time_to_send = ktime_get_ns(); q->counter = 0; netem_enqueue_skb_head(&sch->q, skb); @@ -600,6 +595,20 @@ finish_segs: return NET_XMIT_SUCCESS; } +/* Delay the next round with a new future slot with a + * correct number of bytes and packets. + */ + +static void get_slot_next(struct netem_sched_data *q, u64 now) +{ + q->slot.slot_next = now + q->slot_config.min_delay + + (prandom_u32() * + (q->slot_config.max_delay - + q->slot_config.min_delay) >> 32); + q->slot.packets_left = q->slot_config.max_packets; + q->slot.bytes_left = q->slot_config.max_bytes; +} + static struct sk_buff *netem_dequeue(struct Qdisc *sch) { struct netem_sched_data *q = qdisc_priv(sch); @@ -616,20 +625,26 @@ deliver: } p = rb_first(&q->t_root); if (p) { - psched_time_t time_to_send; + u64 time_to_send; + u64 now = ktime_get_ns(); - skb = netem_rb_to_skb(p); + skb = rb_to_skb(p); /* if more time remaining? */ time_to_send = netem_skb_cb(skb)->time_to_send; - if (time_to_send <= psched_get_time()) { - rb_erase(p, &q->t_root); + if (q->slot.slot_next && q->slot.slot_next < time_to_send) + get_slot_next(q, now); + if (time_to_send <= now && q->slot.slot_next <= now) { + rb_erase(p, &q->t_root); sch->q.qlen--; qdisc_qstats_backlog_dec(sch, skb); skb->next = NULL; skb->prev = NULL; - skb->tstamp = netem_skb_cb(skb)->tstamp_save; + /* skb->dev shares skb->rbnode area, + * we need to restore its value. + */ + skb->dev = qdisc_dev(sch); #ifdef CONFIG_NET_CLS_ACT /* @@ -640,6 +655,14 @@ deliver: skb->tstamp = 0; #endif + if (q->slot.slot_next) { + q->slot.packets_left--; + q->slot.bytes_left -= qdisc_pkt_len(skb); + if (q->slot.packets_left <= 0 || + q->slot.bytes_left <= 0) + get_slot_next(q, now); + } + if (q->qdisc) { unsigned int pkt_len = qdisc_pkt_len(skb); struct sk_buff *to_free = NULL; @@ -663,7 +686,10 @@ deliver: if (skb) goto deliver; } - qdisc_watchdog_schedule(&q->watchdog, time_to_send); + + qdisc_watchdog_schedule_ns(&q->watchdog, + max(time_to_send, + q->slot.slot_next)); } if (q->qdisc) { @@ -694,6 +720,7 @@ static void dist_free(struct disttable *d) * Distribution data is a variable size payload containing * signed 16 bit values. */ + static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) { struct netem_sched_data *q = qdisc_priv(sch); @@ -724,6 +751,23 @@ static int get_dist_table(struct Qdisc *sch, const struct nlattr *attr) return 0; } +static void get_slot(struct netem_sched_data *q, const struct nlattr *attr) +{ + const struct tc_netem_slot *c = nla_data(attr); + + q->slot_config = *c; + if (q->slot_config.max_packets == 0) + q->slot_config.max_packets = INT_MAX; + if (q->slot_config.max_bytes == 0) + q->slot_config.max_bytes = INT_MAX; + q->slot.packets_left = q->slot_config.max_packets; + q->slot.bytes_left = q->slot_config.max_bytes; + if (q->slot_config.min_delay | q->slot_config.max_delay) + q->slot.slot_next = ktime_get_ns(); + else + q->slot.slot_next = 0; +} + static void get_correlation(struct netem_sched_data *q, const struct nlattr *attr) { const struct tc_netem_corr *c = nla_data(attr); @@ -825,6 +869,9 @@ static const struct nla_policy netem_policy[TCA_NETEM_MAX + 1] = { [TCA_NETEM_LOSS] = { .type = NLA_NESTED }, [TCA_NETEM_ECN] = { .type = NLA_U32 }, [TCA_NETEM_RATE64] = { .type = NLA_U64 }, + [TCA_NETEM_LATENCY64] = { .type = NLA_S64 }, + [TCA_NETEM_JITTER64] = { .type = NLA_S64 }, + [TCA_NETEM_SLOT] = { .len = sizeof(struct tc_netem_slot) }, }; static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla, @@ -892,8 +939,8 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt) sch->limit = qopt->limit; - q->latency = qopt->latency; - q->jitter = qopt->jitter; + q->latency = PSCHED_TICKS2NS(qopt->latency); + q->jitter = PSCHED_TICKS2NS(qopt->jitter); q->limit = qopt->limit; q->gap = qopt->gap; q->counter = 0; @@ -922,9 +969,18 @@ static int netem_change(struct Qdisc *sch, struct nlattr *opt) q->rate = max_t(u64, q->rate, nla_get_u64(tb[TCA_NETEM_RATE64])); + if (tb[TCA_NETEM_LATENCY64]) + q->latency = nla_get_s64(tb[TCA_NETEM_LATENCY64]); + + if (tb[TCA_NETEM_JITTER64]) + q->jitter = nla_get_s64(tb[TCA_NETEM_JITTER64]); + if (tb[TCA_NETEM_ECN]) q->ecn = nla_get_u32(tb[TCA_NETEM_ECN]); + if (tb[TCA_NETEM_SLOT]) + get_slot(q, tb[TCA_NETEM_SLOT]); + return ret; } @@ -933,11 +989,11 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt) struct netem_sched_data *q = qdisc_priv(sch); int ret; + qdisc_watchdog_init(&q->watchdog, sch); + if (!opt) return -EINVAL; - qdisc_watchdog_init(&q->watchdog, sch); - q->loss_model = CLG_RANDOM; ret = netem_change(sch, opt); if (ret) @@ -1014,9 +1070,12 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) struct tc_netem_reorder reorder; struct tc_netem_corrupt corrupt; struct tc_netem_rate rate; + struct tc_netem_slot slot; - qopt.latency = q->latency; - qopt.jitter = q->jitter; + qopt.latency = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->latency), + UINT_MAX); + qopt.jitter = min_t(psched_tdiff_t, PSCHED_NS2TICKS(q->jitter), + UINT_MAX); qopt.limit = q->limit; qopt.loss = q->loss; qopt.gap = q->gap; @@ -1024,6 +1083,12 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt)) goto nla_put_failure; + if (nla_put(skb, TCA_NETEM_LATENCY64, sizeof(q->latency), &q->latency)) + goto nla_put_failure; + + if (nla_put(skb, TCA_NETEM_JITTER64, sizeof(q->jitter), &q->jitter)) + goto nla_put_failure; + cor.delay_corr = q->delay_cor.rho; cor.loss_corr = q->loss_cor.rho; cor.dup_corr = q->dup_cor.rho; @@ -1060,6 +1125,16 @@ static int netem_dump(struct Qdisc *sch, struct sk_buff *skb) if (dump_loss_model(q, skb) != 0) goto nla_put_failure; + if (q->slot_config.min_delay | q->slot_config.max_delay) { + slot = q->slot_config; + if (slot.max_packets == INT_MAX) + slot.max_packets = 0; + if (slot.max_bytes == INT_MAX) + slot.max_bytes = 0; + if (nla_put(skb, TCA_NETEM_SLOT, sizeof(slot), &slot)) + goto nla_put_failure; + } + return nla_nest_end(skb, nla); nla_put_failure: @@ -1096,15 +1171,11 @@ static struct Qdisc *netem_leaf(struct Qdisc *sch, unsigned long arg) return q->qdisc; } -static unsigned long netem_get(struct Qdisc *sch, u32 classid) +static unsigned long netem_find(struct Qdisc *sch, u32 classid) { return 1; } -static void netem_put(struct Qdisc *sch, unsigned long arg) -{ -} - static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker) { if (!walker->stop) { @@ -1120,8 +1191,7 @@ static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker) static const struct Qdisc_class_ops netem_class_ops = { .graft = netem_graft, .leaf = netem_leaf, - .get = netem_get, - .put = netem_put, + .find = netem_find, .walk = netem_walk, .dump = netem_dump_class, }; diff --git a/net/sched/sch_pie.c b/net/sched/sch_pie.c index 6c2791d6102d..776c694c77c7 100644 --- a/net/sched/sch_pie.c +++ b/net/sched/sch_pie.c @@ -74,6 +74,7 @@ struct pie_sched_data { struct pie_vars vars; struct pie_stats stats; struct timer_list adapt_timer; + struct Qdisc *sch; }; static void pie_params_init(struct pie_params *params) @@ -422,10 +423,10 @@ static void calculate_probability(struct Qdisc *sch) pie_vars_init(&q->vars); } -static void pie_timer(unsigned long arg) +static void pie_timer(struct timer_list *t) { - struct Qdisc *sch = (struct Qdisc *)arg; - struct pie_sched_data *q = qdisc_priv(sch); + struct pie_sched_data *q = from_timer(q, t, adapt_timer); + struct Qdisc *sch = q->sch; spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch)); spin_lock(root_lock); @@ -446,7 +447,8 @@ static int pie_init(struct Qdisc *sch, struct nlattr *opt) pie_vars_init(&q->vars); sch->limit = q->params.limit; - setup_timer(&q->adapt_timer, pie_timer, (unsigned long)sch); + q->sch = sch; + timer_setup(&q->adapt_timer, pie_timer, 0); if (opt) { int err = pie_change(sch, opt); diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index e3e364cc9a70..2c79559a0d31 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -50,6 +50,7 @@ prio_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr) case TC_ACT_QUEUED: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + /* fall through */ case TC_ACT_SHOT: return NULL; } @@ -80,7 +81,7 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) if (ret & __NET_XMIT_BYPASS) qdisc_qstats_drop(sch); - kfree_skb(skb); + __qdisc_drop(skb, to_free); return ret; } #endif @@ -212,7 +213,7 @@ static int prio_init(struct Qdisc *sch, struct nlattr *opt) if (!opt) return -EINVAL; - err = tcf_block_get(&q->block, &q->filter_list); + err = tcf_block_get(&q->block, &q->filter_list, sch); if (err) return err; @@ -260,7 +261,7 @@ prio_leaf(struct Qdisc *sch, unsigned long arg) return q->queues[band]; } -static unsigned long prio_get(struct Qdisc *sch, u32 classid) +static unsigned long prio_find(struct Qdisc *sch, u32 classid) { struct prio_sched_data *q = qdisc_priv(sch); unsigned long band = TC_H_MIN(classid); @@ -272,11 +273,11 @@ static unsigned long prio_get(struct Qdisc *sch, u32 classid) static unsigned long prio_bind(struct Qdisc *sch, unsigned long parent, u32 classid) { - return prio_get(sch, classid); + return prio_find(sch, classid); } -static void prio_put(struct Qdisc *q, unsigned long cl) +static void prio_unbind(struct Qdisc *q, unsigned long cl) { } @@ -338,12 +339,11 @@ static struct tcf_block *prio_tcf_block(struct Qdisc *sch, unsigned long cl) static const struct Qdisc_class_ops prio_class_ops = { .graft = prio_graft, .leaf = prio_leaf, - .get = prio_get, - .put = prio_put, + .find = prio_find, .walk = prio_walk, .tcf_block = prio_tcf_block, .bind_tcf = prio_bind, - .unbind_tcf = prio_put, + .unbind_tcf = prio_unbind, .dump = prio_dump_class, .dump_stats = prio_dump_class_stats, }; diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 0e16dfda0bd7..6962b37a3ad3 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -132,7 +132,6 @@ struct qfq_aggregate; struct qfq_class { struct Qdisc_class_common common; - unsigned int refcnt; unsigned int filter_cnt; struct gnet_stats_basic_packed bstats; @@ -477,7 +476,6 @@ static int qfq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, if (cl == NULL) return -ENOBUFS; - cl->refcnt = 1; cl->common.classid = classid; cl->deficit = lmax; @@ -555,32 +553,15 @@ static int qfq_delete_class(struct Qdisc *sch, unsigned long arg) qfq_purge_queue(cl); qdisc_class_hash_remove(&q->clhash, &cl->common); - BUG_ON(--cl->refcnt == 0); - /* - * This shouldn't happen: we "hold" one cops->get() when called - * from tc_ctl_tclass; the destroy method is done from cops->put(). - */ - sch_tree_unlock(sch); - return 0; -} - -static unsigned long qfq_get_class(struct Qdisc *sch, u32 classid) -{ - struct qfq_class *cl = qfq_find_class(sch, classid); - if (cl != NULL) - cl->refcnt++; - - return (unsigned long)cl; + qfq_destroy_class(sch, cl); + return 0; } -static void qfq_put_class(struct Qdisc *sch, unsigned long arg) +static unsigned long qfq_search_class(struct Qdisc *sch, u32 classid) { - struct qfq_class *cl = (struct qfq_class *)arg; - - if (--cl->refcnt == 0) - qfq_destroy_class(sch, cl); + return (unsigned long)qfq_find_class(sch, classid); } static struct tcf_block *qfq_tcf_block(struct Qdisc *sch, unsigned long cl) @@ -728,6 +709,7 @@ static struct qfq_class *qfq_classify(struct sk_buff *skb, struct Qdisc *sch, case TC_ACT_STOLEN: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + /* fall through */ case TC_ACT_SHOT: return NULL; } @@ -1234,7 +1216,7 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, if (cl == NULL) { if (err & __NET_XMIT_BYPASS) qdisc_qstats_drop(sch); - kfree_skb(skb); + __qdisc_drop(skb, to_free); return err; } pr_debug("qfq_enqueue: cl = %x\n", cl->common.classid); @@ -1428,8 +1410,7 @@ static void qfq_qlen_notify(struct Qdisc *sch, unsigned long arg) struct qfq_sched *q = qdisc_priv(sch); struct qfq_class *cl = (struct qfq_class *)arg; - if (cl->qdisc->q.qlen == 0) - qfq_deactivate_class(q, cl); + qfq_deactivate_class(q, cl); } static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt) @@ -1439,7 +1420,7 @@ static int qfq_init_qdisc(struct Qdisc *sch, struct nlattr *opt) int i, j, err; u32 max_cl_shift, maxbudg_shift, max_classes; - err = tcf_block_get(&q->block, &q->filter_list); + err = tcf_block_get(&q->block, &q->filter_list, sch); if (err) return err; @@ -1511,8 +1492,7 @@ static void qfq_destroy_qdisc(struct Qdisc *sch) static const struct Qdisc_class_ops qfq_class_ops = { .change = qfq_change_class, .delete = qfq_delete_class, - .get = qfq_get_class, - .put = qfq_put_class, + .find = qfq_search_class, .tcf_block = qfq_tcf_block, .bind_tcf = qfq_bind_tcf, .unbind_tcf = qfq_unbind_tcf, diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 11292adce412..7f8ea9e297c3 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -19,6 +19,7 @@ #include <linux/kernel.h> #include <linux/skbuff.h> #include <net/pkt_sched.h> +#include <net/pkt_cls.h> #include <net/inet_ecn.h> #include <net/red.h> @@ -40,6 +41,7 @@ struct red_sched_data { u32 limit; /* HARD maximal queue length */ unsigned char flags; struct timer_list adapt_timer; + struct Qdisc *sch; struct red_parms parms; struct red_vars vars; struct red_stats stats; @@ -147,11 +149,37 @@ static void red_reset(struct Qdisc *sch) red_restart(&q->vars); } +static int red_offload(struct Qdisc *sch, bool enable) +{ + struct red_sched_data *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + struct tc_red_qopt_offload opt = { + .handle = sch->handle, + .parent = sch->parent, + }; + + if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) + return -EOPNOTSUPP; + + if (enable) { + opt.command = TC_RED_REPLACE; + opt.set.min = q->parms.qth_min >> q->parms.Wlog; + opt.set.max = q->parms.qth_max >> q->parms.Wlog; + opt.set.probability = q->parms.max_P; + opt.set.is_ecn = red_use_ecn(q); + } else { + opt.command = TC_RED_DESTROY; + } + + return dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, &opt); +} + static void red_destroy(struct Qdisc *sch) { struct red_sched_data *q = qdisc_priv(sch); del_timer_sync(&q->adapt_timer); + red_offload(sch, false); qdisc_destroy(q->qdisc); } @@ -218,13 +246,14 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt) red_start_of_idle_period(&q->vars); sch_tree_unlock(sch); + red_offload(sch, true); return 0; } -static inline void red_adaptative_timer(unsigned long arg) +static inline void red_adaptative_timer(struct timer_list *t) { - struct Qdisc *sch = (struct Qdisc *)arg; - struct red_sched_data *q = qdisc_priv(sch); + struct red_sched_data *q = from_timer(q, t, adapt_timer); + struct Qdisc *sch = q->sch; spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch)); spin_lock(root_lock); @@ -238,10 +267,40 @@ static int red_init(struct Qdisc *sch, struct nlattr *opt) struct red_sched_data *q = qdisc_priv(sch); q->qdisc = &noop_qdisc; - setup_timer(&q->adapt_timer, red_adaptative_timer, (unsigned long)sch); + q->sch = sch; + timer_setup(&q->adapt_timer, red_adaptative_timer, 0); return red_change(sch, opt); } +static int red_dump_offload(struct Qdisc *sch, struct tc_red_qopt *opt) +{ + struct net_device *dev = qdisc_dev(sch); + struct tc_red_qopt_offload hw_stats = { + .command = TC_RED_STATS, + .handle = sch->handle, + .parent = sch->parent, + { + .stats.bstats = &sch->bstats, + .stats.qstats = &sch->qstats, + }, + }; + int err; + + opt->flags &= ~TC_RED_OFFLOADED; + if (!tc_can_offload(dev) || !dev->netdev_ops->ndo_setup_tc) + return 0; + + err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_RED, + &hw_stats); + if (err == -EOPNOTSUPP) + return 0; + + if (!err) + opt->flags |= TC_RED_OFFLOADED; + + return err; +} + static int red_dump(struct Qdisc *sch, struct sk_buff *skb) { struct red_sched_data *q = qdisc_priv(sch); @@ -255,8 +314,13 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb) .Plog = q->parms.Plog, .Scell_log = q->parms.Scell_log, }; + int err; sch->qstats.backlog = q->qdisc->qstats.backlog; + err = red_dump_offload(sch, &opt); + if (err) + goto nla_put_failure; + opts = nla_nest_start(skb, TCA_OPTIONS); if (opts == NULL) goto nla_put_failure; @@ -273,6 +337,7 @@ nla_put_failure: static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d) { struct red_sched_data *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); struct tc_red_xstats st = { .early = q->stats.prob_drop + q->stats.forced_drop, .pdrop = q->stats.pdrop, @@ -280,6 +345,26 @@ static int red_dump_stats(struct Qdisc *sch, struct gnet_dump *d) .marked = q->stats.prob_mark + q->stats.forced_mark, }; + if (tc_can_offload(dev) && dev->netdev_ops->ndo_setup_tc) { + struct red_stats hw_stats = {0}; + struct tc_red_qopt_offload hw_stats_request = { + .command = TC_RED_XSTATS, + .handle = sch->handle, + .parent = sch->parent, + { + .xstats = &hw_stats, + }, + }; + if (!dev->netdev_ops->ndo_setup_tc(dev, + TC_SETUP_QDISC_RED, + &hw_stats_request)) { + st.early += hw_stats.prob_drop + hw_stats.forced_drop; + st.pdrop += hw_stats.pdrop; + st.other += hw_stats.other; + st.marked += hw_stats.prob_mark + hw_stats.forced_mark; + } + } + return gnet_stats_copy_app(d, &st, sizeof(st)); } @@ -311,15 +396,11 @@ static struct Qdisc *red_leaf(struct Qdisc *sch, unsigned long arg) return q->qdisc; } -static unsigned long red_get(struct Qdisc *sch, u32 classid) +static unsigned long red_find(struct Qdisc *sch, u32 classid) { return 1; } -static void red_put(struct Qdisc *sch, unsigned long arg) -{ -} - static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker) { if (!walker->stop) { @@ -335,8 +416,7 @@ static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker) static const struct Qdisc_class_ops red_class_ops = { .graft = red_graft, .leaf = red_leaf, - .get = red_get, - .put = red_put, + .find = red_find, .walk = red_walk, .dump = red_dump_class, }; diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index 11fb6ec878d6..0678debdd856 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -268,6 +268,7 @@ static bool sfb_classify(struct sk_buff *skb, struct tcf_proto *fl, case TC_ACT_QUEUED: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + /* fall through */ case TC_ACT_SHOT: return false; } @@ -553,7 +554,7 @@ static int sfb_init(struct Qdisc *sch, struct nlattr *opt) struct sfb_sched_data *q = qdisc_priv(sch); int err; - err = tcf_block_get(&q->block, &q->filter_list); + err = tcf_block_get(&q->block, &q->filter_list, sch); if (err) return err; @@ -632,12 +633,12 @@ static struct Qdisc *sfb_leaf(struct Qdisc *sch, unsigned long arg) return q->qdisc; } -static unsigned long sfb_get(struct Qdisc *sch, u32 classid) +static unsigned long sfb_find(struct Qdisc *sch, u32 classid) { return 1; } -static void sfb_put(struct Qdisc *sch, unsigned long arg) +static void sfb_unbind(struct Qdisc *sch, unsigned long arg) { } @@ -683,14 +684,13 @@ static unsigned long sfb_bind(struct Qdisc *sch, unsigned long parent, static const struct Qdisc_class_ops sfb_class_ops = { .graft = sfb_graft, .leaf = sfb_leaf, - .get = sfb_get, - .put = sfb_put, + .find = sfb_find, .change = sfb_change_class, .delete = sfb_delete, .walk = sfb_walk, .tcf_block = sfb_tcf_block, .bind_tcf = sfb_bind, - .unbind_tcf = sfb_put, + .unbind_tcf = sfb_unbind, .dump = sfb_dump_class, }; diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index 82469ef9655e..890f4a4564e7 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -145,6 +145,7 @@ struct sfq_sched_data { int perturb_period; unsigned int quantum; /* Allotment per round: MUST BE >= MTU */ struct timer_list perturb_timer; + struct Qdisc *sch; }; /* @@ -189,6 +190,7 @@ static unsigned int sfq_classify(struct sk_buff *skb, struct Qdisc *sch, case TC_ACT_QUEUED: case TC_ACT_TRAP: *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN; + /* fall through */ case TC_ACT_SHOT: return 0; } @@ -292,7 +294,7 @@ static inline void slot_queue_add(struct sfq_slot *slot, struct sk_buff *skb) slot->skblist_prev = skb; } -static unsigned int sfq_drop(struct Qdisc *sch) +static unsigned int sfq_drop(struct Qdisc *sch, struct sk_buff **to_free) { struct sfq_sched_data *q = qdisc_priv(sch); sfq_index x, d = q->cur_depth; @@ -310,9 +312,8 @@ drop: slot->backlog -= len; sfq_dec(q, x); sch->q.qlen--; - qdisc_qstats_drop(sch); qdisc_qstats_backlog_dec(sch, skb); - kfree_skb(skb); + qdisc_drop(skb, sch, to_free); return len; } @@ -360,7 +361,7 @@ sfq_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) if (hash == 0) { if (ret & __NET_XMIT_BYPASS) qdisc_qstats_drop(sch); - kfree_skb(skb); + __qdisc_drop(skb, to_free); return ret; } hash--; @@ -465,7 +466,7 @@ enqueue: return NET_XMIT_SUCCESS; qlen = slot->qlen; - dropped = sfq_drop(sch); + dropped = sfq_drop(sch, to_free); /* Return Congestion Notification only if we dropped a packet * from this flow. */ @@ -605,10 +606,10 @@ drop: qdisc_tree_reduce_backlog(sch, dropped, drop_len); } -static void sfq_perturbation(unsigned long arg) +static void sfq_perturbation(struct timer_list *t) { - struct Qdisc *sch = (struct Qdisc *)arg; - struct sfq_sched_data *q = qdisc_priv(sch); + struct sfq_sched_data *q = from_timer(q, t, perturb_timer); + struct Qdisc *sch = q->sch; spinlock_t *root_lock = qdisc_lock(qdisc_root_sleeping(sch)); spin_lock(root_lock); @@ -628,6 +629,8 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) struct tc_sfq_qopt_v1 *ctl_v1 = NULL; unsigned int qlen, dropped = 0; struct red_parms *p = NULL; + struct sk_buff *to_free = NULL; + struct sk_buff *tail = NULL; if (opt->nla_len < nla_attr_size(sizeof(*ctl))) return -EINVAL; @@ -674,8 +677,13 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) } qlen = sch->q.qlen; - while (sch->q.qlen > q->limit) - dropped += sfq_drop(sch); + while (sch->q.qlen > q->limit) { + dropped += sfq_drop(sch, &to_free); + if (!tail) + tail = to_free; + } + + rtnl_kfree_skbs(to_free, tail); qdisc_tree_reduce_backlog(sch, qlen - sch->q.qlen, dropped); del_timer(&q->perturb_timer); @@ -716,13 +724,12 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) int i; int err; - err = tcf_block_get(&q->block, &q->filter_list); + timer_setup(&q->perturb_timer, sfq_perturbation, TIMER_DEFERRABLE); + + err = tcf_block_get(&q->block, &q->filter_list, sch); if (err) return err; - setup_deferrable_timer(&q->perturb_timer, sfq_perturbation, - (unsigned long)sch); - for (i = 0; i < SFQ_MAX_DEPTH + 1; i++) { q->dep[i].next = i + SFQ_MAX_FLOWS; q->dep[i].prev = i + SFQ_MAX_FLOWS; @@ -808,7 +815,7 @@ static struct Qdisc *sfq_leaf(struct Qdisc *sch, unsigned long arg) return NULL; } -static unsigned long sfq_get(struct Qdisc *sch, u32 classid) +static unsigned long sfq_find(struct Qdisc *sch, u32 classid) { return 0; } @@ -821,7 +828,7 @@ static unsigned long sfq_bind(struct Qdisc *sch, unsigned long parent, return 0; } -static void sfq_put(struct Qdisc *q, unsigned long cl) +static void sfq_unbind(struct Qdisc *q, unsigned long cl) { } @@ -885,11 +892,10 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg) static const struct Qdisc_class_ops sfq_class_ops = { .leaf = sfq_leaf, - .get = sfq_get, - .put = sfq_put, + .find = sfq_find, .tcf_block = sfq_tcf_block, .bind_tcf = sfq_bind, - .unbind_tcf = sfq_put, + .unbind_tcf = sfq_unbind, .dump = sfq_dump_class, .dump_stats = sfq_dump_class_stats, .walk = sfq_walk, diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index b2e4b6ad241a..120f4f365967 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -425,12 +425,13 @@ static int tbf_init(struct Qdisc *sch, struct nlattr *opt) { struct tbf_sched_data *q = qdisc_priv(sch); + qdisc_watchdog_init(&q->watchdog, sch); + q->qdisc = &noop_qdisc; + if (opt == NULL) return -EINVAL; q->t_c = ktime_get_ns(); - qdisc_watchdog_init(&q->watchdog, sch); - q->qdisc = &noop_qdisc; return tbf_change(sch, opt); } @@ -510,15 +511,11 @@ static struct Qdisc *tbf_leaf(struct Qdisc *sch, unsigned long arg) return q->qdisc; } -static unsigned long tbf_get(struct Qdisc *sch, u32 classid) +static unsigned long tbf_find(struct Qdisc *sch, u32 classid) { return 1; } -static void tbf_put(struct Qdisc *sch, unsigned long arg) -{ -} - static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker) { if (!walker->stop) { @@ -534,8 +531,7 @@ static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker) static const struct Qdisc_class_ops tbf_class_ops = { .graft = tbf_graft, .leaf = tbf_leaf, - .get = tbf_get, - .put = tbf_put, + .find = tbf_find, .walk = tbf_walk, .dump = tbf_dump_class, }; diff --git a/net/sctp/Makefile b/net/sctp/Makefile index 70f1b570bab9..1ca84a288443 100644 --- a/net/sctp/Makefile +++ b/net/sctp/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for SCTP support code. # @@ -12,7 +13,8 @@ sctp-y := sm_statetable.o sm_statefuns.o sm_sideeffect.o \ inqueue.o outqueue.o ulpqueue.o \ tsnmap.o bind_addr.o socket.o primitive.o \ output.o input.o debug.o stream.o auth.o \ - offload.o + offload.o stream_sched.o stream_sched_prio.o \ + stream_sched_rr.o sctp_probe-y := probe.o diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 40ec83679d6e..69394f4d6091 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -63,11 +63,11 @@ static void sctp_assoc_free_asconf_queue(struct sctp_association *asoc); /* 1st Level Abstractions. */ /* Initialize a new association from provided memory. */ -static struct sctp_association *sctp_association_init(struct sctp_association *asoc, - const struct sctp_endpoint *ep, - const struct sock *sk, - sctp_scope_t scope, - gfp_t gfp) +static struct sctp_association *sctp_association_init( + struct sctp_association *asoc, + const struct sctp_endpoint *ep, + const struct sock *sk, + enum sctp_scope scope, gfp_t gfp) { struct net *net = sock_net(sk); struct sctp_sock *sp; @@ -149,8 +149,7 @@ static struct sctp_association *sctp_association_init(struct sctp_association *a /* Initializes the timers */ for (i = SCTP_EVENT_TIMEOUT_NONE; i < SCTP_NUM_TIMEOUT_TYPES; ++i) - setup_timer(&asoc->timers[i], sctp_timer_events[i], - (unsigned long)asoc); + timer_setup(&asoc->timers[i], sctp_timer_events[i], 0); /* Pull default initialization values from the sock options. * Note: This assumes that the values have already been @@ -301,9 +300,8 @@ fail_init: /* Allocate and initialize a new association */ struct sctp_association *sctp_association_new(const struct sctp_endpoint *ep, - const struct sock *sk, - sctp_scope_t scope, - gfp_t gfp) + const struct sock *sk, + enum sctp_scope scope, gfp_t gfp) { struct sctp_association *asoc; @@ -797,7 +795,7 @@ void sctp_assoc_del_nonprimary_peers(struct sctp_association *asoc, */ void sctp_assoc_control_transport(struct sctp_association *asoc, struct sctp_transport *transport, - sctp_transport_cmd_t command, + enum sctp_transport_cmd command, sctp_sn_error_t error) { struct sctp_ulpevent *event; @@ -1022,11 +1020,11 @@ static void sctp_assoc_bh_rcv(struct work_struct *work) container_of(work, struct sctp_association, base.inqueue.immediate); struct net *net = sock_net(asoc->base.sk); + union sctp_subtype subtype; struct sctp_endpoint *ep; struct sctp_chunk *chunk; struct sctp_inq *inqueue; int state; - sctp_subtype_t subtype; int error = 0; /* The association should be held so we should be safe. */ @@ -1564,7 +1562,7 @@ void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned int len) * local endpoint and the remote peer. */ int sctp_assoc_set_bind_addr_from_ep(struct sctp_association *asoc, - sctp_scope_t scope, gfp_t gfp) + enum sctp_scope scope, gfp_t gfp) { int flags; diff --git a/net/sctp/auth.c b/net/sctp/auth.c index e001b01b0e68..00667c50efa7 100644 --- a/net/sctp/auth.c +++ b/net/sctp/auth.c @@ -185,9 +185,9 @@ static int sctp_auth_compare_vectors(struct sctp_auth_bytes *vector1, * are called the two key vectors. */ static struct sctp_auth_bytes *sctp_auth_make_key_vector( - sctp_random_param_t *random, - sctp_chunks_param_t *chunks, - sctp_hmac_algo_param_t *hmacs, + struct sctp_random_param *random, + struct sctp_chunks_param *chunks, + struct sctp_hmac_algo_param *hmacs, gfp_t gfp) { struct sctp_auth_bytes *new; @@ -226,10 +226,9 @@ static struct sctp_auth_bytes *sctp_auth_make_local_vector( gfp_t gfp) { return sctp_auth_make_key_vector( - (sctp_random_param_t *)asoc->c.auth_random, - (sctp_chunks_param_t *)asoc->c.auth_chunks, - (sctp_hmac_algo_param_t *)asoc->c.auth_hmacs, - gfp); + (struct sctp_random_param *)asoc->c.auth_random, + (struct sctp_chunks_param *)asoc->c.auth_chunks, + (struct sctp_hmac_algo_param *)asoc->c.auth_hmacs, gfp); } /* Make a key vector based on peer's parameters */ diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 1ebc184a0e23..7df3704982f5 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -45,9 +45,9 @@ #include <net/sctp/sm.h> /* Forward declarations for internal helpers. */ -static int sctp_copy_one_addr(struct net *, struct sctp_bind_addr *, - union sctp_addr *, sctp_scope_t scope, gfp_t gfp, - int flags); +static int sctp_copy_one_addr(struct net *net, struct sctp_bind_addr *dest, + union sctp_addr *addr, enum sctp_scope scope, + gfp_t gfp, int flags); static void sctp_bind_addr_clean(struct sctp_bind_addr *); /* First Level Abstractions. */ @@ -57,7 +57,7 @@ static void sctp_bind_addr_clean(struct sctp_bind_addr *); */ int sctp_bind_addr_copy(struct net *net, struct sctp_bind_addr *dest, const struct sctp_bind_addr *src, - sctp_scope_t scope, gfp_t gfp, + enum sctp_scope scope, gfp_t gfp, int flags) { struct sctp_sockaddr_entry *addr; @@ -440,9 +440,8 @@ union sctp_addr *sctp_find_unmatch_addr(struct sctp_bind_addr *bp, /* Copy out addresses from the global local address list. */ static int sctp_copy_one_addr(struct net *net, struct sctp_bind_addr *dest, - union sctp_addr *addr, - sctp_scope_t scope, gfp_t gfp, - int flags) + union sctp_addr *addr, enum sctp_scope scope, + gfp_t gfp, int flags) { int error = 0; @@ -485,9 +484,10 @@ int sctp_is_any(struct sock *sk, const union sctp_addr *addr) } /* Is 'addr' valid for 'scope'? */ -int sctp_in_scope(struct net *net, const union sctp_addr *addr, sctp_scope_t scope) +int sctp_in_scope(struct net *net, const union sctp_addr *addr, + enum sctp_scope scope) { - sctp_scope_t addr_scope = sctp_scope(addr); + enum sctp_scope addr_scope = sctp_scope(addr); /* The unusable SCTP addresses will not be considered with * any defined scopes. @@ -545,7 +545,7 @@ int sctp_is_ep_boundall(struct sock *sk) ********************************************************************/ /* What is the scope of 'addr'? */ -sctp_scope_t sctp_scope(const union sctp_addr *addr) +enum sctp_scope sctp_scope(const union sctp_addr *addr) { struct sctp_af *af; diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 1323d41e68b8..7b261afc47b9 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -201,7 +201,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, struct sctp_hmac *hmac_desc = sctp_auth_asoc_get_hmac(asoc); if (hmac_desc) - max_data -= SCTP_PAD4(sizeof(sctp_auth_chunk_t) + + max_data -= SCTP_PAD4(sizeof(struct sctp_auth_chunk) + hmac_desc->hmac_len); } @@ -221,7 +221,7 @@ struct sctp_datamsg *sctp_datamsg_from_user(struct sctp_association *asoc, asoc->outqueue.out_qlen == 0 && list_empty(&asoc->outqueue.retransmit) && msg_len > max_data) - first_len -= SCTP_PAD4(sizeof(sctp_sack_chunk_t)); + first_len -= SCTP_PAD4(sizeof(struct sctp_sack_chunk)); /* Encourage Cookie-ECHO bundling. */ if (asoc->state < SCTP_STATE_COOKIE_ECHOED) @@ -311,10 +311,10 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk) if (chunk->sent_count) { chunk->asoc->abandoned_sent[SCTP_PR_INDEX(TTL)]++; - streamout->abandoned_sent[SCTP_PR_INDEX(TTL)]++; + streamout->ext->abandoned_sent[SCTP_PR_INDEX(TTL)]++; } else { chunk->asoc->abandoned_unsent[SCTP_PR_INDEX(TTL)]++; - streamout->abandoned_unsent[SCTP_PR_INDEX(TTL)]++; + streamout->ext->abandoned_unsent[SCTP_PR_INDEX(TTL)]++; } return 1; } else if (SCTP_PR_RTX_ENABLED(chunk->sinfo.sinfo_flags) && @@ -323,7 +323,7 @@ int sctp_chunk_abandoned(struct sctp_chunk *chunk) &chunk->asoc->stream.out[chunk->sinfo.sinfo_stream]; chunk->asoc->abandoned_sent[SCTP_PR_INDEX(RTX)]++; - streamout->abandoned_sent[SCTP_PR_INDEX(RTX)]++; + streamout->ext->abandoned_sent[SCTP_PR_INDEX(RTX)]++; return 1; } else if (!SCTP_PR_POLICY(chunk->sinfo.sinfo_flags) && chunk->msg->expires_at && diff --git a/net/sctp/debug.c b/net/sctp/debug.c index 2e47eb2f05cb..3f619fdcbf0a 100644 --- a/net/sctp/debug.c +++ b/net/sctp/debug.c @@ -60,7 +60,7 @@ static const char *const sctp_cid_tbl[SCTP_NUM_BASE_CHUNK_TYPES] = { }; /* Lookup "chunk type" debug name. */ -const char *sctp_cname(const sctp_subtype_t cid) +const char *sctp_cname(const union sctp_subtype cid) { if (cid.chunk <= SCTP_CID_BASE_MAX) return sctp_cid_tbl[cid.chunk]; @@ -130,7 +130,7 @@ static const char *const sctp_primitive_tbl[SCTP_NUM_PRIMITIVE_TYPES] = { }; /* Lookup primitive debug name. */ -const char *sctp_pname(const sctp_subtype_t id) +const char *sctp_pname(const union sctp_subtype id) { if (id.primitive <= SCTP_EVENT_PRIMITIVE_MAX) return sctp_primitive_tbl[id.primitive]; @@ -143,7 +143,7 @@ static const char *const sctp_other_tbl[] = { }; /* Lookup "other" debug name. */ -const char *sctp_oname(const sctp_subtype_t id) +const char *sctp_oname(const union sctp_subtype id) { if (id.other <= SCTP_EVENT_OTHER_MAX) return sctp_other_tbl[id.other]; @@ -165,7 +165,7 @@ static const char *const sctp_timer_tbl[] = { }; /* Lookup timer debug name. */ -const char *sctp_tname(const sctp_subtype_t id) +const char *sctp_tname(const union sctp_subtype id) { BUILD_BUG_ON(SCTP_EVENT_TIMEOUT_MAX + 1 != ARRAY_SIZE(sctp_timer_tbl)); diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index 0e86f988f836..ee1e601a0b11 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -73,13 +73,13 @@ static struct sctp_endpoint *sctp_endpoint_init(struct sctp_endpoint *ep, * variables. There are arrays that we encode directly * into parameters to make the rest of the operations easier. */ - auth_hmacs = kzalloc(sizeof(sctp_hmac_algo_param_t) + - sizeof(__u16) * SCTP_AUTH_NUM_HMACS, gfp); + auth_hmacs = kzalloc(sizeof(*auth_hmacs) + + sizeof(__u16) * SCTP_AUTH_NUM_HMACS, gfp); if (!auth_hmacs) goto nomem; - auth_chunks = kzalloc(sizeof(sctp_chunks_param_t) + - SCTP_NUM_CHUNK_TYPES, gfp); + auth_chunks = kzalloc(sizeof(*auth_chunks) + + SCTP_NUM_CHUNK_TYPES, gfp); if (!auth_chunks) goto nomem; @@ -382,8 +382,8 @@ static void sctp_endpoint_bh_rcv(struct work_struct *work) struct sctp_transport *transport; struct sctp_chunk *chunk; struct sctp_inq *inqueue; - sctp_subtype_t subtype; - sctp_state_t state; + union sctp_subtype subtype; + enum sctp_state state; int error = 0; int first_time = 1; /* is this the first time through the loop */ diff --git a/net/sctp/input.c b/net/sctp/input.c index 41eb2ec10460..621b5ca3fd1c 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -421,7 +421,7 @@ void sctp_icmp_redirect(struct sock *sk, struct sctp_transport *t, { struct dst_entry *dst; - if (!t) + if (sock_owned_by_user(sk) || !t) return; dst = sctp_transport_dst_check(t); if (dst) @@ -794,7 +794,7 @@ hit: struct sctp_hash_cmp_arg { const union sctp_addr *paddr; const struct net *net; - u16 lport; + __be16 lport; }; static inline int sctp_hash_cmp(struct rhashtable_compare_arg *arg, @@ -820,37 +820,37 @@ out: return err; } -static inline u32 sctp_hash_obj(const void *data, u32 len, u32 seed) +static inline __u32 sctp_hash_obj(const void *data, u32 len, u32 seed) { const struct sctp_transport *t = data; const union sctp_addr *paddr = &t->ipaddr; const struct net *net = sock_net(t->asoc->base.sk); - u16 lport = htons(t->asoc->base.bind_addr.port); - u32 addr; + __be16 lport = htons(t->asoc->base.bind_addr.port); + __u32 addr; if (paddr->sa.sa_family == AF_INET6) addr = jhash(&paddr->v6.sin6_addr, 16, seed); else - addr = paddr->v4.sin_addr.s_addr; + addr = (__force __u32)paddr->v4.sin_addr.s_addr; - return jhash_3words(addr, ((__u32)paddr->v4.sin_port) << 16 | + return jhash_3words(addr, ((__force __u32)paddr->v4.sin_port) << 16 | (__force __u32)lport, net_hash_mix(net), seed); } -static inline u32 sctp_hash_key(const void *data, u32 len, u32 seed) +static inline __u32 sctp_hash_key(const void *data, u32 len, u32 seed) { const struct sctp_hash_cmp_arg *x = data; const union sctp_addr *paddr = x->paddr; const struct net *net = x->net; - u16 lport = x->lport; - u32 addr; + __be16 lport = x->lport; + __u32 addr; if (paddr->sa.sa_family == AF_INET6) addr = jhash(&paddr->v6.sin6_addr, 16, seed); else - addr = paddr->v4.sin_addr.s_addr; + addr = (__force __u32)paddr->v4.sin_addr.s_addr; - return jhash_3words(addr, ((__u32)paddr->v4.sin_port) << 16 | + return jhash_3words(addr, ((__force __u32)paddr->v4.sin_port) << 16 | (__force __u32)lport, net_hash_mix(net), seed); } @@ -1111,7 +1111,7 @@ static struct sctp_association *__sctp_rcv_asconf_lookup( __be16 peer_port, struct sctp_transport **transportp) { - sctp_addip_chunk_t *asconf = (struct sctp_addip_chunk *)ch; + struct sctp_addip_chunk *asconf = (struct sctp_addip_chunk *)ch; struct sctp_af *af; union sctp_addr_param *param; union sctp_addr paddr; diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 2a186b201ad2..3b18085e3b10 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -243,8 +243,8 @@ static void sctp_v6_get_dst(struct sctp_transport *t, union sctp_addr *saddr, union sctp_addr *daddr = &t->ipaddr; union sctp_addr dst_saddr; struct in6_addr *final_p, final; + enum sctp_scope scope; __u8 matchlen = 0; - sctp_scope_t scope; memset(fl6, 0, sizeof(struct flowi6)); fl6->daddr = daddr->v6.sin6_addr; @@ -497,7 +497,7 @@ static void sctp_v6_from_addr_param(union sctp_addr *addr, static int sctp_v6_to_addr_param(const union sctp_addr *addr, union sctp_addr_param *param) { - int length = sizeof(sctp_ipv6addr_param_t); + int length = sizeof(struct sctp_ipv6addr_param); param->v6.param_hdr.type = SCTP_PARAM_IPV6_ADDRESS; param->v6.param_hdr.length = htons(length); @@ -512,7 +512,9 @@ static void sctp_v6_to_addr(union sctp_addr *addr, struct in6_addr *saddr, { addr->sa.sa_family = AF_INET6; addr->v6.sin6_port = port; + addr->v6.sin6_flowinfo = 0; addr->v6.sin6_addr = *saddr; + addr->v6.sin6_scope_id = 0; } /* Compare addresses exactly. @@ -624,10 +626,10 @@ static int sctp_v6_addr_valid(union sctp_addr *addr, } /* What is the scope of 'addr'? */ -static sctp_scope_t sctp_v6_scope(union sctp_addr *addr) +static enum sctp_scope sctp_v6_scope(union sctp_addr *addr) { + enum sctp_scope retval; int v6scope; - sctp_scope_t retval; /* The IPv6 scope is really a set of bit fields. * See IFA_* in <net/if_inet6.h>. Map to a generic SCTP scope. @@ -736,7 +738,7 @@ static int sctp_v6_skb_iif(const struct sk_buff *skb) /* Was this packet marked by Explicit Congestion Notification? */ static int sctp_v6_is_ce(const struct sk_buff *skb) { - return *((__u32 *)(ipv6_hdr(skb))) & htonl(1 << 20); + return *((__u32 *)(ipv6_hdr(skb))) & (__force __u32)htonl(1 << 20); } /* Dump the v6 addr to the seq file. */ @@ -805,9 +807,10 @@ static void sctp_inet6_skb_msgname(struct sk_buff *skb, char *msgname, addr->v6.sin6_flowinfo = 0; addr->v6.sin6_port = sh->source; addr->v6.sin6_addr = ipv6_hdr(skb)->saddr; - if (ipv6_addr_type(&addr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) { + if (ipv6_addr_type(&addr->v6.sin6_addr) & IPV6_ADDR_LINKLOCAL) addr->v6.sin6_scope_id = sctp_v6_skb_iif(skb); - } + else + addr->v6.sin6_scope_id = 0; } *addr_len = sctp_v6_addr_to_user(sctp_sk(skb->sk), addr); @@ -880,8 +883,10 @@ static int sctp_inet6_bind_verify(struct sctp_sock *opt, union sctp_addr *addr) net = sock_net(&opt->inet.sk); rcu_read_lock(); dev = dev_get_by_index_rcu(net, addr->v6.sin6_scope_id); - if (!dev || - !ipv6_chk_addr(net, &addr->v6.sin6_addr, dev, 0)) { + if (!dev || !(opt->inet.freebind || + net->ipv6.sysctl.ip_nonlocal_bind || + ipv6_chk_addr(net, &addr->v6.sin6_addr, + dev, 0))) { rcu_read_unlock(); return 0; } diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c index 105ac3327b28..aeea6da81441 100644 --- a/net/sctp/objcnt.c +++ b/net/sctp/objcnt.c @@ -57,7 +57,7 @@ SCTP_DBG_OBJCNT(keys); /* An array to make it easy to pretty print the debug information * to the proc fs. */ -static sctp_dbg_objcnt_entry_t sctp_dbg_objcnt[] = { +static struct sctp_dbg_objcnt_entry sctp_dbg_objcnt[] = { SCTP_DBG_OBJCNT_ENTRY(sock), SCTP_DBG_OBJCNT_ENTRY(ep), SCTP_DBG_OBJCNT_ENTRY(assoc), diff --git a/net/sctp/output.c b/net/sctp/output.c index 9d8504985744..4a865cd06d76 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -57,15 +57,15 @@ #include <net/sctp/checksum.h> /* Forward declarations for private helpers. */ -static sctp_xmit_t __sctp_packet_append_chunk(struct sctp_packet *packet, - struct sctp_chunk *chunk); -static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, - struct sctp_chunk *chunk); +static enum sctp_xmit __sctp_packet_append_chunk(struct sctp_packet *packet, + struct sctp_chunk *chunk); +static enum sctp_xmit sctp_packet_can_append_data(struct sctp_packet *packet, + struct sctp_chunk *chunk); static void sctp_packet_append_data(struct sctp_packet *packet, - struct sctp_chunk *chunk); -static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet, - struct sctp_chunk *chunk, - u16 chunk_len); + struct sctp_chunk *chunk); +static enum sctp_xmit sctp_packet_will_fit(struct sctp_packet *packet, + struct sctp_chunk *chunk, + u16 chunk_len); static void sctp_packet_reset(struct sctp_packet *packet) { @@ -181,11 +181,11 @@ void sctp_packet_free(struct sctp_packet *packet) * as it can fit in the packet, but any more data that does not fit in this * packet can be sent only after receiving the COOKIE_ACK. */ -sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet, - struct sctp_chunk *chunk, - int one_packet, gfp_t gfp) +enum sctp_xmit sctp_packet_transmit_chunk(struct sctp_packet *packet, + struct sctp_chunk *chunk, + int one_packet, gfp_t gfp) { - sctp_xmit_t retval; + enum sctp_xmit retval; pr_debug("%s: packet:%p size:%zu chunk:%p size:%d\n", __func__, packet, packet->size, chunk, chunk->skb ? chunk->skb->len : -1); @@ -218,12 +218,12 @@ sctp_xmit_t sctp_packet_transmit_chunk(struct sctp_packet *packet, } /* Try to bundle an auth chunk into the packet. */ -static sctp_xmit_t sctp_packet_bundle_auth(struct sctp_packet *pkt, - struct sctp_chunk *chunk) +static enum sctp_xmit sctp_packet_bundle_auth(struct sctp_packet *pkt, + struct sctp_chunk *chunk) { struct sctp_association *asoc = pkt->transport->asoc; + enum sctp_xmit retval = SCTP_XMIT_OK; struct sctp_chunk *auth; - sctp_xmit_t retval = SCTP_XMIT_OK; /* if we don't have an association, we can't do authentication */ if (!asoc) @@ -254,10 +254,10 @@ static sctp_xmit_t sctp_packet_bundle_auth(struct sctp_packet *pkt, } /* Try to bundle a SACK with the packet. */ -static sctp_xmit_t sctp_packet_bundle_sack(struct sctp_packet *pkt, - struct sctp_chunk *chunk) +static enum sctp_xmit sctp_packet_bundle_sack(struct sctp_packet *pkt, + struct sctp_chunk *chunk) { - sctp_xmit_t retval = SCTP_XMIT_OK; + enum sctp_xmit retval = SCTP_XMIT_OK; /* If sending DATA and haven't aleady bundled a SACK, try to * bundle one in to the packet. @@ -299,11 +299,11 @@ out: /* Append a chunk to the offered packet reporting back any inability to do * so. */ -static sctp_xmit_t __sctp_packet_append_chunk(struct sctp_packet *packet, - struct sctp_chunk *chunk) +static enum sctp_xmit __sctp_packet_append_chunk(struct sctp_packet *packet, + struct sctp_chunk *chunk) { - sctp_xmit_t retval = SCTP_XMIT_OK; __u16 chunk_len = SCTP_PAD4(ntohs(chunk->chunk_hdr->length)); + enum sctp_xmit retval = SCTP_XMIT_OK; /* Check to see if this chunk will fit into the packet */ retval = sctp_packet_will_fit(packet, chunk, chunk_len); @@ -353,10 +353,10 @@ finish: /* Append a chunk to the offered packet reporting back any inability to do * so. */ -sctp_xmit_t sctp_packet_append_chunk(struct sctp_packet *packet, - struct sctp_chunk *chunk) +enum sctp_xmit sctp_packet_append_chunk(struct sctp_packet *packet, + struct sctp_chunk *chunk) { - sctp_xmit_t retval = SCTP_XMIT_OK; + enum sctp_xmit retval = SCTP_XMIT_OK; pr_debug("%s: packet:%p chunk:%p\n", __func__, packet, chunk); @@ -653,8 +653,8 @@ out: ********************************************************************/ /* This private function check to see if a chunk can be added */ -static sctp_xmit_t sctp_packet_can_append_data(struct sctp_packet *packet, - struct sctp_chunk *chunk) +static enum sctp_xmit sctp_packet_can_append_data(struct sctp_packet *packet, + struct sctp_chunk *chunk) { size_t datasize, rwnd, inflight, flight_size; struct sctp_transport *transport = packet->transport; @@ -762,12 +762,12 @@ static void sctp_packet_append_data(struct sctp_packet *packet, sctp_chunk_assign_ssn(chunk); } -static sctp_xmit_t sctp_packet_will_fit(struct sctp_packet *packet, - struct sctp_chunk *chunk, - u16 chunk_len) +static enum sctp_xmit sctp_packet_will_fit(struct sctp_packet *packet, + struct sctp_chunk *chunk, + u16 chunk_len) { + enum sctp_xmit retval = SCTP_XMIT_OK; size_t psize, pmtu, maxsize; - sctp_xmit_t retval = SCTP_XMIT_OK; psize = packet->size; if (packet->transport->asoc) diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index e8762702a313..4db012aa25f7 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -50,6 +50,7 @@ #include <net/sctp/sctp.h> #include <net/sctp/sm.h> +#include <net/sctp/stream_sched.h> /* Declare internal functions here. */ static int sctp_acked(struct sctp_sackhdr *sack, __u32 tsn); @@ -72,32 +73,38 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp); /* Add data to the front of the queue. */ static inline void sctp_outq_head_data(struct sctp_outq *q, - struct sctp_chunk *ch) + struct sctp_chunk *ch) { + struct sctp_stream_out_ext *oute; + __u16 stream; + list_add(&ch->list, &q->out_chunk_list); q->out_qlen += ch->skb->len; + + stream = sctp_chunk_stream_no(ch); + oute = q->asoc->stream.out[stream].ext; + list_add(&ch->stream_list, &oute->outq); } /* Take data from the front of the queue. */ static inline struct sctp_chunk *sctp_outq_dequeue_data(struct sctp_outq *q) { - struct sctp_chunk *ch = NULL; - - if (!list_empty(&q->out_chunk_list)) { - struct list_head *entry = q->out_chunk_list.next; - - ch = list_entry(entry, struct sctp_chunk, list); - list_del_init(entry); - q->out_qlen -= ch->skb->len; - } - return ch; + return q->sched->dequeue(q); } + /* Add data chunk to the end of the queue. */ static inline void sctp_outq_tail_data(struct sctp_outq *q, struct sctp_chunk *ch) { + struct sctp_stream_out_ext *oute; + __u16 stream; + list_add_tail(&ch->list, &q->out_chunk_list); q->out_qlen += ch->skb->len; + + stream = sctp_chunk_stream_no(ch); + oute = q->asoc->stream.out[stream].ext; + list_add_tail(&ch->stream_list, &oute->outq); } /* @@ -207,6 +214,7 @@ void sctp_outq_init(struct sctp_association *asoc, struct sctp_outq *q) INIT_LIST_HEAD(&q->retransmit); INIT_LIST_HEAD(&q->sacked); INIT_LIST_HEAD(&q->abandoned); + sctp_sched_set_sched(asoc, SCTP_SS_FCFS); } /* Free the outqueue structure and any related pending chunks. @@ -258,6 +266,7 @@ static void __sctp_outq_teardown(struct sctp_outq *q) /* Throw away any leftover data chunks. */ while ((chunk = sctp_outq_dequeue_data(q)) != NULL) { + sctp_sched_dequeue_done(q, chunk); /* Mark as send failure. */ sctp_chunk_fail(chunk, q->error); @@ -366,7 +375,7 @@ static int sctp_prsctp_prune_sent(struct sctp_association *asoc, streamout = &asoc->stream.out[chk->sinfo.sinfo_stream]; asoc->sent_cnt_removable--; asoc->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; - streamout->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; + streamout->ext->abandoned_sent[SCTP_PR_INDEX(PRIO)]++; if (!chk->tsn_gap_acked) { if (chk->transport) @@ -391,20 +400,21 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc, struct sctp_outq *q = &asoc->outqueue; struct sctp_chunk *chk, *temp; + q->sched->unsched_all(&asoc->stream); + list_for_each_entry_safe(chk, temp, &q->out_chunk_list, list) { if (!SCTP_PR_PRIO_ENABLED(chk->sinfo.sinfo_flags) || chk->sinfo.sinfo_timetolive <= sinfo->sinfo_timetolive) continue; - list_del_init(&chk->list); - q->out_qlen -= chk->skb->len; + sctp_sched_dequeue_common(q, chk); asoc->sent_cnt_removable--; asoc->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; if (chk->sinfo.sinfo_stream < asoc->stream.outcnt) { struct sctp_stream_out *streamout = &asoc->stream.out[chk->sinfo.sinfo_stream]; - streamout->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; + streamout->ext->abandoned_unsent[SCTP_PR_INDEX(PRIO)]++; } msg_len -= SCTP_DATA_SNDSIZE(chk) + @@ -415,6 +425,8 @@ static int sctp_prsctp_prune_unsent(struct sctp_association *asoc, break; } + q->sched->sched_all(&asoc->stream); + return msg_len; } @@ -534,7 +546,7 @@ void sctp_retransmit_mark(struct sctp_outq *q, * one packet out. */ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, - sctp_retransmit_reason_t reason) + enum sctp_retransmit_reason reason) { struct net *net = sock_net(q->asoc->base.sk); @@ -594,14 +606,14 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, static int sctp_outq_flush_rtx(struct sctp_outq *q, struct sctp_packet *pkt, int rtx_timeout, int *start_timer) { - struct list_head *lqueue; struct sctp_transport *transport = pkt->transport; - sctp_xmit_t status; struct sctp_chunk *chunk, *chunk1; - int fast_rtx; + struct list_head *lqueue; + enum sctp_xmit status; int error = 0; int timer = 0; int done = 0; + int fast_rtx; lqueue = &q->retransmit; fast_rtx = q->fast_rtx; @@ -781,7 +793,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) struct sctp_transport *transport = NULL; struct sctp_transport *new_transport; struct sctp_chunk *chunk, *tmp; - sctp_xmit_t status; + enum sctp_xmit status; int error = 0; int start_timer = 0; int one_packet = 0; @@ -1033,22 +1045,9 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) while ((chunk = sctp_outq_dequeue_data(q)) != NULL) { __u32 sid = ntohs(chunk->subh.data_hdr->stream); - /* RFC 2960 6.5 Every DATA chunk MUST carry a valid - * stream identifier. - */ - if (chunk->sinfo.sinfo_stream >= asoc->stream.outcnt) { - - /* Mark as failed send. */ - sctp_chunk_fail(chunk, SCTP_ERROR_INV_STRM); - if (asoc->peer.prsctp_capable && - SCTP_PR_PRIO_ENABLED(chunk->sinfo.sinfo_flags)) - asoc->sent_cnt_removable--; - sctp_chunk_free(chunk); - continue; - } - /* Has this chunk expired? */ if (sctp_chunk_abandoned(chunk)) { + sctp_sched_dequeue_done(q, chunk); sctp_chunk_fail(chunk, 0); sctp_chunk_free(chunk); continue; @@ -1070,6 +1069,7 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) new_transport = asoc->peer.active_path; if (new_transport->state == SCTP_UNCONFIRMED) { WARN_ONCE(1, "Attempt to send packet on unconfirmed path."); + sctp_sched_dequeue_done(q, chunk); sctp_chunk_fail(chunk, 0); sctp_chunk_free(chunk); continue; @@ -1133,6 +1133,11 @@ static void sctp_outq_flush(struct sctp_outq *q, int rtx_timeout, gfp_t gfp) else asoc->stats.oodchunks++; + /* Only now it's safe to consider this + * chunk as sent, sched-wise. + */ + sctp_sched_dequeue_done(q, chunk); + break; default: @@ -1197,7 +1202,7 @@ sctp_flush_out: static void sctp_sack_update_unack_data(struct sctp_association *assoc, struct sctp_sackhdr *sack) { - sctp_sack_variable_t *frags; + union sctp_sack_variable *frags; __u16 unack_data; int i; @@ -1224,7 +1229,7 @@ int sctp_outq_sack(struct sctp_outq *q, struct sctp_chunk *chunk) struct sctp_transport *transport; struct sctp_chunk *tchunk = NULL; struct list_head *lchunk, *transport_list, *temp; - sctp_sack_variable_t *frags = sack->variable; + union sctp_sack_variable *frags = sack->variable; __u32 sack_ctsn, ctsn, tsn; __u32 highest_tsn, highest_new_tsn; __u32 sack_a_rwnd; @@ -1736,10 +1741,10 @@ static void sctp_mark_missing(struct sctp_outq *q, /* Is the given TSN acked by this packet? */ static int sctp_acked(struct sctp_sackhdr *sack, __u32 tsn) { - int i; - sctp_sack_variable_t *frags; - __u16 tsn_offset, blocks; __u32 ctsn = ntohl(sack->cum_tsn_ack); + union sctp_sack_variable *frags; + __u16 tsn_offset, blocks; + int i; if (TSN_lte(tsn, ctsn)) goto pass; diff --git a/net/sctp/primitive.c b/net/sctp/primitive.c index f0553a022859..c0817f7a8964 100644 --- a/net/sctp/primitive.c +++ b/net/sctp/primitive.c @@ -53,8 +53,8 @@ int sctp_primitive_ ## name(struct net *net, struct sctp_association *asoc, \ void *arg) { \ int error = 0; \ - sctp_event_t event_type; sctp_subtype_t subtype; \ - sctp_state_t state; \ + enum sctp_event event_type; union sctp_subtype subtype; \ + enum sctp_state state; \ struct sctp_endpoint *ep; \ \ event_type = SCTP_EVENT_T_PRIMITIVE; \ diff --git a/net/sctp/probe.c b/net/sctp/probe.c index 6cc2152e0740..1280f85a598d 100644 --- a/net/sctp/probe.c +++ b/net/sctp/probe.c @@ -127,12 +127,13 @@ static const struct file_operations sctpprobe_fops = { .llseek = noop_llseek, }; -static sctp_disposition_t jsctp_sf_eat_sack(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +static enum sctp_disposition jsctp_sf_eat_sack( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { struct sctp_chunk *chunk = arg; struct sk_buff *skb = chunk->skb; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 989a900383b5..f5172c21349b 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -196,7 +196,7 @@ static void sctp_free_local_addr_list(struct net *net) /* Copy the local addresses which are valid for 'scope' into 'bp'. */ int sctp_copy_local_addr_list(struct net *net, struct sctp_bind_addr *bp, - sctp_scope_t scope, gfp_t gfp, int copy_flags) + enum sctp_scope scope, gfp_t gfp, int copy_flags) { struct sctp_sockaddr_entry *addr; union sctp_addr laddr; @@ -292,7 +292,7 @@ static void sctp_v4_from_addr_param(union sctp_addr *addr, static int sctp_v4_to_addr_param(const union sctp_addr *addr, union sctp_addr_param *param) { - int length = sizeof(sctp_ipv4addr_param_t); + int length = sizeof(struct sctp_ipv4addr_param); param->v4.param_hdr.type = SCTP_PARAM_IPV4_ADDRESS; param->v4.param_hdr.length = htons(length); @@ -400,9 +400,9 @@ static int sctp_v4_available(union sctp_addr *addr, struct sctp_sock *sp) * IPv4 scoping can be controlled through sysctl option * net.sctp.addr_scope_policy */ -static sctp_scope_t sctp_v4_scope(union sctp_addr *addr) +static enum sctp_scope sctp_v4_scope(union sctp_addr *addr) { - sctp_scope_t retval; + enum sctp_scope retval; /* Check for unusable SCTP addresses. */ if (IS_IPV4_UNUSABLE_ADDRESS(addr->v4.sin_addr.s_addr)) { @@ -622,9 +622,9 @@ static void sctp_v4_ecn_capable(struct sock *sk) INET_ECN_xmit(sk); } -static void sctp_addr_wq_timeout_handler(unsigned long arg) +static void sctp_addr_wq_timeout_handler(struct timer_list *t) { - struct net *net = (struct net *)arg; + struct net *net = from_timer(net, t, sctp.addr_wq_timer); struct sctp_sockaddr_entry *addrw, *temp; struct sctp_sock *sp; @@ -1304,8 +1304,7 @@ static int __net_init sctp_defaults_init(struct net *net) INIT_LIST_HEAD(&net->sctp.auto_asconf_splist); spin_lock_init(&net->sctp.addr_wq_lock); net->sctp.addr_wq_timer.expires = 0; - setup_timer(&net->sctp.addr_wq_timer, sctp_addr_wq_timeout_handler, - (unsigned long)net); + timer_setup(&net->sctp.addr_wq_timer, sctp_addr_wq_timeout_handler, 0); return 0; diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c index 9a647214a91e..a72a7d925d46 100644 --- a/net/sctp/sctp_diag.c +++ b/net/sctp/sctp_diag.c @@ -70,7 +70,8 @@ static int inet_diag_msg_sctpladdrs_fill(struct sk_buff *skb, info = nla_data(attr); list_for_each_entry_rcu(laddr, address_list, list) { - memcpy(info, &laddr->a, addrlen); + memcpy(info, &laddr->a, sizeof(laddr->a)); + memset(info + sizeof(laddr->a), 0, addrlen - sizeof(laddr->a)); info += addrlen; } @@ -93,7 +94,9 @@ static int inet_diag_msg_sctpaddrs_fill(struct sk_buff *skb, info = nla_data(attr); list_for_each_entry(from, &asoc->peer.transport_addr_list, transports) { - memcpy(info, &from->ipaddr, addrlen); + memcpy(info, &from->ipaddr, sizeof(from->ipaddr)); + memset(info + sizeof(from->ipaddr), 0, + addrlen - sizeof(from->ipaddr)); info += addrlen; } @@ -276,9 +279,11 @@ out: return err; } -static int sctp_sock_dump(struct sock *sk, void *p) +static int sctp_sock_dump(struct sctp_transport *tsp, void *p) { + struct sctp_endpoint *ep = tsp->asoc->ep; struct sctp_comm_param *commp = p; + struct sock *sk = ep->base.sk; struct sk_buff *skb = commp->skb; struct netlink_callback *cb = commp->cb; const struct inet_diag_req_v2 *r = commp->r; @@ -286,9 +291,7 @@ static int sctp_sock_dump(struct sock *sk, void *p) int err = 0; lock_sock(sk); - if (!sctp_sk(sk)->ep) - goto release; - list_for_each_entry(assoc, &sctp_sk(sk)->ep->asocs, asocs) { + list_for_each_entry(assoc, &ep->asocs, asocs) { if (cb->args[4] < cb->args[1]) goto next; @@ -306,7 +309,6 @@ static int sctp_sock_dump(struct sock *sk, void *p) cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh, commp->net_admin) < 0) { - cb->args[3] = 1; err = 1; goto release; } @@ -324,40 +326,30 @@ next: cb->args[4]++; } cb->args[1] = 0; - cb->args[2]++; cb->args[3] = 0; cb->args[4] = 0; release: release_sock(sk); - sock_put(sk); return err; } -static int sctp_get_sock(struct sctp_transport *tsp, void *p) +static int sctp_sock_filter(struct sctp_transport *tsp, void *p) { struct sctp_endpoint *ep = tsp->asoc->ep; struct sctp_comm_param *commp = p; struct sock *sk = ep->base.sk; - struct netlink_callback *cb = commp->cb; const struct inet_diag_req_v2 *r = commp->r; struct sctp_association *assoc = list_entry(ep->asocs.next, struct sctp_association, asocs); /* find the ep only once through the transports by this condition */ if (tsp->asoc != assoc) - goto out; + return 0; if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family) - goto out; - - sock_hold(sk); - cb->args[5] = (long)sk; + return 0; return 1; - -out: - cb->args[2]++; - return 0; } static int sctp_ep_dump(struct sctp_endpoint *ep, void *p) @@ -471,6 +463,7 @@ static void sctp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, .r = r, .net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN), }; + int pos = cb->args[2]; /* eps hashtable dumps * args: @@ -500,12 +493,9 @@ skip: if (!(idiag_states & ~(TCPF_LISTEN | TCPF_CLOSE))) goto done; -next: - cb->args[5] = 0; - sctp_for_each_transport(sctp_get_sock, net, cb->args[2], &commp); - - if (cb->args[5] && !sctp_sock_dump((struct sock *)cb->args[5], &commp)) - goto next; + sctp_for_each_transport(sctp_sock_filter, sctp_sock_dump, + net, &pos, &commp); + cb->args[2] = pos; done: cb->args[1] = cb->args[4]; diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 6110447fe51d..9bf575f2e8ed 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -69,7 +69,8 @@ static struct sctp_chunk *sctp_make_data(const struct sctp_association *asoc, static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc, __u8 type, __u8 flags, int paylen, gfp_t gfp); -static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, +static struct sctp_cookie_param *sctp_pack_cookie( + const struct sctp_endpoint *ep, const struct sctp_association *asoc, const struct sctp_chunk *init_chunk, int *cookie_len, @@ -131,17 +132,17 @@ static const struct sctp_paramhdr prsctp_param = { * provided chunk, as most cause codes will be embedded inside an * abort chunk. */ -void sctp_init_cause(struct sctp_chunk *chunk, __be16 cause_code, - size_t paylen) +void sctp_init_cause(struct sctp_chunk *chunk, __be16 cause_code, + size_t paylen) { - sctp_errhdr_t err; + struct sctp_errhdr err; __u16 len; /* Cause code constants are now defined in network order. */ err.cause = cause_code; - len = sizeof(sctp_errhdr_t) + paylen; + len = sizeof(err) + paylen; err.length = htons(len); - chunk->subh.err_hdr = sctp_addto_chunk(chunk, sizeof(sctp_errhdr_t), &err); + chunk->subh.err_hdr = sctp_addto_chunk(chunk, sizeof(err), &err); } /* A helper to initialize an op error inside a @@ -150,21 +151,21 @@ void sctp_init_cause(struct sctp_chunk *chunk, __be16 cause_code, * if there isn't enough space in the op error chunk */ static int sctp_init_cause_fixed(struct sctp_chunk *chunk, __be16 cause_code, - size_t paylen) + size_t paylen) { - sctp_errhdr_t err; + struct sctp_errhdr err; __u16 len; /* Cause code constants are now defined in network order. */ err.cause = cause_code; - len = sizeof(sctp_errhdr_t) + paylen; + len = sizeof(err) + paylen; err.length = htons(len); if (skb_tailroom(chunk->skb) < len) return -ENOSPC; - chunk->subh.err_hdr = sctp_addto_chunk_fixed(chunk, - sizeof(sctp_errhdr_t), - &err); + + chunk->subh.err_hdr = sctp_addto_chunk_fixed(chunk, sizeof(err), &err); + return 0; } /* 3.3.2 Initiation (INIT) (1) @@ -212,32 +213,31 @@ static int sctp_init_cause_fixed(struct sctp_chunk *chunk, __be16 cause_code, * Supported Address Types (Note 4) Optional 12 */ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, - const struct sctp_bind_addr *bp, - gfp_t gfp, int vparam_len) + const struct sctp_bind_addr *bp, + gfp_t gfp, int vparam_len) { struct net *net = sock_net(asoc->base.sk); + struct sctp_supported_ext_param ext_param; + struct sctp_adaptation_ind_param aiparam; + struct sctp_paramhdr *auth_chunks = NULL; + struct sctp_paramhdr *auth_hmacs = NULL; + struct sctp_supported_addrs_param sat; struct sctp_endpoint *ep = asoc->ep; - struct sctp_inithdr init; - union sctp_params addrs; - size_t chunksize; struct sctp_chunk *retval = NULL; int num_types, addrs_len = 0; + struct sctp_inithdr init; + union sctp_params addrs; struct sctp_sock *sp; - sctp_supported_addrs_param_t sat; + __u8 extensions[4]; + size_t chunksize; __be16 types[2]; - sctp_adaptation_ind_param_t aiparam; - sctp_supported_ext_param_t ext_param; int num_ext = 0; - __u8 extensions[4]; - struct sctp_paramhdr *auth_chunks = NULL, - *auth_hmacs = NULL; /* RFC 2960 3.3.2 Initiation (INIT) (1) * * Note 1: The INIT chunks can contain multiple addresses that * can be IPv4 and/or IPv6 in any combination. */ - retval = NULL; /* Convert the provided bind address list to raw format. */ addrs = sctp_bind_addrs_to_raw(bp, &addrs_len, gfp); @@ -305,8 +305,7 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, /* If we have any extensions to report, account for that */ if (num_ext) - chunksize += SCTP_PAD4(sizeof(sctp_supported_ext_param_t) + - num_ext); + chunksize += SCTP_PAD4(sizeof(ext_param) + num_ext); /* RFC 2960 3.3.2 Initiation (INIT) (1) * @@ -348,10 +347,8 @@ struct sctp_chunk *sctp_make_init(const struct sctp_association *asoc, */ if (num_ext) { ext_param.param_hdr.type = SCTP_PARAM_SUPPORTED_EXT; - ext_param.param_hdr.length = - htons(sizeof(sctp_supported_ext_param_t) + num_ext); - sctp_addto_chunk(retval, sizeof(sctp_supported_ext_param_t), - &ext_param); + ext_param.param_hdr.length = htons(sizeof(ext_param) + num_ext); + sctp_addto_chunk(retval, sizeof(ext_param), &ext_param); sctp_addto_param(retval, num_ext, extensions); } @@ -382,26 +379,24 @@ nodata: } struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, - const struct sctp_chunk *chunk, - gfp_t gfp, int unkparam_len) + const struct sctp_chunk *chunk, + gfp_t gfp, int unkparam_len) { + struct sctp_supported_ext_param ext_param; + struct sctp_adaptation_ind_param aiparam; + struct sctp_paramhdr *auth_chunks = NULL; + struct sctp_paramhdr *auth_random = NULL; + struct sctp_paramhdr *auth_hmacs = NULL; + struct sctp_chunk *retval = NULL; + struct sctp_cookie_param *cookie; struct sctp_inithdr initack; - struct sctp_chunk *retval; union sctp_params addrs; struct sctp_sock *sp; - int addrs_len; - sctp_cookie_param_t *cookie; - int cookie_len; + __u8 extensions[4]; size_t chunksize; - sctp_adaptation_ind_param_t aiparam; - sctp_supported_ext_param_t ext_param; int num_ext = 0; - __u8 extensions[4]; - struct sctp_paramhdr *auth_chunks = NULL, - *auth_hmacs = NULL, - *auth_random = NULL; - - retval = NULL; + int cookie_len; + int addrs_len; /* Note: there may be no addresses to embed. */ addrs = sctp_bind_addrs_to_raw(&asoc->base.bind_addr, &addrs_len, gfp); @@ -468,8 +463,7 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, } if (num_ext) - chunksize += SCTP_PAD4(sizeof(sctp_supported_ext_param_t) + - num_ext); + chunksize += SCTP_PAD4(sizeof(ext_param) + num_ext); /* Now allocate and fill out the chunk. */ retval = sctp_make_control(asoc, SCTP_CID_INIT_ACK, 0, chunksize, gfp); @@ -495,10 +489,8 @@ struct sctp_chunk *sctp_make_init_ack(const struct sctp_association *asoc, sctp_addto_chunk(retval, sizeof(ecap_param), &ecap_param); if (num_ext) { ext_param.param_hdr.type = SCTP_PARAM_SUPPORTED_EXT; - ext_param.param_hdr.length = - htons(sizeof(sctp_supported_ext_param_t) + num_ext); - sctp_addto_chunk(retval, sizeof(sctp_supported_ext_param_t), - &ext_param); + ext_param.param_hdr.length = htons(sizeof(ext_param) + num_ext); + sctp_addto_chunk(retval, sizeof(ext_param), &ext_param); sctp_addto_param(retval, num_ext, extensions); } if (asoc->peer.prsctp_capable) @@ -567,11 +559,11 @@ nomem_cookie: * to insure interoperability. */ struct sctp_chunk *sctp_make_cookie_echo(const struct sctp_association *asoc, - const struct sctp_chunk *chunk) + const struct sctp_chunk *chunk) { struct sctp_chunk *retval; - void *cookie; int cookie_len; + void *cookie; cookie = asoc->peer.cookie; cookie_len = asoc->peer.cookie_len; @@ -619,7 +611,7 @@ nodata: * Set to zero on transmit and ignored on receipt. */ struct sctp_chunk *sctp_make_cookie_ack(const struct sctp_association *asoc, - const struct sctp_chunk *chunk) + const struct sctp_chunk *chunk) { struct sctp_chunk *retval; @@ -664,15 +656,15 @@ struct sctp_chunk *sctp_make_cookie_ack(const struct sctp_association *asoc, * Note: The CWR is considered a Control chunk. */ struct sctp_chunk *sctp_make_cwr(const struct sctp_association *asoc, - const __u32 lowest_tsn, - const struct sctp_chunk *chunk) + const __u32 lowest_tsn, + const struct sctp_chunk *chunk) { struct sctp_chunk *retval; - sctp_cwrhdr_t cwr; + struct sctp_cwrhdr cwr; cwr.lowest_tsn = htonl(lowest_tsn); retval = sctp_make_control(asoc, SCTP_CID_ECN_CWR, 0, - sizeof(sctp_cwrhdr_t), GFP_ATOMIC); + sizeof(cwr), GFP_ATOMIC); if (!retval) goto nodata; @@ -699,14 +691,14 @@ nodata: /* Make an ECNE chunk. This is a congestion experienced report. */ struct sctp_chunk *sctp_make_ecne(const struct sctp_association *asoc, - const __u32 lowest_tsn) + const __u32 lowest_tsn) { struct sctp_chunk *retval; - sctp_ecnehdr_t ecne; + struct sctp_ecnehdr ecne; ecne.lowest_tsn = htonl(lowest_tsn); retval = sctp_make_control(asoc, SCTP_CID_ECN_ECNE, 0, - sizeof(sctp_ecnehdr_t), GFP_ATOMIC); + sizeof(ecne), GFP_ATOMIC); if (!retval) goto nodata; retval->subh.ecne_hdr = @@ -720,9 +712,9 @@ nodata: * parameters. However, do not populate the data payload. */ struct sctp_chunk *sctp_make_datafrag_empty(struct sctp_association *asoc, - const struct sctp_sndrcvinfo *sinfo, - int data_len, __u8 flags, __u16 ssn, - gfp_t gfp) + const struct sctp_sndrcvinfo *sinfo, + int data_len, __u8 flags, __u16 ssn, + gfp_t gfp) { struct sctp_chunk *retval; struct sctp_datahdr dp; @@ -760,15 +752,15 @@ nodata: */ struct sctp_chunk *sctp_make_sack(const struct sctp_association *asoc) { - struct sctp_chunk *retval; - struct sctp_sackhdr sack; - int len; - __u32 ctsn; - __u16 num_gabs, num_dup_tsns; - struct sctp_association *aptr = (struct sctp_association *)asoc; struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map; + struct sctp_association *aptr = (struct sctp_association *)asoc; struct sctp_gap_ack_block gabs[SCTP_MAX_GABS]; + __u16 num_gabs, num_dup_tsns; struct sctp_transport *trans; + struct sctp_chunk *retval; + struct sctp_sackhdr sack; + __u32 ctsn; + int len; memset(gabs, 0, sizeof(gabs)); ctsn = sctp_tsnmap_get_ctsn(map); @@ -862,15 +854,15 @@ nodata: struct sctp_chunk *sctp_make_shutdown(const struct sctp_association *asoc, const struct sctp_chunk *chunk) { + struct sctp_shutdownhdr shut; struct sctp_chunk *retval; - sctp_shutdownhdr_t shut; __u32 ctsn; ctsn = sctp_tsnmap_get_ctsn(&asoc->peer.tsn_map); shut.cum_tsn_ack = htonl(ctsn); retval = sctp_make_control(asoc, SCTP_CID_SHUTDOWN, 0, - sizeof(sctp_shutdownhdr_t), GFP_ATOMIC); + sizeof(shut), GFP_ATOMIC); if (!retval) goto nodata; @@ -884,7 +876,7 @@ nodata: } struct sctp_chunk *sctp_make_shutdown_ack(const struct sctp_association *asoc, - const struct sctp_chunk *chunk) + const struct sctp_chunk *chunk) { struct sctp_chunk *retval; @@ -907,8 +899,8 @@ struct sctp_chunk *sctp_make_shutdown_ack(const struct sctp_association *asoc, } struct sctp_chunk *sctp_make_shutdown_complete( - const struct sctp_association *asoc, - const struct sctp_chunk *chunk) + const struct sctp_association *asoc, + const struct sctp_chunk *chunk) { struct sctp_chunk *retval; __u8 flags = 0; @@ -941,8 +933,8 @@ struct sctp_chunk *sctp_make_shutdown_complete( * association, except when responding to an INIT (sctpimpguide 2.41). */ struct sctp_chunk *sctp_make_abort(const struct sctp_association *asoc, - const struct sctp_chunk *chunk, - const size_t hint) + const struct sctp_chunk *chunk, + const size_t hint) { struct sctp_chunk *retval; __u8 flags = 0; @@ -978,14 +970,15 @@ struct sctp_chunk *sctp_make_abort(const struct sctp_association *asoc, /* Helper to create ABORT with a NO_USER_DATA error. */ struct sctp_chunk *sctp_make_abort_no_data( - const struct sctp_association *asoc, - const struct sctp_chunk *chunk, __u32 tsn) + const struct sctp_association *asoc, + const struct sctp_chunk *chunk, + __u32 tsn) { struct sctp_chunk *retval; __be32 payload; - retval = sctp_make_abort(asoc, chunk, sizeof(sctp_errhdr_t) - + sizeof(tsn)); + retval = sctp_make_abort(asoc, chunk, + sizeof(struct sctp_errhdr) + sizeof(tsn)); if (!retval) goto no_mem; @@ -1020,7 +1013,8 @@ struct sctp_chunk *sctp_make_abort_user(const struct sctp_association *asoc, void *payload = NULL; int err; - retval = sctp_make_abort(asoc, NULL, sizeof(sctp_errhdr_t) + paylen); + retval = sctp_make_abort(asoc, NULL, + sizeof(struct sctp_errhdr) + paylen); if (!retval) goto err_chunk; @@ -1058,8 +1052,8 @@ err_chunk: static void *sctp_addto_param(struct sctp_chunk *chunk, int len, const void *data) { - void *target; int chunklen = ntohs(chunk->chunk_hdr->length); + void *target; target = skb_put(chunk->skb, len); @@ -1077,16 +1071,16 @@ static void *sctp_addto_param(struct sctp_chunk *chunk, int len, /* Make an ABORT chunk with a PROTOCOL VIOLATION cause code. */ struct sctp_chunk *sctp_make_abort_violation( - const struct sctp_association *asoc, - const struct sctp_chunk *chunk, - const __u8 *payload, - const size_t paylen) + const struct sctp_association *asoc, + const struct sctp_chunk *chunk, + const __u8 *payload, + const size_t paylen) { struct sctp_chunk *retval; struct sctp_paramhdr phdr; - retval = sctp_make_abort(asoc, chunk, sizeof(sctp_errhdr_t) + paylen + - sizeof(phdr)); + retval = sctp_make_abort(asoc, chunk, sizeof(struct sctp_errhdr) + + paylen + sizeof(phdr)); if (!retval) goto end; @@ -1103,14 +1097,14 @@ end: } struct sctp_chunk *sctp_make_violation_paramlen( - const struct sctp_association *asoc, - const struct sctp_chunk *chunk, - struct sctp_paramhdr *param) + const struct sctp_association *asoc, + const struct sctp_chunk *chunk, + struct sctp_paramhdr *param) { - struct sctp_chunk *retval; static const char error[] = "The following parameter had invalid length:"; - size_t payload_len = sizeof(error) + sizeof(sctp_errhdr_t) + + size_t payload_len = sizeof(error) + sizeof(struct sctp_errhdr) + sizeof(*param); + struct sctp_chunk *retval; retval = sctp_make_abort(asoc, chunk, payload_len); if (!retval) @@ -1126,12 +1120,12 @@ nodata: } struct sctp_chunk *sctp_make_violation_max_retrans( - const struct sctp_association *asoc, - const struct sctp_chunk *chunk) + const struct sctp_association *asoc, + const struct sctp_chunk *chunk) { - struct sctp_chunk *retval; static const char error[] = "Association exceeded its max_retans count"; - size_t payload_len = sizeof(error) + sizeof(sctp_errhdr_t); + size_t payload_len = sizeof(error) + sizeof(struct sctp_errhdr); + struct sctp_chunk *retval; retval = sctp_make_abort(asoc, chunk, payload_len); if (!retval) @@ -1146,10 +1140,10 @@ nodata: /* Make a HEARTBEAT chunk. */ struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc, - const struct sctp_transport *transport) + const struct sctp_transport *transport) { + struct sctp_sender_hb_info hbinfo; struct sctp_chunk *retval; - sctp_sender_hb_info_t hbinfo; retval = sctp_make_control(asoc, SCTP_CID_HEARTBEAT, 0, sizeof(hbinfo), GFP_ATOMIC); @@ -1158,7 +1152,7 @@ struct sctp_chunk *sctp_make_heartbeat(const struct sctp_association *asoc, goto nodata; hbinfo.param_hdr.type = SCTP_PARAM_HEARTBEAT_INFO; - hbinfo.param_hdr.length = htons(sizeof(sctp_sender_hb_info_t)); + hbinfo.param_hdr.length = htons(sizeof(hbinfo)); hbinfo.daddr = transport->ipaddr; hbinfo.sent_at = jiffies; hbinfo.hb_nonce = transport->hb_nonce; @@ -1175,8 +1169,9 @@ nodata: } struct sctp_chunk *sctp_make_heartbeat_ack(const struct sctp_association *asoc, - const struct sctp_chunk *chunk, - const void *payload, const size_t paylen) + const struct sctp_chunk *chunk, + const void *payload, + const size_t paylen) { struct sctp_chunk *retval; @@ -1207,14 +1202,15 @@ nodata: * This routine can be used for containing multiple causes in the chunk. */ static struct sctp_chunk *sctp_make_op_error_space( - const struct sctp_association *asoc, - const struct sctp_chunk *chunk, - size_t size) + const struct sctp_association *asoc, + const struct sctp_chunk *chunk, + size_t size) { struct sctp_chunk *retval; retval = sctp_make_control(asoc, SCTP_CID_ERROR, 0, - sizeof(sctp_errhdr_t) + size, GFP_ATOMIC); + sizeof(struct sctp_errhdr) + size, + GFP_ATOMIC); if (!retval) goto nodata; @@ -1240,8 +1236,8 @@ nodata: * to report all the errors, if the incoming chunk is large */ static inline struct sctp_chunk *sctp_make_op_error_fixed( - const struct sctp_association *asoc, - const struct sctp_chunk *chunk) + const struct sctp_association *asoc, + const struct sctp_chunk *chunk) { size_t size = asoc ? asoc->pathmtu : 0; @@ -1253,9 +1249,9 @@ static inline struct sctp_chunk *sctp_make_op_error_fixed( /* Create an Operation Error chunk. */ struct sctp_chunk *sctp_make_op_error(const struct sctp_association *asoc, - const struct sctp_chunk *chunk, - __be16 cause_code, const void *payload, - size_t paylen, size_t reserve_tail) + const struct sctp_chunk *chunk, + __be16 cause_code, const void *payload, + size_t paylen, size_t reserve_tail) { struct sctp_chunk *retval; @@ -1274,9 +1270,9 @@ nodata: struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc) { - struct sctp_chunk *retval; - struct sctp_hmac *hmac_desc; struct sctp_authhdr auth_hdr; + struct sctp_hmac *hmac_desc; + struct sctp_chunk *retval; __u8 *hmac; /* Get the first hmac that the peer told us to use */ @@ -1285,16 +1281,16 @@ struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc) return NULL; retval = sctp_make_control(asoc, SCTP_CID_AUTH, 0, - hmac_desc->hmac_len + sizeof(sctp_authhdr_t), - GFP_ATOMIC); + hmac_desc->hmac_len + sizeof(auth_hdr), + GFP_ATOMIC); if (!retval) return NULL; auth_hdr.hmac_id = htons(hmac_desc->hmac_id); auth_hdr.shkey_id = htons(asoc->active_key_id); - retval->subh.auth_hdr = sctp_addto_chunk(retval, sizeof(sctp_authhdr_t), - &auth_hdr); + retval->subh.auth_hdr = sctp_addto_chunk(retval, sizeof(auth_hdr), + &auth_hdr); hmac = skb_put_zero(retval->skb, hmac_desc->hmac_len); @@ -1322,8 +1318,8 @@ struct sctp_chunk *sctp_make_auth(const struct sctp_association *asoc) * */ struct sctp_chunk *sctp_chunkify(struct sk_buff *skb, - const struct sctp_association *asoc, - struct sock *sk, gfp_t gfp) + const struct sctp_association *asoc, + struct sock *sk, gfp_t gfp) { struct sctp_chunk *retval; @@ -1375,11 +1371,11 @@ const union sctp_addr *sctp_source(const struct sctp_chunk *chunk) * arguments, reserving enough space for a 'paylen' byte payload. */ static struct sctp_chunk *_sctp_make_chunk(const struct sctp_association *asoc, - __u8 type, __u8 flags, int paylen, - gfp_t gfp) + __u8 type, __u8 flags, int paylen, + gfp_t gfp) { - struct sctp_chunk *retval; struct sctp_chunkhdr *chunk_hdr; + struct sctp_chunk *retval; struct sk_buff *skb; struct sock *sk; @@ -1473,9 +1469,9 @@ void sctp_chunk_put(struct sctp_chunk *ch) */ void *sctp_addto_chunk(struct sctp_chunk *chunk, int len, const void *data) { - void *target; int chunklen = ntohs(chunk->chunk_hdr->length); int padlen = SCTP_PAD4(chunklen) - chunklen; + void *target; skb_put_zero(chunk->skb, padlen); target = skb_put_data(chunk->skb, data, len); @@ -1528,11 +1524,10 @@ int sctp_user_addto_chunk(struct sctp_chunk *chunk, int len, */ void sctp_chunk_assign_ssn(struct sctp_chunk *chunk) { - struct sctp_datamsg *msg; - struct sctp_chunk *lchunk; struct sctp_stream *stream; - __u16 ssn; - __u16 sid; + struct sctp_chunk *lchunk; + struct sctp_datamsg *msg; + __u16 ssn, sid; if (chunk->has_ssn) return; @@ -1577,12 +1572,12 @@ void sctp_chunk_assign_tsn(struct sctp_chunk *chunk) /* Create a CLOSED association to use with an incoming packet. */ struct sctp_association *sctp_make_temp_asoc(const struct sctp_endpoint *ep, - struct sctp_chunk *chunk, - gfp_t gfp) + struct sctp_chunk *chunk, + gfp_t gfp) { struct sctp_association *asoc; + enum sctp_scope scope; struct sk_buff *skb; - sctp_scope_t scope; /* Create the bare association. */ scope = sctp_scope(sctp_source(chunk)); @@ -1601,14 +1596,15 @@ nodata: /* Build a cookie representing asoc. * This INCLUDES the param header needed to put the cookie in the INIT ACK. */ -static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const struct sctp_chunk *init_chunk, - int *cookie_len, - const __u8 *raw_addrs, int addrs_len) +static struct sctp_cookie_param *sctp_pack_cookie( + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const struct sctp_chunk *init_chunk, + int *cookie_len, const __u8 *raw_addrs, + int addrs_len) { - sctp_cookie_param_t *retval; struct sctp_signed_cookie *cookie; + struct sctp_cookie_param *retval; int headersize, bodysize; /* Header size is static data prior to the actual cookie, including @@ -1692,19 +1688,19 @@ nodata: /* Unpack the cookie from COOKIE ECHO chunk, recreating the association. */ struct sctp_association *sctp_unpack_cookie( - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - struct sctp_chunk *chunk, gfp_t gfp, - int *error, struct sctp_chunk **errp) + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + struct sctp_chunk *chunk, gfp_t gfp, + int *error, struct sctp_chunk **errp) { struct sctp_association *retval = NULL; + int headersize, bodysize, fixed_size; struct sctp_signed_cookie *cookie; + struct sk_buff *skb = chunk->skb; struct sctp_cookie *bear_cookie; - int headersize, bodysize, fixed_size; __u8 *digest = ep->digest; + enum sctp_scope scope; unsigned int len; - sctp_scope_t scope; - struct sk_buff *skb = chunk->skb; ktime_t kt; /* Header size is static data prior to the actual cookie, including @@ -1976,8 +1972,8 @@ static int sctp_process_hn_param(const struct sctp_association *asoc, static int sctp_verify_ext_param(struct net *net, union sctp_params param) { __u16 num_ext = ntohs(param.p->length) - sizeof(struct sctp_paramhdr); - int have_auth = 0; int have_asconf = 0; + int have_auth = 0; int i; for (i = 0; i < num_ext; i++) { @@ -2007,10 +2003,10 @@ static int sctp_verify_ext_param(struct net *net, union sctp_params param) } static void sctp_process_ext_param(struct sctp_association *asoc, - union sctp_params param) + union sctp_params param) { - struct net *net = sock_net(asoc->base.sk); __u16 num_ext = ntohs(param.p->length) - sizeof(struct sctp_paramhdr); + struct net *net = sock_net(asoc->base.sk); int i; for (i = 0; i < num_ext; i++) { @@ -2067,10 +2063,11 @@ static void sctp_process_ext_param(struct sctp_association *asoc, * SCTP_IERROR_ERROR - stop and report an error. * SCTP_IERROR_NOMEME - out of memory. */ -static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc, - union sctp_params param, - struct sctp_chunk *chunk, - struct sctp_chunk **errp) +static enum sctp_ierror sctp_process_unk_param( + const struct sctp_association *asoc, + union sctp_params param, + struct sctp_chunk *chunk, + struct sctp_chunk **errp) { int retval = SCTP_IERROR_NO_ERROR; @@ -2119,13 +2116,13 @@ static sctp_ierror_t sctp_process_unk_param(const struct sctp_association *asoc, * SCTP_IERROR_ERROR - stop processing, trigger an ERROR * SCTP_IERROR_NO_ERROR - continue with the chunk */ -static sctp_ierror_t sctp_verify_param(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - union sctp_params param, - enum sctp_cid cid, - struct sctp_chunk *chunk, - struct sctp_chunk **err_chunk) +static enum sctp_ierror sctp_verify_param(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + union sctp_params param, + enum sctp_cid cid, + struct sctp_chunk *chunk, + struct sctp_chunk **err_chunk) { struct sctp_hmac_algo_param *hmacs; int retval = SCTP_IERROR_NO_ERROR; @@ -2310,13 +2307,13 @@ int sctp_process_init(struct sctp_association *asoc, struct sctp_chunk *chunk, struct sctp_init_chunk *peer_init, gfp_t gfp) { struct net *net = sock_net(asoc->base.sk); - union sctp_params param; struct sctp_transport *transport; struct list_head *pos, *temp; - struct sctp_af *af; + union sctp_params param; union sctp_addr addr; - char *cookie; + struct sctp_af *af; int src_match = 0; + char *cookie; /* We must include the address that the INIT packet came from. * This is the only address that matters for an INIT packet. @@ -2500,16 +2497,15 @@ static int sctp_process_param(struct sctp_association *asoc, gfp_t gfp) { struct net *net = sock_net(asoc->base.sk); - union sctp_addr addr; - int i; - __u16 sat; - int retval = 1; - sctp_scope_t scope; - u32 stale; - struct sctp_af *af; + struct sctp_endpoint *ep = asoc->ep; union sctp_addr_param *addr_param; struct sctp_transport *t; - struct sctp_endpoint *ep = asoc->ep; + enum sctp_scope scope; + union sctp_addr addr; + struct sctp_af *af; + int retval = 1, i; + u32 stale; + __u16 sat; /* We maintain all INIT parameters in network byte order all the * time. This allows us to not worry about whether the parameters @@ -2617,7 +2613,7 @@ do_addr_param: if (!net->sctp.addip_enable) goto fall_through; - addr_param = param.v + sizeof(sctp_addip_param_t); + addr_param = param.v + sizeof(struct sctp_addip_param); af = sctp_get_af_specific(param_type2af(addr_param->p.type)); if (af == NULL) @@ -2754,7 +2750,7 @@ static struct sctp_chunk *sctp_make_asconf(struct sctp_association *asoc, union sctp_addr *addr, int vparam_len) { - sctp_addiphdr_t asconf; + struct sctp_addiphdr asconf; struct sctp_chunk *retval; int length = sizeof(asconf) + vparam_len; union sctp_addr_param addrparam; @@ -2807,22 +2803,20 @@ static struct sctp_chunk *sctp_make_asconf(struct sctp_association *asoc, * */ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc, - union sctp_addr *laddr, - struct sockaddr *addrs, - int addrcnt, - __be16 flags) -{ - sctp_addip_param_t param; - struct sctp_chunk *retval; - union sctp_addr_param addr_param; - union sctp_addr *addr; - void *addr_buf; - struct sctp_af *af; - int paramlen = sizeof(param); - int addr_param_len = 0; - int totallen = 0; - int i; - int del_pickup = 0; + union sctp_addr *laddr, + struct sockaddr *addrs, + int addrcnt, __be16 flags) +{ + union sctp_addr_param addr_param; + struct sctp_addip_param param; + int paramlen = sizeof(param); + struct sctp_chunk *retval; + int addr_param_len = 0; + union sctp_addr *addr; + int totallen = 0, i; + int del_pickup = 0; + struct sctp_af *af; + void *addr_buf; /* Get total length of all the address parameters. */ addr_buf = addrs; @@ -2860,7 +2854,7 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc, addr_param_len = af->to_addr_param(addr, &addr_param); param.param_hdr.type = flags; param.param_hdr.length = htons(paramlen + addr_param_len); - param.crr_id = i; + param.crr_id = htonl(i); sctp_addto_chunk(retval, paramlen, ¶m); sctp_addto_chunk(retval, addr_param_len, &addr_param); @@ -2873,7 +2867,7 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc, addr_param_len = af->to_addr_param(addr, &addr_param); param.param_hdr.type = SCTP_PARAM_DEL_IP; param.param_hdr.length = htons(paramlen + addr_param_len); - param.crr_id = i; + param.crr_id = htonl(i); sctp_addto_chunk(retval, paramlen, ¶m); sctp_addto_chunk(retval, addr_param_len, &addr_param); @@ -2898,12 +2892,12 @@ struct sctp_chunk *sctp_make_asconf_update_ip(struct sctp_association *asoc, struct sctp_chunk *sctp_make_asconf_set_prim(struct sctp_association *asoc, union sctp_addr *addr) { - sctp_addip_param_t param; - struct sctp_chunk *retval; - int len = sizeof(param); - union sctp_addr_param addrparam; - int addrlen; - struct sctp_af *af = sctp_get_af_specific(addr->v4.sin_family); + struct sctp_af *af = sctp_get_af_specific(addr->v4.sin_family); + union sctp_addr_param addrparam; + struct sctp_addip_param param; + struct sctp_chunk *retval; + int len = sizeof(param); + int addrlen; addrlen = af->to_addr_param(addr, &addrparam); if (!addrlen) @@ -2947,9 +2941,9 @@ struct sctp_chunk *sctp_make_asconf_set_prim(struct sctp_association *asoc, static struct sctp_chunk *sctp_make_asconf_ack(const struct sctp_association *asoc, __u32 serial, int vparam_len) { - sctp_addiphdr_t asconf; - struct sctp_chunk *retval; - int length = sizeof(asconf) + vparam_len; + struct sctp_addiphdr asconf; + struct sctp_chunk *retval; + int length = sizeof(asconf) + vparam_len; /* Create the chunk. */ retval = sctp_make_control(asoc, SCTP_CID_ASCONF_ACK, 0, length, @@ -2967,13 +2961,14 @@ static struct sctp_chunk *sctp_make_asconf_ack(const struct sctp_association *as /* Add response parameters to an ASCONF_ACK chunk. */ static void sctp_add_asconf_response(struct sctp_chunk *chunk, __be32 crr_id, - __be16 err_code, sctp_addip_param_t *asconf_param) + __be16 err_code, + struct sctp_addip_param *asconf_param) { - sctp_addip_param_t ack_param; - sctp_errhdr_t err_param; - int asconf_param_len = 0; - int err_param_len = 0; - __be16 response_type; + struct sctp_addip_param ack_param; + struct sctp_errhdr err_param; + int asconf_param_len = 0; + int err_param_len = 0; + __be16 response_type; if (SCTP_ERROR_NO_ERROR == err_code) { response_type = SCTP_PARAM_SUCCESS_REPORT; @@ -3008,15 +3003,15 @@ static void sctp_add_asconf_response(struct sctp_chunk *chunk, __be32 crr_id, /* Process a asconf parameter. */ static __be16 sctp_process_asconf_param(struct sctp_association *asoc, - struct sctp_chunk *asconf, - sctp_addip_param_t *asconf_param) + struct sctp_chunk *asconf, + struct sctp_addip_param *asconf_param) { + union sctp_addr_param *addr_param; struct sctp_transport *peer; - struct sctp_af *af; union sctp_addr addr; - union sctp_addr_param *addr_param; + struct sctp_af *af; - addr_param = (void *)asconf_param + sizeof(sctp_addip_param_t); + addr_param = (void *)asconf_param + sizeof(*asconf_param); if (asconf_param->param_hdr.type != SCTP_PARAM_ADD_IP && asconf_param->param_hdr.type != SCTP_PARAM_DEL_IP && @@ -3141,10 +3136,11 @@ bool sctp_verify_asconf(const struct sctp_association *asoc, struct sctp_chunk *chunk, bool addr_param_needed, struct sctp_paramhdr **errp) { - sctp_addip_chunk_t *addip = (sctp_addip_chunk_t *) chunk->chunk_hdr; - union sctp_params param; + struct sctp_addip_chunk *addip; bool addr_param_seen = false; + union sctp_params param; + addip = (struct sctp_addip_chunk *)chunk->chunk_hdr; sctp_walk_params(param, addip, addip_hdr.params) { size_t length = ntohs(param.p->length); @@ -3153,7 +3149,7 @@ bool sctp_verify_asconf(const struct sctp_association *asoc, case SCTP_PARAM_ERR_CAUSE: break; case SCTP_PARAM_IPV4_ADDRESS: - if (length != sizeof(sctp_ipv4addr_param_t)) + if (length != sizeof(struct sctp_ipv4addr_param)) return false; /* ensure there is only one addr param and it's in the * beginning of addip_hdr params, or we reject it. @@ -3163,7 +3159,7 @@ bool sctp_verify_asconf(const struct sctp_association *asoc, addr_param_seen = true; break; case SCTP_PARAM_IPV6_ADDRESS: - if (length != sizeof(sctp_ipv6addr_param_t)) + if (length != sizeof(struct sctp_ipv6addr_param)) return false; if (param.v != addip->addip_hdr.params) return false; @@ -3176,13 +3172,13 @@ bool sctp_verify_asconf(const struct sctp_association *asoc, if (addr_param_needed && !addr_param_seen) return false; length = ntohs(param.addip->param_hdr.length); - if (length < sizeof(sctp_addip_param_t) + + if (length < sizeof(struct sctp_addip_param) + sizeof(**errp)) return false; break; case SCTP_PARAM_SUCCESS_REPORT: case SCTP_PARAM_ADAPTATION_LAYER_IND: - if (length != sizeof(sctp_addip_param_t)) + if (length != sizeof(struct sctp_addip_param)) return false; break; default: @@ -3208,24 +3204,24 @@ bool sctp_verify_asconf(const struct sctp_association *asoc, struct sctp_chunk *sctp_process_asconf(struct sctp_association *asoc, struct sctp_chunk *asconf) { - sctp_addip_chunk_t *addip = (sctp_addip_chunk_t *) asconf->chunk_hdr; + union sctp_addr_param *addr_param; + struct sctp_addip_chunk *addip; + struct sctp_chunk *asconf_ack; bool all_param_pass = true; + struct sctp_addiphdr *hdr; + int length = 0, chunk_len; union sctp_params param; - sctp_addiphdr_t *hdr; - union sctp_addr_param *addr_param; - struct sctp_chunk *asconf_ack; - __be16 err_code; - int length = 0; - int chunk_len; - __u32 serial; + __be16 err_code; + __u32 serial; + addip = (struct sctp_addip_chunk *)asconf->chunk_hdr; chunk_len = ntohs(asconf->chunk_hdr->length) - sizeof(struct sctp_chunkhdr); - hdr = (sctp_addiphdr_t *)asconf->skb->data; + hdr = (struct sctp_addiphdr *)asconf->skb->data; serial = ntohl(hdr->serial); /* Skip the addiphdr and store a pointer to address parameter. */ - length = sizeof(sctp_addiphdr_t); + length = sizeof(*hdr); addr_param = (union sctp_addr_param *)(asconf->skb->data + length); chunk_len -= length; @@ -3291,16 +3287,16 @@ done: /* Process a asconf parameter that is successfully acked. */ static void sctp_asconf_param_success(struct sctp_association *asoc, - sctp_addip_param_t *asconf_param) + struct sctp_addip_param *asconf_param) { - struct sctp_af *af; - union sctp_addr addr; struct sctp_bind_addr *bp = &asoc->base.bind_addr; union sctp_addr_param *addr_param; - struct sctp_transport *transport; struct sctp_sockaddr_entry *saddr; + struct sctp_transport *transport; + union sctp_addr addr; + struct sctp_af *af; - addr_param = (void *)asconf_param + sizeof(sctp_addip_param_t); + addr_param = (void *)asconf_param + sizeof(*asconf_param); /* We have checked the packet before, so we do not check again. */ af = sctp_get_af_specific(param_type2af(addr_param->p.type)); @@ -3351,14 +3347,14 @@ static void sctp_asconf_param_success(struct sctp_association *asoc, * specific success indication is present for the parameter. */ static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack, - sctp_addip_param_t *asconf_param, - int no_err) + struct sctp_addip_param *asconf_param, + int no_err) { - sctp_addip_param_t *asconf_ack_param; - sctp_errhdr_t *err_param; - int length; - int asconf_ack_len; - __be16 err_code; + struct sctp_addip_param *asconf_ack_param; + struct sctp_errhdr *err_param; + int asconf_ack_len; + __be16 err_code; + int length; if (no_err) err_code = SCTP_ERROR_NO_ERROR; @@ -3371,9 +3367,9 @@ static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack, /* Skip the addiphdr from the asconf_ack chunk and store a pointer to * the first asconf_ack parameter. */ - length = sizeof(sctp_addiphdr_t); - asconf_ack_param = (sctp_addip_param_t *)(asconf_ack->skb->data + - length); + length = sizeof(struct sctp_addiphdr); + asconf_ack_param = (struct sctp_addip_param *)(asconf_ack->skb->data + + length); asconf_ack_len -= length; while (asconf_ack_len > 0) { @@ -3382,7 +3378,7 @@ static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack, case SCTP_PARAM_SUCCESS_REPORT: return SCTP_ERROR_NO_ERROR; case SCTP_PARAM_ERR_CAUSE: - length = sizeof(sctp_addip_param_t); + length = sizeof(*asconf_ack_param); err_param = (void *)asconf_ack_param + length; asconf_ack_len -= length; if (asconf_ack_len > 0) @@ -3407,20 +3403,20 @@ static __be16 sctp_get_asconf_response(struct sctp_chunk *asconf_ack, int sctp_process_asconf_ack(struct sctp_association *asoc, struct sctp_chunk *asconf_ack) { - struct sctp_chunk *asconf = asoc->addip_last_asconf; - union sctp_addr_param *addr_param; - sctp_addip_param_t *asconf_param; - int length = 0; - int asconf_len = asconf->skb->len; - int all_param_pass = 0; - int no_err = 1; - int retval = 0; - __be16 err_code = SCTP_ERROR_NO_ERROR; + struct sctp_chunk *asconf = asoc->addip_last_asconf; + struct sctp_addip_param *asconf_param; + __be16 err_code = SCTP_ERROR_NO_ERROR; + union sctp_addr_param *addr_param; + int asconf_len = asconf->skb->len; + int all_param_pass = 0; + int length = 0; + int no_err = 1; + int retval = 0; /* Skip the chunkhdr and addiphdr from the last asconf sent and store * a pointer to address parameter. */ - length = sizeof(sctp_addip_chunk_t); + length = sizeof(struct sctp_addip_chunk); addr_param = (union sctp_addr_param *)(asconf->skb->data + length); asconf_len -= length; @@ -3436,7 +3432,7 @@ int sctp_process_asconf_ack(struct sctp_association *asoc, * failures are indicated, then all request(s) are considered * successful. */ - if (asconf_ack->skb->len == sizeof(sctp_addiphdr_t)) + if (asconf_ack->skb->len == sizeof(struct sctp_addiphdr)) all_param_pass = 1; /* Process the TLVs contained in the last sent ASCONF chunk. */ @@ -3542,9 +3538,8 @@ struct sctp_chunk *sctp_make_fwdtsn(const struct sctp_association *asoc, * \ \ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ */ -static struct sctp_chunk *sctp_make_reconf( - const struct sctp_association *asoc, - int length) +static struct sctp_chunk *sctp_make_reconf(const struct sctp_association *asoc, + int length) { struct sctp_reconf_chunk *reconf; struct sctp_chunk *retval; @@ -3595,12 +3590,12 @@ static struct sctp_chunk *sctp_make_reconf( * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ */ struct sctp_chunk *sctp_make_strreset_req( - const struct sctp_association *asoc, - __u16 stream_num, __u16 *stream_list, - bool out, bool in) + const struct sctp_association *asoc, + __u16 stream_num, __be16 *stream_list, + bool out, bool in) { + __u16 stream_len = stream_num * sizeof(__u16); struct sctp_strreset_outreq outreq; - __u16 stream_len = stream_num * 2; struct sctp_strreset_inreq inreq; struct sctp_chunk *retval; __u16 outlen, inlen; @@ -3649,7 +3644,7 @@ struct sctp_chunk *sctp_make_strreset_req( * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ */ struct sctp_chunk *sctp_make_strreset_tsnreq( - const struct sctp_association *asoc) + const struct sctp_association *asoc) { struct sctp_strreset_tsnreq tsnreq; __u16 length = sizeof(tsnreq); @@ -3680,8 +3675,8 @@ struct sctp_chunk *sctp_make_strreset_tsnreq( * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ */ struct sctp_chunk *sctp_make_strreset_addstrm( - const struct sctp_association *asoc, - __u16 out, __u16 in) + const struct sctp_association *asoc, + __u16 out, __u16 in) { struct sctp_strreset_addstrm addstrm; __u16 size = sizeof(addstrm); @@ -3725,9 +3720,8 @@ struct sctp_chunk *sctp_make_strreset_addstrm( * | Result | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ */ -struct sctp_chunk *sctp_make_strreset_resp( - const struct sctp_association *asoc, - __u32 result, __u32 sn) +struct sctp_chunk *sctp_make_strreset_resp(const struct sctp_association *asoc, + __u32 result, __u32 sn) { struct sctp_strreset_resp resp; __u16 length = sizeof(resp); @@ -3762,10 +3756,10 @@ struct sctp_chunk *sctp_make_strreset_resp( * | Receiver's Next TSN (optional) | * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ */ -struct sctp_chunk *sctp_make_strreset_tsnresp( - struct sctp_association *asoc, - __u32 result, __u32 sn, - __u32 sender_tsn, __u32 receiver_tsn) +struct sctp_chunk *sctp_make_strreset_tsnresp(struct sctp_association *asoc, + __u32 result, __u32 sn, + __u32 sender_tsn, + __u32 receiver_tsn) { struct sctp_strreset_resptsn tsnresp; __u16 length = sizeof(tsnresp); @@ -3794,7 +3788,8 @@ bool sctp_verify_reconf(const struct sctp_association *asoc, { struct sctp_reconf_chunk *hdr; union sctp_params param; - __u16 last = 0, cnt = 0; + __be16 last = 0; + __u16 cnt = 0; hdr = (struct sctp_reconf_chunk *)chunk->chunk_hdr; sctp_walk_params(param, hdr, params) { diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index d6e5e9e0fd6d..df94d77401e7 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -50,23 +50,25 @@ #include <net/sock.h> #include <net/sctp/sctp.h> #include <net/sctp/sm.h> +#include <net/sctp/stream_sched.h> -static int sctp_cmd_interpreter(sctp_event_t event_type, - sctp_subtype_t subtype, - sctp_state_t state, +static int sctp_cmd_interpreter(enum sctp_event event_type, + union sctp_subtype subtype, + enum sctp_state state, struct sctp_endpoint *ep, struct sctp_association *asoc, void *event_arg, - sctp_disposition_t status, - sctp_cmd_seq_t *commands, + enum sctp_disposition status, + struct sctp_cmd_seq *commands, gfp_t gfp); -static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype, - sctp_state_t state, +static int sctp_side_effects(enum sctp_event event_type, + union sctp_subtype subtype, + enum sctp_state state, struct sctp_endpoint *ep, struct sctp_association **asoc, void *event_arg, - sctp_disposition_t status, - sctp_cmd_seq_t *commands, + enum sctp_disposition status, + struct sctp_cmd_seq *commands, gfp_t gfp); /******************************************************************** @@ -96,8 +98,8 @@ static void sctp_do_ecn_ce_work(struct sctp_association *asoc, * that was originally marked with the CE bit. */ static struct sctp_chunk *sctp_do_ecn_ecne_work(struct sctp_association *asoc, - __u32 lowest_tsn, - struct sctp_chunk *chunk) + __u32 lowest_tsn, + struct sctp_chunk *chunk) { struct sctp_chunk *repl; @@ -149,11 +151,11 @@ static void sctp_do_ecn_cwr_work(struct sctp_association *asoc, /* Generate SACK if necessary. We call this at the end of a packet. */ static int sctp_gen_sack(struct sctp_association *asoc, int force, - sctp_cmd_seq_t *commands) + struct sctp_cmd_seq *commands) { + struct sctp_transport *trans = asoc->peer.last_data_from; __u32 ctsn, max_tsn_seen; struct sctp_chunk *sack; - struct sctp_transport *trans = asoc->peer.last_data_from; int error = 0; if (force || @@ -241,13 +243,14 @@ nomem: /* When the T3-RTX timer expires, it calls this function to create the * relevant state machine event. */ -void sctp_generate_t3_rtx_event(unsigned long peer) +void sctp_generate_t3_rtx_event(struct timer_list *t) { - int error; - struct sctp_transport *transport = (struct sctp_transport *) peer; + struct sctp_transport *transport = + from_timer(transport, t, T3_rtx_timer); struct sctp_association *asoc = transport->asoc; struct sock *sk = asoc->base.sk; struct net *net = sock_net(sk); + int error; /* Check whether a task is in the sock. */ @@ -280,7 +283,7 @@ out_unlock: * for timeouts which use the association as their parameter. */ static void sctp_generate_timeout_event(struct sctp_association *asoc, - sctp_event_timeout_t timeout_type) + enum sctp_event_timeout timeout_type) { struct sock *sk = asoc->base.sk; struct net *net = sock_net(sk); @@ -317,55 +320,68 @@ out_unlock: sctp_association_put(asoc); } -static void sctp_generate_t1_cookie_event(unsigned long data) +static void sctp_generate_t1_cookie_event(struct timer_list *t) { - struct sctp_association *asoc = (struct sctp_association *) data; + struct sctp_association *asoc = + from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_T1_COOKIE]); + sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T1_COOKIE); } -static void sctp_generate_t1_init_event(unsigned long data) +static void sctp_generate_t1_init_event(struct timer_list *t) { - struct sctp_association *asoc = (struct sctp_association *) data; + struct sctp_association *asoc = + from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_T1_INIT]); + sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T1_INIT); } -static void sctp_generate_t2_shutdown_event(unsigned long data) +static void sctp_generate_t2_shutdown_event(struct timer_list *t) { - struct sctp_association *asoc = (struct sctp_association *) data; + struct sctp_association *asoc = + from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_T2_SHUTDOWN]); + sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T2_SHUTDOWN); } -static void sctp_generate_t4_rto_event(unsigned long data) +static void sctp_generate_t4_rto_event(struct timer_list *t) { - struct sctp_association *asoc = (struct sctp_association *) data; + struct sctp_association *asoc = + from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_T4_RTO]); + sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T4_RTO); } -static void sctp_generate_t5_shutdown_guard_event(unsigned long data) +static void sctp_generate_t5_shutdown_guard_event(struct timer_list *t) { - struct sctp_association *asoc = (struct sctp_association *)data; + struct sctp_association *asoc = + from_timer(asoc, t, + timers[SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD]); + sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD); } /* sctp_generate_t5_shutdown_guard_event() */ -static void sctp_generate_autoclose_event(unsigned long data) +static void sctp_generate_autoclose_event(struct timer_list *t) { - struct sctp_association *asoc = (struct sctp_association *) data; + struct sctp_association *asoc = + from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_AUTOCLOSE]); + sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_AUTOCLOSE); } /* Generate a heart beat event. If the sock is busy, reschedule. Make * sure that the transport is still valid. */ -void sctp_generate_heartbeat_event(unsigned long data) +void sctp_generate_heartbeat_event(struct timer_list *t) { - int error = 0; - struct sctp_transport *transport = (struct sctp_transport *) data; + struct sctp_transport *transport = from_timer(transport, t, hb_timer); struct sctp_association *asoc = transport->asoc; struct sock *sk = asoc->base.sk; struct net *net = sock_net(sk); u32 elapsed, timeout; + int error = 0; bh_lock_sock(sk); if (sock_owned_by_user(sk)) { @@ -403,9 +419,10 @@ out_unlock: /* Handle the timeout of the ICMP protocol unreachable timer. Trigger * the correct state machine transition that will close the association. */ -void sctp_generate_proto_unreach_event(unsigned long data) +void sctp_generate_proto_unreach_event(struct timer_list *t) { - struct sctp_transport *transport = (struct sctp_transport *) data; + struct sctp_transport *transport = + from_timer(transport, t, proto_unreach_timer); struct sctp_association *asoc = transport->asoc; struct sock *sk = asoc->base.sk; struct net *net = sock_net(sk); @@ -437,9 +454,10 @@ out_unlock: } /* Handle the timeout of the RE-CONFIG timer. */ -void sctp_generate_reconf_event(unsigned long data) +void sctp_generate_reconf_event(struct timer_list *t) { - struct sctp_transport *transport = (struct sctp_transport *)data; + struct sctp_transport *transport = + from_timer(transport, t, reconf_timer); struct sctp_association *asoc = transport->asoc; struct sock *sk = asoc->base.sk; struct net *net = sock_net(sk); @@ -469,24 +487,27 @@ out_unlock: } /* Inject a SACK Timeout event into the state machine. */ -static void sctp_generate_sack_event(unsigned long data) +static void sctp_generate_sack_event(struct timer_list *t) { - struct sctp_association *asoc = (struct sctp_association *) data; + struct sctp_association *asoc = + from_timer(asoc, t, timers[SCTP_EVENT_TIMEOUT_SACK]); + sctp_generate_timeout_event(asoc, SCTP_EVENT_TIMEOUT_SACK); } sctp_timer_event_t *sctp_timer_events[SCTP_NUM_TIMEOUT_TYPES] = { - NULL, - sctp_generate_t1_cookie_event, - sctp_generate_t1_init_event, - sctp_generate_t2_shutdown_event, - NULL, - sctp_generate_t4_rto_event, - sctp_generate_t5_shutdown_guard_event, - NULL, - NULL, - sctp_generate_sack_event, - sctp_generate_autoclose_event, + [SCTP_EVENT_TIMEOUT_NONE] = NULL, + [SCTP_EVENT_TIMEOUT_T1_COOKIE] = sctp_generate_t1_cookie_event, + [SCTP_EVENT_TIMEOUT_T1_INIT] = sctp_generate_t1_init_event, + [SCTP_EVENT_TIMEOUT_T2_SHUTDOWN] = sctp_generate_t2_shutdown_event, + [SCTP_EVENT_TIMEOUT_T3_RTX] = NULL, + [SCTP_EVENT_TIMEOUT_T4_RTO] = sctp_generate_t4_rto_event, + [SCTP_EVENT_TIMEOUT_T5_SHUTDOWN_GUARD] = + sctp_generate_t5_shutdown_guard_event, + [SCTP_EVENT_TIMEOUT_HEARTBEAT] = NULL, + [SCTP_EVENT_TIMEOUT_RECONF] = NULL, + [SCTP_EVENT_TIMEOUT_SACK] = sctp_generate_sack_event, + [SCTP_EVENT_TIMEOUT_AUTOCLOSE] = sctp_generate_autoclose_event, }; @@ -505,7 +526,7 @@ sctp_timer_event_t *sctp_timer_events[SCTP_NUM_TIMEOUT_TYPES] = { * notification SHOULD be sent to the upper layer. * */ -static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands, +static void sctp_do_8_2_transport_strike(struct sctp_cmd_seq *commands, struct sctp_association *asoc, struct sctp_transport *transport, int is_hb) @@ -577,7 +598,7 @@ static void sctp_do_8_2_transport_strike(sctp_cmd_seq_t *commands, } /* Worker routine to handle INIT command failure. */ -static void sctp_cmd_init_failed(sctp_cmd_seq_t *commands, +static void sctp_cmd_init_failed(struct sctp_cmd_seq *commands, struct sctp_association *asoc, unsigned int error) { @@ -600,15 +621,16 @@ static void sctp_cmd_init_failed(sctp_cmd_seq_t *commands, } /* Worker routine to handle SCTP_CMD_ASSOC_FAILED. */ -static void sctp_cmd_assoc_failed(sctp_cmd_seq_t *commands, +static void sctp_cmd_assoc_failed(struct sctp_cmd_seq *commands, struct sctp_association *asoc, - sctp_event_t event_type, - sctp_subtype_t subtype, + enum sctp_event event_type, + union sctp_subtype subtype, struct sctp_chunk *chunk, unsigned int error) { struct sctp_ulpevent *event; struct sctp_chunk *abort; + /* Cancel any partial delivery in progress. */ sctp_ulpq_abort_pd(&asoc->ulpq, GFP_ATOMIC); @@ -644,7 +666,7 @@ static void sctp_cmd_assoc_failed(sctp_cmd_seq_t *commands, * since all other cases use "temporary" associations and can do all * their work in statefuns directly. */ -static int sctp_cmd_process_init(sctp_cmd_seq_t *commands, +static int sctp_cmd_process_init(struct sctp_cmd_seq *commands, struct sctp_association *asoc, struct sctp_chunk *chunk, struct sctp_init_chunk *peer_init, @@ -666,7 +688,7 @@ static int sctp_cmd_process_init(sctp_cmd_seq_t *commands, } /* Helper function to break out starting up of heartbeat timers. */ -static void sctp_cmd_hb_timers_start(sctp_cmd_seq_t *cmds, +static void sctp_cmd_hb_timers_start(struct sctp_cmd_seq *cmds, struct sctp_association *asoc) { struct sctp_transport *t; @@ -679,7 +701,7 @@ static void sctp_cmd_hb_timers_start(sctp_cmd_seq_t *cmds, sctp_transport_reset_hb_timer(t); } -static void sctp_cmd_hb_timers_stop(sctp_cmd_seq_t *cmds, +static void sctp_cmd_hb_timers_stop(struct sctp_cmd_seq *cmds, struct sctp_association *asoc) { struct sctp_transport *t; @@ -694,7 +716,7 @@ static void sctp_cmd_hb_timers_stop(sctp_cmd_seq_t *cmds, } /* Helper function to stop any pending T3-RTX timers */ -static void sctp_cmd_t3_rtx_timers_stop(sctp_cmd_seq_t *cmds, +static void sctp_cmd_t3_rtx_timers_stop(struct sctp_cmd_seq *cmds, struct sctp_association *asoc) { struct sctp_transport *t; @@ -708,12 +730,12 @@ static void sctp_cmd_t3_rtx_timers_stop(sctp_cmd_seq_t *cmds, /* Helper function to handle the reception of an HEARTBEAT ACK. */ -static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds, +static void sctp_cmd_transport_on(struct sctp_cmd_seq *cmds, struct sctp_association *asoc, struct sctp_transport *t, struct sctp_chunk *chunk) { - sctp_sender_hb_info_t *hbinfo; + struct sctp_sender_hb_info *hbinfo; int was_unconfirmed = 0; /* 8.3 Upon the receipt of the HEARTBEAT ACK, the sender of the @@ -767,7 +789,7 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds, if (t->rto_pending == 0) t->rto_pending = 1; - hbinfo = (sctp_sender_hb_info_t *) chunk->skb->data; + hbinfo = (struct sctp_sender_hb_info *)chunk->skb->data; sctp_transport_update_rto(t, (jiffies - hbinfo->sent_at)); /* Update the heartbeat timer. */ @@ -779,7 +801,7 @@ static void sctp_cmd_transport_on(sctp_cmd_seq_t *cmds, /* Helper function to process the process SACK command. */ -static int sctp_cmd_process_sack(sctp_cmd_seq_t *cmds, +static int sctp_cmd_process_sack(struct sctp_cmd_seq *cmds, struct sctp_association *asoc, struct sctp_chunk *chunk) { @@ -801,7 +823,7 @@ static int sctp_cmd_process_sack(sctp_cmd_seq_t *cmds, /* Helper function to set the timeout value for T2-SHUTDOWN timer and to set * the transport for a shutdown chunk. */ -static void sctp_cmd_setup_t2(sctp_cmd_seq_t *cmds, +static void sctp_cmd_setup_t2(struct sctp_cmd_seq *cmds, struct sctp_association *asoc, struct sctp_chunk *chunk) { @@ -818,7 +840,7 @@ static void sctp_cmd_setup_t2(sctp_cmd_seq_t *cmds, asoc->timeouts[SCTP_EVENT_TIMEOUT_T2_SHUTDOWN] = t->rto; } -static void sctp_cmd_assoc_update(sctp_cmd_seq_t *cmds, +static void sctp_cmd_assoc_update(struct sctp_cmd_seq *cmds, struct sctp_association *asoc, struct sctp_association *new) { @@ -828,7 +850,7 @@ static void sctp_cmd_assoc_update(sctp_cmd_seq_t *cmds, if (!sctp_assoc_update(asoc, new)) return; - abort = sctp_make_abort(asoc, NULL, sizeof(sctp_errhdr_t)); + abort = sctp_make_abort(asoc, NULL, sizeof(struct sctp_errhdr)); if (abort) { sctp_init_cause(abort, SCTP_ERROR_RSRC_LOW, 0); sctp_add_cmd_sf(cmds, SCTP_CMD_REPLY, SCTP_CHUNK(abort)); @@ -841,9 +863,9 @@ static void sctp_cmd_assoc_update(sctp_cmd_seq_t *cmds, } /* Helper function to change the state of an association. */ -static void sctp_cmd_new_state(sctp_cmd_seq_t *cmds, +static void sctp_cmd_new_state(struct sctp_cmd_seq *cmds, struct sctp_association *asoc, - sctp_state_t state) + enum sctp_state state) { struct sock *sk = asoc->base.sk; @@ -901,7 +923,7 @@ static void sctp_cmd_new_state(sctp_cmd_seq_t *cmds, } /* Helper function to delete an association. */ -static void sctp_cmd_delete_tcb(sctp_cmd_seq_t *cmds, +static void sctp_cmd_delete_tcb(struct sctp_cmd_seq *cmds, struct sctp_association *asoc) { struct sock *sk = asoc->base.sk; @@ -923,9 +945,9 @@ static void sctp_cmd_delete_tcb(sctp_cmd_seq_t *cmds, * destination address (we use active path instead of primary path just * because primary path may be inactive. */ -static void sctp_cmd_setup_t4(sctp_cmd_seq_t *cmds, - struct sctp_association *asoc, - struct sctp_chunk *chunk) +static void sctp_cmd_setup_t4(struct sctp_cmd_seq *cmds, + struct sctp_association *asoc, + struct sctp_chunk *chunk) { struct sctp_transport *t; @@ -935,7 +957,7 @@ static void sctp_cmd_setup_t4(sctp_cmd_seq_t *cmds, } /* Process an incoming Operation Error Chunk. */ -static void sctp_cmd_process_operr(sctp_cmd_seq_t *cmds, +static void sctp_cmd_process_operr(struct sctp_cmd_seq *cmds, struct sctp_association *asoc, struct sctp_chunk *chunk) { @@ -990,6 +1012,7 @@ static void sctp_cmd_process_fwdtsn(struct sctp_ulpq *ulpq, struct sctp_chunk *chunk) { struct sctp_fwdtsn_skip *skip; + /* Walk through all the skipped SSNs */ sctp_walk_fwdtsn(skip, chunk) { sctp_ulpq_skip(ulpq, ntohs(skip->stream), ntohs(skip->ssn)); @@ -1002,8 +1025,8 @@ static void sctp_cmd_process_fwdtsn(struct sctp_ulpq *ulpq, static void sctp_cmd_del_non_primary(struct sctp_association *asoc) { struct sctp_transport *t; - struct list_head *pos; struct list_head *temp; + struct list_head *pos; list_for_each_safe(pos, temp, &asoc->peer.transport_addr_list) { t = list_entry(pos, struct sctp_transport, transports); @@ -1024,9 +1047,9 @@ static void sctp_cmd_set_sk_err(struct sctp_association *asoc, int error) } /* Helper function to generate an association change event */ -static void sctp_cmd_assoc_change(sctp_cmd_seq_t *commands, - struct sctp_association *asoc, - u8 state) +static void sctp_cmd_assoc_change(struct sctp_cmd_seq *commands, + struct sctp_association *asoc, + u8 state) { struct sctp_ulpevent *ev; @@ -1039,7 +1062,7 @@ static void sctp_cmd_assoc_change(sctp_cmd_seq_t *commands, } /* Helper function to generate an adaptation indication event */ -static void sctp_cmd_adaptation_ind(sctp_cmd_seq_t *commands, +static void sctp_cmd_adaptation_ind(struct sctp_cmd_seq *commands, struct sctp_association *asoc) { struct sctp_ulpevent *ev; @@ -1052,8 +1075,8 @@ static void sctp_cmd_adaptation_ind(sctp_cmd_seq_t *commands, static void sctp_cmd_t1_timer_update(struct sctp_association *asoc, - sctp_event_timeout_t timer, - char *name) + enum sctp_event_timeout timer, + char *name) { struct sctp_transport *t; @@ -1086,6 +1109,8 @@ static void sctp_cmd_send_msg(struct sctp_association *asoc, list_for_each_entry(chunk, &msg->chunks, frag_list) sctp_outq_tail(&asoc->outqueue, chunk, gfp); + + asoc->outqueue.sched->enqueue(&asoc->outqueue, msg); } @@ -1139,22 +1164,20 @@ static void sctp_cmd_send_asconf(struct sctp_association *asoc) * If you want to understand all of lksctp, this is a * good place to start. */ -int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype, - sctp_state_t state, - struct sctp_endpoint *ep, - struct sctp_association *asoc, - void *event_arg, - gfp_t gfp) +int sctp_do_sm(struct net *net, enum sctp_event event_type, + union sctp_subtype subtype, enum sctp_state state, + struct sctp_endpoint *ep, struct sctp_association *asoc, + void *event_arg, gfp_t gfp) { - sctp_cmd_seq_t commands; - const sctp_sm_table_entry_t *state_fn; - sctp_disposition_t status; - int error = 0; - typedef const char *(printfn_t)(sctp_subtype_t); + typedef const char *(printfn_t)(union sctp_subtype); static printfn_t *table[] = { NULL, sctp_cname, sctp_tname, sctp_oname, sctp_pname, }; printfn_t *debug_fn __attribute__ ((unused)) = table[event_type]; + const struct sctp_sm_table_entry *state_fn; + struct sctp_cmd_seq commands; + enum sctp_disposition status; + int error = 0; /* Look up the state function, run it, and then process the * side effects. These three steps are the heart of lksctp. @@ -1178,13 +1201,14 @@ int sctp_do_sm(struct net *net, sctp_event_t event_type, sctp_subtype_t subtype, /***************************************************************** * This the master state function side effect processing function. *****************************************************************/ -static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype, - sctp_state_t state, +static int sctp_side_effects(enum sctp_event event_type, + union sctp_subtype subtype, + enum sctp_state state, struct sctp_endpoint *ep, struct sctp_association **asoc, void *event_arg, - sctp_disposition_t status, - sctp_cmd_seq_t *commands, + enum sctp_disposition status, + struct sctp_cmd_seq *commands, gfp_t gfp) { int error; @@ -1263,29 +1287,27 @@ bail: ********************************************************************/ /* This is the side-effect interpreter. */ -static int sctp_cmd_interpreter(sctp_event_t event_type, - sctp_subtype_t subtype, - sctp_state_t state, +static int sctp_cmd_interpreter(enum sctp_event event_type, + union sctp_subtype subtype, + enum sctp_state state, struct sctp_endpoint *ep, struct sctp_association *asoc, void *event_arg, - sctp_disposition_t status, - sctp_cmd_seq_t *commands, + enum sctp_disposition status, + struct sctp_cmd_seq *commands, gfp_t gfp) { - struct sock *sk = ep->base.sk; - struct sctp_sock *sp = sctp_sk(sk); - int error = 0; - int force; - sctp_cmd_t *cmd; - struct sctp_chunk *new_obj; - struct sctp_chunk *chunk = NULL; + struct sctp_sock *sp = sctp_sk(ep->base.sk); + struct sctp_chunk *chunk = NULL, *new_obj; struct sctp_packet *packet; + struct sctp_sackhdr sackh; struct timer_list *timer; - unsigned long timeout; struct sctp_transport *t; - struct sctp_sackhdr sackh; + unsigned long timeout; + struct sctp_cmd *cmd; int local_cork = 0; + int error = 0; + int force; if (SCTP_EVENT_T_TIMEOUT != event_type) chunk = event_arg; @@ -1607,12 +1629,12 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, break; case SCTP_CMD_INIT_FAILED: - sctp_cmd_init_failed(commands, asoc, cmd->obj.err); + sctp_cmd_init_failed(commands, asoc, cmd->obj.u32); break; case SCTP_CMD_ASSOC_FAILED: sctp_cmd_assoc_failed(commands, asoc, event_type, - subtype, chunk, cmd->obj.err); + subtype, chunk, cmd->obj.u32); break; case SCTP_CMD_INIT_COUNTER_INC: @@ -1680,8 +1702,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, case SCTP_CMD_PROCESS_CTSN: /* Dummy up a SACK for processing. */ sackh.cum_tsn_ack = cmd->obj.be32; - sackh.a_rwnd = asoc->peer.rwnd + - asoc->outqueue.outstanding_bytes; + sackh.a_rwnd = htonl(asoc->peer.rwnd + + asoc->outqueue.outstanding_bytes); sackh.num_gap_ack_blocks = 0; sackh.num_dup_tsns = 0; chunk->subh.sack_hdr = &sackh; diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index b2a74c3823ee..8f8ccded13e4 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -59,103 +59,110 @@ #include <net/sctp/sm.h> #include <net/sctp/structs.h> -static struct sctp_packet *sctp_abort_pkt_new(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - struct sctp_chunk *chunk, - const void *payload, - size_t paylen); +static struct sctp_packet *sctp_abort_pkt_new( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + struct sctp_chunk *chunk, + const void *payload, size_t paylen); static int sctp_eat_data(const struct sctp_association *asoc, struct sctp_chunk *chunk, - sctp_cmd_seq_t *commands); -static struct sctp_packet *sctp_ootb_pkt_new(struct net *net, - const struct sctp_association *asoc, - const struct sctp_chunk *chunk); + struct sctp_cmd_seq *commands); +static struct sctp_packet *sctp_ootb_pkt_new( + struct net *net, + const struct sctp_association *asoc, + const struct sctp_chunk *chunk); static void sctp_send_stale_cookie_err(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const struct sctp_chunk *chunk, - sctp_cmd_seq_t *commands, + struct sctp_cmd_seq *commands, struct sctp_chunk *err_chunk); -static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands); -static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands); -static sctp_disposition_t sctp_sf_tabort_8_4_8(struct net *net, +static enum sctp_disposition sctp_sf_do_5_2_6_stale( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands); +static enum sctp_disposition sctp_sf_shut_8_4_5( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands); +static enum sctp_disposition sctp_sf_tabort_8_4_8( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, - sctp_cmd_seq_t *commands); + struct sctp_cmd_seq *commands); static struct sctp_sackhdr *sctp_sm_pull_sack(struct sctp_chunk *chunk); -static sctp_disposition_t sctp_stop_t1_and_abort(struct net *net, - sctp_cmd_seq_t *commands, - __be16 error, int sk_err, - const struct sctp_association *asoc, - struct sctp_transport *transport); +static enum sctp_disposition sctp_stop_t1_and_abort( + struct net *net, + struct sctp_cmd_seq *commands, + __be16 error, int sk_err, + const struct sctp_association *asoc, + struct sctp_transport *transport); -static sctp_disposition_t sctp_sf_abort_violation( - struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - void *arg, - sctp_cmd_seq_t *commands, - const __u8 *payload, - const size_t paylen); +static enum sctp_disposition sctp_sf_abort_violation( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + void *arg, + struct sctp_cmd_seq *commands, + const __u8 *payload, + const size_t paylen); -static sctp_disposition_t sctp_sf_violation_chunklen( - struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands); +static enum sctp_disposition sctp_sf_violation_chunklen( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands); -static sctp_disposition_t sctp_sf_violation_paramlen( - struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, void *ext, - sctp_cmd_seq_t *commands); +static enum sctp_disposition sctp_sf_violation_paramlen( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, void *ext, + struct sctp_cmd_seq *commands); -static sctp_disposition_t sctp_sf_violation_ctsn( - struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands); +static enum sctp_disposition sctp_sf_violation_ctsn( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands); -static sctp_disposition_t sctp_sf_violation_chunk( - struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands); +static enum sctp_disposition sctp_sf_violation_chunk( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands); -static sctp_ierror_t sctp_sf_authenticate(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - struct sctp_chunk *chunk); +static enum sctp_ierror sctp_sf_authenticate( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + struct sctp_chunk *chunk); -static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net, +static enum sctp_disposition __sctp_sf_do_9_1_abort( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, - sctp_cmd_seq_t *commands); + struct sctp_cmd_seq *commands); /* Small helper function that checks if the chunk length * is of the appropriate length. The 'required_length' argument @@ -164,8 +171,8 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net, * false = Invalid length * */ -static inline bool -sctp_chunk_length_valid(struct sctp_chunk *chunk, __u16 required_length) +static inline bool sctp_chunk_length_valid(struct sctp_chunk *chunk, + __u16 required_length) { __u16 chunk_length = ntohs(chunk->chunk_hdr->length); @@ -213,12 +220,11 @@ sctp_chunk_length_valid(struct sctp_chunk *chunk, __u16 required_length) * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_4_C(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_do_4_C(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, struct sctp_cmd_seq *commands) { struct sctp_chunk *chunk = arg; struct sctp_ulpevent *ev; @@ -299,19 +305,17 @@ sctp_disposition_t sctp_sf_do_4_C(struct net *net, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_do_5_1B_init(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { - struct sctp_chunk *chunk = arg; - struct sctp_chunk *repl; + struct sctp_chunk *chunk = arg, *repl, *err_chunk; + struct sctp_unrecognized_param *unk_param; struct sctp_association *new_asoc; - struct sctp_chunk *err_chunk; struct sctp_packet *packet; - sctp_unrecognized_param_t *unk_param; int len; /* 6.10 Bundling @@ -435,7 +439,7 @@ sctp_disposition_t sctp_sf_do_5_1B_init(struct net *net, * construct the parameters in INIT ACK by copying the * ERROR causes over. */ - unk_param = (sctp_unrecognized_param_t *) + unk_param = (struct sctp_unrecognized_param *) ((__u8 *)(err_chunk->chunk_hdr) + sizeof(struct sctp_chunkhdr)); /* Replace the cause code with the "Unrecognized parameter" @@ -495,15 +499,15 @@ nomem: * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_do_5_1C_ack(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { - struct sctp_chunk *chunk = arg; struct sctp_init_chunk *initchunk; + struct sctp_chunk *chunk = arg; struct sctp_chunk *err_chunk; struct sctp_packet *packet; @@ -518,7 +522,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net, return sctp_sf_violation_chunk(net, ep, asoc, type, arg, commands); /* Make sure that the INIT-ACK chunk has a valid length */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_initack_chunk_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_initack_chunk))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* Grab the INIT header. */ @@ -530,7 +534,7 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net, (struct sctp_init_chunk *)chunk->chunk_hdr, chunk, &err_chunk)) { - sctp_error_t error = SCTP_ERROR_NO_RESOURCE; + enum sctp_error error = SCTP_ERROR_NO_RESOURCE; /* This chunk contains fatal error. It is to be discarded. * Send an ABORT, with causes. If there are no causes, @@ -645,20 +649,21 @@ sctp_disposition_t sctp_sf_do_5_1C_ack(struct net *net, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_do_5_1D_ce(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { - struct sctp_chunk *chunk = arg; + struct sctp_ulpevent *ev, *ai_ev = NULL; struct sctp_association *new_asoc; struct sctp_init_chunk *peer_init; - struct sctp_chunk *repl; - struct sctp_ulpevent *ev, *ai_ev = NULL; - int error = 0; + struct sctp_chunk *chunk = arg; struct sctp_chunk *err_chk_p; + struct sctp_chunk *repl; struct sock *sk; + int error = 0; /* If the packet is an OOTB packet which is temporarily on the * control endpoint, respond with an ABORT. @@ -758,7 +763,7 @@ sctp_disposition_t sctp_sf_do_5_1D_ce(struct net *net, */ if (chunk->auth_chunk) { struct sctp_chunk auth; - sctp_ierror_t ret; + enum sctp_ierror ret; /* Make sure that we and the peer are AUTH capable */ if (!net->sctp.auth_enable || !new_asoc->peer.auth_capable) { @@ -872,11 +877,12 @@ nomem: * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_5_1E_ca(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_do_5_1E_ca(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { struct sctp_chunk *chunk = arg; struct sctp_ulpevent *ev; @@ -950,11 +956,12 @@ nomem: } /* Generate and sendout a heartbeat packet. */ -static sctp_disposition_t sctp_sf_heartbeat(const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +static enum sctp_disposition sctp_sf_heartbeat( + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { struct sctp_transport *transport = (struct sctp_transport *) arg; struct sctp_chunk *reply; @@ -975,12 +982,12 @@ static sctp_disposition_t sctp_sf_heartbeat(const struct sctp_endpoint *ep, } /* Generate a HEARTBEAT packet on the given transport. */ -sctp_disposition_t sctp_sf_sendbeat_8_3(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_sendbeat_8_3(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { struct sctp_transport *transport = (struct sctp_transport *) arg; @@ -1023,11 +1030,12 @@ sctp_disposition_t sctp_sf_sendbeat_8_3(struct net *net, } /* resend asoc strreset_chunk. */ -sctp_disposition_t sctp_sf_send_reconf(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_send_reconf(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { struct sctp_transport *transport = arg; @@ -1074,12 +1082,11 @@ sctp_disposition_t sctp_sf_send_reconf(struct net *net, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_beat_8_3(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_beat_8_3(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, struct sctp_cmd_seq *commands) { struct sctp_paramhdr *param_hdr; struct sctp_chunk *chunk = arg; @@ -1090,7 +1097,8 @@ sctp_disposition_t sctp_sf_beat_8_3(struct net *net, return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the HEARTBEAT chunk has a valid length. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_heartbeat_chunk_t))) + if (!sctp_chunk_length_valid(chunk, + sizeof(struct sctp_heartbeat_chunk))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); @@ -1098,7 +1106,7 @@ sctp_disposition_t sctp_sf_beat_8_3(struct net *net, * respond with a HEARTBEAT ACK that contains the Heartbeat * Information field copied from the received HEARTBEAT chunk. */ - chunk->subh.hb_hdr = (sctp_heartbeathdr_t *)chunk->skb->data; + chunk->subh.hb_hdr = (struct sctp_heartbeathdr *)chunk->skb->data; param_hdr = (struct sctp_paramhdr *)chunk->subh.hb_hdr; paylen = ntohs(chunk->chunk_hdr->length) - sizeof(struct sctp_chunkhdr); @@ -1148,34 +1156,32 @@ nomem: * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_backbeat_8_3(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_backbeat_8_3(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { + struct sctp_sender_hb_info *hbinfo; struct sctp_chunk *chunk = arg; - union sctp_addr from_addr; struct sctp_transport *link; - sctp_sender_hb_info_t *hbinfo; unsigned long max_interval; + union sctp_addr from_addr; if (!sctp_vtag_verify(chunk, asoc)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the HEARTBEAT-ACK chunk has a valid length. */ if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_chunkhdr) + - sizeof(sctp_sender_hb_info_t))) + sizeof(*hbinfo))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); - hbinfo = (sctp_sender_hb_info_t *) chunk->skb->data; + hbinfo = (struct sctp_sender_hb_info *)chunk->skb->data; /* Make sure that the length of the parameter is what we expect */ - if (ntohs(hbinfo->param_hdr.length) != - sizeof(sctp_sender_hb_info_t)) { + if (ntohs(hbinfo->param_hdr.length) != sizeof(*hbinfo)) return SCTP_DISPOSITION_DISCARD; - } from_addr = hbinfo->daddr; link = sctp_assoc_lookup_paddr(asoc, &from_addr); @@ -1227,15 +1233,15 @@ sctp_disposition_t sctp_sf_backbeat_8_3(struct net *net, */ static int sctp_sf_send_restart_abort(struct net *net, union sctp_addr *ssa, struct sctp_chunk *init, - sctp_cmd_seq_t *commands) + struct sctp_cmd_seq *commands) { - int len; - struct sctp_packet *pkt; + struct sctp_af *af = sctp_get_af_specific(ssa->v4.sin_family); union sctp_addr_param *addrparm; struct sctp_errhdr *errhdr; + char buffer[sizeof(*errhdr) + sizeof(*addrparm)]; struct sctp_endpoint *ep; - char buffer[sizeof(struct sctp_errhdr)+sizeof(union sctp_addr_param)]; - struct sctp_af *af = sctp_get_af_specific(ssa->v4.sin_family); + struct sctp_packet *pkt; + int len; /* Build the error on the stack. We are way to malloc crazy * throughout the code today. @@ -1245,7 +1251,7 @@ static int sctp_sf_send_restart_abort(struct net *net, union sctp_addr *ssa, /* Copy into a parm format. */ len = af->to_addr_param(ssa, addrparm); - len += sizeof(sctp_errhdr_t); + len += sizeof(*errhdr); errhdr->cause = SCTP_ERROR_RESTART; errhdr->length = htons(len); @@ -1292,7 +1298,7 @@ static bool list_has_sctp_addr(const struct list_head *list, static int sctp_sf_check_restart_addrs(const struct sctp_association *new_asoc, const struct sctp_association *asoc, struct sctp_chunk *init, - sctp_cmd_seq_t *commands) + struct sctp_cmd_seq *commands) { struct net *net = sock_net(new_asoc->base.sk); struct sctp_transport *new_addr; @@ -1412,20 +1418,19 @@ static char sctp_tietags_compare(struct sctp_association *new_asoc, /* Common helper routine for both duplicate and simulataneous INIT * chunk handling. */ -static sctp_disposition_t sctp_sf_do_unexpected_init( - struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, sctp_cmd_seq_t *commands) +static enum sctp_disposition sctp_sf_do_unexpected_init( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { - sctp_disposition_t retval; - struct sctp_chunk *chunk = arg; - struct sctp_chunk *repl; + struct sctp_chunk *chunk = arg, *repl, *err_chunk; + struct sctp_unrecognized_param *unk_param; struct sctp_association *new_asoc; - struct sctp_chunk *err_chunk; + enum sctp_disposition retval; struct sctp_packet *packet; - sctp_unrecognized_param_t *unk_param; int len; /* 6.10 Bundling @@ -1555,7 +1560,7 @@ static sctp_disposition_t sctp_sf_do_unexpected_init( * construct the parameters in INIT ACK by copying the * ERROR causes over. */ - unk_param = (sctp_unrecognized_param_t *) + unk_param = (struct sctp_unrecognized_param *) ((__u8 *)(err_chunk->chunk_hdr) + sizeof(struct sctp_chunkhdr)); /* Replace the cause code with the "Unrecognized parameter" @@ -1626,12 +1631,13 @@ cleanup: * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_5_2_1_siminit(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_do_5_2_1_siminit( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { /* Call helper to do the real work for both simulataneous and * duplicate INIT chunk handling. @@ -1680,12 +1686,13 @@ sctp_disposition_t sctp_sf_do_5_2_1_siminit(struct net *net, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_5_2_2_dupinit(struct net *net, +enum sctp_disposition sctp_sf_do_5_2_2_dupinit( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, - sctp_cmd_seq_t *commands) + struct sctp_cmd_seq *commands) { /* Call helper to do the real work for both simulataneous and * duplicate INIT chunk handling. @@ -1703,11 +1710,13 @@ sctp_disposition_t sctp_sf_do_5_2_2_dupinit(struct net *net, * An unexpected INIT ACK usually indicates the processing of an old or * duplicated INIT chunk. */ -sctp_disposition_t sctp_sf_do_5_2_3_initack(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_do_5_2_3_initack( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { /* Per the above section, we'll discard the chunk if we have an * endpoint. If this is an OOTB INIT-ACK, treat it as such. @@ -1723,18 +1732,19 @@ sctp_disposition_t sctp_sf_do_5_2_3_initack(struct net *net, * Section 5.2.4 * A) In this case, the peer may have restarted. */ -static sctp_disposition_t sctp_sf_do_dupcook_a(struct net *net, +static enum sctp_disposition sctp_sf_do_dupcook_a( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, struct sctp_chunk *chunk, - sctp_cmd_seq_t *commands, + struct sctp_cmd_seq *commands, struct sctp_association *new_asoc) { struct sctp_init_chunk *peer_init; + enum sctp_disposition disposition; struct sctp_ulpevent *ev; struct sctp_chunk *repl; struct sctp_chunk *err; - sctp_disposition_t disposition; /* new_asoc is a brand-new association, so these are not yet * side effects--it is safe to run them here. @@ -1838,11 +1848,12 @@ nomem: * after responding to the local endpoint's INIT */ /* This case represents an initialization collision. */ -static sctp_disposition_t sctp_sf_do_dupcook_b(struct net *net, +static enum sctp_disposition sctp_sf_do_dupcook_b( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, struct sctp_chunk *chunk, - sctp_cmd_seq_t *commands, + struct sctp_cmd_seq *commands, struct sctp_association *new_asoc) { struct sctp_init_chunk *peer_init; @@ -1909,11 +1920,12 @@ nomem: * but a new tag of its own. */ /* This case represents an initialization collision. */ -static sctp_disposition_t sctp_sf_do_dupcook_c(struct net *net, +static enum sctp_disposition sctp_sf_do_dupcook_c( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, struct sctp_chunk *chunk, - sctp_cmd_seq_t *commands, + struct sctp_cmd_seq *commands, struct sctp_association *new_asoc) { /* The cookie should be silently discarded. @@ -1931,11 +1943,12 @@ static sctp_disposition_t sctp_sf_do_dupcook_c(struct net *net, * enter the ESTABLISHED state, if it has not already done so. */ /* This case represents an initialization collision. */ -static sctp_disposition_t sctp_sf_do_dupcook_d(struct net *net, +static enum sctp_disposition sctp_sf_do_dupcook_d( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, struct sctp_chunk *chunk, - sctp_cmd_seq_t *commands, + struct sctp_cmd_seq *commands, struct sctp_association *new_asoc) { struct sctp_ulpevent *ev = NULL, *ai_ev = NULL; @@ -2026,19 +2039,20 @@ nomem: * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_5_2_4_dupcook(struct net *net, +enum sctp_disposition sctp_sf_do_5_2_4_dupcook( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, - sctp_cmd_seq_t *commands) + struct sctp_cmd_seq *commands) { - sctp_disposition_t retval; - struct sctp_chunk *chunk = arg; struct sctp_association *new_asoc; + struct sctp_chunk *chunk = arg; + enum sctp_disposition retval; + struct sctp_chunk *err_chk_p; int error = 0; char action; - struct sctp_chunk *err_chk_p; /* Make sure that the chunk has a valid length from the protocol * perspective. In this case check to make sure we have at least @@ -2144,13 +2158,13 @@ nomem: * * See sctp_sf_do_9_1_abort(). */ -sctp_disposition_t sctp_sf_shutdown_pending_abort( - struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_shutdown_pending_abort( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { struct sctp_chunk *chunk = arg; @@ -2167,7 +2181,7 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort( * as we do not know its true length. So, to be safe, discard the * packet. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_abort_chunk))) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* ADD-IP: Special case for ABORT chunks @@ -2187,12 +2201,13 @@ sctp_disposition_t sctp_sf_shutdown_pending_abort( * * See sctp_sf_do_9_1_abort(). */ -sctp_disposition_t sctp_sf_shutdown_sent_abort(struct net *net, +enum sctp_disposition sctp_sf_shutdown_sent_abort( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, - sctp_cmd_seq_t *commands) + struct sctp_cmd_seq *commands) { struct sctp_chunk *chunk = arg; @@ -2209,7 +2224,7 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(struct net *net, * as we do not know its true length. So, to be safe, discard the * packet. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_abort_chunk))) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* ADD-IP: Special case for ABORT chunks @@ -2237,13 +2252,13 @@ sctp_disposition_t sctp_sf_shutdown_sent_abort(struct net *net, * * See sctp_sf_do_9_1_abort(). */ -sctp_disposition_t sctp_sf_shutdown_ack_sent_abort( - struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_shutdown_ack_sent_abort( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { /* The same T2 timer, so we should be able to use * common function with the SHUTDOWN-SENT state. @@ -2265,15 +2280,16 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_abort( * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_cookie_echoed_err(struct net *net, +enum sctp_disposition sctp_sf_cookie_echoed_err( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, - sctp_cmd_seq_t *commands) + struct sctp_cmd_seq *commands) { struct sctp_chunk *chunk = arg; - sctp_errhdr_t *err; + struct sctp_errhdr *err; if (!sctp_vtag_verify(chunk, asoc)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); @@ -2281,7 +2297,7 @@ sctp_disposition_t sctp_sf_cookie_echoed_err(struct net *net, /* Make sure that the ERROR chunk has a valid length. * The parameter walking depends on this as well. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_operr_chunk_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_operr_chunk))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); @@ -2329,20 +2345,20 @@ sctp_disposition_t sctp_sf_cookie_echoed_err(struct net *net, * * The return value is the disposition of the chunk. */ -static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +static enum sctp_disposition sctp_sf_do_5_2_6_stale( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { - struct sctp_chunk *chunk = arg; - u32 stale; - sctp_cookie_preserve_param_t bht; - sctp_errhdr_t *err; - struct sctp_chunk *reply; - struct sctp_bind_addr *bp; int attempts = asoc->init_err_counter + 1; + struct sctp_chunk *chunk = arg, *reply; + struct sctp_cookie_preserve_param bht; + struct sctp_bind_addr *bp; + struct sctp_errhdr *err; + u32 stale; if (attempts > asoc->max_init_attempts) { sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, @@ -2352,7 +2368,7 @@ static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net, return SCTP_DISPOSITION_DELETE_TCB; } - err = (sctp_errhdr_t *)(chunk->skb->data); + err = (struct sctp_errhdr *)(chunk->skb->data); /* When calculating the time extension, an implementation * SHOULD use the RTT information measured based on the @@ -2368,7 +2384,7 @@ static sctp_disposition_t sctp_sf_do_5_2_6_stale(struct net *net, * to give ample time to retransmit the new cookie and thus * yield a higher probability of success on the reattempt. */ - stale = ntohl(*(__be32 *)((u8 *)err + sizeof(sctp_errhdr_t))); + stale = ntohl(*(__be32 *)((u8 *)err + sizeof(*err))); stale = (stale * 2) / 1000; bht.param_hdr.type = SCTP_PARAM_COOKIE_PRESERVATIVE; @@ -2452,12 +2468,13 @@ nomem: * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_9_1_abort(struct net *net, +enum sctp_disposition sctp_sf_do_9_1_abort( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, - sctp_cmd_seq_t *commands) + struct sctp_cmd_seq *commands) { struct sctp_chunk *chunk = arg; @@ -2474,7 +2491,7 @@ sctp_disposition_t sctp_sf_do_9_1_abort(struct net *net, * as we do not know its true length. So, to be safe, discard the * packet. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_abort_chunk))) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* ADD-IP: Special case for ABORT chunks @@ -2489,27 +2506,29 @@ sctp_disposition_t sctp_sf_do_9_1_abort(struct net *net, return __sctp_sf_do_9_1_abort(net, ep, asoc, type, arg, commands); } -static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net, +static enum sctp_disposition __sctp_sf_do_9_1_abort( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, - sctp_cmd_seq_t *commands) + struct sctp_cmd_seq *commands) { + __be16 error = SCTP_ERROR_NO_ERROR; struct sctp_chunk *chunk = arg; unsigned int len; - __be16 error = SCTP_ERROR_NO_ERROR; /* See if we have an error cause code in the chunk. */ len = ntohs(chunk->chunk_hdr->length); if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr)) { + struct sctp_errhdr *err; - sctp_errhdr_t *err; sctp_walk_errors(err, chunk->chunk_hdr); if ((void *)err != (void *)chunk->chunk_end) - return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, + commands); - error = ((sctp_errhdr_t *)chunk->skb->data)->cause; + error = ((struct sctp_errhdr *)chunk->skb->data)->cause; } sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ECONNRESET)); @@ -2526,16 +2545,17 @@ static sctp_disposition_t __sctp_sf_do_9_1_abort(struct net *net, * * See sctp_sf_do_9_1_abort() above. */ -sctp_disposition_t sctp_sf_cookie_wait_abort(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_cookie_wait_abort( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { + __be16 error = SCTP_ERROR_NO_ERROR; struct sctp_chunk *chunk = arg; unsigned int len; - __be16 error = SCTP_ERROR_NO_ERROR; if (!sctp_vtag_verify_either(chunk, asoc)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); @@ -2550,13 +2570,13 @@ sctp_disposition_t sctp_sf_cookie_wait_abort(struct net *net, * as we do not know its true length. So, to be safe, discard the * packet. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_abort_chunk_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_abort_chunk))) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* See if we have an error cause code in the chunk. */ len = ntohs(chunk->chunk_hdr->length); if (len >= sizeof(struct sctp_chunkhdr) + sizeof(struct sctp_errhdr)) - error = ((sctp_errhdr_t *)chunk->skb->data)->cause; + error = ((struct sctp_errhdr *)chunk->skb->data)->cause; return sctp_stop_t1_and_abort(net, commands, error, ECONNREFUSED, asoc, chunk->transport); @@ -2565,12 +2585,13 @@ sctp_disposition_t sctp_sf_cookie_wait_abort(struct net *net, /* * Process an incoming ICMP as an ABORT. (COOKIE-WAIT state) */ -sctp_disposition_t sctp_sf_cookie_wait_icmp_abort(struct net *net, +enum sctp_disposition sctp_sf_cookie_wait_icmp_abort( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, - sctp_cmd_seq_t *commands) + struct sctp_cmd_seq *commands) { return sctp_stop_t1_and_abort(net, commands, SCTP_ERROR_NO_ERROR, ENOPROTOOPT, asoc, @@ -2580,12 +2601,13 @@ sctp_disposition_t sctp_sf_cookie_wait_icmp_abort(struct net *net, /* * Process an ABORT. (COOKIE-ECHOED state) */ -sctp_disposition_t sctp_sf_cookie_echoed_abort(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_cookie_echoed_abort( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { /* There is a single T1 timer, so we should be able to use * common function with the COOKIE-WAIT state. @@ -2598,11 +2620,12 @@ sctp_disposition_t sctp_sf_cookie_echoed_abort(struct net *net, * * This is common code called by several sctp_sf_*_abort() functions above. */ -static sctp_disposition_t sctp_stop_t1_and_abort(struct net *net, - sctp_cmd_seq_t *commands, - __be16 error, int sk_err, - const struct sctp_association *asoc, - struct sctp_transport *transport) +static enum sctp_disposition sctp_stop_t1_and_abort( + struct net *net, + struct sctp_cmd_seq *commands, + __be16 error, int sk_err, + const struct sctp_association *asoc, + struct sctp_transport *transport) { pr_debug("%s: ABORT received (INIT)\n", __func__); @@ -2652,16 +2675,17 @@ static sctp_disposition_t sctp_stop_t1_and_abort(struct net *net, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_9_2_shutdown(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_do_9_2_shutdown( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { + enum sctp_disposition disposition; struct sctp_chunk *chunk = arg; - sctp_shutdownhdr_t *sdh; - sctp_disposition_t disposition; + struct sctp_shutdownhdr *sdh; struct sctp_ulpevent *ev; __u32 ctsn; @@ -2669,14 +2693,13 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown(struct net *net, return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the SHUTDOWN chunk has a valid length. */ - if (!sctp_chunk_length_valid(chunk, - sizeof(struct sctp_shutdown_chunk_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_shutdown_chunk))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); /* Convert the elaborate header. */ - sdh = (sctp_shutdownhdr_t *)chunk->skb->data; - skb_pull(chunk->skb, sizeof(sctp_shutdownhdr_t)); + sdh = (struct sctp_shutdownhdr *)chunk->skb->data; + skb_pull(chunk->skb, sizeof(*sdh)); chunk->subh.shutdown_hdr = sdh; ctsn = ntohl(sdh->cum_tsn_ack); @@ -2742,27 +2765,27 @@ out: * The Cumulative TSN Ack of the received SHUTDOWN chunk * MUST be processed. */ -sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_do_9_2_shut_ctsn( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { struct sctp_chunk *chunk = arg; - sctp_shutdownhdr_t *sdh; + struct sctp_shutdownhdr *sdh; __u32 ctsn; if (!sctp_vtag_verify(chunk, asoc)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the SHUTDOWN chunk has a valid length. */ - if (!sctp_chunk_length_valid(chunk, - sizeof(struct sctp_shutdown_chunk_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_shutdown_chunk))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); - sdh = (sctp_shutdownhdr_t *)chunk->skb->data; + sdh = (struct sctp_shutdownhdr *)chunk->skb->data; ctsn = ntohl(sdh->cum_tsn_ack); if (TSN_lt(ctsn, asoc->ctsn_ack_point)) { @@ -2796,14 +2819,15 @@ sctp_disposition_t sctp_sf_do_9_2_shut_ctsn(struct net *net, * that belong to this association, it should discard the INIT chunk and * retransmit the SHUTDOWN ACK chunk. */ -sctp_disposition_t sctp_sf_do_9_2_reshutack(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_do_9_2_reshutack( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { - struct sctp_chunk *chunk = (struct sctp_chunk *) arg; + struct sctp_chunk *chunk = arg; struct sctp_chunk *reply; /* Make sure that the chunk has a valid length */ @@ -2860,26 +2884,26 @@ nomem: * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_ecn_cwr(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_do_ecn_cwr(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { - sctp_cwrhdr_t *cwr; struct sctp_chunk *chunk = arg; + struct sctp_cwrhdr *cwr; u32 lowest_tsn; if (!sctp_vtag_verify(chunk, asoc)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_ecne_chunk_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_ecne_chunk))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); - cwr = (sctp_cwrhdr_t *) chunk->skb->data; - skb_pull(chunk->skb, sizeof(sctp_cwrhdr_t)); + cwr = (struct sctp_cwrhdr *)chunk->skb->data; + skb_pull(chunk->skb, sizeof(*cwr)); lowest_tsn = ntohl(cwr->lowest_tsn); @@ -2916,25 +2940,24 @@ sctp_disposition_t sctp_sf_do_ecn_cwr(struct net *net, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_ecne(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_do_ecne(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, struct sctp_cmd_seq *commands) { - sctp_ecnehdr_t *ecne; struct sctp_chunk *chunk = arg; + struct sctp_ecnehdr *ecne; if (!sctp_vtag_verify(chunk, asoc)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_ecne_chunk_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_ecne_chunk))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); - ecne = (sctp_ecnehdr_t *) chunk->skb->data; - skb_pull(chunk->skb, sizeof(sctp_ecnehdr_t)); + ecne = (struct sctp_ecnehdr *)chunk->skb->data; + skb_pull(chunk->skb, sizeof(*ecne)); /* If this is a newer ECNE than the last CWR packet we sent out */ sctp_add_cmd_sf(commands, SCTP_CMD_ECN_ECNE, @@ -2973,15 +2996,15 @@ sctp_disposition_t sctp_sf_do_ecne(struct net *net, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_eat_data_6_2(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_eat_data_6_2(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { + union sctp_arg force = SCTP_NOFORCE(); struct sctp_chunk *chunk = arg; - sctp_arg_t force = SCTP_NOFORCE(); int error; if (!sctp_vtag_verify(chunk, asoc)) { @@ -3093,12 +3116,13 @@ discard_noforce: * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_eat_data_fast_4_4( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { struct sctp_chunk *chunk = arg; int error; @@ -3184,22 +3208,22 @@ sctp_disposition_t sctp_sf_eat_data_fast_4_4(struct net *net, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_eat_sack_6_2(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_eat_sack_6_2(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { struct sctp_chunk *chunk = arg; - sctp_sackhdr_t *sackh; + struct sctp_sackhdr *sackh; __u32 ctsn; if (!sctp_vtag_verify(chunk, asoc)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the SACK chunk has a valid length. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_sack_chunk_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_sack_chunk))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); @@ -3258,12 +3282,13 @@ sctp_disposition_t sctp_sf_eat_sack_6_2(struct net *net, * * The return value is the disposition of the chunk. */ -static sctp_disposition_t sctp_sf_tabort_8_4_8(struct net *net, +static enum sctp_disposition sctp_sf_tabort_8_4_8( + struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, - sctp_cmd_seq_t *commands) + struct sctp_cmd_seq *commands) { struct sctp_packet *packet = NULL; struct sctp_chunk *chunk = arg; @@ -3308,21 +3333,21 @@ static sctp_disposition_t sctp_sf_tabort_8_4_8(struct net *net, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_operr_notify(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_operr_notify(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { struct sctp_chunk *chunk = arg; - sctp_errhdr_t *err; + struct sctp_errhdr *err; if (!sctp_vtag_verify(chunk, asoc)) return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); /* Make sure that the ERROR chunk has a valid length. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_operr_chunk_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_operr_chunk))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); sctp_walk_errors(err, chunk->chunk_hdr); @@ -3346,12 +3371,12 @@ sctp_disposition_t sctp_sf_operr_notify(struct net *net, * * The return value is the disposition. */ -sctp_disposition_t sctp_sf_do_9_2_final(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_do_9_2_final(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { struct sctp_chunk *chunk = arg; struct sctp_chunk *reply; @@ -3429,20 +3454,19 @@ nomem: * receiver of the OOTB packet shall discard the OOTB packet and take * no further action. */ -sctp_disposition_t sctp_sf_ootb(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_ootb(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, struct sctp_cmd_seq *commands) { struct sctp_chunk *chunk = arg; struct sk_buff *skb = chunk->skb; struct sctp_chunkhdr *ch; - sctp_errhdr_t *err; - __u8 *ch_end; - int ootb_shut_ack = 0; + struct sctp_errhdr *err; int ootb_cookie_ack = 0; + int ootb_shut_ack = 0; + __u8 *ch_end; SCTP_INC_STATS(net, SCTP_MIB_OUTOFBLUES); @@ -3518,16 +3542,17 @@ sctp_disposition_t sctp_sf_ootb(struct net *net, * (endpoint, asoc, type, arg, commands) * * Outputs - * (sctp_disposition_t) + * (enum sctp_disposition) * * The return value is the disposition of the chunk. */ -static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +static enum sctp_disposition sctp_sf_shut_8_4_5( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { struct sctp_packet *packet = NULL; struct sctp_chunk *chunk = arg; @@ -3584,12 +3609,12 @@ static sctp_disposition_t sctp_sf_shut_8_4_5(struct net *net, * chunks. --piggy ] * */ -sctp_disposition_t sctp_sf_do_8_5_1_E_sa(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_do_8_5_1_E_sa(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { struct sctp_chunk *chunk = arg; @@ -3609,17 +3634,18 @@ sctp_disposition_t sctp_sf_do_8_5_1_E_sa(struct net *net, } /* ADDIP Section 4.2 Upon reception of an ASCONF Chunk. */ -sctp_disposition_t sctp_sf_do_asconf(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_do_asconf(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { - struct sctp_chunk *chunk = arg; - struct sctp_chunk *asconf_ack = NULL; - struct sctp_paramhdr *err_param = NULL; - sctp_addiphdr_t *hdr; - __u32 serial; + struct sctp_paramhdr *err_param = NULL; + struct sctp_chunk *asconf_ack = NULL; + struct sctp_chunk *chunk = arg; + struct sctp_addiphdr *hdr; + __u32 serial; if (!sctp_vtag_verify(chunk, asoc)) { sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG, @@ -3634,14 +3660,15 @@ sctp_disposition_t sctp_sf_do_asconf(struct net *net, * described in [I-D.ietf-tsvwg-sctp-auth]. */ if (!net->sctp.addip_noauth && !chunk->auth) - return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); + return sctp_sf_discard_chunk(net, ep, asoc, type, arg, + commands); /* Make sure that the ASCONF ADDIP chunk has a valid length. */ - if (!sctp_chunk_length_valid(chunk, sizeof(sctp_addip_chunk_t))) + if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_addip_chunk))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); - hdr = (sctp_addiphdr_t *)chunk->skb->data; + hdr = (struct sctp_addiphdr *)chunk->skb->data; serial = ntohl(hdr->serial); /* Verify the ASCONF chunk before processing it. */ @@ -3725,18 +3752,19 @@ sctp_disposition_t sctp_sf_do_asconf(struct net *net, * When building TLV parameters for the ASCONF Chunk that will add or * delete IP addresses the D0 to D13 rules should be applied: */ -sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_do_asconf_ack(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { - struct sctp_chunk *asconf_ack = arg; - struct sctp_chunk *last_asconf = asoc->addip_last_asconf; - struct sctp_chunk *abort; - struct sctp_paramhdr *err_param = NULL; - sctp_addiphdr_t *addip_hdr; - __u32 sent_serial, rcvd_serial; + struct sctp_chunk *last_asconf = asoc->addip_last_asconf; + struct sctp_paramhdr *err_param = NULL; + struct sctp_chunk *asconf_ack = arg; + struct sctp_addiphdr *addip_hdr; + __u32 sent_serial, rcvd_serial; + struct sctp_chunk *abort; if (!sctp_vtag_verify(asconf_ack, asoc)) { sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_BAD_TAG, @@ -3751,14 +3779,16 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net, * described in [I-D.ietf-tsvwg-sctp-auth]. */ if (!net->sctp.addip_noauth && !asconf_ack->auth) - return sctp_sf_discard_chunk(net, ep, asoc, type, arg, commands); + return sctp_sf_discard_chunk(net, ep, asoc, type, arg, + commands); /* Make sure that the ADDIP chunk has a valid length. */ - if (!sctp_chunk_length_valid(asconf_ack, sizeof(sctp_addip_chunk_t))) + if (!sctp_chunk_length_valid(asconf_ack, + sizeof(struct sctp_addip_chunk))) return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, commands); - addip_hdr = (sctp_addiphdr_t *)asconf_ack->skb->data; + addip_hdr = (struct sctp_addiphdr *)asconf_ack->skb->data; rcvd_serial = ntohl(addip_hdr->serial); /* Verify the ASCONF-ACK chunk before processing it. */ @@ -3767,7 +3797,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net, (void *)err_param, commands); if (last_asconf) { - addip_hdr = (sctp_addiphdr_t *)last_asconf->subh.addip_hdr; + addip_hdr = (struct sctp_addiphdr *)last_asconf->subh.addip_hdr; sent_serial = ntohl(addip_hdr->serial); } else { sent_serial = asoc->addip_serial - 1; @@ -3782,7 +3812,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net, if (ADDIP_SERIAL_gte(rcvd_serial, sent_serial + 1) && !(asoc->addip_last_asconf)) { abort = sctp_make_abort(asoc, asconf_ack, - sizeof(sctp_errhdr_t)); + sizeof(struct sctp_errhdr)); if (abort) { sctp_init_cause(abort, SCTP_ERROR_ASCONF_ACK, 0); sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, @@ -3818,7 +3848,7 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net, } abort = sctp_make_abort(asoc, asconf_ack, - sizeof(sctp_errhdr_t)); + sizeof(struct sctp_errhdr)); if (abort) { sctp_init_cause(abort, SCTP_ERROR_RSRC_LOW, 0); sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, @@ -3841,11 +3871,12 @@ sctp_disposition_t sctp_sf_do_asconf_ack(struct net *net, } /* RE-CONFIG Section 5.2 Upon reception of an RECONF Chunk. */ -sctp_disposition_t sctp_sf_do_reconf(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_do_reconf(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { struct sctp_paramhdr *err_param = NULL; struct sctp_chunk *chunk = arg; @@ -3917,15 +3948,15 @@ sctp_disposition_t sctp_sf_do_reconf(struct net *net, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_eat_fwd_tsn(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_eat_fwd_tsn(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { - struct sctp_chunk *chunk = arg; struct sctp_fwdtsn_hdr *fwdtsn_hdr; + struct sctp_chunk *chunk = arg; struct sctp_fwdtsn_skip *skip; __u16 len; __u32 tsn; @@ -3987,16 +4018,16 @@ discard_noforce: return SCTP_DISPOSITION_DISCARD; } -sctp_disposition_t sctp_sf_eat_fwd_tsn_fast( - struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_eat_fwd_tsn_fast( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { - struct sctp_chunk *chunk = arg; struct sctp_fwdtsn_hdr *fwdtsn_hdr; + struct sctp_chunk *chunk = arg; struct sctp_fwdtsn_skip *skip; __u16 len; __u32 tsn; @@ -4079,23 +4110,23 @@ gen_shutdown: * * The return value is the disposition of the chunk. */ -static sctp_ierror_t sctp_sf_authenticate(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - struct sctp_chunk *chunk) +static enum sctp_ierror sctp_sf_authenticate( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + struct sctp_chunk *chunk) { struct sctp_authhdr *auth_hdr; + __u8 *save_digest, *digest; struct sctp_hmac *hmac; unsigned int sig_len; __u16 key_id; - __u8 *save_digest; - __u8 *digest; /* Pull in the auth header, so we can do some more verification */ auth_hdr = (struct sctp_authhdr *)chunk->skb->data; chunk->subh.auth_hdr = auth_hdr; - skb_pull(chunk->skb, sizeof(struct sctp_authhdr)); + skb_pull(chunk->skb, sizeof(*auth_hdr)); /* Make sure that we support the HMAC algorithm from the auth * chunk. @@ -4114,7 +4145,8 @@ static sctp_ierror_t sctp_sf_authenticate(struct net *net, /* Make sure that the length of the signature matches what * we expect. */ - sig_len = ntohs(chunk->chunk_hdr->length) - sizeof(sctp_auth_chunk_t); + sig_len = ntohs(chunk->chunk_hdr->length) - + sizeof(struct sctp_auth_chunk); hmac = sctp_auth_get_hmac(ntohs(auth_hdr->hmac_id)); if (sig_len != hmac->hmac_len) return SCTP_IERROR_PROTO_VIOLATION; @@ -4136,8 +4168,8 @@ static sctp_ierror_t sctp_sf_authenticate(struct net *net, memset(digest, 0, sig_len); sctp_auth_calculate_hmac(asoc, chunk->skb, - (struct sctp_auth_chunk *)chunk->chunk_hdr, - GFP_ATOMIC); + (struct sctp_auth_chunk *)chunk->chunk_hdr, + GFP_ATOMIC); /* Discard the packet if the digests do not match */ if (memcmp(save_digest, digest, sig_len)) { @@ -4153,17 +4185,16 @@ nomem: return SCTP_IERROR_NOMEM; } -sctp_disposition_t sctp_sf_eat_auth(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_eat_auth(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, struct sctp_cmd_seq *commands) { - struct sctp_authhdr *auth_hdr; struct sctp_chunk *chunk = arg; + struct sctp_authhdr *auth_hdr; struct sctp_chunk *err_chunk; - sctp_ierror_t error; + enum sctp_ierror error; /* Make sure that the peer has AUTH capable */ if (!asoc->peer.auth_capable) @@ -4250,12 +4281,12 @@ sctp_disposition_t sctp_sf_eat_auth(struct net *net, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_unk_chunk(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_unk_chunk(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { struct sctp_chunk *unk_chunk = arg; struct sctp_chunk *err_chunk; @@ -4330,12 +4361,12 @@ sctp_disposition_t sctp_sf_unk_chunk(struct net *net, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_discard_chunk(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_discard_chunk(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { struct sctp_chunk *chunk = arg; @@ -4370,12 +4401,11 @@ sctp_disposition_t sctp_sf_discard_chunk(struct net *net, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_pdiscard(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_pdiscard(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, struct sctp_cmd_seq *commands) { SCTP_INC_STATS(net, SCTP_MIB_IN_PKT_DISCARDS); sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL()); @@ -4398,12 +4428,12 @@ sctp_disposition_t sctp_sf_pdiscard(struct net *net, * We simply tag the chunk as a violation. The state machine will log * the violation and continue. */ -sctp_disposition_t sctp_sf_violation(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_violation(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { struct sctp_chunk *chunk = arg; @@ -4418,14 +4448,14 @@ sctp_disposition_t sctp_sf_violation(struct net *net, /* * Common function to handle a protocol violation. */ -static sctp_disposition_t sctp_sf_abort_violation( - struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - void *arg, - sctp_cmd_seq_t *commands, - const __u8 *payload, - const size_t paylen) +static enum sctp_disposition sctp_sf_abort_violation( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + void *arg, + struct sctp_cmd_seq *commands, + const __u8 *payload, + const size_t paylen) { struct sctp_packet *packet = NULL; struct sctp_chunk *chunk = arg; @@ -4454,11 +4484,10 @@ static sctp_disposition_t sctp_sf_abort_violation( /* Treat INIT-ACK as a special case during COOKIE-WAIT. */ if (chunk->chunk_hdr->type == SCTP_CID_INIT_ACK && !asoc->peer.i.init_tag) { - sctp_initack_chunk_t *initack; + struct sctp_initack_chunk *initack; - initack = (sctp_initack_chunk_t *)chunk->chunk_hdr; - if (!sctp_chunk_length_valid(chunk, - sizeof(sctp_initack_chunk_t))) + initack = (struct sctp_initack_chunk *)chunk->chunk_hdr; + if (!sctp_chunk_length_valid(chunk, sizeof(*initack))) abort->chunk_hdr->flags |= SCTP_CHUNK_FLAG_T; else { unsigned int inittag; @@ -4521,7 +4550,7 @@ nomem: * Handle a protocol violation when the chunk length is invalid. * "Invalid" length is identified as smaller than the minimal length a * given chunk can be. For example, a SACK chunk has invalid length - * if its length is set to be smaller than the size of sctp_sack_chunk_t. + * if its length is set to be smaller than the size of struct sctp_sack_chunk. * * We inform the other end by sending an ABORT with a Protocol Violation * error code. @@ -4536,18 +4565,18 @@ nomem: * * Generate an ABORT chunk and terminate the association. */ -static sctp_disposition_t sctp_sf_violation_chunklen( - struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +static enum sctp_disposition sctp_sf_violation_chunklen( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { static const char err_str[] = "The following chunk had invalid length:"; return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str, - sizeof(err_str)); + sizeof(err_str)); } /* @@ -4556,17 +4585,17 @@ static sctp_disposition_t sctp_sf_violation_chunklen( * or accumulated length in multi parameters exceeds the end of the chunk, * the length is considered as invalid. */ -static sctp_disposition_t sctp_sf_violation_paramlen( - struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, void *ext, - sctp_cmd_seq_t *commands) +static enum sctp_disposition sctp_sf_violation_paramlen( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, void *ext, + struct sctp_cmd_seq *commands) { - struct sctp_chunk *chunk = arg; struct sctp_paramhdr *param = ext; struct sctp_chunk *abort = NULL; + struct sctp_chunk *chunk = arg; if (sctp_auth_recv_cid(SCTP_CID_ABORT, asoc)) goto discard; @@ -4599,18 +4628,18 @@ nomem: * We inform the other end by sending an ABORT with a Protocol Violation * error code. */ -static sctp_disposition_t sctp_sf_violation_ctsn( - struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +static enum sctp_disposition sctp_sf_violation_ctsn( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { static const char err_str[] = "The cumulative tsn ack beyond the max tsn currently sent:"; return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str, - sizeof(err_str)); + sizeof(err_str)); } /* Handle protocol violation of an invalid chunk bundling. For example, @@ -4619,13 +4648,13 @@ static sctp_disposition_t sctp_sf_violation_ctsn( * statement from the specs. Additionally, there might be an attacker * on the path and we may not want to continue this communication. */ -static sctp_disposition_t sctp_sf_violation_chunk( - struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +static enum sctp_disposition sctp_sf_violation_chunk( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { static const char err_str[] = "The following chunk violates protocol:"; @@ -4633,7 +4662,7 @@ static sctp_disposition_t sctp_sf_violation_chunk( return sctp_sf_violation(net, ep, asoc, type, arg, commands); return sctp_sf_abort_violation(net, ep, asoc, arg, commands, err_str, - sizeof(err_str)); + sizeof(err_str)); } /*************************************************************************** * These are the state functions for handling primitive (Section 10) events. @@ -4695,15 +4724,15 @@ static sctp_disposition_t sctp_sf_violation_chunk( * * The return value is a disposition. */ -sctp_disposition_t sctp_sf_do_prm_asoc(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_do_prm_asoc(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { - struct sctp_chunk *repl; struct sctp_association *my_asoc; + struct sctp_chunk *repl; /* The comment below says that we enter COOKIE-WAIT AFTER * sending the INIT, but that doesn't actually work in our @@ -4807,12 +4836,12 @@ nomem: * * The return value is the disposition. */ -sctp_disposition_t sctp_sf_do_prm_send(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_do_prm_send(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { struct sctp_datamsg *msg = arg; @@ -4846,15 +4875,15 @@ sctp_disposition_t sctp_sf_do_prm_send(struct net *net, * * The return value is the disposition. */ -sctp_disposition_t sctp_sf_do_9_2_prm_shutdown( - struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_do_9_2_prm_shutdown( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { - int disposition; + enum sctp_disposition disposition; /* From 9.2 Shutdown of an Association * Upon receipt of the SHUTDOWN primitive from its upper @@ -4872,6 +4901,7 @@ sctp_disposition_t sctp_sf_do_9_2_prm_shutdown( disposition = sctp_sf_do_9_2_start_shutdown(net, ep, asoc, type, arg, commands); } + return disposition; } @@ -4902,13 +4932,13 @@ sctp_disposition_t sctp_sf_do_9_2_prm_shutdown( * * The return value is the disposition. */ -sctp_disposition_t sctp_sf_do_9_1_prm_abort( - struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_do_9_1_prm_abort( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { /* From 9.1 Abort of an Association * Upon receipt of the ABORT primitive from its upper @@ -4940,12 +4970,12 @@ sctp_disposition_t sctp_sf_do_9_1_prm_abort( } /* We tried an illegal operation on an association which is closed. */ -sctp_disposition_t sctp_sf_error_closed(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_error_closed(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_ERROR, SCTP_ERROR(-EINVAL)); return SCTP_DISPOSITION_CONSUME; @@ -4954,12 +4984,13 @@ sctp_disposition_t sctp_sf_error_closed(struct net *net, /* We tried an illegal operation on an association which is shutting * down. */ -sctp_disposition_t sctp_sf_error_shutdown(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_error_shutdown( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { sctp_add_cmd_sf(commands, SCTP_CMD_REPORT_ERROR, SCTP_ERROR(-ESHUTDOWN)); @@ -4980,13 +5011,13 @@ sctp_disposition_t sctp_sf_error_shutdown(struct net *net, * Outputs * (timers) */ -sctp_disposition_t sctp_sf_cookie_wait_prm_shutdown( - struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_cookie_wait_prm_shutdown( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, SCTP_TO(SCTP_EVENT_TIMEOUT_T1_INIT)); @@ -5015,12 +5046,13 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_shutdown( * Outputs * (timers) */ -sctp_disposition_t sctp_sf_cookie_echoed_prm_shutdown( - struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_cookie_echoed_prm_shutdown( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { /* There is a single T1 timer, so we should be able to use * common function with the COOKIE-WAIT state. @@ -5042,13 +5074,13 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_shutdown( * Outputs * (timers) */ -sctp_disposition_t sctp_sf_cookie_wait_prm_abort( - struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_cookie_wait_prm_abort( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { struct sctp_chunk *abort = arg; @@ -5091,13 +5123,13 @@ sctp_disposition_t sctp_sf_cookie_wait_prm_abort( * Outputs * (timers) */ -sctp_disposition_t sctp_sf_cookie_echoed_prm_abort( - struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_cookie_echoed_prm_abort( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { /* There is a single T1 timer, so we should be able to use * common function with the COOKIE-WAIT state. @@ -5117,13 +5149,13 @@ sctp_disposition_t sctp_sf_cookie_echoed_prm_abort( * Outputs * (timers) */ -sctp_disposition_t sctp_sf_shutdown_pending_prm_abort( - struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_shutdown_pending_prm_abort( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { /* Stop the T5-shutdown guard timer. */ sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, @@ -5144,13 +5176,13 @@ sctp_disposition_t sctp_sf_shutdown_pending_prm_abort( * Outputs * (timers) */ -sctp_disposition_t sctp_sf_shutdown_sent_prm_abort( - struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_shutdown_sent_prm_abort( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { /* Stop the T2-shutdown timer. */ sctp_add_cmd_sf(commands, SCTP_CMD_TIMER_STOP, @@ -5175,13 +5207,13 @@ sctp_disposition_t sctp_sf_shutdown_sent_prm_abort( * Outputs * (timers) */ -sctp_disposition_t sctp_sf_shutdown_ack_sent_prm_abort( - struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_shutdown_ack_sent_prm_abort( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { /* The same T2 timer, so we should be able to use * common function with the SHUTDOWN-SENT state. @@ -5211,13 +5243,13 @@ sctp_disposition_t sctp_sf_shutdown_ack_sent_prm_abort( * o destination transport address - the transport address of the * association on which a heartbeat should be issued. */ -sctp_disposition_t sctp_sf_do_prm_requestheartbeat( +enum sctp_disposition sctp_sf_do_prm_requestheartbeat( struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, - const sctp_subtype_t type, + const union sctp_subtype type, void *arg, - sctp_cmd_seq_t *commands) + struct sctp_cmd_seq *commands) { if (SCTP_DISPOSITION_NOMEM == sctp_sf_heartbeat(ep, asoc, type, (struct sctp_transport *)arg, commands)) @@ -5244,12 +5276,12 @@ sctp_disposition_t sctp_sf_do_prm_requestheartbeat( * When an endpoint has an ASCONF signaled change to be sent to the * remote endpoint it should do A1 to A9 */ -sctp_disposition_t sctp_sf_do_prm_asconf(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_do_prm_asconf(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { struct sctp_chunk *chunk = arg; @@ -5261,11 +5293,12 @@ sctp_disposition_t sctp_sf_do_prm_asconf(struct net *net, } /* RE-CONFIG Section 5.1 RECONF Chunk Procedures */ -sctp_disposition_t sctp_sf_do_prm_reconf(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_do_prm_reconf(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { struct sctp_chunk *chunk = arg; @@ -5278,13 +5311,13 @@ sctp_disposition_t sctp_sf_do_prm_reconf(struct net *net, * * The return value is the disposition of the primitive. */ -sctp_disposition_t sctp_sf_ignore_primitive( - struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_ignore_primitive( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { pr_debug("%s: primitive type:%d is ignored\n", __func__, type.primitive); @@ -5302,13 +5335,13 @@ sctp_disposition_t sctp_sf_ignore_primitive( * subscribes to this event, if there is no data to be sent or * retransmit, the stack will immediately send up this notification. */ -sctp_disposition_t sctp_sf_do_no_pending_tsn( - struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_do_no_pending_tsn( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { struct sctp_ulpevent *event; @@ -5334,13 +5367,13 @@ sctp_disposition_t sctp_sf_do_no_pending_tsn( * * The return value is the disposition. */ -sctp_disposition_t sctp_sf_do_9_2_start_shutdown( - struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_do_9_2_start_shutdown( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { struct sctp_chunk *reply; @@ -5404,15 +5437,15 @@ nomem: * * The return value is the disposition. */ -sctp_disposition_t sctp_sf_do_9_2_shutdown_ack( - struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_do_9_2_shutdown_ack( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { - struct sctp_chunk *chunk = (struct sctp_chunk *) arg; + struct sctp_chunk *chunk = arg; struct sctp_chunk *reply; /* There are 2 ways of getting here: @@ -5424,12 +5457,14 @@ sctp_disposition_t sctp_sf_do_9_2_shutdown_ack( */ if (chunk) { if (!sctp_vtag_verify(chunk, asoc)) - return sctp_sf_pdiscard(net, ep, asoc, type, arg, commands); + return sctp_sf_pdiscard(net, ep, asoc, type, arg, + commands); /* Make sure that the SHUTDOWN chunk has a valid length. */ - if (!sctp_chunk_length_valid(chunk, sizeof(struct sctp_shutdown_chunk_t))) - return sctp_sf_violation_chunklen(net, ep, asoc, type, arg, - commands); + if (!sctp_chunk_length_valid( + chunk, sizeof(struct sctp_shutdown_chunk))) + return sctp_sf_violation_chunklen(net, ep, asoc, type, + arg, commands); } /* If it has no more outstanding DATA chunks, the SHUTDOWN receiver @@ -5476,12 +5511,12 @@ nomem: * * The return value is the disposition of the event. */ -sctp_disposition_t sctp_sf_ignore_other(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_ignore_other(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { pr_debug("%s: the event other type:%d is ignored\n", __func__, type.other); @@ -5504,12 +5539,12 @@ sctp_disposition_t sctp_sf_ignore_other(struct net *net, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_do_6_3_3_rtx(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_do_6_3_3_rtx(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { struct sctp_transport *transport = arg; @@ -5592,12 +5627,12 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(struct net *net, * allow. However, an SCTP transmitter MUST NOT be more aggressive than * the following algorithms allow. */ -sctp_disposition_t sctp_sf_do_6_2_sack(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_do_6_2_sack(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { SCTP_INC_STATS(net, SCTP_MIB_DELAY_SACK_EXPIREDS); sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_FORCE()); @@ -5623,16 +5658,17 @@ sctp_disposition_t sctp_sf_do_6_2_sack(struct net *net, * (timers, events) * */ -sctp_disposition_t sctp_sf_t1_init_timer_expire(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_t1_init_timer_expire( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { + int attempts = asoc->init_err_counter + 1; struct sctp_chunk *repl = NULL; struct sctp_bind_addr *bp; - int attempts = asoc->init_err_counter + 1; pr_debug("%s: timer T1 expired (INIT)\n", __func__); @@ -5687,15 +5723,16 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(struct net *net, * (timers, events) * */ -sctp_disposition_t sctp_sf_t1_cookie_timer_expire(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_t1_cookie_timer_expire( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { - struct sctp_chunk *repl = NULL; int attempts = asoc->init_err_counter + 1; + struct sctp_chunk *repl = NULL; pr_debug("%s: timer T1 expired (COOKIE-ECHO)\n", __func__); @@ -5737,12 +5774,13 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(struct net *net, * the T2-Shutdown timer, giving its peer ample opportunity to transmit * all of its queued DATA chunks that have not yet been sent. */ -sctp_disposition_t sctp_sf_t2_timer_expire(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_t2_timer_expire( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { struct sctp_chunk *reply = NULL; @@ -5807,13 +5845,13 @@ nomem: * ADDIP Section 4.1 ASCONF CHunk Procedures * If the T4 RTO timer expires the endpoint should do B1 to B5 */ -sctp_disposition_t sctp_sf_t4_timer_expire( - struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_t4_timer_expire( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { struct sctp_chunk *chunk = asoc->addip_last_asconf; struct sctp_transport *transport = chunk->transport; @@ -5879,12 +5917,13 @@ sctp_disposition_t sctp_sf_t4_timer_expire( * At the expiration of this timer the sender SHOULD abort the association * by sending an ABORT chunk. */ -sctp_disposition_t sctp_sf_t5_timer_expire(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_t5_timer_expire( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { struct sctp_chunk *reply = NULL; @@ -5915,15 +5954,15 @@ nomem: * The work that needs to be done is same as when SHUTDOWN is initiated by * the user. So this routine looks same as sctp_sf_do_9_2_prm_shutdown(). */ -sctp_disposition_t sctp_sf_autoclose_timer_expire( - struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_autoclose_timer_expire( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { - int disposition; + enum sctp_disposition disposition; SCTP_INC_STATS(net, SCTP_MIB_AUTOCLOSE_EXPIREDS); @@ -5943,6 +5982,7 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire( disposition = sctp_sf_do_9_2_start_shutdown(net, ep, asoc, type, arg, commands); } + return disposition; } @@ -5958,12 +5998,11 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire( * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_not_impl(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_not_impl(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, struct sctp_cmd_seq *commands) { return SCTP_DISPOSITION_NOT_IMPL; } @@ -5976,12 +6015,11 @@ sctp_disposition_t sctp_sf_not_impl(struct net *net, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_bug(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_bug(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, struct sctp_cmd_seq *commands) { return SCTP_DISPOSITION_BUG; } @@ -5997,12 +6035,12 @@ sctp_disposition_t sctp_sf_bug(struct net *net, * * The return value is the disposition of the chunk. */ -sctp_disposition_t sctp_sf_timer_ignore(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - const sctp_subtype_t type, - void *arg, - sctp_cmd_seq_t *commands) +enum sctp_disposition sctp_sf_timer_ignore(struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + const union sctp_subtype type, + void *arg, + struct sctp_cmd_seq *commands) { pr_debug("%s: timer %d ignored\n", __func__, type.chunk); @@ -6017,9 +6055,9 @@ sctp_disposition_t sctp_sf_timer_ignore(struct net *net, static struct sctp_sackhdr *sctp_sm_pull_sack(struct sctp_chunk *chunk) { struct sctp_sackhdr *sack; + __u16 num_dup_tsns; unsigned int len; __u16 num_blocks; - __u16 num_dup_tsns; /* Protect ourselves from reading too far into * the skb from a bogus sender. @@ -6041,12 +6079,12 @@ static struct sctp_sackhdr *sctp_sm_pull_sack(struct sctp_chunk *chunk) /* Create an ABORT packet to be sent as a response, with the specified * error causes. */ -static struct sctp_packet *sctp_abort_pkt_new(struct net *net, - const struct sctp_endpoint *ep, - const struct sctp_association *asoc, - struct sctp_chunk *chunk, - const void *payload, - size_t paylen) +static struct sctp_packet *sctp_abort_pkt_new( + struct net *net, + const struct sctp_endpoint *ep, + const struct sctp_association *asoc, + struct sctp_chunk *chunk, + const void *payload, size_t paylen) { struct sctp_packet *packet; struct sctp_chunk *abort; @@ -6083,14 +6121,14 @@ static struct sctp_packet *sctp_abort_pkt_new(struct net *net, } /* Allocate a packet for responding in the OOTB conditions. */ -static struct sctp_packet *sctp_ootb_pkt_new(struct net *net, - const struct sctp_association *asoc, - const struct sctp_chunk *chunk) +static struct sctp_packet *sctp_ootb_pkt_new( + struct net *net, + const struct sctp_association *asoc, + const struct sctp_chunk *chunk) { - struct sctp_packet *packet; struct sctp_transport *transport; - __u16 sport; - __u16 dport; + struct sctp_packet *packet; + __u16 sport, dport; __u32 vtag; /* Get the source and destination port from the inbound packet. */ @@ -6107,9 +6145,9 @@ static struct sctp_packet *sctp_ootb_pkt_new(struct net *net, switch (chunk->chunk_hdr->type) { case SCTP_CID_INIT_ACK: { - sctp_initack_chunk_t *initack; + struct sctp_initack_chunk *initack; - initack = (sctp_initack_chunk_t *)chunk->chunk_hdr; + initack = (struct sctp_initack_chunk *)chunk->chunk_hdr; vtag = ntohl(initack->init_hdr.init_tag); break; } @@ -6168,7 +6206,7 @@ static void sctp_send_stale_cookie_err(struct net *net, const struct sctp_endpoint *ep, const struct sctp_association *asoc, const struct sctp_chunk *chunk, - sctp_cmd_seq_t *commands, + struct sctp_cmd_seq *commands, struct sctp_chunk *err_chunk) { struct sctp_packet *packet; @@ -6197,20 +6235,19 @@ static void sctp_send_stale_cookie_err(struct net *net, /* Process a data chunk */ static int sctp_eat_data(const struct sctp_association *asoc, struct sctp_chunk *chunk, - sctp_cmd_seq_t *commands) + struct sctp_cmd_seq *commands) { - struct sctp_datahdr *data_hdr; - struct sctp_chunk *err; - size_t datalen; - sctp_verb_t deliver; - int tmp; - __u32 tsn; struct sctp_tsnmap *map = (struct sctp_tsnmap *)&asoc->peer.tsn_map; struct sock *sk = asoc->base.sk; struct net *net = sock_net(sk); - u16 ssn; - u16 sid; + struct sctp_datahdr *data_hdr; + struct sctp_chunk *err; + enum sctp_verb deliver; + size_t datalen; u8 ordered = 0; + u16 ssn, sid; + __u32 tsn; + int tmp; data_hdr = (struct sctp_datahdr *)chunk->skb->data; chunk->subh.data_hdr = data_hdr; diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c index 3e958c1c4b95..79b6bee5b768 100644 --- a/net/sctp/sm_statetable.c +++ b/net/sctp/sm_statetable.c @@ -45,26 +45,27 @@ #include <net/sctp/sctp.h> #include <net/sctp/sm.h> -static const sctp_sm_table_entry_t +static const struct sctp_sm_table_entry primitive_event_table[SCTP_NUM_PRIMITIVE_TYPES][SCTP_STATE_NUM_STATES]; -static const sctp_sm_table_entry_t +static const struct sctp_sm_table_entry other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_STATE_NUM_STATES]; -static const sctp_sm_table_entry_t +static const struct sctp_sm_table_entry timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES]; -static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net, - enum sctp_cid cid, - sctp_state_t state); +static const struct sctp_sm_table_entry *sctp_chunk_event_lookup( + struct net *net, + enum sctp_cid cid, + enum sctp_state state); -static const sctp_sm_table_entry_t bug = { +static const struct sctp_sm_table_entry bug = { .fn = sctp_sf_bug, .name = "sctp_sf_bug" }; #define DO_LOOKUP(_max, _type, _table) \ ({ \ - const sctp_sm_table_entry_t *rtn; \ + const struct sctp_sm_table_entry *rtn; \ \ if ((event_subtype._type > (_max))) { \ pr_warn("table %p possible attack: event %d exceeds max %d\n", \ @@ -76,10 +77,11 @@ static const sctp_sm_table_entry_t bug = { rtn; \ }) -const sctp_sm_table_entry_t *sctp_sm_lookup_event(struct net *net, - sctp_event_t event_type, - sctp_state_t state, - sctp_subtype_t event_subtype) +const struct sctp_sm_table_entry *sctp_sm_lookup_event( + struct net *net, + enum sctp_event event_type, + enum sctp_state state, + union sctp_subtype event_subtype) { switch (event_type) { case SCTP_EVENT_T_CHUNK: @@ -392,7 +394,8 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(struct net *net, * * For base protocol (RFC 2960). */ -static const sctp_sm_table_entry_t chunk_event_table[SCTP_NUM_BASE_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = { +static const struct sctp_sm_table_entry +chunk_event_table[SCTP_NUM_BASE_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = { TYPE_SCTP_DATA, TYPE_SCTP_INIT, TYPE_SCTP_INIT_ACK, @@ -451,7 +454,8 @@ static const sctp_sm_table_entry_t chunk_event_table[SCTP_NUM_BASE_CHUNK_TYPES][ /* The primary index for this table is the chunk type. * The secondary index for this table is the state. */ -static const sctp_sm_table_entry_t addip_chunk_event_table[SCTP_NUM_ADDIP_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = { +static const struct sctp_sm_table_entry +addip_chunk_event_table[SCTP_NUM_ADDIP_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = { TYPE_SCTP_ASCONF, TYPE_SCTP_ASCONF_ACK, }; /*state_fn_t addip_chunk_event_table[][] */ @@ -478,7 +482,8 @@ static const sctp_sm_table_entry_t addip_chunk_event_table[SCTP_NUM_ADDIP_CHUNK_ /* The primary index for this table is the chunk type. * The secondary index for this table is the state. */ -static const sctp_sm_table_entry_t prsctp_chunk_event_table[SCTP_NUM_PRSCTP_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = { +static const struct sctp_sm_table_entry +prsctp_chunk_event_table[SCTP_NUM_PRSCTP_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = { TYPE_SCTP_FWD_TSN, }; /*state_fn_t prsctp_chunk_event_table[][] */ @@ -504,7 +509,8 @@ static const sctp_sm_table_entry_t prsctp_chunk_event_table[SCTP_NUM_PRSCTP_CHUN /* The primary index for this table is the chunk type. * The secondary index for this table is the state. */ -static const sctp_sm_table_entry_t reconf_chunk_event_table[SCTP_NUM_RECONF_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = { +static const struct sctp_sm_table_entry +reconf_chunk_event_table[SCTP_NUM_RECONF_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = { TYPE_SCTP_RECONF, }; /*state_fn_t reconf_chunk_event_table[][] */ @@ -530,11 +536,12 @@ static const sctp_sm_table_entry_t reconf_chunk_event_table[SCTP_NUM_RECONF_CHUN /* The primary index for this table is the chunk type. * The secondary index for this table is the state. */ -static const sctp_sm_table_entry_t auth_chunk_event_table[SCTP_NUM_AUTH_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = { +static const struct sctp_sm_table_entry +auth_chunk_event_table[SCTP_NUM_AUTH_CHUNK_TYPES][SCTP_STATE_NUM_STATES] = { TYPE_SCTP_AUTH, }; /*state_fn_t auth_chunk_event_table[][] */ -static const sctp_sm_table_entry_t +static const struct sctp_sm_table_entry chunk_event_table_unknown[SCTP_STATE_NUM_STATES] = { /* SCTP_STATE_CLOSED */ TYPE_SCTP_FUNC(sctp_sf_ootb), @@ -691,7 +698,8 @@ chunk_event_table_unknown[SCTP_STATE_NUM_STATES] = { /* The primary index for this table is the primitive type. * The secondary index for this table is the state. */ -static const sctp_sm_table_entry_t primitive_event_table[SCTP_NUM_PRIMITIVE_TYPES][SCTP_STATE_NUM_STATES] = { +static const struct sctp_sm_table_entry +primitive_event_table[SCTP_NUM_PRIMITIVE_TYPES][SCTP_STATE_NUM_STATES] = { TYPE_SCTP_PRIMITIVE_ASSOCIATE, TYPE_SCTP_PRIMITIVE_SHUTDOWN, TYPE_SCTP_PRIMITIVE_ABORT, @@ -739,7 +747,8 @@ static const sctp_sm_table_entry_t primitive_event_table[SCTP_NUM_PRIMITIVE_TYPE TYPE_SCTP_FUNC(sctp_sf_ignore_other), \ } -static const sctp_sm_table_entry_t other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_STATE_NUM_STATES] = { +static const struct sctp_sm_table_entry +other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_STATE_NUM_STATES] = { TYPE_SCTP_OTHER_NO_PENDING_TSN, TYPE_SCTP_OTHER_ICMP_PROTO_UNREACH, }; @@ -953,7 +962,8 @@ static const sctp_sm_table_entry_t other_event_table[SCTP_NUM_OTHER_TYPES][SCTP_ TYPE_SCTP_FUNC(sctp_sf_timer_ignore), \ } -static const sctp_sm_table_entry_t timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES] = { +static const struct sctp_sm_table_entry +timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][SCTP_STATE_NUM_STATES] = { TYPE_SCTP_EVENT_TIMEOUT_NONE, TYPE_SCTP_EVENT_TIMEOUT_T1_COOKIE, TYPE_SCTP_EVENT_TIMEOUT_T1_INIT, @@ -967,9 +977,10 @@ static const sctp_sm_table_entry_t timeout_event_table[SCTP_NUM_TIMEOUT_TYPES][S TYPE_SCTP_EVENT_TIMEOUT_AUTOCLOSE, }; -static const sctp_sm_table_entry_t *sctp_chunk_event_lookup(struct net *net, - enum sctp_cid cid, - sctp_state_t state) +static const struct sctp_sm_table_entry *sctp_chunk_event_lookup( + struct net *net, + enum sctp_cid cid, + enum sctp_state state) { if (state > SCTP_STATE_MAX) return &bug; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 1db478e34520..3204a9b29407 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -79,12 +79,13 @@ #include <net/sock.h> #include <net/sctp/sctp.h> #include <net/sctp/sm.h> +#include <net/sctp/stream_sched.h> /* Forward declarations for internal helper functions. */ static int sctp_writeable(struct sock *sk); static void sctp_wfree(struct sk_buff *skb); -static int sctp_wait_for_sndbuf(struct sctp_association *, long *timeo_p, - size_t msg_len); +static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, + size_t msg_len, struct sock **orig_sk); static int sctp_wait_for_packet(struct sock *sk, int *err, long *timeo_p); static int sctp_wait_for_connect(struct sctp_association *, long *timeo_p); static int sctp_wait_for_accept(struct sock *sk, long timeo); @@ -100,8 +101,9 @@ static int sctp_send_asconf(struct sctp_association *asoc, struct sctp_chunk *chunk); static int sctp_do_bind(struct sock *, union sctp_addr *, int); static int sctp_autobind(struct sock *sk); -static void sctp_sock_migrate(struct sock *, struct sock *, - struct sctp_association *, sctp_socket_type_t); +static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, + struct sctp_association *assoc, + enum sctp_socket_type type); static unsigned long sctp_memory_pressure; static atomic_long_t sctp_memory_allocated; @@ -169,6 +171,36 @@ static inline void sctp_set_owner_w(struct sctp_chunk *chunk) sk_mem_charge(sk, chunk->skb->truesize); } +static void sctp_clear_owner_w(struct sctp_chunk *chunk) +{ + skb_orphan(chunk->skb); +} + +static void sctp_for_each_tx_datachunk(struct sctp_association *asoc, + void (*cb)(struct sctp_chunk *)) + +{ + struct sctp_outq *q = &asoc->outqueue; + struct sctp_transport *t; + struct sctp_chunk *chunk; + + list_for_each_entry(t, &asoc->peer.transport_addr_list, transports) + list_for_each_entry(chunk, &t->transmitted, transmitted_list) + cb(chunk); + + list_for_each_entry(chunk, &q->retransmit, list) + cb(chunk); + + list_for_each_entry(chunk, &q->sacked, list) + cb(chunk); + + list_for_each_entry(chunk, &q->abandoned, list) + cb(chunk); + + list_for_each_entry(chunk, &q->out_chunk_list, list) + cb(chunk); +} + /* Verify that this is a valid address. */ static inline int sctp_verify_addr(struct sock *sk, union sctp_addr *addr, int len) @@ -1055,7 +1087,7 @@ static int __sctp_connect(struct sock *sk, struct sctp_association *asoc2; struct sctp_transport *transport; union sctp_addr to; - sctp_scope_t scope; + enum sctp_scope scope; long timeo; int err = 0; int addrcnt = 0; @@ -1593,7 +1625,8 @@ static int sctp_error(struct sock *sk, int flags, int err) */ /* BUG: We do not implement the equivalent of sk_stream_wait_memory(). */ -static int sctp_msghdr_parse(const struct msghdr *, sctp_cmsgs_t *); +static int sctp_msghdr_parse(const struct msghdr *msg, + struct sctp_cmsgs *cmsgs); static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) { @@ -1609,8 +1642,8 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) struct sctp_sndrcvinfo *sinfo; struct sctp_initmsg *sinit; sctp_assoc_t associd = 0; - sctp_cmsgs_t cmsgs = { NULL }; - sctp_scope_t scope; + struct sctp_cmsgs cmsgs = { NULL }; + enum sctp_scope scope; bool fill_sinfo_ttl = false, wait_connect = false; struct sctp_datamsg *datamsg; int msg_flags = msg->msg_flags; @@ -1925,14 +1958,28 @@ static int sctp_sendmsg(struct sock *sk, struct msghdr *msg, size_t msg_len) goto out_free; } + /* Allocate sctp_stream_out_ext if not already done */ + if (unlikely(!asoc->stream.out[sinfo->sinfo_stream].ext)) { + err = sctp_stream_init_ext(&asoc->stream, sinfo->sinfo_stream); + if (err) + goto out_free; + } + if (sctp_wspace(asoc) < msg_len) sctp_prsctp_prune(asoc, sinfo, msg_len - sctp_wspace(asoc)); timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); if (!sctp_wspace(asoc)) { - err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len); - if (err) + /* sk can be changed by peel off when waiting for buf. */ + err = sctp_wait_for_sndbuf(asoc, &timeo, msg_len, &sk); + if (err) { + if (err == -ESRCH) { + /* asoc is already dead. */ + new_asoc = NULL; + err = -EPIPE; + } goto out_free; + } } /* If an address is passed with the sendto/sendmsg call, it is used @@ -3093,9 +3140,9 @@ static int sctp_setsockopt_mappedv4(struct sock *sk, char __user *optval, unsign */ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned int optlen) { + struct sctp_sock *sp = sctp_sk(sk); struct sctp_assoc_value params; struct sctp_association *asoc; - struct sctp_sock *sp = sctp_sk(sk); int val; if (optlen == sizeof(int)) { @@ -3111,26 +3158,35 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned if (copy_from_user(¶ms, optval, optlen)) return -EFAULT; val = params.assoc_value; - } else + } else { return -EINVAL; + } - if ((val != 0) && ((val < 8) || (val > SCTP_MAX_CHUNK_LEN))) - return -EINVAL; + if (val) { + int min_len, max_len; - asoc = sctp_id2assoc(sk, params.assoc_id); - if (!asoc && params.assoc_id && sctp_style(sk, UDP)) - return -EINVAL; + min_len = SCTP_DEFAULT_MINSEGMENT - sp->pf->af->net_header_len; + min_len -= sizeof(struct sctphdr) + + sizeof(struct sctp_data_chunk); + + max_len = SCTP_MAX_CHUNK_LEN - sizeof(struct sctp_data_chunk); + + if (val < min_len || val > max_len) + return -EINVAL; + } + asoc = sctp_id2assoc(sk, params.assoc_id); if (asoc) { if (val == 0) { - val = asoc->pathmtu; - val -= sp->pf->af->net_header_len; + val = asoc->pathmtu - sp->pf->af->net_header_len; val -= sizeof(struct sctphdr) + - sizeof(struct sctp_data_chunk); + sizeof(struct sctp_data_chunk); } asoc->user_frag = val; asoc->frag_point = sctp_frag_point(asoc, asoc->pathmtu); } else { + if (params.assoc_id && sctp_style(sk, UDP)) + return -EINVAL; sp->user_frag = val; } @@ -3905,6 +3961,64 @@ out: return retval; } +static int sctp_setsockopt_scheduler(struct sock *sk, + char __user *optval, + unsigned int optlen) +{ + struct sctp_association *asoc; + struct sctp_assoc_value params; + int retval = -EINVAL; + + if (optlen < sizeof(params)) + goto out; + + optlen = sizeof(params); + if (copy_from_user(¶ms, optval, optlen)) { + retval = -EFAULT; + goto out; + } + + if (params.assoc_value > SCTP_SS_MAX) + goto out; + + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc) + goto out; + + retval = sctp_sched_set_sched(asoc, params.assoc_value); + +out: + return retval; +} + +static int sctp_setsockopt_scheduler_value(struct sock *sk, + char __user *optval, + unsigned int optlen) +{ + struct sctp_association *asoc; + struct sctp_stream_value params; + int retval = -EINVAL; + + if (optlen < sizeof(params)) + goto out; + + optlen = sizeof(params); + if (copy_from_user(¶ms, optval, optlen)) { + retval = -EFAULT; + goto out; + } + + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc) + goto out; + + retval = sctp_sched_set_value(asoc, params.stream_id, + params.stream_value, GFP_KERNEL); + +out: + return retval; +} + /* API 6.2 setsockopt(), getsockopt() * * Applications use setsockopt() and getsockopt() to set or retrieve @@ -4086,6 +4200,12 @@ static int sctp_setsockopt(struct sock *sk, int level, int optname, case SCTP_ADD_STREAMS: retval = sctp_setsockopt_add_streams(sk, optval, optlen); break; + case SCTP_STREAM_SCHEDULER: + retval = sctp_setsockopt_scheduler(sk, optval, optlen); + break; + case SCTP_STREAM_SCHEDULER_VALUE: + retval = sctp_setsockopt_scheduler_value(sk, optval, optlen); + break; default: retval = -ENOPROTOOPT; break; @@ -4538,8 +4658,7 @@ int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, info->sctpi_ictrlchunks = asoc->stats.ictrlchunks; prim = asoc->peer.primary_path; - memcpy(&info->sctpi_p_address, &prim->ipaddr, - sizeof(struct sockaddr_storage)); + memcpy(&info->sctpi_p_address, &prim->ipaddr, sizeof(prim->ipaddr)); info->sctpi_p_state = prim->state; info->sctpi_p_cwnd = prim->cwnd; info->sctpi_p_srtt = prim->srtt; @@ -4657,29 +4776,39 @@ int sctp_transport_lookup_process(int (*cb)(struct sctp_transport *, void *), EXPORT_SYMBOL_GPL(sctp_transport_lookup_process); int sctp_for_each_transport(int (*cb)(struct sctp_transport *, void *), - struct net *net, int pos, void *p) { + int (*cb_done)(struct sctp_transport *, void *), + struct net *net, int *pos, void *p) { struct rhashtable_iter hti; - void *obj; - int err; - - err = sctp_transport_walk_start(&hti); - if (err) - return err; + struct sctp_transport *tsp; + int ret; - obj = sctp_transport_get_idx(net, &hti, pos + 1); - for (; !IS_ERR_OR_NULL(obj); obj = sctp_transport_get_next(net, &hti)) { - struct sctp_transport *transport = obj; +again: + ret = sctp_transport_walk_start(&hti); + if (ret) + return ret; - if (!sctp_transport_hold(transport)) + tsp = sctp_transport_get_idx(net, &hti, *pos + 1); + for (; !IS_ERR_OR_NULL(tsp); tsp = sctp_transport_get_next(net, &hti)) { + if (!sctp_transport_hold(tsp)) continue; - err = cb(transport, p); - sctp_transport_put(transport); - if (err) + ret = cb(tsp, p); + if (ret) break; + (*pos)++; + sctp_transport_put(tsp); } sctp_transport_walk_stop(&hti); - return err; + if (ret) { + if (cb_done && !cb_done(tsp, p)) { + (*pos)++; + sctp_transport_put(tsp); + goto again; + } + sctp_transport_put(tsp); + } + + return ret; } EXPORT_SYMBOL_GPL(sctp_for_each_transport); @@ -4895,14 +5024,12 @@ int sctp_do_peeloff(struct sock *sk, sctp_assoc_t id, struct socket **sockp) struct socket *sock; int err = 0; - if (!asoc) + /* Do not peel off from one netns to another one. */ + if (!net_eq(current->nsproxy->net_ns, sock_net(sk))) return -EINVAL; - /* If there is a thread waiting on more sndbuf space for - * sending on this asoc, it cannot be peeled. - */ - if (waitqueue_active(&asoc->wait)) - return -EBUSY; + if (!asoc) + return -EINVAL; /* An association cannot be branched off from an already peeled-off * socket, nor is this supported for tcp style sockets. @@ -6634,7 +6761,7 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len, char __user *optval, int __user *optlen) { - struct sctp_stream_out *streamout; + struct sctp_stream_out_ext *streamoute; struct sctp_association *asoc; struct sctp_prstatus params; int retval = -EINVAL; @@ -6657,21 +6784,29 @@ static int sctp_getsockopt_pr_streamstatus(struct sock *sk, int len, if (!asoc || params.sprstat_sid >= asoc->stream.outcnt) goto out; - streamout = &asoc->stream.out[params.sprstat_sid]; + streamoute = asoc->stream.out[params.sprstat_sid].ext; + if (!streamoute) { + /* Not allocated yet, means all stats are 0 */ + params.sprstat_abandoned_unsent = 0; + params.sprstat_abandoned_sent = 0; + retval = 0; + goto out; + } + if (policy == SCTP_PR_SCTP_NONE) { params.sprstat_abandoned_unsent = 0; params.sprstat_abandoned_sent = 0; for (policy = 0; policy <= SCTP_PR_INDEX(MAX); policy++) { params.sprstat_abandoned_unsent += - streamout->abandoned_unsent[policy]; + streamoute->abandoned_unsent[policy]; params.sprstat_abandoned_sent += - streamout->abandoned_sent[policy]; + streamoute->abandoned_sent[policy]; } } else { params.sprstat_abandoned_unsent = - streamout->abandoned_unsent[__SCTP_PR_INDEX(policy)]; + streamoute->abandoned_unsent[__SCTP_PR_INDEX(policy)]; params.sprstat_abandoned_sent = - streamout->abandoned_sent[__SCTP_PR_INDEX(policy)]; + streamoute->abandoned_sent[__SCTP_PR_INDEX(policy)]; } if (put_user(len, optlen) || copy_to_user(optval, ¶ms, len)) { @@ -6767,6 +6902,85 @@ out: return retval; } +static int sctp_getsockopt_scheduler(struct sock *sk, int len, + char __user *optval, + int __user *optlen) +{ + struct sctp_assoc_value params; + struct sctp_association *asoc; + int retval = -EFAULT; + + if (len < sizeof(params)) { + retval = -EINVAL; + goto out; + } + + len = sizeof(params); + if (copy_from_user(¶ms, optval, len)) + goto out; + + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc) { + retval = -EINVAL; + goto out; + } + + params.assoc_value = sctp_sched_get_sched(asoc); + + if (put_user(len, optlen)) + goto out; + + if (copy_to_user(optval, ¶ms, len)) + goto out; + + retval = 0; + +out: + return retval; +} + +static int sctp_getsockopt_scheduler_value(struct sock *sk, int len, + char __user *optval, + int __user *optlen) +{ + struct sctp_stream_value params; + struct sctp_association *asoc; + int retval = -EFAULT; + + if (len < sizeof(params)) { + retval = -EINVAL; + goto out; + } + + len = sizeof(params); + if (copy_from_user(¶ms, optval, len)) + goto out; + + asoc = sctp_id2assoc(sk, params.assoc_id); + if (!asoc) { + retval = -EINVAL; + goto out; + } + + retval = sctp_sched_get_value(asoc, params.stream_id, + ¶ms.stream_value); + if (retval) + goto out; + + if (put_user(len, optlen)) { + retval = -EFAULT; + goto out; + } + + if (copy_to_user(optval, ¶ms, len)) { + retval = -EFAULT; + goto out; + } + +out: + return retval; +} + static int sctp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen) { @@ -6949,6 +7163,14 @@ static int sctp_getsockopt(struct sock *sk, int level, int optname, retval = sctp_getsockopt_enable_strreset(sk, len, optval, optlen); break; + case SCTP_STREAM_SCHEDULER: + retval = sctp_getsockopt_scheduler(sk, len, optval, + optlen); + break; + case SCTP_STREAM_SCHEDULER_VALUE: + retval = sctp_getsockopt_scheduler_value(sk, len, optval, + optlen); + break; default: retval = -ENOPROTOOPT; break; @@ -7445,10 +7667,10 @@ static int sctp_autobind(struct sock *sk) * msg_control * points here */ -static int sctp_msghdr_parse(const struct msghdr *msg, sctp_cmsgs_t *cmsgs) +static int sctp_msghdr_parse(const struct msghdr *msg, struct sctp_cmsgs *cmsgs) { - struct cmsghdr *cmsg; struct msghdr *my_msg = (struct msghdr *)msg; + struct cmsghdr *cmsg; for_each_cmsghdr(cmsg, my_msg) { if (!CMSG_OK(my_msg, cmsg)) @@ -7777,7 +7999,7 @@ void sctp_sock_rfree(struct sk_buff *skb) /* Helper function to wait for space in the sndbuf. */ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, - size_t msg_len) + size_t msg_len, struct sock **orig_sk) { struct sock *sk = asoc->base.sk; int err = 0; @@ -7794,10 +8016,11 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, for (;;) { prepare_to_wait_exclusive(&asoc->wait, &wait, TASK_INTERRUPTIBLE); + if (asoc->base.dead) + goto do_dead; if (!*timeo_p) goto do_nonblock; - if (sk->sk_err || asoc->state >= SCTP_STATE_SHUTDOWN_PENDING || - asoc->base.dead) + if (sk->sk_err || asoc->state >= SCTP_STATE_SHUTDOWN_PENDING) goto do_error; if (signal_pending(current)) goto do_interrupted; @@ -7810,11 +8033,17 @@ static int sctp_wait_for_sndbuf(struct sctp_association *asoc, long *timeo_p, release_sock(sk); current_timeo = schedule_timeout(current_timeo); lock_sock(sk); + if (sk != asoc->base.sk) { + release_sock(sk); + sk = asoc->base.sk; + lock_sock(sk); + } *timeo_p = current_timeo; } out: + *orig_sk = sk; finish_wait(&asoc->wait, &wait); /* Release the association's refcnt. */ @@ -7822,6 +8051,10 @@ out: return err; +do_dead: + err = -ESRCH; + goto out; + do_error: err = -EPIPE; goto out; @@ -8085,7 +8318,7 @@ static inline void sctp_copy_descendant(struct sock *sk_to, */ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, struct sctp_association *assoc, - sctp_socket_type_t type) + enum sctp_socket_type type) { struct sctp_sock *oldsp = sctp_sk(oldsk); struct sctp_sock *newsp = sctp_sk(newsk); @@ -8197,7 +8430,9 @@ static void sctp_sock_migrate(struct sock *oldsk, struct sock *newsk, * paths won't try to lock it and then oldsk. */ lock_sock_nested(newsk, SINGLE_DEPTH_NESTING); + sctp_for_each_tx_datachunk(assoc, sctp_clear_owner_w); sctp_assoc_migrate(assoc, newsk); + sctp_for_each_tx_datachunk(assoc, sctp_set_owner_w); /* If the association on the newsk is already closed before accept() * is called, set RCV_SHUTDOWN flag. diff --git a/net/sctp/stream.c b/net/sctp/stream.c index 63ea15503714..a11db21dc8a0 100644 --- a/net/sctp/stream.c +++ b/net/sctp/stream.c @@ -32,44 +32,181 @@ * Xin Long <lucien.xin@gmail.com> */ +#include <linux/list.h> #include <net/sctp/sctp.h> #include <net/sctp/sm.h> +#include <net/sctp/stream_sched.h> + +/* Migrates chunks from stream queues to new stream queues if needed, + * but not across associations. Also, removes those chunks to streams + * higher than the new max. + */ +static void sctp_stream_outq_migrate(struct sctp_stream *stream, + struct sctp_stream *new, __u16 outcnt) +{ + struct sctp_association *asoc; + struct sctp_chunk *ch, *temp; + struct sctp_outq *outq; + int i; + + asoc = container_of(stream, struct sctp_association, stream); + outq = &asoc->outqueue; + + list_for_each_entry_safe(ch, temp, &outq->out_chunk_list, list) { + __u16 sid = sctp_chunk_stream_no(ch); + + if (sid < outcnt) + continue; + + sctp_sched_dequeue_common(outq, ch); + /* No need to call dequeue_done here because + * the chunks are not scheduled by now. + */ + + /* Mark as failed send. */ + sctp_chunk_fail(ch, SCTP_ERROR_INV_STRM); + if (asoc->peer.prsctp_capable && + SCTP_PR_PRIO_ENABLED(ch->sinfo.sinfo_flags)) + asoc->sent_cnt_removable--; + + sctp_chunk_free(ch); + } + + if (new) { + /* Here we actually move the old ext stuff into the new + * buffer, because we want to keep it. Then + * sctp_stream_update will swap ->out pointers. + */ + for (i = 0; i < outcnt; i++) { + kfree(new->out[i].ext); + new->out[i].ext = stream->out[i].ext; + stream->out[i].ext = NULL; + } + } + + for (i = outcnt; i < stream->outcnt; i++) + kfree(stream->out[i].ext); +} + +static int sctp_stream_alloc_out(struct sctp_stream *stream, __u16 outcnt, + gfp_t gfp) +{ + struct sctp_stream_out *out; + + out = kmalloc_array(outcnt, sizeof(*out), gfp); + if (!out) + return -ENOMEM; + + if (stream->out) { + memcpy(out, stream->out, min(outcnt, stream->outcnt) * + sizeof(*out)); + kfree(stream->out); + } + + if (outcnt > stream->outcnt) + memset(out + stream->outcnt, 0, + (outcnt - stream->outcnt) * sizeof(*out)); + + stream->out = out; + + return 0; +} + +static int sctp_stream_alloc_in(struct sctp_stream *stream, __u16 incnt, + gfp_t gfp) +{ + struct sctp_stream_in *in; + + in = kmalloc_array(incnt, sizeof(*stream->in), gfp); + + if (!in) + return -ENOMEM; + + if (stream->in) { + memcpy(in, stream->in, min(incnt, stream->incnt) * + sizeof(*in)); + kfree(stream->in); + } + + if (incnt > stream->incnt) + memset(in + stream->incnt, 0, + (incnt - stream->incnt) * sizeof(*in)); + + stream->in = in; + + return 0; +} int sctp_stream_init(struct sctp_stream *stream, __u16 outcnt, __u16 incnt, gfp_t gfp) { - int i; + struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream); + int i, ret = 0; + + gfp |= __GFP_NOWARN; /* Initial stream->out size may be very big, so free it and alloc - * a new one with new outcnt to save memory. + * a new one with new outcnt to save memory if needed. */ - kfree(stream->out); + if (outcnt == stream->outcnt) + goto in; - stream->out = kcalloc(outcnt, sizeof(*stream->out), gfp); - if (!stream->out) - return -ENOMEM; + /* Filter out chunks queued on streams that won't exist anymore */ + sched->unsched_all(stream); + sctp_stream_outq_migrate(stream, NULL, outcnt); + sched->sched_all(stream); + + i = sctp_stream_alloc_out(stream, outcnt, gfp); + if (i) + return i; stream->outcnt = outcnt; for (i = 0; i < stream->outcnt; i++) stream->out[i].state = SCTP_STREAM_OPEN; + sched->init(stream); + +in: if (!incnt) - return 0; + goto out; - stream->in = kcalloc(incnt, sizeof(*stream->in), gfp); - if (!stream->in) { - kfree(stream->out); - stream->out = NULL; - return -ENOMEM; + i = sctp_stream_alloc_in(stream, incnt, gfp); + if (i) { + ret = -ENOMEM; + goto free; } stream->incnt = incnt; + goto out; - return 0; +free: + sched->free(stream); + kfree(stream->out); + stream->out = NULL; +out: + return ret; +} + +int sctp_stream_init_ext(struct sctp_stream *stream, __u16 sid) +{ + struct sctp_stream_out_ext *soute; + + soute = kzalloc(sizeof(*soute), GFP_KERNEL); + if (!soute) + return -ENOMEM; + stream->out[sid].ext = soute; + + return sctp_sched_init_sid(stream, sid, GFP_KERNEL); } void sctp_stream_free(struct sctp_stream *stream) { + struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream); + int i; + + sched->free(stream); + for (i = 0; i < stream->outcnt; i++) + kfree(stream->out[i].ext); kfree(stream->out); kfree(stream->in); } @@ -87,6 +224,10 @@ void sctp_stream_clear(struct sctp_stream *stream) void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new) { + struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream); + + sched->unsched_all(stream); + sctp_stream_outq_migrate(stream, new, new->outcnt); sctp_stream_free(stream); stream->out = new->out; @@ -94,6 +235,8 @@ void sctp_stream_update(struct sctp_stream *stream, struct sctp_stream *new) stream->outcnt = new->outcnt; stream->incnt = new->incnt; + sched->sched_all(stream); + new->out = NULL; new->in = NULL; } @@ -118,6 +261,7 @@ int sctp_send_reset_streams(struct sctp_association *asoc, __u16 i, str_nums, *str_list; struct sctp_chunk *chunk; int retval = -EINVAL; + __be16 *nstr_list; bool out, in; if (!asoc->peer.reconf_capable || @@ -138,23 +282,44 @@ int sctp_send_reset_streams(struct sctp_association *asoc, str_nums = params->srs_number_streams; str_list = params->srs_stream_list; - if (out && str_nums) - for (i = 0; i < str_nums; i++) - if (str_list[i] >= stream->outcnt) - goto out; + if (str_nums) { + int param_len = 0; - if (in && str_nums) - for (i = 0; i < str_nums; i++) - if (str_list[i] >= stream->incnt) - goto out; + if (out) { + for (i = 0; i < str_nums; i++) + if (str_list[i] >= stream->outcnt) + goto out; - for (i = 0; i < str_nums; i++) - str_list[i] = htons(str_list[i]); + param_len = str_nums * sizeof(__u16) + + sizeof(struct sctp_strreset_outreq); + } + + if (in) { + for (i = 0; i < str_nums; i++) + if (str_list[i] >= stream->incnt) + goto out; - chunk = sctp_make_strreset_req(asoc, str_nums, str_list, out, in); + param_len += str_nums * sizeof(__u16) + + sizeof(struct sctp_strreset_inreq); + } + + if (param_len > SCTP_MAX_CHUNK_LEN - + sizeof(struct sctp_reconf_chunk)) + goto out; + } + + nstr_list = kcalloc(str_nums, sizeof(__be16), GFP_KERNEL); + if (!nstr_list) { + retval = -ENOMEM; + goto out; + } for (i = 0; i < str_nums; i++) - str_list[i] = ntohs(str_list[i]); + nstr_list[i] = htons(str_list[i]); + + chunk = sctp_make_strreset_req(asoc, str_nums, nstr_list, out, in); + + kfree(nstr_list); if (!chunk) { retval = -ENOMEM; @@ -244,7 +409,7 @@ int sctp_send_add_streams(struct sctp_association *asoc, { struct sctp_stream *stream = &asoc->stream; struct sctp_chunk *chunk = NULL; - int retval = -ENOMEM; + int retval; __u32 outcnt, incnt; __u16 out, in; @@ -270,20 +435,16 @@ int sctp_send_add_streams(struct sctp_association *asoc, } if (out) { - struct sctp_stream_out *streamout; - - streamout = krealloc(stream->out, outcnt * sizeof(*streamout), - GFP_KERNEL); - if (!streamout) + retval = sctp_stream_alloc_out(stream, outcnt, GFP_KERNEL); + if (retval) goto out; - - memset(streamout + stream->outcnt, 0, out * sizeof(*streamout)); - stream->out = streamout; } chunk = sctp_make_strreset_addstrm(asoc, out, in); - if (!chunk) + if (!chunk) { + retval = -ENOMEM; goto out; + } asoc->strreset_chunk = chunk; sctp_chunk_hold(asoc->strreset_chunk); @@ -305,7 +466,7 @@ out: } static struct sctp_paramhdr *sctp_chunk_lookup_strreset_param( - struct sctp_association *asoc, __u32 resp_seq, + struct sctp_association *asoc, __be32 resp_seq, __be16 type) { struct sctp_chunk *chunk = asoc->strreset_chunk; @@ -345,8 +506,9 @@ struct sctp_chunk *sctp_process_strreset_outreq( { struct sctp_strreset_outreq *outreq = param.v; struct sctp_stream *stream = &asoc->stream; - __u16 i, nums, flags = 0, *str_p = NULL; __u32 result = SCTP_STRRESET_DENIED; + __u16 i, nums, flags = 0; + __be16 *str_p = NULL; __u32 request_seq; request_seq = ntohl(outreq->request_seq); @@ -439,8 +601,9 @@ struct sctp_chunk *sctp_process_strreset_inreq( struct sctp_stream *stream = &asoc->stream; __u32 result = SCTP_STRRESET_DENIED; struct sctp_chunk *chunk = NULL; - __u16 i, nums, *str_p; __u32 request_seq; + __u16 i, nums; + __be16 *str_p; request_seq = ntohl(inreq->request_seq); if (TSN_lt(asoc->strreset_inseq, request_seq) || @@ -601,7 +764,6 @@ struct sctp_chunk *sctp_process_strreset_addstrm_out( struct sctp_strreset_addstrm *addstrm = param.v; struct sctp_stream *stream = &asoc->stream; __u32 result = SCTP_STRRESET_DENIED; - struct sctp_stream_in *streamin; __u32 request_seq, incnt; __u16 in, i; @@ -648,13 +810,9 @@ struct sctp_chunk *sctp_process_strreset_addstrm_out( if (!in || incnt > SCTP_MAX_STREAM) goto out; - streamin = krealloc(stream->in, incnt * sizeof(*streamin), - GFP_ATOMIC); - if (!streamin) + if (sctp_stream_alloc_in(stream, incnt, GFP_ATOMIC)) goto out; - memset(streamin + stream->incnt, 0, in * sizeof(*streamin)); - stream->in = streamin; stream->incnt = incnt; result = SCTP_STRRESET_PERFORMED; @@ -676,10 +834,10 @@ struct sctp_chunk *sctp_process_strreset_addstrm_in( struct sctp_strreset_addstrm *addstrm = param.v; struct sctp_stream *stream = &asoc->stream; __u32 result = SCTP_STRRESET_DENIED; - struct sctp_stream_out *streamout; struct sctp_chunk *chunk = NULL; __u32 request_seq, outcnt; __u16 out, i; + int ret; request_seq = ntohl(addstrm->request_seq); if (TSN_lt(asoc->strreset_inseq, request_seq) || @@ -708,14 +866,10 @@ struct sctp_chunk *sctp_process_strreset_addstrm_in( if (!out || outcnt > SCTP_MAX_STREAM) goto out; - streamout = krealloc(stream->out, outcnt * sizeof(*streamout), - GFP_ATOMIC); - if (!streamout) + ret = sctp_stream_alloc_out(stream, outcnt, GFP_ATOMIC); + if (ret) goto out; - memset(streamout + stream->outcnt, 0, out * sizeof(*streamout)); - stream->out = streamout; - chunk = sctp_make_strreset_addstrm(asoc, out, 0); if (!chunk) goto out; @@ -769,7 +923,7 @@ struct sctp_chunk *sctp_process_strreset_resp( if (req->type == SCTP_PARAM_RESET_OUT_REQUEST) { struct sctp_strreset_outreq *outreq; - __u16 *str_p; + __be16 *str_p; outreq = (struct sctp_strreset_outreq *)req; str_p = outreq->list_of_streams; @@ -794,7 +948,7 @@ struct sctp_chunk *sctp_process_strreset_resp( nums, str_p, GFP_ATOMIC); } else if (req->type == SCTP_PARAM_RESET_IN_REQUEST) { struct sctp_strreset_inreq *inreq; - __u16 *str_p; + __be16 *str_p; /* if the result is performed, it's impossible for inreq */ if (result == SCTP_STRRESET_PERFORMED) diff --git a/net/sctp/stream_sched.c b/net/sctp/stream_sched.c new file mode 100644 index 000000000000..0b83ec51e43b --- /dev/null +++ b/net/sctp/stream_sched.c @@ -0,0 +1,275 @@ +/* SCTP kernel implementation + * (C) Copyright Red Hat Inc. 2017 + * + * This file is part of the SCTP kernel implementation + * + * These functions manipulate sctp stream queue/scheduling. + * + * This SCTP implementation is free software; + * you can redistribute it and/or modify it under the terms of + * the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This SCTP implementation is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; without even the implied + * ************************ + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. + * + * Please send any bug reports or fixes you make to the + * email addresched(es): + * lksctp developers <linux-sctp@vger.kernel.org> + * + * Written or modified by: + * Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> + */ + +#include <linux/list.h> +#include <net/sctp/sctp.h> +#include <net/sctp/sm.h> +#include <net/sctp/stream_sched.h> + +/* First Come First Serve (a.k.a. FIFO) + * RFC DRAFT ndata Section 3.1 + */ +static int sctp_sched_fcfs_set(struct sctp_stream *stream, __u16 sid, + __u16 value, gfp_t gfp) +{ + return 0; +} + +static int sctp_sched_fcfs_get(struct sctp_stream *stream, __u16 sid, + __u16 *value) +{ + *value = 0; + return 0; +} + +static int sctp_sched_fcfs_init(struct sctp_stream *stream) +{ + return 0; +} + +static int sctp_sched_fcfs_init_sid(struct sctp_stream *stream, __u16 sid, + gfp_t gfp) +{ + return 0; +} + +static void sctp_sched_fcfs_free(struct sctp_stream *stream) +{ +} + +static void sctp_sched_fcfs_enqueue(struct sctp_outq *q, + struct sctp_datamsg *msg) +{ +} + +static struct sctp_chunk *sctp_sched_fcfs_dequeue(struct sctp_outq *q) +{ + struct sctp_stream *stream = &q->asoc->stream; + struct sctp_chunk *ch = NULL; + struct list_head *entry; + + if (list_empty(&q->out_chunk_list)) + goto out; + + if (stream->out_curr) { + ch = list_entry(stream->out_curr->ext->outq.next, + struct sctp_chunk, stream_list); + } else { + entry = q->out_chunk_list.next; + ch = list_entry(entry, struct sctp_chunk, list); + } + + sctp_sched_dequeue_common(q, ch); + +out: + return ch; +} + +static void sctp_sched_fcfs_dequeue_done(struct sctp_outq *q, + struct sctp_chunk *chunk) +{ +} + +static void sctp_sched_fcfs_sched_all(struct sctp_stream *stream) +{ +} + +static void sctp_sched_fcfs_unsched_all(struct sctp_stream *stream) +{ +} + +static struct sctp_sched_ops sctp_sched_fcfs = { + .set = sctp_sched_fcfs_set, + .get = sctp_sched_fcfs_get, + .init = sctp_sched_fcfs_init, + .init_sid = sctp_sched_fcfs_init_sid, + .free = sctp_sched_fcfs_free, + .enqueue = sctp_sched_fcfs_enqueue, + .dequeue = sctp_sched_fcfs_dequeue, + .dequeue_done = sctp_sched_fcfs_dequeue_done, + .sched_all = sctp_sched_fcfs_sched_all, + .unsched_all = sctp_sched_fcfs_unsched_all, +}; + +/* API to other parts of the stack */ + +extern struct sctp_sched_ops sctp_sched_prio; +extern struct sctp_sched_ops sctp_sched_rr; + +static struct sctp_sched_ops *sctp_sched_ops[] = { + &sctp_sched_fcfs, + &sctp_sched_prio, + &sctp_sched_rr, +}; + +int sctp_sched_set_sched(struct sctp_association *asoc, + enum sctp_sched_type sched) +{ + struct sctp_sched_ops *n = sctp_sched_ops[sched]; + struct sctp_sched_ops *old = asoc->outqueue.sched; + struct sctp_datamsg *msg = NULL; + struct sctp_chunk *ch; + int i, ret = 0; + + if (old == n) + return ret; + + if (sched > SCTP_SS_MAX) + return -EINVAL; + + if (old) { + old->free(&asoc->stream); + + /* Give the next scheduler a clean slate. */ + for (i = 0; i < asoc->stream.outcnt; i++) { + void *p = asoc->stream.out[i].ext; + + if (!p) + continue; + + p += offsetofend(struct sctp_stream_out_ext, outq); + memset(p, 0, sizeof(struct sctp_stream_out_ext) - + offsetofend(struct sctp_stream_out_ext, outq)); + } + } + + asoc->outqueue.sched = n; + n->init(&asoc->stream); + for (i = 0; i < asoc->stream.outcnt; i++) { + if (!asoc->stream.out[i].ext) + continue; + + ret = n->init_sid(&asoc->stream, i, GFP_KERNEL); + if (ret) + goto err; + } + + /* We have to requeue all chunks already queued. */ + list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list) { + if (ch->msg == msg) + continue; + msg = ch->msg; + n->enqueue(&asoc->outqueue, msg); + } + + return ret; + +err: + n->free(&asoc->stream); + asoc->outqueue.sched = &sctp_sched_fcfs; /* Always safe */ + + return ret; +} + +int sctp_sched_get_sched(struct sctp_association *asoc) +{ + int i; + + for (i = 0; i <= SCTP_SS_MAX; i++) + if (asoc->outqueue.sched == sctp_sched_ops[i]) + return i; + + return 0; +} + +int sctp_sched_set_value(struct sctp_association *asoc, __u16 sid, + __u16 value, gfp_t gfp) +{ + if (sid >= asoc->stream.outcnt) + return -EINVAL; + + if (!asoc->stream.out[sid].ext) { + int ret; + + ret = sctp_stream_init_ext(&asoc->stream, sid); + if (ret) + return ret; + } + + return asoc->outqueue.sched->set(&asoc->stream, sid, value, gfp); +} + +int sctp_sched_get_value(struct sctp_association *asoc, __u16 sid, + __u16 *value) +{ + if (sid >= asoc->stream.outcnt) + return -EINVAL; + + if (!asoc->stream.out[sid].ext) + return 0; + + return asoc->outqueue.sched->get(&asoc->stream, sid, value); +} + +void sctp_sched_dequeue_done(struct sctp_outq *q, struct sctp_chunk *ch) +{ + if (!list_is_last(&ch->frag_list, &ch->msg->chunks)) { + struct sctp_stream_out *sout; + __u16 sid; + + /* datamsg is not finish, so save it as current one, + * in case application switch scheduler or a higher + * priority stream comes in. + */ + sid = sctp_chunk_stream_no(ch); + sout = &q->asoc->stream.out[sid]; + q->asoc->stream.out_curr = sout; + return; + } + + q->asoc->stream.out_curr = NULL; + q->sched->dequeue_done(q, ch); +} + +/* Auxiliary functions for the schedulers */ +void sctp_sched_dequeue_common(struct sctp_outq *q, struct sctp_chunk *ch) +{ + list_del_init(&ch->list); + list_del_init(&ch->stream_list); + q->out_qlen -= ch->skb->len; +} + +int sctp_sched_init_sid(struct sctp_stream *stream, __u16 sid, gfp_t gfp) +{ + struct sctp_sched_ops *sched = sctp_sched_ops_from_stream(stream); + + INIT_LIST_HEAD(&stream->out[sid].ext->outq); + return sched->init_sid(stream, sid, gfp); +} + +struct sctp_sched_ops *sctp_sched_ops_from_stream(struct sctp_stream *stream) +{ + struct sctp_association *asoc; + + asoc = container_of(stream, struct sctp_association, stream); + + return asoc->outqueue.sched; +} diff --git a/net/sctp/stream_sched_prio.c b/net/sctp/stream_sched_prio.c new file mode 100644 index 000000000000..384dbf3c8760 --- /dev/null +++ b/net/sctp/stream_sched_prio.c @@ -0,0 +1,347 @@ +/* SCTP kernel implementation + * (C) Copyright Red Hat Inc. 2017 + * + * This file is part of the SCTP kernel implementation + * + * These functions manipulate sctp stream queue/scheduling. + * + * This SCTP implementation is free software; + * you can redistribute it and/or modify it under the terms of + * the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This SCTP implementation is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; without even the implied + * ************************ + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. + * + * Please send any bug reports or fixes you make to the + * email addresched(es): + * lksctp developers <linux-sctp@vger.kernel.org> + * + * Written or modified by: + * Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> + */ + +#include <linux/list.h> +#include <net/sctp/sctp.h> +#include <net/sctp/sm.h> +#include <net/sctp/stream_sched.h> + +/* Priority handling + * RFC DRAFT ndata section 3.4 + */ + +static void sctp_sched_prio_unsched_all(struct sctp_stream *stream); + +static struct sctp_stream_priorities *sctp_sched_prio_new_head( + struct sctp_stream *stream, int prio, gfp_t gfp) +{ + struct sctp_stream_priorities *p; + + p = kmalloc(sizeof(*p), gfp); + if (!p) + return NULL; + + INIT_LIST_HEAD(&p->prio_sched); + INIT_LIST_HEAD(&p->active); + p->next = NULL; + p->prio = prio; + + return p; +} + +static struct sctp_stream_priorities *sctp_sched_prio_get_head( + struct sctp_stream *stream, int prio, gfp_t gfp) +{ + struct sctp_stream_priorities *p; + int i; + + /* Look into scheduled priorities first, as they are sorted and + * we can find it fast IF it's scheduled. + */ + list_for_each_entry(p, &stream->prio_list, prio_sched) { + if (p->prio == prio) + return p; + if (p->prio > prio) + break; + } + + /* No luck. So we search on all streams now. */ + for (i = 0; i < stream->outcnt; i++) { + if (!stream->out[i].ext) + continue; + + p = stream->out[i].ext->prio_head; + if (!p) + /* Means all other streams won't be initialized + * as well. + */ + break; + if (p->prio == prio) + return p; + } + + /* If not even there, allocate a new one. */ + return sctp_sched_prio_new_head(stream, prio, gfp); +} + +static void sctp_sched_prio_next_stream(struct sctp_stream_priorities *p) +{ + struct list_head *pos; + + pos = p->next->prio_list.next; + if (pos == &p->active) + pos = pos->next; + p->next = list_entry(pos, struct sctp_stream_out_ext, prio_list); +} + +static bool sctp_sched_prio_unsched(struct sctp_stream_out_ext *soute) +{ + bool scheduled = false; + + if (!list_empty(&soute->prio_list)) { + struct sctp_stream_priorities *prio_head = soute->prio_head; + + /* Scheduled */ + scheduled = true; + + if (prio_head->next == soute) + /* Try to move to the next stream */ + sctp_sched_prio_next_stream(prio_head); + + list_del_init(&soute->prio_list); + + /* Also unsched the priority if this was the last stream */ + if (list_empty(&prio_head->active)) { + list_del_init(&prio_head->prio_sched); + /* If there is no stream left, clear next */ + prio_head->next = NULL; + } + } + + return scheduled; +} + +static void sctp_sched_prio_sched(struct sctp_stream *stream, + struct sctp_stream_out_ext *soute) +{ + struct sctp_stream_priorities *prio, *prio_head; + + prio_head = soute->prio_head; + + /* Nothing to do if already scheduled */ + if (!list_empty(&soute->prio_list)) + return; + + /* Schedule the stream. If there is a next, we schedule the new + * one before it, so it's the last in round robin order. + * If there isn't, we also have to schedule the priority. + */ + if (prio_head->next) { + list_add(&soute->prio_list, prio_head->next->prio_list.prev); + return; + } + + list_add(&soute->prio_list, &prio_head->active); + prio_head->next = soute; + + list_for_each_entry(prio, &stream->prio_list, prio_sched) { + if (prio->prio > prio_head->prio) { + list_add(&prio_head->prio_sched, prio->prio_sched.prev); + return; + } + } + + list_add_tail(&prio_head->prio_sched, &stream->prio_list); +} + +static int sctp_sched_prio_set(struct sctp_stream *stream, __u16 sid, + __u16 prio, gfp_t gfp) +{ + struct sctp_stream_out *sout = &stream->out[sid]; + struct sctp_stream_out_ext *soute = sout->ext; + struct sctp_stream_priorities *prio_head, *old; + bool reschedule = false; + int i; + + prio_head = sctp_sched_prio_get_head(stream, prio, gfp); + if (!prio_head) + return -ENOMEM; + + reschedule = sctp_sched_prio_unsched(soute); + old = soute->prio_head; + soute->prio_head = prio_head; + if (reschedule) + sctp_sched_prio_sched(stream, soute); + + if (!old) + /* Happens when we set the priority for the first time */ + return 0; + + for (i = 0; i < stream->outcnt; i++) { + soute = stream->out[i].ext; + if (soute && soute->prio_head == old) + /* It's still in use, nothing else to do here. */ + return 0; + } + + /* No hits, we are good to free it. */ + kfree(old); + + return 0; +} + +static int sctp_sched_prio_get(struct sctp_stream *stream, __u16 sid, + __u16 *value) +{ + *value = stream->out[sid].ext->prio_head->prio; + return 0; +} + +static int sctp_sched_prio_init(struct sctp_stream *stream) +{ + INIT_LIST_HEAD(&stream->prio_list); + + return 0; +} + +static int sctp_sched_prio_init_sid(struct sctp_stream *stream, __u16 sid, + gfp_t gfp) +{ + INIT_LIST_HEAD(&stream->out[sid].ext->prio_list); + return sctp_sched_prio_set(stream, sid, 0, gfp); +} + +static void sctp_sched_prio_free(struct sctp_stream *stream) +{ + struct sctp_stream_priorities *prio, *n; + LIST_HEAD(list); + int i; + + /* As we don't keep a list of priorities, to avoid multiple + * frees we have to do it in 3 steps: + * 1. unsched everyone, so the lists are free to use in 2. + * 2. build the list of the priorities + * 3. free the list + */ + sctp_sched_prio_unsched_all(stream); + for (i = 0; i < stream->outcnt; i++) { + if (!stream->out[i].ext) + continue; + prio = stream->out[i].ext->prio_head; + if (prio && list_empty(&prio->prio_sched)) + list_add(&prio->prio_sched, &list); + } + list_for_each_entry_safe(prio, n, &list, prio_sched) { + list_del_init(&prio->prio_sched); + kfree(prio); + } +} + +static void sctp_sched_prio_enqueue(struct sctp_outq *q, + struct sctp_datamsg *msg) +{ + struct sctp_stream *stream; + struct sctp_chunk *ch; + __u16 sid; + + ch = list_first_entry(&msg->chunks, struct sctp_chunk, frag_list); + sid = sctp_chunk_stream_no(ch); + stream = &q->asoc->stream; + sctp_sched_prio_sched(stream, stream->out[sid].ext); +} + +static struct sctp_chunk *sctp_sched_prio_dequeue(struct sctp_outq *q) +{ + struct sctp_stream *stream = &q->asoc->stream; + struct sctp_stream_priorities *prio; + struct sctp_stream_out_ext *soute; + struct sctp_chunk *ch = NULL; + + /* Bail out quickly if queue is empty */ + if (list_empty(&q->out_chunk_list)) + goto out; + + /* Find which chunk is next. It's easy, it's either the current + * one or the first chunk on the next active stream. + */ + if (stream->out_curr) { + soute = stream->out_curr->ext; + } else { + prio = list_entry(stream->prio_list.next, + struct sctp_stream_priorities, prio_sched); + soute = prio->next; + } + ch = list_entry(soute->outq.next, struct sctp_chunk, stream_list); + sctp_sched_dequeue_common(q, ch); + +out: + return ch; +} + +static void sctp_sched_prio_dequeue_done(struct sctp_outq *q, + struct sctp_chunk *ch) +{ + struct sctp_stream_priorities *prio; + struct sctp_stream_out_ext *soute; + __u16 sid; + + /* Last chunk on that msg, move to the next stream on + * this priority. + */ + sid = sctp_chunk_stream_no(ch); + soute = q->asoc->stream.out[sid].ext; + prio = soute->prio_head; + + sctp_sched_prio_next_stream(prio); + + if (list_empty(&soute->outq)) + sctp_sched_prio_unsched(soute); +} + +static void sctp_sched_prio_sched_all(struct sctp_stream *stream) +{ + struct sctp_association *asoc; + struct sctp_stream_out *sout; + struct sctp_chunk *ch; + + asoc = container_of(stream, struct sctp_association, stream); + list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list) { + __u16 sid; + + sid = sctp_chunk_stream_no(ch); + sout = &stream->out[sid]; + if (sout->ext) + sctp_sched_prio_sched(stream, sout->ext); + } +} + +static void sctp_sched_prio_unsched_all(struct sctp_stream *stream) +{ + struct sctp_stream_priorities *p, *tmp; + struct sctp_stream_out_ext *soute, *souttmp; + + list_for_each_entry_safe(p, tmp, &stream->prio_list, prio_sched) + list_for_each_entry_safe(soute, souttmp, &p->active, prio_list) + sctp_sched_prio_unsched(soute); +} + +struct sctp_sched_ops sctp_sched_prio = { + .set = sctp_sched_prio_set, + .get = sctp_sched_prio_get, + .init = sctp_sched_prio_init, + .init_sid = sctp_sched_prio_init_sid, + .free = sctp_sched_prio_free, + .enqueue = sctp_sched_prio_enqueue, + .dequeue = sctp_sched_prio_dequeue, + .dequeue_done = sctp_sched_prio_dequeue_done, + .sched_all = sctp_sched_prio_sched_all, + .unsched_all = sctp_sched_prio_unsched_all, +}; diff --git a/net/sctp/stream_sched_rr.c b/net/sctp/stream_sched_rr.c new file mode 100644 index 000000000000..7612a438c5b9 --- /dev/null +++ b/net/sctp/stream_sched_rr.c @@ -0,0 +1,201 @@ +/* SCTP kernel implementation + * (C) Copyright Red Hat Inc. 2017 + * + * This file is part of the SCTP kernel implementation + * + * These functions manipulate sctp stream queue/scheduling. + * + * This SCTP implementation is free software; + * you can redistribute it and/or modify it under the terms of + * the GNU General Public License as published by + * the Free Software Foundation; either version 2, or (at your option) + * any later version. + * + * This SCTP implementation is distributed in the hope that it + * will be useful, but WITHOUT ANY WARRANTY; without even the implied + * ************************ + * warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * See the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with GNU CC; see the file COPYING. If not, see + * <http://www.gnu.org/licenses/>. + * + * Please send any bug reports or fixes you make to the + * email addresched(es): + * lksctp developers <linux-sctp@vger.kernel.org> + * + * Written or modified by: + * Marcelo Ricardo Leitner <marcelo.leitner@gmail.com> + */ + +#include <linux/list.h> +#include <net/sctp/sctp.h> +#include <net/sctp/sm.h> +#include <net/sctp/stream_sched.h> + +/* Priority handling + * RFC DRAFT ndata section 3.2 + */ +static void sctp_sched_rr_unsched_all(struct sctp_stream *stream); + +static void sctp_sched_rr_next_stream(struct sctp_stream *stream) +{ + struct list_head *pos; + + pos = stream->rr_next->rr_list.next; + if (pos == &stream->rr_list) + pos = pos->next; + stream->rr_next = list_entry(pos, struct sctp_stream_out_ext, rr_list); +} + +static void sctp_sched_rr_unsched(struct sctp_stream *stream, + struct sctp_stream_out_ext *soute) +{ + if (stream->rr_next == soute) + /* Try to move to the next stream */ + sctp_sched_rr_next_stream(stream); + + list_del_init(&soute->rr_list); + + /* If we have no other stream queued, clear next */ + if (list_empty(&stream->rr_list)) + stream->rr_next = NULL; +} + +static void sctp_sched_rr_sched(struct sctp_stream *stream, + struct sctp_stream_out_ext *soute) +{ + if (!list_empty(&soute->rr_list)) + /* Already scheduled. */ + return; + + /* Schedule the stream */ + list_add_tail(&soute->rr_list, &stream->rr_list); + + if (!stream->rr_next) + stream->rr_next = soute; +} + +static int sctp_sched_rr_set(struct sctp_stream *stream, __u16 sid, + __u16 prio, gfp_t gfp) +{ + return 0; +} + +static int sctp_sched_rr_get(struct sctp_stream *stream, __u16 sid, + __u16 *value) +{ + return 0; +} + +static int sctp_sched_rr_init(struct sctp_stream *stream) +{ + INIT_LIST_HEAD(&stream->rr_list); + stream->rr_next = NULL; + + return 0; +} + +static int sctp_sched_rr_init_sid(struct sctp_stream *stream, __u16 sid, + gfp_t gfp) +{ + INIT_LIST_HEAD(&stream->out[sid].ext->rr_list); + + return 0; +} + +static void sctp_sched_rr_free(struct sctp_stream *stream) +{ + sctp_sched_rr_unsched_all(stream); +} + +static void sctp_sched_rr_enqueue(struct sctp_outq *q, + struct sctp_datamsg *msg) +{ + struct sctp_stream *stream; + struct sctp_chunk *ch; + __u16 sid; + + ch = list_first_entry(&msg->chunks, struct sctp_chunk, frag_list); + sid = sctp_chunk_stream_no(ch); + stream = &q->asoc->stream; + sctp_sched_rr_sched(stream, stream->out[sid].ext); +} + +static struct sctp_chunk *sctp_sched_rr_dequeue(struct sctp_outq *q) +{ + struct sctp_stream *stream = &q->asoc->stream; + struct sctp_stream_out_ext *soute; + struct sctp_chunk *ch = NULL; + + /* Bail out quickly if queue is empty */ + if (list_empty(&q->out_chunk_list)) + goto out; + + /* Find which chunk is next */ + if (stream->out_curr) + soute = stream->out_curr->ext; + else + soute = stream->rr_next; + ch = list_entry(soute->outq.next, struct sctp_chunk, stream_list); + + sctp_sched_dequeue_common(q, ch); + +out: + return ch; +} + +static void sctp_sched_rr_dequeue_done(struct sctp_outq *q, + struct sctp_chunk *ch) +{ + struct sctp_stream_out_ext *soute; + __u16 sid; + + /* Last chunk on that msg, move to the next stream */ + sid = sctp_chunk_stream_no(ch); + soute = q->asoc->stream.out[sid].ext; + + sctp_sched_rr_next_stream(&q->asoc->stream); + + if (list_empty(&soute->outq)) + sctp_sched_rr_unsched(&q->asoc->stream, soute); +} + +static void sctp_sched_rr_sched_all(struct sctp_stream *stream) +{ + struct sctp_association *asoc; + struct sctp_stream_out_ext *soute; + struct sctp_chunk *ch; + + asoc = container_of(stream, struct sctp_association, stream); + list_for_each_entry(ch, &asoc->outqueue.out_chunk_list, list) { + __u16 sid; + + sid = sctp_chunk_stream_no(ch); + soute = stream->out[sid].ext; + if (soute) + sctp_sched_rr_sched(stream, soute); + } +} + +static void sctp_sched_rr_unsched_all(struct sctp_stream *stream) +{ + struct sctp_stream_out_ext *soute, *tmp; + + list_for_each_entry_safe(soute, tmp, &stream->rr_list, rr_list) + sctp_sched_rr_unsched(stream, soute); +} + +struct sctp_sched_ops sctp_sched_rr = { + .set = sctp_sched_rr_set, + .get = sctp_sched_rr_get, + .init = sctp_sched_rr_init, + .init_sid = sctp_sched_rr_init_sid, + .free = sctp_sched_rr_free, + .enqueue = sctp_sched_rr_enqueue, + .dequeue = sctp_sched_rr_dequeue, + .dequeue_done = sctp_sched_rr_dequeue_done, + .sched_all = sctp_sched_rr_sched_all, + .unsched_all = sctp_sched_rr_unsched_all, +}; diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 0e732f68c2bf..ef7ca44d6e6a 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -46,7 +46,7 @@ static int timer_max = 86400000; /* ms in one day */ static int int_max = INT_MAX; static int sack_timer_min = 1; static int sack_timer_max = 500; -static int addr_scope_max = 3; /* check sctp_scope_policy_t in include/net/sctp/constants.h for max entries */ +static int addr_scope_max = SCTP_SCOPE_POLICY_MAX; static int rwnd_scale_max = 16; static int rto_alpha_min = 0; static int rto_beta_min = 0; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 80a97c8501a7..1e5a22430cf5 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -87,14 +87,11 @@ static struct sctp_transport *sctp_transport_init(struct net *net, INIT_LIST_HEAD(&peer->send_ready); INIT_LIST_HEAD(&peer->transports); - setup_timer(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event, - (unsigned long)peer); - setup_timer(&peer->hb_timer, sctp_generate_heartbeat_event, - (unsigned long)peer); - setup_timer(&peer->reconf_timer, sctp_generate_reconf_event, - (unsigned long)peer); - setup_timer(&peer->proto_unreach_timer, - sctp_generate_proto_unreach_event, (unsigned long)peer); + timer_setup(&peer->T3_rtx_timer, sctp_generate_t3_rtx_event, 0); + timer_setup(&peer->hb_timer, sctp_generate_heartbeat_event, 0); + timer_setup(&peer->reconf_timer, sctp_generate_reconf_event, 0); + timer_setup(&peer->proto_unreach_timer, + sctp_generate_proto_unreach_event, 0); /* Initialize the 64-bit random nonce sent with heartbeat. */ get_random_bytes(&peer->hb_nonce, sizeof(peer->hb_nonce)); @@ -490,7 +487,7 @@ void sctp_transport_raise_cwnd(struct sctp_transport *transport, * detected. */ void sctp_transport_lower_cwnd(struct sctp_transport *transport, - sctp_lower_cwnd_t reason) + enum sctp_lower_cwnd reason) { struct sctp_association *asoc = transport->asoc; diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c index 5f86c5062a98..5447228bf1a0 100644 --- a/net/sctp/ulpevent.c +++ b/net/sctp/ulpevent.c @@ -371,19 +371,19 @@ sctp_ulpevent_make_remote_error(const struct sctp_association *asoc, struct sctp_chunk *chunk, __u16 flags, gfp_t gfp) { - struct sctp_ulpevent *event; struct sctp_remote_error *sre; + struct sctp_ulpevent *event; + struct sctp_errhdr *ch; struct sk_buff *skb; - sctp_errhdr_t *ch; __be16 cause; int elen; - ch = (sctp_errhdr_t *)(chunk->skb->data); + ch = (struct sctp_errhdr *)(chunk->skb->data); cause = ch->cause; - elen = SCTP_PAD4(ntohs(ch->length)) - sizeof(sctp_errhdr_t); + elen = SCTP_PAD4(ntohs(ch->length)) - sizeof(*ch); /* Pull off the ERROR header. */ - skb_pull(chunk->skb, sizeof(sctp_errhdr_t)); + skb_pull(chunk->skb, sizeof(*ch)); /* Copy the skb to a new skb with room for us to prepend * notification with. @@ -847,7 +847,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_sender_dry_event( struct sctp_ulpevent *sctp_ulpevent_make_stream_reset_event( const struct sctp_association *asoc, __u16 flags, __u16 stream_num, - __u16 *stream_list, gfp_t gfp) + __be16 *stream_list, gfp_t gfp) { struct sctp_stream_reset_event *sreset; struct sctp_ulpevent *event; diff --git a/net/sctp/ulpqueue.c b/net/sctp/ulpqueue.c index 0225d62a869f..a71be33f3afe 100644 --- a/net/sctp/ulpqueue.c +++ b/net/sctp/ulpqueue.c @@ -265,7 +265,8 @@ int sctp_ulpq_tail_event(struct sctp_ulpq *ulpq, struct sctp_ulpevent *event) sctp_ulpq_clear_pd(ulpq); if (queue == &sk->sk_receive_queue && !sp->data_ready_signalled) { - sp->data_ready_signalled = 1; + if (!sock_owned_by_user(sk)) + sp->data_ready_signalled = 1; sk->sk_data_ready(sk); } return 1; diff --git a/net/smc/Kconfig b/net/smc/Kconfig index 33954852f3f8..c717ef0896aa 100644 --- a/net/smc/Kconfig +++ b/net/smc/Kconfig @@ -8,10 +8,6 @@ config SMC The Linux implementation of the SMC-R solution is designed as a separate socket family SMC. - Warning: SMC will expose all memory for remote reads and writes - once a connection is established. Don't enable this option except - for tightly controlled lab environment. - Select this option if you want to run SMC socket applications config SMC_DIAG diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 6793d7348cc8..6451c5013e06 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -282,6 +282,7 @@ int smc_netinfo_by_tcpsk(struct socket *clcsock, __be32 *subnet, u8 *prefix_len) { struct dst_entry *dst = sk_dst_get(clcsock->sk); + struct in_device *in_dev; struct sockaddr_in addr; int rc = -ENOENT; int len; @@ -298,14 +299,17 @@ int smc_netinfo_by_tcpsk(struct socket *clcsock, /* get address to which the internal TCP socket is bound */ kernel_getsockname(clcsock, (struct sockaddr *)&addr, &len); /* analyze IPv4 specific data of net_device belonging to TCP socket */ - for_ifa(dst->dev->ip_ptr) { - if (ifa->ifa_address != addr.sin_addr.s_addr) + rcu_read_lock(); + in_dev = __in_dev_get_rcu(dst->dev); + for_ifa(in_dev) { + if (!inet_ifa_match(addr.sin_addr.s_addr, ifa)) continue; *prefix_len = inet_mask_len(ifa->ifa_mask); *subnet = ifa->ifa_address & ifa->ifa_mask; rc = 0; break; - } endfor_ifa(dst->dev->ip_ptr); + } endfor_ifa(in_dev); + rcu_read_unlock(); out_rel: dst_release(dst); @@ -338,6 +342,12 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc, union ib_gid *gid) return SMC_CLC_DECL_INTERR; smc_wr_remember_qp_attr(link); + + rc = smc_wr_reg_send(link, + smc->conn.rmb_desc->mr_rx[SMC_SINGLE_LINK]); + if (rc) + return SMC_CLC_DECL_INTERR; + /* send CONFIRM LINK response over RoCE fabric */ rc = smc_llc_send_confirm_link(link, link->smcibdev->mac[link->ibport - 1], @@ -380,6 +390,12 @@ static int smc_connect_rdma(struct smc_sock *smc) int rc = 0; u8 ibport; + if (!tcp_sk(smc->clcsock->sk)->syn_smc) { + /* peer has not signalled SMC-capability */ + smc->use_fallback = true; + goto out_connected; + } + /* IPSec connections opt out of SMC-R optimizations */ if (using_ipsec(smc)) { reason_code = SMC_CLC_DECL_IPSEC; @@ -430,12 +446,8 @@ static int smc_connect_rdma(struct smc_sock *smc) smc_conn_save_peer_info(smc, &aclc); - rc = smc_sndbuf_create(smc); - if (rc) { - reason_code = SMC_CLC_DECL_MEM; - goto decline_rdma_unlock; - } - rc = smc_rmb_create(smc); + /* create send buffer and rmb */ + rc = smc_buf_create(smc); if (rc) { reason_code = SMC_CLC_DECL_MEM; goto decline_rdma_unlock; @@ -459,7 +471,20 @@ static int smc_connect_rdma(struct smc_sock *smc) reason_code = SMC_CLC_DECL_INTERR; goto decline_rdma_unlock; } + } else { + struct smc_buf_desc *buf_desc = smc->conn.rmb_desc; + + if (!buf_desc->reused) { + /* register memory region for new rmb */ + rc = smc_wr_reg_send(link, + buf_desc->mr_rx[SMC_SINGLE_LINK]); + if (rc) { + reason_code = SMC_CLC_DECL_INTERR; + goto decline_rdma_unlock; + } + } } + smc_rmb_sync_sg_for_device(&smc->conn); rc = smc_clc_send_confirm(smc); if (rc) @@ -494,7 +519,7 @@ decline_rdma: /* RDMA setup failed, switch back to TCP */ smc->use_fallback = true; if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) { - rc = smc_clc_send_decline(smc, reason_code, 0); + rc = smc_clc_send_decline(smc, reason_code); if (rc < sizeof(struct smc_clc_msg_decline)) goto out_err; } @@ -536,6 +561,7 @@ static int smc_connect(struct socket *sock, struct sockaddr *addr, } smc_copy_sock_settings_to_clc(smc); + tcp_sk(smc->clcsock->sk)->syn_smc = 1; rc = kernel_connect(smc->clcsock, addr, alen, flags); if (rc) goto out; @@ -692,6 +718,12 @@ static int smc_serv_conf_first_link(struct smc_sock *smc) int rc; link = &lgr->lnk[SMC_SINGLE_LINK]; + + rc = smc_wr_reg_send(link, + smc->conn.rmb_desc->mr_rx[SMC_SINGLE_LINK]); + if (rc) + return SMC_CLC_DECL_INTERR; + /* send CONFIRM LINK request to client over the RoCE fabric */ rc = smc_llc_send_confirm_link(link, link->smcibdev->mac[link->ibport - 1], @@ -734,6 +766,12 @@ static void smc_listen_work(struct work_struct *work) u8 prefix_len; u8 ibport; + /* check if peer is smc capable */ + if (!tcp_sk(newclcsock->sk)->syn_smc) { + new_smc->use_fallback = true; + goto out_connected; + } + /* do inband token exchange - *wait for and receive SMC Proposal CLC message */ @@ -779,46 +817,50 @@ static void smc_listen_work(struct work_struct *work) mutex_lock(&smc_create_lgr_pending); local_contact = smc_conn_create(new_smc, peeraddr.sin_addr.s_addr, smcibdev, ibport, &pclc.lcl, 0); - if (local_contact == SMC_REUSE_CONTACT) - /* lock no longer needed, free it due to following - * smc_clc_wait_msg() call - */ - mutex_unlock(&smc_create_lgr_pending); if (local_contact < 0) { rc = local_contact; if (rc == -ENOMEM) reason_code = SMC_CLC_DECL_MEM;/* insufficient memory*/ - else if (rc == -ENOLINK) - reason_code = SMC_CLC_DECL_SYNCERR; /* synchr. error */ - goto decline_rdma; + goto decline_rdma_unlock; } link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK]; - rc = smc_sndbuf_create(new_smc); + /* create send buffer and rmb */ + rc = smc_buf_create(new_smc); if (rc) { reason_code = SMC_CLC_DECL_MEM; - goto decline_rdma; - } - rc = smc_rmb_create(new_smc); - if (rc) { - reason_code = SMC_CLC_DECL_MEM; - goto decline_rdma; + goto decline_rdma_unlock; } smc_close_init(new_smc); smc_rx_init(new_smc); + if (local_contact != SMC_FIRST_CONTACT) { + struct smc_buf_desc *buf_desc = new_smc->conn.rmb_desc; + + if (!buf_desc->reused) { + /* register memory region for new rmb */ + rc = smc_wr_reg_send(link, + buf_desc->mr_rx[SMC_SINGLE_LINK]); + if (rc) { + reason_code = SMC_CLC_DECL_INTERR; + goto decline_rdma_unlock; + } + } + } + smc_rmb_sync_sg_for_device(&new_smc->conn); + rc = smc_clc_send_accept(new_smc, local_contact); if (rc) - goto out_err; + goto out_err_unlock; /* receive SMC Confirm CLC message */ reason_code = smc_clc_wait_msg(new_smc, &cclc, sizeof(cclc), SMC_CLC_CONFIRM); if (reason_code < 0) - goto out_err; + goto out_err_unlock; if (reason_code > 0) - goto decline_rdma; + goto decline_rdma_unlock; smc_conn_save_peer_info(new_smc, &cclc); if (local_contact == SMC_FIRST_CONTACT) smc_link_save_peer_info(link, &cclc); @@ -826,35 +868,34 @@ static void smc_listen_work(struct work_struct *work) rc = smc_rmb_rtoken_handling(&new_smc->conn, &cclc); if (rc) { reason_code = SMC_CLC_DECL_INTERR; - goto decline_rdma; + goto decline_rdma_unlock; } if (local_contact == SMC_FIRST_CONTACT) { rc = smc_ib_ready_link(link); if (rc) { reason_code = SMC_CLC_DECL_INTERR; - goto decline_rdma; + goto decline_rdma_unlock; } /* QP confirmation over RoCE fabric */ reason_code = smc_serv_conf_first_link(new_smc); if (reason_code < 0) { /* peer is not aware of a problem */ rc = reason_code; - goto out_err; + goto out_err_unlock; } if (reason_code > 0) - goto decline_rdma; + goto decline_rdma_unlock; } smc_tx_init(new_smc); + mutex_unlock(&smc_create_lgr_pending); out_connected: sk_refcnt_debug_inc(newsmcsk); if (newsmcsk->sk_state == SMC_INIT) newsmcsk->sk_state = SMC_ACTIVE; enqueue: - if (local_contact == SMC_FIRST_CONTACT) - mutex_unlock(&smc_create_lgr_pending); lock_sock_nested(&lsmc->sk, SINGLE_DEPTH_NESTING); if (lsmc->sk.sk_state == SMC_LISTEN) { smc_accept_enqueue(&lsmc->sk, newsmcsk); @@ -868,17 +909,21 @@ enqueue: sock_put(&lsmc->sk); /* sock_hold in smc_tcp_listen_work */ return; +decline_rdma_unlock: + mutex_unlock(&smc_create_lgr_pending); decline_rdma: /* RDMA setup failed, switch back to TCP */ smc_conn_free(&new_smc->conn); new_smc->use_fallback = true; if (reason_code && (reason_code != SMC_CLC_DECL_REPLY)) { - rc = smc_clc_send_decline(new_smc, reason_code, 0); + rc = smc_clc_send_decline(new_smc, reason_code); if (rc < sizeof(struct smc_clc_msg_decline)) goto out_err; } goto out_connected; +out_err_unlock: + mutex_unlock(&smc_create_lgr_pending); out_err: newsmcsk->sk_state = SMC_CLOSED; smc_conn_free(&new_smc->conn); @@ -935,6 +980,7 @@ static int smc_listen(struct socket *sock, int backlog) * them to the clc socket -- copy smc socket options to clc socket */ smc_copy_sock_settings_to_clc(smc); + tcp_sk(smc->clcsock->sk)->syn_smc = 1; rc = kernel_listen(smc->clcsock, backlog); if (rc) @@ -1377,6 +1423,7 @@ static int __init smc_init(void) goto out_sock; } + static_branch_enable(&tcp_have_smc); return 0; out_sock: @@ -1401,6 +1448,7 @@ static void __exit smc_exit(void) list_del_init(&lgr->list); smc_lgr_free(lgr); /* free link group */ } + static_branch_disable(&tcp_have_smc); smc_ib_unregister_client(); sock_unregister(PF_SMC); proto_unregister(&smc_proto); diff --git a/net/smc/smc.h b/net/smc/smc.h index 6e44313e4467..0bee9d16cf29 100644 --- a/net/smc/smc.h +++ b/net/smc/smc.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Shared Memory Communications over RDMA (SMC-R) and RoCE * @@ -149,7 +150,7 @@ struct smc_connection { atomic_t sndbuf_space; /* remaining space in sndbuf */ u16 tx_cdc_seq; /* sequence # for CDC send */ spinlock_t send_lock; /* protect wr_sends */ - struct work_struct tx_work; /* retry of smc_cdc_msg_send */ + struct delayed_work tx_work; /* retry of smc_cdc_msg_send */ struct smc_host_cdc_msg local_rx_ctrl; /* filled during event_handl. * .prod cf. TCP rcv_nxt diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c index a7294edbc221..87f7bede6eab 100644 --- a/net/smc/smc_cdc.c +++ b/net/smc/smc_cdc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Shared Memory Communications over RDMA (SMC-R) and RoCE * @@ -62,10 +63,12 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd, bh_unlock_sock(&smc->sk); } -int smc_cdc_get_free_slot(struct smc_link *link, +int smc_cdc_get_free_slot(struct smc_connection *conn, struct smc_wr_buf **wr_buf, struct smc_cdc_tx_pend **pend) { + struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK]; + return smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf, (struct smc_wr_tx_pend_priv **)pend); } @@ -118,8 +121,7 @@ int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn) struct smc_wr_buf *wr_buf; int rc; - rc = smc_cdc_get_free_slot(&conn->lgr->lnk[SMC_SINGLE_LINK], &wr_buf, - &pend); + rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend); if (rc) return rc; diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h index 8e1d76f26007..149ceda1b088 100644 --- a/net/smc/smc_cdc.h +++ b/net/smc/smc_cdc.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Shared Memory Communications over RDMA (SMC-R) and RoCE * @@ -206,7 +207,8 @@ static inline void smc_cdc_msg_to_host(struct smc_host_cdc_msg *local, struct smc_cdc_tx_pend; -int smc_cdc_get_free_slot(struct smc_link *link, struct smc_wr_buf **wr_buf, +int smc_cdc_get_free_slot(struct smc_connection *conn, + struct smc_wr_buf **wr_buf, struct smc_cdc_tx_pend **pend); void smc_cdc_tx_dismiss_slots(struct smc_connection *conn); int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf, diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c index 03ec058d18df..1800e16b2a02 100644 --- a/net/smc/smc_clc.c +++ b/net/smc/smc_clc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Shared Memory Communications over RDMA (SMC-R) and RoCE * @@ -95,9 +96,10 @@ int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, } if (clcm->type == SMC_CLC_DECLINE) { reason_code = SMC_CLC_DECL_REPLY; - if (ntohl(((struct smc_clc_msg_decline *)buf)->peer_diagnosis) - == SMC_CLC_DECL_SYNCERR) + if (((struct smc_clc_msg_decline *)buf)->hdr.flag) { smc->conn.lgr->sync_err = true; + smc_lgr_terminate(smc->conn.lgr); + } } out: @@ -105,8 +107,7 @@ out: } /* send CLC DECLINE message across internal TCP socket */ -int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, - u8 out_of_sync) +int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info) { struct smc_clc_msg_decline dclc; struct msghdr msg; @@ -118,7 +119,7 @@ int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, dclc.hdr.type = SMC_CLC_DECLINE; dclc.hdr.length = htons(sizeof(struct smc_clc_msg_decline)); dclc.hdr.version = SMC_CLC_V1; - dclc.hdr.flag = out_of_sync ? 1 : 0; + dclc.hdr.flag = (peer_diag_info == SMC_CLC_DECL_SYNCERR) ? 1 : 0; memcpy(dclc.id_for_peer, local_systemid, sizeof(local_systemid)); dclc.peer_diagnosis = htonl(peer_diag_info); memcpy(dclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); @@ -204,13 +205,13 @@ int smc_clc_send_confirm(struct smc_sock *smc) memcpy(&cclc.lcl.mac, &link->smcibdev->mac[link->ibport - 1], ETH_ALEN); hton24(cclc.qpn, link->roce_qp->qp_num); cclc.rmb_rkey = - htonl(conn->rmb_desc->rkey[SMC_SINGLE_LINK]); + htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); cclc.conn_idx = 1; /* for now: 1 RMB = 1 RMBE */ cclc.rmbe_alert_token = htonl(conn->alert_token_local); cclc.qp_mtu = min(link->path_mtu, link->peer_mtu); cclc.rmbe_size = conn->rmbe_size_short; - cclc.rmb_dma_addr = - cpu_to_be64((u64)conn->rmb_desc->dma_addr[SMC_SINGLE_LINK]); + cclc.rmb_dma_addr = cpu_to_be64( + (u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl)); hton24(cclc.psn, link->psn_initial); memcpy(cclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); @@ -256,13 +257,13 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact) memcpy(&aclc.lcl.mac, link->smcibdev->mac[link->ibport - 1], ETH_ALEN); hton24(aclc.qpn, link->roce_qp->qp_num); aclc.rmb_rkey = - htonl(conn->rmb_desc->rkey[SMC_SINGLE_LINK]); + htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey); aclc.conn_idx = 1; /* as long as 1 RMB = 1 RMBE */ aclc.rmbe_alert_token = htonl(conn->alert_token_local); aclc.qp_mtu = link->path_mtu; aclc.rmbe_size = conn->rmbe_size_short, - aclc.rmb_dma_addr = - cpu_to_be64((u64)conn->rmb_desc->dma_addr[SMC_SINGLE_LINK]); + aclc.rmb_dma_addr = cpu_to_be64( + (u64)sg_dma_address(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl)); hton24(aclc.psn, link->psn_initial); memcpy(aclc.trl.eyecatcher, SMC_EYECATCHER, sizeof(SMC_EYECATCHER)); diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h index 13db8ce177c9..12a9af1539a2 100644 --- a/net/smc/smc_clc.h +++ b/net/smc/smc_clc.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Shared Memory Communications over RDMA (SMC-R) and RoCE * @@ -106,8 +107,7 @@ struct smc_ib_device; int smc_clc_wait_msg(struct smc_sock *smc, void *buf, int buflen, u8 expected_type); -int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info, - u8 out_of_sync); +int smc_clc_send_decline(struct smc_sock *smc, u32 peer_diag_info); int smc_clc_send_proposal(struct smc_sock *smc, struct smc_ib_device *smcibdev, u8 ibport); int smc_clc_send_confirm(struct smc_sock *smc); diff --git a/net/smc/smc_close.c b/net/smc/smc_close.c index 3c2e166b5d22..48615d2ac4aa 100644 --- a/net/smc/smc_close.c +++ b/net/smc/smc_close.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Shared Memory Communications over RDMA (SMC-R) and RoCE * @@ -174,15 +175,15 @@ int smc_close_active(struct smc_sock *smc) { struct smc_cdc_conn_state_flags *txflags = &smc->conn.local_tx_ctrl.conn_state_flags; - long timeout = SMC_MAX_STREAM_WAIT_TIMEOUT; struct smc_connection *conn = &smc->conn; struct sock *sk = &smc->sk; int old_state; + long timeout; int rc = 0; - if (sock_flag(sk, SOCK_LINGER) && - !(current->flags & PF_EXITING)) - timeout = sk->sk_lingertime; + timeout = current->flags & PF_EXITING ? + 0 : sock_flag(sk, SOCK_LINGER) ? + sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT; again: old_state = sk->sk_state; @@ -208,7 +209,7 @@ again: case SMC_ACTIVE: smc_close_stream_wait(smc, timeout); release_sock(sk); - cancel_work_sync(&conn->tx_work); + cancel_delayed_work_sync(&conn->tx_work); lock_sock(sk); if (sk->sk_state == SMC_ACTIVE) { /* send close request */ @@ -234,7 +235,7 @@ again: if (!smc_cdc_rxed_any_close(conn)) smc_close_stream_wait(smc, timeout); release_sock(sk); - cancel_work_sync(&conn->tx_work); + cancel_delayed_work_sync(&conn->tx_work); lock_sock(sk); if (sk->sk_err != ECONNABORTED) { /* confirm close from peer */ @@ -263,7 +264,9 @@ again: /* peer sending PeerConnectionClosed will cause transition */ break; case SMC_PROCESSABORT: - cancel_work_sync(&conn->tx_work); + release_sock(sk); + cancel_delayed_work_sync(&conn->tx_work); + lock_sock(sk); smc_close_abort(conn); sk->sk_state = SMC_CLOSED; smc_close_wait_tx_pends(smc); @@ -358,7 +361,8 @@ static void smc_close_passive_work(struct work_struct *work) case SMC_PEERCLOSEWAIT1: if (rxflags->peer_done_writing) sk->sk_state = SMC_PEERCLOSEWAIT2; - /* fall through to check for closing */ + /* fall through */ + /* to check for closing */ case SMC_PEERCLOSEWAIT2: case SMC_PEERFINCLOSEWAIT: if (!smc_cdc_rxed_any_close(&smc->conn)) @@ -411,13 +415,14 @@ void smc_close_sock_put_work(struct work_struct *work) int smc_close_shutdown_write(struct smc_sock *smc) { struct smc_connection *conn = &smc->conn; - long timeout = SMC_MAX_STREAM_WAIT_TIMEOUT; struct sock *sk = &smc->sk; int old_state; + long timeout; int rc = 0; - if (sock_flag(sk, SOCK_LINGER)) - timeout = sk->sk_lingertime; + timeout = current->flags & PF_EXITING ? + 0 : sock_flag(sk, SOCK_LINGER) ? + sk->sk_lingertime : SMC_MAX_STREAM_WAIT_TIMEOUT; again: old_state = sk->sk_state; @@ -425,7 +430,7 @@ again: case SMC_ACTIVE: smc_close_stream_wait(smc, timeout); release_sock(sk); - cancel_work_sync(&conn->tx_work); + cancel_delayed_work_sync(&conn->tx_work); lock_sock(sk); /* send close wr request */ rc = smc_close_wr(conn); @@ -439,7 +444,7 @@ again: if (!smc_cdc_rxed_any_close(conn)) smc_close_stream_wait(smc, timeout); release_sock(sk); - cancel_work_sync(&conn->tx_work); + cancel_delayed_work_sync(&conn->tx_work); lock_sock(sk); /* confirm close from peer */ rc = smc_close_wr(conn); diff --git a/net/smc/smc_close.h b/net/smc/smc_close.h index 4a3d99a8d7cb..ed82506b1b0a 100644 --- a/net/smc/smc_close.h +++ b/net/smc/smc_close.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Shared Memory Communications over RDMA (SMC-R) and RoCE * diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index 3ac09a629ea1..2578fbd95664 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Shared Memory Communications over RDMA (SMC-R) and RoCE * @@ -25,8 +26,9 @@ #include "smc_cdc.h" #include "smc_close.h" -#define SMC_LGR_NUM_INCR 256 -#define SMC_LGR_FREE_DELAY (600 * HZ) +#define SMC_LGR_NUM_INCR 256 +#define SMC_LGR_FREE_DELAY_SERV (600 * HZ) +#define SMC_LGR_FREE_DELAY_CLNT (SMC_LGR_FREE_DELAY_SERV + 10) static u32 smc_lgr_num; /* unique link group number */ @@ -107,8 +109,15 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn) __smc_lgr_unregister_conn(conn); } write_unlock_bh(&lgr->conns_lock); - if (reduced && !lgr->conns_num) - schedule_delayed_work(&lgr->free_work, SMC_LGR_FREE_DELAY); + if (!reduced || lgr->conns_num) + return; + /* client link group creation always follows the server link group + * creation. For client use a somewhat higher removal delay time, + * otherwise there is a risk of out-of-sync link groups. + */ + mod_delayed_work(system_wq, &lgr->free_work, + lgr->role == SMC_CLNT ? SMC_LGR_FREE_DELAY_CLNT : + SMC_LGR_FREE_DELAY_SERV); } static void smc_lgr_free_work(struct work_struct *work) @@ -175,7 +184,6 @@ static int smc_lgr_create(struct smc_sock *smc, __be32 peer_in_addr, rc = smc_wr_alloc_link_mem(lnk); if (rc) goto free_lgr; - init_waitqueue_head(&lnk->wr_tx_wait); rc = smc_ib_create_protection_domain(lnk); if (rc) goto free_link_mem; @@ -207,17 +215,14 @@ out: return rc; } -static void smc_sndbuf_unuse(struct smc_connection *conn) +static void smc_buf_unuse(struct smc_connection *conn) { if (conn->sndbuf_desc) { conn->sndbuf_desc->used = 0; conn->sndbuf_size = 0; } -} - -static void smc_rmb_unuse(struct smc_connection *conn) -{ if (conn->rmb_desc) { + conn->rmb_desc->reused = true; conn->rmb_desc->used = 0; conn->rmbe_size = 0; } @@ -232,8 +237,7 @@ void smc_conn_free(struct smc_connection *conn) return; smc_cdc_tx_dismiss_slots(conn); smc_lgr_unregister_conn(conn); - smc_rmb_unuse(conn); - smc_sndbuf_unuse(conn); + smc_buf_unuse(conn); } static void smc_link_clear(struct smc_link *lnk) @@ -246,48 +250,57 @@ static void smc_link_clear(struct smc_link *lnk) smc_wr_free_link_mem(lnk); } -static void smc_lgr_free_sndbufs(struct smc_link_group *lgr) +static void smc_buf_free(struct smc_buf_desc *buf_desc, struct smc_link *lnk, + bool is_rmb) { - struct smc_buf_desc *sndbuf_desc, *bf_desc; - int i; - - for (i = 0; i < SMC_RMBE_SIZES; i++) { - list_for_each_entry_safe(sndbuf_desc, bf_desc, &lgr->sndbufs[i], - list) { - list_del(&sndbuf_desc->list); - smc_ib_buf_unmap(lgr->lnk[SMC_SINGLE_LINK].smcibdev, - smc_uncompress_bufsize(i), - sndbuf_desc, DMA_TO_DEVICE); - kfree(sndbuf_desc->cpu_addr); - kfree(sndbuf_desc); - } + if (is_rmb) { + if (buf_desc->mr_rx[SMC_SINGLE_LINK]) + smc_ib_put_memory_region( + buf_desc->mr_rx[SMC_SINGLE_LINK]); + smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc, + DMA_FROM_DEVICE); + } else { + smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc, + DMA_TO_DEVICE); } + sg_free_table(&buf_desc->sgt[SMC_SINGLE_LINK]); + if (buf_desc->cpu_addr) + free_pages((unsigned long)buf_desc->cpu_addr, buf_desc->order); + kfree(buf_desc); } -static void smc_lgr_free_rmbs(struct smc_link_group *lgr) +static void __smc_lgr_free_bufs(struct smc_link_group *lgr, bool is_rmb) { - struct smc_buf_desc *rmb_desc, *bf_desc; struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK]; + struct smc_buf_desc *buf_desc, *bf_desc; + struct list_head *buf_list; int i; for (i = 0; i < SMC_RMBE_SIZES; i++) { - list_for_each_entry_safe(rmb_desc, bf_desc, &lgr->rmbs[i], + if (is_rmb) + buf_list = &lgr->rmbs[i]; + else + buf_list = &lgr->sndbufs[i]; + list_for_each_entry_safe(buf_desc, bf_desc, buf_list, list) { - list_del(&rmb_desc->list); - smc_ib_buf_unmap(lnk->smcibdev, - smc_uncompress_bufsize(i), - rmb_desc, DMA_FROM_DEVICE); - kfree(rmb_desc->cpu_addr); - kfree(rmb_desc); + list_del(&buf_desc->list); + smc_buf_free(buf_desc, lnk, is_rmb); } } } +static void smc_lgr_free_bufs(struct smc_link_group *lgr) +{ + /* free send buffers */ + __smc_lgr_free_bufs(lgr, false); + /* free rmbs */ + __smc_lgr_free_bufs(lgr, true); +} + /* remove a link group */ void smc_lgr_free(struct smc_link_group *lgr) { - smc_lgr_free_rmbs(lgr); - smc_lgr_free_sndbufs(lgr); + smc_lgr_free_bufs(lgr); smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]); kfree(lgr); } @@ -368,10 +381,14 @@ static int smc_link_determine_gid(struct smc_link_group *lgr) if (ib_query_gid(lnk->smcibdev->ibdev, lnk->ibport, i, &gid, &gattr)) continue; - if (gattr.ndev && - (vlan_dev_vlan_id(gattr.ndev) == lgr->vlan_id)) { - lnk->gid = gid; - return 0; + if (gattr.ndev) { + if (is_vlan_dev(gattr.ndev) && + vlan_dev_vlan_id(gattr.ndev) == lgr->vlan_id) { + lnk->gid = gid; + dev_put(gattr.ndev); + return 0; + } + dev_put(gattr.ndev); } } return -ENODEV; @@ -452,45 +469,25 @@ out: return rc ? rc : local_contact; } -/* try to reuse a sndbuf description slot of the sndbufs list for a certain - * buf_size; if not available, return NULL +/* try to reuse a sndbuf or rmb description slot for a certain + * buffer size; if not available, return NULL */ static inline -struct smc_buf_desc *smc_sndbuf_get_slot(struct smc_link_group *lgr, - int compressed_bufsize) +struct smc_buf_desc *smc_buf_get_slot(struct smc_link_group *lgr, + int compressed_bufsize, + rwlock_t *lock, + struct list_head *buf_list) { - struct smc_buf_desc *sndbuf_slot; - - read_lock_bh(&lgr->sndbufs_lock); - list_for_each_entry(sndbuf_slot, &lgr->sndbufs[compressed_bufsize], - list) { - if (cmpxchg(&sndbuf_slot->used, 0, 1) == 0) { - read_unlock_bh(&lgr->sndbufs_lock); - return sndbuf_slot; - } - } - read_unlock_bh(&lgr->sndbufs_lock); - return NULL; -} + struct smc_buf_desc *buf_slot; -/* try to reuse an rmb description slot of the rmbs list for a certain - * rmbe_size; if not available, return NULL - */ -static inline -struct smc_buf_desc *smc_rmb_get_slot(struct smc_link_group *lgr, - int compressed_bufsize) -{ - struct smc_buf_desc *rmb_slot; - - read_lock_bh(&lgr->rmbs_lock); - list_for_each_entry(rmb_slot, &lgr->rmbs[compressed_bufsize], - list) { - if (cmpxchg(&rmb_slot->used, 0, 1) == 0) { - read_unlock_bh(&lgr->rmbs_lock); - return rmb_slot; + read_lock_bh(lock); + list_for_each_entry(buf_slot, buf_list, list) { + if (cmpxchg(&buf_slot->used, 0, 1) == 0) { + read_unlock_bh(lock); + return buf_slot; } } - read_unlock_bh(&lgr->rmbs_lock); + read_unlock_bh(lock); return NULL; } @@ -503,136 +500,186 @@ static inline int smc_rmb_wnd_update_limit(int rmbe_size) return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2); } -/* create the tx buffer for an SMC socket */ -int smc_sndbuf_create(struct smc_sock *smc) +static struct smc_buf_desc *smc_new_buf_create(struct smc_link_group *lgr, + bool is_rmb, int bufsize) { - struct smc_connection *conn = &smc->conn; - struct smc_link_group *lgr = conn->lgr; - int tmp_bufsize, tmp_bufsize_short; - struct smc_buf_desc *sndbuf_desc; + struct smc_buf_desc *buf_desc; + struct smc_link *lnk; int rc; - /* use socket send buffer size (w/o overhead) as start value */ - for (tmp_bufsize_short = smc_compress_bufsize(smc->sk.sk_sndbuf / 2); - tmp_bufsize_short >= 0; tmp_bufsize_short--) { - tmp_bufsize = smc_uncompress_bufsize(tmp_bufsize_short); - /* check for reusable sndbuf_slot in the link group */ - sndbuf_desc = smc_sndbuf_get_slot(lgr, tmp_bufsize_short); - if (sndbuf_desc) { - memset(sndbuf_desc->cpu_addr, 0, tmp_bufsize); - break; /* found reusable slot */ - } - /* try to alloc a new send buffer */ - sndbuf_desc = kzalloc(sizeof(*sndbuf_desc), GFP_KERNEL); - if (!sndbuf_desc) - break; /* give up with -ENOMEM */ - sndbuf_desc->cpu_addr = kzalloc(tmp_bufsize, - GFP_KERNEL | __GFP_NOWARN | - __GFP_NOMEMALLOC | - __GFP_NORETRY); - if (!sndbuf_desc->cpu_addr) { - kfree(sndbuf_desc); - sndbuf_desc = NULL; - /* if send buffer allocation has failed, - * try a smaller one - */ - continue; - } - rc = smc_ib_buf_map(lgr->lnk[SMC_SINGLE_LINK].smcibdev, - tmp_bufsize, sndbuf_desc, - DMA_TO_DEVICE); + /* try to alloc a new buffer */ + buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL); + if (!buf_desc) + return ERR_PTR(-ENOMEM); + + buf_desc->cpu_addr = + (void *)__get_free_pages(GFP_KERNEL | __GFP_NOWARN | + __GFP_NOMEMALLOC | + __GFP_NORETRY | __GFP_ZERO, + get_order(bufsize)); + if (!buf_desc->cpu_addr) { + kfree(buf_desc); + return ERR_PTR(-EAGAIN); + } + buf_desc->order = get_order(bufsize); + + /* build the sg table from the pages */ + lnk = &lgr->lnk[SMC_SINGLE_LINK]; + rc = sg_alloc_table(&buf_desc->sgt[SMC_SINGLE_LINK], 1, + GFP_KERNEL); + if (rc) { + smc_buf_free(buf_desc, lnk, is_rmb); + return ERR_PTR(rc); + } + sg_set_buf(buf_desc->sgt[SMC_SINGLE_LINK].sgl, + buf_desc->cpu_addr, bufsize); + + /* map sg table to DMA address */ + rc = smc_ib_buf_map_sg(lnk->smcibdev, buf_desc, + is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE); + /* SMC protocol depends on mapping to one DMA address only */ + if (rc != 1) { + smc_buf_free(buf_desc, lnk, is_rmb); + return ERR_PTR(-EAGAIN); + } + + /* create a new memory region for the RMB */ + if (is_rmb) { + rc = smc_ib_get_memory_region(lnk->roce_pd, + IB_ACCESS_REMOTE_WRITE | + IB_ACCESS_LOCAL_WRITE, + buf_desc); if (rc) { - kfree(sndbuf_desc->cpu_addr); - kfree(sndbuf_desc); - sndbuf_desc = NULL; - continue; /* if mapping failed, try smaller one */ + smc_buf_free(buf_desc, lnk, is_rmb); + return ERR_PTR(rc); } - sndbuf_desc->used = 1; - write_lock_bh(&lgr->sndbufs_lock); - list_add(&sndbuf_desc->list, - &lgr->sndbufs[tmp_bufsize_short]); - write_unlock_bh(&lgr->sndbufs_lock); - break; - } - if (sndbuf_desc && sndbuf_desc->cpu_addr) { - conn->sndbuf_desc = sndbuf_desc; - conn->sndbuf_size = tmp_bufsize; - smc->sk.sk_sndbuf = tmp_bufsize * 2; - atomic_set(&conn->sndbuf_space, tmp_bufsize); - return 0; - } else { - return -ENOMEM; } + + return buf_desc; } -/* create the RMB for an SMC socket (even though the SMC protocol - * allows more than one RMB-element per RMB, the Linux implementation - * uses just one RMB-element per RMB, i.e. uses an extra RMB for every - * connection in a link group - */ -int smc_rmb_create(struct smc_sock *smc) +static int __smc_buf_create(struct smc_sock *smc, bool is_rmb) { struct smc_connection *conn = &smc->conn; struct smc_link_group *lgr = conn->lgr; - int tmp_bufsize, tmp_bufsize_short; - struct smc_buf_desc *rmb_desc; - int rc; + struct smc_buf_desc *buf_desc = NULL; + struct list_head *buf_list; + int bufsize, bufsize_short; + int sk_buf_size; + rwlock_t *lock; + + if (is_rmb) + /* use socket recv buffer size (w/o overhead) as start value */ + sk_buf_size = smc->sk.sk_rcvbuf / 2; + else + /* use socket send buffer size (w/o overhead) as start value */ + sk_buf_size = smc->sk.sk_sndbuf / 2; + + for (bufsize_short = smc_compress_bufsize(smc->sk.sk_sndbuf / 2); + bufsize_short >= 0; bufsize_short--) { + + if (is_rmb) { + lock = &lgr->rmbs_lock; + buf_list = &lgr->rmbs[bufsize_short]; + } else { + lock = &lgr->sndbufs_lock; + buf_list = &lgr->sndbufs[bufsize_short]; + } + bufsize = smc_uncompress_bufsize(bufsize_short); + if ((1 << get_order(bufsize)) > SG_MAX_SINGLE_ALLOC) + continue; - /* use socket recv buffer size (w/o overhead) as start value */ - for (tmp_bufsize_short = smc_compress_bufsize(smc->sk.sk_rcvbuf / 2); - tmp_bufsize_short >= 0; tmp_bufsize_short--) { - tmp_bufsize = smc_uncompress_bufsize(tmp_bufsize_short); - /* check for reusable rmb_slot in the link group */ - rmb_desc = smc_rmb_get_slot(lgr, tmp_bufsize_short); - if (rmb_desc) { - memset(rmb_desc->cpu_addr, 0, tmp_bufsize); + /* check for reusable slot in the link group */ + buf_desc = smc_buf_get_slot(lgr, bufsize_short, lock, buf_list); + if (buf_desc) { + memset(buf_desc->cpu_addr, 0, bufsize); break; /* found reusable slot */ } - /* try to alloc a new RMB */ - rmb_desc = kzalloc(sizeof(*rmb_desc), GFP_KERNEL); - if (!rmb_desc) - break; /* give up with -ENOMEM */ - rmb_desc->cpu_addr = kzalloc(tmp_bufsize, - GFP_KERNEL | __GFP_NOWARN | - __GFP_NOMEMALLOC | - __GFP_NORETRY); - if (!rmb_desc->cpu_addr) { - kfree(rmb_desc); - rmb_desc = NULL; - /* if RMB allocation has failed, - * try a smaller one - */ + + buf_desc = smc_new_buf_create(lgr, is_rmb, bufsize); + if (PTR_ERR(buf_desc) == -ENOMEM) + break; + if (IS_ERR(buf_desc)) continue; - } - rc = smc_ib_buf_map(lgr->lnk[SMC_SINGLE_LINK].smcibdev, - tmp_bufsize, rmb_desc, - DMA_FROM_DEVICE); - if (rc) { - kfree(rmb_desc->cpu_addr); - kfree(rmb_desc); - rmb_desc = NULL; - continue; /* if mapping failed, try smaller one */ - } - rmb_desc->rkey[SMC_SINGLE_LINK] = - lgr->lnk[SMC_SINGLE_LINK].roce_pd->unsafe_global_rkey; - rmb_desc->used = 1; - write_lock_bh(&lgr->rmbs_lock); - list_add(&rmb_desc->list, - &lgr->rmbs[tmp_bufsize_short]); - write_unlock_bh(&lgr->rmbs_lock); - break; + + buf_desc->used = 1; + write_lock_bh(lock); + list_add(&buf_desc->list, buf_list); + write_unlock_bh(lock); + break; /* found */ } - if (rmb_desc && rmb_desc->cpu_addr) { - conn->rmb_desc = rmb_desc; - conn->rmbe_size = tmp_bufsize; - conn->rmbe_size_short = tmp_bufsize_short; - smc->sk.sk_rcvbuf = tmp_bufsize * 2; + + if (IS_ERR(buf_desc)) + return -ENOMEM; + + if (is_rmb) { + conn->rmb_desc = buf_desc; + conn->rmbe_size = bufsize; + conn->rmbe_size_short = bufsize_short; + smc->sk.sk_rcvbuf = bufsize * 2; atomic_set(&conn->bytes_to_rcv, 0); - conn->rmbe_update_limit = smc_rmb_wnd_update_limit(tmp_bufsize); - return 0; + conn->rmbe_update_limit = smc_rmb_wnd_update_limit(bufsize); } else { - return -ENOMEM; + conn->sndbuf_desc = buf_desc; + conn->sndbuf_size = bufsize; + smc->sk.sk_sndbuf = bufsize * 2; + atomic_set(&conn->sndbuf_space, bufsize); } + return 0; +} + +void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn) +{ + struct smc_link_group *lgr = conn->lgr; + + smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev, + conn->sndbuf_desc, DMA_TO_DEVICE); +} + +void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn) +{ + struct smc_link_group *lgr = conn->lgr; + + smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev, + conn->sndbuf_desc, DMA_TO_DEVICE); +} + +void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn) +{ + struct smc_link_group *lgr = conn->lgr; + + smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev, + conn->rmb_desc, DMA_FROM_DEVICE); +} + +void smc_rmb_sync_sg_for_device(struct smc_connection *conn) +{ + struct smc_link_group *lgr = conn->lgr; + + smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev, + conn->rmb_desc, DMA_FROM_DEVICE); +} + +/* create the send and receive buffer for an SMC socket; + * receive buffers are called RMBs; + * (even though the SMC protocol allows more than one RMB-element per RMB, + * the Linux implementation uses just one RMB-element per RMB, i.e. uses an + * extra RMB for every connection in a link group + */ +int smc_buf_create(struct smc_sock *smc) +{ + int rc; + + /* create send buffer */ + rc = __smc_buf_create(smc, false); + if (rc) + return rc; + /* create rmb */ + rc = __smc_buf_create(smc, true); + if (rc) + smc_buf_free(smc->conn.sndbuf_desc, + &smc->conn.lgr->lnk[SMC_SINGLE_LINK], false); + return rc; } static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr) diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index b013cb43a327..fe691bf9af91 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Shared Memory Communications over RDMA (SMC-R) and RoCE * @@ -37,6 +38,14 @@ struct smc_wr_buf { u8 raw[SMC_WR_BUF_SIZE]; }; +#define SMC_WR_REG_MR_WAIT_TIME (5 * HZ)/* wait time for ib_wr_reg_mr result */ + +enum smc_wr_reg_state { + POSTED, /* ib_wr_reg_mr request posted */ + CONFIRMED, /* ib_wr_reg_mr response: successful */ + FAILED /* ib_wr_reg_mr response: failure */ +}; + struct smc_link { struct smc_ib_device *smcibdev; /* ib-device */ u8 ibport; /* port - values 1 | 2 */ @@ -65,6 +74,10 @@ struct smc_link { u64 wr_rx_id; /* seq # of last recv WR */ u32 wr_rx_cnt; /* number of WR recv buffers */ + struct ib_reg_wr wr_reg; /* WR register memory region */ + wait_queue_head_t wr_reg_wait; /* wait for wr_reg result */ + enum smc_wr_reg_state wr_reg_state; /* state of wr_reg request */ + union ib_gid gid; /* gid matching used vlan id */ u32 peer_qpn; /* QP number of peer */ enum ib_mtu path_mtu; /* used mtu */ @@ -90,14 +103,15 @@ struct smc_link { /* tx/rx buffer list element for sndbufs list and rmbs list of a lgr */ struct smc_buf_desc { struct list_head list; - u64 dma_addr[SMC_LINKS_PER_LGR_MAX]; - /* mapped address of buffer */ void *cpu_addr; /* virtual address of buffer */ - u32 rkey[SMC_LINKS_PER_LGR_MAX]; - /* for rmb only: - * rkey provided to peer + struct sg_table sgt[SMC_LINKS_PER_LGR_MAX];/* virtual buffer */ + struct ib_mr *mr_rx[SMC_LINKS_PER_LGR_MAX]; + /* for rmb only: memory region + * incl. rkey provided to peer */ + u32 order; /* allocation order */ u32 used; /* currently used / unused */ + bool reused; /* new created / reused */ }; struct smc_rtoken { /* address/key of remote RMB */ @@ -173,9 +187,11 @@ struct smc_clc_msg_accept_confirm; void smc_lgr_free(struct smc_link_group *lgr); void smc_lgr_terminate(struct smc_link_group *lgr); -int smc_sndbuf_create(struct smc_sock *smc); -int smc_rmb_create(struct smc_sock *smc); +int smc_buf_create(struct smc_sock *smc); int smc_rmb_rtoken_handling(struct smc_connection *conn, struct smc_clc_msg_accept_confirm *clc); - +void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn); +void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn); +void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn); +void smc_rmb_sync_sg_for_device(struct smc_connection *conn); #endif diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index b31715505a35..90f1a7f9085c 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Shared Memory Communications over RDMA (SMC-R) and RoCE * @@ -13,6 +14,7 @@ #include <linux/random.h> #include <linux/workqueue.h> +#include <linux/scatterlist.h> #include <rdma/ib_verbs.h> #include "smc_pnet.h" @@ -192,8 +194,7 @@ int smc_ib_create_protection_domain(struct smc_link *lnk) { int rc; - lnk->roce_pd = ib_alloc_pd(lnk->smcibdev->ibdev, - IB_PD_UNSAFE_GLOBAL_RKEY); + lnk->roce_pd = ib_alloc_pd(lnk->smcibdev->ibdev, 0); rc = PTR_ERR_OR_ZERO(lnk->roce_pd); if (IS_ERR(lnk->roce_pd)) lnk->roce_pd = NULL; @@ -232,10 +233,10 @@ int smc_ib_create_queue_pair(struct smc_link *lnk) .recv_cq = lnk->smcibdev->roce_cq_recv, .srq = NULL, .cap = { - .max_send_wr = SMC_WR_BUF_CNT, /* include unsolicited rdma_writes as well, * there are max. 2 RDMA_WRITE per 1 WR_SEND */ + .max_send_wr = SMC_WR_BUF_CNT * 3, .max_recv_wr = SMC_WR_BUF_CNT * 3, .max_send_sge = SMC_IB_MAX_SEND_SGE, .max_recv_sge = 1, @@ -254,56 +255,132 @@ int smc_ib_create_queue_pair(struct smc_link *lnk) return rc; } -/* map a new TX or RX buffer to DMA */ -int smc_ib_buf_map(struct smc_ib_device *smcibdev, int buf_size, - struct smc_buf_desc *buf_slot, - enum dma_data_direction data_direction) +void smc_ib_put_memory_region(struct ib_mr *mr) { - int rc = 0; + ib_dereg_mr(mr); +} - if (buf_slot->dma_addr[SMC_SINGLE_LINK]) - return rc; /* already mapped */ - buf_slot->dma_addr[SMC_SINGLE_LINK] = - ib_dma_map_single(smcibdev->ibdev, buf_slot->cpu_addr, - buf_size, data_direction); - if (ib_dma_mapping_error(smcibdev->ibdev, - buf_slot->dma_addr[SMC_SINGLE_LINK])) - rc = -EIO; - return rc; +static int smc_ib_map_mr_sg(struct smc_buf_desc *buf_slot) +{ + unsigned int offset = 0; + int sg_num; + + /* map the largest prefix of a dma mapped SG list */ + sg_num = ib_map_mr_sg(buf_slot->mr_rx[SMC_SINGLE_LINK], + buf_slot->sgt[SMC_SINGLE_LINK].sgl, + buf_slot->sgt[SMC_SINGLE_LINK].orig_nents, + &offset, PAGE_SIZE); + + return sg_num; +} + +/* Allocate a memory region and map the dma mapped SG list of buf_slot */ +int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags, + struct smc_buf_desc *buf_slot) +{ + if (buf_slot->mr_rx[SMC_SINGLE_LINK]) + return 0; /* already done */ + + buf_slot->mr_rx[SMC_SINGLE_LINK] = + ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 1 << buf_slot->order); + if (IS_ERR(buf_slot->mr_rx[SMC_SINGLE_LINK])) { + int rc; + + rc = PTR_ERR(buf_slot->mr_rx[SMC_SINGLE_LINK]); + buf_slot->mr_rx[SMC_SINGLE_LINK] = NULL; + return rc; + } + + if (smc_ib_map_mr_sg(buf_slot) != 1) + return -EINVAL; + + return 0; +} + +/* synchronize buffer usage for cpu access */ +void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev, + struct smc_buf_desc *buf_slot, + enum dma_data_direction data_direction) +{ + struct scatterlist *sg; + unsigned int i; + + /* for now there is just one DMA address */ + for_each_sg(buf_slot->sgt[SMC_SINGLE_LINK].sgl, sg, + buf_slot->sgt[SMC_SINGLE_LINK].nents, i) { + if (!sg_dma_len(sg)) + break; + ib_dma_sync_single_for_cpu(smcibdev->ibdev, + sg_dma_address(sg), + sg_dma_len(sg), + data_direction); + } +} + +/* synchronize buffer usage for device access */ +void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev, + struct smc_buf_desc *buf_slot, + enum dma_data_direction data_direction) +{ + struct scatterlist *sg; + unsigned int i; + + /* for now there is just one DMA address */ + for_each_sg(buf_slot->sgt[SMC_SINGLE_LINK].sgl, sg, + buf_slot->sgt[SMC_SINGLE_LINK].nents, i) { + if (!sg_dma_len(sg)) + break; + ib_dma_sync_single_for_device(smcibdev->ibdev, + sg_dma_address(sg), + sg_dma_len(sg), + data_direction); + } } -void smc_ib_buf_unmap(struct smc_ib_device *smcibdev, int buf_size, +/* Map a new TX or RX buffer SG-table to DMA */ +int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev, struct smc_buf_desc *buf_slot, enum dma_data_direction data_direction) { - if (!buf_slot->dma_addr[SMC_SINGLE_LINK]) + int mapped_nents; + + mapped_nents = ib_dma_map_sg(smcibdev->ibdev, + buf_slot->sgt[SMC_SINGLE_LINK].sgl, + buf_slot->sgt[SMC_SINGLE_LINK].orig_nents, + data_direction); + if (!mapped_nents) + return -ENOMEM; + + return mapped_nents; +} + +void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev, + struct smc_buf_desc *buf_slot, + enum dma_data_direction data_direction) +{ + if (!buf_slot->sgt[SMC_SINGLE_LINK].sgl->dma_address) return; /* already unmapped */ - ib_dma_unmap_single(smcibdev->ibdev, *buf_slot->dma_addr, buf_size, - data_direction); - buf_slot->dma_addr[SMC_SINGLE_LINK] = 0; + + ib_dma_unmap_sg(smcibdev->ibdev, + buf_slot->sgt[SMC_SINGLE_LINK].sgl, + buf_slot->sgt[SMC_SINGLE_LINK].orig_nents, + data_direction); + buf_slot->sgt[SMC_SINGLE_LINK].sgl->dma_address = 0; } static int smc_ib_fill_gid_and_mac(struct smc_ib_device *smcibdev, u8 ibport) { - struct net_device *ndev; + struct ib_gid_attr gattr; int rc; rc = ib_query_gid(smcibdev->ibdev, ibport, 0, - &smcibdev->gid[ibport - 1], NULL); - /* the SMC protocol requires specification of the roce MAC address; - * if net_device cannot be determined, it can be derived from gid 0 - */ - ndev = smcibdev->ibdev->get_netdev(smcibdev->ibdev, ibport); - if (ndev) { - memcpy(&smcibdev->mac, ndev->dev_addr, ETH_ALEN); - } else if (!rc) { - memcpy(&smcibdev->mac[ibport - 1][0], - &smcibdev->gid[ibport - 1].raw[8], 3); - memcpy(&smcibdev->mac[ibport - 1][3], - &smcibdev->gid[ibport - 1].raw[13], 3); - smcibdev->mac[ibport - 1][0] &= ~0x02; - } - return rc; + &smcibdev->gid[ibport - 1], &gattr); + if (rc || !gattr.ndev) + return -ENODEV; + + memcpy(smcibdev->mac[ibport - 1], gattr.ndev->dev_addr, ETH_ALEN); + dev_put(gattr.ndev); + return 0; } /* Create an identifier unique for this instance of SMC-R. @@ -334,6 +411,7 @@ int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport) &smcibdev->pattr[ibport - 1]); if (rc) goto out; + /* the SMC protocol requires specification of the RoCE MAC address */ rc = smc_ib_fill_gid_and_mac(smcibdev, ibport); if (rc) goto out; diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h index b567152a526d..e90630dadf8e 100644 --- a/net/smc/smc_ib.h +++ b/net/smc/smc_ib.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Shared Memory Communications over RDMA (SMC-R) and RoCE * @@ -51,12 +52,12 @@ int smc_ib_register_client(void) __init; void smc_ib_unregister_client(void); bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport); int smc_ib_remember_port_attr(struct smc_ib_device *smcibdev, u8 ibport); -int smc_ib_buf_map(struct smc_ib_device *smcibdev, int buf_size, - struct smc_buf_desc *buf_slot, - enum dma_data_direction data_direction); -void smc_ib_buf_unmap(struct smc_ib_device *smcibdev, int bufsize, +int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev, struct smc_buf_desc *buf_slot, enum dma_data_direction data_direction); +void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev, + struct smc_buf_desc *buf_slot, + enum dma_data_direction data_direction); void smc_ib_dealloc_protection_domain(struct smc_link *lnk); int smc_ib_create_protection_domain(struct smc_link *lnk); void smc_ib_destroy_queue_pair(struct smc_link *lnk); @@ -65,6 +66,13 @@ int smc_ib_ready_link(struct smc_link *lnk); int smc_ib_modify_qp_rts(struct smc_link *lnk); int smc_ib_modify_qp_reset(struct smc_link *lnk); long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev); - - +int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags, + struct smc_buf_desc *buf_slot); +void smc_ib_put_memory_region(struct ib_mr *mr); +void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev, + struct smc_buf_desc *buf_slot, + enum dma_data_direction data_direction); +void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev, + struct smc_buf_desc *buf_slot, + enum dma_data_direction data_direction); #endif diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index c2f9165d13ef..92fe4cc8c82c 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Shared Memory Communications over RDMA (SMC-R) and RoCE * diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h index b472f853953a..51b27ce90dbd 100644 --- a/net/smc/smc_llc.h +++ b/net/smc/smc_llc.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Shared Memory Communications over RDMA (SMC-R) and RoCE * diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c index 78f7af28ae4f..74568cdbca70 100644 --- a/net/smc/smc_pnet.c +++ b/net/smc/smc_pnet.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Shared Memory Communications over RDMA (SMC-R) and RoCE * @@ -181,8 +182,10 @@ static int smc_pnet_enter(struct smc_pnetentry *new_pnetelem) sizeof(new_pnetelem->ndev->name)) || smc_pnet_same_ibname(pnetelem, new_pnetelem->smcibdev->ibdev->name, - new_pnetelem->ib_port)) + new_pnetelem->ib_port)) { + dev_put(pnetelem->ndev); goto found; + } } list_add_tail(&new_pnetelem->list, &smc_pnettable.pnetlist); rc = 0; diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h index c4f1bccd4358..5a29519db976 100644 --- a/net/smc/smc_pnet.h +++ b/net/smc/smc_pnet.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Shared Memory Communications over RDMA (SMC-R) and RoCE * diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c index f0c8b089f770..cbf58637ee14 100644 --- a/net/smc/smc_rx.c +++ b/net/smc/smc_rx.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Shared Memory Communications over RDMA (SMC-R) and RoCE * @@ -148,6 +149,8 @@ int smc_rx_recvmsg(struct smc_sock *smc, struct msghdr *msg, size_t len, read_done = sock_intr_errno(timeo); break; } + if (!timeo) + return -EAGAIN; } if (!atomic_read(&conn->bytes_to_rcv)) { @@ -170,6 +173,7 @@ copy: copylen, conn->rmbe_size - cons.count); chunk_len_sum = chunk_len; chunk_off = cons.count; + smc_rmb_sync_sg_for_cpu(conn); for (chunk = 0; chunk < 2; chunk++) { if (!(flags & MSG_TRUNC)) { rc = memcpy_to_msg(msg, rcvbuf_base + chunk_off, @@ -177,6 +181,7 @@ copy: if (rc) { if (!read_done) read_done = -EFAULT; + smc_rmb_sync_sg_for_device(conn); goto out; } } @@ -190,6 +195,7 @@ copy: chunk_len_sum += chunk_len; chunk_off = 0; /* modulo offset in recv ring buffer */ } + smc_rmb_sync_sg_for_device(conn); /* update cursors */ if (!(flags & MSG_PEEK)) { diff --git a/net/smc/smc_rx.h b/net/smc/smc_rx.h index b5b80e1f8b0f..3a32b59bf06c 100644 --- a/net/smc/smc_rx.h +++ b/net/smc/smc_rx.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Shared Memory Communications over RDMA (SMC-R) and RoCE * diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index 21ec1832ab51..c48dc2d5fd3a 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Shared Memory Communications over RDMA (SMC-R) and RoCE * @@ -24,6 +25,8 @@ #include "smc_cdc.h" #include "smc_tx.h" +#define SMC_TX_WORK_DELAY HZ + /***************************** sndbuf producer *******************************/ /* callback implementation for sk.sk_write_space() @@ -174,10 +177,12 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len) copylen, conn->sndbuf_size - tx_cnt_prep); chunk_len_sum = chunk_len; chunk_off = tx_cnt_prep; + smc_sndbuf_sync_sg_for_cpu(conn); for (chunk = 0; chunk < 2; chunk++) { rc = memcpy_from_msg(sndbuf_base + chunk_off, msg, chunk_len); if (rc) { + smc_sndbuf_sync_sg_for_device(conn); if (send_done) return send_done; goto out_err; @@ -192,6 +197,7 @@ int smc_tx_sendmsg(struct smc_sock *smc, struct msghdr *msg, size_t len) chunk_len_sum += chunk_len; chunk_off = 0; /* modulo offset in send ring buffer */ } + smc_sndbuf_sync_sg_for_device(conn); /* update cursors */ smc_curs_add(conn->sndbuf_size, &prep, copylen); smc_curs_write(&conn->tx_curs_prep, @@ -277,6 +283,7 @@ static int smc_tx_rdma_writes(struct smc_connection *conn) struct smc_link_group *lgr = conn->lgr; int to_send, rmbespace; struct smc_link *link; + dma_addr_t dma_addr; int num_sges; int rc; @@ -334,12 +341,11 @@ static int smc_tx_rdma_writes(struct smc_connection *conn) src_len = conn->sndbuf_size - sent.count; } src_len_sum = src_len; + dma_addr = sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl); for (dstchunk = 0; dstchunk < 2; dstchunk++) { num_sges = 0; for (srcchunk = 0; srcchunk < 2; srcchunk++) { - sges[srcchunk].addr = - conn->sndbuf_desc->dma_addr[SMC_SINGLE_LINK] + - src_off; + sges[srcchunk].addr = dma_addr + src_off; sges[srcchunk].length = src_len; sges[srcchunk].lkey = link->roce_pd->local_dma_lkey; num_sges++; @@ -391,8 +397,7 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn) int rc; spin_lock_bh(&conn->send_lock); - rc = smc_cdc_get_free_slot(&conn->lgr->lnk[SMC_SINGLE_LINK], &wr_buf, - &pend); + rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend); if (rc < 0) { if (rc == -EBUSY) { struct smc_sock *smc = @@ -403,7 +408,8 @@ int smc_tx_sndbuf_nonempty(struct smc_connection *conn) goto out_unlock; } rc = 0; - schedule_work(&conn->tx_work); + schedule_delayed_work(&conn->tx_work, + SMC_TX_WORK_DELAY); } goto out_unlock; } @@ -427,7 +433,7 @@ out_unlock: */ static void smc_tx_work(struct work_struct *work) { - struct smc_connection *conn = container_of(work, + struct smc_connection *conn = container_of(to_delayed_work(work), struct smc_connection, tx_work); struct smc_sock *smc = container_of(conn, struct smc_sock, conn); @@ -460,12 +466,12 @@ void smc_tx_consumer_update(struct smc_connection *conn) ((to_confirm > conn->rmbe_update_limit) && ((to_confirm > (conn->rmbe_size / 2)) || conn->local_rx_ctrl.prod_flags.write_blocked))) { - rc = smc_cdc_get_free_slot(&conn->lgr->lnk[SMC_SINGLE_LINK], - &wr_buf, &pend); + rc = smc_cdc_get_free_slot(conn, &wr_buf, &pend); if (!rc) rc = smc_cdc_msg_send(conn, wr_buf, pend); if (rc < 0) { - schedule_work(&conn->tx_work); + schedule_delayed_work(&conn->tx_work, + SMC_TX_WORK_DELAY); return; } smc_curs_write(&conn->rx_curs_confirmed, @@ -484,6 +490,6 @@ void smc_tx_consumer_update(struct smc_connection *conn) void smc_tx_init(struct smc_sock *smc) { smc->sk.sk_write_space = smc_tx_write_space; - INIT_WORK(&smc->conn.tx_work, smc_tx_work); + INIT_DELAYED_WORK(&smc->conn.tx_work, smc_tx_work); spin_lock_init(&smc->conn.send_lock); } diff --git a/net/smc/smc_tx.h b/net/smc/smc_tx.h index 1d6a0dcdcfe6..78255964fa4d 100644 --- a/net/smc/smc_tx.h +++ b/net/smc/smc_tx.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Shared Memory Communications over RDMA (SMC-R) and RoCE * diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index 874ee9f9d796..de4537f66832 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Shared Memory Communications over RDMA (SMC-R) and RoCE * @@ -68,6 +69,16 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc) int i; link = wc->qp->qp_context; + + if (wc->opcode == IB_WC_REG_MR) { + if (wc->status) + link->wr_reg_state = FAILED; + else + link->wr_reg_state = CONFIRMED; + wake_up(&link->wr_reg_wait); + return; + } + pnd_snd_idx = smc_wr_tx_find_pending_index(link, wc->wr_id); if (pnd_snd_idx == link->wr_tx_cnt) return; @@ -234,7 +245,7 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv) int rc; ib_req_notify_cq(link->smcibdev->roce_cq_send, - IB_CQ_SOLICITED_MASK | IB_CQ_REPORT_MISSED_EVENTS); + IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); pend = container_of(priv, struct smc_wr_tx_pend, priv); rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx], &failed_wr); @@ -243,6 +254,52 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv) return rc; } +/* Register a memory region and wait for result. */ +int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr) +{ + struct ib_send_wr *failed_wr = NULL; + int rc; + + ib_req_notify_cq(link->smcibdev->roce_cq_send, + IB_CQ_NEXT_COMP | IB_CQ_REPORT_MISSED_EVENTS); + link->wr_reg_state = POSTED; + link->wr_reg.wr.wr_id = (u64)(uintptr_t)mr; + link->wr_reg.mr = mr; + link->wr_reg.key = mr->rkey; + failed_wr = &link->wr_reg.wr; + rc = ib_post_send(link->roce_qp, &link->wr_reg.wr, &failed_wr); + WARN_ON(failed_wr != &link->wr_reg.wr); + if (rc) + return rc; + + rc = wait_event_interruptible_timeout(link->wr_reg_wait, + (link->wr_reg_state != POSTED), + SMC_WR_REG_MR_WAIT_TIME); + if (!rc) { + /* timeout - terminate connections */ + struct smc_link_group *lgr; + + lgr = container_of(link, struct smc_link_group, + lnk[SMC_SINGLE_LINK]); + smc_lgr_terminate(lgr); + return -EPIPE; + } + if (rc == -ERESTARTSYS) + return -EINTR; + switch (link->wr_reg_state) { + case CONFIRMED: + rc = 0; + break; + case FAILED: + rc = -EIO; + break; + case POSTED: + rc = -EPIPE; + break; + } + return rc; +} + void smc_wr_tx_dismiss_slots(struct smc_link *link, u8 wr_rx_hdr_type, smc_wr_tx_filter filter, smc_wr_tx_dismisser dismisser, @@ -458,6 +515,11 @@ static void smc_wr_init_sge(struct smc_link *lnk) lnk->wr_rx_ibs[i].sg_list = &lnk->wr_rx_sges[i]; lnk->wr_rx_ibs[i].num_sge = 1; } + lnk->wr_reg.wr.next = NULL; + lnk->wr_reg.wr.num_sge = 0; + lnk->wr_reg.wr.send_flags = IB_SEND_SIGNALED; + lnk->wr_reg.wr.opcode = IB_WR_REG_MR; + lnk->wr_reg.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE; } void smc_wr_free_link(struct smc_link *lnk) @@ -602,6 +664,8 @@ int smc_wr_create_link(struct smc_link *lnk) smc_wr_init_sge(lnk); memset(lnk->wr_tx_mask, 0, BITS_TO_LONGS(SMC_WR_BUF_CNT) * sizeof(*lnk->wr_tx_mask)); + init_waitqueue_head(&lnk->wr_tx_wait); + init_waitqueue_head(&lnk->wr_reg_wait); return rc; dma_unmap: diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index 0b9beeda6053..2acf12b06063 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Shared Memory Communications over RDMA (SMC-R) and RoCE * @@ -102,5 +103,6 @@ void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type, int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler); int smc_wr_rx_post_init(struct smc_link *link); void smc_wr_rx_cq_handler(struct ib_cq *ib_cq, void *cq_context); +int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr); #endif /* SMC_WR_H */ diff --git a/net/socket.c b/net/socket.c index ad22df1ffbd1..42d8e9c9ccd5 100644 --- a/net/socket.c +++ b/net/socket.c @@ -568,7 +568,6 @@ struct socket *sock_alloc(void) sock = SOCKET_I(inode); - kmemcheck_annotate_bitfield(sock, type); inode->i_ino = get_next_ino(); inode->i_mode = S_IFSOCK | S_IRWXUGO; inode->i_uid = current_fsuid(); @@ -652,6 +651,20 @@ int kernel_sendmsg(struct socket *sock, struct msghdr *msg, } EXPORT_SYMBOL(kernel_sendmsg); +int kernel_sendmsg_locked(struct sock *sk, struct msghdr *msg, + struct kvec *vec, size_t num, size_t size) +{ + struct socket *sock = sk->sk_socket; + + if (!sock->ops->sendmsg_locked) + return sock_no_sendmsg_locked(sk, msg, size); + + iov_iter_kvec(&msg->msg_iter, WRITE | ITER_KVEC, vec, num, size); + + return sock->ops->sendmsg_locked(sk, msg, msg_data_left(msg)); +} +EXPORT_SYMBOL(kernel_sendmsg_locked); + static bool skb_is_err_queue(const struct sk_buff *skb) { /* pkt_type of skbs enqueued on the error queue are set to @@ -3376,6 +3389,19 @@ int kernel_sendpage(struct socket *sock, struct page *page, int offset, } EXPORT_SYMBOL(kernel_sendpage); +int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset, + size_t size, int flags) +{ + struct socket *sock = sk->sk_socket; + + if (sock->ops->sendpage_locked) + return sock->ops->sendpage_locked(sk, page, offset, size, + flags); + + return sock_no_sendpage_locked(sk, page, offset, size, flags); +} +EXPORT_SYMBOL(kernel_sendpage_locked); + int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg) { mm_segment_t oldfs = get_fs(); @@ -3405,7 +3431,6 @@ u32 kernel_sock_ip_overhead(struct sock *sk) struct inet_sock *inet; struct ip_options_rcu *opt; u32 overhead = 0; - bool owned_by_user; #if IS_ENABLED(CONFIG_IPV6) struct ipv6_pinfo *np; struct ipv6_txoptions *optv6 = NULL; @@ -3414,13 +3439,12 @@ u32 kernel_sock_ip_overhead(struct sock *sk) if (!sk) return overhead; - owned_by_user = sock_owned_by_user(sk); switch (sk->sk_family) { case AF_INET: inet = inet_sk(sk); overhead += sizeof(struct iphdr); opt = rcu_dereference_protected(inet->inet_opt, - owned_by_user); + sock_owned_by_user(sk)); if (opt) overhead += opt->opt.optlen; return overhead; @@ -3430,7 +3454,7 @@ u32 kernel_sock_ip_overhead(struct sock *sk) overhead += sizeof(struct ipv6hdr); if (np) optv6 = rcu_dereference_protected(np->opt, - owned_by_user); + sock_owned_by_user(sk)); if (optv6) overhead += (optv6->opt_flen + optv6->opt_nflen); return overhead; diff --git a/net/strparser/strparser.c b/net/strparser/strparser.c index b5c279b22680..c5fda15ba319 100644 --- a/net/strparser/strparser.c +++ b/net/strparser/strparser.c @@ -29,44 +29,46 @@ static struct workqueue_struct *strp_wq; -struct _strp_rx_msg { - /* Internal cb structure. struct strp_rx_msg must be first for passing +struct _strp_msg { + /* Internal cb structure. struct strp_msg must be first for passing * to upper layer. */ - struct strp_rx_msg strp; + struct strp_msg strp; int accum_len; int early_eaten; }; -static inline struct _strp_rx_msg *_strp_rx_msg(struct sk_buff *skb) +static inline struct _strp_msg *_strp_msg(struct sk_buff *skb) { - return (struct _strp_rx_msg *)((void *)skb->cb + + return (struct _strp_msg *)((void *)skb->cb + offsetof(struct qdisc_skb_cb, data)); } /* Lower lock held */ -static void strp_abort_rx_strp(struct strparser *strp, int err) +static void strp_abort_strp(struct strparser *strp, int err) { - struct sock *csk = strp->sk; - /* Unrecoverable error in receive */ - del_timer(&strp->rx_msg_timer); + cancel_delayed_work(&strp->msg_timer_work); - if (strp->rx_stopped) + if (strp->stopped) return; - strp->rx_stopped = 1; + strp->stopped = 1; + + if (strp->sk) { + struct sock *sk = strp->sk; - /* Report an error on the lower socket */ - csk->sk_err = err; - csk->sk_error_report(csk); + /* Report an error on the lower socket */ + sk->sk_err = err; + sk->sk_error_report(sk); + } } -static void strp_start_rx_timer(struct strparser *strp) +static void strp_start_timer(struct strparser *strp, long timeo) { - if (strp->sk->sk_rcvtimeo) - mod_timer(&strp->rx_msg_timer, strp->sk->sk_rcvtimeo); + if (timeo) + mod_delayed_work(strp_wq, &strp->msg_timer_work, timeo); } /* Lower lock held */ @@ -74,46 +76,55 @@ static void strp_parser_err(struct strparser *strp, int err, read_descriptor_t *desc) { desc->error = err; - kfree_skb(strp->rx_skb_head); - strp->rx_skb_head = NULL; + kfree_skb(strp->skb_head); + strp->skb_head = NULL; strp->cb.abort_parser(strp, err); } static inline int strp_peek_len(struct strparser *strp) { - struct socket *sock = strp->sk->sk_socket; + if (strp->sk) { + struct socket *sock = strp->sk->sk_socket; + + return sock->ops->peek_len(sock); + } + + /* If we don't have an associated socket there's nothing to peek. + * Return int max to avoid stopping the strparser. + */ - return sock->ops->peek_len(sock); + return INT_MAX; } /* Lower socket lock held */ -static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, - unsigned int orig_offset, size_t orig_len) +static int __strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, + unsigned int orig_offset, size_t orig_len, + size_t max_msg_size, long timeo) { struct strparser *strp = (struct strparser *)desc->arg.data; - struct _strp_rx_msg *rxm; + struct _strp_msg *stm; struct sk_buff *head, *skb; size_t eaten = 0, cand_len; ssize_t extra; int err; bool cloned_orig = false; - if (strp->rx_paused) + if (strp->paused) return 0; - head = strp->rx_skb_head; + head = strp->skb_head; if (head) { /* Message already in progress */ - rxm = _strp_rx_msg(head); - if (unlikely(rxm->early_eaten)) { + stm = _strp_msg(head); + if (unlikely(stm->early_eaten)) { /* Already some number of bytes on the receive sock - * data saved in rx_skb_head, just indicate they + * data saved in skb_head, just indicate they * are consumed. */ - eaten = orig_len <= rxm->early_eaten ? - orig_len : rxm->early_eaten; - rxm->early_eaten -= eaten; + eaten = orig_len <= stm->early_eaten ? + orig_len : stm->early_eaten; + stm->early_eaten -= eaten; return eaten; } @@ -126,12 +137,12 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, */ orig_skb = skb_clone(orig_skb, GFP_ATOMIC); if (!orig_skb) { - STRP_STATS_INCR(strp->stats.rx_mem_fail); + STRP_STATS_INCR(strp->stats.mem_fail); desc->error = -ENOMEM; return 0; } if (!pskb_pull(orig_skb, orig_offset)) { - STRP_STATS_INCR(strp->stats.rx_mem_fail); + STRP_STATS_INCR(strp->stats.mem_fail); kfree_skb(orig_skb); desc->error = -ENOMEM; return 0; @@ -140,13 +151,13 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, orig_offset = 0; } - if (!strp->rx_skb_nextp) { + if (!strp->skb_nextp) { /* We are going to append to the frags_list of head. * Need to unshare the frag_list. */ err = skb_unclone(head, GFP_ATOMIC); if (err) { - STRP_STATS_INCR(strp->stats.rx_mem_fail); + STRP_STATS_INCR(strp->stats.mem_fail); desc->error = err; return 0; } @@ -165,20 +176,20 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, skb = alloc_skb(0, GFP_ATOMIC); if (!skb) { - STRP_STATS_INCR(strp->stats.rx_mem_fail); + STRP_STATS_INCR(strp->stats.mem_fail); desc->error = -ENOMEM; return 0; } skb->len = head->len; skb->data_len = head->len; skb->truesize = head->truesize; - *_strp_rx_msg(skb) = *_strp_rx_msg(head); - strp->rx_skb_nextp = &head->next; + *_strp_msg(skb) = *_strp_msg(head); + strp->skb_nextp = &head->next; skb_shinfo(skb)->frag_list = head; - strp->rx_skb_head = skb; + strp->skb_head = skb; head = skb; } else { - strp->rx_skb_nextp = + strp->skb_nextp = &skb_shinfo(head)->frag_list; } } @@ -188,112 +199,112 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, /* Always clone since we will consume something */ skb = skb_clone(orig_skb, GFP_ATOMIC); if (!skb) { - STRP_STATS_INCR(strp->stats.rx_mem_fail); + STRP_STATS_INCR(strp->stats.mem_fail); desc->error = -ENOMEM; break; } cand_len = orig_len - eaten; - head = strp->rx_skb_head; + head = strp->skb_head; if (!head) { head = skb; - strp->rx_skb_head = head; - /* Will set rx_skb_nextp on next packet if needed */ - strp->rx_skb_nextp = NULL; - rxm = _strp_rx_msg(head); - memset(rxm, 0, sizeof(*rxm)); - rxm->strp.offset = orig_offset + eaten; + strp->skb_head = head; + /* Will set skb_nextp on next packet if needed */ + strp->skb_nextp = NULL; + stm = _strp_msg(head); + memset(stm, 0, sizeof(*stm)); + stm->strp.offset = orig_offset + eaten; } else { /* Unclone since we may be appending to an skb that we * already share a frag_list with. */ err = skb_unclone(skb, GFP_ATOMIC); if (err) { - STRP_STATS_INCR(strp->stats.rx_mem_fail); + STRP_STATS_INCR(strp->stats.mem_fail); desc->error = err; break; } - rxm = _strp_rx_msg(head); - *strp->rx_skb_nextp = skb; - strp->rx_skb_nextp = &skb->next; + stm = _strp_msg(head); + *strp->skb_nextp = skb; + strp->skb_nextp = &skb->next; head->data_len += skb->len; head->len += skb->len; head->truesize += skb->truesize; } - if (!rxm->strp.full_len) { + if (!stm->strp.full_len) { ssize_t len; len = (*strp->cb.parse_msg)(strp, head); if (!len) { /* Need more header to determine length */ - if (!rxm->accum_len) { + if (!stm->accum_len) { /* Start RX timer for new message */ - strp_start_rx_timer(strp); + strp_start_timer(strp, timeo); } - rxm->accum_len += cand_len; + stm->accum_len += cand_len; eaten += cand_len; - STRP_STATS_INCR(strp->stats.rx_need_more_hdr); + STRP_STATS_INCR(strp->stats.need_more_hdr); WARN_ON(eaten != orig_len); break; } else if (len < 0) { - if (len == -ESTRPIPE && rxm->accum_len) { + if (len == -ESTRPIPE && stm->accum_len) { len = -ENODATA; - strp->rx_unrecov_intr = 1; + strp->unrecov_intr = 1; } else { - strp->rx_interrupted = 1; + strp->interrupted = 1; } strp_parser_err(strp, len, desc); break; - } else if (len > strp->sk->sk_rcvbuf) { + } else if (len > max_msg_size) { /* Message length exceeds maximum allowed */ - STRP_STATS_INCR(strp->stats.rx_msg_too_big); + STRP_STATS_INCR(strp->stats.msg_too_big); strp_parser_err(strp, -EMSGSIZE, desc); break; } else if (len <= (ssize_t)head->len - - skb->len - rxm->strp.offset) { + skb->len - stm->strp.offset) { /* Length must be into new skb (and also * greater than zero) */ - STRP_STATS_INCR(strp->stats.rx_bad_hdr_len); + STRP_STATS_INCR(strp->stats.bad_hdr_len); strp_parser_err(strp, -EPROTO, desc); break; } - rxm->strp.full_len = len; + stm->strp.full_len = len; } - extra = (ssize_t)(rxm->accum_len + cand_len) - - rxm->strp.full_len; + extra = (ssize_t)(stm->accum_len + cand_len) - + stm->strp.full_len; if (extra < 0) { /* Message not complete yet. */ - if (rxm->strp.full_len - rxm->accum_len > + if (stm->strp.full_len - stm->accum_len > strp_peek_len(strp)) { - /* Don't have the whole messages in the socket - * buffer. Set strp->rx_need_bytes to wait for + /* Don't have the whole message in the socket + * buffer. Set strp->need_bytes to wait for * the rest of the message. Also, set "early * eaten" since we've already buffered the skb * but don't consume yet per strp_read_sock. */ - if (!rxm->accum_len) { + if (!stm->accum_len) { /* Start RX timer for new message */ - strp_start_rx_timer(strp); + strp_start_timer(strp, timeo); } - strp->rx_need_bytes = rxm->strp.full_len - - rxm->accum_len; - rxm->accum_len += cand_len; - rxm->early_eaten = cand_len; - STRP_STATS_ADD(strp->stats.rx_bytes, cand_len); + strp->need_bytes = stm->strp.full_len - + stm->accum_len; + stm->accum_len += cand_len; + stm->early_eaten = cand_len; + STRP_STATS_ADD(strp->stats.bytes, cand_len); desc->count = 0; /* Stop reading socket */ break; } - rxm->accum_len += cand_len; + stm->accum_len += cand_len; eaten += cand_len; WARN_ON(eaten != orig_len); break; @@ -308,14 +319,14 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, eaten += (cand_len - extra); /* Hurray, we have a new message! */ - del_timer(&strp->rx_msg_timer); - strp->rx_skb_head = NULL; - STRP_STATS_INCR(strp->stats.rx_msgs); + cancel_delayed_work(&strp->msg_timer_work); + strp->skb_head = NULL; + STRP_STATS_INCR(strp->stats.msgs); /* Give skb to upper layer */ strp->cb.rcv_msg(strp, head); - if (unlikely(strp->rx_paused)) { + if (unlikely(strp->paused)) { /* Upper layer paused strp */ break; } @@ -324,11 +335,33 @@ static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, if (cloned_orig) kfree_skb(orig_skb); - STRP_STATS_ADD(strp->stats.rx_bytes, eaten); + STRP_STATS_ADD(strp->stats.bytes, eaten); return eaten; } +int strp_process(struct strparser *strp, struct sk_buff *orig_skb, + unsigned int orig_offset, size_t orig_len, + size_t max_msg_size, long timeo) +{ + read_descriptor_t desc; /* Dummy arg to strp_recv */ + + desc.arg.data = strp; + + return __strp_recv(&desc, orig_skb, orig_offset, orig_len, + max_msg_size, timeo); +} +EXPORT_SYMBOL_GPL(strp_process); + +static int strp_recv(read_descriptor_t *desc, struct sk_buff *orig_skb, + unsigned int orig_offset, size_t orig_len) +{ + struct strparser *strp = (struct strparser *)desc->arg.data; + + return __strp_recv(desc, orig_skb, orig_offset, orig_len, + strp->sk->sk_rcvbuf, strp->sk->sk_rcvtimeo); +} + static int default_read_sock_done(struct strparser *strp, int err) { return err; @@ -340,6 +373,9 @@ static int strp_read_sock(struct strparser *strp) struct socket *sock = strp->sk->sk_socket; read_descriptor_t desc; + if (unlikely(!sock || !sock->ops || !sock->ops->read_sock)) + return -EBUSY; + desc.arg.data = strp; desc.error = 0; desc.count = 1; /* give more than one skb per call */ @@ -355,101 +391,123 @@ static int strp_read_sock(struct strparser *strp) /* Lower sock lock held */ void strp_data_ready(struct strparser *strp) { - if (unlikely(strp->rx_stopped)) + if (unlikely(strp->stopped)) return; - /* This check is needed to synchronize with do_strp_rx_work. - * do_strp_rx_work acquires a process lock (lock_sock) whereas + /* This check is needed to synchronize with do_strp_work. + * do_strp_work acquires a process lock (lock_sock) whereas * the lock held here is bh_lock_sock. The two locks can be * held by different threads at the same time, but bh_lock_sock * allows a thread in BH context to safely check if the process * lock is held. In this case, if the lock is held, queue work. */ if (sock_owned_by_user(strp->sk)) { - queue_work(strp_wq, &strp->rx_work); + queue_work(strp_wq, &strp->work); return; } - if (strp->rx_paused) + if (strp->paused) return; - if (strp->rx_need_bytes) { - if (strp_peek_len(strp) >= strp->rx_need_bytes) - strp->rx_need_bytes = 0; + if (strp->need_bytes) { + if (strp_peek_len(strp) >= strp->need_bytes) + strp->need_bytes = 0; else return; } if (strp_read_sock(strp) == -ENOMEM) - queue_work(strp_wq, &strp->rx_work); + queue_work(strp_wq, &strp->work); } EXPORT_SYMBOL_GPL(strp_data_ready); -static void do_strp_rx_work(struct strparser *strp) +static void do_strp_work(struct strparser *strp) { read_descriptor_t rd_desc; - struct sock *csk = strp->sk; /* We need the read lock to synchronize with strp_data_ready. We * need the socket lock for calling strp_read_sock. */ - lock_sock(csk); + strp->cb.lock(strp); - if (unlikely(strp->rx_stopped)) + if (unlikely(strp->stopped)) goto out; - if (strp->rx_paused) + if (strp->paused) goto out; rd_desc.arg.data = strp; if (strp_read_sock(strp) == -ENOMEM) - queue_work(strp_wq, &strp->rx_work); + queue_work(strp_wq, &strp->work); out: - release_sock(csk); + strp->cb.unlock(strp); } -static void strp_rx_work(struct work_struct *w) +static void strp_work(struct work_struct *w) { - do_strp_rx_work(container_of(w, struct strparser, rx_work)); + do_strp_work(container_of(w, struct strparser, work)); } -static void strp_rx_msg_timeout(unsigned long arg) +static void strp_msg_timeout(struct work_struct *w) { - struct strparser *strp = (struct strparser *)arg; + struct strparser *strp = container_of(w, struct strparser, + msg_timer_work.work); /* Message assembly timed out */ - STRP_STATS_INCR(strp->stats.rx_msg_timeouts); - lock_sock(strp->sk); + STRP_STATS_INCR(strp->stats.msg_timeouts); + strp->cb.lock(strp); strp->cb.abort_parser(strp, ETIMEDOUT); + strp->cb.unlock(strp); +} + +static void strp_sock_lock(struct strparser *strp) +{ + lock_sock(strp->sk); +} + +static void strp_sock_unlock(struct strparser *strp) +{ release_sock(strp->sk); } -int strp_init(struct strparser *strp, struct sock *csk, - struct strp_callbacks *cb) +int strp_init(struct strparser *strp, struct sock *sk, + const struct strp_callbacks *cb) { - struct socket *sock = csk->sk_socket; if (!cb || !cb->rcv_msg || !cb->parse_msg) return -EINVAL; - if (!sock->ops->read_sock || !sock->ops->peek_len) - return -EAFNOSUPPORT; - - memset(strp, 0, sizeof(*strp)); + /* The sk (sock) arg determines the mode of the stream parser. + * + * If the sock is set then the strparser is in receive callback mode. + * The upper layer calls strp_data_ready to kick receive processing + * and strparser calls the read_sock function on the socket to + * get packets. + * + * If the sock is not set then the strparser is in general mode. + * The upper layer calls strp_process for each skb to be parsed. + */ - strp->sk = csk; + if (!sk) { + if (!cb->lock || !cb->unlock) + return -EINVAL; + } - setup_timer(&strp->rx_msg_timer, strp_rx_msg_timeout, - (unsigned long)strp); + memset(strp, 0, sizeof(*strp)); - INIT_WORK(&strp->rx_work, strp_rx_work); + strp->sk = sk; + strp->cb.lock = cb->lock ? : strp_sock_lock; + strp->cb.unlock = cb->unlock ? : strp_sock_unlock; strp->cb.rcv_msg = cb->rcv_msg; strp->cb.parse_msg = cb->parse_msg; strp->cb.read_sock_done = cb->read_sock_done ? : default_read_sock_done; - strp->cb.abort_parser = cb->abort_parser ? : strp_abort_rx_strp; + strp->cb.abort_parser = cb->abort_parser ? : strp_abort_strp; + + INIT_DELAYED_WORK(&strp->msg_timer_work, strp_msg_timeout); + INIT_WORK(&strp->work, strp_work); return 0; } @@ -457,12 +515,12 @@ EXPORT_SYMBOL_GPL(strp_init); void strp_unpause(struct strparser *strp) { - strp->rx_paused = 0; + strp->paused = 0; - /* Sync setting rx_paused with RX work */ + /* Sync setting paused with RX work */ smp_mb(); - queue_work(strp_wq, &strp->rx_work); + queue_work(strp_wq, &strp->work); } EXPORT_SYMBOL_GPL(strp_unpause); @@ -471,27 +529,27 @@ EXPORT_SYMBOL_GPL(strp_unpause); */ void strp_done(struct strparser *strp) { - WARN_ON(!strp->rx_stopped); + WARN_ON(!strp->stopped); - del_timer_sync(&strp->rx_msg_timer); - cancel_work_sync(&strp->rx_work); + cancel_delayed_work_sync(&strp->msg_timer_work); + cancel_work_sync(&strp->work); - if (strp->rx_skb_head) { - kfree_skb(strp->rx_skb_head); - strp->rx_skb_head = NULL; + if (strp->skb_head) { + kfree_skb(strp->skb_head); + strp->skb_head = NULL; } } EXPORT_SYMBOL_GPL(strp_done); void strp_stop(struct strparser *strp) { - strp->rx_stopped = 1; + strp->stopped = 1; } EXPORT_SYMBOL_GPL(strp_stop); void strp_check_rcv(struct strparser *strp) { - queue_work(strp_wq, &strp->rx_work); + queue_work(strp_wq, &strp->work); } EXPORT_SYMBOL_GPL(strp_check_rcv); diff --git a/net/sunrpc/Makefile b/net/sunrpc/Makefile index ea7ffa12e0f9..090658c3da12 100644 --- a/net/sunrpc/Makefile +++ b/net/sunrpc/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for Linux kernel SUN RPC # diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile index 14e9e53e63d5..c374268b008f 100644 --- a/net/sunrpc/auth_gss/Makefile +++ b/net/sunrpc/auth_gss/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for Linux kernel rpcsec_gss implementation # diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 7b1ee5a0b03c..73165e9ca5bf 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -855,11 +855,13 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g return stat; if (integ_len > buf->len) return stat; - if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len)) - BUG(); + if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len)) { + WARN_ON_ONCE(1); + return stat; + } /* copy out mic... */ if (read_u32_from_xdr_buf(buf, integ_len, &mic.len)) - BUG(); + return stat; if (mic.len > RPC_MAX_AUTH_SIZE) return stat; mic.data = kmalloc(mic.len, GFP_KERNEL); @@ -1611,8 +1613,10 @@ svcauth_gss_wrap_resp_integ(struct svc_rqst *rqstp) BUG_ON(integ_len % 4); *p++ = htonl(integ_len); *p++ = htonl(gc->gc_seq); - if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, integ_len)) - BUG(); + if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, integ_len)) { + WARN_ON_ONCE(1); + goto out_err; + } if (resbuf->tail[0].iov_base == NULL) { if (resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE > PAGE_SIZE) goto out_err; diff --git a/net/sunrpc/auth_null.c b/net/sunrpc/auth_null.c index 5f3d527dff65..75d72e109a04 100644 --- a/net/sunrpc/auth_null.c +++ b/net/sunrpc/auth_null.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * linux/net/sunrpc/auth_null.c * diff --git a/net/sunrpc/auth_unix.c b/net/sunrpc/auth_unix.c index 82337e1ec9cd..dafd6b870ba3 100644 --- a/net/sunrpc/auth_unix.c +++ b/net/sunrpc/auth_unix.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * linux/net/sunrpc/auth_unix.c * diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index ac701c28f44f..c2c68a15b59d 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -171,10 +171,10 @@ int xprt_setup_bc(struct rpc_xprt *xprt, unsigned int min_reqs) /* * Add the temporary list to the backchannel preallocation list */ - spin_lock_bh(&xprt->bc_pa_lock); + spin_lock(&xprt->bc_pa_lock); list_splice(&tmp_list, &xprt->bc_pa_list); xprt_inc_alloc_count(xprt, min_reqs); - spin_unlock_bh(&xprt->bc_pa_lock); + spin_unlock(&xprt->bc_pa_lock); dprintk("RPC: setup backchannel transport done\n"); return 0; diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 2e49d1f892b7..a801da812f86 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -1491,7 +1491,6 @@ rpc_restart_call(struct rpc_task *task) } EXPORT_SYMBOL_GPL(rpc_restart_call); -#if IS_ENABLED(CONFIG_SUNRPC_DEBUG) const char *rpc_proc_name(const struct rpc_task *task) { @@ -1505,7 +1504,6 @@ const char } else return "no proc"; } -#endif /* * 0. Initial state @@ -1519,6 +1517,7 @@ call_start(struct rpc_task *task) struct rpc_clnt *clnt = task->tk_client; int idx = task->tk_msg.rpc_proc->p_statidx; + trace_rpc_request(task); dprintk("RPC: %5u call_start %s%d proc %s (%s)\n", task->tk_pid, clnt->cl_program->name, clnt->cl_vers, rpc_proc_name(task), @@ -1586,6 +1585,7 @@ call_reserveresult(struct rpc_task *task) switch (status) { case -ENOMEM: rpc_delay(task, HZ >> 2); + /* fall through */ case -EAGAIN: /* woken up; retry */ task->tk_action = call_retry_reserve; return; @@ -1647,10 +1647,13 @@ call_refreshresult(struct rpc_task *task) /* Use rate-limiting and a max number of retries if refresh * had status 0 but failed to update the cred. */ + /* fall through */ case -ETIMEDOUT: rpc_delay(task, 3*HZ); + /* fall through */ case -EAGAIN: status = -EACCES; + /* fall through */ case -EKEYEXPIRED: if (!task->tk_cred_retry) break; @@ -1903,6 +1906,15 @@ call_connect_status(struct rpc_task *task) task->tk_status = 0; switch (status) { case -ECONNREFUSED: + /* A positive refusal suggests a rebind is needed. */ + if (RPC_IS_SOFTCONN(task)) + break; + if (clnt->cl_autobind) { + rpc_force_rebind(clnt); + task->tk_action = call_bind; + return; + } + /* fall through */ case -ECONNRESET: case -ECONNABORTED: case -ENETUNREACH: @@ -1916,6 +1928,7 @@ call_connect_status(struct rpc_task *task) break; /* retry with existing socket, after a delay */ rpc_delay(task, 3*HZ); + /* fall through */ case -EAGAIN: /* Check for timeouts before looping back to call_bind */ case -ETIMEDOUT: @@ -2017,6 +2030,7 @@ call_transmit_status(struct rpc_task *task) rpc_exit(task, task->tk_status); break; } + /* fall through */ case -ECONNRESET: case -ECONNABORTED: case -EADDRINUSE: @@ -2137,25 +2151,25 @@ call_status(struct rpc_task *task) * were a timeout. */ rpc_delay(task, 3*HZ); + /* fall through */ case -ETIMEDOUT: task->tk_action = call_timeout; - if (!(task->tk_flags & RPC_TASK_NO_RETRANS_TIMEOUT) - && task->tk_client->cl_discrtry) - xprt_conditional_disconnect(req->rq_xprt, - req->rq_connect_cookie); break; case -ECONNREFUSED: case -ECONNRESET: case -ECONNABORTED: rpc_force_rebind(clnt); + /* fall through */ case -EADDRINUSE: rpc_delay(task, 3*HZ); + /* fall through */ case -EPIPE: case -ENOTCONN: task->tk_action = call_bind; break; case -ENOBUFS: rpc_delay(task, HZ>>2); + /* fall through */ case -EAGAIN: task->tk_action = call_transmit; break; diff --git a/net/sunrpc/debugfs.c b/net/sunrpc/debugfs.c index c8fd0b6c1618..e980d2a493de 100644 --- a/net/sunrpc/debugfs.c +++ b/net/sunrpc/debugfs.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /** * debugfs interface for sunrpc * diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h index 394ce523174c..7ec10b92bea1 100644 --- a/net/sunrpc/netns.h +++ b/net/sunrpc/netns.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __SUNRPC_NETNS_H__ #define __SUNRPC_NETNS_H__ diff --git a/net/sunrpc/rpc_pipe.c b/net/sunrpc/rpc_pipe.c index 61a504fb1ae2..7803f3b6aa53 100644 --- a/net/sunrpc/rpc_pipe.c +++ b/net/sunrpc/rpc_pipe.c @@ -1410,8 +1410,8 @@ rpc_fill_super(struct super_block *sb, void *data, int silent) return PTR_ERR(gssd_dentry); } - dprintk("RPC: sending pipefs MOUNT notification for net %p%s\n", - net, NET_NAME(net)); + dprintk("RPC: sending pipefs MOUNT notification for net %x%s\n", + net->ns.inum, NET_NAME(net)); mutex_lock(&sn->pipefs_sb_lock); sn->pipefs_sb = sb; err = blocking_notifier_call_chain(&rpc_pipefs_notifier_list, @@ -1462,8 +1462,8 @@ static void rpc_kill_sb(struct super_block *sb) goto out; } sn->pipefs_sb = NULL; - dprintk("RPC: sending pipefs UMOUNT notification for net %p%s\n", - net, NET_NAME(net)); + dprintk("RPC: sending pipefs UMOUNT notification for net %x%s\n", + net->ns.inum, NET_NAME(net)); blocking_notifier_call_chain(&rpc_pipefs_notifier_list, RPC_PIPEFS_UMOUNT, sb); diff --git a/net/sunrpc/rpcb_clnt.c b/net/sunrpc/rpcb_clnt.c index ea0676f199c8..c526f8fb37c9 100644 --- a/net/sunrpc/rpcb_clnt.c +++ b/net/sunrpc/rpcb_clnt.c @@ -216,9 +216,9 @@ static void rpcb_set_local(struct net *net, struct rpc_clnt *clnt, smp_wmb(); sn->rpcb_users = 1; dprintk("RPC: created new rpcb local clients (rpcb_local_clnt: " - "%p, rpcb_local_clnt4: %p) for net %p%s\n", - sn->rpcb_local_clnt, sn->rpcb_local_clnt4, - net, (net == &init_net) ? " (init_net)" : ""); + "%p, rpcb_local_clnt4: %p) for net %x%s\n", + sn->rpcb_local_clnt, sn->rpcb_local_clnt4, + net->ns.inum, (net == &init_net) ? " (init_net)" : ""); } /* diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index 0cc83839c13c..b1b49edd7c4d 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -44,7 +44,7 @@ static mempool_t *rpc_buffer_mempool __read_mostly; static void rpc_async_schedule(struct work_struct *); static void rpc_release_task(struct rpc_task *task); -static void __rpc_queue_timer_fn(unsigned long ptr); +static void __rpc_queue_timer_fn(struct timer_list *t); /* * RPC tasks sit here while waiting for conditions to improve. @@ -228,7 +228,7 @@ static void __rpc_init_priority_wait_queue(struct rpc_wait_queue *queue, const c queue->maxpriority = nr_queues - 1; rpc_reset_waitqueue_priority(queue); queue->qlen = 0; - setup_timer(&queue->timer_list.timer, __rpc_queue_timer_fn, (unsigned long)queue); + timer_setup(&queue->timer_list.timer, __rpc_queue_timer_fn, 0); INIT_LIST_HEAD(&queue->timer_list.list); rpc_assign_waitqueue_name(queue, qname); } @@ -274,10 +274,9 @@ static inline void rpc_task_set_debuginfo(struct rpc_task *task) static void rpc_set_active(struct rpc_task *task) { - trace_rpc_task_begin(task->tk_client, task, NULL); - rpc_task_set_debuginfo(task); set_bit(RPC_TASK_ACTIVE, &task->tk_runstate); + trace_rpc_task_begin(task->tk_client, task, NULL); } /* @@ -635,9 +634,9 @@ void rpc_wake_up_status(struct rpc_wait_queue *queue, int status) } EXPORT_SYMBOL_GPL(rpc_wake_up_status); -static void __rpc_queue_timer_fn(unsigned long ptr) +static void __rpc_queue_timer_fn(struct timer_list *t) { - struct rpc_wait_queue *queue = (struct rpc_wait_queue *)ptr; + struct rpc_wait_queue *queue = from_timer(queue, t, timer_list.timer); struct rpc_task *task, *n; unsigned long expires, now, timeo; diff --git a/net/sunrpc/sunrpc_syms.c b/net/sunrpc/sunrpc_syms.c index c73de181467a..56f9eff74150 100644 --- a/net/sunrpc/sunrpc_syms.c +++ b/net/sunrpc/sunrpc_syms.c @@ -65,10 +65,13 @@ err_proc: static __net_exit void sunrpc_exit_net(struct net *net) { + struct sunrpc_net *sn = net_generic(net, sunrpc_net_id); + rpc_pipefs_exit_net(net); unix_gid_cache_destroy(net); ip_map_cache_destroy(net); rpc_proc_exit(net); + WARN_ON_ONCE(!list_empty(&sn->all_clients)); } static struct pernet_operations sunrpc_net_ops = { diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 85ce0db5b0a6..387cc4add6f6 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -50,7 +50,7 @@ EXPORT_SYMBOL_GPL(svc_pool_map); static DEFINE_MUTEX(svc_pool_map_mutex);/* protects svc_pool_map.count only */ static int -param_set_pool_mode(const char *val, struct kernel_param *kp) +param_set_pool_mode(const char *val, const struct kernel_param *kp) { int *ip = (int *)kp->arg; struct svc_pool_map *m = &svc_pool_map; @@ -80,7 +80,7 @@ out: } static int -param_get_pool_mode(char *buf, struct kernel_param *kp) +param_get_pool_mode(char *buf, const struct kernel_param *kp) { int *ip = (int *)kp->arg; @@ -421,7 +421,7 @@ __svc_init_bc(struct svc_serv *serv) */ static struct svc_serv * __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, - struct svc_serv_ops *ops) + const struct svc_serv_ops *ops) { struct svc_serv *serv; unsigned int vers; @@ -455,7 +455,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, serv->sv_xdrsize = xdrsize; INIT_LIST_HEAD(&serv->sv_tempsocks); INIT_LIST_HEAD(&serv->sv_permsocks); - init_timer(&serv->sv_temptimer); + timer_setup(&serv->sv_temptimer, NULL, 0); spin_lock_init(&serv->sv_lock); __svc_init_bc(serv); @@ -486,7 +486,7 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools, struct svc_serv * svc_create(struct svc_program *prog, unsigned int bufsize, - struct svc_serv_ops *ops) + const struct svc_serv_ops *ops) { return __svc_create(prog, bufsize, /*npools*/1, ops); } @@ -494,7 +494,7 @@ EXPORT_SYMBOL_GPL(svc_create); struct svc_serv * svc_create_pooled(struct svc_program *prog, unsigned int bufsize, - struct svc_serv_ops *ops) + const struct svc_serv_ops *ops) { struct svc_serv *serv; unsigned int npools = svc_pool_map_get(); diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index d16a8b423c20..e8e0831229cf 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -28,7 +28,7 @@ module_param(svc_rpc_per_connection_limit, uint, 0644); static struct svc_deferred_req *svc_deferred_dequeue(struct svc_xprt *xprt); static int svc_deferred_recv(struct svc_rqst *rqstp); static struct cache_deferred_req *svc_defer(struct cache_req *req); -static void svc_age_temp_xprts(unsigned long closure); +static void svc_age_temp_xprts(struct timer_list *t); static void svc_delete_xprt(struct svc_xprt *xprt); /* apparently the "standard" is that clients close @@ -250,9 +250,9 @@ void svc_add_new_perm_xprt(struct svc_serv *serv, struct svc_xprt *new) svc_xprt_received(new); } -int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name, - struct net *net, const int family, - const unsigned short port, int flags) +static int _svc_create_xprt(struct svc_serv *serv, const char *xprt_name, + struct net *net, const int family, + const unsigned short port, int flags) { struct svc_xprt_class *xcl; @@ -380,7 +380,6 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt) struct svc_pool *pool; struct svc_rqst *rqstp = NULL; int cpu; - bool queued = false; if (!svc_xprt_has_something_to_do(xprt)) goto out; @@ -401,58 +400,25 @@ void svc_xprt_do_enqueue(struct svc_xprt *xprt) atomic_long_inc(&pool->sp_stats.packets); -redo_search: + dprintk("svc: transport %p put into queue\n", xprt); + spin_lock_bh(&pool->sp_lock); + list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); + pool->sp_stats.sockets_queued++; + spin_unlock_bh(&pool->sp_lock); + /* find a thread for this xprt */ rcu_read_lock(); list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) { - /* Do a lockless check first */ - if (test_bit(RQ_BUSY, &rqstp->rq_flags)) + if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags)) continue; - - /* - * Once the xprt has been queued, it can only be dequeued by - * the task that intends to service it. All we can do at that - * point is to try to wake this thread back up so that it can - * do so. - */ - if (!queued) { - spin_lock_bh(&rqstp->rq_lock); - if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags)) { - /* already busy, move on... */ - spin_unlock_bh(&rqstp->rq_lock); - continue; - } - - /* this one will do */ - rqstp->rq_xprt = xprt; - svc_xprt_get(xprt); - spin_unlock_bh(&rqstp->rq_lock); - } - rcu_read_unlock(); - atomic_long_inc(&pool->sp_stats.threads_woken); wake_up_process(rqstp->rq_task); - put_cpu(); - goto out; - } - rcu_read_unlock(); - - /* - * We didn't find an idle thread to use, so we need to queue the xprt. - * Do so and then search again. If we find one, we can't hook this one - * up to it directly but we can wake the thread up in the hopes that it - * will pick it up once it searches for a xprt to service. - */ - if (!queued) { - queued = true; - dprintk("svc: transport %p put into queue\n", xprt); - spin_lock_bh(&pool->sp_lock); - list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); - pool->sp_stats.sockets_queued++; - spin_unlock_bh(&pool->sp_lock); - goto redo_search; + goto out_unlock; } + set_bit(SP_CONGESTED, &pool->sp_flags); rqstp = NULL; +out_unlock: + rcu_read_unlock(); put_cpu(); out: trace_svc_xprt_do_enqueue(xprt, rqstp); @@ -721,38 +687,25 @@ rqst_should_sleep(struct svc_rqst *rqstp) static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) { - struct svc_xprt *xprt; struct svc_pool *pool = rqstp->rq_pool; long time_left = 0; /* rq_xprt should be clear on entry */ WARN_ON_ONCE(rqstp->rq_xprt); - /* Normally we will wait up to 5 seconds for any required - * cache information to be provided. - */ - rqstp->rq_chandle.thread_wait = 5*HZ; - - xprt = svc_xprt_dequeue(pool); - if (xprt) { - rqstp->rq_xprt = xprt; - - /* As there is a shortage of threads and this request - * had to be queued, don't allow the thread to wait so - * long for cache updates. - */ - rqstp->rq_chandle.thread_wait = 1*HZ; - clear_bit(SP_TASK_PENDING, &pool->sp_flags); - return xprt; - } + rqstp->rq_xprt = svc_xprt_dequeue(pool); + if (rqstp->rq_xprt) + goto out_found; /* * We have to be able to interrupt this wait * to bring down the daemons ... */ set_current_state(TASK_INTERRUPTIBLE); + smp_mb__before_atomic(); + clear_bit(SP_CONGESTED, &pool->sp_flags); clear_bit(RQ_BUSY, &rqstp->rq_flags); - smp_mb(); + smp_mb__after_atomic(); if (likely(rqst_should_sleep(rqstp))) time_left = schedule_timeout(timeout); @@ -761,13 +714,11 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) try_to_freeze(); - spin_lock_bh(&rqstp->rq_lock); set_bit(RQ_BUSY, &rqstp->rq_flags); - spin_unlock_bh(&rqstp->rq_lock); - - xprt = rqstp->rq_xprt; - if (xprt != NULL) - return xprt; + smp_mb__after_atomic(); + rqstp->rq_xprt = svc_xprt_dequeue(pool); + if (rqstp->rq_xprt) + goto out_found; if (!time_left) atomic_long_inc(&pool->sp_stats.threads_timedout); @@ -775,6 +726,15 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) if (signalled() || kthread_should_stop()) return ERR_PTR(-EINTR); return ERR_PTR(-EAGAIN); +out_found: + /* Normally we will wait up to 5 seconds for any required + * cache information to be provided. + */ + if (!test_bit(SP_CONGESTED, &pool->sp_flags)) + rqstp->rq_chandle.thread_wait = 5*HZ; + else + rqstp->rq_chandle.thread_wait = 1*HZ; + return rqstp->rq_xprt; } static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt) @@ -785,8 +745,7 @@ static void svc_add_new_temp_xprt(struct svc_serv *serv, struct svc_xprt *newxpt serv->sv_tmpcnt++; if (serv->sv_temptimer.function == NULL) { /* setup timer to age temp transports */ - setup_timer(&serv->sv_temptimer, svc_age_temp_xprts, - (unsigned long)serv); + serv->sv_temptimer.function = (TIMER_FUNC_TYPE)svc_age_temp_xprts; mod_timer(&serv->sv_temptimer, jiffies + svc_conn_age_period * HZ); } @@ -960,9 +919,9 @@ out: * Timer function to close old temporary transports, using * a mark-and-sweep algorithm. */ -static void svc_age_temp_xprts(unsigned long closure) +static void svc_age_temp_xprts(struct timer_list *t) { - struct svc_serv *serv = (struct svc_serv *)closure; + struct svc_serv *serv = from_timer(serv, t, sv_temptimer); struct svc_xprt *xprt; struct list_head *le, *next; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 2b720fa35c4f..ff8e06cd067e 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -421,6 +421,9 @@ static void svc_data_ready(struct sock *sk) dprintk("svc: socket %p(inet %p), busy=%d\n", svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)); + + /* Refer to svc_setup_socket() for details. */ + rmb(); svsk->sk_odata(sk); if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags)) svc_xprt_enqueue(&svsk->sk_xprt); @@ -437,6 +440,9 @@ static void svc_write_space(struct sock *sk) if (svsk) { dprintk("svc: socket %p(inet %p), write_space busy=%d\n", svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)); + + /* Refer to svc_setup_socket() for details. */ + rmb(); svsk->sk_owspace(sk); svc_xprt_enqueue(&svsk->sk_xprt); } @@ -687,7 +693,7 @@ static struct svc_xprt *svc_udp_create(struct svc_serv *serv, return svc_create_socket(serv, IPPROTO_UDP, net, sa, salen, flags); } -static struct svc_xprt_ops svc_udp_ops = { +static const struct svc_xprt_ops svc_udp_ops = { .xpo_create = svc_udp_create, .xpo_recvfrom = svc_udp_recvfrom, .xpo_sendto = svc_udp_sendto, @@ -760,8 +766,12 @@ static void svc_tcp_listen_data_ready(struct sock *sk) dprintk("svc: socket %p TCP (listen) state change %d\n", sk, sk->sk_state); - if (svsk) + if (svsk) { + /* Refer to svc_setup_socket() for details. */ + rmb(); svsk->sk_odata(sk); + } + /* * This callback may called twice when a new connection * is established as a child socket inherits everything @@ -794,6 +804,8 @@ static void svc_tcp_state_change(struct sock *sk) if (!svsk) printk("svc: socket %p: no user data\n", sk); else { + /* Refer to svc_setup_socket() for details. */ + rmb(); svsk->sk_ostate(sk); if (sk->sk_state != TCP_ESTABLISHED) { set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); @@ -1001,7 +1013,7 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp) if (!bc_xprt) return -EAGAIN; - spin_lock_bh(&bc_xprt->transport_lock); + spin_lock(&bc_xprt->recv_lock); req = xprt_lookup_rqst(bc_xprt, xid); if (!req) goto unlock_notfound; @@ -1019,7 +1031,7 @@ static int receive_cb_reply(struct svc_sock *svsk, struct svc_rqst *rqstp) memcpy(dst->iov_base, src->iov_base, src->iov_len); xprt_complete_rqst(req->rq_task, rqstp->rq_arg.len); rqstp->rq_arg.len = 0; - spin_unlock_bh(&bc_xprt->transport_lock); + spin_unlock(&bc_xprt->recv_lock); return 0; unlock_notfound: printk(KERN_NOTICE @@ -1028,7 +1040,7 @@ unlock_notfound: __func__, ntohl(calldir), bc_xprt, ntohl(xid)); unlock_eagain: - spin_unlock_bh(&bc_xprt->transport_lock); + spin_unlock(&bc_xprt->recv_lock); return -EAGAIN; } @@ -1229,7 +1241,7 @@ static void svc_bc_tcp_sock_detach(struct svc_xprt *xprt) { } -static struct svc_xprt_ops svc_tcp_bc_ops = { +static const struct svc_xprt_ops svc_tcp_bc_ops = { .xpo_create = svc_bc_tcp_create, .xpo_detach = svc_bc_tcp_sock_detach, .xpo_free = svc_bc_sock_free, @@ -1263,7 +1275,7 @@ static void svc_cleanup_bc_xprt_sock(void) } #endif /* CONFIG_SUNRPC_BACKCHANNEL */ -static struct svc_xprt_ops svc_tcp_ops = { +static const struct svc_xprt_ops svc_tcp_ops = { .xpo_create = svc_tcp_create, .xpo_recvfrom = svc_tcp_recvfrom, .xpo_sendto = svc_tcp_sendto, @@ -1381,12 +1393,18 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, return ERR_PTR(err); } - inet->sk_user_data = svsk; svsk->sk_sock = sock; svsk->sk_sk = inet; svsk->sk_ostate = inet->sk_state_change; svsk->sk_odata = inet->sk_data_ready; svsk->sk_owspace = inet->sk_write_space; + /* + * This barrier is necessary in order to prevent race condition + * with svc_data_ready(), svc_listen_data_ready() and others + * when calling callbacks above. + */ + wmb(); + inet->sk_user_data = svsk; /* Initialize the socket */ if (sock->type == SOCK_DGRAM) diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index 4654a9934269..333b9d697ae5 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -696,9 +696,9 @@ xprt_schedule_autodisconnect(struct rpc_xprt *xprt) } static void -xprt_init_autodisconnect(unsigned long data) +xprt_init_autodisconnect(struct timer_list *t) { - struct rpc_xprt *xprt = (struct rpc_xprt *)data; + struct rpc_xprt *xprt = from_timer(xprt, t, timer); spin_lock(&xprt->transport_lock); if (!list_empty(&xprt->recv)) @@ -844,6 +844,50 @@ struct rpc_rqst *xprt_lookup_rqst(struct rpc_xprt *xprt, __be32 xid) } EXPORT_SYMBOL_GPL(xprt_lookup_rqst); +/** + * xprt_pin_rqst - Pin a request on the transport receive list + * @req: Request to pin + * + * Caller must ensure this is atomic with the call to xprt_lookup_rqst() + * so should be holding the xprt transport lock. + */ +void xprt_pin_rqst(struct rpc_rqst *req) +{ + set_bit(RPC_TASK_MSG_RECV, &req->rq_task->tk_runstate); +} +EXPORT_SYMBOL_GPL(xprt_pin_rqst); + +/** + * xprt_unpin_rqst - Unpin a request on the transport receive list + * @req: Request to pin + * + * Caller should be holding the xprt transport lock. + */ +void xprt_unpin_rqst(struct rpc_rqst *req) +{ + struct rpc_task *task = req->rq_task; + + clear_bit(RPC_TASK_MSG_RECV, &task->tk_runstate); + if (test_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate)) + wake_up_bit(&task->tk_runstate, RPC_TASK_MSG_RECV); +} +EXPORT_SYMBOL_GPL(xprt_unpin_rqst); + +static void xprt_wait_on_pinned_rqst(struct rpc_rqst *req) +__must_hold(&req->rq_xprt->recv_lock) +{ + struct rpc_task *task = req->rq_task; + + if (task && test_bit(RPC_TASK_MSG_RECV, &task->tk_runstate)) { + spin_unlock(&req->rq_xprt->recv_lock); + set_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate); + wait_on_bit(&task->tk_runstate, RPC_TASK_MSG_RECV, + TASK_UNINTERRUPTIBLE); + clear_bit(RPC_TASK_MSG_RECV_WAIT, &task->tk_runstate); + spin_lock(&req->rq_xprt->recv_lock); + } +} + static void xprt_update_rtt(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; @@ -966,13 +1010,13 @@ void xprt_transmit(struct rpc_task *task) /* * Add to the list only if we're expecting a reply */ - spin_lock_bh(&xprt->transport_lock); /* Update the softirq receive buffer */ memcpy(&req->rq_private_buf, &req->rq_rcv_buf, sizeof(req->rq_private_buf)); /* Add request to the receive list */ + spin_lock(&xprt->recv_lock); list_add_tail(&req->rq_list, &xprt->recv); - spin_unlock_bh(&xprt->transport_lock); + spin_unlock(&xprt->recv_lock); xprt_reset_majortimeo(req); /* Turn off autodisconnect */ del_singleshot_timer_sync(&xprt->timer); @@ -1095,6 +1139,7 @@ void xprt_alloc_slot(struct rpc_xprt *xprt, struct rpc_task *task) case -EAGAIN: xprt_add_backlog(xprt, task); dprintk("RPC: waiting for request slot\n"); + /* fall through */ default: task->tk_status = -EAGAIN; } @@ -1287,12 +1332,16 @@ void xprt_release(struct rpc_task *task) task->tk_ops->rpc_count_stats(task, task->tk_calldata); else if (task->tk_client) rpc_count_iostats(task, task->tk_client->cl_metrics); + spin_lock(&xprt->recv_lock); + if (!list_empty(&req->rq_list)) { + list_del_init(&req->rq_list); + xprt_wait_on_pinned_rqst(req); + } + spin_unlock(&xprt->recv_lock); spin_lock_bh(&xprt->transport_lock); xprt->ops->release_xprt(xprt, task); if (xprt->ops->release_request) xprt->ops->release_request(task); - if (!list_empty(&req->rq_list)) - list_del(&req->rq_list); xprt->last_used = jiffies; xprt_schedule_autodisconnect(xprt); spin_unlock_bh(&xprt->transport_lock); @@ -1318,6 +1367,7 @@ static void xprt_init(struct rpc_xprt *xprt, struct net *net) spin_lock_init(&xprt->transport_lock); spin_lock_init(&xprt->reserve_lock); + spin_lock_init(&xprt->recv_lock); INIT_LIST_HEAD(&xprt->free); INIT_LIST_HEAD(&xprt->recv); @@ -1373,10 +1423,9 @@ found: xprt->idle_timeout = 0; INIT_WORK(&xprt->task_cleanup, xprt_autoclose); if (xprt_has_timer(xprt)) - setup_timer(&xprt->timer, xprt_init_autodisconnect, - (unsigned long)xprt); + timer_setup(&xprt->timer, xprt_init_autodisconnect, 0); else - init_timer(&xprt->timer); + timer_setup(&xprt->timer, NULL, 0); if (strlen(args->servername) > RPC_MAXNETNAMELEN) { xprt_destroy(xprt); @@ -1396,6 +1445,23 @@ out: return xprt; } +static void xprt_destroy_cb(struct work_struct *work) +{ + struct rpc_xprt *xprt = + container_of(work, struct rpc_xprt, task_cleanup); + + rpc_xprt_debugfs_unregister(xprt); + rpc_destroy_wait_queue(&xprt->binding); + rpc_destroy_wait_queue(&xprt->pending); + rpc_destroy_wait_queue(&xprt->sending); + rpc_destroy_wait_queue(&xprt->backlog); + kfree(xprt->servername); + /* + * Tear down transport state and free the rpc_xprt + */ + xprt->ops->destroy(xprt); +} + /** * xprt_destroy - destroy an RPC transport, killing off all requests. * @xprt: transport to destroy @@ -1405,22 +1471,19 @@ static void xprt_destroy(struct rpc_xprt *xprt) { dprintk("RPC: destroying transport %p\n", xprt); - /* Exclude transport connect/disconnect handlers */ + /* + * Exclude transport connect/disconnect handlers and autoclose + */ wait_on_bit_lock(&xprt->state, XPRT_LOCKED, TASK_UNINTERRUPTIBLE); del_timer_sync(&xprt->timer); - rpc_xprt_debugfs_unregister(xprt); - rpc_destroy_wait_queue(&xprt->binding); - rpc_destroy_wait_queue(&xprt->pending); - rpc_destroy_wait_queue(&xprt->sending); - rpc_destroy_wait_queue(&xprt->backlog); - cancel_work_sync(&xprt->task_cleanup); - kfree(xprt->servername); /* - * Tear down transport state and free the rpc_xprt + * Destroy sockets etc from the system workqueue so they can + * safely flush receive work running on rpciod. */ - xprt->ops->destroy(xprt); + INIT_WORK(&xprt->task_cleanup, xprt_destroy_cb); + schedule_work(&xprt->task_cleanup); } static void xprt_destroy_kref(struct kref *kref) diff --git a/net/sunrpc/xprtmultipath.c b/net/sunrpc/xprtmultipath.c index ae92a9e9ba52..e2d64c7138c3 100644 --- a/net/sunrpc/xprtmultipath.c +++ b/net/sunrpc/xprtmultipath.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Multipath support for RPC * diff --git a/net/sunrpc/xprtrdma/Makefile b/net/sunrpc/xprtrdma/Makefile index b8213ddce2f2..8bf19e142b6b 100644 --- a/net/sunrpc/xprtrdma/Makefile +++ b/net/sunrpc/xprtrdma/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_SUNRPC_XPRT_RDMA) += rpcrdma.o rpcrdma-y := transport.o rpc_rdma.o verbs.o \ diff --git a/net/sunrpc/xprtrdma/backchannel.c b/net/sunrpc/xprtrdma/backchannel.c index 03f6b5840764..8b818bb3518a 100644 --- a/net/sunrpc/xprtrdma/backchannel.c +++ b/net/sunrpc/xprtrdma/backchannel.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2015 Oracle. All rights reserved. * @@ -42,13 +43,14 @@ static int rpcrdma_bc_setup_rqst(struct rpcrdma_xprt *r_xprt, req = rpcrdma_create_req(r_xprt); if (IS_ERR(req)) return PTR_ERR(req); - req->rl_backchannel = true; + __set_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags); rb = rpcrdma_alloc_regbuf(RPCRDMA_HDRBUF_SIZE, DMA_TO_DEVICE, GFP_KERNEL); if (IS_ERR(rb)) goto out_fail; req->rl_rdmabuf = rb; + xdr_buf_init(&req->rl_hdrbuf, rb->rg_base, rdmab_length(rb)); size = r_xprt->rx_data.inline_rsize; rb = rpcrdma_alloc_regbuf(size, DMA_TO_DEVICE, GFP_KERNEL); @@ -202,23 +204,27 @@ size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *xprt) */ int rpcrdma_bc_marshal_reply(struct rpc_rqst *rqst) { - struct rpc_xprt *xprt = rqst->rq_xprt; - struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); + struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); struct rpcrdma_req *req = rpcr_to_rdmar(rqst); - struct rpcrdma_msg *headerp; - - headerp = rdmab_to_msg(req->rl_rdmabuf); - headerp->rm_xid = rqst->rq_xid; - headerp->rm_vers = rpcrdma_version; - headerp->rm_credit = - cpu_to_be32(r_xprt->rx_buf.rb_bc_srv_max_requests); - headerp->rm_type = rdma_msg; - headerp->rm_body.rm_chunks[0] = xdr_zero; - headerp->rm_body.rm_chunks[1] = xdr_zero; - headerp->rm_body.rm_chunks[2] = xdr_zero; - - if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req, RPCRDMA_HDRLEN_MIN, - &rqst->rq_snd_buf, rpcrdma_noch)) + __be32 *p; + + rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0); + xdr_init_encode(&req->rl_stream, &req->rl_hdrbuf, + req->rl_rdmabuf->rg_base); + + p = xdr_reserve_space(&req->rl_stream, 28); + if (unlikely(!p)) + return -EIO; + *p++ = rqst->rq_xid; + *p++ = rpcrdma_version; + *p++ = cpu_to_be32(r_xprt->rx_buf.rb_bc_srv_max_requests); + *p++ = rdma_msg; + *p++ = xdr_zero; + *p++ = xdr_zero; + *p = xdr_zero; + + if (rpcrdma_prepare_send_sges(r_xprt, req, RPCRDMA_HDRLEN_MIN, + &rqst->rq_snd_buf, rpcrdma_noch)) return -EIO; return 0; } @@ -271,9 +277,6 @@ void xprt_rdma_bc_free_rqst(struct rpc_rqst *rqst) * @xprt: transport receiving the call * @rep: receive buffer containing the call * - * Called in the RPC reply handler, which runs in a tasklet. - * Be quick about it. - * * Operational assumptions: * o Backchannel credits are ignored, just as the NFS server * forechannel currently does @@ -284,7 +287,6 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep) { struct rpc_xprt *xprt = &r_xprt->rx_xprt; - struct rpcrdma_msg *headerp; struct svc_serv *bc_serv; struct rpcrdma_req *req; struct rpc_rqst *rqst; @@ -292,24 +294,15 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, size_t size; __be32 *p; - headerp = rdmab_to_msg(rep->rr_rdmabuf); + p = xdr_inline_decode(&rep->rr_stream, 0); + size = xdr_stream_remaining(&rep->rr_stream); + #ifdef RPCRDMA_BACKCHANNEL_DEBUG pr_info("RPC: %s: callback XID %08x, length=%u\n", - __func__, be32_to_cpu(headerp->rm_xid), rep->rr_len); - pr_info("RPC: %s: %*ph\n", __func__, rep->rr_len, headerp); + __func__, be32_to_cpup(p), size); + pr_info("RPC: %s: %*ph\n", __func__, size, p); #endif - /* Sanity check: - * Need at least enough bytes for RPC/RDMA header, as code - * here references the header fields by array offset. Also, - * backward calls are always inline, so ensure there - * are some bytes beyond the RPC/RDMA header. - */ - if (rep->rr_len < RPCRDMA_HDRLEN_MIN + 24) - goto out_short; - p = (__be32 *)((unsigned char *)headerp + RPCRDMA_HDRLEN_MIN); - size = rep->rr_len - RPCRDMA_HDRLEN_MIN; - /* Grab a free bc rqst */ spin_lock(&xprt->bc_pa_lock); if (list_empty(&xprt->bc_pa_list)) { @@ -325,7 +318,7 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, /* Prepare rqst */ rqst->rq_reply_bytes_recvd = 0; rqst->rq_bytes_sent = 0; - rqst->rq_xid = headerp->rm_xid; + rqst->rq_xid = *p; rqst->rq_private_buf.len = size; set_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state); @@ -337,9 +330,9 @@ void rpcrdma_bc_receive_call(struct rpcrdma_xprt *r_xprt, buf->len = size; /* The receive buffer has to be hooked to the rpcrdma_req - * so that it can be reposted after the server is done - * parsing it but just before sending the backward - * direction reply. + * so that it is not released while the req is pointing + * to its buffer, and so that it can be reposted after + * the Upper Layer is done decoding it. */ req = rpcr_to_rdmar(rqst); dprintk("RPC: %s: attaching rep %p to req %p\n", @@ -367,13 +360,4 @@ out_overflow: * when the connection is re-established. */ return; - -out_short: - pr_warn("RPC/RDMA short backward direction call\n"); - - if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, rep)) - xprt_disconnect_done(xprt); - else - pr_warn("RPC: %s: reposting rep %p\n", - __func__, rep); } diff --git a/net/sunrpc/xprtrdma/fmr_ops.c b/net/sunrpc/xprtrdma/fmr_ops.c index d3f84bb1d443..29fc84c7ff98 100644 --- a/net/sunrpc/xprtrdma/fmr_ops.c +++ b/net/sunrpc/xprtrdma/fmr_ops.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2015 Oracle. All rights reserved. * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. @@ -177,7 +178,7 @@ fmr_op_maxpages(struct rpcrdma_xprt *r_xprt) /* Use the ib_map_phys_fmr() verb to register a memory region * for remote access via RDMA READ or RDMA WRITE. */ -static int +static struct rpcrdma_mr_seg * fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, int nsegs, bool writing, struct rpcrdma_mw **out) { @@ -188,7 +189,7 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, mw = rpcrdma_get_mw(r_xprt); if (!mw) - return -ENOBUFS; + return ERR_PTR(-ENOBUFS); pageoff = offset_in_page(seg1->mr_offset); seg1->mr_offset -= pageoff; /* start of page */ @@ -232,13 +233,13 @@ fmr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, mw->mw_offset = dma_pages[0] + pageoff; *out = mw; - return mw->mw_nents; + return seg; out_dmamap_err: pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n", mw->mw_sg, i); rpcrdma_put_mw(r_xprt, mw); - return -EIO; + return ERR_PTR(-EIO); out_maperr: pr_err("rpcrdma: ib_map_phys_fmr %u@0x%llx+%i (%d) status %i\n", @@ -247,7 +248,7 @@ out_maperr: ib_dma_unmap_sg(r_xprt->rx_ia.ri_device, mw->mw_sg, mw->mw_nents, mw->mw_dir); rpcrdma_put_mw(r_xprt, mw); - return -EIO; + return ERR_PTR(-EIO); } /* Invalidate all memory regions that were registered for "req". @@ -305,28 +306,9 @@ out_reset: } } -/* Use a slow, safe mechanism to invalidate all memory regions - * that were registered for "req". - */ -static void -fmr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, - bool sync) -{ - struct rpcrdma_mw *mw; - - while (!list_empty(&req->rl_registered)) { - mw = rpcrdma_pop_mw(&req->rl_registered); - if (sync) - fmr_op_recover_mr(mw); - else - rpcrdma_defer_mr_recovery(mw); - } -} - const struct rpcrdma_memreg_ops rpcrdma_fmr_memreg_ops = { .ro_map = fmr_op_map, .ro_unmap_sync = fmr_op_unmap_sync, - .ro_unmap_safe = fmr_op_unmap_safe, .ro_recover_mr = fmr_op_recover_mr, .ro_open = fmr_op_open, .ro_maxpages = fmr_op_maxpages, diff --git a/net/sunrpc/xprtrdma/frwr_ops.c b/net/sunrpc/xprtrdma/frwr_ops.c index 6aea36a38bfd..773e66e10a15 100644 --- a/net/sunrpc/xprtrdma/frwr_ops.c +++ b/net/sunrpc/xprtrdma/frwr_ops.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2015 Oracle. All rights reserved. * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. @@ -344,7 +345,7 @@ frwr_wc_localinv_wake(struct ib_cq *cq, struct ib_wc *wc) /* Post a REG_MR Work Request to register a memory region * for remote access via RDMA READ or RDMA WRITE. */ -static int +static struct rpcrdma_mr_seg * frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, int nsegs, bool writing, struct rpcrdma_mw **out) { @@ -364,7 +365,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, rpcrdma_defer_mr_recovery(mw); mw = rpcrdma_get_mw(r_xprt); if (!mw) - return -ENOBUFS; + return ERR_PTR(-ENOBUFS); } while (mw->frmr.fr_state != FRMR_IS_INVALID); frmr = &mw->frmr; frmr->fr_state = FRMR_IS_VALID; @@ -401,7 +402,7 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, if (unlikely(n != mw->mw_nents)) goto out_mapmr_err; - dprintk("RPC: %s: Using frmr %p to map %u segments (%u bytes)\n", + dprintk("RPC: %s: Using frmr %p to map %u segments (%llu bytes)\n", __func__, frmr, mw->mw_nents, mr->length); key = (u8)(mr->rkey & 0x000000FF); @@ -419,7 +420,6 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, IB_ACCESS_REMOTE_WRITE | IB_ACCESS_LOCAL_WRITE : IB_ACCESS_REMOTE_READ; - rpcrdma_set_signaled(&r_xprt->rx_ep, ®_wr->wr); rc = ib_post_send(ia->ri_id->qp, ®_wr->wr, &bad_wr); if (rc) goto out_senderr; @@ -429,25 +429,25 @@ frwr_op_map(struct rpcrdma_xprt *r_xprt, struct rpcrdma_mr_seg *seg, mw->mw_offset = mr->iova; *out = mw; - return mw->mw_nents; + return seg; out_dmamap_err: pr_err("rpcrdma: failed to DMA map sg %p sg_nents %d\n", mw->mw_sg, i); frmr->fr_state = FRMR_IS_INVALID; rpcrdma_put_mw(r_xprt, mw); - return -EIO; + return ERR_PTR(-EIO); out_mapmr_err: pr_err("rpcrdma: failed to map mr %p (%d/%d)\n", frmr->fr_mr, n, mw->mw_nents); rpcrdma_defer_mr_recovery(mw); - return -EIO; + return ERR_PTR(-EIO); out_senderr: pr_err("rpcrdma: FRMR registration ib_post_send returned %i\n", rc); rpcrdma_defer_mr_recovery(mw); - return -ENOTCONN; + return ERR_PTR(-ENOTCONN); } /* Invalidate all memory regions that were registered for "req". @@ -507,12 +507,6 @@ frwr_op_unmap_sync(struct rpcrdma_xprt *r_xprt, struct list_head *mws) f->fr_cqe.done = frwr_wc_localinv_wake; reinit_completion(&f->fr_linv_done); - /* Initialize CQ count, since there is always a signaled - * WR being posted here. The new cqcount depends on how - * many SQEs are about to be consumed. - */ - rpcrdma_init_cqcount(&r_xprt->rx_ep, count); - /* Transport disconnect drains the receive CQ before it * replaces the QP. The RPC reply handler won't call us * unless ri_id->qp is a valid pointer. @@ -545,7 +539,6 @@ reset_mrs: /* Find and reset the MRs in the LOCAL_INV WRs that did not * get posted. */ - rpcrdma_init_cqcount(&r_xprt->rx_ep, -count); while (bad_wr) { f = container_of(bad_wr, struct rpcrdma_frmr, fr_invwr); @@ -558,28 +551,9 @@ reset_mrs: goto unmap; } -/* Use a slow, safe mechanism to invalidate all memory regions - * that were registered for "req". - */ -static void -frwr_op_unmap_safe(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, - bool sync) -{ - struct rpcrdma_mw *mw; - - while (!list_empty(&req->rl_registered)) { - mw = rpcrdma_pop_mw(&req->rl_registered); - if (sync) - frwr_op_recover_mr(mw); - else - rpcrdma_defer_mr_recovery(mw); - } -} - const struct rpcrdma_memreg_ops rpcrdma_frwr_memreg_ops = { .ro_map = frwr_op_map, .ro_unmap_sync = frwr_op_unmap_sync, - .ro_unmap_safe = frwr_op_unmap_safe, .ro_recover_mr = frwr_op_recover_mr, .ro_open = frwr_op_open, .ro_maxpages = frwr_op_maxpages, diff --git a/net/sunrpc/xprtrdma/rpc_rdma.c b/net/sunrpc/xprtrdma/rpc_rdma.c index ca4d6e4528f3..ed34dc0f144c 100644 --- a/net/sunrpc/xprtrdma/rpc_rdma.c +++ b/net/sunrpc/xprtrdma/rpc_rdma.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2014-2017 Oracle. All rights reserved. * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -75,11 +76,11 @@ static unsigned int rpcrdma_max_call_header_size(unsigned int maxsegs) /* Maximum Read list size */ maxsegs += 2; /* segment for head and tail buffers */ - size = maxsegs * sizeof(struct rpcrdma_read_chunk); + size = maxsegs * rpcrdma_readchunk_maxsz * sizeof(__be32); /* Minimal Read chunk size */ size += sizeof(__be32); /* segment count */ - size += sizeof(struct rpcrdma_segment); + size += rpcrdma_segment_maxsz * sizeof(__be32); size += sizeof(__be32); /* list discriminator */ dprintk("RPC: %s: max call header size = %u\n", @@ -102,7 +103,7 @@ static unsigned int rpcrdma_max_reply_header_size(unsigned int maxsegs) /* Maximum Write list size */ maxsegs += 2; /* segment for head and tail buffers */ size = sizeof(__be32); /* segment count */ - size += maxsegs * sizeof(struct rpcrdma_segment); + size += maxsegs * rpcrdma_segment_maxsz * sizeof(__be32); size += sizeof(__be32); /* list discriminator */ dprintk("RPC: %s: max reply header size = %u\n", @@ -169,40 +170,41 @@ static bool rpcrdma_results_inline(struct rpcrdma_xprt *r_xprt, return rqst->rq_rcv_buf.buflen <= ia->ri_max_inline_read; } -/* Split "vec" on page boundaries into segments. FMR registers pages, - * not a byte range. Other modes coalesce these segments into a single - * MR when they can. +/* Split @vec on page boundaries into SGEs. FMR registers pages, not + * a byte range. Other modes coalesce these SGEs into a single MR + * when they can. + * + * Returns pointer to next available SGE, and bumps the total number + * of SGEs consumed. */ -static int -rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg, int n) +static struct rpcrdma_mr_seg * +rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg, + unsigned int *n) { - size_t page_offset; - u32 remaining; + u32 remaining, page_offset; char *base; base = vec->iov_base; page_offset = offset_in_page(base); remaining = vec->iov_len; - while (remaining && n < RPCRDMA_MAX_SEGS) { - seg[n].mr_page = NULL; - seg[n].mr_offset = base; - seg[n].mr_len = min_t(u32, PAGE_SIZE - page_offset, remaining); - remaining -= seg[n].mr_len; - base += seg[n].mr_len; - ++n; + while (remaining) { + seg->mr_page = NULL; + seg->mr_offset = base; + seg->mr_len = min_t(u32, PAGE_SIZE - page_offset, remaining); + remaining -= seg->mr_len; + base += seg->mr_len; + ++seg; + ++(*n); page_offset = 0; } - return n; + return seg; } -/* - * Chunk assembly from upper layer xdr_buf. +/* Convert @xdrbuf into SGEs no larger than a page each. As they + * are registered, these SGEs are then coalesced into RDMA segments + * when the selected memreg mode supports it. * - * Prepare the passed-in xdr_buf into representation as RPC/RDMA chunk - * elements. Segments are then coalesced when registered, if possible - * within the selected memreg mode. - * - * Returns positive number of segments converted, or a negative errno. + * Returns positive number of SGEs consumed, or a negative errno. */ static int @@ -210,47 +212,41 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf, unsigned int pos, enum rpcrdma_chunktype type, struct rpcrdma_mr_seg *seg) { - int len, n, p, page_base; + unsigned long page_base; + unsigned int len, n; struct page **ppages; n = 0; - if (pos == 0) { - n = rpcrdma_convert_kvec(&xdrbuf->head[0], seg, n); - if (n == RPCRDMA_MAX_SEGS) - goto out_overflow; - } + if (pos == 0) + seg = rpcrdma_convert_kvec(&xdrbuf->head[0], seg, &n); len = xdrbuf->page_len; ppages = xdrbuf->pages + (xdrbuf->page_base >> PAGE_SHIFT); page_base = offset_in_page(xdrbuf->page_base); - p = 0; - while (len && n < RPCRDMA_MAX_SEGS) { - if (!ppages[p]) { - /* alloc the pagelist for receiving buffer */ - ppages[p] = alloc_page(GFP_ATOMIC); - if (!ppages[p]) + while (len) { + if (unlikely(!*ppages)) { + /* XXX: Certain upper layer operations do + * not provide receive buffer pages. + */ + *ppages = alloc_page(GFP_ATOMIC); + if (!*ppages) return -EAGAIN; } - seg[n].mr_page = ppages[p]; - seg[n].mr_offset = (void *)(unsigned long) page_base; - seg[n].mr_len = min_t(u32, PAGE_SIZE - page_base, len); - if (seg[n].mr_len > PAGE_SIZE) - goto out_overflow; - len -= seg[n].mr_len; + seg->mr_page = *ppages; + seg->mr_offset = (char *)page_base; + seg->mr_len = min_t(u32, PAGE_SIZE - page_base, len); + len -= seg->mr_len; + ++ppages; + ++seg; ++n; - ++p; - page_base = 0; /* page offset only applies to first page */ + page_base = 0; } - /* Message overflows the seg array */ - if (len && n == RPCRDMA_MAX_SEGS) - goto out_overflow; - /* When encoding a Read chunk, the tail iovec contains an * XDR pad and may be omitted. */ if (type == rpcrdma_readch && r_xprt->rx_ia.ri_implicit_roundup) - return n; + goto out; /* When encoding a Write chunk, some servers need to see an * extra segment for non-XDR-aligned Write chunks. The upper @@ -258,30 +254,81 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf, * for this purpose. */ if (type == rpcrdma_writech && r_xprt->rx_ia.ri_implicit_roundup) - return n; + goto out; - if (xdrbuf->tail[0].iov_len) { - n = rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, n); - if (n == RPCRDMA_MAX_SEGS) - goto out_overflow; - } + if (xdrbuf->tail[0].iov_len) + seg = rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, &n); +out: + if (unlikely(n > RPCRDMA_MAX_SEGS)) + return -EIO; return n; +} + +static inline int +encode_item_present(struct xdr_stream *xdr) +{ + __be32 *p; -out_overflow: - pr_err("rpcrdma: segment array overflow\n"); - return -EIO; + p = xdr_reserve_space(xdr, sizeof(*p)); + if (unlikely(!p)) + return -EMSGSIZE; + + *p = xdr_one; + return 0; } -static inline __be32 * +static inline int +encode_item_not_present(struct xdr_stream *xdr) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, sizeof(*p)); + if (unlikely(!p)) + return -EMSGSIZE; + + *p = xdr_zero; + return 0; +} + +static void xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mw *mw) { *iptr++ = cpu_to_be32(mw->mw_handle); *iptr++ = cpu_to_be32(mw->mw_length); - return xdr_encode_hyper(iptr, mw->mw_offset); + xdr_encode_hyper(iptr, mw->mw_offset); +} + +static int +encode_rdma_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, 4 * sizeof(*p)); + if (unlikely(!p)) + return -EMSGSIZE; + + xdr_encode_rdma_segment(p, mw); + return 0; } -/* XDR-encode the Read list. Supports encoding a list of read +static int +encode_read_segment(struct xdr_stream *xdr, struct rpcrdma_mw *mw, + u32 position) +{ + __be32 *p; + + p = xdr_reserve_space(xdr, 6 * sizeof(*p)); + if (unlikely(!p)) + return -EMSGSIZE; + + *p++ = xdr_one; /* Item present */ + *p++ = cpu_to_be32(position); + xdr_encode_rdma_segment(p, mw); + return 0; +} + +/* Register and XDR encode the Read list. Supports encoding a list of read * segments that belong to a single read chunk. * * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64): @@ -290,23 +337,20 @@ xdr_encode_rdma_segment(__be32 *iptr, struct rpcrdma_mw *mw) * N elements, position P (same P for all chunks of same arg!): * 1 - PHLOO - 1 - PHLOO - ... - 1 - PHLOO - 0 * - * Returns a pointer to the XDR word in the RDMA header following - * the end of the Read list, or an error pointer. + * Returns zero on success, or a negative errno if a failure occurred. + * @xdr is advanced to the next position in the stream. + * + * Only a single @pos value is currently supported. */ -static __be32 * -rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, - struct rpcrdma_req *req, struct rpc_rqst *rqst, - __be32 *iptr, enum rpcrdma_chunktype rtype) +static noinline int +rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, + struct rpc_rqst *rqst, enum rpcrdma_chunktype rtype) { + struct xdr_stream *xdr = &req->rl_stream; struct rpcrdma_mr_seg *seg; struct rpcrdma_mw *mw; unsigned int pos; - int n, nsegs; - - if (rtype == rpcrdma_noch) { - *iptr++ = xdr_zero; /* item not present */ - return iptr; - } + int nsegs; pos = rqst->rq_snd_buf.head[0].iov_len; if (rtype == rpcrdma_areadch) @@ -315,40 +359,33 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_snd_buf, pos, rtype, seg); if (nsegs < 0) - return ERR_PTR(nsegs); + return nsegs; do { - n = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, - false, &mw); - if (n < 0) - return ERR_PTR(n); + seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, + false, &mw); + if (IS_ERR(seg)) + return PTR_ERR(seg); rpcrdma_push_mw(mw, &req->rl_registered); - *iptr++ = xdr_one; /* item present */ - - /* All read segments in this chunk - * have the same "position". - */ - *iptr++ = cpu_to_be32(pos); - iptr = xdr_encode_rdma_segment(iptr, mw); + if (encode_read_segment(xdr, mw, pos) < 0) + return -EMSGSIZE; dprintk("RPC: %5u %s: pos %u %u@0x%016llx:0x%08x (%s)\n", rqst->rq_task->tk_pid, __func__, pos, mw->mw_length, (unsigned long long)mw->mw_offset, - mw->mw_handle, n < nsegs ? "more" : "last"); + mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last"); r_xprt->rx_stats.read_chunk_count++; - seg += n; - nsegs -= n; + nsegs -= mw->mw_nents; } while (nsegs); - /* Finish Read list */ - *iptr++ = xdr_zero; /* Next item not present */ - return iptr; + return 0; } -/* XDR-encode the Write list. Supports encoding a list containing - * one array of plain segments that belong to a single write chunk. +/* Register and XDR encode the Write list. Supports encoding a list + * containing one array of plain segments that belong to a single + * write chunk. * * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64): * @@ -356,66 +393,65 @@ rpcrdma_encode_read_list(struct rpcrdma_xprt *r_xprt, * N elements: * 1 - N - HLOO - HLOO - ... - HLOO - 0 * - * Returns a pointer to the XDR word in the RDMA header following - * the end of the Write list, or an error pointer. + * Returns zero on success, or a negative errno if a failure occurred. + * @xdr is advanced to the next position in the stream. + * + * Only a single Write chunk is currently supported. */ -static __be32 * +static noinline int rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, - struct rpc_rqst *rqst, __be32 *iptr, - enum rpcrdma_chunktype wtype) + struct rpc_rqst *rqst, enum rpcrdma_chunktype wtype) { + struct xdr_stream *xdr = &req->rl_stream; struct rpcrdma_mr_seg *seg; struct rpcrdma_mw *mw; - int n, nsegs, nchunks; + int nsegs, nchunks; __be32 *segcount; - if (wtype != rpcrdma_writech) { - *iptr++ = xdr_zero; /* no Write list present */ - return iptr; - } - seg = req->rl_segments; nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, rqst->rq_rcv_buf.head[0].iov_len, wtype, seg); if (nsegs < 0) - return ERR_PTR(nsegs); + return nsegs; - *iptr++ = xdr_one; /* Write list present */ - segcount = iptr++; /* save location of segment count */ + if (encode_item_present(xdr) < 0) + return -EMSGSIZE; + segcount = xdr_reserve_space(xdr, sizeof(*segcount)); + if (unlikely(!segcount)) + return -EMSGSIZE; + /* Actual value encoded below */ nchunks = 0; do { - n = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, - true, &mw); - if (n < 0) - return ERR_PTR(n); + seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, + true, &mw); + if (IS_ERR(seg)) + return PTR_ERR(seg); rpcrdma_push_mw(mw, &req->rl_registered); - iptr = xdr_encode_rdma_segment(iptr, mw); + if (encode_rdma_segment(xdr, mw) < 0) + return -EMSGSIZE; dprintk("RPC: %5u %s: %u@0x016%llx:0x%08x (%s)\n", rqst->rq_task->tk_pid, __func__, mw->mw_length, (unsigned long long)mw->mw_offset, - mw->mw_handle, n < nsegs ? "more" : "last"); + mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last"); r_xprt->rx_stats.write_chunk_count++; r_xprt->rx_stats.total_rdma_request += seg->mr_len; nchunks++; - seg += n; - nsegs -= n; + nsegs -= mw->mw_nents; } while (nsegs); /* Update count of segments in this Write chunk */ *segcount = cpu_to_be32(nchunks); - /* Finish Write list */ - *iptr++ = xdr_zero; /* Next item not present */ - return iptr; + return 0; } -/* XDR-encode the Reply chunk. Supports encoding an array of plain - * segments that belong to a single write (reply) chunk. +/* Register and XDR encode the Reply chunk. Supports encoding an array + * of plain segments that belong to a single write (reply) chunk. * * Encoding key for single-list chunks (HLOO = Handle32 Length32 Offset64): * @@ -423,81 +459,113 @@ rpcrdma_encode_write_list(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, * N elements: * 1 - N - HLOO - HLOO - ... - HLOO * - * Returns a pointer to the XDR word in the RDMA header following - * the end of the Reply chunk, or an error pointer. + * Returns zero on success, or a negative errno if a failure occurred. + * @xdr is advanced to the next position in the stream. */ -static __be32 * -rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, - struct rpcrdma_req *req, struct rpc_rqst *rqst, - __be32 *iptr, enum rpcrdma_chunktype wtype) +static noinline int +rpcrdma_encode_reply_chunk(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, + struct rpc_rqst *rqst, enum rpcrdma_chunktype wtype) { + struct xdr_stream *xdr = &req->rl_stream; struct rpcrdma_mr_seg *seg; struct rpcrdma_mw *mw; - int n, nsegs, nchunks; + int nsegs, nchunks; __be32 *segcount; - if (wtype != rpcrdma_replych) { - *iptr++ = xdr_zero; /* no Reply chunk present */ - return iptr; - } - seg = req->rl_segments; nsegs = rpcrdma_convert_iovs(r_xprt, &rqst->rq_rcv_buf, 0, wtype, seg); if (nsegs < 0) - return ERR_PTR(nsegs); + return nsegs; - *iptr++ = xdr_one; /* Reply chunk present */ - segcount = iptr++; /* save location of segment count */ + if (encode_item_present(xdr) < 0) + return -EMSGSIZE; + segcount = xdr_reserve_space(xdr, sizeof(*segcount)); + if (unlikely(!segcount)) + return -EMSGSIZE; + /* Actual value encoded below */ nchunks = 0; do { - n = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, - true, &mw); - if (n < 0) - return ERR_PTR(n); + seg = r_xprt->rx_ia.ri_ops->ro_map(r_xprt, seg, nsegs, + true, &mw); + if (IS_ERR(seg)) + return PTR_ERR(seg); rpcrdma_push_mw(mw, &req->rl_registered); - iptr = xdr_encode_rdma_segment(iptr, mw); + if (encode_rdma_segment(xdr, mw) < 0) + return -EMSGSIZE; dprintk("RPC: %5u %s: %u@0x%016llx:0x%08x (%s)\n", rqst->rq_task->tk_pid, __func__, mw->mw_length, (unsigned long long)mw->mw_offset, - mw->mw_handle, n < nsegs ? "more" : "last"); + mw->mw_handle, mw->mw_nents < nsegs ? "more" : "last"); r_xprt->rx_stats.reply_chunk_count++; r_xprt->rx_stats.total_rdma_request += seg->mr_len; nchunks++; - seg += n; - nsegs -= n; + nsegs -= mw->mw_nents; } while (nsegs); /* Update count of segments in the Reply chunk */ *segcount = cpu_to_be32(nchunks); - return iptr; + return 0; +} + +/** + * rpcrdma_unmap_sendctx - DMA-unmap Send buffers + * @sc: sendctx containing SGEs to unmap + * + */ +void +rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc) +{ + struct rpcrdma_ia *ia = &sc->sc_xprt->rx_ia; + struct ib_sge *sge; + unsigned int count; + + dprintk("RPC: %s: unmapping %u sges for sc=%p\n", + __func__, sc->sc_unmap_count, sc); + + /* The first two SGEs contain the transport header and + * the inline buffer. These are always left mapped so + * they can be cheaply re-used. + */ + sge = &sc->sc_sges[2]; + for (count = sc->sc_unmap_count; count; ++sge, --count) + ib_dma_unmap_page(ia->ri_device, + sge->addr, sge->length, DMA_TO_DEVICE); + + if (test_and_clear_bit(RPCRDMA_REQ_F_TX_RESOURCES, &sc->sc_req->rl_flags)) { + smp_mb__after_atomic(); + wake_up_bit(&sc->sc_req->rl_flags, RPCRDMA_REQ_F_TX_RESOURCES); + } } -/* Prepare the RPC-over-RDMA header SGE. +/* Prepare an SGE for the RPC-over-RDMA transport header. */ static bool rpcrdma_prepare_hdr_sge(struct rpcrdma_ia *ia, struct rpcrdma_req *req, u32 len) { + struct rpcrdma_sendctx *sc = req->rl_sendctx; struct rpcrdma_regbuf *rb = req->rl_rdmabuf; - struct ib_sge *sge = &req->rl_send_sge[0]; + struct ib_sge *sge = sc->sc_sges; - if (unlikely(!rpcrdma_regbuf_is_mapped(rb))) { - if (!__rpcrdma_dma_map_regbuf(ia, rb)) - return false; - sge->addr = rdmab_addr(rb); - sge->lkey = rdmab_lkey(rb); - } + if (!rpcrdma_dma_map_regbuf(ia, rb)) + goto out_regbuf; + sge->addr = rdmab_addr(rb); sge->length = len; + sge->lkey = rdmab_lkey(rb); ib_dma_sync_single_for_device(rdmab_device(rb), sge->addr, sge->length, DMA_TO_DEVICE); - req->rl_send_wr.num_sge++; + sc->sc_wr.num_sge++; return true; + +out_regbuf: + pr_err("rpcrdma: failed to DMA map a Send buffer\n"); + return false; } /* Prepare the Send SGEs. The head and tail iovec, and each entry @@ -507,10 +575,11 @@ static bool rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req, struct xdr_buf *xdr, enum rpcrdma_chunktype rtype) { + struct rpcrdma_sendctx *sc = req->rl_sendctx; unsigned int sge_no, page_base, len, remaining; struct rpcrdma_regbuf *rb = req->rl_sendbuf; struct ib_device *device = ia->ri_device; - struct ib_sge *sge = req->rl_send_sge; + struct ib_sge *sge = sc->sc_sges; u32 lkey = ia->ri_pd->local_dma_lkey; struct page *page, **ppages; @@ -518,7 +587,7 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req, * DMA-mapped. Sync the content that has changed. */ if (!rpcrdma_dma_map_regbuf(ia, rb)) - return false; + goto out_regbuf; sge_no = 1; sge[sge_no].addr = rdmab_addr(rb); sge[sge_no].length = xdr->head[0].iov_len; @@ -573,7 +642,7 @@ rpcrdma_prepare_msg_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req, sge[sge_no].length = len; sge[sge_no].lkey = lkey; - req->rl_mapped_sges++; + sc->sc_unmap_count++; ppages++; remaining -= len; page_base = 0; @@ -599,89 +668,109 @@ map_tail: goto out_mapping_err; sge[sge_no].length = len; sge[sge_no].lkey = lkey; - req->rl_mapped_sges++; + sc->sc_unmap_count++; } out: - req->rl_send_wr.num_sge = sge_no + 1; + sc->sc_wr.num_sge += sge_no; + if (sc->sc_unmap_count) + __set_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags); return true; +out_regbuf: + pr_err("rpcrdma: failed to DMA map a Send buffer\n"); + return false; + out_mapping_overflow: + rpcrdma_unmap_sendctx(sc); pr_err("rpcrdma: too many Send SGEs (%u)\n", sge_no); return false; out_mapping_err: + rpcrdma_unmap_sendctx(sc); pr_err("rpcrdma: Send mapping error\n"); return false; } -bool -rpcrdma_prepare_send_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req, - u32 hdrlen, struct xdr_buf *xdr, - enum rpcrdma_chunktype rtype) +/** + * rpcrdma_prepare_send_sges - Construct SGEs for a Send WR + * @r_xprt: controlling transport + * @req: context of RPC Call being marshalled + * @hdrlen: size of transport header, in bytes + * @xdr: xdr_buf containing RPC Call + * @rtype: chunk type being encoded + * + * Returns 0 on success; otherwise a negative errno is returned. + */ +int +rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt, + struct rpcrdma_req *req, u32 hdrlen, + struct xdr_buf *xdr, enum rpcrdma_chunktype rtype) { - req->rl_send_wr.num_sge = 0; - req->rl_mapped_sges = 0; + req->rl_sendctx = rpcrdma_sendctx_get_locked(&r_xprt->rx_buf); + if (!req->rl_sendctx) + return -ENOBUFS; + req->rl_sendctx->sc_wr.num_sge = 0; + req->rl_sendctx->sc_unmap_count = 0; + req->rl_sendctx->sc_req = req; + __clear_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags); - if (!rpcrdma_prepare_hdr_sge(ia, req, hdrlen)) - goto out_map; + if (!rpcrdma_prepare_hdr_sge(&r_xprt->rx_ia, req, hdrlen)) + return -EIO; if (rtype != rpcrdma_areadch) - if (!rpcrdma_prepare_msg_sges(ia, req, xdr, rtype)) - goto out_map; - - return true; - -out_map: - pr_err("rpcrdma: failed to DMA map a Send buffer\n"); - return false; -} - -void -rpcrdma_unmap_sges(struct rpcrdma_ia *ia, struct rpcrdma_req *req) -{ - struct ib_device *device = ia->ri_device; - struct ib_sge *sge; - int count; + if (!rpcrdma_prepare_msg_sges(&r_xprt->rx_ia, req, xdr, rtype)) + return -EIO; - sge = &req->rl_send_sge[2]; - for (count = req->rl_mapped_sges; count--; sge++) - ib_dma_unmap_page(device, sge->addr, sge->length, - DMA_TO_DEVICE); - req->rl_mapped_sges = 0; + return 0; } -/* - * Marshal a request: the primary job of this routine is to choose - * the transfer modes. See comments below. +/** + * rpcrdma_marshal_req - Marshal and send one RPC request + * @r_xprt: controlling transport + * @rqst: RPC request to be marshaled + * + * For the RPC in "rqst", this function: + * - Chooses the transfer mode (eg., RDMA_MSG or RDMA_NOMSG) + * - Registers Read, Write, and Reply chunks + * - Constructs the transport header + * - Posts a Send WR to send the transport header and request * - * Returns zero on success, otherwise a negative errno. + * Returns: + * %0 if the RPC was sent successfully, + * %-ENOTCONN if the connection was lost, + * %-EAGAIN if not enough pages are available for on-demand reply buffer, + * %-ENOBUFS if no MRs are available to register chunks, + * %-EMSGSIZE if the transport header is too small, + * %-EIO if a permanent problem occurred while marshaling. */ - int -rpcrdma_marshal_req(struct rpc_rqst *rqst) +rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst) { - struct rpc_xprt *xprt = rqst->rq_xprt; - struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(xprt); struct rpcrdma_req *req = rpcr_to_rdmar(rqst); + struct xdr_stream *xdr = &req->rl_stream; enum rpcrdma_chunktype rtype, wtype; - struct rpcrdma_msg *headerp; bool ddp_allowed; - ssize_t hdrlen; - size_t rpclen; - __be32 *iptr; + __be32 *p; + int ret; #if defined(CONFIG_SUNRPC_BACKCHANNEL) if (test_bit(RPC_BC_PA_IN_USE, &rqst->rq_bc_pa_state)) return rpcrdma_bc_marshal_reply(rqst); #endif - headerp = rdmab_to_msg(req->rl_rdmabuf); - /* don't byte-swap XID, it's already done in request */ - headerp->rm_xid = rqst->rq_xid; - headerp->rm_vers = rpcrdma_version; - headerp->rm_credit = cpu_to_be32(r_xprt->rx_buf.rb_max_requests); - headerp->rm_type = rdma_msg; + rpcrdma_set_xdrlen(&req->rl_hdrbuf, 0); + xdr_init_encode(xdr, &req->rl_hdrbuf, + req->rl_rdmabuf->rg_base); + + /* Fixed header fields */ + ret = -EMSGSIZE; + p = xdr_reserve_space(xdr, 4 * sizeof(*p)); + if (!p) + goto out_err; + *p++ = rqst->rq_xid; + *p++ = rpcrdma_version; + *p++ = cpu_to_be32(r_xprt->rx_buf.rb_max_requests); /* When the ULP employs a GSS flavor that guarantees integrity * or privacy, direct data placement of individual data items @@ -721,22 +810,17 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) * by themselves are larger than the inline threshold. */ if (rpcrdma_args_inline(r_xprt, rqst)) { + *p++ = rdma_msg; rtype = rpcrdma_noch; - rpclen = rqst->rq_snd_buf.len; } else if (ddp_allowed && rqst->rq_snd_buf.flags & XDRBUF_WRITE) { + *p++ = rdma_msg; rtype = rpcrdma_readch; - rpclen = rqst->rq_snd_buf.head[0].iov_len + - rqst->rq_snd_buf.tail[0].iov_len; } else { r_xprt->rx_stats.nomsg_call_count++; - headerp->rm_type = htonl(RDMA_NOMSG); + *p++ = rdma_nomsg; rtype = rpcrdma_areadch; - rpclen = 0; } - req->rl_xid = rqst->rq_xid; - rpcrdma_insert_req(&r_xprt->rx_buf, req); - /* This implementation supports the following combinations * of chunk lists in one RPC-over-RDMA Call message: * @@ -759,79 +843,48 @@ rpcrdma_marshal_req(struct rpc_rqst *rqst) * send a Call message with a Position Zero Read chunk and a * regular Read chunk at the same time. */ - iptr = headerp->rm_body.rm_chunks; - iptr = rpcrdma_encode_read_list(r_xprt, req, rqst, iptr, rtype); - if (IS_ERR(iptr)) + if (rtype != rpcrdma_noch) { + ret = rpcrdma_encode_read_list(r_xprt, req, rqst, rtype); + if (ret) + goto out_err; + } + ret = encode_item_not_present(xdr); + if (ret) goto out_err; - iptr = rpcrdma_encode_write_list(r_xprt, req, rqst, iptr, wtype); - if (IS_ERR(iptr)) + + if (wtype == rpcrdma_writech) { + ret = rpcrdma_encode_write_list(r_xprt, req, rqst, wtype); + if (ret) + goto out_err; + } + ret = encode_item_not_present(xdr); + if (ret) goto out_err; - iptr = rpcrdma_encode_reply_chunk(r_xprt, req, rqst, iptr, wtype); - if (IS_ERR(iptr)) + + if (wtype != rpcrdma_replych) + ret = encode_item_not_present(xdr); + else + ret = rpcrdma_encode_reply_chunk(r_xprt, req, rqst, wtype); + if (ret) goto out_err; - hdrlen = (unsigned char *)iptr - (unsigned char *)headerp; - dprintk("RPC: %5u %s: %s/%s: hdrlen %zd rpclen %zd\n", + dprintk("RPC: %5u %s: %s/%s: hdrlen %u rpclen\n", rqst->rq_task->tk_pid, __func__, transfertypes[rtype], transfertypes[wtype], - hdrlen, rpclen); + xdr_stream_pos(xdr)); - if (!rpcrdma_prepare_send_sges(&r_xprt->rx_ia, req, hdrlen, - &rqst->rq_snd_buf, rtype)) { - iptr = ERR_PTR(-EIO); + ret = rpcrdma_prepare_send_sges(r_xprt, req, xdr_stream_pos(xdr), + &rqst->rq_snd_buf, rtype); + if (ret) goto out_err; - } return 0; out_err: - if (PTR_ERR(iptr) != -ENOBUFS) { - pr_err("rpcrdma: rpcrdma_marshal_req failed, status %ld\n", - PTR_ERR(iptr)); + if (ret != -ENOBUFS) { + pr_err("rpcrdma: header marshaling failed (%d)\n", ret); r_xprt->rx_stats.failed_marshal_count++; } - return PTR_ERR(iptr); -} - -/* - * Chase down a received write or reply chunklist to get length - * RDMA'd by server. See map at rpcrdma_create_chunks()! :-) - */ -static int -rpcrdma_count_chunks(struct rpcrdma_rep *rep, int wrchunk, __be32 **iptrp) -{ - unsigned int i, total_len; - struct rpcrdma_write_chunk *cur_wchunk; - char *base = (char *)rdmab_to_msg(rep->rr_rdmabuf); - - i = be32_to_cpu(**iptrp); - cur_wchunk = (struct rpcrdma_write_chunk *) (*iptrp + 1); - total_len = 0; - while (i--) { - struct rpcrdma_segment *seg = &cur_wchunk->wc_target; - ifdebug(FACILITY) { - u64 off; - xdr_decode_hyper((__be32 *)&seg->rs_offset, &off); - dprintk("RPC: %s: chunk %d@0x%016llx:0x%08x\n", - __func__, - be32_to_cpu(seg->rs_length), - (unsigned long long)off, - be32_to_cpu(seg->rs_handle)); - } - total_len += be32_to_cpu(seg->rs_length); - ++cur_wchunk; - } - /* check and adjust for properly terminated write chunk */ - if (wrchunk) { - __be32 *w = (__be32 *) cur_wchunk; - if (*w++ != xdr_zero) - return -1; - cur_wchunk = (struct rpcrdma_write_chunk *) w; - } - if ((char *)cur_wchunk > base + rep->rr_len) - return -1; - - *iptrp = (__be32 *) cur_wchunk; - return total_len; + return ret; } /** @@ -949,196 +1002,417 @@ rpcrdma_mark_remote_invalidation(struct list_head *mws, } } -#if defined(CONFIG_SUNRPC_BACKCHANNEL) /* By convention, backchannel calls arrive via rdma_msg type * messages, and never populate the chunk lists. This makes * the RPC/RDMA header small and fixed in size, so it is * straightforward to check the RPC header's direction field. */ static bool -rpcrdma_is_bcall(struct rpcrdma_msg *headerp) +rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep) +#if defined(CONFIG_SUNRPC_BACKCHANNEL) { - __be32 *p = (__be32 *)headerp; + struct xdr_stream *xdr = &rep->rr_stream; + __be32 *p; - if (headerp->rm_type != rdma_msg) + if (rep->rr_proc != rdma_msg) return false; - if (headerp->rm_body.rm_chunks[0] != xdr_zero) + + /* Peek at stream contents without advancing. */ + p = xdr_inline_decode(xdr, 0); + + /* Chunk lists */ + if (*p++ != xdr_zero) return false; - if (headerp->rm_body.rm_chunks[1] != xdr_zero) + if (*p++ != xdr_zero) return false; - if (headerp->rm_body.rm_chunks[2] != xdr_zero) + if (*p++ != xdr_zero) return false; - /* sanity */ - if (p[7] != headerp->rm_xid) + /* RPC header */ + if (*p++ != rep->rr_xid) return false; - /* call direction */ - if (p[8] != cpu_to_be32(RPC_CALL)) + if (*p != cpu_to_be32(RPC_CALL)) return false; + /* Now that we are sure this is a backchannel call, + * advance to the RPC header. + */ + p = xdr_inline_decode(xdr, 3 * sizeof(*p)); + if (unlikely(!p)) + goto out_short; + + rpcrdma_bc_receive_call(r_xprt, rep); + return true; + +out_short: + pr_warn("RPC/RDMA short backward direction call\n"); + if (rpcrdma_ep_post_recv(&r_xprt->rx_ia, rep)) + xprt_disconnect_done(&r_xprt->rx_xprt); return true; } +#else /* CONFIG_SUNRPC_BACKCHANNEL */ +{ + return false; +} #endif /* CONFIG_SUNRPC_BACKCHANNEL */ +static int decode_rdma_segment(struct xdr_stream *xdr, u32 *length) +{ + __be32 *p; + + p = xdr_inline_decode(xdr, 4 * sizeof(*p)); + if (unlikely(!p)) + return -EIO; + + ifdebug(FACILITY) { + u64 offset; + u32 handle; + + handle = be32_to_cpup(p++); + *length = be32_to_cpup(p++); + xdr_decode_hyper(p, &offset); + dprintk("RPC: %s: segment %u@0x%016llx:0x%08x\n", + __func__, *length, (unsigned long long)offset, + handle); + } else { + *length = be32_to_cpup(p + 1); + } + + return 0; +} + +static int decode_write_chunk(struct xdr_stream *xdr, u32 *length) +{ + u32 segcount, seglength; + __be32 *p; + + p = xdr_inline_decode(xdr, sizeof(*p)); + if (unlikely(!p)) + return -EIO; + + *length = 0; + segcount = be32_to_cpup(p); + while (segcount--) { + if (decode_rdma_segment(xdr, &seglength)) + return -EIO; + *length += seglength; + } + + dprintk("RPC: %s: segcount=%u, %u bytes\n", + __func__, be32_to_cpup(p), *length); + return 0; +} + +/* In RPC-over-RDMA Version One replies, a Read list is never + * expected. This decoder is a stub that returns an error if + * a Read list is present. + */ +static int decode_read_list(struct xdr_stream *xdr) +{ + __be32 *p; + + p = xdr_inline_decode(xdr, sizeof(*p)); + if (unlikely(!p)) + return -EIO; + if (unlikely(*p != xdr_zero)) + return -EIO; + return 0; +} + +/* Supports only one Write chunk in the Write list + */ +static int decode_write_list(struct xdr_stream *xdr, u32 *length) +{ + u32 chunklen; + bool first; + __be32 *p; + + *length = 0; + first = true; + do { + p = xdr_inline_decode(xdr, sizeof(*p)); + if (unlikely(!p)) + return -EIO; + if (*p == xdr_zero) + break; + if (!first) + return -EIO; + + if (decode_write_chunk(xdr, &chunklen)) + return -EIO; + *length += chunklen; + first = false; + } while (true); + return 0; +} + +static int decode_reply_chunk(struct xdr_stream *xdr, u32 *length) +{ + __be32 *p; + + p = xdr_inline_decode(xdr, sizeof(*p)); + if (unlikely(!p)) + return -EIO; + + *length = 0; + if (*p != xdr_zero) + if (decode_write_chunk(xdr, length)) + return -EIO; + return 0; +} + +static int +rpcrdma_decode_msg(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep, + struct rpc_rqst *rqst) +{ + struct xdr_stream *xdr = &rep->rr_stream; + u32 writelist, replychunk, rpclen; + char *base; + + /* Decode the chunk lists */ + if (decode_read_list(xdr)) + return -EIO; + if (decode_write_list(xdr, &writelist)) + return -EIO; + if (decode_reply_chunk(xdr, &replychunk)) + return -EIO; + + /* RDMA_MSG sanity checks */ + if (unlikely(replychunk)) + return -EIO; + + /* Build the RPC reply's Payload stream in rqst->rq_rcv_buf */ + base = (char *)xdr_inline_decode(xdr, 0); + rpclen = xdr_stream_remaining(xdr); + r_xprt->rx_stats.fixup_copy_count += + rpcrdma_inline_fixup(rqst, base, rpclen, writelist & 3); + + r_xprt->rx_stats.total_rdma_reply += writelist; + return rpclen + xdr_align_size(writelist); +} + +static noinline int +rpcrdma_decode_nomsg(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep) +{ + struct xdr_stream *xdr = &rep->rr_stream; + u32 writelist, replychunk; + + /* Decode the chunk lists */ + if (decode_read_list(xdr)) + return -EIO; + if (decode_write_list(xdr, &writelist)) + return -EIO; + if (decode_reply_chunk(xdr, &replychunk)) + return -EIO; + + /* RDMA_NOMSG sanity checks */ + if (unlikely(writelist)) + return -EIO; + if (unlikely(!replychunk)) + return -EIO; + + /* Reply chunk buffer already is the reply vector */ + r_xprt->rx_stats.total_rdma_reply += replychunk; + return replychunk; +} + +static noinline int +rpcrdma_decode_error(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep, + struct rpc_rqst *rqst) +{ + struct xdr_stream *xdr = &rep->rr_stream; + __be32 *p; + + p = xdr_inline_decode(xdr, sizeof(*p)); + if (unlikely(!p)) + return -EIO; + + switch (*p) { + case err_vers: + p = xdr_inline_decode(xdr, 2 * sizeof(*p)); + if (!p) + break; + dprintk("RPC: %5u: %s: server reports version error (%u-%u)\n", + rqst->rq_task->tk_pid, __func__, + be32_to_cpup(p), be32_to_cpu(*(p + 1))); + break; + case err_chunk: + dprintk("RPC: %5u: %s: server reports header decoding error\n", + rqst->rq_task->tk_pid, __func__); + break; + default: + dprintk("RPC: %5u: %s: server reports unrecognized error %d\n", + rqst->rq_task->tk_pid, __func__, be32_to_cpup(p)); + } + + r_xprt->rx_stats.bad_reply_count++; + return -EREMOTEIO; +} + +/* Perform XID lookup, reconstruction of the RPC reply, and + * RPC completion while holding the transport lock to ensure + * the rep, rqst, and rq_task pointers remain stable. + */ +void rpcrdma_complete_rqst(struct rpcrdma_rep *rep) +{ + struct rpcrdma_xprt *r_xprt = rep->rr_rxprt; + struct rpc_xprt *xprt = &r_xprt->rx_xprt; + struct rpc_rqst *rqst = rep->rr_rqst; + unsigned long cwnd; + int status; + + xprt->reestablish_timeout = 0; + + switch (rep->rr_proc) { + case rdma_msg: + status = rpcrdma_decode_msg(r_xprt, rep, rqst); + break; + case rdma_nomsg: + status = rpcrdma_decode_nomsg(r_xprt, rep); + break; + case rdma_error: + status = rpcrdma_decode_error(r_xprt, rep, rqst); + break; + default: + status = -EIO; + } + if (status < 0) + goto out_badheader; + +out: + spin_lock(&xprt->recv_lock); + cwnd = xprt->cwnd; + xprt->cwnd = r_xprt->rx_buf.rb_credits << RPC_CWNDSHIFT; + if (xprt->cwnd > cwnd) + xprt_release_rqst_cong(rqst->rq_task); + + xprt_complete_rqst(rqst->rq_task, status); + xprt_unpin_rqst(rqst); + spin_unlock(&xprt->recv_lock); + return; + +/* If the incoming reply terminated a pending RPC, the next + * RPC call will post a replacement receive buffer as it is + * being marshaled. + */ +out_badheader: + dprintk("RPC: %5u %s: invalid rpcrdma reply (type %u)\n", + rqst->rq_task->tk_pid, __func__, be32_to_cpu(rep->rr_proc)); + r_xprt->rx_stats.bad_reply_count++; + status = -EIO; + goto out; +} + +void rpcrdma_release_rqst(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req) +{ + /* Invalidate and unmap the data payloads before waking + * the waiting application. This guarantees the memory + * regions are properly fenced from the server before the + * application accesses the data. It also ensures proper + * send flow control: waking the next RPC waits until this + * RPC has relinquished all its Send Queue entries. + */ + if (!list_empty(&req->rl_registered)) + r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, + &req->rl_registered); + + /* Ensure that any DMA mapped pages associated with + * the Send of the RPC Call have been unmapped before + * allowing the RPC to complete. This protects argument + * memory not controlled by the RPC client from being + * re-used before we're done with it. + */ + if (test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) { + r_xprt->rx_stats.reply_waits_for_send++; + out_of_line_wait_on_bit(&req->rl_flags, + RPCRDMA_REQ_F_TX_RESOURCES, + bit_wait, + TASK_UNINTERRUPTIBLE); + } +} + +/* Reply handling runs in the poll worker thread. Anything that + * might wait is deferred to a separate workqueue. + */ +void rpcrdma_deferred_completion(struct work_struct *work) +{ + struct rpcrdma_rep *rep = + container_of(work, struct rpcrdma_rep, rr_work); + struct rpcrdma_req *req = rpcr_to_rdmar(rep->rr_rqst); + + rpcrdma_mark_remote_invalidation(&req->rl_registered, rep); + rpcrdma_release_rqst(rep->rr_rxprt, req); + rpcrdma_complete_rqst(rep); +} + /* Process received RPC/RDMA messages. * * Errors must result in the RPC task either being awakened, or * allowed to timeout, to discover the errors at that time. */ -void -rpcrdma_reply_handler(struct work_struct *work) +void rpcrdma_reply_handler(struct rpcrdma_rep *rep) { - struct rpcrdma_rep *rep = - container_of(work, struct rpcrdma_rep, rr_work); struct rpcrdma_xprt *r_xprt = rep->rr_rxprt; - struct rpcrdma_buffer *buf = &r_xprt->rx_buf; struct rpc_xprt *xprt = &r_xprt->rx_xprt; - struct rpcrdma_msg *headerp; + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; struct rpcrdma_req *req; struct rpc_rqst *rqst; - __be32 *iptr; - int rdmalen, status, rmerr; - unsigned long cwnd; - struct list_head mws; + u32 credits; + __be32 *p; dprintk("RPC: %s: incoming rep %p\n", __func__, rep); - if (rep->rr_len == RPCRDMA_BAD_LEN) + if (rep->rr_hdrbuf.head[0].iov_len == 0) goto out_badstatus; - if (rep->rr_len < RPCRDMA_HDRLEN_ERR) + + xdr_init_decode(&rep->rr_stream, &rep->rr_hdrbuf, + rep->rr_hdrbuf.head[0].iov_base); + + /* Fixed transport header fields */ + p = xdr_inline_decode(&rep->rr_stream, 4 * sizeof(*p)); + if (unlikely(!p)) goto out_shortreply; + rep->rr_xid = *p++; + rep->rr_vers = *p++; + credits = be32_to_cpu(*p++); + rep->rr_proc = *p++; - headerp = rdmab_to_msg(rep->rr_rdmabuf); -#if defined(CONFIG_SUNRPC_BACKCHANNEL) - if (rpcrdma_is_bcall(headerp)) - goto out_bcall; -#endif + if (rep->rr_vers != rpcrdma_version) + goto out_badversion; + + if (rpcrdma_is_bcall(r_xprt, rep)) + return; /* Match incoming rpcrdma_rep to an rpcrdma_req to * get context for handling any incoming chunks. */ - spin_lock(&buf->rb_lock); - req = rpcrdma_lookup_req_locked(&r_xprt->rx_buf, - headerp->rm_xid); - if (!req) - goto out_nomatch; - if (req->rl_reply) - goto out_duplicate; - - list_replace_init(&req->rl_registered, &mws); - rpcrdma_mark_remote_invalidation(&mws, rep); - - /* Avoid races with signals and duplicate replies - * by marking this req as matched. - */ - req->rl_reply = rep; - spin_unlock(&buf->rb_lock); - - dprintk("RPC: %s: reply %p completes request %p (xid 0x%08x)\n", - __func__, rep, req, be32_to_cpu(headerp->rm_xid)); - - /* Invalidate and unmap the data payloads before waking the - * waiting application. This guarantees the memory regions - * are properly fenced from the server before the application - * accesses the data. It also ensures proper send flow control: - * waking the next RPC waits until this RPC has relinquished - * all its Send Queue entries. - */ - if (!list_empty(&mws)) - r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, &mws); - - /* Perform XID lookup, reconstruction of the RPC reply, and - * RPC completion while holding the transport lock to ensure - * the rep, rqst, and rq_task pointers remain stable. - */ - spin_lock_bh(&xprt->transport_lock); - rqst = xprt_lookup_rqst(xprt, headerp->rm_xid); + spin_lock(&xprt->recv_lock); + rqst = xprt_lookup_rqst(xprt, rep->rr_xid); if (!rqst) goto out_norqst; - xprt->reestablish_timeout = 0; - if (headerp->rm_vers != rpcrdma_version) - goto out_badversion; - - /* check for expected message types */ - /* The order of some of these tests is important. */ - switch (headerp->rm_type) { - case rdma_msg: - /* never expect read chunks */ - /* never expect reply chunks (two ways to check) */ - if (headerp->rm_body.rm_chunks[0] != xdr_zero || - (headerp->rm_body.rm_chunks[1] == xdr_zero && - headerp->rm_body.rm_chunks[2] != xdr_zero)) - goto badheader; - if (headerp->rm_body.rm_chunks[1] != xdr_zero) { - /* count any expected write chunks in read reply */ - /* start at write chunk array count */ - iptr = &headerp->rm_body.rm_chunks[2]; - rdmalen = rpcrdma_count_chunks(rep, 1, &iptr); - /* check for validity, and no reply chunk after */ - if (rdmalen < 0 || *iptr++ != xdr_zero) - goto badheader; - rep->rr_len -= - ((unsigned char *)iptr - (unsigned char *)headerp); - status = rep->rr_len + rdmalen; - r_xprt->rx_stats.total_rdma_reply += rdmalen; - /* special case - last chunk may omit padding */ - if (rdmalen &= 3) { - rdmalen = 4 - rdmalen; - status += rdmalen; - } - } else { - /* else ordinary inline */ - rdmalen = 0; - iptr = (__be32 *)((unsigned char *)headerp + - RPCRDMA_HDRLEN_MIN); - rep->rr_len -= RPCRDMA_HDRLEN_MIN; - status = rep->rr_len; - } + xprt_pin_rqst(rqst); - r_xprt->rx_stats.fixup_copy_count += - rpcrdma_inline_fixup(rqst, (char *)iptr, rep->rr_len, - rdmalen); - break; + if (credits == 0) + credits = 1; /* don't deadlock */ + else if (credits > buf->rb_max_requests) + credits = buf->rb_max_requests; + buf->rb_credits = credits; - case rdma_nomsg: - /* never expect read or write chunks, always reply chunks */ - if (headerp->rm_body.rm_chunks[0] != xdr_zero || - headerp->rm_body.rm_chunks[1] != xdr_zero || - headerp->rm_body.rm_chunks[2] != xdr_one) - goto badheader; - iptr = (__be32 *)((unsigned char *)headerp + - RPCRDMA_HDRLEN_MIN); - rdmalen = rpcrdma_count_chunks(rep, 0, &iptr); - if (rdmalen < 0) - goto badheader; - r_xprt->rx_stats.total_rdma_reply += rdmalen; - /* Reply chunk buffer already is the reply vector - no fixup. */ - status = rdmalen; - break; + spin_unlock(&xprt->recv_lock); - case rdma_error: - goto out_rdmaerr; - -badheader: - default: - dprintk("RPC: %5u %s: invalid rpcrdma reply (type %u)\n", - rqst->rq_task->tk_pid, __func__, - be32_to_cpu(headerp->rm_type)); - status = -EIO; - r_xprt->rx_stats.bad_reply_count++; - break; - } + req = rpcr_to_rdmar(rqst); + req->rl_reply = rep; + rep->rr_rqst = rqst; + clear_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags); -out: - cwnd = xprt->cwnd; - xprt->cwnd = atomic_read(&r_xprt->rx_buf.rb_credits) << RPC_CWNDSHIFT; - if (xprt->cwnd > cwnd) - xprt_release_rqst_cong(rqst->rq_task); + dprintk("RPC: %s: reply %p completes request %p (xid 0x%08x)\n", + __func__, rep, req, be32_to_cpu(rep->rr_xid)); - xprt_complete_rqst(rqst->rq_task, status); - spin_unlock_bh(&xprt->transport_lock); - dprintk("RPC: %s: xprt_complete_rqst(0x%p, 0x%p, %d)\n", - __func__, xprt, rqst, status); + if (list_empty(&req->rl_registered) && + !test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) + rpcrdma_complete_rqst(rep); + else + queue_work(rpcrdma_receive_wq, &rep->rr_work); return; out_badstatus: @@ -1149,71 +1423,22 @@ out_badstatus: } return; -#if defined(CONFIG_SUNRPC_BACKCHANNEL) -out_bcall: - rpcrdma_bc_receive_call(r_xprt, rep); - return; -#endif - -/* If the incoming reply terminated a pending RPC, the next - * RPC call will post a replacement receive buffer as it is - * being marshaled. - */ out_badversion: dprintk("RPC: %s: invalid version %d\n", - __func__, be32_to_cpu(headerp->rm_vers)); - status = -EIO; - r_xprt->rx_stats.bad_reply_count++; - goto out; - -out_rdmaerr: - rmerr = be32_to_cpu(headerp->rm_body.rm_error.rm_err); - switch (rmerr) { - case ERR_VERS: - pr_err("%s: server reports header version error (%u-%u)\n", - __func__, - be32_to_cpu(headerp->rm_body.rm_error.rm_vers_low), - be32_to_cpu(headerp->rm_body.rm_error.rm_vers_high)); - break; - case ERR_CHUNK: - pr_err("%s: server reports header decoding error\n", - __func__); - break; - default: - pr_err("%s: server reports unknown error %d\n", - __func__, rmerr); - } - status = -EREMOTEIO; - r_xprt->rx_stats.bad_reply_count++; - goto out; + __func__, be32_to_cpu(rep->rr_vers)); + goto repost; -/* The req was still available, but by the time the transport_lock - * was acquired, the rqst and task had been released. Thus the RPC - * has already been terminated. +/* The RPC transaction has already been terminated, or the header + * is corrupt. */ out_norqst: - spin_unlock_bh(&xprt->transport_lock); - rpcrdma_buffer_put(req); - dprintk("RPC: %s: race, no rqst left for req %p\n", - __func__, req); - return; + spin_unlock(&xprt->recv_lock); + dprintk("RPC: %s: no match for incoming xid 0x%08x\n", + __func__, be32_to_cpu(rep->rr_xid)); + goto repost; out_shortreply: dprintk("RPC: %s: short/invalid reply\n", __func__); - goto repost; - -out_nomatch: - spin_unlock(&buf->rb_lock); - dprintk("RPC: %s: no match for incoming xid 0x%08x len %d\n", - __func__, be32_to_cpu(headerp->rm_xid), - rep->rr_len); - goto repost; - -out_duplicate: - spin_unlock(&buf->rb_lock); - dprintk("RPC: %s: " - "duplicate reply %p to RPC request %p: xid 0x%08x\n", - __func__, rep, req, be32_to_cpu(headerp->rm_xid)); /* If no pending RPC transaction was matched, post a replacement * receive buffer before returning. diff --git a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c index c676ed0efb5a..af7893501e40 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_backchannel.c +++ b/net/sunrpc/xprtrdma/svc_rdma_backchannel.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2015 Oracle. All rights reserved. * @@ -52,7 +53,7 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp, if (src->iov_len < 24) goto out_shortreply; - spin_lock_bh(&xprt->transport_lock); + spin_lock(&xprt->recv_lock); req = xprt_lookup_rqst(xprt, xid); if (!req) goto out_notfound; @@ -69,17 +70,20 @@ int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, __be32 *rdma_resp, else if (credits > r_xprt->rx_buf.rb_bc_max_requests) credits = r_xprt->rx_buf.rb_bc_max_requests; + spin_lock_bh(&xprt->transport_lock); cwnd = xprt->cwnd; xprt->cwnd = credits << RPC_CWNDSHIFT; if (xprt->cwnd > cwnd) xprt_release_rqst_cong(req->rq_task); + spin_unlock_bh(&xprt->transport_lock); + ret = 0; xprt_complete_rqst(req->rq_task, rcvbuf->len); rcvbuf->len = 0; out_unlock: - spin_unlock_bh(&xprt->transport_lock); + spin_unlock(&xprt->recv_lock); out: return ret; @@ -129,6 +133,10 @@ static int svc_rdma_bc_sendto(struct svcxprt_rdma *rdma, if (ret) goto out_err; + /* Bump page refcnt so Send completion doesn't release + * the rq_buffer before all retransmits are complete. + */ + get_page(virt_to_page(rqst->rq_buffer)); ret = svc_rdma_post_send_wr(rdma, ctxt, 1, 0); if (ret) goto out_unmap; @@ -161,7 +169,6 @@ xprt_rdma_bc_allocate(struct rpc_task *task) return -EINVAL; } - /* svc_rdma_sendto releases this page */ page = alloc_page(RPCRDMA_DEF_GFP); if (!page) return -ENOMEM; @@ -180,6 +187,7 @@ xprt_rdma_bc_free(struct rpc_task *task) { struct rpc_rqst *rqst = task->tk_rqstp; + put_page(virt_to_page(rqst->rq_buffer)); kfree(rqst->rq_rbuffer); } @@ -266,7 +274,7 @@ xprt_rdma_bc_put(struct rpc_xprt *xprt) module_put(THIS_MODULE); } -static struct rpc_xprt_ops xprt_rdma_bc_procs = { +static const struct rpc_xprt_ops xprt_rdma_bc_procs = { .reserve_xprt = xprt_reserve_xprt_cong, .release_xprt = xprt_release_xprt_cong, .alloc_slot = xprt_alloc_slot, diff --git a/net/sunrpc/xprtrdma/svc_rdma_rw.c b/net/sunrpc/xprtrdma/svc_rdma_rw.c index 933f79bed270..9bd04549a1ad 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_rw.c +++ b/net/sunrpc/xprtrdma/svc_rdma_rw.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2016 Oracle. All rights reserved. * @@ -660,19 +661,21 @@ out_initerr: return -EIO; } +/* Walk the segments in the Read chunk starting at @p and construct + * RDMA Read operations to pull the chunk to the server. + */ static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp, struct svc_rdma_read_info *info, __be32 *p) { int ret; + ret = -EINVAL; info->ri_chunklen = 0; - while (*p++ != xdr_zero) { + while (*p++ != xdr_zero && be32_to_cpup(p++) == info->ri_position) { u32 rs_handle, rs_length; u64 rs_offset; - if (be32_to_cpup(p++) != info->ri_position) - break; rs_handle = be32_to_cpup(p++); rs_length = be32_to_cpup(p++); p = xdr_decode_hyper(p, &rs_offset); @@ -689,78 +692,6 @@ static int svc_rdma_build_read_chunk(struct svc_rqst *rqstp, return ret; } -/* If there is inline content following the Read chunk, append it to - * the page list immediately following the data payload. This has to - * be done after the reader function has determined how many pages - * were consumed for RDMA Read. - * - * On entry, ri_pageno and ri_pageoff point directly to the end of the - * page list. On exit, both have been updated to the new "next byte". - * - * Assumptions: - * - Inline content fits entirely in rq_pages[0] - * - Trailing content is only a handful of bytes - */ -static int svc_rdma_copy_tail(struct svc_rqst *rqstp, - struct svc_rdma_read_info *info) -{ - struct svc_rdma_op_ctxt *head = info->ri_readctxt; - unsigned int tail_length, remaining; - u8 *srcp, *destp; - - /* Assert that all inline content fits in page 0. This is an - * implementation limit, not a protocol limit. - */ - if (head->arg.head[0].iov_len > PAGE_SIZE) { - pr_warn_once("svcrdma: too much trailing inline content\n"); - return -EINVAL; - } - - srcp = head->arg.head[0].iov_base; - srcp += info->ri_position; - tail_length = head->arg.head[0].iov_len - info->ri_position; - remaining = tail_length; - - /* If there is room on the last page in the page list, try to - * fit the trailing content there. - */ - if (info->ri_pageoff > 0) { - unsigned int len; - - len = min_t(unsigned int, remaining, - PAGE_SIZE - info->ri_pageoff); - destp = page_address(rqstp->rq_pages[info->ri_pageno]); - destp += info->ri_pageoff; - - memcpy(destp, srcp, len); - srcp += len; - destp += len; - info->ri_pageoff += len; - remaining -= len; - - if (info->ri_pageoff == PAGE_SIZE) { - info->ri_pageno++; - info->ri_pageoff = 0; - } - } - - /* Otherwise, a fresh page is needed. */ - if (remaining) { - head->arg.pages[info->ri_pageno] = - rqstp->rq_pages[info->ri_pageno]; - head->count++; - - destp = page_address(rqstp->rq_pages[info->ri_pageno]); - memcpy(destp, srcp, remaining); - info->ri_pageoff += remaining; - } - - head->arg.page_len += tail_length; - head->arg.len += tail_length; - head->arg.buflen += tail_length; - return 0; -} - /* Construct RDMA Reads to pull over a normal Read chunk. The chunk * data lands in the page list of head->arg.pages. * @@ -785,34 +716,28 @@ static int svc_rdma_build_normal_read_chunk(struct svc_rqst *rqstp, if (ret < 0) goto out; - /* Read chunk may need XDR round-up (see RFC 5666, s. 3.7). + /* Split the Receive buffer between the head and tail + * buffers at Read chunk's position. XDR roundup of the + * chunk is not included in either the pagelist or in + * the tail. */ - if (info->ri_chunklen & 3) { - u32 padlen = 4 - (info->ri_chunklen & 3); - - info->ri_chunklen += padlen; + head->arg.tail[0].iov_base = + head->arg.head[0].iov_base + info->ri_position; + head->arg.tail[0].iov_len = + head->arg.head[0].iov_len - info->ri_position; + head->arg.head[0].iov_len = info->ri_position; - /* NB: data payload always starts on XDR alignment, - * thus the pad can never contain a page boundary. - */ - info->ri_pageoff += padlen; - if (info->ri_pageoff == PAGE_SIZE) { - info->ri_pageno++; - info->ri_pageoff = 0; - } - } + /* Read chunk may need XDR roundup (see RFC 5666, s. 3.7). + * + * NFSv2/3 write decoders need the length of the tail to + * contain the size of the roundup padding. + */ + head->arg.tail[0].iov_len += 4 - (info->ri_chunklen & 3); head->arg.page_len = info->ri_chunklen; head->arg.len += info->ri_chunklen; head->arg.buflen += info->ri_chunklen; - if (info->ri_position < head->arg.head[0].iov_len) { - ret = svc_rdma_copy_tail(rqstp, info); - if (ret < 0) - goto out; - } - head->arg.head[0].iov_len = info->ri_position; - out: return ret; } diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c index e660d4965b18..46ec069150d5 100644 --- a/net/sunrpc/xprtrdma/svc_rdma_transport.c +++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c @@ -51,6 +51,7 @@ #include <linux/workqueue.h> #include <rdma/ib_verbs.h> #include <rdma/rdma_cm.h> +#include <rdma/rw.h> #include <linux/sunrpc/svc_rdma.h> #include <linux/export.h> #include "xprt_rdma.h" @@ -70,7 +71,7 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt); static int svc_rdma_secure_port(struct svc_rqst *); static void svc_rdma_kill_temp_xprt(struct svc_xprt *); -static struct svc_xprt_ops svc_rdma_ops = { +static const struct svc_xprt_ops svc_rdma_ops = { .xpo_create = svc_rdma_create, .xpo_recvfrom = svc_rdma_recvfrom, .xpo_sendto = svc_rdma_sendto, @@ -98,7 +99,7 @@ static struct svc_xprt *svc_rdma_bc_create(struct svc_serv *, struct net *, static void svc_rdma_bc_detach(struct svc_xprt *); static void svc_rdma_bc_free(struct svc_xprt *); -static struct svc_xprt_ops svc_rdma_bc_ops = { +static const struct svc_xprt_ops svc_rdma_bc_ops = { .xpo_create = svc_rdma_bc_create, .xpo_detach = svc_rdma_bc_detach, .xpo_free = svc_rdma_bc_free, @@ -167,8 +168,8 @@ static bool svc_rdma_prealloc_ctxts(struct svcxprt_rdma *xprt) { unsigned int i; - /* Each RPC/RDMA credit can consume a number of send - * and receive WQEs. One ctxt is allocated for each. + /* Each RPC/RDMA credit can consume one Receive and + * one Send WQE at the same time. */ i = xprt->sc_sq_depth + xprt->sc_rq_depth; @@ -289,6 +290,7 @@ static void qp_event_handler(struct ib_event *event, void *context) ib_event_msg(event->event), event->event, event->element.qp); set_bit(XPT_CLOSE, &xprt->xpt_flags); + svc_xprt_enqueue(xprt); break; } } @@ -321,8 +323,7 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) set_bit(XPT_DATA, &xprt->sc_xprt.xpt_flags); if (test_bit(RDMAXPRT_CONN_PENDING, &xprt->sc_flags)) goto out; - svc_xprt_enqueue(&xprt->sc_xprt); - goto out; + goto out_enqueue; flushed: if (wc->status != IB_WC_WR_FLUSH_ERR) @@ -332,6 +333,8 @@ flushed: set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); svc_rdma_put_context(ctxt, 1); +out_enqueue: + svc_xprt_enqueue(&xprt->sc_xprt); out: svc_xprt_put(&xprt->sc_xprt); } @@ -357,6 +360,7 @@ void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) if (unlikely(wc->status != IB_WC_SUCCESS)) { set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); + svc_xprt_enqueue(&xprt->sc_xprt); if (wc->status != IB_WC_WR_FLUSH_ERR) pr_err("svcrdma: Send: %s (%u/0x%x)\n", ib_wc_status_msg(wc->status), @@ -568,8 +572,10 @@ static int rdma_listen_handler(struct rdma_cm_id *cma_id, case RDMA_CM_EVENT_DEVICE_REMOVAL: dprintk("svcrdma: Device removal xprt=%p, cm_id=%p\n", xprt, cma_id); - if (xprt) + if (xprt) { set_bit(XPT_CLOSE, &xprt->sc_xprt.xpt_flags); + svc_xprt_enqueue(&xprt->sc_xprt); + } break; default: @@ -713,7 +719,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) struct ib_qp_init_attr qp_attr; struct ib_device *dev; struct sockaddr *sap; - unsigned int i; + unsigned int i, ctxts; int ret = 0; listen_rdma = container_of(xprt, struct svcxprt_rdma, sc_xprt); @@ -742,14 +748,26 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) newxprt->sc_max_sge = min((size_t)dev->attrs.max_sge, (size_t)RPCSVC_MAXPAGES); newxprt->sc_max_req_size = svcrdma_max_req_size; - newxprt->sc_max_requests = min_t(u32, dev->attrs.max_qp_wr, - svcrdma_max_requests); - newxprt->sc_fc_credits = cpu_to_be32(newxprt->sc_max_requests); - newxprt->sc_max_bc_requests = min_t(u32, dev->attrs.max_qp_wr, - svcrdma_max_bc_requests); + newxprt->sc_max_requests = svcrdma_max_requests; + newxprt->sc_max_bc_requests = svcrdma_max_bc_requests; newxprt->sc_rq_depth = newxprt->sc_max_requests + newxprt->sc_max_bc_requests; - newxprt->sc_sq_depth = newxprt->sc_rq_depth; + if (newxprt->sc_rq_depth > dev->attrs.max_qp_wr) { + pr_warn("svcrdma: reducing receive depth to %d\n", + dev->attrs.max_qp_wr); + newxprt->sc_rq_depth = dev->attrs.max_qp_wr; + newxprt->sc_max_requests = newxprt->sc_rq_depth - 2; + newxprt->sc_max_bc_requests = 2; + } + newxprt->sc_fc_credits = cpu_to_be32(newxprt->sc_max_requests); + ctxts = rdma_rw_mr_factor(dev, newxprt->sc_port_num, RPCSVC_MAXPAGES); + ctxts *= newxprt->sc_max_requests; + newxprt->sc_sq_depth = newxprt->sc_rq_depth + ctxts; + if (newxprt->sc_sq_depth > dev->attrs.max_qp_wr) { + pr_warn("svcrdma: reducing send depth to %d\n", + dev->attrs.max_qp_wr); + newxprt->sc_sq_depth = dev->attrs.max_qp_wr; + } atomic_set(&newxprt->sc_sq_avail, newxprt->sc_sq_depth); if (!svc_rdma_prealloc_ctxts(newxprt)) @@ -784,8 +802,8 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) qp_attr.event_handler = qp_event_handler; qp_attr.qp_context = &newxprt->sc_xprt; qp_attr.port_num = newxprt->sc_port_num; - qp_attr.cap.max_rdma_ctxs = newxprt->sc_max_requests; - qp_attr.cap.max_send_wr = newxprt->sc_sq_depth; + qp_attr.cap.max_rdma_ctxs = ctxts; + qp_attr.cap.max_send_wr = newxprt->sc_sq_depth - ctxts; qp_attr.cap.max_recv_wr = newxprt->sc_rq_depth; qp_attr.cap.max_send_sge = newxprt->sc_max_sge; qp_attr.cap.max_recv_sge = newxprt->sc_max_sge; @@ -853,6 +871,7 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt) dprintk(" remote address : %pIS:%u\n", sap, rpc_get_port(sap)); dprintk(" max_sge : %d\n", newxprt->sc_max_sge); dprintk(" sq_depth : %d\n", newxprt->sc_sq_depth); + dprintk(" rdma_rw_ctxs : %d\n", ctxts); dprintk(" max_requests : %d\n", newxprt->sc_max_requests); dprintk(" ord : %d\n", newxprt->sc_ord); diff --git a/net/sunrpc/xprtrdma/transport.c b/net/sunrpc/xprtrdma/transport.c index d1c458e5ec4d..646c24494ea7 100644 --- a/net/sunrpc/xprtrdma/transport.c +++ b/net/sunrpc/xprtrdma/transport.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2014-2017 Oracle. All rights reserved. * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -149,7 +150,7 @@ static struct ctl_table sunrpc_table[] = { #endif -static struct rpc_xprt_ops xprt_rdma_procs; /*forward reference */ +static const struct rpc_xprt_ops xprt_rdma_procs; static void xprt_rdma_format_addresses4(struct rpc_xprt *xprt, struct sockaddr *sap) @@ -559,6 +560,7 @@ rpcrdma_get_rdmabuf(struct rpcrdma_xprt *r_xprt, struct rpcrdma_req *req, r_xprt->rx_stats.hardway_register_count += size; req->rl_rdmabuf = rb; + xdr_buf_init(&req->rl_hdrbuf, rb->rg_base, rdmab_length(rb)); return true; } @@ -677,17 +679,14 @@ xprt_rdma_free(struct rpc_task *task) struct rpc_rqst *rqst = task->tk_rqstp; struct rpcrdma_xprt *r_xprt = rpcx_to_rdmax(rqst->rq_xprt); struct rpcrdma_req *req = rpcr_to_rdmar(rqst); - struct rpcrdma_ia *ia = &r_xprt->rx_ia; - if (req->rl_backchannel) + if (test_bit(RPCRDMA_REQ_F_BACKCHANNEL, &req->rl_flags)) return; dprintk("RPC: %s: called on 0x%p\n", __func__, req->rl_reply); - rpcrdma_remove_req(&r_xprt->rx_buf, req); - if (!list_empty(&req->rl_registered)) - ia->ri_ops->ro_unmap_safe(r_xprt, req, !RPC_IS_ASYNC(task)); - rpcrdma_unmap_sges(ia, req); + if (test_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags)) + rpcrdma_release_rqst(r_xprt, req); rpcrdma_buffer_put(req); } @@ -728,9 +727,10 @@ xprt_rdma_send_request(struct rpc_task *task) /* On retransmit, remove any previously registered chunks */ if (unlikely(!list_empty(&req->rl_registered))) - r_xprt->rx_ia.ri_ops->ro_unmap_safe(r_xprt, req, false); + r_xprt->rx_ia.ri_ops->ro_unmap_sync(r_xprt, + &req->rl_registered); - rc = rpcrdma_marshal_req(rqst); + rc = rpcrdma_marshal_req(r_xprt, rqst); if (rc < 0) goto failed_marshal; @@ -742,6 +742,7 @@ xprt_rdma_send_request(struct rpc_task *task) goto drop_connection; req->rl_connect_cookie = xprt->connect_cookie; + set_bit(RPCRDMA_REQ_F_PENDING, &req->rl_flags); if (rpcrdma_ep_post(&r_xprt->rx_ia, &r_xprt->rx_ep, req)) goto drop_connection; @@ -789,11 +790,13 @@ void xprt_rdma_print_stats(struct rpc_xprt *xprt, struct seq_file *seq) r_xprt->rx_stats.failed_marshal_count, r_xprt->rx_stats.bad_reply_count, r_xprt->rx_stats.nomsg_call_count); - seq_printf(seq, "%lu %lu %lu %lu\n", + seq_printf(seq, "%lu %lu %lu %lu %lu %lu\n", r_xprt->rx_stats.mrs_recovered, r_xprt->rx_stats.mrs_orphaned, r_xprt->rx_stats.mrs_allocated, - r_xprt->rx_stats.local_inv_needed); + r_xprt->rx_stats.local_inv_needed, + r_xprt->rx_stats.empty_sendctx_q, + r_xprt->rx_stats.reply_waits_for_send); } static int @@ -811,7 +814,7 @@ xprt_rdma_disable_swap(struct rpc_xprt *xprt) * Plumbing for rpc transport switch and kernel module */ -static struct rpc_xprt_ops xprt_rdma_procs = { +static const struct rpc_xprt_ops xprt_rdma_procs = { .reserve_xprt = xprt_reserve_xprt_cong, .release_xprt = xprt_release_xprt_cong, /* sunrpc/xprt.c */ .alloc_slot = xprt_alloc_slot, diff --git a/net/sunrpc/xprtrdma/verbs.c b/net/sunrpc/xprtrdma/verbs.c index e4171f2abe37..710b3f77db82 100644 --- a/net/sunrpc/xprtrdma/verbs.c +++ b/net/sunrpc/xprtrdma/verbs.c @@ -1,4 +1,5 @@ /* + * Copyright (c) 2014-2017 Oracle. All rights reserved. * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -49,9 +50,10 @@ #include <linux/interrupt.h> #include <linux/slab.h> -#include <linux/prefetch.h> #include <linux/sunrpc/addr.h> #include <linux/sunrpc/svc_rdma.h> + +#include <asm-generic/barrier.h> #include <asm/bitops.h> #include <rdma/ib_cm.h> @@ -73,7 +75,7 @@ static void rpcrdma_create_mrs(struct rpcrdma_xprt *r_xprt); static void rpcrdma_destroy_mrs(struct rpcrdma_buffer *buf); static void rpcrdma_dma_unmap_regbuf(struct rpcrdma_regbuf *rb); -static struct workqueue_struct *rpcrdma_receive_wq __read_mostly; +struct workqueue_struct *rpcrdma_receive_wq __read_mostly; int rpcrdma_alloc_wq(void) @@ -126,33 +128,17 @@ rpcrdma_qp_async_error_upcall(struct ib_event *event, void *context) static void rpcrdma_wc_send(struct ib_cq *cq, struct ib_wc *wc) { + struct ib_cqe *cqe = wc->wr_cqe; + struct rpcrdma_sendctx *sc = + container_of(cqe, struct rpcrdma_sendctx, sc_cqe); + /* WARNING: Only wr_cqe and status are reliable at this point */ if (wc->status != IB_WC_SUCCESS && wc->status != IB_WC_WR_FLUSH_ERR) pr_err("rpcrdma: Send: %s (%u/0x%x)\n", ib_wc_status_msg(wc->status), wc->status, wc->vendor_err); -} - -/* Perform basic sanity checking to avoid using garbage - * to update the credit grant value. - */ -static void -rpcrdma_update_granted_credits(struct rpcrdma_rep *rep) -{ - struct rpcrdma_msg *rmsgp = rdmab_to_msg(rep->rr_rdmabuf); - struct rpcrdma_buffer *buffer = &rep->rr_rxprt->rx_buf; - u32 credits; - - if (rep->rr_len < RPCRDMA_HDRLEN_ERR) - return; - - credits = be32_to_cpu(rmsgp->rm_credit); - if (credits == 0) - credits = 1; /* don't deadlock */ - else if (credits > buffer->rb_max_requests) - credits = buffer->rb_max_requests; - atomic_set(&buffer->rb_credits, credits); + rpcrdma_sendctx_put_locked(sc); } /** @@ -173,24 +159,19 @@ rpcrdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc) goto out_fail; /* status == SUCCESS means all fields in wc are trustworthy */ - if (wc->opcode != IB_WC_RECV) - return; - dprintk("RPC: %s: rep %p opcode 'recv', length %u: success\n", __func__, rep, wc->byte_len); - rep->rr_len = wc->byte_len; + rpcrdma_set_xdrlen(&rep->rr_hdrbuf, wc->byte_len); rep->rr_wc_flags = wc->wc_flags; rep->rr_inv_rkey = wc->ex.invalidate_rkey; ib_dma_sync_single_for_cpu(rdmab_device(rep->rr_rdmabuf), rdmab_addr(rep->rr_rdmabuf), - rep->rr_len, DMA_FROM_DEVICE); - - rpcrdma_update_granted_credits(rep); + wc->byte_len, DMA_FROM_DEVICE); out_schedule: - queue_work(rpcrdma_receive_wq, &rep->rr_work); + rpcrdma_reply_handler(rep); return; out_fail: @@ -198,7 +179,7 @@ out_fail: pr_err("rpcrdma: Recv: %s (%u/0x%x)\n", ib_wc_status_msg(wc->status), wc->status, wc->vendor_err); - rep->rr_len = RPCRDMA_BAD_LEN; + rpcrdma_set_xdrlen(&rep->rr_hdrbuf, 0); goto out_schedule; } @@ -300,7 +281,7 @@ rpcrdma_conn_upcall(struct rdma_cm_id *id, struct rdma_cm_event *event) case RDMA_CM_EVENT_DISCONNECTED: connstate = -ECONNABORTED; connected: - atomic_set(&xprt->rx_buf.rb_credits, 1); + xprt->rx_buf.rb_credits = 1; ep->rep_connected = connstate; rpcrdma_conn_func(ep); wake_up_all(&ep->rep_connect_wait); @@ -569,16 +550,15 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, ep->rep_attr.cap.max_recv_sge); /* set trigger for requesting send completion */ - ep->rep_cqinit = ep->rep_attr.cap.max_send_wr/2 - 1; - if (ep->rep_cqinit <= 2) - ep->rep_cqinit = 0; /* always signal? */ - rpcrdma_init_cqcount(ep, 0); + ep->rep_send_batch = min_t(unsigned int, RPCRDMA_MAX_SEND_BATCH, + cdata->max_requests >> 2); + ep->rep_send_count = ep->rep_send_batch; init_waitqueue_head(&ep->rep_connect_wait); INIT_DELAYED_WORK(&ep->rep_connect_worker, rpcrdma_connect_worker); sendcq = ib_alloc_cq(ia->ri_device, NULL, ep->rep_attr.cap.max_send_wr + 1, - 0, IB_POLL_SOFTIRQ); + 1, IB_POLL_WORKQUEUE); if (IS_ERR(sendcq)) { rc = PTR_ERR(sendcq); dprintk("RPC: %s: failed to create send CQ: %i\n", @@ -588,7 +568,7 @@ rpcrdma_ep_create(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia, recvcq = ib_alloc_cq(ia->ri_device, NULL, ep->rep_attr.cap.max_recv_wr + 1, - 0, IB_POLL_SOFTIRQ); + 0, IB_POLL_WORKQUEUE); if (IS_ERR(recvcq)) { rc = PTR_ERR(recvcq); dprintk("RPC: %s: failed to create recv CQ: %i\n", @@ -851,6 +831,168 @@ rpcrdma_ep_disconnect(struct rpcrdma_ep *ep, struct rpcrdma_ia *ia) ib_drain_qp(ia->ri_id->qp); } +/* Fixed-size circular FIFO queue. This implementation is wait-free and + * lock-free. + * + * Consumer is the code path that posts Sends. This path dequeues a + * sendctx for use by a Send operation. Multiple consumer threads + * are serialized by the RPC transport lock, which allows only one + * ->send_request call at a time. + * + * Producer is the code path that handles Send completions. This path + * enqueues a sendctx that has been completed. Multiple producer + * threads are serialized by the ib_poll_cq() function. + */ + +/* rpcrdma_sendctxs_destroy() assumes caller has already quiesced + * queue activity, and ib_drain_qp has flushed all remaining Send + * requests. + */ +static void rpcrdma_sendctxs_destroy(struct rpcrdma_buffer *buf) +{ + unsigned long i; + + for (i = 0; i <= buf->rb_sc_last; i++) + kfree(buf->rb_sc_ctxs[i]); + kfree(buf->rb_sc_ctxs); +} + +static struct rpcrdma_sendctx *rpcrdma_sendctx_create(struct rpcrdma_ia *ia) +{ + struct rpcrdma_sendctx *sc; + + sc = kzalloc(sizeof(*sc) + + ia->ri_max_send_sges * sizeof(struct ib_sge), + GFP_KERNEL); + if (!sc) + return NULL; + + sc->sc_wr.wr_cqe = &sc->sc_cqe; + sc->sc_wr.sg_list = sc->sc_sges; + sc->sc_wr.opcode = IB_WR_SEND; + sc->sc_cqe.done = rpcrdma_wc_send; + return sc; +} + +static int rpcrdma_sendctxs_create(struct rpcrdma_xprt *r_xprt) +{ + struct rpcrdma_buffer *buf = &r_xprt->rx_buf; + struct rpcrdma_sendctx *sc; + unsigned long i; + + /* Maximum number of concurrent outstanding Send WRs. Capping + * the circular queue size stops Send Queue overflow by causing + * the ->send_request call to fail temporarily before too many + * Sends are posted. + */ + i = buf->rb_max_requests + RPCRDMA_MAX_BC_REQUESTS; + dprintk("RPC: %s: allocating %lu send_ctxs\n", __func__, i); + buf->rb_sc_ctxs = kcalloc(i, sizeof(sc), GFP_KERNEL); + if (!buf->rb_sc_ctxs) + return -ENOMEM; + + buf->rb_sc_last = i - 1; + for (i = 0; i <= buf->rb_sc_last; i++) { + sc = rpcrdma_sendctx_create(&r_xprt->rx_ia); + if (!sc) + goto out_destroy; + + sc->sc_xprt = r_xprt; + buf->rb_sc_ctxs[i] = sc; + } + + return 0; + +out_destroy: + rpcrdma_sendctxs_destroy(buf); + return -ENOMEM; +} + +/* The sendctx queue is not guaranteed to have a size that is a + * power of two, thus the helpers in circ_buf.h cannot be used. + * The other option is to use modulus (%), which can be expensive. + */ +static unsigned long rpcrdma_sendctx_next(struct rpcrdma_buffer *buf, + unsigned long item) +{ + return likely(item < buf->rb_sc_last) ? item + 1 : 0; +} + +/** + * rpcrdma_sendctx_get_locked - Acquire a send context + * @buf: transport buffers from which to acquire an unused context + * + * Returns pointer to a free send completion context; or NULL if + * the queue is empty. + * + * Usage: Called to acquire an SGE array before preparing a Send WR. + * + * The caller serializes calls to this function (per rpcrdma_buffer), + * and provides an effective memory barrier that flushes the new value + * of rb_sc_head. + */ +struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf) +{ + struct rpcrdma_xprt *r_xprt; + struct rpcrdma_sendctx *sc; + unsigned long next_head; + + next_head = rpcrdma_sendctx_next(buf, buf->rb_sc_head); + + if (next_head == READ_ONCE(buf->rb_sc_tail)) + goto out_emptyq; + + /* ORDER: item must be accessed _before_ head is updated */ + sc = buf->rb_sc_ctxs[next_head]; + + /* Releasing the lock in the caller acts as a memory + * barrier that flushes rb_sc_head. + */ + buf->rb_sc_head = next_head; + + return sc; + +out_emptyq: + /* The queue is "empty" if there have not been enough Send + * completions recently. This is a sign the Send Queue is + * backing up. Cause the caller to pause and try again. + */ + dprintk("RPC: %s: empty sendctx queue\n", __func__); + r_xprt = container_of(buf, struct rpcrdma_xprt, rx_buf); + r_xprt->rx_stats.empty_sendctx_q++; + return NULL; +} + +/** + * rpcrdma_sendctx_put_locked - Release a send context + * @sc: send context to release + * + * Usage: Called from Send completion to return a sendctxt + * to the queue. + * + * The caller serializes calls to this function (per rpcrdma_buffer). + */ +void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc) +{ + struct rpcrdma_buffer *buf = &sc->sc_xprt->rx_buf; + unsigned long next_tail; + + /* Unmap SGEs of previously completed by unsignaled + * Sends by walking up the queue until @sc is found. + */ + next_tail = buf->rb_sc_tail; + do { + next_tail = rpcrdma_sendctx_next(buf, next_tail); + + /* ORDER: item must be accessed _before_ tail is updated */ + rpcrdma_unmap_sendctx(buf->rb_sc_ctxs[next_tail]); + + } while (buf->rb_sc_ctxs[next_tail] != sc); + + /* Paired with READ_ONCE */ + smp_store_release(&buf->rb_sc_tail, next_tail); +} + static void rpcrdma_mr_recovery_worker(struct work_struct *work) { @@ -946,13 +1088,8 @@ rpcrdma_create_req(struct rpcrdma_xprt *r_xprt) spin_lock(&buffer->rb_reqslock); list_add(&req->rl_all, &buffer->rb_allreqs); spin_unlock(&buffer->rb_reqslock); - req->rl_cqe.done = rpcrdma_wc_send; req->rl_buffer = &r_xprt->rx_buf; INIT_LIST_HEAD(&req->rl_registered); - req->rl_send_wr.next = NULL; - req->rl_send_wr.wr_cqe = &req->rl_cqe; - req->rl_send_wr.sg_list = req->rl_send_sge; - req->rl_send_wr.opcode = IB_WR_SEND; return req; } @@ -974,10 +1111,12 @@ rpcrdma_create_rep(struct rpcrdma_xprt *r_xprt) rc = PTR_ERR(rep->rr_rdmabuf); goto out_free; } + xdr_buf_init(&rep->rr_hdrbuf, rep->rr_rdmabuf->rg_base, + rdmab_length(rep->rr_rdmabuf)); rep->rr_cqe.done = rpcrdma_wc_receive; rep->rr_rxprt = r_xprt; - INIT_WORK(&rep->rr_work, rpcrdma_reply_handler); + INIT_WORK(&rep->rr_work, rpcrdma_deferred_completion); rep->rr_recv_wr.next = NULL; rep->rr_recv_wr.wr_cqe = &rep->rr_cqe; rep->rr_recv_wr.sg_list = &rep->rr_rdmabuf->rg_iov; @@ -998,13 +1137,11 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) buf->rb_max_requests = r_xprt->rx_data.max_requests; buf->rb_bc_srv_max_requests = 0; - atomic_set(&buf->rb_credits, 1); spin_lock_init(&buf->rb_mwlock); spin_lock_init(&buf->rb_lock); spin_lock_init(&buf->rb_recovery_lock); INIT_LIST_HEAD(&buf->rb_mws); INIT_LIST_HEAD(&buf->rb_all); - INIT_LIST_HEAD(&buf->rb_pending); INIT_LIST_HEAD(&buf->rb_stale_mrs); INIT_DELAYED_WORK(&buf->rb_refresh_worker, rpcrdma_mr_refresh_worker); @@ -1026,7 +1163,6 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) rc = PTR_ERR(req); goto out; } - req->rl_backchannel = false; list_add(&req->rl_list, &buf->rb_send_bufs); } @@ -1044,6 +1180,10 @@ rpcrdma_buffer_create(struct rpcrdma_xprt *r_xprt) list_add(&rep->rr_list, &buf->rb_recv_bufs); } + rc = rpcrdma_sendctxs_create(r_xprt); + if (rc) + goto out; + return 0; out: rpcrdma_buffer_destroy(buf); @@ -1120,6 +1260,8 @@ rpcrdma_buffer_destroy(struct rpcrdma_buffer *buf) cancel_delayed_work_sync(&buf->rb_recovery_worker); cancel_delayed_work_sync(&buf->rb_refresh_worker); + rpcrdma_sendctxs_destroy(buf); + while (!list_empty(&buf->rb_recv_bufs)) { struct rpcrdma_rep *rep; @@ -1235,7 +1377,6 @@ rpcrdma_buffer_put(struct rpcrdma_req *req) struct rpcrdma_buffer *buffers = req->rl_buffer; struct rpcrdma_rep *rep = req->rl_reply; - req->rl_send_wr.num_sge = 0; req->rl_reply = NULL; spin_lock(&buffers->rb_lock); @@ -1367,7 +1508,7 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, struct rpcrdma_ep *ep, struct rpcrdma_req *req) { - struct ib_send_wr *send_wr = &req->rl_send_wr; + struct ib_send_wr *send_wr = &req->rl_sendctx->sc_wr; struct ib_send_wr *send_wr_fail; int rc; @@ -1381,7 +1522,14 @@ rpcrdma_ep_post(struct rpcrdma_ia *ia, dprintk("RPC: %s: posting %d s/g entries\n", __func__, send_wr->num_sge); - rpcrdma_set_signaled(ep, send_wr); + if (!ep->rep_send_count || + test_bit(RPCRDMA_REQ_F_TX_RESOURCES, &req->rl_flags)) { + send_wr->send_flags |= IB_SEND_SIGNALED; + ep->rep_send_count = ep->rep_send_batch; + } else { + send_wr->send_flags &= ~IB_SEND_SIGNALED; + --ep->rep_send_count; + } rc = ib_post_send(ia->ri_id->qp, send_wr, &send_wr_fail); if (rc) goto out_postsend_err; diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index b282d3f8cdd8..51686d9eac5f 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -1,4 +1,5 @@ /* + * Copyright (c) 2014-2017 Oracle. All rights reserved. * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. * * This software is available to you under a choice of one of two @@ -93,8 +94,8 @@ enum { */ struct rpcrdma_ep { - atomic_t rep_cqcount; - int rep_cqinit; + unsigned int rep_send_count; + unsigned int rep_send_batch; int rep_connected; struct ib_qp_init_attr rep_attr; wait_queue_head_t rep_connect_wait; @@ -104,25 +105,6 @@ struct rpcrdma_ep { struct delayed_work rep_connect_worker; }; -static inline void -rpcrdma_init_cqcount(struct rpcrdma_ep *ep, int count) -{ - atomic_set(&ep->rep_cqcount, ep->rep_cqinit - count); -} - -/* To update send queue accounting, provider must take a - * send completion every now and then. - */ -static inline void -rpcrdma_set_signaled(struct rpcrdma_ep *ep, struct ib_send_wr *send_wr) -{ - send_wr->send_flags = 0; - if (unlikely(atomic_sub_return(1, &ep->rep_cqcount) <= 0)) { - rpcrdma_init_cqcount(ep, 0); - send_wr->send_flags = IB_SEND_SIGNALED; - } -} - /* Pre-allocate extra Work Requests for handling backward receives * and sends. This is a fixed value because the Work Queues are * allocated when the forward channel is set up. @@ -164,12 +146,6 @@ rdmab_lkey(struct rpcrdma_regbuf *rb) return rb->rg_iov.lkey; } -static inline struct rpcrdma_msg * -rdmab_to_msg(struct rpcrdma_regbuf *rb) -{ - return (struct rpcrdma_msg *)rb->rg_base; -} - static inline struct ib_device * rdmab_device(struct rpcrdma_regbuf *rb) { @@ -202,33 +178,58 @@ enum { }; /* - * struct rpcrdma_rep -- this structure encapsulates state required to recv - * and complete a reply, asychronously. It needs several pieces of - * state: - * o recv buffer (posted to provider) - * o ib_sge (also donated to provider) - * o status of reply (length, success or not) - * o bookkeeping state to get run by reply handler (list, etc) + * struct rpcrdma_rep -- this structure encapsulates state required + * to receive and complete an RPC Reply, asychronously. It needs + * several pieces of state: * - * These are allocated during initialization, per-transport instance. + * o receive buffer and ib_sge (donated to provider) + * o status of receive (success or not, length, inv rkey) + * o bookkeeping state to get run by reply handler (XDR stream) * - * N of these are associated with a transport instance, and stored in - * struct rpcrdma_buffer. N is the max number of outstanding requests. + * These structures are allocated during transport initialization. + * N of these are associated with a transport instance, managed by + * struct rpcrdma_buffer. N is the max number of outstanding RPCs. */ struct rpcrdma_rep { struct ib_cqe rr_cqe; - unsigned int rr_len; + __be32 rr_xid; + __be32 rr_vers; + __be32 rr_proc; int rr_wc_flags; u32 rr_inv_rkey; + struct rpcrdma_regbuf *rr_rdmabuf; struct rpcrdma_xprt *rr_rxprt; struct work_struct rr_work; + struct xdr_buf rr_hdrbuf; + struct xdr_stream rr_stream; + struct rpc_rqst *rr_rqst; struct list_head rr_list; struct ib_recv_wr rr_recv_wr; - struct rpcrdma_regbuf *rr_rdmabuf; }; -#define RPCRDMA_BAD_LEN (~0U) +/* struct rpcrdma_sendctx - DMA mapped SGEs to unmap after Send completes + */ +struct rpcrdma_req; +struct rpcrdma_xprt; +struct rpcrdma_sendctx { + struct ib_send_wr sc_wr; + struct ib_cqe sc_cqe; + struct rpcrdma_xprt *sc_xprt; + struct rpcrdma_req *sc_req; + unsigned int sc_unmap_count; + struct ib_sge sc_sges[]; +}; + +/* Limit the number of SGEs that can be unmapped during one + * Send completion. This caps the amount of work a single + * completion can do before returning to the provider. + * + * Setting this to zero disables Send completion batching. + */ +enum { + RPCRDMA_MAX_SEND_BATCH = 7, +}; /* * struct rpcrdma_mw - external memory region metadata @@ -341,25 +342,30 @@ enum { struct rpcrdma_buffer; struct rpcrdma_req { struct list_head rl_list; - __be32 rl_xid; - unsigned int rl_mapped_sges; unsigned int rl_connect_cookie; struct rpcrdma_buffer *rl_buffer; struct rpcrdma_rep *rl_reply; - struct ib_send_wr rl_send_wr; - struct ib_sge rl_send_sge[RPCRDMA_MAX_SEND_SGES]; + struct xdr_stream rl_stream; + struct xdr_buf rl_hdrbuf; + struct rpcrdma_sendctx *rl_sendctx; struct rpcrdma_regbuf *rl_rdmabuf; /* xprt header */ struct rpcrdma_regbuf *rl_sendbuf; /* rq_snd_buf */ struct rpcrdma_regbuf *rl_recvbuf; /* rq_rcv_buf */ - struct ib_cqe rl_cqe; struct list_head rl_all; - bool rl_backchannel; + unsigned long rl_flags; struct list_head rl_registered; /* registered segments */ struct rpcrdma_mr_seg rl_segments[RPCRDMA_MAX_SEGS]; }; +/* rl_flags */ +enum { + RPCRDMA_REQ_F_BACKCHANNEL = 0, + RPCRDMA_REQ_F_PENDING, + RPCRDMA_REQ_F_TX_RESOURCES, +}; + static inline void rpcrdma_set_xprtdata(struct rpc_rqst *rqst, struct rpcrdma_req *req) { @@ -399,13 +405,17 @@ struct rpcrdma_buffer { struct list_head rb_mws; struct list_head rb_all; + unsigned long rb_sc_head; + unsigned long rb_sc_tail; + unsigned long rb_sc_last; + struct rpcrdma_sendctx **rb_sc_ctxs; + spinlock_t rb_lock; /* protect buf lists */ int rb_send_count, rb_recv_count; struct list_head rb_send_bufs; struct list_head rb_recv_bufs; - struct list_head rb_pending; u32 rb_max_requests; - atomic_t rb_credits; /* most recent credit grant */ + u32 rb_credits; /* most recent credit grant */ u32 rb_bc_srv_max_requests; spinlock_t rb_reqslock; /* protect rb_allreqs */ @@ -440,24 +450,29 @@ struct rpcrdma_create_data_internal { * Statistics for RPCRDMA */ struct rpcrdma_stats { + /* accessed when sending a call */ unsigned long read_chunk_count; unsigned long write_chunk_count; unsigned long reply_chunk_count; - unsigned long long total_rdma_request; - unsigned long long total_rdma_reply; + /* rarely accessed error counters */ unsigned long long pullup_copy_count; - unsigned long long fixup_copy_count; unsigned long hardway_register_count; unsigned long failed_marshal_count; unsigned long bad_reply_count; - unsigned long nomsg_call_count; - unsigned long bcall_count; unsigned long mrs_recovered; unsigned long mrs_orphaned; unsigned long mrs_allocated; + unsigned long empty_sendctx_q; + + /* accessed when receiving a reply */ + unsigned long long total_rdma_reply; + unsigned long long fixup_copy_count; + unsigned long reply_waits_for_send; unsigned long local_inv_needed; + unsigned long nomsg_call_count; + unsigned long bcall_count; }; /* @@ -465,13 +480,12 @@ struct rpcrdma_stats { */ struct rpcrdma_xprt; struct rpcrdma_memreg_ops { - int (*ro_map)(struct rpcrdma_xprt *, + struct rpcrdma_mr_seg * + (*ro_map)(struct rpcrdma_xprt *, struct rpcrdma_mr_seg *, int, bool, struct rpcrdma_mw **); void (*ro_unmap_sync)(struct rpcrdma_xprt *, struct list_head *); - void (*ro_unmap_safe)(struct rpcrdma_xprt *, - struct rpcrdma_req *, bool); void (*ro_recover_mr)(struct rpcrdma_mw *); int (*ro_open)(struct rpcrdma_ia *, struct rpcrdma_ep *, @@ -529,6 +543,8 @@ void rpcrdma_ia_close(struct rpcrdma_ia *); bool frwr_is_supported(struct rpcrdma_ia *); bool fmr_is_supported(struct rpcrdma_ia *); +extern struct workqueue_struct *rpcrdma_receive_wq; + /* * Endpoint calls - xprtrdma/verbs.c */ @@ -551,34 +567,8 @@ struct rpcrdma_rep *rpcrdma_create_rep(struct rpcrdma_xprt *); void rpcrdma_destroy_req(struct rpcrdma_req *); int rpcrdma_buffer_create(struct rpcrdma_xprt *); void rpcrdma_buffer_destroy(struct rpcrdma_buffer *); - -static inline void -rpcrdma_insert_req(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req) -{ - spin_lock(&buffers->rb_lock); - if (list_empty(&req->rl_list)) - list_add_tail(&req->rl_list, &buffers->rb_pending); - spin_unlock(&buffers->rb_lock); -} - -static inline struct rpcrdma_req * -rpcrdma_lookup_req_locked(struct rpcrdma_buffer *buffers, __be32 xid) -{ - struct rpcrdma_req *pos; - - list_for_each_entry(pos, &buffers->rb_pending, rl_list) - if (pos->rl_xid == xid) - return pos; - return NULL; -} - -static inline void -rpcrdma_remove_req(struct rpcrdma_buffer *buffers, struct rpcrdma_req *req) -{ - spin_lock(&buffers->rb_lock); - list_del(&req->rl_list); - spin_unlock(&buffers->rb_lock); -} +struct rpcrdma_sendctx *rpcrdma_sendctx_get_locked(struct rpcrdma_buffer *buf); +void rpcrdma_sendctx_put_locked(struct rpcrdma_sendctx *sc); struct rpcrdma_mw *rpcrdma_get_mw(struct rpcrdma_xprt *); void rpcrdma_put_mw(struct rpcrdma_xprt *, struct rpcrdma_mw *); @@ -635,12 +625,24 @@ enum rpcrdma_chunktype { rpcrdma_replych }; -bool rpcrdma_prepare_send_sges(struct rpcrdma_ia *, struct rpcrdma_req *, - u32, struct xdr_buf *, enum rpcrdma_chunktype); -void rpcrdma_unmap_sges(struct rpcrdma_ia *, struct rpcrdma_req *); -int rpcrdma_marshal_req(struct rpc_rqst *); +int rpcrdma_prepare_send_sges(struct rpcrdma_xprt *r_xprt, + struct rpcrdma_req *req, u32 hdrlen, + struct xdr_buf *xdr, + enum rpcrdma_chunktype rtype); +void rpcrdma_unmap_sendctx(struct rpcrdma_sendctx *sc); +int rpcrdma_marshal_req(struct rpcrdma_xprt *r_xprt, struct rpc_rqst *rqst); void rpcrdma_set_max_header_sizes(struct rpcrdma_xprt *); -void rpcrdma_reply_handler(struct work_struct *work); +void rpcrdma_complete_rqst(struct rpcrdma_rep *rep); +void rpcrdma_reply_handler(struct rpcrdma_rep *rep); +void rpcrdma_release_rqst(struct rpcrdma_xprt *r_xprt, + struct rpcrdma_req *req); +void rpcrdma_deferred_completion(struct work_struct *work); + +static inline void rpcrdma_set_xdrlen(struct xdr_buf *xdr, size_t len) +{ + xdr->head[0].iov_len = len; + xdr->len = len; +} /* RPC/RDMA module init - xprtrdma/transport.c */ diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 4f154d388748..9cc850c2719e 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * linux/net/sunrpc/xprtsock.c * @@ -551,6 +552,7 @@ static int xs_local_send_request(struct rpc_task *task) default: dprintk("RPC: sendmsg returned unrecognized error %d\n", -status); + /* fall through */ case -EPIPE: xs_close(xprt); status = -ENOTCONN; @@ -969,10 +971,12 @@ static void xs_local_data_read_skb(struct rpc_xprt *xprt, return; /* Look up and lock the request corresponding to the given XID */ - spin_lock_bh(&xprt->transport_lock); + spin_lock(&xprt->recv_lock); rovr = xprt_lookup_rqst(xprt, *xp); if (!rovr) goto out_unlock; + xprt_pin_rqst(rovr); + spin_unlock(&xprt->recv_lock); task = rovr->rq_task; copied = rovr->rq_private_buf.buflen; @@ -981,13 +985,16 @@ static void xs_local_data_read_skb(struct rpc_xprt *xprt, if (xs_local_copy_to_xdr(&rovr->rq_private_buf, skb)) { dprintk("RPC: sk_buff copy failed\n"); - goto out_unlock; + spin_lock(&xprt->recv_lock); + goto out_unpin; } + spin_lock(&xprt->recv_lock); xprt_complete_rqst(task, copied); - +out_unpin: + xprt_unpin_rqst(rovr); out_unlock: - spin_unlock_bh(&xprt->transport_lock); + spin_unlock(&xprt->recv_lock); } static void xs_local_data_receive(struct sock_xprt *transport) @@ -1050,10 +1057,12 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt, return; /* Look up and lock the request corresponding to the given XID */ - spin_lock_bh(&xprt->transport_lock); + spin_lock(&xprt->recv_lock); rovr = xprt_lookup_rqst(xprt, *xp); if (!rovr) goto out_unlock; + xprt_pin_rqst(rovr); + spin_unlock(&xprt->recv_lock); task = rovr->rq_task; if ((copied = rovr->rq_private_buf.buflen) > repsize) @@ -1062,16 +1071,21 @@ static void xs_udp_data_read_skb(struct rpc_xprt *xprt, /* Suck it into the iovec, verify checksum if not done by hw. */ if (csum_partial_copy_to_xdr(&rovr->rq_private_buf, skb)) { __UDPX_INC_STATS(sk, UDP_MIB_INERRORS); - goto out_unlock; + spin_lock(&xprt->recv_lock); + goto out_unpin; } __UDPX_INC_STATS(sk, UDP_MIB_INDATAGRAMS); + spin_lock_bh(&xprt->transport_lock); xprt_adjust_cwnd(xprt, task, copied); + spin_unlock_bh(&xprt->transport_lock); + spin_lock(&xprt->recv_lock); xprt_complete_rqst(task, copied); - +out_unpin: + xprt_unpin_rqst(rovr); out_unlock: - spin_unlock_bh(&xprt->transport_lock); + spin_unlock(&xprt->recv_lock); } static void xs_udp_data_receive(struct sock_xprt *transport) @@ -1277,25 +1291,12 @@ static inline void xs_tcp_read_common(struct rpc_xprt *xprt, } len = desc->count; - if (len > transport->tcp_reclen - transport->tcp_offset) { - struct xdr_skb_reader my_desc; - - len = transport->tcp_reclen - transport->tcp_offset; - memcpy(&my_desc, desc, sizeof(my_desc)); - my_desc.count = len; - r = xdr_partial_copy_from_skb(rcvbuf, transport->tcp_copied, - &my_desc, xdr_skb_read_bits); - desc->count -= r; - desc->offset += r; - } else - r = xdr_partial_copy_from_skb(rcvbuf, transport->tcp_copied, + if (len > transport->tcp_reclen - transport->tcp_offset) + desc->count = transport->tcp_reclen - transport->tcp_offset; + r = xdr_partial_copy_from_skb(rcvbuf, transport->tcp_copied, desc, xdr_skb_read_bits); - if (r > 0) { - transport->tcp_copied += r; - transport->tcp_offset += r; - } - if (r != len) { + if (desc->count) { /* Error when copying to the receive buffer, * usually because we weren't able to allocate * additional buffer pages. All we can do now @@ -1315,6 +1316,10 @@ static inline void xs_tcp_read_common(struct rpc_xprt *xprt, return; } + transport->tcp_copied += r; + transport->tcp_offset += r; + desc->count = len - r; + dprintk("RPC: XID %08x read %zd bytes\n", ntohl(transport->tcp_xid), r); dprintk("RPC: xprt = %p, tcp_copied = %lu, tcp_offset = %u, " @@ -1343,21 +1348,24 @@ static inline int xs_tcp_read_reply(struct rpc_xprt *xprt, dprintk("RPC: read reply XID %08x\n", ntohl(transport->tcp_xid)); /* Find and lock the request corresponding to this xid */ - spin_lock_bh(&xprt->transport_lock); + spin_lock(&xprt->recv_lock); req = xprt_lookup_rqst(xprt, transport->tcp_xid); if (!req) { dprintk("RPC: XID %08x request not found!\n", ntohl(transport->tcp_xid)); - spin_unlock_bh(&xprt->transport_lock); + spin_unlock(&xprt->recv_lock); return -1; } + xprt_pin_rqst(req); + spin_unlock(&xprt->recv_lock); xs_tcp_read_common(xprt, desc, req); + spin_lock(&xprt->recv_lock); if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) xprt_complete_rqst(req->rq_task, transport->tcp_copied); - - spin_unlock_bh(&xprt->transport_lock); + xprt_unpin_rqst(req); + spin_unlock(&xprt->recv_lock); return 0; } @@ -1376,11 +1384,9 @@ static int xs_tcp_read_callback(struct rpc_xprt *xprt, container_of(xprt, struct sock_xprt, xprt); struct rpc_rqst *req; - /* Look up and lock the request corresponding to the given XID */ - spin_lock_bh(&xprt->transport_lock); + /* Look up the request corresponding to the given XID */ req = xprt_lookup_bc_request(xprt, transport->tcp_xid); if (req == NULL) { - spin_unlock_bh(&xprt->transport_lock); printk(KERN_WARNING "Callback slot table overflowed\n"); xprt_force_disconnect(xprt); return -1; @@ -1391,7 +1397,6 @@ static int xs_tcp_read_callback(struct rpc_xprt *xprt, if (!(transport->tcp_flags & TCP_RCV_COPY_DATA)) xprt_complete_bc_request(req, transport->tcp_copied); - spin_unlock_bh(&xprt->transport_lock); return 0; } @@ -1516,6 +1521,7 @@ static void xs_tcp_data_receive(struct sock_xprt *transport) .arg.data = xprt, }; unsigned long total = 0; + int loop; int read = 0; mutex_lock(&transport->recv_mutex); @@ -1524,20 +1530,20 @@ static void xs_tcp_data_receive(struct sock_xprt *transport) goto out; /* We use rd_desc to pass struct xprt to xs_tcp_data_recv */ - for (;;) { + for (loop = 0; loop < 64; loop++) { lock_sock(sk); read = tcp_read_sock(sk, &rd_desc, xs_tcp_data_recv); if (read <= 0) { clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state); release_sock(sk); - if (!test_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) - break; - } else { - release_sock(sk); - total += read; + break; } + release_sock(sk); + total += read; rd_desc.count = 65536; } + if (test_bit(XPRT_SOCK_DATA_READY, &transport->sock_state)) + queue_work(xprtiod_workqueue, &transport->recv_worker); out: mutex_unlock(&transport->recv_mutex); trace_xs_tcp_data_ready(xprt, read, total); @@ -1606,6 +1612,7 @@ static void xs_tcp_state_change(struct sock *sk) xprt->connect_cookie++; clear_bit(XPRT_CONNECTED, &xprt->state); xs_tcp_force_close(xprt); + /* fall through */ case TCP_CLOSING: /* * If the server closed down the connection, make sure that @@ -2199,7 +2206,7 @@ static void xs_udp_setup_socket(struct work_struct *work) struct sock_xprt *transport = container_of(work, struct sock_xprt, connect_worker.work); struct rpc_xprt *xprt = &transport->xprt; - struct socket *sock = transport->sock; + struct socket *sock; int status = -EIO; sock = xs_create_sock(xprt, transport, @@ -2363,6 +2370,7 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock) switch (ret) { case 0: xs_set_srcport(transport, sock); + /* fall through */ case -EINPROGRESS: /* SYN_SENT! */ if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO) @@ -2414,6 +2422,7 @@ static void xs_tcp_setup_socket(struct work_struct *work) default: printk("%s: connect returned unhandled error %d\n", __func__, status); + /* fall through */ case -EADDRNOTAVAIL: /* We're probably in TIME_WAIT. Get rid of existing socket, * and retry @@ -2724,7 +2733,7 @@ static void bc_destroy(struct rpc_xprt *xprt) module_put(THIS_MODULE); } -static struct rpc_xprt_ops xs_local_ops = { +static const struct rpc_xprt_ops xs_local_ops = { .reserve_xprt = xprt_reserve_xprt, .release_xprt = xs_tcp_release_xprt, .alloc_slot = xprt_alloc_slot, @@ -2742,7 +2751,7 @@ static struct rpc_xprt_ops xs_local_ops = { .disable_swap = xs_disable_swap, }; -static struct rpc_xprt_ops xs_udp_ops = { +static const struct rpc_xprt_ops xs_udp_ops = { .set_buffer_size = xs_udp_set_buffer_size, .reserve_xprt = xprt_reserve_xprt_cong, .release_xprt = xprt_release_xprt_cong, @@ -2764,7 +2773,7 @@ static struct rpc_xprt_ops xs_udp_ops = { .inject_disconnect = xs_inject_disconnect, }; -static struct rpc_xprt_ops xs_tcp_ops = { +static const struct rpc_xprt_ops xs_tcp_ops = { .reserve_xprt = xprt_reserve_xprt, .release_xprt = xs_tcp_release_xprt, .alloc_slot = xprt_lock_and_alloc_slot, @@ -2795,7 +2804,7 @@ static struct rpc_xprt_ops xs_tcp_ops = { * The rpc_xprt_ops for the server backchannel */ -static struct rpc_xprt_ops bc_tcp_ops = { +static const struct rpc_xprt_ops bc_tcp_ops = { .reserve_xprt = xprt_reserve_xprt, .release_xprt = xprt_release_xprt, .alloc_slot = xprt_alloc_slot, diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 25dc67ef9d37..74b9d916a58b 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -343,10 +343,10 @@ static size_t switchdev_obj_size(const struct switchdev_obj *obj) switch (obj->id) { case SWITCHDEV_OBJ_ID_PORT_VLAN: return sizeof(struct switchdev_obj_port_vlan); - case SWITCHDEV_OBJ_ID_PORT_FDB: - return sizeof(struct switchdev_obj_port_fdb); case SWITCHDEV_OBJ_ID_PORT_MDB: return sizeof(struct switchdev_obj_port_mdb); + case SWITCHDEV_OBJ_ID_HOST_MDB: + return sizeof(struct switchdev_obj_port_mdb); default: BUG(); } @@ -534,43 +534,6 @@ int switchdev_port_obj_del(struct net_device *dev, } EXPORT_SYMBOL_GPL(switchdev_port_obj_del); -/** - * switchdev_port_obj_dump - Dump port objects - * - * @dev: port device - * @id: object ID - * @obj: object to dump - * @cb: function to call with a filled object - * - * rtnl_lock must be held. - */ -int switchdev_port_obj_dump(struct net_device *dev, struct switchdev_obj *obj, - switchdev_obj_dump_cb_t *cb) -{ - const struct switchdev_ops *ops = dev->switchdev_ops; - struct net_device *lower_dev; - struct list_head *iter; - int err = -EOPNOTSUPP; - - ASSERT_RTNL(); - - if (ops && ops->switchdev_port_obj_dump) - return ops->switchdev_port_obj_dump(dev, obj, cb); - - /* Switch device port(s) may be stacked under - * bond/team/vlan dev, so recurse down to dump objects on - * first port at bottom of stack. - */ - - netdev_for_each_lower_dev(dev, lower_dev, iter) { - err = switchdev_port_obj_dump(lower_dev, obj, cb); - break; - } - - return err; -} -EXPORT_SYMBOL_GPL(switchdev_port_obj_dump); - static ATOMIC_NOTIFIER_HEAD(switchdev_notif_chain); /** @@ -613,486 +576,6 @@ int call_switchdev_notifiers(unsigned long val, struct net_device *dev, } EXPORT_SYMBOL_GPL(call_switchdev_notifiers); -struct switchdev_vlan_dump { - struct switchdev_obj_port_vlan vlan; - struct sk_buff *skb; - u32 filter_mask; - u16 flags; - u16 begin; - u16 end; -}; - -static int switchdev_port_vlan_dump_put(struct switchdev_vlan_dump *dump) -{ - struct bridge_vlan_info vinfo; - - vinfo.flags = dump->flags; - - if (dump->begin == 0 && dump->end == 0) { - return 0; - } else if (dump->begin == dump->end) { - vinfo.vid = dump->begin; - if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO, - sizeof(vinfo), &vinfo)) - return -EMSGSIZE; - } else { - vinfo.vid = dump->begin; - vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_BEGIN; - if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO, - sizeof(vinfo), &vinfo)) - return -EMSGSIZE; - vinfo.vid = dump->end; - vinfo.flags &= ~BRIDGE_VLAN_INFO_RANGE_BEGIN; - vinfo.flags |= BRIDGE_VLAN_INFO_RANGE_END; - if (nla_put(dump->skb, IFLA_BRIDGE_VLAN_INFO, - sizeof(vinfo), &vinfo)) - return -EMSGSIZE; - } - - return 0; -} - -static int switchdev_port_vlan_dump_cb(struct switchdev_obj *obj) -{ - struct switchdev_obj_port_vlan *vlan = SWITCHDEV_OBJ_PORT_VLAN(obj); - struct switchdev_vlan_dump *dump = - container_of(vlan, struct switchdev_vlan_dump, vlan); - int err = 0; - - if (vlan->vid_begin > vlan->vid_end) - return -EINVAL; - - if (dump->filter_mask & RTEXT_FILTER_BRVLAN) { - dump->flags = vlan->flags; - for (dump->begin = dump->end = vlan->vid_begin; - dump->begin <= vlan->vid_end; - dump->begin++, dump->end++) { - err = switchdev_port_vlan_dump_put(dump); - if (err) - return err; - } - } else if (dump->filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) { - if (dump->begin > vlan->vid_begin && - dump->begin >= vlan->vid_end) { - if ((dump->begin - 1) == vlan->vid_end && - dump->flags == vlan->flags) { - /* prepend */ - dump->begin = vlan->vid_begin; - } else { - err = switchdev_port_vlan_dump_put(dump); - dump->flags = vlan->flags; - dump->begin = vlan->vid_begin; - dump->end = vlan->vid_end; - } - } else if (dump->end <= vlan->vid_begin && - dump->end < vlan->vid_end) { - if ((dump->end + 1) == vlan->vid_begin && - dump->flags == vlan->flags) { - /* append */ - dump->end = vlan->vid_end; - } else { - err = switchdev_port_vlan_dump_put(dump); - dump->flags = vlan->flags; - dump->begin = vlan->vid_begin; - dump->end = vlan->vid_end; - } - } else { - err = -EINVAL; - } - } - - return err; -} - -static int switchdev_port_vlan_fill(struct sk_buff *skb, struct net_device *dev, - u32 filter_mask) -{ - struct switchdev_vlan_dump dump = { - .vlan.obj.orig_dev = dev, - .vlan.obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, - .skb = skb, - .filter_mask = filter_mask, - }; - int err = 0; - - if ((filter_mask & RTEXT_FILTER_BRVLAN) || - (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED)) { - err = switchdev_port_obj_dump(dev, &dump.vlan.obj, - switchdev_port_vlan_dump_cb); - if (err) - goto err_out; - if (filter_mask & RTEXT_FILTER_BRVLAN_COMPRESSED) - /* last one */ - err = switchdev_port_vlan_dump_put(&dump); - } - -err_out: - return err == -EOPNOTSUPP ? 0 : err; -} - -/** - * switchdev_port_bridge_getlink - Get bridge port attributes - * - * @dev: port device - * - * Called for SELF on rtnl_bridge_getlink to get bridge port - * attributes. - */ -int switchdev_port_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, - struct net_device *dev, u32 filter_mask, - int nlflags) -{ - struct switchdev_attr attr = { - .orig_dev = dev, - .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS, - }; - u16 mode = BRIDGE_MODE_UNDEF; - u32 mask = BR_LEARNING | BR_LEARNING_SYNC | BR_FLOOD; - int err; - - if (!netif_is_bridge_port(dev)) - return -EOPNOTSUPP; - - err = switchdev_port_attr_get(dev, &attr); - if (err && err != -EOPNOTSUPP) - return err; - - return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, - attr.u.brport_flags, mask, nlflags, - filter_mask, switchdev_port_vlan_fill); -} -EXPORT_SYMBOL_GPL(switchdev_port_bridge_getlink); - -static int switchdev_port_br_setflag(struct net_device *dev, - struct nlattr *nlattr, - unsigned long brport_flag) -{ - struct switchdev_attr attr = { - .orig_dev = dev, - .id = SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS, - }; - u8 flag = nla_get_u8(nlattr); - int err; - - err = switchdev_port_attr_get(dev, &attr); - if (err) - return err; - - if (flag) - attr.u.brport_flags |= brport_flag; - else - attr.u.brport_flags &= ~brport_flag; - - return switchdev_port_attr_set(dev, &attr); -} - -static const struct nla_policy -switchdev_port_bridge_policy[IFLA_BRPORT_MAX + 1] = { - [IFLA_BRPORT_STATE] = { .type = NLA_U8 }, - [IFLA_BRPORT_COST] = { .type = NLA_U32 }, - [IFLA_BRPORT_PRIORITY] = { .type = NLA_U16 }, - [IFLA_BRPORT_MODE] = { .type = NLA_U8 }, - [IFLA_BRPORT_GUARD] = { .type = NLA_U8 }, - [IFLA_BRPORT_PROTECT] = { .type = NLA_U8 }, - [IFLA_BRPORT_FAST_LEAVE] = { .type = NLA_U8 }, - [IFLA_BRPORT_LEARNING] = { .type = NLA_U8 }, - [IFLA_BRPORT_LEARNING_SYNC] = { .type = NLA_U8 }, - [IFLA_BRPORT_UNICAST_FLOOD] = { .type = NLA_U8 }, -}; - -static int switchdev_port_br_setlink_protinfo(struct net_device *dev, - struct nlattr *protinfo) -{ - struct nlattr *attr; - int rem; - int err; - - err = nla_validate_nested(protinfo, IFLA_BRPORT_MAX, - switchdev_port_bridge_policy, NULL); - if (err) - return err; - - nla_for_each_nested(attr, protinfo, rem) { - switch (nla_type(attr)) { - case IFLA_BRPORT_LEARNING: - err = switchdev_port_br_setflag(dev, attr, - BR_LEARNING); - break; - case IFLA_BRPORT_LEARNING_SYNC: - err = switchdev_port_br_setflag(dev, attr, - BR_LEARNING_SYNC); - break; - case IFLA_BRPORT_UNICAST_FLOOD: - err = switchdev_port_br_setflag(dev, attr, BR_FLOOD); - break; - default: - err = -EOPNOTSUPP; - break; - } - if (err) - return err; - } - - return 0; -} - -static int switchdev_port_br_afspec(struct net_device *dev, - struct nlattr *afspec, - int (*f)(struct net_device *dev, - const struct switchdev_obj *obj)) -{ - struct nlattr *attr; - struct bridge_vlan_info *vinfo; - struct switchdev_obj_port_vlan vlan = { - .obj.orig_dev = dev, - .obj.id = SWITCHDEV_OBJ_ID_PORT_VLAN, - }; - int rem; - int err; - - nla_for_each_nested(attr, afspec, rem) { - if (nla_type(attr) != IFLA_BRIDGE_VLAN_INFO) - continue; - if (nla_len(attr) != sizeof(struct bridge_vlan_info)) - return -EINVAL; - vinfo = nla_data(attr); - if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK) - return -EINVAL; - vlan.flags = vinfo->flags; - if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) { - if (vlan.vid_begin) - return -EINVAL; - vlan.vid_begin = vinfo->vid; - /* don't allow range of pvids */ - if (vlan.flags & BRIDGE_VLAN_INFO_PVID) - return -EINVAL; - } else if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END) { - if (!vlan.vid_begin) - return -EINVAL; - vlan.vid_end = vinfo->vid; - if (vlan.vid_end <= vlan.vid_begin) - return -EINVAL; - err = f(dev, &vlan.obj); - if (err) - return err; - vlan.vid_begin = 0; - } else { - if (vlan.vid_begin) - return -EINVAL; - vlan.vid_begin = vinfo->vid; - vlan.vid_end = vinfo->vid; - err = f(dev, &vlan.obj); - if (err) - return err; - vlan.vid_begin = 0; - } - } - - return 0; -} - -/** - * switchdev_port_bridge_setlink - Set bridge port attributes - * - * @dev: port device - * @nlh: netlink header - * @flags: netlink flags - * - * Called for SELF on rtnl_bridge_setlink to set bridge port - * attributes. - */ -int switchdev_port_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags) -{ - struct nlattr *protinfo; - struct nlattr *afspec; - int err = 0; - - if (!netif_is_bridge_port(dev)) - return -EOPNOTSUPP; - - protinfo = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), - IFLA_PROTINFO); - if (protinfo) { - err = switchdev_port_br_setlink_protinfo(dev, protinfo); - if (err) - return err; - } - - afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), - IFLA_AF_SPEC); - if (afspec) - err = switchdev_port_br_afspec(dev, afspec, - switchdev_port_obj_add); - - return err; -} -EXPORT_SYMBOL_GPL(switchdev_port_bridge_setlink); - -/** - * switchdev_port_bridge_dellink - Set bridge port attributes - * - * @dev: port device - * @nlh: netlink header - * @flags: netlink flags - * - * Called for SELF on rtnl_bridge_dellink to set bridge port - * attributes. - */ -int switchdev_port_bridge_dellink(struct net_device *dev, - struct nlmsghdr *nlh, u16 flags) -{ - struct nlattr *afspec; - - if (!netif_is_bridge_port(dev)) - return -EOPNOTSUPP; - - afspec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), - IFLA_AF_SPEC); - if (afspec) - return switchdev_port_br_afspec(dev, afspec, - switchdev_port_obj_del); - - return 0; -} -EXPORT_SYMBOL_GPL(switchdev_port_bridge_dellink); - -/** - * switchdev_port_fdb_add - Add FDB (MAC/VLAN) entry to port - * - * @ndmsg: netlink hdr - * @nlattr: netlink attributes - * @dev: port device - * @addr: MAC address to add - * @vid: VLAN to add - * - * Add FDB entry to switch device. - */ -int switchdev_port_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, const unsigned char *addr, - u16 vid, u16 nlm_flags) -{ - struct switchdev_obj_port_fdb fdb = { - .obj.orig_dev = dev, - .obj.id = SWITCHDEV_OBJ_ID_PORT_FDB, - .vid = vid, - }; - - ether_addr_copy(fdb.addr, addr); - return switchdev_port_obj_add(dev, &fdb.obj); -} -EXPORT_SYMBOL_GPL(switchdev_port_fdb_add); - -/** - * switchdev_port_fdb_del - Delete FDB (MAC/VLAN) entry from port - * - * @ndmsg: netlink hdr - * @nlattr: netlink attributes - * @dev: port device - * @addr: MAC address to delete - * @vid: VLAN to delete - * - * Delete FDB entry from switch device. - */ -int switchdev_port_fdb_del(struct ndmsg *ndm, struct nlattr *tb[], - struct net_device *dev, const unsigned char *addr, - u16 vid) -{ - struct switchdev_obj_port_fdb fdb = { - .obj.orig_dev = dev, - .obj.id = SWITCHDEV_OBJ_ID_PORT_FDB, - .vid = vid, - }; - - ether_addr_copy(fdb.addr, addr); - return switchdev_port_obj_del(dev, &fdb.obj); -} -EXPORT_SYMBOL_GPL(switchdev_port_fdb_del); - -struct switchdev_fdb_dump { - struct switchdev_obj_port_fdb fdb; - struct net_device *dev; - struct sk_buff *skb; - struct netlink_callback *cb; - int idx; -}; - -static int switchdev_port_fdb_dump_cb(struct switchdev_obj *obj) -{ - struct switchdev_obj_port_fdb *fdb = SWITCHDEV_OBJ_PORT_FDB(obj); - struct switchdev_fdb_dump *dump = - container_of(fdb, struct switchdev_fdb_dump, fdb); - u32 portid = NETLINK_CB(dump->cb->skb).portid; - u32 seq = dump->cb->nlh->nlmsg_seq; - struct nlmsghdr *nlh; - struct ndmsg *ndm; - - if (dump->idx < dump->cb->args[2]) - goto skip; - - nlh = nlmsg_put(dump->skb, portid, seq, RTM_NEWNEIGH, - sizeof(*ndm), NLM_F_MULTI); - if (!nlh) - return -EMSGSIZE; - - ndm = nlmsg_data(nlh); - ndm->ndm_family = AF_BRIDGE; - ndm->ndm_pad1 = 0; - ndm->ndm_pad2 = 0; - ndm->ndm_flags = NTF_SELF; - ndm->ndm_type = 0; - ndm->ndm_ifindex = dump->dev->ifindex; - ndm->ndm_state = fdb->ndm_state; - - if (nla_put(dump->skb, NDA_LLADDR, ETH_ALEN, fdb->addr)) - goto nla_put_failure; - - if (fdb->vid && nla_put_u16(dump->skb, NDA_VLAN, fdb->vid)) - goto nla_put_failure; - - nlmsg_end(dump->skb, nlh); - -skip: - dump->idx++; - return 0; - -nla_put_failure: - nlmsg_cancel(dump->skb, nlh); - return -EMSGSIZE; -} - -/** - * switchdev_port_fdb_dump - Dump port FDB (MAC/VLAN) entries - * - * @skb: netlink skb - * @cb: netlink callback - * @dev: port device - * @filter_dev: filter device - * @idx: - * - * Dump FDB entries from switch device. - */ -int switchdev_port_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb, - struct net_device *dev, - struct net_device *filter_dev, int *idx) -{ - struct switchdev_fdb_dump dump = { - .fdb.obj.orig_dev = dev, - .fdb.obj.id = SWITCHDEV_OBJ_ID_PORT_FDB, - .dev = dev, - .skb = skb, - .cb = cb, - .idx = *idx, - }; - int err; - - err = switchdev_port_obj_dump(dev, &dump.fdb.obj, - switchdev_port_fdb_dump_cb); - *idx = dump.idx; - return err; -} -EXPORT_SYMBOL_GPL(switchdev_port_fdb_dump); - bool switchdev_port_same_parent_id(struct net_device *a, struct net_device *b) { diff --git a/net/tipc/Makefile b/net/tipc/Makefile index 31b9f9c52974..37bb0bfbd936 100644 --- a/net/tipc/Makefile +++ b/net/tipc/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the Linux TIPC layer # @@ -8,7 +9,7 @@ tipc-y += addr.o bcast.o bearer.o \ core.o link.o discover.o msg.o \ name_distr.o subscr.o monitor.o name_table.o net.o \ netlink.o netlink_compat.o node.o socket.o eth_media.o \ - server.o socket.o + server.o socket.o group.o tipc-$(CONFIG_TIPC_MEDIA_UDP) += udp_media.o tipc-$(CONFIG_TIPC_MEDIA_IB) += ib_media.o diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index 7d99029df342..329325bd553e 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -233,7 +233,7 @@ static int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts, struct sk_buff_head xmitq; int rc = 0; - __skb_queue_head_init(&xmitq); + skb_queue_head_init(&xmitq); tipc_bcast_lock(net); if (tipc_link_bc_peers(l)) rc = tipc_link_xmit(l, pkts, &xmitq); @@ -258,20 +258,20 @@ static int tipc_bcast_xmit(struct net *net, struct sk_buff_head *pkts, static int tipc_rcast_xmit(struct net *net, struct sk_buff_head *pkts, struct tipc_nlist *dests, u16 *cong_link_cnt) { + struct tipc_dest *dst, *tmp; struct sk_buff_head _pkts; - struct u32_item *n, *tmp; - u32 dst, selector; + u32 dnode, selector; selector = msg_link_selector(buf_msg(skb_peek(pkts))); - __skb_queue_head_init(&_pkts); + skb_queue_head_init(&_pkts); - list_for_each_entry_safe(n, tmp, &dests->list, list) { - dst = n->value; - if (!tipc_msg_pskb_copy(dst, pkts, &_pkts)) + list_for_each_entry_safe(dst, tmp, &dests->list, list) { + dnode = dst->node; + if (!tipc_msg_pskb_copy(dnode, pkts, &_pkts)) return -ENOMEM; /* Any other return value than -ELINKCONG is ignored */ - if (tipc_node_xmit(net, &_pkts, dst, selector) == -ELINKCONG) + if (tipc_node_xmit(net, &_pkts, dnode, selector) == -ELINKCONG) (*cong_link_cnt)++; } return 0; @@ -554,7 +554,7 @@ void tipc_nlist_add(struct tipc_nlist *nl, u32 node) { if (node == nl->self) nl->local = true; - else if (u32_push(&nl->list, node)) + else if (tipc_dest_push(&nl->list, node, 0)) nl->remote++; } @@ -562,13 +562,13 @@ void tipc_nlist_del(struct tipc_nlist *nl, u32 node) { if (node == nl->self) nl->local = false; - else if (u32_del(&nl->list, node)) + else if (tipc_dest_del(&nl->list, node, 0)) nl->remote--; } void tipc_nlist_purge(struct tipc_nlist *nl) { - u32_list_purge(&nl->list); + tipc_dest_list_purge(&nl->list); nl->remote = 0; nl->local = 0; } diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 767e0537dde5..47ec121574ce 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -65,6 +65,8 @@ static struct tipc_bearer *bearer_get(struct net *net, int bearer_id) } static void bearer_disable(struct net *net, struct tipc_bearer *b); +static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev); /** * tipc_media_find - locates specified media object by name @@ -365,30 +367,6 @@ static int tipc_reset_bearer(struct net *net, struct tipc_bearer *b) return 0; } -/* tipc_bearer_reset_all - reset all links on all bearers - */ -void tipc_bearer_reset_all(struct net *net) -{ - struct tipc_bearer *b; - int i; - - for (i = 0; i < MAX_BEARERS; i++) { - b = bearer_get(net, i); - if (b) - clear_bit_unlock(0, &b->up); - } - for (i = 0; i < MAX_BEARERS; i++) { - b = bearer_get(net, i); - if (b) - tipc_reset_bearer(net, b); - } - for (i = 0; i < MAX_BEARERS; i++) { - b = bearer_get(net, i); - if (b) - test_and_set_bit_lock(0, &b->up); - } -} - /** * bearer_disable * @@ -428,6 +406,10 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, /* Associate TIPC bearer with L2 bearer */ rcu_assign_pointer(b->media_ptr, dev); + b->pt.dev = dev; + b->pt.type = htons(ETH_P_TIPC); + b->pt.func = tipc_l2_rcv_msg; + dev_add_pack(&b->pt); memset(&b->bcast_addr, 0, sizeof(b->bcast_addr)); memcpy(b->bcast_addr.value, dev->broadcast, b->media->hwaddr_len); b->bcast_addr.media_id = b->media->type_id; @@ -447,6 +429,7 @@ void tipc_disable_l2_media(struct tipc_bearer *b) struct net_device *dev; dev = (struct net_device *)rtnl_dereference(b->media_ptr); + dev_remove_pack(&b->pt); RCU_INIT_POINTER(dev->tipc_ptr, NULL); synchronize_net(); dev_put(dev); @@ -594,11 +577,12 @@ static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev, struct tipc_bearer *b; rcu_read_lock(); - b = rcu_dereference_rtnl(dev->tipc_ptr); + b = rcu_dereference_rtnl(dev->tipc_ptr) ?: + rcu_dereference_rtnl(orig_dev->tipc_ptr); if (likely(b && test_bit(0, &b->up) && (skb->pkt_type <= PACKET_MULTICAST))) { skb->next = NULL; - tipc_rcv(dev_net(dev), skb, b); + tipc_rcv(dev_net(b->pt.dev), skb, b); rcu_read_unlock(); return NET_RX_SUCCESS; } @@ -653,17 +637,12 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, break; case NETDEV_UNREGISTER: case NETDEV_CHANGENAME: - bearer_disable(dev_net(dev), b); + bearer_disable(net, b); break; } return NOTIFY_OK; } -static struct packet_type tipc_packet_type __read_mostly = { - .type = htons(ETH_P_TIPC), - .func = tipc_l2_rcv_msg, -}; - static struct notifier_block notifier = { .notifier_call = tipc_l2_device_event, .priority = 0, @@ -671,19 +650,12 @@ static struct notifier_block notifier = { int tipc_bearer_setup(void) { - int err; - - err = register_netdevice_notifier(¬ifier); - if (err) - return err; - dev_add_pack(&tipc_packet_type); - return 0; + return register_netdevice_notifier(¬ifier); } void tipc_bearer_cleanup(void) { unregister_netdevice_notifier(¬ifier); - dev_remove_pack(&tipc_packet_type); } void tipc_bearer_stop(struct net *net) diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 635c9086e19a..42d6eeeb646d 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -131,6 +131,7 @@ struct tipc_media { * @name: bearer name (format = media:interface) * @media: ptr to media structure associated with bearer * @bcast_addr: media address used in broadcasting + * @pt: packet type for bearer * @rcu: rcu struct for tipc_bearer * @priority: default link priority for bearer * @window: default window size for bearer @@ -151,6 +152,7 @@ struct tipc_bearer { char name[TIPC_MAX_BEARER_NAME]; struct tipc_media *media; struct tipc_media_addr bcast_addr; + struct packet_type pt; struct rcu_head rcu; u32 priority; u32 window; @@ -210,7 +212,6 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest); struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name); int tipc_bearer_get_name(struct net *net, char *name, u32 bearer_id); struct tipc_media *tipc_media_find(const char *name); -void tipc_bearer_reset_all(struct net *net); int tipc_bearer_setup(void); void tipc_bearer_cleanup(void); void tipc_bearer_stop(struct net *net); diff --git a/net/tipc/core.h b/net/tipc/core.h index 5cc5398be722..964342689f2c 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -132,6 +132,11 @@ static inline struct list_head *tipc_nodes(struct net *net) return &tipc_net(net)->node_list; } +static inline struct tipc_server *tipc_topsrv(struct net *net) +{ + return tipc_net(net)->topsrv; +} + static inline unsigned int tipc_hashfn(u32 addr) { return addr & (NODE_HTABLE_SIZE - 1); diff --git a/net/tipc/discover.c b/net/tipc/discover.c index 02462d67d191..92e4828c6b09 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -224,9 +224,9 @@ void tipc_disc_remove_dest(struct tipc_link_req *req) * * Called whenever a link setup request timer associated with a bearer expires. */ -static void disc_timeout(unsigned long data) +static void disc_timeout(struct timer_list *t) { - struct tipc_link_req *req = (struct tipc_link_req *)data; + struct tipc_link_req *req = from_timer(req, t, timer); struct sk_buff *skb; int max_delay; @@ -292,7 +292,7 @@ int tipc_disc_create(struct net *net, struct tipc_bearer *b, req->num_nodes = 0; req->timer_intv = TIPC_LINK_REQ_INIT; spin_lock_init(&req->lock); - setup_timer(&req->timer, disc_timeout, (unsigned long)req); + timer_setup(&req->timer, disc_timeout, 0); mod_timer(&req->timer, jiffies + req->timer_intv); b->link_req = req; *skb = skb_clone(req->buf, GFP_ATOMIC); diff --git a/net/tipc/group.c b/net/tipc/group.c new file mode 100644 index 000000000000..7821085a7dd8 --- /dev/null +++ b/net/tipc/group.c @@ -0,0 +1,871 @@ +/* + * net/tipc/group.c: TIPC group messaging code + * + * Copyright (c) 2017, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "core.h" +#include "addr.h" +#include "group.h" +#include "bcast.h" +#include "server.h" +#include "msg.h" +#include "socket.h" +#include "node.h" +#include "name_table.h" +#include "subscr.h" + +#define ADV_UNIT (((MAX_MSG_SIZE + MAX_H_SIZE) / FLOWCTL_BLK_SZ) + 1) +#define ADV_IDLE ADV_UNIT +#define ADV_ACTIVE (ADV_UNIT * 12) + +enum mbr_state { + MBR_QUARANTINED, + MBR_DISCOVERED, + MBR_JOINING, + MBR_PUBLISHED, + MBR_JOINED, + MBR_PENDING, + MBR_ACTIVE, + MBR_RECLAIMING, + MBR_REMITTED, + MBR_LEAVING +}; + +struct tipc_member { + struct rb_node tree_node; + struct list_head list; + struct list_head congested; + struct sk_buff *event_msg; + struct sk_buff_head deferredq; + struct tipc_group *group; + u32 node; + u32 port; + u32 instance; + enum mbr_state state; + u16 advertised; + u16 window; + u16 bc_rcv_nxt; + u16 bc_syncpt; + u16 bc_acked; + bool usr_pending; +}; + +struct tipc_group { + struct rb_root members; + struct list_head congested; + struct list_head pending; + struct list_head active; + struct list_head reclaiming; + struct tipc_nlist dests; + struct net *net; + int subid; + u32 type; + u32 instance; + u32 domain; + u32 scope; + u32 portid; + u16 member_cnt; + u16 active_cnt; + u16 max_active; + u16 bc_snd_nxt; + u16 bc_ackers; + bool loopback; + bool events; +}; + +static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m, + int mtyp, struct sk_buff_head *xmitq); + +static void tipc_group_decr_active(struct tipc_group *grp, + struct tipc_member *m) +{ + if (m->state == MBR_ACTIVE || m->state == MBR_RECLAIMING) + grp->active_cnt--; +} + +static int tipc_group_rcvbuf_limit(struct tipc_group *grp) +{ + int max_active, active_pool, idle_pool; + int mcnt = grp->member_cnt + 1; + + /* Limit simultaneous reception from other members */ + max_active = min(mcnt / 8, 64); + max_active = max(max_active, 16); + grp->max_active = max_active; + + /* Reserve blocks for active and idle members */ + active_pool = max_active * ADV_ACTIVE; + idle_pool = (mcnt - max_active) * ADV_IDLE; + + /* Scale to bytes, considering worst-case truesize/msgsize ratio */ + return (active_pool + idle_pool) * FLOWCTL_BLK_SZ * 4; +} + +u16 tipc_group_bc_snd_nxt(struct tipc_group *grp) +{ + return grp->bc_snd_nxt; +} + +static bool tipc_group_is_enabled(struct tipc_member *m) +{ + return m->state != MBR_QUARANTINED && m->state != MBR_LEAVING; +} + +static bool tipc_group_is_receiver(struct tipc_member *m) +{ + return m && m->state >= MBR_JOINED; +} + +u32 tipc_group_exclude(struct tipc_group *grp) +{ + if (!grp->loopback) + return grp->portid; + return 0; +} + +int tipc_group_size(struct tipc_group *grp) +{ + return grp->member_cnt; +} + +struct tipc_group *tipc_group_create(struct net *net, u32 portid, + struct tipc_group_req *mreq) +{ + struct tipc_group *grp; + u32 type = mreq->type; + + grp = kzalloc(sizeof(*grp), GFP_ATOMIC); + if (!grp) + return NULL; + tipc_nlist_init(&grp->dests, tipc_own_addr(net)); + INIT_LIST_HEAD(&grp->congested); + INIT_LIST_HEAD(&grp->active); + INIT_LIST_HEAD(&grp->pending); + INIT_LIST_HEAD(&grp->reclaiming); + grp->members = RB_ROOT; + grp->net = net; + grp->portid = portid; + grp->domain = addr_domain(net, mreq->scope); + grp->type = type; + grp->instance = mreq->instance; + grp->scope = mreq->scope; + grp->loopback = mreq->flags & TIPC_GROUP_LOOPBACK; + grp->events = mreq->flags & TIPC_GROUP_MEMBER_EVTS; + if (tipc_topsrv_kern_subscr(net, portid, type, 0, ~0, &grp->subid)) + return grp; + kfree(grp); + return NULL; +} + +void tipc_group_delete(struct net *net, struct tipc_group *grp) +{ + struct rb_root *tree = &grp->members; + struct tipc_member *m, *tmp; + struct sk_buff_head xmitq; + + __skb_queue_head_init(&xmitq); + + rbtree_postorder_for_each_entry_safe(m, tmp, tree, tree_node) { + tipc_group_proto_xmit(grp, m, GRP_LEAVE_MSG, &xmitq); + list_del(&m->list); + kfree(m); + } + tipc_node_distr_xmit(net, &xmitq); + tipc_nlist_purge(&grp->dests); + tipc_topsrv_kern_unsubscr(net, grp->subid); + kfree(grp); +} + +struct tipc_member *tipc_group_find_member(struct tipc_group *grp, + u32 node, u32 port) +{ + struct rb_node *n = grp->members.rb_node; + u64 nkey, key = (u64)node << 32 | port; + struct tipc_member *m; + + while (n) { + m = container_of(n, struct tipc_member, tree_node); + nkey = (u64)m->node << 32 | m->port; + if (key < nkey) + n = n->rb_left; + else if (key > nkey) + n = n->rb_right; + else + return m; + } + return NULL; +} + +static struct tipc_member *tipc_group_find_dest(struct tipc_group *grp, + u32 node, u32 port) +{ + struct tipc_member *m; + + m = tipc_group_find_member(grp, node, port); + if (m && tipc_group_is_enabled(m)) + return m; + return NULL; +} + +static struct tipc_member *tipc_group_find_node(struct tipc_group *grp, + u32 node) +{ + struct tipc_member *m; + struct rb_node *n; + + for (n = rb_first(&grp->members); n; n = rb_next(n)) { + m = container_of(n, struct tipc_member, tree_node); + if (m->node == node) + return m; + } + return NULL; +} + +static void tipc_group_add_to_tree(struct tipc_group *grp, + struct tipc_member *m) +{ + u64 nkey, key = (u64)m->node << 32 | m->port; + struct rb_node **n, *parent = NULL; + struct tipc_member *tmp; + + n = &grp->members.rb_node; + while (*n) { + tmp = container_of(*n, struct tipc_member, tree_node); + parent = *n; + tmp = container_of(parent, struct tipc_member, tree_node); + nkey = (u64)tmp->node << 32 | tmp->port; + if (key < nkey) + n = &(*n)->rb_left; + else if (key > nkey) + n = &(*n)->rb_right; + else + return; + } + rb_link_node(&m->tree_node, parent, n); + rb_insert_color(&m->tree_node, &grp->members); +} + +static struct tipc_member *tipc_group_create_member(struct tipc_group *grp, + u32 node, u32 port, + int state) +{ + struct tipc_member *m; + + m = kzalloc(sizeof(*m), GFP_ATOMIC); + if (!m) + return NULL; + INIT_LIST_HEAD(&m->list); + INIT_LIST_HEAD(&m->congested); + __skb_queue_head_init(&m->deferredq); + m->group = grp; + m->node = node; + m->port = port; + m->bc_acked = grp->bc_snd_nxt - 1; + grp->member_cnt++; + tipc_group_add_to_tree(grp, m); + tipc_nlist_add(&grp->dests, m->node); + m->state = state; + return m; +} + +void tipc_group_add_member(struct tipc_group *grp, u32 node, u32 port) +{ + tipc_group_create_member(grp, node, port, MBR_DISCOVERED); +} + +static void tipc_group_delete_member(struct tipc_group *grp, + struct tipc_member *m) +{ + rb_erase(&m->tree_node, &grp->members); + grp->member_cnt--; + + /* Check if we were waiting for replicast ack from this member */ + if (grp->bc_ackers && less(m->bc_acked, grp->bc_snd_nxt - 1)) + grp->bc_ackers--; + + list_del_init(&m->list); + list_del_init(&m->congested); + tipc_group_decr_active(grp, m); + + /* If last member on a node, remove node from dest list */ + if (!tipc_group_find_node(grp, m->node)) + tipc_nlist_del(&grp->dests, m->node); + + kfree(m); +} + +struct tipc_nlist *tipc_group_dests(struct tipc_group *grp) +{ + return &grp->dests; +} + +void tipc_group_self(struct tipc_group *grp, struct tipc_name_seq *seq, + int *scope) +{ + seq->type = grp->type; + seq->lower = grp->instance; + seq->upper = grp->instance; + *scope = grp->scope; +} + +void tipc_group_update_member(struct tipc_member *m, int len) +{ + struct tipc_group *grp = m->group; + struct tipc_member *_m, *tmp; + + if (!tipc_group_is_enabled(m)) + return; + + m->window -= len; + + if (m->window >= ADV_IDLE) + return; + + if (!list_empty(&m->congested)) + return; + + /* Sort member into congested members' list */ + list_for_each_entry_safe(_m, tmp, &grp->congested, congested) { + if (m->window > _m->window) + continue; + list_add_tail(&m->congested, &_m->congested); + return; + } + list_add_tail(&m->congested, &grp->congested); +} + +void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack) +{ + u16 prev = grp->bc_snd_nxt - 1; + struct tipc_member *m; + struct rb_node *n; + + for (n = rb_first(&grp->members); n; n = rb_next(n)) { + m = container_of(n, struct tipc_member, tree_node); + if (tipc_group_is_enabled(m)) { + tipc_group_update_member(m, len); + m->bc_acked = prev; + } + } + + /* Mark number of acknowledges to expect, if any */ + if (ack) + grp->bc_ackers = grp->member_cnt; + grp->bc_snd_nxt++; +} + +bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport, + int len, struct tipc_member **mbr) +{ + struct sk_buff_head xmitq; + struct tipc_member *m; + int adv, state; + + m = tipc_group_find_dest(grp, dnode, dport); + *mbr = m; + if (!m) + return false; + if (m->usr_pending) + return true; + if (m->window >= len) + return false; + m->usr_pending = true; + + /* If not fully advertised, do it now to prevent mutual blocking */ + adv = m->advertised; + state = m->state; + if (state < MBR_JOINED) + return true; + if (state == MBR_JOINED && adv == ADV_IDLE) + return true; + if (state == MBR_ACTIVE && adv == ADV_ACTIVE) + return true; + if (state == MBR_PENDING && adv == ADV_IDLE) + return true; + skb_queue_head_init(&xmitq); + tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, &xmitq); + tipc_node_distr_xmit(grp->net, &xmitq); + return true; +} + +bool tipc_group_bc_cong(struct tipc_group *grp, int len) +{ + struct tipc_member *m = NULL; + + /* If prev bcast was replicast, reject until all receivers have acked */ + if (grp->bc_ackers) + return true; + + if (list_empty(&grp->congested)) + return false; + + m = list_first_entry(&grp->congested, struct tipc_member, congested); + if (m->window >= len) + return false; + + return tipc_group_cong(grp, m->node, m->port, len, &m); +} + +/* tipc_group_sort_msg() - sort msg into queue by bcast sequence number + */ +static void tipc_group_sort_msg(struct sk_buff *skb, struct sk_buff_head *defq) +{ + struct tipc_msg *_hdr, *hdr = buf_msg(skb); + u16 bc_seqno = msg_grp_bc_seqno(hdr); + struct sk_buff *_skb, *tmp; + int mtyp = msg_type(hdr); + + /* Bcast/mcast may be bypassed by ucast or other bcast, - sort it in */ + if (mtyp == TIPC_GRP_BCAST_MSG || mtyp == TIPC_GRP_MCAST_MSG) { + skb_queue_walk_safe(defq, _skb, tmp) { + _hdr = buf_msg(_skb); + if (!less(bc_seqno, msg_grp_bc_seqno(_hdr))) + continue; + __skb_queue_before(defq, _skb, skb); + return; + } + /* Bcast was not bypassed, - add to tail */ + } + /* Unicasts are never bypassed, - always add to tail */ + __skb_queue_tail(defq, skb); +} + +/* tipc_group_filter_msg() - determine if we should accept arriving message + */ +void tipc_group_filter_msg(struct tipc_group *grp, struct sk_buff_head *inputq, + struct sk_buff_head *xmitq) +{ + struct sk_buff *skb = __skb_dequeue(inputq); + bool ack, deliver, update, leave = false; + struct sk_buff_head *defq; + struct tipc_member *m; + struct tipc_msg *hdr; + u32 node, port; + int mtyp, blks; + + if (!skb) + return; + + hdr = buf_msg(skb); + node = msg_orignode(hdr); + port = msg_origport(hdr); + + if (!msg_in_group(hdr)) + goto drop; + + m = tipc_group_find_member(grp, node, port); + if (!tipc_group_is_receiver(m)) + goto drop; + + if (less(msg_grp_bc_seqno(hdr), m->bc_rcv_nxt)) + goto drop; + + TIPC_SKB_CB(skb)->orig_member = m->instance; + defq = &m->deferredq; + tipc_group_sort_msg(skb, defq); + + while ((skb = skb_peek(defq))) { + hdr = buf_msg(skb); + mtyp = msg_type(hdr); + deliver = true; + ack = false; + update = false; + + if (more(msg_grp_bc_seqno(hdr), m->bc_rcv_nxt)) + break; + + /* Decide what to do with message */ + switch (mtyp) { + case TIPC_GRP_MCAST_MSG: + if (msg_nameinst(hdr) != grp->instance) { + update = true; + deliver = false; + } + /* Fall thru */ + case TIPC_GRP_BCAST_MSG: + m->bc_rcv_nxt++; + ack = msg_grp_bc_ack_req(hdr); + break; + case TIPC_GRP_UCAST_MSG: + break; + case TIPC_GRP_MEMBER_EVT: + if (m->state == MBR_LEAVING) + leave = true; + if (!grp->events) + deliver = false; + break; + default: + break; + } + + /* Execute decisions */ + __skb_dequeue(defq); + if (deliver) + __skb_queue_tail(inputq, skb); + else + kfree_skb(skb); + + if (ack) + tipc_group_proto_xmit(grp, m, GRP_ACK_MSG, xmitq); + + if (leave) { + tipc_group_delete_member(grp, m); + __skb_queue_purge(defq); + break; + } + if (!update) + continue; + + blks = msg_blocks(hdr); + tipc_group_update_rcv_win(grp, blks, node, port, xmitq); + } + return; +drop: + kfree_skb(skb); +} + +void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node, + u32 port, struct sk_buff_head *xmitq) +{ + struct list_head *active = &grp->active; + int max_active = grp->max_active; + int reclaim_limit = max_active * 3 / 4; + int active_cnt = grp->active_cnt; + struct tipc_member *m, *rm; + + m = tipc_group_find_member(grp, node, port); + if (!m) + return; + + m->advertised -= blks; + + switch (m->state) { + case MBR_JOINED: + /* Reclaim advertised space from least active member */ + if (!list_empty(active) && active_cnt >= reclaim_limit) { + rm = list_first_entry(active, struct tipc_member, list); + rm->state = MBR_RECLAIMING; + list_move_tail(&rm->list, &grp->reclaiming); + tipc_group_proto_xmit(grp, rm, GRP_RECLAIM_MSG, xmitq); + } + /* If max active, become pending and wait for reclaimed space */ + if (active_cnt >= max_active) { + m->state = MBR_PENDING; + list_add_tail(&m->list, &grp->pending); + break; + } + /* Otherwise become active */ + m->state = MBR_ACTIVE; + list_add_tail(&m->list, &grp->active); + grp->active_cnt++; + /* Fall through */ + case MBR_ACTIVE: + if (!list_is_last(&m->list, &grp->active)) + list_move_tail(&m->list, &grp->active); + if (m->advertised > (ADV_ACTIVE * 3 / 4)) + break; + tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); + break; + case MBR_REMITTED: + if (m->advertised > ADV_IDLE) + break; + m->state = MBR_JOINED; + if (m->advertised < ADV_IDLE) { + pr_warn_ratelimited("Rcv unexpected msg after REMIT\n"); + tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); + } + break; + case MBR_RECLAIMING: + case MBR_DISCOVERED: + case MBR_JOINING: + case MBR_LEAVING: + default: + break; + } +} + +static void tipc_group_proto_xmit(struct tipc_group *grp, struct tipc_member *m, + int mtyp, struct sk_buff_head *xmitq) +{ + struct tipc_msg *hdr; + struct sk_buff *skb; + int adv = 0; + + skb = tipc_msg_create(GROUP_PROTOCOL, mtyp, INT_H_SIZE, 0, + m->node, tipc_own_addr(grp->net), + m->port, grp->portid, 0); + if (!skb) + return; + + if (m->state == MBR_ACTIVE) + adv = ADV_ACTIVE - m->advertised; + else if (m->state == MBR_JOINED || m->state == MBR_PENDING) + adv = ADV_IDLE - m->advertised; + + hdr = buf_msg(skb); + + if (mtyp == GRP_JOIN_MSG) { + msg_set_grp_bc_syncpt(hdr, grp->bc_snd_nxt); + msg_set_adv_win(hdr, adv); + m->advertised += adv; + } else if (mtyp == GRP_LEAVE_MSG) { + msg_set_grp_bc_syncpt(hdr, grp->bc_snd_nxt); + } else if (mtyp == GRP_ADV_MSG) { + msg_set_adv_win(hdr, adv); + m->advertised += adv; + } else if (mtyp == GRP_ACK_MSG) { + msg_set_grp_bc_acked(hdr, m->bc_rcv_nxt); + } else if (mtyp == GRP_REMIT_MSG) { + msg_set_grp_remitted(hdr, m->window); + } + __skb_queue_tail(xmitq, skb); +} + +void tipc_group_proto_rcv(struct tipc_group *grp, bool *usr_wakeup, + struct tipc_msg *hdr, struct sk_buff_head *inputq, + struct sk_buff_head *xmitq) +{ + u32 node = msg_orignode(hdr); + u32 port = msg_origport(hdr); + struct tipc_member *m, *pm; + struct tipc_msg *ehdr; + u16 remitted, in_flight; + + if (!grp) + return; + + m = tipc_group_find_member(grp, node, port); + + switch (msg_type(hdr)) { + case GRP_JOIN_MSG: + if (!m) + m = tipc_group_create_member(grp, node, port, + MBR_QUARANTINED); + if (!m) + return; + m->bc_syncpt = msg_grp_bc_syncpt(hdr); + m->bc_rcv_nxt = m->bc_syncpt; + m->window += msg_adv_win(hdr); + + /* Wait until PUBLISH event is received */ + if (m->state == MBR_DISCOVERED) { + m->state = MBR_JOINING; + } else if (m->state == MBR_PUBLISHED) { + m->state = MBR_JOINED; + *usr_wakeup = true; + m->usr_pending = false; + tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); + ehdr = buf_msg(m->event_msg); + msg_set_grp_bc_seqno(ehdr, m->bc_syncpt); + __skb_queue_tail(inputq, m->event_msg); + } + if (m->window < ADV_IDLE) + tipc_group_update_member(m, 0); + else + list_del_init(&m->congested); + return; + case GRP_LEAVE_MSG: + if (!m) + return; + m->bc_syncpt = msg_grp_bc_syncpt(hdr); + + /* Wait until WITHDRAW event is received */ + if (m->state != MBR_LEAVING) { + tipc_group_decr_active(grp, m); + m->state = MBR_LEAVING; + return; + } + /* Otherwise deliver already received WITHDRAW event */ + ehdr = buf_msg(m->event_msg); + msg_set_grp_bc_seqno(ehdr, m->bc_syncpt); + __skb_queue_tail(inputq, m->event_msg); + *usr_wakeup = true; + list_del_init(&m->congested); + return; + case GRP_ADV_MSG: + if (!m) + return; + m->window += msg_adv_win(hdr); + *usr_wakeup = m->usr_pending; + m->usr_pending = false; + list_del_init(&m->congested); + return; + case GRP_ACK_MSG: + if (!m) + return; + m->bc_acked = msg_grp_bc_acked(hdr); + if (--grp->bc_ackers) + break; + *usr_wakeup = true; + m->usr_pending = false; + return; + case GRP_RECLAIM_MSG: + if (!m) + return; + *usr_wakeup = m->usr_pending; + m->usr_pending = false; + tipc_group_proto_xmit(grp, m, GRP_REMIT_MSG, xmitq); + m->window = ADV_IDLE; + return; + case GRP_REMIT_MSG: + if (!m || m->state != MBR_RECLAIMING) + return; + + list_del_init(&m->list); + grp->active_cnt--; + remitted = msg_grp_remitted(hdr); + + /* Messages preceding the REMIT still in receive queue */ + if (m->advertised > remitted) { + m->state = MBR_REMITTED; + in_flight = m->advertised - remitted; + } + /* All messages preceding the REMIT have been read */ + if (m->advertised <= remitted) { + m->state = MBR_JOINED; + in_flight = 0; + } + /* ..and the REMIT overtaken by more messages => re-advertise */ + if (m->advertised < remitted) + tipc_group_proto_xmit(grp, m, GRP_ADV_MSG, xmitq); + + m->advertised = ADV_IDLE + in_flight; + + /* Set oldest pending member to active and advertise */ + if (list_empty(&grp->pending)) + return; + pm = list_first_entry(&grp->pending, struct tipc_member, list); + pm->state = MBR_ACTIVE; + list_move_tail(&pm->list, &grp->active); + grp->active_cnt++; + if (pm->advertised <= (ADV_ACTIVE * 3 / 4)) + tipc_group_proto_xmit(grp, pm, GRP_ADV_MSG, xmitq); + return; + default: + pr_warn("Received unknown GROUP_PROTO message\n"); + } +} + +/* tipc_group_member_evt() - receive and handle a member up/down event + */ +void tipc_group_member_evt(struct tipc_group *grp, + bool *usr_wakeup, + int *sk_rcvbuf, + struct sk_buff *skb, + struct sk_buff_head *inputq, + struct sk_buff_head *xmitq) +{ + struct tipc_msg *hdr = buf_msg(skb); + struct tipc_event *evt = (void *)msg_data(hdr); + u32 instance = evt->found_lower; + u32 node = evt->port.node; + u32 port = evt->port.ref; + int event = evt->event; + struct tipc_member *m; + struct net *net; + bool node_up; + u32 self; + + if (!grp) + goto drop; + + net = grp->net; + self = tipc_own_addr(net); + if (!grp->loopback && node == self && port == grp->portid) + goto drop; + + /* Convert message before delivery to user */ + msg_set_hdr_sz(hdr, GROUP_H_SIZE); + msg_set_user(hdr, TIPC_CRITICAL_IMPORTANCE); + msg_set_type(hdr, TIPC_GRP_MEMBER_EVT); + msg_set_origport(hdr, port); + msg_set_orignode(hdr, node); + msg_set_nametype(hdr, grp->type); + msg_set_grp_evt(hdr, event); + + m = tipc_group_find_member(grp, node, port); + + if (event == TIPC_PUBLISHED) { + if (!m) + m = tipc_group_create_member(grp, node, port, + MBR_DISCOVERED); + if (!m) + goto drop; + + /* Hold back event if JOIN message not yet received */ + if (m->state == MBR_DISCOVERED) { + m->event_msg = skb; + m->state = MBR_PUBLISHED; + } else { + msg_set_grp_bc_seqno(hdr, m->bc_syncpt); + __skb_queue_tail(inputq, skb); + m->state = MBR_JOINED; + *usr_wakeup = true; + m->usr_pending = false; + } + m->instance = instance; + TIPC_SKB_CB(skb)->orig_member = m->instance; + tipc_group_proto_xmit(grp, m, GRP_JOIN_MSG, xmitq); + if (m->window < ADV_IDLE) + tipc_group_update_member(m, 0); + else + list_del_init(&m->congested); + } else if (event == TIPC_WITHDRAWN) { + if (!m) + goto drop; + + TIPC_SKB_CB(skb)->orig_member = m->instance; + + *usr_wakeup = true; + m->usr_pending = false; + node_up = tipc_node_is_up(net, node); + + /* Hold back event if more messages might be expected */ + if (m->state != MBR_LEAVING && node_up) { + m->event_msg = skb; + tipc_group_decr_active(grp, m); + m->state = MBR_LEAVING; + } else { + if (node_up) + msg_set_grp_bc_seqno(hdr, m->bc_syncpt); + else + msg_set_grp_bc_seqno(hdr, m->bc_rcv_nxt); + __skb_queue_tail(inputq, skb); + } + list_del_init(&m->congested); + } + *sk_rcvbuf = tipc_group_rcvbuf_limit(grp); + return; +drop: + kfree_skb(skb); +} diff --git a/net/tipc/group.h b/net/tipc/group.h new file mode 100644 index 000000000000..d525e1cd7de5 --- /dev/null +++ b/net/tipc/group.h @@ -0,0 +1,73 @@ +/* + * net/tipc/group.h: Include file for TIPC group unicast/multicast functions + * + * Copyright (c) 2017, Ericsson AB + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. Neither the names of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * Alternatively, this software may be distributed under the terms of the + * GNU General Public License ("GPL") version 2 as published by the Free + * Software Foundation. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _TIPC_GROUP_H +#define _TIPC_GROUP_H + +#include "core.h" + +struct tipc_group; +struct tipc_member; +struct tipc_msg; + +struct tipc_group *tipc_group_create(struct net *net, u32 portid, + struct tipc_group_req *mreq); +void tipc_group_delete(struct net *net, struct tipc_group *grp); +void tipc_group_add_member(struct tipc_group *grp, u32 node, u32 port); +struct tipc_nlist *tipc_group_dests(struct tipc_group *grp); +void tipc_group_self(struct tipc_group *grp, struct tipc_name_seq *seq, + int *scope); +u32 tipc_group_exclude(struct tipc_group *grp); +void tipc_group_filter_msg(struct tipc_group *grp, + struct sk_buff_head *inputq, + struct sk_buff_head *xmitq); +void tipc_group_member_evt(struct tipc_group *grp, bool *wakeup, + int *sk_rcvbuf, struct sk_buff *skb, + struct sk_buff_head *inputq, + struct sk_buff_head *xmitq); +void tipc_group_proto_rcv(struct tipc_group *grp, bool *wakeup, + struct tipc_msg *hdr, + struct sk_buff_head *inputq, + struct sk_buff_head *xmitq); +void tipc_group_update_bc_members(struct tipc_group *grp, int len, bool ack); +bool tipc_group_cong(struct tipc_group *grp, u32 dnode, u32 dport, + int len, struct tipc_member **m); +bool tipc_group_bc_cong(struct tipc_group *grp, int len); +void tipc_group_update_rcv_win(struct tipc_group *grp, int blks, u32 node, + u32 port, struct sk_buff_head *xmitq); +u16 tipc_group_bc_snd_nxt(struct tipc_group *grp); +void tipc_group_update_member(struct tipc_member *m, int len); +int tipc_group_size(struct tipc_group *grp); +#endif diff --git a/net/tipc/link.c b/net/tipc/link.c index 60820dc35a08..6bce0b1117bd 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -239,7 +239,8 @@ static int link_is_up(struct tipc_link *l) static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *xmitq); static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, - u16 rcvgap, int tolerance, int priority, + bool probe_reply, u16 rcvgap, + int tolerance, int priority, struct sk_buff_head *xmitq); static void link_print(struct tipc_link *l, const char *str); static int tipc_link_build_nack_msg(struct tipc_link *l, @@ -773,7 +774,7 @@ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) } if (state || probe || setup) - tipc_link_build_proto_msg(l, mtyp, probe, 0, 0, 0, xmitq); + tipc_link_build_proto_msg(l, mtyp, probe, 0, 0, 0, 0, xmitq); return rc; } @@ -978,15 +979,15 @@ static void link_retransmit_failure(struct tipc_link *l, struct sk_buff *skb) struct tipc_msg *hdr = buf_msg(skb); pr_warn("Retransmission failure on link <%s>\n", l->name); - link_print(l, "Resetting link "); + link_print(l, "State of link "); pr_info("Failed msg: usr %u, typ %u, len %u, err %u\n", msg_user(hdr), msg_type(hdr), msg_size(hdr), msg_errcode(hdr)); pr_info("sqno %u, prev: %x, src: %x\n", msg_seqno(hdr), msg_prevnode(hdr), msg_orignode(hdr)); } -int tipc_link_retrans(struct tipc_link *l, u16 from, u16 to, - struct sk_buff_head *xmitq) +int tipc_link_retrans(struct tipc_link *l, struct tipc_link *nacker, + u16 from, u16 to, struct sk_buff_head *xmitq) { struct sk_buff *_skb, *skb = skb_peek(&l->transmq); struct tipc_msg *hdr; @@ -997,11 +998,14 @@ int tipc_link_retrans(struct tipc_link *l, u16 from, u16 to, return 0; /* Detect repeated retransmit failures on same packet */ - if (likely(l->last_retransm != buf_seqno(skb))) { - l->last_retransm = buf_seqno(skb); - l->stale_count = 1; - } else if (++l->stale_count > 100) { + if (nacker->last_retransm != buf_seqno(skb)) { + nacker->last_retransm = buf_seqno(skb); + nacker->stale_count = 1; + } else if (++nacker->stale_count > 100) { link_retransmit_failure(l, skb); + nacker->stale_count = 0; + if (link_is_bc_sndlink(l)) + return TIPC_LINK_DOWN_EVT; return tipc_link_fsm_evt(l, LINK_FAILURE_EVT); } @@ -1036,6 +1040,7 @@ int tipc_link_retrans(struct tipc_link *l, u16 from, u16 to, static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb, struct sk_buff_head *inputq) { + struct sk_buff_head *mc_inputq = l->bc_rcvlink->inputq; struct tipc_msg *hdr = buf_msg(skb); switch (msg_user(hdr)) { @@ -1043,13 +1048,16 @@ static bool tipc_data_input(struct tipc_link *l, struct sk_buff *skb, case TIPC_MEDIUM_IMPORTANCE: case TIPC_HIGH_IMPORTANCE: case TIPC_CRITICAL_IMPORTANCE: - if (unlikely(msg_type(hdr) == TIPC_MCAST_MSG)) { - skb_queue_tail(l->bc_rcvlink->inputq, skb); + if (unlikely(msg_in_group(hdr) || msg_mcast(hdr))) { + skb_queue_tail(mc_inputq, skb); return true; } case CONN_MANAGER: skb_queue_tail(inputq, skb); return true; + case GROUP_PROTOCOL: + skb_queue_tail(mc_inputq, skb); + return true; case NAME_DISTRIBUTOR: l->bc_rcvlink->state = LINK_ESTABLISHED; skb_queue_tail(l->namedq, skb); @@ -1167,7 +1175,7 @@ int tipc_link_build_state_msg(struct tipc_link *l, struct sk_buff_head *xmitq) /* Unicast ACK */ l->rcv_unacked = 0; l->stats.sent_acks++; - tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq); + tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, 0, xmitq); return 0; } @@ -1181,7 +1189,7 @@ void tipc_link_build_reset_msg(struct tipc_link *l, struct sk_buff_head *xmitq) if (l->state == LINK_ESTABLISHING) mtyp = ACTIVATE_MSG; - tipc_link_build_proto_msg(l, mtyp, 0, 0, 0, 0, xmitq); + tipc_link_build_proto_msg(l, mtyp, 0, 0, 0, 0, 0, xmitq); /* Inform peer that this endpoint is going down if applicable */ skb = skb_peek_tail(xmitq); @@ -1208,7 +1216,7 @@ static int tipc_link_build_nack_msg(struct tipc_link *l, } if ((skb_queue_len(&l->deferdq) == 1) || !(def_cnt % TIPC_NACK_INTV)) - tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, xmitq); + tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, 0, xmitq); return 0; } @@ -1282,7 +1290,8 @@ drop: } static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, - u16 rcvgap, int tolerance, int priority, + bool probe_reply, u16 rcvgap, + int tolerance, int priority, struct sk_buff_head *xmitq) { struct tipc_link *bcl = l->bc_rcvlink; @@ -1330,6 +1339,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe, msg_set_seq_gap(hdr, rcvgap); msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl)); msg_set_probe(hdr, probe); + msg_set_is_keepalive(hdr, probe || probe_reply); tipc_mon_prep(l->net, data, &dlen, mstate, l->bearer_id); msg_set_size(hdr, INT_H_SIZE + dlen); skb_trim(skb, INT_H_SIZE + dlen); @@ -1435,6 +1445,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, u16 rcv_nxt = l->rcv_nxt; u16 dlen = msg_data_sz(hdr); int mtyp = msg_type(hdr); + bool reply = msg_probe(hdr); void *data; char *if_name; int rc = 0; @@ -1521,14 +1532,14 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb, /* Send NACK if peer has sent pkts we haven't received yet */ if (more(peers_snd_nxt, rcv_nxt) && !tipc_link_is_synching(l)) rcvgap = peers_snd_nxt - l->rcv_nxt; - if (rcvgap || (msg_probe(hdr))) - tipc_link_build_proto_msg(l, STATE_MSG, 0, rcvgap, - 0, 0, xmitq); + if (rcvgap || reply) + tipc_link_build_proto_msg(l, STATE_MSG, 0, reply, + rcvgap, 0, 0, xmitq); tipc_link_release_pkts(l, ack); /* If NACK, retransmit will now start at right position */ if (gap) { - rc = tipc_link_retrans(l, ack + 1, ack + gap, xmitq); + rc = tipc_link_retrans(l, l, ack + 1, ack + gap, xmitq); l->stats.recv_nacks++; } @@ -1680,7 +1691,7 @@ int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr, return rc; if (link_bc_retr_eval(snd_l, &from, &to)) - rc = tipc_link_retrans(snd_l, from, to, xmitq); + rc = tipc_link_retrans(snd_l, l, from, to, xmitq); l->snd_nxt = peers_snd_nxt; if (link_bc_rcv_gap(l)) @@ -1775,7 +1786,7 @@ int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb, if (dnode == tipc_own_addr(l->net)) { tipc_link_bc_ack_rcv(l, acked, xmitq); - rc = tipc_link_retrans(l->bc_sndlink, from, to, xmitq); + rc = tipc_link_retrans(l->bc_sndlink, l, from, to, xmitq); l->stats.recv_nacks++; return rc; } @@ -2115,14 +2126,14 @@ void tipc_link_set_tolerance(struct tipc_link *l, u32 tol, struct sk_buff_head *xmitq) { l->tolerance = tol; - tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, tol, 0, xmitq); + tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, tol, 0, xmitq); } void tipc_link_set_prio(struct tipc_link *l, u32 prio, struct sk_buff_head *xmitq) { l->priority = prio; - tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, prio, xmitq); + tipc_link_build_proto_msg(l, STATE_MSG, 0, 0, 0, 0, prio, xmitq); } void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit) diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c index 9e109bb1a207..8e884ed06d4b 100644 --- a/net/tipc/monitor.c +++ b/net/tipc/monitor.c @@ -530,8 +530,11 @@ void tipc_mon_prep(struct net *net, void *data, int *dlen, u16 gen = mon->dom_gen; u16 len; - if (!tipc_mon_is_active(net, mon)) + /* Send invalid record if not active */ + if (!tipc_mon_is_active(net, mon)) { + dom->len = 0; return; + } /* Send only a dummy record with ack if peer has acked our last sent */ if (likely(state->acked_gen == gen)) { @@ -559,6 +562,12 @@ void tipc_mon_get_state(struct net *net, u32 addr, struct tipc_monitor *mon = tipc_monitor(net, bearer_id); struct tipc_peer *peer; + if (!tipc_mon_is_active(net, mon)) { + state->probing = false; + state->monitoring = true; + return; + } + /* Used cached state if table has not changed */ if (!state->probing && (state->list_gen == mon->list_gen) && @@ -578,9 +587,9 @@ void tipc_mon_get_state(struct net *net, u32 addr, read_unlock_bh(&mon->lock); } -static void mon_timeout(unsigned long m) +static void mon_timeout(struct timer_list *t) { - struct tipc_monitor *mon = (void *)m; + struct tipc_monitor *mon = from_timer(mon, t, timer); struct tipc_peer *self; int best_member_cnt = dom_size(mon->peer_cnt) - 1; @@ -623,7 +632,7 @@ int tipc_mon_create(struct net *net, int bearer_id) self->is_up = true; self->is_head = true; INIT_LIST_HEAD(&self->list); - setup_timer(&mon->timer, mon_timeout, (unsigned long)mon); + timer_setup(&mon->timer, mon_timeout, 0); mon->timer_intv = msecs_to_jiffies(MON_TIMEOUT + (tn->random & 0xffff)); mod_timer(&mon->timer, jiffies + mon->timer_intv); return 0; diff --git a/net/tipc/msg.c b/net/tipc/msg.c index dcd90e6fa7c3..b0d07b35909d 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -174,7 +174,7 @@ int tipc_buf_append(struct sk_buff **headbuf, struct sk_buff **buf) if (fragid == LAST_FRAGMENT) { TIPC_SKB_CB(head)->validated = false; - if (unlikely(!tipc_msg_validate(head))) + if (unlikely(!tipc_msg_validate(&head))) goto err; *buf = head; TIPC_SKB_CB(head)->tail = NULL; @@ -201,11 +201,21 @@ err: * TIPC will ignore the excess, under the assumption that it is optional info * introduced by a later release of the protocol. */ -bool tipc_msg_validate(struct sk_buff *skb) +bool tipc_msg_validate(struct sk_buff **_skb) { - struct tipc_msg *msg; + struct sk_buff *skb = *_skb; + struct tipc_msg *hdr; int msz, hsz; + /* Ensure that flow control ratio condition is satisfied */ + if (unlikely(skb->truesize / buf_roundup_len(skb) > 4)) { + skb = skb_copy(skb, GFP_ATOMIC); + if (!skb) + return false; + kfree_skb(*_skb); + *_skb = skb; + } + if (unlikely(TIPC_SKB_CB(skb)->validated)) return true; if (unlikely(!pskb_may_pull(skb, MIN_H_SIZE))) @@ -217,11 +227,11 @@ bool tipc_msg_validate(struct sk_buff *skb) if (unlikely(!pskb_may_pull(skb, hsz))) return false; - msg = buf_msg(skb); - if (unlikely(msg_version(msg) != TIPC_VERSION)) + hdr = buf_msg(skb); + if (unlikely(msg_version(hdr) != TIPC_VERSION)) return false; - msz = msg_size(msg); + msz = msg_size(hdr); if (unlikely(msz < hsz)) return false; if (unlikely((msz - hsz) > TIPC_MAX_USER_MSG_SIZE)) @@ -411,7 +421,7 @@ bool tipc_msg_extract(struct sk_buff *skb, struct sk_buff **iskb, int *pos) skb_pull(*iskb, offset); imsz = msg_size(buf_msg(*iskb)); skb_trim(*iskb, imsz); - if (unlikely(!tipc_msg_validate(*iskb))) + if (unlikely(!tipc_msg_validate(iskb))) goto none; *pos += align(imsz); return true; @@ -479,13 +489,14 @@ bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg, bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err) { struct sk_buff *_skb = *skb; - struct tipc_msg *hdr = buf_msg(_skb); + struct tipc_msg *hdr; struct tipc_msg ohdr; - int dlen = min_t(uint, msg_data_sz(hdr), MAX_FORWARD_SIZE); + int dlen; if (skb_linearize(_skb)) goto exit; hdr = buf_msg(_skb); + dlen = min_t(uint, msg_data_sz(hdr), MAX_FORWARD_SIZE); if (msg_dest_droppable(hdr)) goto exit; if (msg_errcode(hdr)) @@ -511,6 +522,8 @@ bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err) pskb_expand_head(_skb, BUF_HEADROOM, BUF_TAILROOM, GFP_ATOMIC)) goto exit; + /* reassign after skb header modifications */ + hdr = buf_msg(_skb); /* Now reverse the concerned fields */ msg_set_errcode(hdr, err); msg_set_non_seq(hdr, 0); @@ -548,7 +561,7 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err) return false; if (msg_errcode(msg)) return false; - *err = -TIPC_ERR_NO_NAME; + *err = TIPC_ERR_NO_NAME; if (skb_linearize(skb)) return false; msg = buf_msg(skb); @@ -565,6 +578,14 @@ bool tipc_msg_lookup_dest(struct net *net, struct sk_buff *skb, int *err) msg_set_destnode(msg, dnode); msg_set_destport(msg, dport); *err = TIPC_OK; + + if (!skb_cloned(skb)) + return true; + + /* Unclone buffer in case it was bundled */ + if (pskb_expand_head(skb, BUF_HEADROOM, BUF_TAILROOM, GFP_ATOMIC)) + return false; + return true; } @@ -655,3 +676,10 @@ void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno, } kfree_skb(skb); } + +void tipc_skb_reject(struct net *net, int err, struct sk_buff *skb, + struct sk_buff_head *xmitq) +{ + if (tipc_msg_reverse(tipc_own_addr(net), &skb, err)) + __skb_queue_tail(xmitq, skb); +} diff --git a/net/tipc/msg.h b/net/tipc/msg.h index c843fd2bc48d..3e4384c222f7 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -1,7 +1,7 @@ /* * net/tipc/msg.h: Include file for TIPC message header routines * - * Copyright (c) 2000-2007, 2014-2015 Ericsson AB + * Copyright (c) 2000-2007, 2014-2017 Ericsson AB * Copyright (c) 2005-2008, 2010-2011, Wind River Systems * All rights reserved. * @@ -61,10 +61,14 @@ struct plist; /* * Payload message types */ -#define TIPC_CONN_MSG 0 -#define TIPC_MCAST_MSG 1 -#define TIPC_NAMED_MSG 2 -#define TIPC_DIRECT_MSG 3 +#define TIPC_CONN_MSG 0 +#define TIPC_MCAST_MSG 1 +#define TIPC_NAMED_MSG 2 +#define TIPC_DIRECT_MSG 3 +#define TIPC_GRP_MEMBER_EVT 4 +#define TIPC_GRP_BCAST_MSG 5 +#define TIPC_GRP_MCAST_MSG 6 +#define TIPC_GRP_UCAST_MSG 7 /* * Internal message users @@ -73,11 +77,13 @@ struct plist; #define MSG_BUNDLER 6 #define LINK_PROTOCOL 7 #define CONN_MANAGER 8 +#define GROUP_PROTOCOL 9 #define TUNNEL_PROTOCOL 10 #define NAME_DISTRIBUTOR 11 #define MSG_FRAGMENTER 12 #define LINK_CONFIG 13 #define SOCK_WAKEUP 14 /* pseudo user */ +#define TOP_SRV 15 /* pseudo user */ /* * Message header sizes @@ -86,6 +92,7 @@ struct plist; #define BASIC_H_SIZE 32 /* Basic payload message */ #define NAMED_H_SIZE 40 /* Named payload message */ #define MCAST_H_SIZE 44 /* Multicast payload message */ +#define GROUP_H_SIZE 44 /* Group payload message */ #define INT_H_SIZE 40 /* Internal messages */ #define MIN_H_SIZE 24 /* Smallest legal TIPC header size */ #define MAX_H_SIZE 60 /* Largest possible TIPC header size */ @@ -96,6 +103,7 @@ struct plist; struct tipc_skb_cb { u32 bytes_read; + u32 orig_member; struct sk_buff *tail; bool validated; u16 chain_imp; @@ -188,6 +196,11 @@ static inline u32 msg_size(struct tipc_msg *m) return msg_bits(m, 0, 0, 0x1ffff); } +static inline u32 msg_blocks(struct tipc_msg *m) +{ + return (msg_size(m) / 1024) + 1; +} + static inline u32 msg_data_sz(struct tipc_msg *m) { return msg_size(m) - msg_hdr_sz(m); @@ -213,6 +226,16 @@ static inline void msg_set_dest_droppable(struct tipc_msg *m, u32 d) msg_set_bits(m, 0, 19, 1, d); } +static inline int msg_is_keepalive(struct tipc_msg *m) +{ + return msg_bits(m, 0, 19, 1); +} + +static inline void msg_set_is_keepalive(struct tipc_msg *m, u32 d) +{ + msg_set_bits(m, 0, 19, 1, d); +} + static inline int msg_src_droppable(struct tipc_msg *m) { return msg_bits(m, 0, 18, 1); @@ -251,6 +274,18 @@ static inline void msg_set_type(struct tipc_msg *m, u32 n) msg_set_bits(m, 1, 29, 0x7, n); } +static inline int msg_in_group(struct tipc_msg *m) +{ + int mtyp = msg_type(m); + + return mtyp >= TIPC_GRP_MEMBER_EVT && mtyp <= TIPC_GRP_UCAST_MSG; +} + +static inline bool msg_is_grp_evt(struct tipc_msg *m) +{ + return msg_type(m) == TIPC_GRP_MEMBER_EVT; +} + static inline u32 msg_named(struct tipc_msg *m) { return msg_type(m) == TIPC_NAMED_MSG; @@ -258,7 +293,10 @@ static inline u32 msg_named(struct tipc_msg *m) static inline u32 msg_mcast(struct tipc_msg *m) { - return msg_type(m) == TIPC_MCAST_MSG; + int mtyp = msg_type(m); + + return ((mtyp == TIPC_MCAST_MSG) || (mtyp == TIPC_GRP_BCAST_MSG) || + (mtyp == TIPC_GRP_MCAST_MSG)); } static inline u32 msg_connected(struct tipc_msg *m) @@ -514,6 +552,16 @@ static inline void msg_set_nameupper(struct tipc_msg *m, u32 n) #define DSC_RESP_MSG 1 /* + * Group protocol message types + */ +#define GRP_JOIN_MSG 0 +#define GRP_LEAVE_MSG 1 +#define GRP_ADV_MSG 2 +#define GRP_ACK_MSG 3 +#define GRP_RECLAIM_MSG 4 +#define GRP_REMIT_MSG 5 + +/* * Word 1 */ static inline u32 msg_seq_gap(struct tipc_msg *m) @@ -764,12 +812,12 @@ static inline void msg_set_conn_ack(struct tipc_msg *m, u32 n) msg_set_bits(m, 9, 16, 0xffff, n); } -static inline u32 msg_adv_win(struct tipc_msg *m) +static inline u16 msg_adv_win(struct tipc_msg *m) { return msg_bits(m, 9, 0, 0xffff); } -static inline void msg_set_adv_win(struct tipc_msg *m, u32 n) +static inline void msg_set_adv_win(struct tipc_msg *m, u16 n) { msg_set_bits(m, 9, 0, 0xffff, n); } @@ -794,6 +842,68 @@ static inline void msg_set_link_tolerance(struct tipc_msg *m, u32 n) msg_set_bits(m, 9, 0, 0xffff, n); } +static inline u16 msg_grp_bc_syncpt(struct tipc_msg *m) +{ + return msg_bits(m, 9, 16, 0xffff); +} + +static inline void msg_set_grp_bc_syncpt(struct tipc_msg *m, u16 n) +{ + msg_set_bits(m, 9, 16, 0xffff, n); +} + +static inline u16 msg_grp_bc_acked(struct tipc_msg *m) +{ + return msg_bits(m, 9, 16, 0xffff); +} + +static inline void msg_set_grp_bc_acked(struct tipc_msg *m, u16 n) +{ + msg_set_bits(m, 9, 16, 0xffff, n); +} + +static inline u16 msg_grp_remitted(struct tipc_msg *m) +{ + return msg_bits(m, 9, 16, 0xffff); +} + +static inline void msg_set_grp_remitted(struct tipc_msg *m, u16 n) +{ + msg_set_bits(m, 9, 16, 0xffff, n); +} + +/* Word 10 + */ +static inline u16 msg_grp_evt(struct tipc_msg *m) +{ + return msg_bits(m, 10, 0, 0x3); +} + +static inline void msg_set_grp_evt(struct tipc_msg *m, int n) +{ + msg_set_bits(m, 10, 0, 0x3, n); +} + +static inline u16 msg_grp_bc_ack_req(struct tipc_msg *m) +{ + return msg_bits(m, 10, 0, 0x1); +} + +static inline void msg_set_grp_bc_ack_req(struct tipc_msg *m, bool n) +{ + msg_set_bits(m, 10, 0, 0x1, n); +} + +static inline u16 msg_grp_bc_seqno(struct tipc_msg *m) +{ + return msg_bits(m, 10, 16, 0xffff); +} + +static inline void msg_set_grp_bc_seqno(struct tipc_msg *m, u32 n) +{ + msg_set_bits(m, 10, 16, 0xffff, n); +} + static inline bool msg_peer_link_is_up(struct tipc_msg *m) { if (likely(msg_user(m) != LINK_PROTOCOL)) @@ -816,8 +926,10 @@ static inline bool msg_is_reset(struct tipc_msg *hdr) } struct sk_buff *tipc_buf_acquire(u32 size, gfp_t gfp); -bool tipc_msg_validate(struct sk_buff *skb); +bool tipc_msg_validate(struct sk_buff **_skb); bool tipc_msg_reverse(u32 own_addr, struct sk_buff **skb, int err); +void tipc_skb_reject(struct net *net, int err, struct sk_buff *skb, + struct sk_buff_head *xmitq); void tipc_msg_init(u32 own_addr, struct tipc_msg *m, u32 user, u32 type, u32 hsize, u32 destnode); struct sk_buff *tipc_msg_create(uint user, uint type, uint hdr_sz, @@ -842,6 +954,11 @@ static inline u16 buf_seqno(struct sk_buff *skb) return msg_seqno(buf_msg(skb)); } +static inline int buf_roundup_len(struct sk_buff *skb) +{ + return (skb->len / 1024 + 1) * 1024; +} + /* tipc_skb_peek(): peek and reserve first buffer in list * @list: list to be peeked in * Returns pointer to first buffer in list, if any diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index bd0aac87b41a..b3829bcf63c7 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -43,6 +43,7 @@ #include "bcast.h" #include "addr.h" #include "node.h" +#include "group.h" #include <net/genetlink.h> #define TIPC_NAMETBL_SIZE 1024 /* must be a power of 2 */ @@ -596,18 +597,47 @@ not_found: return ref; } -/** - * tipc_nametbl_mc_translate - find multicast destinations - * - * Creates list of all local ports that overlap the given multicast address; - * also determines if any off-node ports overlap. - * - * Note: Publications with a scope narrower than 'limit' are ignored. - * (i.e. local node-scope publications mustn't receive messages arriving - * from another node, even if the multcast link brought it here) - * - * Returns non-zero if any off-node ports overlap - */ +bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain, + struct list_head *dsts, int *dstcnt, u32 exclude, + bool all) +{ + u32 self = tipc_own_addr(net); + struct publication *publ; + struct name_info *info; + struct name_seq *seq; + struct sub_seq *sseq; + + if (!tipc_in_scope(domain, self)) + return false; + + *dstcnt = 0; + rcu_read_lock(); + seq = nametbl_find_seq(net, type); + if (unlikely(!seq)) + goto exit; + spin_lock_bh(&seq->lock); + sseq = nameseq_find_subseq(seq, instance); + if (likely(sseq)) { + info = sseq->info; + list_for_each_entry(publ, &info->zone_list, zone_list) { + if (!tipc_in_scope(domain, publ->node)) + continue; + if (publ->ref == exclude && publ->node == self) + continue; + tipc_dest_push(dsts, publ->node, publ->ref); + (*dstcnt)++; + if (all) + continue; + list_move_tail(&publ->zone_list, &info->zone_list); + break; + } + } + spin_unlock_bh(&seq->lock); +exit: + rcu_read_unlock(); + return !list_empty(dsts); +} + int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper, u32 limit, struct list_head *dports) { @@ -634,7 +664,7 @@ int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper, info = sseq->info; list_for_each_entry(publ, &info->node_list, node_list) { if (publ->scope <= limit) - u32_push(dports, publ->ref); + tipc_dest_push(dports, 0, publ->ref); } if (info->cluster_list_size != info->node_list_size) @@ -667,7 +697,7 @@ void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower, spin_lock_bh(&seq->lock); sseq = seq->sseqs + nameseq_locate_subseq(seq, lower); stop = seq->sseqs + seq->first_free; - for (; sseq->lower <= upper && sseq != stop; sseq++) { + for (; sseq != stop && sseq->lower <= upper; sseq++) { info = sseq->info; list_for_each_entry(publ, &info->zone_list, zone_list) { if (tipc_in_scope(domain, publ->node)) @@ -679,6 +709,37 @@ exit: rcu_read_unlock(); } +/* tipc_nametbl_build_group - build list of communication group members + */ +void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp, + u32 type, u32 domain) +{ + struct sub_seq *sseq, *stop; + struct name_info *info; + struct publication *p; + struct name_seq *seq; + + rcu_read_lock(); + seq = nametbl_find_seq(net, type); + if (!seq) + goto exit; + + spin_lock_bh(&seq->lock); + sseq = seq->sseqs; + stop = seq->sseqs + seq->first_free; + for (; sseq != stop; sseq++) { + info = sseq->info; + list_for_each_entry(p, &info->zone_list, zone_list) { + if (!tipc_in_scope(domain, p->node)) + continue; + tipc_group_add_member(grp, p->node, p->ref); + } + } + spin_unlock_bh(&seq->lock); +exit: + rcu_read_unlock(); +} + /* * tipc_nametbl_publish - add name publication to network name tables */ @@ -1057,78 +1118,79 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; } -bool u32_find(struct list_head *l, u32 value) +struct tipc_dest *tipc_dest_find(struct list_head *l, u32 node, u32 port) { - struct u32_item *item; + u64 value = (u64)node << 32 | port; + struct tipc_dest *dst; - list_for_each_entry(item, l, list) { - if (item->value == value) - return true; + list_for_each_entry(dst, l, list) { + if (dst->value != value) + continue; + return dst; } - return false; + return NULL; } -bool u32_push(struct list_head *l, u32 value) +bool tipc_dest_push(struct list_head *l, u32 node, u32 port) { - struct u32_item *item; + u64 value = (u64)node << 32 | port; + struct tipc_dest *dst; - list_for_each_entry(item, l, list) { - if (item->value == value) - return false; - } - item = kmalloc(sizeof(*item), GFP_ATOMIC); - if (unlikely(!item)) + if (tipc_dest_find(l, node, port)) return false; - item->value = value; - list_add(&item->list, l); + dst = kmalloc(sizeof(*dst), GFP_ATOMIC); + if (unlikely(!dst)) + return false; + dst->value = value; + list_add(&dst->list, l); return true; } -u32 u32_pop(struct list_head *l) +bool tipc_dest_pop(struct list_head *l, u32 *node, u32 *port) { - struct u32_item *item; - u32 value = 0; + struct tipc_dest *dst; if (list_empty(l)) - return 0; - item = list_first_entry(l, typeof(*item), list); - value = item->value; - list_del(&item->list); - kfree(item); - return value; + return false; + dst = list_first_entry(l, typeof(*dst), list); + if (port) + *port = dst->port; + if (node) + *node = dst->node; + list_del(&dst->list); + kfree(dst); + return true; } -bool u32_del(struct list_head *l, u32 value) +bool tipc_dest_del(struct list_head *l, u32 node, u32 port) { - struct u32_item *item, *tmp; + struct tipc_dest *dst; - list_for_each_entry_safe(item, tmp, l, list) { - if (item->value != value) - continue; - list_del(&item->list); - kfree(item); - return true; - } - return false; + dst = tipc_dest_find(l, node, port); + if (!dst) + return false; + list_del(&dst->list); + kfree(dst); + return true; } -void u32_list_purge(struct list_head *l) +void tipc_dest_list_purge(struct list_head *l) { - struct u32_item *item, *tmp; + struct tipc_dest *dst, *tmp; - list_for_each_entry_safe(item, tmp, l, list) { - list_del(&item->list); - kfree(item); + list_for_each_entry_safe(dst, tmp, l, list) { + list_del(&dst->list); + kfree(dst); } } -int u32_list_len(struct list_head *l) +int tipc_dest_list_len(struct list_head *l) { - struct u32_item *item; + struct tipc_dest *dst; int i = 0; - list_for_each_entry(item, l, list) { + list_for_each_entry(dst, l, list) { i++; } return i; diff --git a/net/tipc/name_table.h b/net/tipc/name_table.h index 6ebdeb1d84a5..71926e429446 100644 --- a/net/tipc/name_table.h +++ b/net/tipc/name_table.h @@ -40,6 +40,7 @@ struct tipc_subscription; struct tipc_plist; struct tipc_nlist; +struct tipc_group; /* * TIPC name types reserved for internal TIPC use (both current and planned) @@ -101,9 +102,14 @@ int tipc_nl_name_table_dump(struct sk_buff *skb, struct netlink_callback *cb); u32 tipc_nametbl_translate(struct net *net, u32 type, u32 instance, u32 *node); int tipc_nametbl_mc_translate(struct net *net, u32 type, u32 lower, u32 upper, u32 limit, struct list_head *dports); +void tipc_nametbl_build_group(struct net *net, struct tipc_group *grp, + u32 type, u32 domain); void tipc_nametbl_lookup_dst_nodes(struct net *net, u32 type, u32 lower, u32 upper, u32 domain, struct tipc_nlist *nodes); +bool tipc_nametbl_lookup(struct net *net, u32 type, u32 instance, u32 domain, + struct list_head *dsts, int *dstcnt, u32 exclude, + bool all); struct publication *tipc_nametbl_publish(struct net *net, u32 type, u32 lower, u32 upper, u32 scope, u32 port_ref, u32 key); @@ -120,16 +126,22 @@ void tipc_nametbl_unsubscribe(struct tipc_subscription *s); int tipc_nametbl_init(struct net *net); void tipc_nametbl_stop(struct net *net); -struct u32_item { +struct tipc_dest { struct list_head list; - u32 value; + union { + struct { + u32 port; + u32 node; + }; + u64 value; + }; }; -bool u32_push(struct list_head *l, u32 value); -u32 u32_pop(struct list_head *l); -bool u32_find(struct list_head *l, u32 value); -bool u32_del(struct list_head *l, u32 value); -void u32_list_purge(struct list_head *l); -int u32_list_len(struct list_head *l); +struct tipc_dest *tipc_dest_find(struct list_head *l, u32 node, u32 port); +bool tipc_dest_push(struct list_head *l, u32 node, u32 port); +bool tipc_dest_pop(struct list_head *l, u32 *node, u32 *port); +bool tipc_dest_del(struct list_head *l, u32 node, u32 port); +void tipc_dest_list_purge(struct list_head *l); +int tipc_dest_list_len(struct list_head *l); #endif diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 9bfe886ab330..e48f0b2c01b9 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -258,13 +258,15 @@ static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, arg = nlmsg_new(0, GFP_KERNEL); if (!arg) { kfree_skb(msg->rep); + msg->rep = NULL; return -ENOMEM; } err = __tipc_nl_compat_dumpit(cmd, msg, arg); - if (err) + if (err) { kfree_skb(msg->rep); - + msg->rep = NULL; + } kfree_skb(arg); return err; @@ -1215,7 +1217,7 @@ send: return err; } -static struct genl_ops tipc_genl_compat_ops[] = { +static const struct genl_ops tipc_genl_compat_ops[] = { { .cmd = TIPC_GENL_CMD, .doit = tipc_nl_compat_recv, diff --git a/net/tipc/node.c b/net/tipc/node.c index 9b4dcb6a16b5..507017fe0f1b 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -153,11 +153,11 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete); static void node_lost_contact(struct tipc_node *n, struct sk_buff_head *inputq); static void tipc_node_delete(struct tipc_node *node); -static void tipc_node_timeout(unsigned long data); +static void tipc_node_timeout(struct timer_list *t); static void tipc_node_fsm_evt(struct tipc_node *n, int evt); static struct tipc_node *tipc_node_find(struct net *net, u32 addr); static void tipc_node_put(struct tipc_node *node); -static bool tipc_node_is_up(struct tipc_node *n); +static bool node_is_up(struct tipc_node *n); struct tipc_sock_conn { u32 port; @@ -361,7 +361,7 @@ struct tipc_node *tipc_node_create(struct net *net, u32 addr, u16 capabilities) goto exit; } tipc_node_get(n); - setup_timer(&n->timer, tipc_node_timeout, (unsigned long)n); + timer_setup(&n->timer, tipc_node_timeout, 0); n->keepalive_intv = U32_MAX; hlist_add_head_rcu(&n->hash, &tn->node_htable[tipc_hashfn(addr)]); list_for_each_entry_rcu(temp_node, &tn->node_list, list) { @@ -500,9 +500,9 @@ void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port) /* tipc_node_timeout - handle expiration of node timer */ -static void tipc_node_timeout(unsigned long data) +static void tipc_node_timeout(struct timer_list *t) { - struct tipc_node *n = (struct tipc_node *)data; + struct tipc_node *n = from_timer(n, t, timer); struct tipc_link_entry *le; struct sk_buff_head xmitq; int bearer_id; @@ -657,7 +657,7 @@ static void __tipc_node_link_down(struct tipc_node *n, int *bearer_id, *slot1 = i; } - if (!tipc_node_is_up(n)) { + if (!node_is_up(n)) { if (tipc_link_peer_is_down(l)) tipc_node_fsm_evt(n, PEER_LOST_CONTACT_EVT); tipc_node_fsm_evt(n, SELF_LOST_CONTACT_EVT); @@ -717,11 +717,27 @@ static void tipc_node_link_down(struct tipc_node *n, int bearer_id, bool delete) tipc_sk_rcv(n->net, &le->inputq); } -static bool tipc_node_is_up(struct tipc_node *n) +static bool node_is_up(struct tipc_node *n) { return n->active_links[0] != INVALID_BEARER_ID; } +bool tipc_node_is_up(struct net *net, u32 addr) +{ + struct tipc_node *n; + bool retval = false; + + if (in_own_node(net, addr)) + return true; + + n = tipc_node_find(net, addr); + if (!n) + return false; + retval = node_is_up(n); + tipc_node_put(n); + return retval; +} + void tipc_node_check_dest(struct net *net, u32 onode, struct tipc_bearer *b, u16 capabilities, u32 signature, @@ -1126,8 +1142,8 @@ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr, strncpy(linkname, tipc_link_name(link), len); err = 0; } -exit: tipc_node_read_unlock(node); +exit: tipc_node_put(node); return err; } @@ -1149,7 +1165,7 @@ static int __tipc_nl_add_node(struct tipc_nl_msg *msg, struct tipc_node *node) if (nla_put_u32(msg->skb, TIPC_NLA_NODE_ADDR, node->addr)) goto attr_msg_full; - if (tipc_node_is_up(node)) + if (node_is_up(node)) if (nla_put_flag(msg->skb, TIPC_NLA_NODE_UP)) goto attr_msg_full; @@ -1238,6 +1254,22 @@ int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dnode, return 0; } +/* tipc_node_distr_xmit(): send single buffer msgs to individual destinations + * Note: this is only for SYSTEM_IMPORTANCE messages, which cannot be rejected + */ +int tipc_node_distr_xmit(struct net *net, struct sk_buff_head *xmitq) +{ + struct sk_buff *skb; + u32 selector, dnode; + + while ((skb = __skb_dequeue(xmitq))) { + selector = msg_origport(buf_msg(skb)); + dnode = msg_destnode(buf_msg(skb)); + tipc_node_xmit_skb(net, skb, dnode, selector); + } + return 0; +} + void tipc_node_broadcast(struct net *net, struct sk_buff *skb) { struct sk_buff *txskb; @@ -1249,7 +1281,7 @@ void tipc_node_broadcast(struct net *net, struct sk_buff *skb) dst = n->addr; if (in_own_node(net, dst)) continue; - if (!tipc_node_is_up(n)) + if (!node_is_up(n)) continue; txskb = pskb_copy(skb, GFP_ATOMIC); if (!txskb) @@ -1284,7 +1316,7 @@ static void tipc_node_bc_sync_rcv(struct tipc_node *n, struct tipc_msg *hdr, rc = tipc_bcast_sync_rcv(n->net, n->bc_entry.link, hdr); if (rc & TIPC_LINK_DOWN_EVT) { - tipc_bearer_reset_all(n->net); + tipc_node_reset_links(n); return; } @@ -1351,15 +1383,9 @@ static void tipc_node_bc_rcv(struct net *net, struct sk_buff *skb, int bearer_id if (!skb_queue_empty(&be->inputq1)) tipc_node_mcast_rcv(n); - if (rc & TIPC_LINK_DOWN_EVT) { - /* Reception reassembly failure => reset all links to peer */ - if (!tipc_link_is_up(be->link)) - tipc_node_reset_links(n); - - /* Retransmission failure => reset all links to all peers */ - if (!tipc_link_is_up(tipc_bc_sndlink(net))) - tipc_bearer_reset_all(net); - } + /* If reassembly or retransmission failure => reset all links to peer */ + if (rc & TIPC_LINK_DOWN_EVT) + tipc_node_reset_links(n); tipc_node_put(n); } @@ -1513,7 +1539,7 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) __skb_queue_head_init(&xmitq); /* Ensure message is well-formed before touching the header */ - if (unlikely(!tipc_msg_validate(skb))) + if (unlikely(!tipc_msg_validate(&skb))) goto discard; hdr = buf_msg(skb); usr = msg_user(hdr); @@ -1557,6 +1583,8 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) /* Check/update node state before receiving */ if (unlikely(skb)) { + if (unlikely(skb_linearize(skb))) + goto discard; tipc_node_write_lock(n); if (tipc_node_check_state(n, skb, bearer_id, &xmitq)) { if (le->link) { diff --git a/net/tipc/node.h b/net/tipc/node.h index 898c22916984..acd58d23a70e 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -48,7 +48,8 @@ enum { TIPC_BCAST_SYNCH = (1 << 1), TIPC_BCAST_STATE_NACK = (1 << 2), TIPC_BLOCK_FLOWCTL = (1 << 3), - TIPC_BCAST_RCAST = (1 << 4) + TIPC_BCAST_RCAST = (1 << 4), + TIPC_MCAST_GROUPS = (1 << 5) }; #define TIPC_NODE_CAPABILITIES (TIPC_BCAST_SYNCH | \ @@ -68,6 +69,7 @@ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 node, char *linkname, size_t len); int tipc_node_xmit(struct net *net, struct sk_buff_head *list, u32 dnode, int selector); +int tipc_node_distr_xmit(struct net *net, struct sk_buff_head *list); int tipc_node_xmit_skb(struct net *net, struct sk_buff *skb, u32 dest, u32 selector); void tipc_node_subscribe(struct net *net, struct list_head *subscr, u32 addr); @@ -76,6 +78,7 @@ void tipc_node_broadcast(struct net *net, struct sk_buff *skb); int tipc_node_add_conn(struct net *net, u32 dnode, u32 port, u32 peer_port); void tipc_node_remove_conn(struct net *net, u32 dnode, u32 port); int tipc_node_get_mtu(struct net *net, u32 addr, u32 sel); +bool tipc_node_is_up(struct net *net, u32 addr); u16 tipc_node_get_capabilities(struct net *net, u32 addr); int tipc_nl_node_dump(struct sk_buff *skb, struct netlink_callback *cb); int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb); diff --git a/net/tipc/server.c b/net/tipc/server.c index 3cd6402e812c..acaef80fb88c 100644 --- a/net/tipc/server.c +++ b/net/tipc/server.c @@ -36,6 +36,8 @@ #include "server.h" #include "core.h" #include "socket.h" +#include "addr.h" +#include "msg.h" #include <net/sock.h> #include <linux/module.h> @@ -105,13 +107,11 @@ static void tipc_conn_kref_release(struct kref *kref) kernel_bind(sock, (struct sockaddr *)saddr, sizeof(*saddr)); sock_release(sock); con->sock = NULL; - - spin_lock_bh(&s->idr_lock); - idr_remove(&s->conn_idr, con->conid); - s->idr_in_use--; - spin_unlock_bh(&s->idr_lock); } - + spin_lock_bh(&s->idr_lock); + idr_remove(&s->conn_idr, con->conid); + s->idr_in_use--; + spin_unlock_bh(&s->idr_lock); tipc_clean_outqueues(con); kfree(con); } @@ -197,7 +197,8 @@ static void tipc_close_conn(struct tipc_conn *con) struct tipc_server *s = con->server; if (test_and_clear_bit(CF_CONNECTED, &con->flags)) { - tipc_unregister_callbacks(con); + if (con->sock) + tipc_unregister_callbacks(con); if (con->conid) s->tipc_conn_release(con->conid, con->usr_data); @@ -207,8 +208,8 @@ static void tipc_close_conn(struct tipc_conn *con) * are harmless for us here as we have already deleted this * connection from server connection list. */ - kernel_sock_shutdown(con->sock, SHUT_RDWR); - + if (con->sock) + kernel_sock_shutdown(con->sock, SHUT_RDWR); conn_put(con); } } @@ -487,38 +488,104 @@ void tipc_conn_terminate(struct tipc_server *s, int conid) } } +bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, + u32 lower, u32 upper, int *conid) +{ + struct tipc_subscriber *scbr; + struct tipc_subscr sub; + struct tipc_server *s; + struct tipc_conn *con; + + sub.seq.type = type; + sub.seq.lower = lower; + sub.seq.upper = upper; + sub.timeout = TIPC_WAIT_FOREVER; + sub.filter = TIPC_SUB_PORTS; + *(u32 *)&sub.usr_handle = port; + + con = tipc_alloc_conn(tipc_topsrv(net)); + if (IS_ERR(con)) + return false; + + *conid = con->conid; + s = con->server; + scbr = s->tipc_conn_new(*conid); + if (!scbr) { + tipc_close_conn(con); + return false; + } + + con->usr_data = scbr; + con->sock = NULL; + s->tipc_conn_recvmsg(net, *conid, NULL, scbr, &sub, sizeof(sub)); + return true; +} + +void tipc_topsrv_kern_unsubscr(struct net *net, int conid) +{ + struct tipc_conn *con; + + con = tipc_conn_lookup(tipc_topsrv(net), conid); + if (!con) + return; + tipc_close_conn(con); + conn_put(con); +} + +static void tipc_send_kern_top_evt(struct net *net, struct tipc_event *evt) +{ + u32 port = *(u32 *)&evt->s.usr_handle; + u32 self = tipc_own_addr(net); + struct sk_buff_head evtq; + struct sk_buff *skb; + + skb = tipc_msg_create(TOP_SRV, 0, INT_H_SIZE, sizeof(*evt), + self, self, port, port, 0); + if (!skb) + return; + msg_set_dest_droppable(buf_msg(skb), true); + memcpy(msg_data(buf_msg(skb)), evt, sizeof(*evt)); + skb_queue_head_init(&evtq); + __skb_queue_tail(&evtq, skb); + tipc_sk_rcv(net, &evtq); +} + static void tipc_send_to_sock(struct tipc_conn *con) { - int count = 0; struct tipc_server *s = con->server; struct outqueue_entry *e; + struct tipc_event *evt; struct msghdr msg; + int count = 0; int ret; spin_lock_bh(&con->outqueue_lock); while (test_bit(CF_CONNECTED, &con->flags)) { - e = list_entry(con->outqueue.next, struct outqueue_entry, - list); + e = list_entry(con->outqueue.next, struct outqueue_entry, list); if ((struct list_head *) e == &con->outqueue) break; - spin_unlock_bh(&con->outqueue_lock); - memset(&msg, 0, sizeof(msg)); - msg.msg_flags = MSG_DONTWAIT; + spin_unlock_bh(&con->outqueue_lock); - if (s->type == SOCK_DGRAM || s->type == SOCK_RDM) { - msg.msg_name = &e->dest; - msg.msg_namelen = sizeof(struct sockaddr_tipc); - } - ret = kernel_sendmsg(con->sock, &msg, &e->iov, 1, - e->iov.iov_len); - if (ret == -EWOULDBLOCK || ret == 0) { - cond_resched(); - goto out; - } else if (ret < 0) { - goto send_err; + if (con->sock) { + memset(&msg, 0, sizeof(msg)); + msg.msg_flags = MSG_DONTWAIT; + if (s->type == SOCK_DGRAM || s->type == SOCK_RDM) { + msg.msg_name = &e->dest; + msg.msg_namelen = sizeof(struct sockaddr_tipc); + } + ret = kernel_sendmsg(con->sock, &msg, &e->iov, 1, + e->iov.iov_len); + if (ret == -EWOULDBLOCK || ret == 0) { + cond_resched(); + goto out; + } else if (ret < 0) { + goto send_err; + } + } else { + evt = e->iov.iov_base; + tipc_send_kern_top_evt(s->net, evt); } - /* Don't starve users filling buffers */ if (++count >= MAX_SEND_MSG_COUNT) { cond_resched(); diff --git a/net/tipc/server.h b/net/tipc/server.h index 34f8055afa3b..2113c9192633 100644 --- a/net/tipc/server.h +++ b/net/tipc/server.h @@ -83,13 +83,16 @@ struct tipc_server { int tipc_conn_sendmsg(struct tipc_server *s, int conid, struct sockaddr_tipc *addr, void *data, size_t len); +bool tipc_topsrv_kern_subscr(struct net *net, u32 port, u32 type, + u32 lower, u32 upper, int *conid); +void tipc_topsrv_kern_unsubscr(struct net *net, int conid); + /** * tipc_conn_terminate - terminate connection with server * * Note: Must call it in process context since it might sleep */ void tipc_conn_terminate(struct tipc_server *s, int conid); - int tipc_server_start(struct tipc_server *s); void tipc_server_stop(struct tipc_server *s); diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 101e3597338f..5d18c0caa92b 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1,7 +1,7 @@ /* * net/tipc/socket.c: TIPC socket API * - * Copyright (c) 2001-2007, 2012-2016, Ericsson AB + * Copyright (c) 2001-2007, 2012-2017, Ericsson AB * Copyright (c) 2004-2008, 2010-2013, Wind River Systems * All rights reserved. * @@ -45,9 +45,10 @@ #include "socket.h" #include "bcast.h" #include "netlink.h" +#include "group.h" #define CONN_TIMEOUT_DEFAULT 8000 /* default connect timeout = 8s */ -#define CONN_PROBING_INTERVAL msecs_to_jiffies(3600000) /* [ms] => 1 h */ +#define CONN_PROBING_INTV msecs_to_jiffies(3600000) /* [ms] => 1 h */ #define TIPC_FWD_MSG 1 #define TIPC_MAX_PORT 0xffffffff #define TIPC_MIN_PORT 1 @@ -61,6 +62,11 @@ enum { TIPC_CONNECTING = TCP_SYN_SENT, }; +struct sockaddr_pair { + struct sockaddr_tipc sock; + struct sockaddr_tipc member; +}; + /** * struct tipc_sock - TIPC socket structure * @sk: socket - interacts with 'port' and with user via the socket API @@ -78,7 +84,7 @@ enum { * @conn_timeout: the time we can wait for an unresponded setup request * @dupl_rcvcnt: number of bytes counted twice, in both backlog and rcv queue * @cong_link_cnt: number of congested links - * @sent_unacked: # messages sent by socket, and not yet acked by peer + * @snt_unacked: # messages sent by socket, and not yet acked by peer * @rcv_unacked: # messages read by user, but not yet acked back to peer * @peer: 'connected' peer for dgram/rdm * @node: hash table node @@ -109,20 +115,22 @@ struct tipc_sock { struct rhash_head node; struct tipc_mc_method mc_method; struct rcu_head rcu; + struct tipc_group *group; }; -static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb); +static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb); static void tipc_data_ready(struct sock *sk); static void tipc_write_space(struct sock *sk); static void tipc_sock_destruct(struct sock *sk); static int tipc_release(struct socket *sock); static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags, bool kern); -static void tipc_sk_timeout(unsigned long data); +static void tipc_sk_timeout(struct timer_list *t); static int tipc_sk_publish(struct tipc_sock *tsk, uint scope, struct tipc_name_seq const *seq); static int tipc_sk_withdraw(struct tipc_sock *tsk, uint scope, struct tipc_name_seq const *seq); +static int tipc_sk_leave(struct tipc_sock *tsk); static struct tipc_sock *tipc_sk_lookup(struct net *net, u32 portid); static int tipc_sk_insert(struct tipc_sock *tsk); static void tipc_sk_remove(struct tipc_sock *tsk); @@ -193,6 +201,11 @@ static bool tsk_conn_cong(struct tipc_sock *tsk) return tsk->snt_unacked > tsk->snd_win; } +static u16 tsk_blocks(int len) +{ + return ((len / FLOWCTL_BLK_SZ) + 1); +} + /* tsk_blocks(): translate a buffer size in bytes to number of * advertisable blocks, taking into account the ratio truesize(len)/len * We can trust that this ratio is always < 4 for len >= FLOWCTL_BLK_SZ @@ -451,9 +464,9 @@ static int tipc_sk_create(struct net *net, struct socket *sock, NAMED_H_SIZE, 0); msg_set_origport(msg, tsk->portid); - setup_timer(&sk->sk_timer, tipc_sk_timeout, (unsigned long)tsk); + timer_setup(&sk->sk_timer, tipc_sk_timeout, 0); sk->sk_shutdown = 0; - sk->sk_backlog_rcv = tipc_backlog_rcv; + sk->sk_backlog_rcv = tipc_sk_backlog_rcv; sk->sk_rcvbuf = sysctl_tipc_rmem[1]; sk->sk_data_ready = tipc_data_ready; sk->sk_write_space = tipc_write_space; @@ -559,13 +572,14 @@ static int tipc_release(struct socket *sock) __tipc_shutdown(sock, TIPC_ERR_NO_PORT); sk->sk_shutdown = SHUTDOWN_MASK; + tipc_sk_leave(tsk); tipc_sk_withdraw(tsk, 0, NULL); sk_stop_timer(sk, &sk->sk_timer); tipc_sk_remove(tsk); /* Reject any messages that accumulated in backlog queue */ release_sock(sk); - u32_list_purge(&tsk->cong_links); + tipc_dest_list_purge(&tsk->cong_links); tsk->cong_link_cnt = 0; call_rcu(&tsk->rcu, tipc_sk_callback); sock->sk = NULL; @@ -601,7 +615,10 @@ static int tipc_bind(struct socket *sock, struct sockaddr *uaddr, res = tipc_sk_withdraw(tsk, 0, NULL); goto exit; } - + if (tsk->group) { + res = -EACCES; + goto exit; + } if (uaddr_len < sizeof(struct sockaddr_tipc)) { res = -EINVAL; goto exit; @@ -698,38 +715,41 @@ static unsigned int tipc_poll(struct file *file, struct socket *sock, { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); - u32 mask = 0; + struct tipc_group *grp = tsk->group; + u32 revents = 0; sock_poll_wait(file, sk_sleep(sk), wait); if (sk->sk_shutdown & RCV_SHUTDOWN) - mask |= POLLRDHUP | POLLIN | POLLRDNORM; + revents |= POLLRDHUP | POLLIN | POLLRDNORM; if (sk->sk_shutdown == SHUTDOWN_MASK) - mask |= POLLHUP; + revents |= POLLHUP; switch (sk->sk_state) { case TIPC_ESTABLISHED: if (!tsk->cong_link_cnt && !tsk_conn_cong(tsk)) - mask |= POLLOUT; + revents |= POLLOUT; /* fall thru' */ case TIPC_LISTEN: case TIPC_CONNECTING: if (!skb_queue_empty(&sk->sk_receive_queue)) - mask |= (POLLIN | POLLRDNORM); + revents |= POLLIN | POLLRDNORM; break; case TIPC_OPEN: - if (!tsk->cong_link_cnt) - mask |= POLLOUT; - if (tipc_sk_type_connectionless(sk) && - (!skb_queue_empty(&sk->sk_receive_queue))) - mask |= (POLLIN | POLLRDNORM); + if (!grp || tipc_group_size(grp)) + if (!tsk->cong_link_cnt) + revents |= POLLOUT; + if (!tipc_sk_type_connectionless(sk)) + break; + if (skb_queue_empty(&sk->sk_receive_queue)) + break; + revents |= POLLIN | POLLRDNORM; break; case TIPC_DISCONNECTING: - mask = (POLLIN | POLLRDNORM | POLLHUP); + revents = POLLIN | POLLRDNORM | POLLHUP; break; } - - return mask; + return revents; } /** @@ -757,6 +777,9 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, struct tipc_nlist dsts; int rc; + if (tsk->group) + return -EACCES; + /* Block or return if any destination link is congested */ rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt); if (unlikely(rc)) @@ -794,6 +817,296 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, } /** + * tipc_send_group_msg - send a message to a member in the group + * @net: network namespace + * @m: message to send + * @mb: group member + * @dnode: destination node + * @dport: destination port + * @dlen: total length of message data + */ +static int tipc_send_group_msg(struct net *net, struct tipc_sock *tsk, + struct msghdr *m, struct tipc_member *mb, + u32 dnode, u32 dport, int dlen) +{ + u16 bc_snd_nxt = tipc_group_bc_snd_nxt(tsk->group); + struct tipc_mc_method *method = &tsk->mc_method; + int blks = tsk_blocks(GROUP_H_SIZE + dlen); + struct tipc_msg *hdr = &tsk->phdr; + struct sk_buff_head pkts; + int mtu, rc; + + /* Complete message header */ + msg_set_type(hdr, TIPC_GRP_UCAST_MSG); + msg_set_hdr_sz(hdr, GROUP_H_SIZE); + msg_set_destport(hdr, dport); + msg_set_destnode(hdr, dnode); + msg_set_grp_bc_seqno(hdr, bc_snd_nxt); + + /* Build message as chain of buffers */ + skb_queue_head_init(&pkts); + mtu = tipc_node_get_mtu(net, dnode, tsk->portid); + rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts); + if (unlikely(rc != dlen)) + return rc; + + /* Send message */ + rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid); + if (unlikely(rc == -ELINKCONG)) { + tipc_dest_push(&tsk->cong_links, dnode, 0); + tsk->cong_link_cnt++; + } + + /* Update send window */ + tipc_group_update_member(mb, blks); + + /* A broadcast sent within next EXPIRE period must follow same path */ + method->rcast = true; + method->mandatory = true; + return dlen; +} + +/** + * tipc_send_group_unicast - send message to a member in the group + * @sock: socket structure + * @m: message to send + * @dlen: total length of message data + * @timeout: timeout to wait for wakeup + * + * Called from function tipc_sendmsg(), which has done all sanity checks + * Returns the number of bytes sent on success, or errno + */ +static int tipc_send_group_unicast(struct socket *sock, struct msghdr *m, + int dlen, long timeout) +{ + struct sock *sk = sock->sk; + DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); + int blks = tsk_blocks(GROUP_H_SIZE + dlen); + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_group *grp = tsk->group; + struct net *net = sock_net(sk); + struct tipc_member *mb = NULL; + u32 node, port; + int rc; + + node = dest->addr.id.node; + port = dest->addr.id.ref; + if (!port && !node) + return -EHOSTUNREACH; + + /* Block or return if destination link or member is congested */ + rc = tipc_wait_for_cond(sock, &timeout, + !tipc_dest_find(&tsk->cong_links, node, 0) && + !tipc_group_cong(grp, node, port, blks, &mb)); + if (unlikely(rc)) + return rc; + + if (unlikely(!mb)) + return -EHOSTUNREACH; + + rc = tipc_send_group_msg(net, tsk, m, mb, node, port, dlen); + + return rc ? rc : dlen; +} + +/** + * tipc_send_group_anycast - send message to any member with given identity + * @sock: socket structure + * @m: message to send + * @dlen: total length of message data + * @timeout: timeout to wait for wakeup + * + * Called from function tipc_sendmsg(), which has done all sanity checks + * Returns the number of bytes sent on success, or errno + */ +static int tipc_send_group_anycast(struct socket *sock, struct msghdr *m, + int dlen, long timeout) +{ + DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); + struct sock *sk = sock->sk; + struct tipc_sock *tsk = tipc_sk(sk); + struct list_head *cong_links = &tsk->cong_links; + int blks = tsk_blocks(GROUP_H_SIZE + dlen); + struct tipc_group *grp = tsk->group; + struct tipc_member *first = NULL; + struct tipc_member *mbr = NULL; + struct net *net = sock_net(sk); + u32 node, port, exclude; + u32 type, inst, domain; + struct list_head dsts; + int lookups = 0; + int dstcnt, rc; + bool cong; + + INIT_LIST_HEAD(&dsts); + + type = dest->addr.name.name.type; + inst = dest->addr.name.name.instance; + domain = addr_domain(net, dest->scope); + exclude = tipc_group_exclude(grp); + + while (++lookups < 4) { + first = NULL; + + /* Look for a non-congested destination member, if any */ + while (1) { + if (!tipc_nametbl_lookup(net, type, inst, domain, &dsts, + &dstcnt, exclude, false)) + return -EHOSTUNREACH; + tipc_dest_pop(&dsts, &node, &port); + cong = tipc_group_cong(grp, node, port, blks, &mbr); + if (!cong) + break; + if (mbr == first) + break; + if (!first) + first = mbr; + } + + /* Start over if destination was not in member list */ + if (unlikely(!mbr)) + continue; + + if (likely(!cong && !tipc_dest_find(cong_links, node, 0))) + break; + + /* Block or return if destination link or member is congested */ + rc = tipc_wait_for_cond(sock, &timeout, + !tipc_dest_find(cong_links, node, 0) && + !tipc_group_cong(grp, node, port, + blks, &mbr)); + if (unlikely(rc)) + return rc; + + /* Send, unless destination disappeared while waiting */ + if (likely(mbr)) + break; + } + + if (unlikely(lookups >= 4)) + return -EHOSTUNREACH; + + rc = tipc_send_group_msg(net, tsk, m, mbr, node, port, dlen); + + return rc ? rc : dlen; +} + +/** + * tipc_send_group_bcast - send message to all members in communication group + * @sk: socket structure + * @m: message to send + * @dlen: total length of message data + * @timeout: timeout to wait for wakeup + * + * Called from function tipc_sendmsg(), which has done all sanity checks + * Returns the number of bytes sent on success, or errno + */ +static int tipc_send_group_bcast(struct socket *sock, struct msghdr *m, + int dlen, long timeout) +{ + DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); + struct sock *sk = sock->sk; + struct net *net = sock_net(sk); + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_group *grp = tsk->group; + struct tipc_nlist *dsts = tipc_group_dests(grp); + struct tipc_mc_method *method = &tsk->mc_method; + bool ack = method->mandatory && method->rcast; + int blks = tsk_blocks(MCAST_H_SIZE + dlen); + struct tipc_msg *hdr = &tsk->phdr; + int mtu = tipc_bcast_get_mtu(net); + struct sk_buff_head pkts; + int rc = -EHOSTUNREACH; + + if (!dsts->local && !dsts->remote) + return -EHOSTUNREACH; + + /* Block or return if any destination link or member is congested */ + rc = tipc_wait_for_cond(sock, &timeout, !tsk->cong_link_cnt && + !tipc_group_bc_cong(grp, blks)); + if (unlikely(rc)) + return rc; + + /* Complete message header */ + if (dest) { + msg_set_type(hdr, TIPC_GRP_MCAST_MSG); + msg_set_nameinst(hdr, dest->addr.name.name.instance); + } else { + msg_set_type(hdr, TIPC_GRP_BCAST_MSG); + msg_set_nameinst(hdr, 0); + } + msg_set_hdr_sz(hdr, GROUP_H_SIZE); + msg_set_destport(hdr, 0); + msg_set_destnode(hdr, 0); + msg_set_grp_bc_seqno(hdr, tipc_group_bc_snd_nxt(grp)); + + /* Avoid getting stuck with repeated forced replicasts */ + msg_set_grp_bc_ack_req(hdr, ack); + + /* Build message as chain of buffers */ + skb_queue_head_init(&pkts); + rc = tipc_msg_build(hdr, m, 0, dlen, mtu, &pkts); + if (unlikely(rc != dlen)) + return rc; + + /* Send message */ + rc = tipc_mcast_xmit(net, &pkts, method, dsts, &tsk->cong_link_cnt); + if (unlikely(rc)) + return rc; + + /* Update broadcast sequence number and send windows */ + tipc_group_update_bc_members(tsk->group, blks, ack); + + /* Broadcast link is now free to choose method for next broadcast */ + method->mandatory = false; + method->expires = jiffies; + + return dlen; +} + +/** + * tipc_send_group_mcast - send message to all members with given identity + * @sock: socket structure + * @m: message to send + * @dlen: total length of message data + * @timeout: timeout to wait for wakeup + * + * Called from function tipc_sendmsg(), which has done all sanity checks + * Returns the number of bytes sent on success, or errno + */ +static int tipc_send_group_mcast(struct socket *sock, struct msghdr *m, + int dlen, long timeout) +{ + struct sock *sk = sock->sk; + DECLARE_SOCKADDR(struct sockaddr_tipc *, dest, m->msg_name); + struct tipc_name_seq *seq = &dest->addr.nameseq; + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_group *grp = tsk->group; + struct net *net = sock_net(sk); + u32 domain, exclude, dstcnt; + struct list_head dsts; + + INIT_LIST_HEAD(&dsts); + + if (seq->lower != seq->upper) + return -ENOTSUPP; + + domain = addr_domain(net, dest->scope); + exclude = tipc_group_exclude(grp); + if (!tipc_nametbl_lookup(net, seq->type, seq->lower, domain, + &dsts, &dstcnt, exclude, true)) + return -EHOSTUNREACH; + + if (dstcnt == 1) { + tipc_dest_pop(&dsts, &dest->addr.id.node, &dest->addr.id.ref); + return tipc_send_group_unicast(sock, m, dlen, timeout); + } + + tipc_dest_list_purge(&dsts); + return tipc_send_group_bcast(sock, m, dlen, timeout); +} + +/** * tipc_sk_mcast_rcv - Deliver multicast messages to all destination sockets * @arrvq: queue with arriving messages, to be cloned after destination lookup * @inputq: queue with cloned messages, delivered to socket after dest lookup @@ -803,13 +1116,15 @@ static int tipc_sendmcast(struct socket *sock, struct tipc_name_seq *seq, void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, struct sk_buff_head *inputq) { - struct tipc_msg *msg; - struct list_head dports; - u32 portid; u32 scope = TIPC_CLUSTER_SCOPE; - struct sk_buff_head tmpq; - uint hsz; + u32 self = tipc_own_addr(net); struct sk_buff *skb, *_skb; + u32 lower = 0, upper = ~0; + struct sk_buff_head tmpq; + u32 portid, oport, onode; + struct list_head dports; + struct tipc_msg *msg; + int user, mtyp, hsz; __skb_queue_head_init(&tmpq); INIT_LIST_HEAD(&dports); @@ -817,17 +1132,32 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, skb = tipc_skb_peek(arrvq, &inputq->lock); for (; skb; skb = tipc_skb_peek(arrvq, &inputq->lock)) { msg = buf_msg(skb); + user = msg_user(msg); + mtyp = msg_type(msg); + if (mtyp == TIPC_GRP_UCAST_MSG || user == GROUP_PROTOCOL) { + spin_lock_bh(&inputq->lock); + if (skb_peek(arrvq) == skb) { + __skb_dequeue(arrvq); + __skb_queue_tail(inputq, skb); + } + refcount_dec(&skb->users); + spin_unlock_bh(&inputq->lock); + continue; + } hsz = skb_headroom(skb) + msg_hdr_sz(msg); - - if (in_own_node(net, msg_orignode(msg))) + oport = msg_origport(msg); + onode = msg_orignode(msg); + if (onode == self) scope = TIPC_NODE_SCOPE; /* Create destination port list and message clones: */ - tipc_nametbl_mc_translate(net, - msg_nametype(msg), msg_namelower(msg), - msg_nameupper(msg), scope, &dports); - portid = u32_pop(&dports); - for (; portid; portid = u32_pop(&dports)) { + if (!msg_in_group(msg)) { + lower = msg_namelower(msg); + upper = msg_nameupper(msg); + } + tipc_nametbl_mc_translate(net, msg_nametype(msg), lower, upper, + scope, &dports); + while (tipc_dest_pop(&dports, NULL, &portid)) { _skb = __pskb_copy(skb, hsz, GFP_ATOMIC); if (_skb) { msg_set_destport(buf_msg(_skb), portid); @@ -850,16 +1180,16 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, } /** - * tipc_sk_proto_rcv - receive a connection mng protocol message + * tipc_sk_conn_proto_rcv - receive a connection mng protocol message * @tsk: receiving socket * @skb: pointer to message buffer. */ -static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb, - struct sk_buff_head *xmitq) +static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb, + struct sk_buff_head *xmitq) { - struct sock *sk = &tsk->sk; - u32 onode = tsk_own_node(tsk); struct tipc_msg *hdr = buf_msg(skb); + u32 onode = tsk_own_node(tsk); + struct sock *sk = &tsk->sk; int mtyp = msg_type(hdr); bool conn_cong; @@ -931,6 +1261,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) long timeout = sock_sndtimeo(sk, m->msg_flags & MSG_DONTWAIT); struct list_head *clinks = &tsk->cong_links; bool syn = !tipc_sk_type_connectionless(sk); + struct tipc_group *grp = tsk->group; struct tipc_msg *hdr = &tsk->phdr; struct tipc_name_seq *seq; struct sk_buff_head pkts; @@ -941,18 +1272,31 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) if (unlikely(dlen > TIPC_MAX_USER_MSG_SIZE)) return -EMSGSIZE; + if (likely(dest)) { + if (unlikely(m->msg_namelen < sizeof(*dest))) + return -EINVAL; + if (unlikely(dest->family != AF_TIPC)) + return -EINVAL; + } + + if (grp) { + if (!dest) + return tipc_send_group_bcast(sock, m, dlen, timeout); + if (dest->addrtype == TIPC_ADDR_NAME) + return tipc_send_group_anycast(sock, m, dlen, timeout); + if (dest->addrtype == TIPC_ADDR_ID) + return tipc_send_group_unicast(sock, m, dlen, timeout); + if (dest->addrtype == TIPC_ADDR_MCAST) + return tipc_send_group_mcast(sock, m, dlen, timeout); + return -EINVAL; + } + if (unlikely(!dest)) { dest = &tsk->peer; if (!syn || dest->family != AF_TIPC) return -EDESTADDRREQ; } - if (unlikely(m->msg_namelen < sizeof(*dest))) - return -EINVAL; - - if (unlikely(dest->family != AF_TIPC)) - return -EINVAL; - if (unlikely(syn)) { if (sk->sk_state == TIPC_LISTEN) return -EPIPE; @@ -985,7 +1329,6 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) msg_set_destport(hdr, dport); if (unlikely(!dport && !dnode)) return -EHOSTUNREACH; - } else if (dest->addrtype == TIPC_ADDR_ID) { dnode = dest->addr.id.node; msg_set_type(hdr, TIPC_DIRECT_MSG); @@ -996,7 +1339,8 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) } /* Block or return if destination link is congested */ - rc = tipc_wait_for_cond(sock, &timeout, !u32_find(clinks, dnode)); + rc = tipc_wait_for_cond(sock, &timeout, + !tipc_dest_find(clinks, dnode, 0)); if (unlikely(rc)) return rc; @@ -1008,7 +1352,7 @@ static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dlen) rc = tipc_node_xmit(net, &pkts, dnode, tsk->portid); if (unlikely(rc == -ELINKCONG)) { - u32_push(clinks, dnode); + tipc_dest_push(clinks, dnode, 0); tsk->cong_link_cnt++; rc = 0; } @@ -1128,7 +1472,7 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, msg_set_lookup_scope(msg, 0); msg_set_hdr_sz(msg, SHORT_H_SIZE); - sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL); + sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV); tipc_set_sk_state(sk, TIPC_ESTABLISHED); tipc_node_add_conn(net, peer_node, tsk->portid, peer_port); tsk->max_pkt = tipc_node_get_mtu(net, peer_node, tsk->portid); @@ -1142,26 +1486,38 @@ static void tipc_sk_finish_conn(struct tipc_sock *tsk, u32 peer_port, } /** - * set_orig_addr - capture sender's address for received message + * tipc_sk_set_orig_addr - capture sender's address for received message * @m: descriptor for message info - * @msg: received message header + * @hdr: received message header * * Note: Address is not captured if not requested by receiver. */ -static void set_orig_addr(struct msghdr *m, struct tipc_msg *msg) +static void tipc_sk_set_orig_addr(struct msghdr *m, struct sk_buff *skb) { - DECLARE_SOCKADDR(struct sockaddr_tipc *, addr, m->msg_name); + DECLARE_SOCKADDR(struct sockaddr_pair *, srcaddr, m->msg_name); + struct tipc_msg *hdr = buf_msg(skb); - if (addr) { - addr->family = AF_TIPC; - addr->addrtype = TIPC_ADDR_ID; - memset(&addr->addr, 0, sizeof(addr->addr)); - addr->addr.id.ref = msg_origport(msg); - addr->addr.id.node = msg_orignode(msg); - addr->addr.name.domain = 0; /* could leave uninitialized */ - addr->scope = 0; /* could leave uninitialized */ - m->msg_namelen = sizeof(struct sockaddr_tipc); - } + if (!srcaddr) + return; + + srcaddr->sock.family = AF_TIPC; + srcaddr->sock.addrtype = TIPC_ADDR_ID; + srcaddr->sock.addr.id.ref = msg_origport(hdr); + srcaddr->sock.addr.id.node = msg_orignode(hdr); + srcaddr->sock.addr.name.domain = 0; + srcaddr->sock.scope = 0; + m->msg_namelen = sizeof(struct sockaddr_tipc); + + if (!msg_in_group(hdr)) + return; + + /* Group message users may also want to know sending member's id */ + srcaddr->member.family = AF_TIPC; + srcaddr->member.addrtype = TIPC_ADDR_NAME; + srcaddr->member.addr.name.name.type = msg_nametype(hdr); + srcaddr->member.addr.name.name.instance = TIPC_SKB_CB(skb)->orig_member; + srcaddr->member.addr.name.domain = 0; + m->msg_namelen = sizeof(*srcaddr); } /** @@ -1318,11 +1674,13 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, size_t buflen, int flags) { struct sock *sk = sock->sk; - struct tipc_sock *tsk = tipc_sk(sk); - struct sk_buff *skb; - struct tipc_msg *hdr; bool connected = !tipc_sk_type_connectionless(sk); + struct tipc_sock *tsk = tipc_sk(sk); int rc, err, hlen, dlen, copy; + struct sk_buff_head xmitq; + struct tipc_msg *hdr; + struct sk_buff *skb; + bool grp_evt; long timeout; /* Catch invalid receive requests */ @@ -1336,8 +1694,8 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, } timeout = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + /* Step rcv queue to first msg with data or error; wait if necessary */ do { - /* Look at first msg in receive queue; wait if necessary */ rc = tipc_wait_for_rcvmsg(sock, &timeout); if (unlikely(rc)) goto exit; @@ -1346,13 +1704,14 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, dlen = msg_data_sz(hdr); hlen = msg_hdr_sz(hdr); err = msg_errcode(hdr); + grp_evt = msg_is_grp_evt(hdr); if (likely(dlen || err)) break; tsk_advance_rx_queue(sk); } while (1); /* Collect msg meta data, including error code and rejected data */ - set_orig_addr(m, hdr); + tipc_sk_set_orig_addr(m, skb); rc = tipc_sk_anc_data_recv(m, hdr, tsk); if (unlikely(rc)) goto exit; @@ -1372,15 +1731,33 @@ static int tipc_recvmsg(struct socket *sock, struct msghdr *m, if (unlikely(rc)) goto exit; + /* Mark message as group event if applicable */ + if (unlikely(grp_evt)) { + if (msg_grp_evt(hdr) == TIPC_WITHDRAWN) + m->msg_flags |= MSG_EOR; + m->msg_flags |= MSG_OOB; + copy = 0; + } + /* Caption of data or error code/rejected data was successful */ if (unlikely(flags & MSG_PEEK)) goto exit; + /* Send group flow control advertisement when applicable */ + if (tsk->group && msg_in_group(hdr) && !grp_evt) { + skb_queue_head_init(&xmitq); + tipc_group_update_rcv_win(tsk->group, tsk_blocks(hlen + dlen), + msg_orignode(hdr), msg_origport(hdr), + &xmitq); + tipc_node_distr_xmit(sock_net(sk), &xmitq); + } + tsk_advance_rx_queue(sk); + if (likely(!connected)) goto exit; - /* Send connection flow control ack when applicable */ + /* Send connection flow control advertisement when applicable */ tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen); if (tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE) tipc_sk_send_ack(tsk); @@ -1446,7 +1823,7 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m, /* Collect msg meta data, incl. error code and rejected data */ if (!copied) { - set_orig_addr(m, hdr); + tipc_sk_set_orig_addr(m, skb); rc = tipc_sk_anc_data_recv(m, hdr, tsk); if (rc) break; @@ -1532,14 +1909,51 @@ static void tipc_sock_destruct(struct sock *sk) __skb_queue_purge(&sk->sk_receive_queue); } +static void tipc_sk_proto_rcv(struct sock *sk, + struct sk_buff_head *inputq, + struct sk_buff_head *xmitq) +{ + struct sk_buff *skb = __skb_dequeue(inputq); + struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_msg *hdr = buf_msg(skb); + struct tipc_group *grp = tsk->group; + bool wakeup = false; + + switch (msg_user(hdr)) { + case CONN_MANAGER: + tipc_sk_conn_proto_rcv(tsk, skb, xmitq); + return; + case SOCK_WAKEUP: + tipc_dest_del(&tsk->cong_links, msg_orignode(hdr), 0); + tsk->cong_link_cnt--; + wakeup = true; + break; + case GROUP_PROTOCOL: + tipc_group_proto_rcv(grp, &wakeup, hdr, inputq, xmitq); + break; + case TOP_SRV: + tipc_group_member_evt(tsk->group, &wakeup, &sk->sk_rcvbuf, + skb, inputq, xmitq); + skb = NULL; + break; + default: + break; + } + + if (wakeup) + sk->sk_write_space(sk); + + kfree_skb(skb); +} + /** - * filter_connect - Handle all incoming messages for a connection-based socket + * tipc_filter_connect - Handle incoming message for a connection-based socket * @tsk: TIPC socket * @skb: pointer to message buffer. Set to NULL if buffer is consumed * * Returns true if everything ok, false otherwise */ -static bool filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) +static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) { struct sock *sk = &tsk->sk; struct net *net = sock_net(sk); @@ -1643,6 +2057,9 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb) struct tipc_sock *tsk = tipc_sk(sk); struct tipc_msg *hdr = buf_msg(skb); + if (unlikely(msg_in_group(hdr))) + return sk->sk_rcvbuf; + if (unlikely(!msg_connected(hdr))) return sk->sk_rcvbuf << msg_importance(hdr); @@ -1653,7 +2070,7 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb) } /** - * filter_rcv - validate incoming message + * tipc_sk_filter_rcv - validate incoming message * @sk: socket * @skb: pointer to message. * @@ -1662,99 +2079,71 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb) * * Called with socket lock already taken * - * Returns true if message was added to socket receive queue, otherwise false */ -static bool filter_rcv(struct sock *sk, struct sk_buff *skb, - struct sk_buff_head *xmitq) +static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb, + struct sk_buff_head *xmitq) { + bool sk_conn = !tipc_sk_type_connectionless(sk); struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_group *grp = tsk->group; struct tipc_msg *hdr = buf_msg(skb); - unsigned int limit = rcvbuf_limit(sk, skb); - int err = TIPC_OK; - int usr = msg_user(hdr); - u32 onode; + struct net *net = sock_net(sk); + struct sk_buff_head inputq; + int limit, err = TIPC_OK; - if (unlikely(msg_user(hdr) == CONN_MANAGER)) { - tipc_sk_proto_rcv(tsk, skb, xmitq); - return false; - } + TIPC_SKB_CB(skb)->bytes_read = 0; + __skb_queue_head_init(&inputq); + __skb_queue_tail(&inputq, skb); - if (unlikely(usr == SOCK_WAKEUP)) { - onode = msg_orignode(hdr); - kfree_skb(skb); - u32_del(&tsk->cong_links, onode); - tsk->cong_link_cnt--; - sk->sk_write_space(sk); - return false; - } + if (unlikely(!msg_isdata(hdr))) + tipc_sk_proto_rcv(sk, &inputq, xmitq); - /* Drop if illegal message type */ - if (unlikely(msg_type(hdr) > TIPC_DIRECT_MSG)) { - kfree_skb(skb); - return false; - } + if (unlikely(grp)) + tipc_group_filter_msg(grp, &inputq, xmitq); - /* Reject if wrong message type for current socket state */ - if (tipc_sk_type_connectionless(sk)) { - if (msg_connected(hdr)) { + /* Validate and add to receive buffer if there is space */ + while ((skb = __skb_dequeue(&inputq))) { + hdr = buf_msg(skb); + limit = rcvbuf_limit(sk, skb); + if ((sk_conn && !tipc_sk_filter_connect(tsk, skb)) || + (!sk_conn && msg_connected(hdr)) || + (!grp && msg_in_group(hdr))) err = TIPC_ERR_NO_PORT; - goto reject; - } - } else if (unlikely(!filter_connect(tsk, skb))) { - err = TIPC_ERR_NO_PORT; - goto reject; - } + else if (sk_rmem_alloc_get(sk) + skb->truesize >= limit) + err = TIPC_ERR_OVERLOAD; - /* Reject message if there isn't room to queue it */ - if (unlikely(sk_rmem_alloc_get(sk) + skb->truesize >= limit)) { - err = TIPC_ERR_OVERLOAD; - goto reject; + if (unlikely(err)) { + tipc_skb_reject(net, err, skb, xmitq); + err = TIPC_OK; + continue; + } + __skb_queue_tail(&sk->sk_receive_queue, skb); + skb_set_owner_r(skb, sk); + sk->sk_data_ready(sk); } - - /* Enqueue message */ - TIPC_SKB_CB(skb)->bytes_read = 0; - __skb_queue_tail(&sk->sk_receive_queue, skb); - skb_set_owner_r(skb, sk); - - sk->sk_data_ready(sk); - return true; - -reject: - if (tipc_msg_reverse(tsk_own_node(tsk), &skb, err)) - __skb_queue_tail(xmitq, skb); - return false; } /** - * tipc_backlog_rcv - handle incoming message from backlog queue + * tipc_sk_backlog_rcv - handle incoming message from backlog queue * @sk: socket * @skb: message * * Caller must hold socket lock - * - * Returns 0 */ -static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) +static int tipc_sk_backlog_rcv(struct sock *sk, struct sk_buff *skb) { - unsigned int truesize = skb->truesize; + unsigned int before = sk_rmem_alloc_get(sk); struct sk_buff_head xmitq; - u32 dnode, selector; + unsigned int added; __skb_queue_head_init(&xmitq); - if (likely(filter_rcv(sk, skb, &xmitq))) { - atomic_add(truesize, &tipc_sk(sk)->dupl_rcvcnt); - return 0; - } - - if (skb_queue_empty(&xmitq)) - return 0; + tipc_sk_filter_rcv(sk, skb, &xmitq); + added = sk_rmem_alloc_get(sk) - before; + atomic_add(added, &tipc_sk(sk)->dupl_rcvcnt); - /* Send response/rejected message */ - skb = __skb_dequeue(&xmitq); - dnode = msg_destnode(buf_msg(skb)); - selector = msg_origport(buf_msg(skb)); - tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector); + /* Send pending response/rejected messages, if any */ + tipc_node_distr_xmit(sock_net(sk), &xmitq); return 0; } @@ -1786,7 +2175,7 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, /* Add message directly to receive queue if possible */ if (!sock_owned_by_user(sk)) { - filter_rcv(sk, skb, xmitq); + tipc_sk_filter_rcv(sk, skb, xmitq); continue; } @@ -1833,14 +2222,10 @@ void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq) spin_unlock_bh(&sk->sk_lock.slock); } /* Send pending response/rejected messages, if any */ - while ((skb = __skb_dequeue(&xmitq))) { - dnode = msg_destnode(buf_msg(skb)); - tipc_node_xmit_skb(net, skb, dnode, dport); - } + tipc_node_distr_xmit(sock_net(sk), &xmitq); sock_put(sk); continue; } - /* No destination socket => dequeue skb if still there */ skb = tipc_skb_dequeue(inputq, dport); if (!skb) @@ -1903,28 +2288,32 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest, int previous; int res = 0; + if (destlen != sizeof(struct sockaddr_tipc)) + return -EINVAL; + lock_sock(sk); - /* DGRAM/RDM connect(), just save the destaddr */ - if (tipc_sk_type_connectionless(sk)) { - if (dst->family == AF_UNSPEC) { - memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc)); - } else if (destlen != sizeof(struct sockaddr_tipc)) { - res = -EINVAL; - } else { - memcpy(&tsk->peer, dest, destlen); - } + if (tsk->group) { + res = -EINVAL; goto exit; } - /* - * Reject connection attempt using multicast address - * - * Note: send_msg() validates the rest of the address fields, - * so there's no need to do it here - */ - if (dst->addrtype == TIPC_ADDR_MCAST) { + if (dst->family == AF_UNSPEC) { + memset(&tsk->peer, 0, sizeof(struct sockaddr_tipc)); + if (!tipc_sk_type_connectionless(sk)) + res = -EINVAL; + goto exit; + } else if (dst->family != AF_TIPC) { + res = -EINVAL; + } + if (dst->addrtype != TIPC_ADDR_ID && dst->addrtype != TIPC_ADDR_NAME) res = -EINVAL; + if (res) + goto exit; + + /* DGRAM/RDM connect(), just save the destaddr */ + if (tipc_sk_type_connectionless(sk)) { + memcpy(&tsk->peer, dest, destlen); goto exit; } @@ -2141,46 +2530,43 @@ static int tipc_shutdown(struct socket *sock, int how) return res; } -static void tipc_sk_timeout(unsigned long data) +static void tipc_sk_timeout(struct timer_list *t) { - struct tipc_sock *tsk = (struct tipc_sock *)data; - struct sock *sk = &tsk->sk; - struct sk_buff *skb = NULL; - u32 peer_port, peer_node; + struct sock *sk = from_timer(sk, t, sk_timer); + struct tipc_sock *tsk = tipc_sk(sk); + u32 peer_port = tsk_peer_port(tsk); + u32 peer_node = tsk_peer_node(tsk); u32 own_node = tsk_own_node(tsk); + u32 own_port = tsk->portid; + struct net *net = sock_net(sk); + struct sk_buff *skb = NULL; bh_lock_sock(sk); - if (!tipc_sk_connected(sk)) { - bh_unlock_sock(sk); + if (!tipc_sk_connected(sk)) + goto exit; + + /* Try again later if socket is busy */ + if (sock_owned_by_user(sk)) { + sk_reset_timer(sk, &sk->sk_timer, jiffies + HZ / 20); goto exit; } - peer_port = tsk_peer_port(tsk); - peer_node = tsk_peer_node(tsk); if (tsk->probe_unacked) { - if (!sock_owned_by_user(sk)) { - tipc_set_sk_state(sk, TIPC_DISCONNECTING); - tipc_node_remove_conn(sock_net(sk), tsk_peer_node(tsk), - tsk_peer_port(tsk)); - sk->sk_state_change(sk); - } else { - /* Try again later */ - sk_reset_timer(sk, &sk->sk_timer, (HZ / 20)); - } - - bh_unlock_sock(sk); + tipc_set_sk_state(sk, TIPC_DISCONNECTING); + tipc_node_remove_conn(net, peer_node, peer_port); + sk->sk_state_change(sk); goto exit; } - - skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, - INT_H_SIZE, 0, peer_node, own_node, - peer_port, tsk->portid, TIPC_OK); + /* Send new probe */ + skb = tipc_msg_create(CONN_MANAGER, CONN_PROBE, INT_H_SIZE, 0, + peer_node, own_node, peer_port, own_port, + TIPC_OK); tsk->probe_unacked = true; - sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTERVAL); + sk_reset_timer(sk, &sk->sk_timer, jiffies + CONN_PROBING_INTV); +exit: bh_unlock_sock(sk); if (skb) - tipc_node_xmit_skb(sock_net(sk), skb, peer_node, tsk->portid); -exit: + tipc_node_xmit_skb(net, skb, peer_node, own_port); sock_put(sk); } @@ -2255,8 +2641,8 @@ void tipc_sk_reinit(struct net *net) do { tsk = ERR_PTR(rhashtable_walk_start(&iter)); - if (tsk) - continue; + if (IS_ERR(tsk)) + goto walk_stop; while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) { spin_lock_bh(&tsk->sk.sk_lock.slock); @@ -2265,7 +2651,7 @@ void tipc_sk_reinit(struct net *net) msg_set_orignode(msg, tn->own_addr); spin_unlock_bh(&tsk->sk.sk_lock.slock); } - +walk_stop: rhashtable_walk_stop(&iter); } while (tsk == ERR_PTR(-EAGAIN)); } @@ -2345,6 +2731,58 @@ void tipc_sk_rht_destroy(struct net *net) rhashtable_destroy(&tn->sk_rht); } +static int tipc_sk_join(struct tipc_sock *tsk, struct tipc_group_req *mreq) +{ + struct net *net = sock_net(&tsk->sk); + u32 domain = addr_domain(net, mreq->scope); + struct tipc_group *grp = tsk->group; + struct tipc_msg *hdr = &tsk->phdr; + struct tipc_name_seq seq; + int rc; + + if (mreq->type < TIPC_RESERVED_TYPES) + return -EACCES; + if (grp) + return -EACCES; + grp = tipc_group_create(net, tsk->portid, mreq); + if (!grp) + return -ENOMEM; + tsk->group = grp; + msg_set_lookup_scope(hdr, mreq->scope); + msg_set_nametype(hdr, mreq->type); + msg_set_dest_droppable(hdr, true); + seq.type = mreq->type; + seq.lower = mreq->instance; + seq.upper = seq.lower; + tipc_nametbl_build_group(net, grp, mreq->type, domain); + rc = tipc_sk_publish(tsk, mreq->scope, &seq); + if (rc) { + tipc_group_delete(net, grp); + tsk->group = NULL; + } + + /* Eliminate any risk that a broadcast overtakes the sent JOIN */ + tsk->mc_method.rcast = true; + tsk->mc_method.mandatory = true; + return rc; +} + +static int tipc_sk_leave(struct tipc_sock *tsk) +{ + struct net *net = sock_net(&tsk->sk); + struct tipc_group *grp = tsk->group; + struct tipc_name_seq seq; + int scope; + + if (!grp) + return -EINVAL; + tipc_group_self(grp, &seq, &scope); + tipc_group_delete(net, grp); + tsk->group = NULL; + tipc_sk_withdraw(tsk, scope, &seq); + return 0; +} + /** * tipc_setsockopt - set socket option * @sock: socket structure @@ -2363,6 +2801,7 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt, { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); + struct tipc_group_req mreq; u32 value = 0; int res = 0; @@ -2378,9 +2817,14 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt, case TIPC_CONN_TIMEOUT: if (ol < sizeof(value)) return -EINVAL; - res = get_user(value, (u32 __user *)ov); - if (res) - return res; + if (get_user(value, (u32 __user *)ov)) + return -EFAULT; + break; + case TIPC_GROUP_JOIN: + if (ol < sizeof(mreq)) + return -EINVAL; + if (copy_from_user(&mreq, ov, sizeof(mreq))) + return -EFAULT; break; default: if (ov || ol) @@ -2413,6 +2857,12 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt, tsk->mc_method.rcast = true; tsk->mc_method.mandatory = true; break; + case TIPC_GROUP_JOIN: + res = tipc_sk_join(tsk, &mreq); + break; + case TIPC_GROUP_LEAVE: + res = tipc_sk_leave(tsk); + break; default: res = -EINVAL; } @@ -2440,7 +2890,8 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt, { struct sock *sk = sock->sk; struct tipc_sock *tsk = tipc_sk(sk); - int len; + struct tipc_name_seq seq; + int len, scope; u32 value; int res; @@ -2474,6 +2925,12 @@ static int tipc_getsockopt(struct socket *sock, int lvl, int opt, case TIPC_SOCK_RECVQ_DEPTH: value = skb_queue_len(&sk->sk_receive_queue); break; + case TIPC_GROUP_JOIN: + seq.type = 0; + if (tsk->group) + tipc_group_self(tsk->group, &seq, &scope); + value = seq.type; + break; default: res = -EINVAL; } diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 0bf91cd3733c..251065dfd8df 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -52,7 +52,6 @@ struct tipc_subscriber { struct list_head subscrp_list; }; -static void tipc_subscrp_delete(struct tipc_subscription *sub); static void tipc_subscrb_put(struct tipc_subscriber *subscriber); /** @@ -134,9 +133,9 @@ void tipc_subscrp_report_overlap(struct tipc_subscription *sub, u32 found_lower, node); } -static void tipc_subscrp_timeout(unsigned long data) +static void tipc_subscrp_timeout(struct timer_list *t) { - struct tipc_subscription *sub = (struct tipc_subscription *)data; + struct tipc_subscription *sub = from_timer(sub, t, timer); struct tipc_subscriber *subscriber = sub->subscriber; spin_lock_bh(&subscriber->lock); @@ -197,15 +196,19 @@ static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber, { struct list_head *subscription_list = &subscriber->subscrp_list; struct tipc_subscription *sub, *temp; + u32 timeout; spin_lock_bh(&subscriber->lock); list_for_each_entry_safe(sub, temp, subscription_list, subscrp_list) { if (s && memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))) continue; - tipc_nametbl_unsubscribe(sub); - list_del(&sub->subscrp_list); - tipc_subscrp_delete(sub); + timeout = htohl(sub->evt.s.timeout, sub->swap); + if (timeout == TIPC_WAIT_FOREVER || del_timer(&sub->timer)) { + tipc_nametbl_unsubscribe(sub); + list_del(&sub->subscrp_list); + tipc_subscrp_put(sub); + } if (s) break; @@ -236,18 +239,12 @@ static void tipc_subscrb_delete(struct tipc_subscriber *subscriber) tipc_subscrb_put(subscriber); } -static void tipc_subscrp_delete(struct tipc_subscription *sub) -{ - u32 timeout = htohl(sub->evt.s.timeout, sub->swap); - - if (timeout == TIPC_WAIT_FOREVER || del_timer(&sub->timer)) - tipc_subscrp_put(sub); -} - static void tipc_subscrp_cancel(struct tipc_subscr *s, struct tipc_subscriber *subscriber) { + tipc_subscrb_get(subscriber); tipc_subscrb_subscrp_delete(subscriber, s); + tipc_subscrb_put(subscriber); } static struct tipc_subscription *tipc_subscrp_create(struct net *net, @@ -306,7 +303,7 @@ static void tipc_subscrp_subscribe(struct net *net, struct tipc_subscr *s, tipc_subscrb_get(subscriber); spin_unlock_bh(&subscriber->lock); - setup_timer(&sub->timer, tipc_subscrp_timeout, (unsigned long)sub); + timer_setup(&sub->timer, tipc_subscrp_timeout, 0); timeout = htohl(sub->evt.s.timeout, swap); if (timeout != TIPC_WAIT_FOREVER) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 60aff60e30ad..e07ee3ae0023 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -45,8 +45,18 @@ MODULE_AUTHOR("Mellanox Technologies"); MODULE_DESCRIPTION("Transport Layer Security Support"); MODULE_LICENSE("Dual BSD/GPL"); -static struct proto tls_base_prot; -static struct proto tls_sw_prot; +enum { + TLS_BASE_TX, + TLS_SW_TX, + TLS_NUM_CONFIG, +}; + +static struct proto tls_prots[TLS_NUM_CONFIG]; + +static inline void update_sk_prot(struct sock *sk, struct tls_context *ctx) +{ + sk->sk_prot = &tls_prots[ctx->tx_conf]; +} int wait_on_pending_writer(struct sock *sk, long *timeo) { @@ -216,6 +226,12 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) void (*sk_proto_close)(struct sock *sk, long timeout); lock_sock(sk); + sk_proto_close = ctx->sk_proto_close; + + if (ctx->tx_conf == TLS_BASE_TX) { + kfree(ctx); + goto skip_tx_cleanup; + } if (!tls_complete_pending_work(sk, ctx, 0, &timeo)) tls_handle_open_record(sk, 0); @@ -232,13 +248,14 @@ static void tls_sk_proto_close(struct sock *sk, long timeout) sg++; } } - ctx->free_resources(sk); + kfree(ctx->rec_seq); kfree(ctx->iv); - sk_proto_close = ctx->sk_proto_close; - kfree(ctx); + if (ctx->tx_conf == TLS_SW_TX) + tls_sw_free_tx_resources(sk); +skip_tx_cleanup: release_sock(sk); sk_proto_close(sk, timeout); } @@ -338,46 +355,41 @@ static int tls_getsockopt(struct sock *sk, int level, int optname, static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, unsigned int optlen) { - struct tls_crypto_info *crypto_info, tmp_crypto_info; + struct tls_crypto_info *crypto_info; struct tls_context *ctx = tls_get_ctx(sk); - struct proto *prot = NULL; int rc = 0; + int tx_conf; if (!optval || (optlen < sizeof(*crypto_info))) { rc = -EINVAL; goto out; } - rc = copy_from_user(&tmp_crypto_info, optval, sizeof(*crypto_info)); + crypto_info = &ctx->crypto_send; + /* Currently we don't support set crypto info more than one time */ + if (TLS_CRYPTO_INFO_READY(crypto_info)) + goto out; + + rc = copy_from_user(crypto_info, optval, sizeof(*crypto_info)); if (rc) { rc = -EFAULT; goto out; } /* check version */ - if (tmp_crypto_info.version != TLS_1_2_VERSION) { + if (crypto_info->version != TLS_1_2_VERSION) { rc = -ENOTSUPP; - goto out; + goto err_crypto_info; } - /* get user crypto info */ - crypto_info = &ctx->crypto_send; - - /* Currently we don't support set crypto info more than one time */ - if (TLS_CRYPTO_INFO_READY(crypto_info)) - goto out; - - switch (tmp_crypto_info.cipher_type) { + switch (crypto_info->cipher_type) { case TLS_CIPHER_AES_GCM_128: { if (optlen != sizeof(struct tls12_crypto_info_aes_gcm_128)) { rc = -EINVAL; goto out; } - rc = copy_from_user( - crypto_info, - optval, - sizeof(struct tls12_crypto_info_aes_gcm_128)); - + rc = copy_from_user(crypto_info + 1, optval + sizeof(*crypto_info), + optlen - sizeof(*crypto_info)); if (rc) { rc = -EFAULT; goto err_crypto_info; @@ -389,18 +401,16 @@ static int do_tls_setsockopt_tx(struct sock *sk, char __user *optval, goto out; } - ctx->sk_write_space = sk->sk_write_space; - sk->sk_write_space = tls_write_space; - - ctx->sk_proto_close = sk->sk_prot->close; - /* currently SW is default, we will have ethtool in future */ rc = tls_set_sw_offload(sk, ctx); - prot = &tls_sw_prot; + tx_conf = TLS_SW_TX; if (rc) goto err_crypto_info; - sk->sk_prot = prot; + ctx->tx_conf = tx_conf; + update_sk_prot(sk, ctx); + ctx->sk_write_space = sk->sk_write_space; + sk->sk_write_space = tls_write_space; goto out; err_crypto_info: @@ -453,7 +463,10 @@ static int tls_init(struct sock *sk) icsk->icsk_ulp_data = ctx; ctx->setsockopt = sk->sk_prot->setsockopt; ctx->getsockopt = sk->sk_prot->getsockopt; - sk->sk_prot = &tls_base_prot; + ctx->sk_proto_close = sk->sk_prot->close; + + ctx->tx_conf = TLS_BASE_TX; + update_sk_prot(sk, ctx); out: return rc; } @@ -464,16 +477,21 @@ static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = { .init = tls_init, }; +static void build_protos(struct proto *prot, struct proto *base) +{ + prot[TLS_BASE_TX] = *base; + prot[TLS_BASE_TX].setsockopt = tls_setsockopt; + prot[TLS_BASE_TX].getsockopt = tls_getsockopt; + prot[TLS_BASE_TX].close = tls_sk_proto_close; + + prot[TLS_SW_TX] = prot[TLS_BASE_TX]; + prot[TLS_SW_TX].sendmsg = tls_sw_sendmsg; + prot[TLS_SW_TX].sendpage = tls_sw_sendpage; +} + static int __init tls_register(void) { - tls_base_prot = tcp_prot; - tls_base_prot.setsockopt = tls_setsockopt; - tls_base_prot.getsockopt = tls_getsockopt; - - tls_sw_prot = tls_base_prot; - tls_sw_prot.sendmsg = tls_sw_sendmsg; - tls_sw_prot.sendpage = tls_sw_sendpage; - tls_sw_prot.close = tls_sk_proto_close; + build_protos(tls_prots, &tcp_prot); tcp_register_ulp(&tcp_tls_ulp_ops); diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index fa596fa71ba7..73d19210dd49 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -39,22 +39,6 @@ #include <net/tls.h> -static inline void tls_make_aad(int recv, - char *buf, - size_t size, - char *record_sequence, - int record_sequence_size, - unsigned char record_type) -{ - memcpy(buf, record_sequence, record_sequence_size); - - buf[8] = record_type; - buf[9] = TLS_1_2_VERSION_MAJOR; - buf[10] = TLS_1_2_VERSION_MINOR; - buf[11] = size >> 8; - buf[12] = size & 0xFF; -} - static void trim_sg(struct sock *sk, struct scatterlist *sg, int *sg_num_elem, unsigned int *sg_size, int target_size) { @@ -219,7 +203,7 @@ static int tls_do_encryption(struct tls_context *tls_ctx, struct aead_request *aead_req; int rc; - aead_req = kmalloc(req_size, flags); + aead_req = kzalloc(req_size, flags); if (!aead_req) return -ENOMEM; @@ -249,7 +233,7 @@ static int tls_push_record(struct sock *sk, int flags, sg_mark_end(ctx->sg_plaintext_data + ctx->sg_plaintext_num_elem - 1); sg_mark_end(ctx->sg_encrypted_data + ctx->sg_encrypted_num_elem - 1); - tls_make_aad(0, ctx->aad_space, ctx->sg_plaintext_size, + tls_make_aad(ctx->aad_space, ctx->sg_plaintext_size, tls_ctx->rec_seq, tls_ctx->rec_seq_size, record_type); @@ -639,7 +623,7 @@ sendpage_end: return ret; } -void tls_sw_free_resources(struct sock *sk) +void tls_sw_free_tx_resources(struct sock *sk) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context *ctx = tls_sw_ctx(tls_ctx); @@ -650,6 +634,7 @@ void tls_sw_free_resources(struct sock *sk) tls_free_both_sg(sk); kfree(ctx); + kfree(tls_ctx); } int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx) @@ -679,7 +664,6 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx) } ctx->priv_ctx = (struct tls_offload_context *)sw_ctx; - ctx->free_resources = tls_sw_free_resources; crypto_info = &ctx->crypto_send; switch (crypto_info->cipher_type) { diff --git a/net/unix/Makefile b/net/unix/Makefile index b663c607b1c6..ffd0a275c3a7 100644 --- a/net/unix/Makefile +++ b/net/unix/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the Linux unix domain socket layer. # diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 7b52a380d710..a9ee634f3c42 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -814,6 +814,7 @@ static int unix_create(struct net *net, struct socket *sock, int protocol, */ case SOCK_RAW: sock->type = SOCK_DGRAM; + /* fall through */ case SOCK_DGRAM: sock->ops = &unix_dgram_ops; break; @@ -1528,26 +1529,13 @@ static inline bool too_many_unix_fds(struct task_struct *p) return false; } -#define MAX_RECURSION_LEVEL 4 - static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) { int i; - unsigned char max_level = 0; if (too_many_unix_fds(current)) return -ETOOMANYREFS; - for (i = scm->fp->count - 1; i >= 0; i--) { - struct sock *sk = unix_get_socket(scm->fp->fp[i]); - - if (sk) - max_level = max(max_level, - unix_sk(sk)->recursion_level); - } - if (unlikely(max_level > MAX_RECURSION_LEVEL)) - return -ETOOMANYREFS; - /* * Need to duplicate file references for the sake of garbage * collection. Otherwise a socket in the fps might become a @@ -1559,7 +1547,7 @@ static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) for (i = scm->fp->count - 1; i >= 0; i--) unix_inflight(scm->fp->user, scm->fp->fp[i]); - return max_level; + return 0; } static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) @@ -1649,7 +1637,6 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, struct sk_buff *skb; long timeo; struct scm_cookie scm; - int max_level; int data_len = 0; int sk_locked; @@ -1701,7 +1688,6 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, err = unix_scm_to_skb(&scm, skb, true); if (err < 0) goto out_free; - max_level = err + 1; skb_put(skb, len - data_len); skb->data_len = data_len; @@ -1819,8 +1805,6 @@ restart_locked: __net_timestamp(skb); maybe_add_creds(skb, sock, other); skb_queue_tail(&other->sk_receive_queue, skb); - if (max_level > unix_sk(other)->recursion_level) - unix_sk(other)->recursion_level = max_level; unix_state_unlock(other); other->sk_data_ready(other); sock_put(other); @@ -1855,7 +1839,6 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, int sent = 0; struct scm_cookie scm; bool fds_sent = false; - int max_level; int data_len; wait_for_unix_gc(); @@ -1905,7 +1888,6 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, kfree_skb(skb); goto out_err; } - max_level = err + 1; fds_sent = true; skb_put(skb, size - data_len); @@ -1925,8 +1907,6 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, maybe_add_creds(skb, sock, other); skb_queue_tail(&other->sk_receive_queue, skb); - if (max_level > unix_sk(other)->recursion_level) - unix_sk(other)->recursion_level = max_level; unix_state_unlock(other); other->sk_data_ready(other); sent += size; @@ -2304,10 +2284,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state, */ mutex_lock(&u->iolock); - if (flags & MSG_PEEK) - skip = sk_peek_offset(sk, flags); - else - skip = 0; + skip = max(sk_peek_offset(sk, flags), 0); do { int chunk; @@ -2324,7 +2301,6 @@ redo: last_len = last ? last->len : 0; again: if (skb == NULL) { - unix_sk(sk)->recursion_level = 0; if (copied >= target) goto unlock; diff --git a/net/unix/diag.c b/net/unix/diag.c index 4d9679701a6d..384c84e83462 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -257,6 +257,8 @@ static int unix_diag_get_exact(struct sk_buff *in_skb, err = -ENOENT; if (sk == NULL) goto out_nosk; + if (!net_eq(sock_net(sk), net)) + goto out; err = sock_diag_check_cookie(sk, req->udiag_cookie); if (err) diff --git a/net/vmw_vsock/Kconfig b/net/vmw_vsock/Kconfig index 8831e7c42167..970f96489fe7 100644 --- a/net/vmw_vsock/Kconfig +++ b/net/vmw_vsock/Kconfig @@ -15,6 +15,16 @@ config VSOCKETS To compile this driver as a module, choose M here: the module will be called vsock. If unsure, say N. +config VSOCKETS_DIAG + tristate "Virtual Sockets monitoring interface" + depends on VSOCKETS + default y + help + Support for PF_VSOCK sockets monitoring interface used by the ss tool. + If unsure, say Y. + + Enable this module so userspace applications can query open sockets. + config VMWARE_VMCI_VSOCKETS tristate "VMware VMCI transport for Virtual Sockets" depends on VSOCKETS && VMWARE_VMCI @@ -46,3 +56,15 @@ config VIRTIO_VSOCKETS_COMMON This option is selected by any driver which needs to access the virtio_vsock. The module will be called vmw_vsock_virtio_transport_common. + +config HYPERV_VSOCKETS + tristate "Hyper-V transport for Virtual Sockets" + depends on VSOCKETS && HYPERV + help + This module implements a Hyper-V transport for Virtual Sockets. + + Enable this transport if your Virtual Machine host supports Virtual + Sockets over Hyper-V VMBus. + + To compile this driver as a module, choose M here: the module will be + called hv_sock. If unsure, say N. diff --git a/net/vmw_vsock/Makefile b/net/vmw_vsock/Makefile index 09fc2eb29dc8..7c6f9a0b67b0 100644 --- a/net/vmw_vsock/Makefile +++ b/net/vmw_vsock/Makefile @@ -1,13 +1,20 @@ +# SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_VSOCKETS) += vsock.o +obj-$(CONFIG_VSOCKETS_DIAG) += vsock_diag.o obj-$(CONFIG_VMWARE_VMCI_VSOCKETS) += vmw_vsock_vmci_transport.o obj-$(CONFIG_VIRTIO_VSOCKETS) += vmw_vsock_virtio_transport.o obj-$(CONFIG_VIRTIO_VSOCKETS_COMMON) += vmw_vsock_virtio_transport_common.o +obj-$(CONFIG_HYPERV_VSOCKETS) += hv_sock.o vsock-y += af_vsock.o af_vsock_tap.o vsock_addr.o +vsock_diag-y += diag.o + vmw_vsock_vmci_transport-y += vmci_transport.o vmci_transport_notify.o \ vmci_transport_notify_qstate.o vmw_vsock_virtio_transport-y += virtio_transport.o vmw_vsock_virtio_transport_common-y += virtio_transport_common.o + +hv_sock-y += hyperv_transport.o diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index dfc8c51e4d74..5d28abf87fbf 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -36,7 +36,7 @@ * not support simultaneous connects (two "client" sockets connecting). * * - "Server" sockets are referred to as listener sockets throughout this - * implementation because they are in the VSOCK_SS_LISTEN state. When a + * implementation because they are in the TCP_LISTEN state. When a * connection request is received (the second kind of socket mentioned above), * we create a new socket and refer to it as a pending socket. These pending * sockets are placed on the pending connection list of the listener socket. @@ -82,6 +82,15 @@ * argument, we must ensure the reference count is increased to ensure the * socket isn't freed before the function is run; the deferred function will * then drop the reference. + * + * - sk->sk_state uses the TCP state constants because they are widely used by + * other address families and exposed to userspace tools like ss(8): + * + * TCP_CLOSE - unconnected + * TCP_SYN_SENT - connecting + * TCP_ESTABLISHED - connected + * TCP_CLOSING - disconnecting + * TCP_LISTEN - listening */ #include <linux/types.h> @@ -153,7 +162,6 @@ EXPORT_SYMBOL_GPL(vm_sockets_get_local_cid); * vsock_bind_table[VSOCK_HASH_SIZE] is for unbound sockets. The hash function * mods with VSOCK_HASH_SIZE to ensure this. */ -#define VSOCK_HASH_SIZE 251 #define MAX_PORT_RETRIES 24 #define VSOCK_HASH(addr) ((addr)->svm_port % VSOCK_HASH_SIZE) @@ -168,9 +176,12 @@ EXPORT_SYMBOL_GPL(vm_sockets_get_local_cid); #define vsock_connected_sockets_vsk(vsk) \ vsock_connected_sockets(&(vsk)->remote_addr, &(vsk)->local_addr) -static struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1]; -static struct list_head vsock_connected_table[VSOCK_HASH_SIZE]; -static DEFINE_SPINLOCK(vsock_table_lock); +struct list_head vsock_bind_table[VSOCK_HASH_SIZE + 1]; +EXPORT_SYMBOL_GPL(vsock_bind_table); +struct list_head vsock_connected_table[VSOCK_HASH_SIZE]; +EXPORT_SYMBOL_GPL(vsock_connected_table); +DEFINE_SPINLOCK(vsock_table_lock); +EXPORT_SYMBOL_GPL(vsock_table_lock); /* Autobind this socket to the local address if necessary. */ static int vsock_auto_bind(struct vsock_sock *vsk) @@ -184,7 +195,7 @@ static int vsock_auto_bind(struct vsock_sock *vsk) return __vsock_bind(sk, &local_addr); } -static void vsock_init_tables(void) +static int __init vsock_init_tables(void) { int i; @@ -193,6 +204,7 @@ static void vsock_init_tables(void) for (i = 0; i < ARRAY_SIZE(vsock_connected_table); i++) INIT_LIST_HEAD(&vsock_connected_table[i]); + return 0; } static void __vsock_insert_bound(struct list_head *list, @@ -248,16 +260,6 @@ static struct sock *__vsock_find_connected_socket(struct sockaddr_vm *src, return NULL; } -static bool __vsock_in_bound_table(struct vsock_sock *vsk) -{ - return !list_empty(&vsk->bound_table); -} - -static bool __vsock_in_connected_table(struct vsock_sock *vsk) -{ - return !list_empty(&vsk->connected_table); -} - static void vsock_insert_unbound(struct vsock_sock *vsk) { spin_lock_bh(&vsock_table_lock); @@ -485,7 +487,7 @@ void vsock_pending_work(struct work_struct *work) if (vsock_in_connected_table(vsk)) vsock_remove_connected(vsk); - sk->sk_state = SS_FREE; + sk->sk_state = TCP_CLOSE; out: release_sock(sk); @@ -625,7 +627,6 @@ struct sock *__vsock_create(struct net *net, sk->sk_destruct = vsock_sk_destruct; sk->sk_backlog_rcv = vsock_queue_rcv_skb; - sk->sk_state = 0; sock_reset_flag(sk, SOCK_DONE); INIT_LIST_HEAD(&vsk->bound_table); @@ -899,7 +900,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock, /* Listening sockets that have connections in their accept * queue can be read. */ - if (sk->sk_state == VSOCK_SS_LISTEN + if (sk->sk_state == TCP_LISTEN && !vsock_is_accept_queue_empty(sk)) mask |= POLLIN | POLLRDNORM; @@ -928,7 +929,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock, } /* Connected sockets that can produce data can be written. */ - if (sk->sk_state == SS_CONNECTED) { + if (sk->sk_state == TCP_ESTABLISHED) { if (!(sk->sk_shutdown & SEND_SHUTDOWN)) { bool space_avail_now = false; int ret = transport->notify_poll_out( @@ -950,7 +951,7 @@ static unsigned int vsock_poll(struct file *file, struct socket *sock, * POLLOUT|POLLWRNORM when peer is closed and nothing to read, * but local send is not shutdown. */ - if (sk->sk_state == SS_UNCONNECTED) { + if (sk->sk_state == TCP_CLOSE) { if (!(sk->sk_shutdown & SEND_SHUTDOWN)) mask |= POLLOUT | POLLWRNORM; @@ -1120,9 +1121,9 @@ static void vsock_connect_timeout(struct work_struct *work) sk = sk_vsock(vsk); lock_sock(sk); - if (sk->sk_state == SS_CONNECTING && + if (sk->sk_state == TCP_SYN_SENT && (sk->sk_shutdown != SHUTDOWN_MASK)) { - sk->sk_state = SS_UNCONNECTED; + sk->sk_state = TCP_CLOSE; sk->sk_err = ETIMEDOUT; sk->sk_error_report(sk); cancel = 1; @@ -1168,7 +1169,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, err = -EALREADY; break; default: - if ((sk->sk_state == VSOCK_SS_LISTEN) || + if ((sk->sk_state == TCP_LISTEN) || vsock_addr_cast(addr, addr_len, &remote_addr) != 0) { err = -EINVAL; goto out; @@ -1191,7 +1192,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, if (err) goto out; - sk->sk_state = SS_CONNECTING; + sk->sk_state = TCP_SYN_SENT; err = transport->connect(vsk); if (err < 0) @@ -1211,7 +1212,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, timeout = vsk->connect_timeout; prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - while (sk->sk_state != SS_CONNECTED && sk->sk_err == 0) { + while (sk->sk_state != TCP_ESTABLISHED && sk->sk_err == 0) { if (flags & O_NONBLOCK) { /* If we're not going to block, we schedule a timeout * function to generate a timeout on the connection @@ -1234,13 +1235,13 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, if (signal_pending(current)) { err = sock_intr_errno(timeout); - sk->sk_state = SS_UNCONNECTED; + sk->sk_state = TCP_CLOSE; sock->state = SS_UNCONNECTED; vsock_transport_cancel_pkt(vsk); goto out_wait; } else if (timeout == 0) { err = -ETIMEDOUT; - sk->sk_state = SS_UNCONNECTED; + sk->sk_state = TCP_CLOSE; sock->state = SS_UNCONNECTED; vsock_transport_cancel_pkt(vsk); goto out_wait; @@ -1251,7 +1252,7 @@ static int vsock_stream_connect(struct socket *sock, struct sockaddr *addr, if (sk->sk_err) { err = -sk->sk_err; - sk->sk_state = SS_UNCONNECTED; + sk->sk_state = TCP_CLOSE; sock->state = SS_UNCONNECTED; } else { err = 0; @@ -1284,7 +1285,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags, goto out; } - if (listener->sk_state != VSOCK_SS_LISTEN) { + if (listener->sk_state != TCP_LISTEN) { err = -EINVAL; goto out; } @@ -1374,7 +1375,7 @@ static int vsock_listen(struct socket *sock, int backlog) } sk->sk_max_ack_backlog = backlog; - sk->sk_state = VSOCK_SS_LISTEN; + sk->sk_state = TCP_LISTEN; err = 0; @@ -1554,7 +1555,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, /* Callers should not provide a destination with stream sockets. */ if (msg->msg_namelen) { - err = sk->sk_state == SS_CONNECTED ? -EISCONN : -EOPNOTSUPP; + err = sk->sk_state == TCP_ESTABLISHED ? -EISCONN : -EOPNOTSUPP; goto out; } @@ -1565,7 +1566,7 @@ static int vsock_stream_sendmsg(struct socket *sock, struct msghdr *msg, goto out; } - if (sk->sk_state != SS_CONNECTED || + if (sk->sk_state != TCP_ESTABLISHED || !vsock_addr_bound(&vsk->local_addr)) { err = -ENOTCONN; goto out; @@ -1689,7 +1690,7 @@ vsock_stream_recvmsg(struct socket *sock, struct msghdr *msg, size_t len, lock_sock(sk); - if (sk->sk_state != SS_CONNECTED) { + if (sk->sk_state != TCP_ESTABLISHED) { /* Recvmsg is supposed to return 0 if a peer performs an * orderly shutdown. Differentiate between that case and when a * peer has not connected or a local shutdown occured with the @@ -1957,8 +1958,6 @@ int __vsock_core_init(const struct vsock_transport *t, struct module *owner) vsock_proto.owner = owner; transport = t; - vsock_init_tables(); - vsock_device.minor = MISC_DYNAMIC_MINOR; err = misc_register(&vsock_device); if (err) { @@ -2019,6 +2018,8 @@ const struct vsock_transport *vsock_core_get_transport(void) } EXPORT_SYMBOL_GPL(vsock_core_get_transport); +module_init(vsock_init_tables); + MODULE_AUTHOR("VMware, Inc."); MODULE_DESCRIPTION("VMware Virtual Socket Family"); MODULE_VERSION("1.0.2.0-k"); diff --git a/net/vmw_vsock/diag.c b/net/vmw_vsock/diag.c new file mode 100644 index 000000000000..31b567652250 --- /dev/null +++ b/net/vmw_vsock/diag.c @@ -0,0 +1,186 @@ +/* + * vsock sock_diag(7) module + * + * Copyright (C) 2017 Red Hat, Inc. + * Author: Stefan Hajnoczi <stefanha@redhat.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation version 2 and no later version. + * + * This program is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include <linux/module.h> +#include <linux/sock_diag.h> +#include <linux/vm_sockets_diag.h> +#include <net/af_vsock.h> + +static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, + u32 portid, u32 seq, u32 flags) +{ + struct vsock_sock *vsk = vsock_sk(sk); + struct vsock_diag_msg *rep; + struct nlmsghdr *nlh; + + nlh = nlmsg_put(skb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep), + flags); + if (!nlh) + return -EMSGSIZE; + + rep = nlmsg_data(nlh); + rep->vdiag_family = AF_VSOCK; + + /* Lock order dictates that sk_lock is acquired before + * vsock_table_lock, so we cannot lock here. Simply don't take + * sk_lock; sk is guaranteed to stay alive since vsock_table_lock is + * held. + */ + rep->vdiag_type = sk->sk_type; + rep->vdiag_state = sk->sk_state; + rep->vdiag_shutdown = sk->sk_shutdown; + rep->vdiag_src_cid = vsk->local_addr.svm_cid; + rep->vdiag_src_port = vsk->local_addr.svm_port; + rep->vdiag_dst_cid = vsk->remote_addr.svm_cid; + rep->vdiag_dst_port = vsk->remote_addr.svm_port; + rep->vdiag_ino = sock_i_ino(sk); + + sock_diag_save_cookie(sk, rep->vdiag_cookie); + + return 0; +} + +static int vsock_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + struct vsock_diag_req *req; + struct vsock_sock *vsk; + unsigned int bucket; + unsigned int last_i; + unsigned int table; + struct net *net; + unsigned int i; + + req = nlmsg_data(cb->nlh); + net = sock_net(skb->sk); + + /* State saved between calls: */ + table = cb->args[0]; + bucket = cb->args[1]; + i = last_i = cb->args[2]; + + /* TODO VMCI pending sockets? */ + + spin_lock_bh(&vsock_table_lock); + + /* Bind table (locally created sockets) */ + if (table == 0) { + while (bucket < ARRAY_SIZE(vsock_bind_table)) { + struct list_head *head = &vsock_bind_table[bucket]; + + i = 0; + list_for_each_entry(vsk, head, bound_table) { + struct sock *sk = sk_vsock(vsk); + + if (!net_eq(sock_net(sk), net)) + continue; + if (i < last_i) + goto next_bind; + if (!(req->vdiag_states & (1 << sk->sk_state))) + goto next_bind; + if (sk_diag_fill(sk, skb, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI) < 0) + goto done; +next_bind: + i++; + } + last_i = 0; + bucket++; + } + + table++; + bucket = 0; + } + + /* Connected table (accepted connections) */ + while (bucket < ARRAY_SIZE(vsock_connected_table)) { + struct list_head *head = &vsock_connected_table[bucket]; + + i = 0; + list_for_each_entry(vsk, head, connected_table) { + struct sock *sk = sk_vsock(vsk); + + /* Skip sockets we've already seen above */ + if (__vsock_in_bound_table(vsk)) + continue; + + if (!net_eq(sock_net(sk), net)) + continue; + if (i < last_i) + goto next_connected; + if (!(req->vdiag_states & (1 << sk->sk_state))) + goto next_connected; + if (sk_diag_fill(sk, skb, + NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, + NLM_F_MULTI) < 0) + goto done; +next_connected: + i++; + } + last_i = 0; + bucket++; + } + +done: + spin_unlock_bh(&vsock_table_lock); + + cb->args[0] = table; + cb->args[1] = bucket; + cb->args[2] = i; + + return skb->len; +} + +static int vsock_diag_handler_dump(struct sk_buff *skb, struct nlmsghdr *h) +{ + int hdrlen = sizeof(struct vsock_diag_req); + struct net *net = sock_net(skb->sk); + + if (nlmsg_len(h) < hdrlen) + return -EINVAL; + + if (h->nlmsg_flags & NLM_F_DUMP) { + struct netlink_dump_control c = { + .dump = vsock_diag_dump, + }; + return netlink_dump_start(net->diag_nlsk, skb, h, &c); + } + + return -EOPNOTSUPP; +} + +static const struct sock_diag_handler vsock_diag_handler = { + .family = AF_VSOCK, + .dump = vsock_diag_handler_dump, +}; + +static int __init vsock_diag_init(void) +{ + return sock_diag_register(&vsock_diag_handler); +} + +static void __exit vsock_diag_exit(void) +{ + sock_diag_unregister(&vsock_diag_handler); +} + +module_init(vsock_diag_init); +module_exit(vsock_diag_exit); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NET_PF_PROTO_TYPE(PF_NETLINK, NETLINK_SOCK_DIAG, + 40 /* AF_VSOCK */); diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c new file mode 100644 index 000000000000..5583df708b8c --- /dev/null +++ b/net/vmw_vsock/hyperv_transport.c @@ -0,0 +1,917 @@ +/* + * Hyper-V transport for vsock + * + * Hyper-V Sockets supplies a byte-stream based communication mechanism + * between the host and the VM. This driver implements the necessary + * support in the VM by introducing the new vsock transport. + * + * Copyright (c) 2017, Microsoft Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ +#include <linux/module.h> +#include <linux/vmalloc.h> +#include <linux/hyperv.h> +#include <net/sock.h> +#include <net/af_vsock.h> + +/* The host side's design of the feature requires 6 exact 4KB pages for + * recv/send rings respectively -- this is suboptimal considering memory + * consumption, however unluckily we have to live with it, before the + * host comes up with a better design in the future. + */ +#define PAGE_SIZE_4K 4096 +#define RINGBUFFER_HVS_RCV_SIZE (PAGE_SIZE_4K * 6) +#define RINGBUFFER_HVS_SND_SIZE (PAGE_SIZE_4K * 6) + +/* The MTU is 16KB per the host side's design */ +#define HVS_MTU_SIZE (1024 * 16) + +struct vmpipe_proto_header { + u32 pkt_type; + u32 data_size; +}; + +/* For recv, we use the VMBus in-place packet iterator APIs to directly copy + * data from the ringbuffer into the userspace buffer. + */ +struct hvs_recv_buf { + /* The header before the payload data */ + struct vmpipe_proto_header hdr; + + /* The payload */ + u8 data[HVS_MTU_SIZE]; +}; + +/* We can send up to HVS_MTU_SIZE bytes of payload to the host, but let's use + * a small size, i.e. HVS_SEND_BUF_SIZE, to minimize the dynamically-allocated + * buffer, because tests show there is no significant performance difference. + * + * Note: the buffer can be eliminated in the future when we add new VMBus + * ringbuffer APIs that allow us to directly copy data from userspace buffer + * to VMBus ringbuffer. + */ +#define HVS_SEND_BUF_SIZE (PAGE_SIZE_4K - sizeof(struct vmpipe_proto_header)) + +struct hvs_send_buf { + /* The header before the payload data */ + struct vmpipe_proto_header hdr; + + /* The payload */ + u8 data[HVS_SEND_BUF_SIZE]; +}; + +#define HVS_HEADER_LEN (sizeof(struct vmpacket_descriptor) + \ + sizeof(struct vmpipe_proto_header)) + +/* See 'prev_indices' in hv_ringbuffer_read(), hv_ringbuffer_write(), and + * __hv_pkt_iter_next(). + */ +#define VMBUS_PKT_TRAILER_SIZE (sizeof(u64)) + +#define HVS_PKT_LEN(payload_len) (HVS_HEADER_LEN + \ + ALIGN((payload_len), 8) + \ + VMBUS_PKT_TRAILER_SIZE) + +union hvs_service_id { + uuid_le srv_id; + + struct { + unsigned int svm_port; + unsigned char b[sizeof(uuid_le) - sizeof(unsigned int)]; + }; +}; + +/* Per-socket state (accessed via vsk->trans) */ +struct hvsock { + struct vsock_sock *vsk; + + uuid_le vm_srv_id; + uuid_le host_srv_id; + + struct vmbus_channel *chan; + struct vmpacket_descriptor *recv_desc; + + /* The length of the payload not delivered to userland yet */ + u32 recv_data_len; + /* The offset of the payload */ + u32 recv_data_off; + + /* Have we sent the zero-length packet (FIN)? */ + bool fin_sent; +}; + +/* In the VM, we support Hyper-V Sockets with AF_VSOCK, and the endpoint is + * <cid, port> (see struct sockaddr_vm). Note: cid is not really used here: + * when we write apps to connect to the host, we can only use VMADDR_CID_ANY + * or VMADDR_CID_HOST (both are equivalent) as the remote cid, and when we + * write apps to bind() & listen() in the VM, we can only use VMADDR_CID_ANY + * as the local cid. + * + * On the host, Hyper-V Sockets are supported by Winsock AF_HYPERV: + * https://docs.microsoft.com/en-us/virtualization/hyper-v-on-windows/user- + * guide/make-integration-service, and the endpoint is <VmID, ServiceId> with + * the below sockaddr: + * + * struct SOCKADDR_HV + * { + * ADDRESS_FAMILY Family; + * USHORT Reserved; + * GUID VmId; + * GUID ServiceId; + * }; + * Note: VmID is not used by Linux VM and actually it isn't transmitted via + * VMBus, because here it's obvious the host and the VM can easily identify + * each other. Though the VmID is useful on the host, especially in the case + * of Windows container, Linux VM doesn't need it at all. + * + * To make use of the AF_VSOCK infrastructure in Linux VM, we have to limit + * the available GUID space of SOCKADDR_HV so that we can create a mapping + * between AF_VSOCK port and SOCKADDR_HV Service GUID. The rule of writing + * Hyper-V Sockets apps on the host and in Linux VM is: + * + **************************************************************************** + * The only valid Service GUIDs, from the perspectives of both the host and * + * Linux VM, that can be connected by the other end, must conform to this * + * format: <port>-facb-11e6-bd58-64006a7986d3, and the "port" must be in * + * this range [0, 0x7FFFFFFF]. * + **************************************************************************** + * + * When we write apps on the host to connect(), the GUID ServiceID is used. + * When we write apps in Linux VM to connect(), we only need to specify the + * port and the driver will form the GUID and use that to request the host. + * + * From the perspective of Linux VM: + * 1. the local ephemeral port (i.e. the local auto-bound port when we call + * connect() without explicit bind()) is generated by __vsock_bind_stream(), + * and the range is [1024, 0xFFFFFFFF). + * 2. the remote ephemeral port (i.e. the auto-generated remote port for + * a connect request initiated by the host's connect()) is generated by + * hvs_remote_addr_init() and the range is [0x80000000, 0xFFFFFFFF). + */ + +#define MAX_LISTEN_PORT ((u32)0x7FFFFFFF) +#define MAX_VM_LISTEN_PORT MAX_LISTEN_PORT +#define MAX_HOST_LISTEN_PORT MAX_LISTEN_PORT +#define MIN_HOST_EPHEMERAL_PORT (MAX_HOST_LISTEN_PORT + 1) + +/* 00000000-facb-11e6-bd58-64006a7986d3 */ +static const uuid_le srv_id_template = + UUID_LE(0x00000000, 0xfacb, 0x11e6, 0xbd, 0x58, + 0x64, 0x00, 0x6a, 0x79, 0x86, 0xd3); + +static bool is_valid_srv_id(const uuid_le *id) +{ + return !memcmp(&id->b[4], &srv_id_template.b[4], sizeof(uuid_le) - 4); +} + +static unsigned int get_port_by_srv_id(const uuid_le *svr_id) +{ + return *((unsigned int *)svr_id); +} + +static void hvs_addr_init(struct sockaddr_vm *addr, const uuid_le *svr_id) +{ + unsigned int port = get_port_by_srv_id(svr_id); + + vsock_addr_init(addr, VMADDR_CID_ANY, port); +} + +static void hvs_remote_addr_init(struct sockaddr_vm *remote, + struct sockaddr_vm *local) +{ + static u32 host_ephemeral_port = MIN_HOST_EPHEMERAL_PORT; + struct sock *sk; + + vsock_addr_init(remote, VMADDR_CID_ANY, VMADDR_PORT_ANY); + + while (1) { + /* Wrap around ? */ + if (host_ephemeral_port < MIN_HOST_EPHEMERAL_PORT || + host_ephemeral_port == VMADDR_PORT_ANY) + host_ephemeral_port = MIN_HOST_EPHEMERAL_PORT; + + remote->svm_port = host_ephemeral_port++; + + sk = vsock_find_connected_socket(remote, local); + if (!sk) { + /* Found an available ephemeral port */ + return; + } + + /* Release refcnt got in vsock_find_connected_socket */ + sock_put(sk); + } +} + +static void hvs_set_channel_pending_send_size(struct vmbus_channel *chan) +{ + set_channel_pending_send_size(chan, + HVS_PKT_LEN(HVS_SEND_BUF_SIZE)); + + /* See hvs_stream_has_space(): we must make sure the host has seen + * the new pending send size, before we can re-check the writable + * bytes. + */ + virt_mb(); +} + +static void hvs_clear_channel_pending_send_size(struct vmbus_channel *chan) +{ + set_channel_pending_send_size(chan, 0); + + /* Ditto */ + virt_mb(); +} + +static bool hvs_channel_readable(struct vmbus_channel *chan) +{ + u32 readable = hv_get_bytes_to_read(&chan->inbound); + + /* 0-size payload means FIN */ + return readable >= HVS_PKT_LEN(0); +} + +static int hvs_channel_readable_payload(struct vmbus_channel *chan) +{ + u32 readable = hv_get_bytes_to_read(&chan->inbound); + + if (readable > HVS_PKT_LEN(0)) { + /* At least we have 1 byte to read. We don't need to return + * the exact readable bytes: see vsock_stream_recvmsg() -> + * vsock_stream_has_data(). + */ + return 1; + } + + if (readable == HVS_PKT_LEN(0)) { + /* 0-size payload means FIN */ + return 0; + } + + /* No payload or FIN */ + return -1; +} + +static size_t hvs_channel_writable_bytes(struct vmbus_channel *chan) +{ + u32 writeable = hv_get_bytes_to_write(&chan->outbound); + size_t ret; + + /* The ringbuffer mustn't be 100% full, and we should reserve a + * zero-length-payload packet for the FIN: see hv_ringbuffer_write() + * and hvs_shutdown(). + */ + if (writeable <= HVS_PKT_LEN(1) + HVS_PKT_LEN(0)) + return 0; + + ret = writeable - HVS_PKT_LEN(1) - HVS_PKT_LEN(0); + + return round_down(ret, 8); +} + +static int hvs_send_data(struct vmbus_channel *chan, + struct hvs_send_buf *send_buf, size_t to_write) +{ + send_buf->hdr.pkt_type = 1; + send_buf->hdr.data_size = to_write; + return vmbus_sendpacket(chan, &send_buf->hdr, + sizeof(send_buf->hdr) + to_write, + 0, VM_PKT_DATA_INBAND, 0); +} + +static void hvs_channel_cb(void *ctx) +{ + struct sock *sk = (struct sock *)ctx; + struct vsock_sock *vsk = vsock_sk(sk); + struct hvsock *hvs = vsk->trans; + struct vmbus_channel *chan = hvs->chan; + + if (hvs_channel_readable(chan)) + sk->sk_data_ready(sk); + + /* See hvs_stream_has_space(): when we reach here, the writable bytes + * may be already less than HVS_PKT_LEN(HVS_SEND_BUF_SIZE). + */ + if (hv_get_bytes_to_write(&chan->outbound) > 0) + sk->sk_write_space(sk); +} + +static void hvs_close_connection(struct vmbus_channel *chan) +{ + struct sock *sk = get_per_channel_state(chan); + struct vsock_sock *vsk = vsock_sk(sk); + + lock_sock(sk); + + sk->sk_state = TCP_CLOSE; + sock_set_flag(sk, SOCK_DONE); + vsk->peer_shutdown |= SEND_SHUTDOWN | RCV_SHUTDOWN; + + sk->sk_state_change(sk); + + release_sock(sk); +} + +static void hvs_open_connection(struct vmbus_channel *chan) +{ + uuid_le *if_instance, *if_type; + unsigned char conn_from_host; + + struct sockaddr_vm addr; + struct sock *sk, *new = NULL; + struct vsock_sock *vnew; + struct hvsock *hvs, *hvs_new; + int ret; + + if_type = &chan->offermsg.offer.if_type; + if_instance = &chan->offermsg.offer.if_instance; + conn_from_host = chan->offermsg.offer.u.pipe.user_def[0]; + + /* The host or the VM should only listen on a port in + * [0, MAX_LISTEN_PORT] + */ + if (!is_valid_srv_id(if_type) || + get_port_by_srv_id(if_type) > MAX_LISTEN_PORT) + return; + + hvs_addr_init(&addr, conn_from_host ? if_type : if_instance); + sk = vsock_find_bound_socket(&addr); + if (!sk) + return; + + lock_sock(sk); + if ((conn_from_host && sk->sk_state != TCP_LISTEN) || + (!conn_from_host && sk->sk_state != TCP_SYN_SENT)) + goto out; + + if (conn_from_host) { + if (sk->sk_ack_backlog >= sk->sk_max_ack_backlog) + goto out; + + new = __vsock_create(sock_net(sk), NULL, sk, GFP_KERNEL, + sk->sk_type, 0); + if (!new) + goto out; + + new->sk_state = TCP_SYN_SENT; + vnew = vsock_sk(new); + hvs_new = vnew->trans; + hvs_new->chan = chan; + } else { + hvs = vsock_sk(sk)->trans; + hvs->chan = chan; + } + + set_channel_read_mode(chan, HV_CALL_DIRECT); + ret = vmbus_open(chan, RINGBUFFER_HVS_SND_SIZE, + RINGBUFFER_HVS_RCV_SIZE, NULL, 0, + hvs_channel_cb, conn_from_host ? new : sk); + if (ret != 0) { + if (conn_from_host) { + hvs_new->chan = NULL; + sock_put(new); + } else { + hvs->chan = NULL; + } + goto out; + } + + set_per_channel_state(chan, conn_from_host ? new : sk); + vmbus_set_chn_rescind_callback(chan, hvs_close_connection); + + if (conn_from_host) { + new->sk_state = TCP_ESTABLISHED; + sk->sk_ack_backlog++; + + hvs_addr_init(&vnew->local_addr, if_type); + hvs_remote_addr_init(&vnew->remote_addr, &vnew->local_addr); + + hvs_new->vm_srv_id = *if_type; + hvs_new->host_srv_id = *if_instance; + + vsock_insert_connected(vnew); + + vsock_enqueue_accept(sk, new); + } else { + sk->sk_state = TCP_ESTABLISHED; + sk->sk_socket->state = SS_CONNECTED; + + vsock_insert_connected(vsock_sk(sk)); + } + + sk->sk_state_change(sk); + +out: + /* Release refcnt obtained when we called vsock_find_bound_socket() */ + sock_put(sk); + + release_sock(sk); +} + +static u32 hvs_get_local_cid(void) +{ + return VMADDR_CID_ANY; +} + +static int hvs_sock_init(struct vsock_sock *vsk, struct vsock_sock *psk) +{ + struct hvsock *hvs; + + hvs = kzalloc(sizeof(*hvs), GFP_KERNEL); + if (!hvs) + return -ENOMEM; + + vsk->trans = hvs; + hvs->vsk = vsk; + + return 0; +} + +static int hvs_connect(struct vsock_sock *vsk) +{ + union hvs_service_id vm, host; + struct hvsock *h = vsk->trans; + + vm.srv_id = srv_id_template; + vm.svm_port = vsk->local_addr.svm_port; + h->vm_srv_id = vm.srv_id; + + host.srv_id = srv_id_template; + host.svm_port = vsk->remote_addr.svm_port; + h->host_srv_id = host.srv_id; + + return vmbus_send_tl_connect_request(&h->vm_srv_id, &h->host_srv_id); +} + +static int hvs_shutdown(struct vsock_sock *vsk, int mode) +{ + struct sock *sk = sk_vsock(vsk); + struct vmpipe_proto_header hdr; + struct hvs_send_buf *send_buf; + struct hvsock *hvs; + + if (!(mode & SEND_SHUTDOWN)) + return 0; + + lock_sock(sk); + + hvs = vsk->trans; + if (hvs->fin_sent) + goto out; + + send_buf = (struct hvs_send_buf *)&hdr; + + /* It can't fail: see hvs_channel_writable_bytes(). */ + (void)hvs_send_data(hvs->chan, send_buf, 0); + + hvs->fin_sent = true; +out: + release_sock(sk); + return 0; +} + +static void hvs_release(struct vsock_sock *vsk) +{ + struct sock *sk = sk_vsock(vsk); + struct hvsock *hvs = vsk->trans; + struct vmbus_channel *chan; + + lock_sock(sk); + + sk->sk_state = SS_DISCONNECTING; + vsock_remove_sock(vsk); + + release_sock(sk); + + chan = hvs->chan; + if (chan) + hvs_shutdown(vsk, RCV_SHUTDOWN | SEND_SHUTDOWN); + +} + +static void hvs_destruct(struct vsock_sock *vsk) +{ + struct hvsock *hvs = vsk->trans; + struct vmbus_channel *chan = hvs->chan; + + if (chan) + vmbus_hvsock_device_unregister(chan); + + kfree(hvs); +} + +static int hvs_dgram_bind(struct vsock_sock *vsk, struct sockaddr_vm *addr) +{ + return -EOPNOTSUPP; +} + +static int hvs_dgram_dequeue(struct vsock_sock *vsk, struct msghdr *msg, + size_t len, int flags) +{ + return -EOPNOTSUPP; +} + +static int hvs_dgram_enqueue(struct vsock_sock *vsk, + struct sockaddr_vm *remote, struct msghdr *msg, + size_t dgram_len) +{ + return -EOPNOTSUPP; +} + +static bool hvs_dgram_allow(u32 cid, u32 port) +{ + return false; +} + +static int hvs_update_recv_data(struct hvsock *hvs) +{ + struct hvs_recv_buf *recv_buf; + u32 payload_len; + + recv_buf = (struct hvs_recv_buf *)(hvs->recv_desc + 1); + payload_len = recv_buf->hdr.data_size; + + if (payload_len > HVS_MTU_SIZE) + return -EIO; + + if (payload_len == 0) + hvs->vsk->peer_shutdown |= SEND_SHUTDOWN; + + hvs->recv_data_len = payload_len; + hvs->recv_data_off = 0; + + return 0; +} + +static ssize_t hvs_stream_dequeue(struct vsock_sock *vsk, struct msghdr *msg, + size_t len, int flags) +{ + struct hvsock *hvs = vsk->trans; + bool need_refill = !hvs->recv_desc; + struct hvs_recv_buf *recv_buf; + u32 to_read; + int ret; + + if (flags & MSG_PEEK) + return -EOPNOTSUPP; + + if (need_refill) { + hvs->recv_desc = hv_pkt_iter_first(hvs->chan); + ret = hvs_update_recv_data(hvs); + if (ret) + return ret; + } + + recv_buf = (struct hvs_recv_buf *)(hvs->recv_desc + 1); + to_read = min_t(u32, len, hvs->recv_data_len); + ret = memcpy_to_msg(msg, recv_buf->data + hvs->recv_data_off, to_read); + if (ret != 0) + return ret; + + hvs->recv_data_len -= to_read; + if (hvs->recv_data_len == 0) { + hvs->recv_desc = hv_pkt_iter_next(hvs->chan, hvs->recv_desc); + if (hvs->recv_desc) { + ret = hvs_update_recv_data(hvs); + if (ret) + return ret; + } + } else { + hvs->recv_data_off += to_read; + } + + return to_read; +} + +static ssize_t hvs_stream_enqueue(struct vsock_sock *vsk, struct msghdr *msg, + size_t len) +{ + struct hvsock *hvs = vsk->trans; + struct vmbus_channel *chan = hvs->chan; + struct hvs_send_buf *send_buf; + ssize_t to_write, max_writable, ret; + + BUILD_BUG_ON(sizeof(*send_buf) != PAGE_SIZE_4K); + + send_buf = kmalloc(sizeof(*send_buf), GFP_KERNEL); + if (!send_buf) + return -ENOMEM; + + max_writable = hvs_channel_writable_bytes(chan); + to_write = min_t(ssize_t, len, max_writable); + to_write = min_t(ssize_t, to_write, HVS_SEND_BUF_SIZE); + + ret = memcpy_from_msg(send_buf->data, msg, to_write); + if (ret < 0) + goto out; + + ret = hvs_send_data(hvs->chan, send_buf, to_write); + if (ret < 0) + goto out; + + ret = to_write; +out: + kfree(send_buf); + return ret; +} + +static s64 hvs_stream_has_data(struct vsock_sock *vsk) +{ + struct hvsock *hvs = vsk->trans; + s64 ret; + + if (hvs->recv_data_len > 0) + return 1; + + switch (hvs_channel_readable_payload(hvs->chan)) { + case 1: + ret = 1; + break; + case 0: + vsk->peer_shutdown |= SEND_SHUTDOWN; + ret = 0; + break; + default: /* -1 */ + ret = 0; + break; + } + + return ret; +} + +static s64 hvs_stream_has_space(struct vsock_sock *vsk) +{ + struct hvsock *hvs = vsk->trans; + struct vmbus_channel *chan = hvs->chan; + s64 ret; + + ret = hvs_channel_writable_bytes(chan); + if (ret > 0) { + hvs_clear_channel_pending_send_size(chan); + } else { + /* See hvs_channel_cb() */ + hvs_set_channel_pending_send_size(chan); + + /* Re-check the writable bytes to avoid race */ + ret = hvs_channel_writable_bytes(chan); + if (ret > 0) + hvs_clear_channel_pending_send_size(chan); + } + + return ret; +} + +static u64 hvs_stream_rcvhiwat(struct vsock_sock *vsk) +{ + return HVS_MTU_SIZE + 1; +} + +static bool hvs_stream_is_active(struct vsock_sock *vsk) +{ + struct hvsock *hvs = vsk->trans; + + return hvs->chan != NULL; +} + +static bool hvs_stream_allow(u32 cid, u32 port) +{ + /* The host's port range [MIN_HOST_EPHEMERAL_PORT, 0xFFFFFFFF) is + * reserved as ephemeral ports, which are used as the host's ports + * when the host initiates connections. + * + * Perform this check in the guest so an immediate error is produced + * instead of a timeout. + */ + if (port > MAX_HOST_LISTEN_PORT) + return false; + + if (cid == VMADDR_CID_HOST) + return true; + + return false; +} + +static +int hvs_notify_poll_in(struct vsock_sock *vsk, size_t target, bool *readable) +{ + struct hvsock *hvs = vsk->trans; + + *readable = hvs_channel_readable(hvs->chan); + return 0; +} + +static +int hvs_notify_poll_out(struct vsock_sock *vsk, size_t target, bool *writable) +{ + *writable = hvs_stream_has_space(vsk) > 0; + + return 0; +} + +static +int hvs_notify_recv_init(struct vsock_sock *vsk, size_t target, + struct vsock_transport_recv_notify_data *d) +{ + return 0; +} + +static +int hvs_notify_recv_pre_block(struct vsock_sock *vsk, size_t target, + struct vsock_transport_recv_notify_data *d) +{ + return 0; +} + +static +int hvs_notify_recv_pre_dequeue(struct vsock_sock *vsk, size_t target, + struct vsock_transport_recv_notify_data *d) +{ + return 0; +} + +static +int hvs_notify_recv_post_dequeue(struct vsock_sock *vsk, size_t target, + ssize_t copied, bool data_read, + struct vsock_transport_recv_notify_data *d) +{ + return 0; +} + +static +int hvs_notify_send_init(struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *d) +{ + return 0; +} + +static +int hvs_notify_send_pre_block(struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *d) +{ + return 0; +} + +static +int hvs_notify_send_pre_enqueue(struct vsock_sock *vsk, + struct vsock_transport_send_notify_data *d) +{ + return 0; +} + +static +int hvs_notify_send_post_enqueue(struct vsock_sock *vsk, ssize_t written, + struct vsock_transport_send_notify_data *d) +{ + return 0; +} + +static void hvs_set_buffer_size(struct vsock_sock *vsk, u64 val) +{ + /* Ignored. */ +} + +static void hvs_set_min_buffer_size(struct vsock_sock *vsk, u64 val) +{ + /* Ignored. */ +} + +static void hvs_set_max_buffer_size(struct vsock_sock *vsk, u64 val) +{ + /* Ignored. */ +} + +static u64 hvs_get_buffer_size(struct vsock_sock *vsk) +{ + return -ENOPROTOOPT; +} + +static u64 hvs_get_min_buffer_size(struct vsock_sock *vsk) +{ + return -ENOPROTOOPT; +} + +static u64 hvs_get_max_buffer_size(struct vsock_sock *vsk) +{ + return -ENOPROTOOPT; +} + +static struct vsock_transport hvs_transport = { + .get_local_cid = hvs_get_local_cid, + + .init = hvs_sock_init, + .destruct = hvs_destruct, + .release = hvs_release, + .connect = hvs_connect, + .shutdown = hvs_shutdown, + + .dgram_bind = hvs_dgram_bind, + .dgram_dequeue = hvs_dgram_dequeue, + .dgram_enqueue = hvs_dgram_enqueue, + .dgram_allow = hvs_dgram_allow, + + .stream_dequeue = hvs_stream_dequeue, + .stream_enqueue = hvs_stream_enqueue, + .stream_has_data = hvs_stream_has_data, + .stream_has_space = hvs_stream_has_space, + .stream_rcvhiwat = hvs_stream_rcvhiwat, + .stream_is_active = hvs_stream_is_active, + .stream_allow = hvs_stream_allow, + + .notify_poll_in = hvs_notify_poll_in, + .notify_poll_out = hvs_notify_poll_out, + .notify_recv_init = hvs_notify_recv_init, + .notify_recv_pre_block = hvs_notify_recv_pre_block, + .notify_recv_pre_dequeue = hvs_notify_recv_pre_dequeue, + .notify_recv_post_dequeue = hvs_notify_recv_post_dequeue, + .notify_send_init = hvs_notify_send_init, + .notify_send_pre_block = hvs_notify_send_pre_block, + .notify_send_pre_enqueue = hvs_notify_send_pre_enqueue, + .notify_send_post_enqueue = hvs_notify_send_post_enqueue, + + .set_buffer_size = hvs_set_buffer_size, + .set_min_buffer_size = hvs_set_min_buffer_size, + .set_max_buffer_size = hvs_set_max_buffer_size, + .get_buffer_size = hvs_get_buffer_size, + .get_min_buffer_size = hvs_get_min_buffer_size, + .get_max_buffer_size = hvs_get_max_buffer_size, +}; + +static int hvs_probe(struct hv_device *hdev, + const struct hv_vmbus_device_id *dev_id) +{ + struct vmbus_channel *chan = hdev->channel; + + hvs_open_connection(chan); + + /* Always return success to suppress the unnecessary error message + * in vmbus_probe(): on error the host will rescind the device in + * 30 seconds and we can do cleanup at that time in + * vmbus_onoffer_rescind(). + */ + return 0; +} + +static int hvs_remove(struct hv_device *hdev) +{ + struct vmbus_channel *chan = hdev->channel; + + vmbus_close(chan); + + return 0; +} + +/* This isn't really used. See vmbus_match() and vmbus_probe() */ +static const struct hv_vmbus_device_id id_table[] = { + {}, +}; + +static struct hv_driver hvs_drv = { + .name = "hv_sock", + .hvsock = true, + .id_table = id_table, + .probe = hvs_probe, + .remove = hvs_remove, +}; + +static int __init hvs_init(void) +{ + int ret; + + if (vmbus_proto_version < VERSION_WIN10) + return -ENODEV; + + ret = vmbus_driver_register(&hvs_drv); + if (ret != 0) + return ret; + + ret = vsock_core_init(&hvs_transport); + if (ret) { + vmbus_driver_unregister(&hvs_drv); + return ret; + } + + return 0; +} + +static void __exit hvs_exit(void) +{ + vsock_core_exit(); + vmbus_driver_unregister(&hvs_drv); +} + +module_init(hvs_init); +module_exit(hvs_exit); + +MODULE_DESCRIPTION("Hyper-V Sockets"); +MODULE_VERSION("1.0.0"); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_NETPROTO(PF_VSOCK); diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c index 403d86e80162..8e03bd3f3668 100644 --- a/net/vmw_vsock/virtio_transport.c +++ b/net/vmw_vsock/virtio_transport.c @@ -414,7 +414,7 @@ static void virtio_vsock_event_fill(struct virtio_vsock *vsock) static void virtio_vsock_reset_sock(struct sock *sk) { lock_sock(sk); - sk->sk_state = SS_UNCONNECTED; + sk->sk_state = TCP_CLOSE; sk->sk_err = ECONNRESET; sk->sk_error_report(sk); release_sock(sk); diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index edba7ab97563..3ae3a33da70b 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -708,7 +708,7 @@ static void virtio_transport_do_close(struct vsock_sock *vsk, sock_set_flag(sk, SOCK_DONE); vsk->peer_shutdown = SHUTDOWN_MASK; if (vsock_stream_has_data(vsk) <= 0) - sk->sk_state = SS_DISCONNECTING; + sk->sk_state = TCP_CLOSING; sk->sk_state_change(sk); if (vsk->close_work_scheduled && @@ -748,8 +748,8 @@ static bool virtio_transport_close(struct vsock_sock *vsk) { struct sock *sk = &vsk->sk; - if (!(sk->sk_state == SS_CONNECTED || - sk->sk_state == SS_DISCONNECTING)) + if (!(sk->sk_state == TCP_ESTABLISHED || + sk->sk_state == TCP_CLOSING)) return true; /* Already received SHUTDOWN from peer, reply with RST */ @@ -801,7 +801,7 @@ virtio_transport_recv_connecting(struct sock *sk, switch (le16_to_cpu(pkt->hdr.op)) { case VIRTIO_VSOCK_OP_RESPONSE: - sk->sk_state = SS_CONNECTED; + sk->sk_state = TCP_ESTABLISHED; sk->sk_socket->state = SS_CONNECTED; vsock_insert_connected(vsk); sk->sk_state_change(sk); @@ -821,7 +821,7 @@ virtio_transport_recv_connecting(struct sock *sk, destroy: virtio_transport_reset(vsk, pkt); - sk->sk_state = SS_UNCONNECTED; + sk->sk_state = TCP_CLOSE; sk->sk_err = skerr; sk->sk_error_report(sk); return err; @@ -857,7 +857,7 @@ virtio_transport_recv_connected(struct sock *sk, vsk->peer_shutdown |= SEND_SHUTDOWN; if (vsk->peer_shutdown == SHUTDOWN_MASK && vsock_stream_has_data(vsk) <= 0) - sk->sk_state = SS_DISCONNECTING; + sk->sk_state = TCP_CLOSING; if (le32_to_cpu(pkt->hdr.flags)) sk->sk_state_change(sk); break; @@ -928,7 +928,7 @@ virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt) lock_sock_nested(child, SINGLE_DEPTH_NESTING); - child->sk_state = SS_CONNECTED; + child->sk_state = TCP_ESTABLISHED; vchild = vsock_sk(child); vsock_addr_init(&vchild->local_addr, le64_to_cpu(pkt->hdr.dst_cid), @@ -1016,18 +1016,18 @@ void virtio_transport_recv_pkt(struct virtio_vsock_pkt *pkt) sk->sk_write_space(sk); switch (sk->sk_state) { - case VSOCK_SS_LISTEN: + case TCP_LISTEN: virtio_transport_recv_listen(sk, pkt); virtio_transport_free_pkt(pkt); break; - case SS_CONNECTING: + case TCP_SYN_SENT: virtio_transport_recv_connecting(sk, pkt); virtio_transport_free_pkt(pkt); break; - case SS_CONNECTED: + case TCP_ESTABLISHED: virtio_transport_recv_connected(sk, pkt); break; - case SS_DISCONNECTING: + case TCP_CLOSING: virtio_transport_recv_disconnecting(sk, pkt); virtio_transport_free_pkt(pkt); break; diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 10ae7823a19d..391775e3575c 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -21,7 +21,6 @@ #include <linux/kernel.h> #include <linux/kmod.h> #include <linux/list.h> -#include <linux/miscdevice.h> #include <linux/module.h> #include <linux/mutex.h> #include <linux/net.h> @@ -743,7 +742,7 @@ static int vmci_transport_recv_stream_cb(void *data, struct vmci_datagram *dg) /* The local context ID may be out of date, update it. */ vsk->local_addr.svm_cid = dst.svm_cid; - if (sk->sk_state == SS_CONNECTED) + if (sk->sk_state == TCP_ESTABLISHED) vmci_trans(vsk)->notify_ops->handle_notify_pkt( sk, pkt, true, &dst, &src, &bh_process_pkt); @@ -801,7 +800,9 @@ static void vmci_transport_handle_detach(struct sock *sk) * left in our consume queue. */ if (vsock_stream_has_data(vsk) <= 0) { - if (sk->sk_state == SS_CONNECTING) { + sk->sk_state = TCP_CLOSE; + + if (sk->sk_state == TCP_SYN_SENT) { /* The peer may detach from a queue pair while * we are still in the connecting state, i.e., * if the peer VM is killed after attaching to @@ -810,12 +811,10 @@ static void vmci_transport_handle_detach(struct sock *sk) * event like a reset. */ - sk->sk_state = SS_UNCONNECTED; sk->sk_err = ECONNRESET; sk->sk_error_report(sk); return; } - sk->sk_state = SS_UNCONNECTED; } sk->sk_state_change(sk); } @@ -883,17 +882,17 @@ static void vmci_transport_recv_pkt_work(struct work_struct *work) vsock_sk(sk)->local_addr.svm_cid = pkt->dg.dst.context; switch (sk->sk_state) { - case VSOCK_SS_LISTEN: + case TCP_LISTEN: vmci_transport_recv_listen(sk, pkt); break; - case SS_CONNECTING: + case TCP_SYN_SENT: /* Processing of pending connections for servers goes through * the listening socket, so see vmci_transport_recv_listen() * for that path. */ vmci_transport_recv_connecting_client(sk, pkt); break; - case SS_CONNECTED: + case TCP_ESTABLISHED: vmci_transport_recv_connected(sk, pkt); break; default: @@ -942,7 +941,7 @@ static int vmci_transport_recv_listen(struct sock *sk, vsock_sk(pending)->local_addr.svm_cid = pkt->dg.dst.context; switch (pending->sk_state) { - case SS_CONNECTING: + case TCP_SYN_SENT: err = vmci_transport_recv_connecting_server(sk, pending, pkt); @@ -1072,7 +1071,7 @@ static int vmci_transport_recv_listen(struct sock *sk, vsock_add_pending(sk, pending); sk->sk_ack_backlog++; - pending->sk_state = SS_CONNECTING; + pending->sk_state = TCP_SYN_SENT; vmci_trans(vpending)->produce_size = vmci_trans(vpending)->consume_size = qp_size; vmci_trans(vpending)->queue_pair_size = qp_size; @@ -1197,11 +1196,11 @@ vmci_transport_recv_connecting_server(struct sock *listener, * the socket will be valid until it is removed from the queue. * * If we fail sending the attach below, we remove the socket from the - * connected list and move the socket to SS_UNCONNECTED before + * connected list and move the socket to TCP_CLOSE before * releasing the lock, so a pending slow path processing of an incoming * packet will not see the socket in the connected state in that case. */ - pending->sk_state = SS_CONNECTED; + pending->sk_state = TCP_ESTABLISHED; vsock_insert_connected(vpending); @@ -1232,7 +1231,7 @@ vmci_transport_recv_connecting_server(struct sock *listener, destroy: pending->sk_err = skerr; - pending->sk_state = SS_UNCONNECTED; + pending->sk_state = TCP_CLOSE; /* As long as we drop our reference, all necessary cleanup will handle * when the cleanup function drops its reference and our destruct * implementation is called. Note that since the listen handler will @@ -1270,7 +1269,7 @@ vmci_transport_recv_connecting_client(struct sock *sk, * accounting (it can already be found since it's in the bound * table). */ - sk->sk_state = SS_CONNECTED; + sk->sk_state = TCP_ESTABLISHED; sk->sk_socket->state = SS_CONNECTED; vsock_insert_connected(vsk); sk->sk_state_change(sk); @@ -1338,7 +1337,7 @@ vmci_transport_recv_connecting_client(struct sock *sk, destroy: vmci_transport_send_reset(sk, pkt); - sk->sk_state = SS_UNCONNECTED; + sk->sk_state = TCP_CLOSE; sk->sk_err = skerr; sk->sk_error_report(sk); return err; @@ -1526,7 +1525,7 @@ static int vmci_transport_recv_connected(struct sock *sk, sock_set_flag(sk, SOCK_DONE); vsk->peer_shutdown = SHUTDOWN_MASK; if (vsock_stream_has_data(vsk) <= 0) - sk->sk_state = SS_DISCONNECTING; + sk->sk_state = TCP_CLOSING; sk->sk_state_change(sk); break; @@ -1790,7 +1789,7 @@ static int vmci_transport_connect(struct vsock_sock *vsk) err = vmci_transport_send_conn_request( sk, vmci_trans(vsk)->queue_pair_size); if (err < 0) { - sk->sk_state = SS_UNCONNECTED; + sk->sk_state = TCP_CLOSE; return err; } } else { @@ -1800,7 +1799,7 @@ static int vmci_transport_connect(struct vsock_sock *vsk) sk, vmci_trans(vsk)->queue_pair_size, supported_proto_versions); if (err < 0) { - sk->sk_state = SS_UNCONNECTED; + sk->sk_state = TCP_CLOSE; return err; } diff --git a/net/vmw_vsock/vmci_transport_notify.c b/net/vmw_vsock/vmci_transport_notify.c index 1406db4d97d1..41fb427f150a 100644 --- a/net/vmw_vsock/vmci_transport_notify.c +++ b/net/vmw_vsock/vmci_transport_notify.c @@ -355,7 +355,7 @@ vmci_transport_notify_pkt_poll_in(struct sock *sk, * queue. Ask for notifications when there is something to * read. */ - if (sk->sk_state == SS_CONNECTED) { + if (sk->sk_state == TCP_ESTABLISHED) { if (!send_waiting_read(sk, 1)) return -1; diff --git a/net/vmw_vsock/vmci_transport_notify_qstate.c b/net/vmw_vsock/vmci_transport_notify_qstate.c index f3a0afc46208..0cc84f2bb05e 100644 --- a/net/vmw_vsock/vmci_transport_notify_qstate.c +++ b/net/vmw_vsock/vmci_transport_notify_qstate.c @@ -176,7 +176,7 @@ vmci_transport_notify_pkt_poll_in(struct sock *sk, * queue. Ask for notifications when there is something to * read. */ - if (sk->sk_state == SS_CONNECTED) + if (sk->sk_state == TCP_ESTABLISHED) vsock_block_update_write_window(sk); *data_ready_now = false; } diff --git a/net/wimax/Makefile b/net/wimax/Makefile index 8f1510d0cc2b..eb2db0d3b880 100644 --- a/net/wimax/Makefile +++ b/net/wimax/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_WIMAX) += wimax.o diff --git a/net/wireless/.gitignore b/net/wireless/.gitignore index c33451b896d9..61cbc304a3d3 100644 --- a/net/wireless/.gitignore +++ b/net/wireless/.gitignore @@ -1 +1,2 @@ -regdb.c +shipped-certs.c +extra-certs.c diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig index 6c606120abfe..da91bb547db3 100644 --- a/net/wireless/Kconfig +++ b/net/wireless/Kconfig @@ -19,6 +19,7 @@ config WEXT_PRIV config CFG80211 tristate "cfg80211 - wireless configuration API" depends on RFKILL || !RFKILL + select FW_LOADER ---help--- cfg80211 is the Linux wireless LAN (802.11) configuration API. Enable this if you have a wireless device. @@ -82,6 +83,36 @@ config CFG80211_CERTIFICATION_ONUS you are a wireless researcher and are working in a controlled and approved environment by your local regulatory agency. +config CFG80211_REQUIRE_SIGNED_REGDB + bool "require regdb signature" if CFG80211_CERTIFICATION_ONUS + default y + select SYSTEM_DATA_VERIFICATION + help + Require that in addition to the "regulatory.db" file a + "regulatory.db.p7s" can be loaded with a valid PKCS#7 + signature for the regulatory.db file made by one of the + keys in the certs/ directory. + +config CFG80211_USE_KERNEL_REGDB_KEYS + bool "allow regdb keys shipped with the kernel" if CFG80211_CERTIFICATION_ONUS + default y + depends on CFG80211_REQUIRE_SIGNED_REGDB + help + Allow the regulatory database to be signed by one of the keys for + which certificates are part of the kernel sources + (in net/wireless/certs/). + + This is currently only Seth Forshee's key, who is the regulatory + database maintainer. + +config CFG80211_EXTRA_REGDB_KEYDIR + string "additional regdb key directory" if CFG80211_CERTIFICATION_ONUS + depends on CFG80211_REQUIRE_SIGNED_REGDB + help + If selected, point to a directory with DER-encoded X.509 + certificates like in the kernel sources (net/wireless/certs/) + that shall be accepted for a signed regulatory database. + config CFG80211_REG_CELLULAR_HINTS bool "cfg80211 regulatory support for cellular base station hints" depends on CFG80211_CERTIFICATION_ONUS @@ -139,35 +170,14 @@ config CFG80211_DEBUGFS If unsure, say N. -config CFG80211_INTERNAL_REGDB - bool "use statically compiled regulatory rules database" if EXPERT - default n - depends on CFG80211 - ---help--- - This option generates an internal data structure representing - the wireless regulatory rules described in net/wireless/db.txt - and includes code to query that database. This is an alternative - to using CRDA for defining regulatory rules for the kernel. - - Using this option requires some parsing of the db.txt at build time, - the parser will be upkept with the latest wireless-regdb updates but - older wireless-regdb formats will be ignored. The parser may later - be replaced to avoid issues with conflicts on versions of - wireless-regdb. - - For details see: - - http://wireless.kernel.org/en/developers/Regulatory - - Most distributions have a CRDA package. So if unsure, say N. - config CFG80211_CRDA_SUPPORT - bool "support CRDA" if CFG80211_INTERNAL_REGDB + bool "support CRDA" if EXPERT default y depends on CFG80211 help You should enable this option unless you know for sure you have no - need for it, for example when using internal regdb (above.) + need for it, for example when using internal regdb (above) or the + database loaded as a firmware file. If unsure, say Y. diff --git a/net/wireless/Makefile b/net/wireless/Makefile index d06e5015751a..278d979c211a 100644 --- a/net/wireless/Makefile +++ b/net/wireless/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 obj-$(CONFIG_CFG80211) += cfg80211.o obj-$(CONFIG_LIB80211) += lib80211.o obj-$(CONFIG_LIB80211_CRYPT_WEP) += lib80211_crypt_wep.o @@ -14,11 +15,27 @@ cfg80211-y += mlme.o ibss.o sme.o chan.o ethtool.o mesh.o ap.o trace.o ocb.o cfg80211-$(CONFIG_OF) += of.o cfg80211-$(CONFIG_CFG80211_DEBUGFS) += debugfs.o cfg80211-$(CONFIG_CFG80211_WEXT) += wext-compat.o wext-sme.o -cfg80211-$(CONFIG_CFG80211_INTERNAL_REGDB) += regdb.o CFLAGS_trace.o := -I$(src) -$(obj)/regdb.c: $(src)/db.txt $(src)/genregdb.awk - @$(AWK) -f $(srctree)/$(src)/genregdb.awk < $< > $@ +cfg80211-$(CONFIG_CFG80211_USE_KERNEL_REGDB_KEYS) += shipped-certs.o +ifneq ($(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR),) +cfg80211-y += extra-certs.o +endif -clean-files := regdb.c +$(obj)/shipped-certs.c: $(wildcard $(srctree)/$(src)/certs/*.x509) + @$(kecho) " GEN $@" + @echo '#include "reg.h"' > $@ + @echo 'const u8 shipped_regdb_certs[] = {' >> $@ + @for f in $^ ; do hexdump -v -e '1/1 "0x%.2x," "\n"' < $$f >> $@ ; done + @echo '};' >> $@ + @echo 'unsigned int shipped_regdb_certs_len = sizeof(shipped_regdb_certs);' >> $@ + +$(obj)/extra-certs.c: $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%) \ + $(wildcard $(CONFIG_CFG80211_EXTRA_REGDB_KEYDIR:"%"=%)/*.x509) + @$(kecho) " GEN $@" + @echo '#include "reg.h"' > $@ + @echo 'const u8 extra_regdb_certs[] = {' >> $@ + @for f in $^ ; do test -f $$f && hexdump -v -e '1/1 "0x%.2x," "\n"' < $$f >> $@ || true ; done + @echo '};' >> $@ + @echo 'unsigned int extra_regdb_certs_len = sizeof(extra_regdb_certs);' >> $@ diff --git a/net/wireless/ap.c b/net/wireless/ap.c index 25666d3009be..63682176c96c 100644 --- a/net/wireless/ap.c +++ b/net/wireless/ap.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/ieee80211.h> #include <linux/export.h> #include <net/cfg80211.h> diff --git a/net/wireless/certs/sforshee.x509 b/net/wireless/certs/sforshee.x509 Binary files differnew file mode 100644 index 000000000000..c6f8f9d6b988 --- /dev/null +++ b/net/wireless/certs/sforshee.x509 diff --git a/net/wireless/chan.c b/net/wireless/chan.c index b8aa5a7d5c77..a48859982a32 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * This file contains helper code to handle channel * settings and keeping track of what is possible at @@ -464,7 +465,7 @@ bool cfg80211_is_sub_chan(struct cfg80211_chan_def *chandef, struct ieee80211_channel *chan) { int width; - u32 cf_offset, freq; + u32 freq; if (chandef->chan->center_freq == chan->center_freq) return true; @@ -473,8 +474,6 @@ bool cfg80211_is_sub_chan(struct cfg80211_chan_def *chandef, if (width <= 20) return false; - cf_offset = width / 2 - 10; - for (freq = chandef->center_freq1 - width / 2 + 10; freq <= chandef->center_freq1 + width / 2 - 10; freq += 20) { if (chan->center_freq == freq) diff --git a/net/wireless/core.c b/net/wireless/core.c index 7b33e8c366bc..fdde0d98fde1 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -1384,7 +1384,7 @@ out_fail_sysfs: out_fail_pernet: return err; } -subsys_initcall(cfg80211_init); +fs_initcall(cfg80211_init); static void __exit cfg80211_exit(void) { diff --git a/net/wireless/core.h b/net/wireless/core.h index 6e809325af3b..d2f7e8b8a097 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Wireless configuration interface internals. * @@ -216,6 +217,7 @@ enum cfg80211_event_type { EVENT_DISCONNECTED, EVENT_IBSS_JOINED, EVENT_STOPPED, + EVENT_PORT_AUTHORIZED, }; struct cfg80211_event { @@ -235,6 +237,9 @@ struct cfg80211_event { u8 bssid[ETH_ALEN]; struct ieee80211_channel *channel; } ij; + struct { + u8 bssid[ETH_ALEN]; + } pa; }; }; @@ -385,6 +390,7 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev, bool wextev); void __cfg80211_roamed(struct wireless_dev *wdev, struct cfg80211_roam_info *info); +void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *bssid); int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev); void cfg80211_autodisconnect_wk(struct work_struct *work); diff --git a/net/wireless/db.txt b/net/wireless/db.txt deleted file mode 100644 index a2fc3a09ccdc..000000000000 --- a/net/wireless/db.txt +++ /dev/null @@ -1,17 +0,0 @@ -# -# This file is a placeholder to prevent accidental build breakage if someone -# enables CONFIG_CFG80211_INTERNAL_REGDB. Almost no one actually needs to -# enable that build option. -# -# You should be using CRDA instead. It is even better if you use the CRDA -# package provided by your distribution, since they will probably keep it -# up-to-date on your behalf. -# -# If you _really_ intend to use CONFIG_CFG80211_INTERNAL_REGDB then you will -# need to replace this file with one containing appropriately formatted -# regulatory rules that cover the regulatory domains you will be using. Your -# best option is to extract the db.txt file from the wireless-regdb git -# repository: -# -# git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-regdb.git -# diff --git a/net/wireless/debugfs.h b/net/wireless/debugfs.h index 74fdd3811427..a8a135d94ab5 100644 --- a/net/wireless/debugfs.h +++ b/net/wireless/debugfs.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __CFG80211_DEBUGFS_H #define __CFG80211_DEBUGFS_H diff --git a/net/wireless/ethtool.c b/net/wireless/ethtool.c index e9e91298c70d..a9c0f368db5d 100644 --- a/net/wireless/ethtool.c +++ b/net/wireless/ethtool.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/utsname.h> #include <net/cfg80211.h> #include "core.h" diff --git a/net/wireless/genregdb.awk b/net/wireless/genregdb.awk deleted file mode 100644 index baf2426b555a..000000000000 --- a/net/wireless/genregdb.awk +++ /dev/null @@ -1,158 +0,0 @@ -#!/usr/bin/awk -f -# -# genregdb.awk -- generate regdb.c from db.txt -# -# Actually, it reads from stdin (presumed to be db.txt) and writes -# to stdout (presumed to be regdb.c), but close enough... -# -# Copyright 2009 John W. Linville <linville@tuxdriver.com> -# -# Permission to use, copy, modify, and/or distribute this software for any -# purpose with or without fee is hereby granted, provided that the above -# copyright notice and this permission notice appear in all copies. -# -# THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES -# WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF -# MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR -# ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN -# ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF -# OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - -BEGIN { - active = 0 - rules = 0; - print "/*" - print " * DO NOT EDIT -- file generated from data in db.txt" - print " */" - print "" - print "#include <linux/nl80211.h>" - print "#include <net/cfg80211.h>" - print "#include \"regdb.h\"" - print "" - regdb = "const struct ieee80211_regdomain *reg_regdb[] = {\n" -} - -function parse_country_head() { - country=$2 - sub(/:/, "", country) - printf "static const struct ieee80211_regdomain regdom_%s = {\n", country - printf "\t.alpha2 = \"%s\",\n", country - if ($NF ~ /DFS-ETSI/) - printf "\t.dfs_region = NL80211_DFS_ETSI,\n" - else if ($NF ~ /DFS-FCC/) - printf "\t.dfs_region = NL80211_DFS_FCC,\n" - else if ($NF ~ /DFS-JP/) - printf "\t.dfs_region = NL80211_DFS_JP,\n" - printf "\t.reg_rules = {\n" - active = 1 - regdb = regdb "\t®dom_" country ",\n" -} - -function parse_reg_rule() -{ - flag_starts_at = 7 - - start = $1 - sub(/\(/, "", start) - end = $3 - bw = $5 - sub(/\),/, "", bw) - gain = 0 - power = $6 - # power might be in mW... - units = $7 - dfs_cac = 0 - - sub(/\(/, "", power) - sub(/\),/, "", power) - sub(/\),/, "", units) - sub(/\)/, "", units) - - if (units == "mW") { - flag_starts_at = 8 - power = 10 * log(power)/log(10) - if ($8 ~ /[[:digit:]]/) { - flag_starts_at = 9 - dfs_cac = $8 - } - } else { - if ($7 ~ /[[:digit:]]/) { - flag_starts_at = 8 - dfs_cac = $7 - } - } - sub(/\(/, "", dfs_cac) - sub(/\),/, "", dfs_cac) - flagstr = "" - for (i=flag_starts_at; i<=NF; i++) - flagstr = flagstr $i - split(flagstr, flagarray, ",") - flags = "" - for (arg in flagarray) { - if (flagarray[arg] == "NO-OFDM") { - flags = flags "\n\t\t\tNL80211_RRF_NO_OFDM | " - } else if (flagarray[arg] == "NO-CCK") { - flags = flags "\n\t\t\tNL80211_RRF_NO_CCK | " - } else if (flagarray[arg] == "NO-INDOOR") { - flags = flags "\n\t\t\tNL80211_RRF_NO_INDOOR | " - } else if (flagarray[arg] == "NO-OUTDOOR") { - flags = flags "\n\t\t\tNL80211_RRF_NO_OUTDOOR | " - } else if (flagarray[arg] == "DFS") { - flags = flags "\n\t\t\tNL80211_RRF_DFS | " - } else if (flagarray[arg] == "PTP-ONLY") { - flags = flags "\n\t\t\tNL80211_RRF_PTP_ONLY | " - } else if (flagarray[arg] == "PTMP-ONLY") { - flags = flags "\n\t\t\tNL80211_RRF_PTMP_ONLY | " - } else if (flagarray[arg] == "PASSIVE-SCAN") { - flags = flags "\n\t\t\tNL80211_RRF_NO_IR | " - } else if (flagarray[arg] == "NO-IBSS") { - flags = flags "\n\t\t\tNL80211_RRF_NO_IR | " - } else if (flagarray[arg] == "NO-IR") { - flags = flags "\n\t\t\tNL80211_RRF_NO_IR | " - } else if (flagarray[arg] == "AUTO-BW") { - flags = flags "\n\t\t\tNL80211_RRF_AUTO_BW | " - } - - } - flags = flags "0" - printf "\t\tREG_RULE_EXT(%d, %d, %d, %d, %.0f, %d, %s),\n", start, end, bw, gain, power, dfs_cac, flags - rules++ -} - -function print_tail_country() -{ - active = 0 - printf "\t},\n" - printf "\t.n_reg_rules = %d\n", rules - printf "};\n\n" - rules = 0; -} - -/^[ \t]*#/ { - # Ignore -} - -!active && /^[ \t]*$/ { - # Ignore -} - -!active && /country/ { - parse_country_head() -} - -active && /^[ \t]*\(/ { - parse_reg_rule() -} - -active && /^[ \t]*$/ { - print_tail_country() -} - -END { - if (active) - print_tail_country() - print regdb "};" - print "" - print "int reg_regdb_size = ARRAY_SIZE(reg_regdb);" -} diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 10bf040a0982..413d4f4e6334 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Some IBSS support code for cfg80211. * diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 421a6b80ec62..51aa55618ef7 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/ieee80211.h> #include <linux/export.h> #include <net/cfg80211.h> diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index d8df7a5180a0..e7c64a8dce54 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * cfg80211 MLME SAP interface * diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 8ce85420ecb0..a0e1951227fa 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -549,6 +549,14 @@ nl80211_nan_srf_policy[NL80211_NAN_SRF_ATTR_MAX + 1] = { [NL80211_NAN_SRF_MAC_ADDRS] = { .type = NLA_NESTED }, }; +/* policy for packet pattern attributes */ +static const struct nla_policy +nl80211_packet_pattern_policy[MAX_NL80211_PKTPAT + 1] = { + [NL80211_PKTPAT_MASK] = { .type = NLA_BINARY, }, + [NL80211_PKTPAT_PATTERN] = { .type = NLA_BINARY, }, + [NL80211_PKTPAT_OFFSET] = { .type = NLA_U32 }, +}; + static int nl80211_prepare_wdev_dump(struct sk_buff *skb, struct netlink_callback *cb, struct cfg80211_registered_device **rdev, @@ -2122,6 +2130,15 @@ static int nl80211_parse_chandef(struct cfg80211_registered_device *rdev, case NL80211_CHAN_HT40MINUS: cfg80211_chandef_create(chandef, chandef->chan, chantype); + /* user input for center_freq is incorrect */ + if (info->attrs[NL80211_ATTR_CENTER_FREQ1] && + chandef->center_freq1 != nla_get_u32( + info->attrs[NL80211_ATTR_CENTER_FREQ1])) + return -EINVAL; + /* center_freq2 must be zero */ + if (info->attrs[NL80211_ATTR_CENTER_FREQ2] && + nla_get_u32(info->attrs[NL80211_ATTR_CENTER_FREQ2])) + return -EINVAL; break; default: return -EINVAL; @@ -3791,8 +3808,8 @@ static void nl80211_check_ap_rate_selectors(struct cfg80211_ap_settings *params, static void nl80211_calculate_ap_params(struct cfg80211_ap_settings *params) { const struct cfg80211_beacon_data *bcn = ¶ms->beacon; - size_t ies_len = bcn->beacon_ies_len; - const u8 *ies = bcn->beacon_ies; + size_t ies_len = bcn->tail_len; + const u8 *ies = bcn->tail; const u8 *rates; const u8 *cap; @@ -5669,6 +5686,11 @@ static int nl80211_req_set_reg(struct sk_buff *skb, struct genl_info *info) } } +static int nl80211_reload_regdb(struct sk_buff *skb, struct genl_info *info) +{ + return reg_reload_regdb(); +} + static int nl80211_get_mesh_config(struct sk_buff *skb, struct genl_info *info) { @@ -6269,7 +6291,7 @@ static int nl80211_send_regdom(struct sk_buff *msg, struct netlink_callback *cb, if (!hdr) return -1; - genl_dump_check_consistent(cb, hdr, &nl80211_fam); + genl_dump_check_consistent(cb, hdr); if (nl80211_put_regdom(regdom, msg)) goto nla_put_failure; @@ -6610,6 +6632,77 @@ static bool cfg80211_off_channel_oper_allowed(struct wireless_dev *wdev) return regulatory_pre_cac_allowed(wdev->wiphy); } +static int +nl80211_check_scan_flags(struct wiphy *wiphy, struct wireless_dev *wdev, + void *request, struct nlattr **attrs, + bool is_sched_scan) +{ + u8 *mac_addr, *mac_addr_mask; + u32 *flags; + enum nl80211_feature_flags randomness_flag; + + if (!attrs[NL80211_ATTR_SCAN_FLAGS]) + return 0; + + if (is_sched_scan) { + struct cfg80211_sched_scan_request *req = request; + + randomness_flag = wdev ? + NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR : + NL80211_FEATURE_ND_RANDOM_MAC_ADDR; + flags = &req->flags; + mac_addr = req->mac_addr; + mac_addr_mask = req->mac_addr_mask; + } else { + struct cfg80211_scan_request *req = request; + + randomness_flag = NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR; + flags = &req->flags; + mac_addr = req->mac_addr; + mac_addr_mask = req->mac_addr_mask; + } + + *flags = nla_get_u32(attrs[NL80211_ATTR_SCAN_FLAGS]); + + if ((*flags & NL80211_SCAN_FLAG_LOW_PRIORITY) && + !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) + return -EOPNOTSUPP; + + if (*flags & NL80211_SCAN_FLAG_RANDOM_ADDR) { + int err; + + if (!(wiphy->features & randomness_flag) || + (wdev && wdev->current_bss)) + return -EOPNOTSUPP; + + err = nl80211_parse_random_mac(attrs, mac_addr, mac_addr_mask); + if (err) + return err; + } + + if ((*flags & NL80211_SCAN_FLAG_FILS_MAX_CHANNEL_TIME) && + !wiphy_ext_feature_isset(wiphy, + NL80211_EXT_FEATURE_FILS_MAX_CHANNEL_TIME)) + return -EOPNOTSUPP; + + if ((*flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP) && + !wiphy_ext_feature_isset(wiphy, + NL80211_EXT_FEATURE_ACCEPT_BCAST_PROBE_RESP)) + return -EOPNOTSUPP; + + if ((*flags & NL80211_SCAN_FLAG_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION) && + !wiphy_ext_feature_isset(wiphy, + NL80211_EXT_FEATURE_OCE_PROBE_REQ_DEFERRAL_SUPPRESSION)) + return -EOPNOTSUPP; + + if ((*flags & NL80211_SCAN_FLAG_OCE_PROBE_REQ_HIGH_TX_RATE) && + !wiphy_ext_feature_isset(wiphy, + NL80211_EXT_FEATURE_OCE_PROBE_REQ_HIGH_TX_RATE)) + return -EOPNOTSUPP; + + return 0; +} + static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -6815,34 +6908,10 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info) nla_get_flag(info->attrs[NL80211_ATTR_MEASUREMENT_DURATION_MANDATORY]); } - if (info->attrs[NL80211_ATTR_SCAN_FLAGS]) { - request->flags = nla_get_u32( - info->attrs[NL80211_ATTR_SCAN_FLAGS]); - if ((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) && - !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) { - err = -EOPNOTSUPP; - goto out_free; - } - - if (request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) { - if (!(wiphy->features & - NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR)) { - err = -EOPNOTSUPP; - goto out_free; - } - - if (wdev->current_bss) { - err = -EOPNOTSUPP; - goto out_free; - } - - err = nl80211_parse_random_mac(info->attrs, - request->mac_addr, - request->mac_addr_mask); - if (err) - goto out_free; - } - } + err = nl80211_check_scan_flags(wiphy, wdev, request, info->attrs, + false); + if (err) + goto out_free; request->no_cck = nla_get_flag(info->attrs[NL80211_ATTR_TX_NO_CCK_RATE]); @@ -7290,37 +7359,9 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, request->ie_len); } - if (attrs[NL80211_ATTR_SCAN_FLAGS]) { - request->flags = nla_get_u32( - attrs[NL80211_ATTR_SCAN_FLAGS]); - if ((request->flags & NL80211_SCAN_FLAG_LOW_PRIORITY) && - !(wiphy->features & NL80211_FEATURE_LOW_PRIORITY_SCAN)) { - err = -EOPNOTSUPP; - goto out_free; - } - - if (request->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) { - u32 flg = NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR; - - if (!wdev) /* must be net-detect */ - flg = NL80211_FEATURE_ND_RANDOM_MAC_ADDR; - - if (!(wiphy->features & flg)) { - err = -EOPNOTSUPP; - goto out_free; - } - - if (wdev && wdev->current_bss) { - err = -EOPNOTSUPP; - goto out_free; - } - - err = nl80211_parse_random_mac(attrs, request->mac_addr, - request->mac_addr_mask); - if (err) - goto out_free; - } - } + err = nl80211_check_scan_flags(wiphy, wdev, request, attrs, true); + if (err) + goto out_free; if (attrs[NL80211_ATTR_SCHED_SCAN_DELAY]) request->delay = @@ -7681,7 +7722,7 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, if (!hdr) return -1; - genl_dump_check_consistent(cb, hdr, &nl80211_fam); + genl_dump_check_consistent(cb, hdr); if (nla_put_u32(msg, NL80211_ATTR_GENERATION, rdev->bss_generation)) goto nla_put_failure; @@ -8924,8 +8965,14 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_USE_MFP]) { connect.mfp = nla_get_u32(info->attrs[NL80211_ATTR_USE_MFP]); + if (connect.mfp == NL80211_MFP_OPTIONAL && + !wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_MFP_OPTIONAL)) + return -EOPNOTSUPP; + if (connect.mfp != NL80211_MFP_REQUIRED && - connect.mfp != NL80211_MFP_NO) + connect.mfp != NL80211_MFP_NO && + connect.mfp != NL80211_MFP_OPTIONAL) return -EINVAL; } else { connect.mfp = NL80211_MFP_NO; @@ -9987,6 +10034,9 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info) if (err) return err; + if (!setup.chandef.chan) + return -EINVAL; + err = validate_beacon_tx_rate(rdev, setup.chandef.chan->band, &setup.beacon_rate); if (err) @@ -10529,7 +10579,8 @@ static int nl80211_set_wowlan(struct sk_buff *skb, struct genl_info *info) u8 *mask_pat; nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, - NULL, info->extack); + nl80211_packet_pattern_policy, + info->extack); err = -EINVAL; if (!pat_tb[NL80211_PKTPAT_MASK] || !pat_tb[NL80211_PKTPAT_PATTERN]) @@ -10778,7 +10829,8 @@ static int nl80211_parse_coalesce_rule(struct cfg80211_registered_device *rdev, rem) { u8 *mask_pat; - nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, NULL, NULL); + nla_parse_nested(pat_tb, MAX_NL80211_PKTPAT, pat, + nl80211_packet_pattern_policy, NULL); if (!pat_tb[NL80211_PKTPAT_MASK] || !pat_tb[NL80211_PKTPAT_PATTERN]) return -EINVAL; @@ -10903,6 +10955,9 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info) if (err) return err; + if (!tb[NL80211_REKEY_DATA_REPLAY_CTR] || !tb[NL80211_REKEY_DATA_KEK] || + !tb[NL80211_REKEY_DATA_KCK]) + return -EINVAL; if (nla_len(tb[NL80211_REKEY_DATA_REPLAY_CTR]) != NL80211_REPLAY_CTR_LEN) return -ERANGE; if (nla_len(tb[NL80211_REKEY_DATA_KEK]) != NL80211_KEK_LEN) @@ -12669,6 +12724,12 @@ static const struct genl_ops nl80211_ops[] = { .flags = GENL_ADMIN_PERM, }, { + .cmd = NL80211_CMD_RELOAD_REGDB, + .doit = nl80211_reload_regdb, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, + { .cmd = NL80211_CMD_GET_MESH_CONFIG, .doit = nl80211_get_mesh_config, .policy = nl80211_policy, @@ -13796,9 +13857,7 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev, info->req_ie)) || (info->resp_ie && nla_put(msg, NL80211_ATTR_RESP_IE, info->resp_ie_len, - info->resp_ie)) || - (info->authorized && - nla_put_flag(msg, NL80211_ATTR_PORT_AUTHORIZED))) + info->resp_ie))) goto nla_put_failure; genlmsg_end(msg, hdr); @@ -13812,6 +13871,36 @@ void nl80211_send_roamed(struct cfg80211_registered_device *rdev, nlmsg_free(msg); } +void nl80211_send_port_authorized(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *bssid) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_PORT_AUTHORIZED); + if (!hdr) { + nlmsg_free(msg); + return; + } + + if (nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, bssid)) + goto nla_put_failure; + + genlmsg_end(msg, hdr); + + genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, + NL80211_MCGRP_MLME, GFP_KERNEL); + return; + + nla_put_failure: + genlmsg_cancel(msg, hdr); + nlmsg_free(msg); +} + void nl80211_send_disconnected(struct cfg80211_registered_device *rdev, struct net_device *netdev, u16 reason, const u8 *ie, size_t ie_len, bool from_ap) @@ -14185,7 +14274,7 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd, struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); struct sk_buff *msg; void *hdr; - u32 nlportid = ACCESS_ONCE(wdev->ap_unexpected_nlportid); + u32 nlportid = READ_ONCE(wdev->ap_unexpected_nlportid); if (!nlportid) return false; diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index b96933322077..79e47fe60c35 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __NET_WIRELESS_NL80211_H #define __NET_WIRELESS_NL80211_H @@ -58,6 +59,8 @@ void nl80211_send_connect_result(struct cfg80211_registered_device *rdev, void nl80211_send_roamed(struct cfg80211_registered_device *rdev, struct net_device *netdev, struct cfg80211_roam_info *info, gfp_t gfp); +void nl80211_send_port_authorized(struct cfg80211_registered_device *rdev, + struct net_device *netdev, const u8 *bssid); void nl80211_send_disconnected(struct cfg80211_registered_device *rdev, struct net_device *netdev, u16 reason, const u8 *ie, size_t ie_len, bool from_ap); diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index ce23d7d49960..0c06240d25af 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __CFG80211_RDEV_OPS #define __CFG80211_RDEV_OPS diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 5fae296a6a58..3871998059de 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -4,6 +4,7 @@ * Copyright 2007 Johannes Berg <johannes@sipsolutions.net> * Copyright 2008-2011 Luis R. Rodriguez <mcgrof@qca.qualcomm.com> * Copyright 2013-2014 Intel Mobile Communications GmbH + * Copyright 2017 Intel Deutschland GmbH * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above @@ -52,12 +53,13 @@ #include <linux/ctype.h> #include <linux/nl80211.h> #include <linux/platform_device.h> +#include <linux/verification.h> #include <linux/moduleparam.h> +#include <linux/firmware.h> #include <net/cfg80211.h> #include "core.h" #include "reg.h" #include "rdev-ops.h" -#include "regdb.h" #include "nl80211.h" /* @@ -99,7 +101,7 @@ static struct regulatory_request core_request_world = { static struct regulatory_request __rcu *last_request = (void __force __rcu *)&core_request_world; -/* To trigger userspace events */ +/* To trigger userspace events and load firmware */ static struct platform_device *reg_pdev; /* @@ -442,7 +444,6 @@ reg_copy_regd(const struct ieee80211_regdomain *src_regd) return regd; } -#ifdef CONFIG_CFG80211_INTERNAL_REGDB struct reg_regdb_apply_request { struct list_head list; const struct ieee80211_regdomain *regdom; @@ -474,55 +475,26 @@ static void reg_regdb_apply(struct work_struct *work) static DECLARE_WORK(reg_regdb_work, reg_regdb_apply); -static int reg_query_builtin(const char *alpha2) +static int reg_schedule_apply(const struct ieee80211_regdomain *regdom) { - const struct ieee80211_regdomain *regdom = NULL; struct reg_regdb_apply_request *request; - unsigned int i; - - for (i = 0; i < reg_regdb_size; i++) { - if (alpha2_equal(alpha2, reg_regdb[i]->alpha2)) { - regdom = reg_regdb[i]; - break; - } - } - - if (!regdom) - return -ENODATA; request = kzalloc(sizeof(struct reg_regdb_apply_request), GFP_KERNEL); - if (!request) - return -ENOMEM; - - request->regdom = reg_copy_regd(regdom); - if (IS_ERR_OR_NULL(request->regdom)) { - kfree(request); + if (!request) { + kfree(regdom); return -ENOMEM; } + request->regdom = regdom; + mutex_lock(®_regdb_apply_mutex); list_add_tail(&request->list, ®_regdb_apply_list); mutex_unlock(®_regdb_apply_mutex); schedule_work(®_regdb_work); - return 0; } -/* Feel free to add any other sanity checks here */ -static void reg_regdb_size_check(void) -{ - /* We should ideally BUILD_BUG_ON() but then random builds would fail */ - WARN_ONCE(!reg_regdb_size, "db.txt is empty, you should update it..."); -} -#else -static inline void reg_regdb_size_check(void) {} -static inline int reg_query_builtin(const char *alpha2) -{ - return -ENODATA; -} -#endif /* CONFIG_CFG80211_INTERNAL_REGDB */ - #ifdef CONFIG_CFG80211_CRDA_SUPPORT /* Max number of consecutive attempts to communicate with CRDA */ #define REG_MAX_CRDA_TIMEOUTS 10 @@ -598,10 +570,402 @@ static inline int call_crda(const char *alpha2) } #endif /* CONFIG_CFG80211_CRDA_SUPPORT */ +/* code to directly load a firmware database through request_firmware */ +static const struct fwdb_header *regdb; + +struct fwdb_country { + u8 alpha2[2]; + __be16 coll_ptr; + /* this struct cannot be extended */ +} __packed __aligned(4); + +struct fwdb_collection { + u8 len; + u8 n_rules; + u8 dfs_region; + /* no optional data yet */ + /* aligned to 2, then followed by __be16 array of rule pointers */ +} __packed __aligned(4); + +enum fwdb_flags { + FWDB_FLAG_NO_OFDM = BIT(0), + FWDB_FLAG_NO_OUTDOOR = BIT(1), + FWDB_FLAG_DFS = BIT(2), + FWDB_FLAG_NO_IR = BIT(3), + FWDB_FLAG_AUTO_BW = BIT(4), +}; + +struct fwdb_rule { + u8 len; + u8 flags; + __be16 max_eirp; + __be32 start, end, max_bw; + /* start of optional data */ + __be16 cac_timeout; +} __packed __aligned(4); + +#define FWDB_MAGIC 0x52474442 +#define FWDB_VERSION 20 + +struct fwdb_header { + __be32 magic; + __be32 version; + struct fwdb_country country[]; +} __packed __aligned(4); + +static bool valid_rule(const u8 *data, unsigned int size, u16 rule_ptr) +{ + struct fwdb_rule *rule = (void *)(data + (rule_ptr << 2)); + + if ((u8 *)rule + sizeof(rule->len) > data + size) + return false; + + /* mandatory fields */ + if (rule->len < offsetofend(struct fwdb_rule, max_bw)) + return false; + + return true; +} + +static bool valid_country(const u8 *data, unsigned int size, + const struct fwdb_country *country) +{ + unsigned int ptr = be16_to_cpu(country->coll_ptr) << 2; + struct fwdb_collection *coll = (void *)(data + ptr); + __be16 *rules_ptr; + unsigned int i; + + /* make sure we can read len/n_rules */ + if ((u8 *)coll + offsetofend(typeof(*coll), n_rules) > data + size) + return false; + + /* make sure base struct and all rules fit */ + if ((u8 *)coll + ALIGN(coll->len, 2) + + (coll->n_rules * 2) > data + size) + return false; + + /* mandatory fields must exist */ + if (coll->len < offsetofend(struct fwdb_collection, dfs_region)) + return false; + + rules_ptr = (void *)((u8 *)coll + ALIGN(coll->len, 2)); + + for (i = 0; i < coll->n_rules; i++) { + u16 rule_ptr = be16_to_cpu(rules_ptr[i]); + + if (!valid_rule(data, size, rule_ptr)) + return false; + } + + return true; +} + +#ifdef CONFIG_CFG80211_REQUIRE_SIGNED_REGDB +static struct key *builtin_regdb_keys; + +static void __init load_keys_from_buffer(const u8 *p, unsigned int buflen) +{ + const u8 *end = p + buflen; + size_t plen; + key_ref_t key; + + while (p < end) { + /* Each cert begins with an ASN.1 SEQUENCE tag and must be more + * than 256 bytes in size. + */ + if (end - p < 4) + goto dodgy_cert; + if (p[0] != 0x30 && + p[1] != 0x82) + goto dodgy_cert; + plen = (p[2] << 8) | p[3]; + plen += 4; + if (plen > end - p) + goto dodgy_cert; + + key = key_create_or_update(make_key_ref(builtin_regdb_keys, 1), + "asymmetric", NULL, p, plen, + ((KEY_POS_ALL & ~KEY_POS_SETATTR) | + KEY_USR_VIEW | KEY_USR_READ), + KEY_ALLOC_NOT_IN_QUOTA | + KEY_ALLOC_BUILT_IN | + KEY_ALLOC_BYPASS_RESTRICTION); + if (IS_ERR(key)) { + pr_err("Problem loading in-kernel X.509 certificate (%ld)\n", + PTR_ERR(key)); + } else { + pr_notice("Loaded X.509 cert '%s'\n", + key_ref_to_ptr(key)->description); + key_ref_put(key); + } + p += plen; + } + + return; + +dodgy_cert: + pr_err("Problem parsing in-kernel X.509 certificate list\n"); +} + +static int __init load_builtin_regdb_keys(void) +{ + builtin_regdb_keys = + keyring_alloc(".builtin_regdb_keys", + KUIDT_INIT(0), KGIDT_INIT(0), current_cred(), + ((KEY_POS_ALL & ~KEY_POS_SETATTR) | + KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH), + KEY_ALLOC_NOT_IN_QUOTA, NULL, NULL); + if (IS_ERR(builtin_regdb_keys)) + return PTR_ERR(builtin_regdb_keys); + + pr_notice("Loading compiled-in X.509 certificates for regulatory database\n"); + +#ifdef CONFIG_CFG80211_USE_KERNEL_REGDB_KEYS + load_keys_from_buffer(shipped_regdb_certs, shipped_regdb_certs_len); +#endif +#ifdef CONFIG_CFG80211_EXTRA_REGDB_KEYDIR + if (CONFIG_CFG80211_EXTRA_REGDB_KEYDIR[0] != '\0') + load_keys_from_buffer(extra_regdb_certs, extra_regdb_certs_len); +#endif + + return 0; +} + +static bool regdb_has_valid_signature(const u8 *data, unsigned int size) +{ + const struct firmware *sig; + bool result; + + if (request_firmware(&sig, "regulatory.db.p7s", ®_pdev->dev)) + return false; + + result = verify_pkcs7_signature(data, size, sig->data, sig->size, + builtin_regdb_keys, + VERIFYING_UNSPECIFIED_SIGNATURE, + NULL, NULL) == 0; + + release_firmware(sig); + + return result; +} + +static void free_regdb_keyring(void) +{ + key_put(builtin_regdb_keys); +} +#else +static int load_builtin_regdb_keys(void) +{ + return 0; +} + +static bool regdb_has_valid_signature(const u8 *data, unsigned int size) +{ + return true; +} + +static void free_regdb_keyring(void) +{ +} +#endif /* CONFIG_CFG80211_REQUIRE_SIGNED_REGDB */ + +static bool valid_regdb(const u8 *data, unsigned int size) +{ + const struct fwdb_header *hdr = (void *)data; + const struct fwdb_country *country; + + if (size < sizeof(*hdr)) + return false; + + if (hdr->magic != cpu_to_be32(FWDB_MAGIC)) + return false; + + if (hdr->version != cpu_to_be32(FWDB_VERSION)) + return false; + + if (!regdb_has_valid_signature(data, size)) + return false; + + country = &hdr->country[0]; + while ((u8 *)(country + 1) <= data + size) { + if (!country->coll_ptr) + break; + if (!valid_country(data, size, country)) + return false; + country++; + } + + return true; +} + +static int regdb_query_country(const struct fwdb_header *db, + const struct fwdb_country *country) +{ + unsigned int ptr = be16_to_cpu(country->coll_ptr) << 2; + struct fwdb_collection *coll = (void *)((u8 *)db + ptr); + struct ieee80211_regdomain *regdom; + unsigned int size_of_regd; + unsigned int i; + + size_of_regd = + sizeof(struct ieee80211_regdomain) + + coll->n_rules * sizeof(struct ieee80211_reg_rule); + + regdom = kzalloc(size_of_regd, GFP_KERNEL); + if (!regdom) + return -ENOMEM; + + regdom->n_reg_rules = coll->n_rules; + regdom->alpha2[0] = country->alpha2[0]; + regdom->alpha2[1] = country->alpha2[1]; + regdom->dfs_region = coll->dfs_region; + + for (i = 0; i < regdom->n_reg_rules; i++) { + __be16 *rules_ptr = (void *)((u8 *)coll + ALIGN(coll->len, 2)); + unsigned int rule_ptr = be16_to_cpu(rules_ptr[i]) << 2; + struct fwdb_rule *rule = (void *)((u8 *)db + rule_ptr); + struct ieee80211_reg_rule *rrule = ®dom->reg_rules[i]; + + rrule->freq_range.start_freq_khz = be32_to_cpu(rule->start); + rrule->freq_range.end_freq_khz = be32_to_cpu(rule->end); + rrule->freq_range.max_bandwidth_khz = be32_to_cpu(rule->max_bw); + + rrule->power_rule.max_antenna_gain = 0; + rrule->power_rule.max_eirp = be16_to_cpu(rule->max_eirp); + + rrule->flags = 0; + if (rule->flags & FWDB_FLAG_NO_OFDM) + rrule->flags |= NL80211_RRF_NO_OFDM; + if (rule->flags & FWDB_FLAG_NO_OUTDOOR) + rrule->flags |= NL80211_RRF_NO_OUTDOOR; + if (rule->flags & FWDB_FLAG_DFS) + rrule->flags |= NL80211_RRF_DFS; + if (rule->flags & FWDB_FLAG_NO_IR) + rrule->flags |= NL80211_RRF_NO_IR; + if (rule->flags & FWDB_FLAG_AUTO_BW) + rrule->flags |= NL80211_RRF_AUTO_BW; + + rrule->dfs_cac_ms = 0; + + /* handle optional data */ + if (rule->len >= offsetofend(struct fwdb_rule, cac_timeout)) + rrule->dfs_cac_ms = + 1000 * be16_to_cpu(rule->cac_timeout); + } + + return reg_schedule_apply(regdom); +} + +static int query_regdb(const char *alpha2) +{ + const struct fwdb_header *hdr = regdb; + const struct fwdb_country *country; + + ASSERT_RTNL(); + + if (IS_ERR(regdb)) + return PTR_ERR(regdb); + + country = &hdr->country[0]; + while (country->coll_ptr) { + if (alpha2_equal(alpha2, country->alpha2)) + return regdb_query_country(regdb, country); + country++; + } + + return -ENODATA; +} + +static void regdb_fw_cb(const struct firmware *fw, void *context) +{ + int set_error = 0; + bool restore = true; + void *db; + + if (!fw) { + pr_info("failed to load regulatory.db\n"); + set_error = -ENODATA; + } else if (!valid_regdb(fw->data, fw->size)) { + pr_info("loaded regulatory.db is malformed or signature is missing/invalid\n"); + set_error = -EINVAL; + } + + rtnl_lock(); + if (WARN_ON(regdb && !IS_ERR(regdb))) { + /* just restore and free new db */ + } else if (set_error) { + regdb = ERR_PTR(set_error); + } else if (fw) { + db = kmemdup(fw->data, fw->size, GFP_KERNEL); + if (db) { + regdb = db; + restore = context && query_regdb(context); + } else { + restore = true; + } + } + + if (restore) + restore_regulatory_settings(true); + + rtnl_unlock(); + + kfree(context); + + release_firmware(fw); +} + +static int query_regdb_file(const char *alpha2) +{ + ASSERT_RTNL(); + + if (regdb) + return query_regdb(alpha2); + + alpha2 = kmemdup(alpha2, 2, GFP_KERNEL); + if (!alpha2) + return -ENOMEM; + + return request_firmware_nowait(THIS_MODULE, true, "regulatory.db", + ®_pdev->dev, GFP_KERNEL, + (void *)alpha2, regdb_fw_cb); +} + +int reg_reload_regdb(void) +{ + const struct firmware *fw; + void *db; + int err; + + err = request_firmware(&fw, "regulatory.db", ®_pdev->dev); + if (err) + return err; + + if (!valid_regdb(fw->data, fw->size)) { + err = -ENODATA; + goto out; + } + + db = kmemdup(fw->data, fw->size, GFP_KERNEL); + if (!db) { + err = -ENOMEM; + goto out; + } + + rtnl_lock(); + if (!IS_ERR_OR_NULL(regdb)) + kfree(regdb); + regdb = db; + rtnl_unlock(); + + out: + release_firmware(fw); + return err; +} + static bool reg_query_database(struct regulatory_request *request) { - /* query internal regulatory database (if it exists) */ - if (reg_query_builtin(request->alpha2) == 0) + if (query_regdb_file(request->alpha2) == 0) return true; if (call_crda(request->alpha2) == 0) @@ -1483,7 +1847,9 @@ static void reg_process_ht_flags_channel(struct wiphy *wiphy, { struct ieee80211_supported_band *sband = wiphy->bands[channel->band]; struct ieee80211_channel *channel_before = NULL, *channel_after = NULL; + const struct ieee80211_regdomain *regd; unsigned int i; + u32 flags; if (!is_ht40_allowed(channel)) { channel->flags |= IEEE80211_CHAN_NO_HT40; @@ -1503,17 +1869,30 @@ static void reg_process_ht_flags_channel(struct wiphy *wiphy, channel_after = c; } + flags = 0; + regd = get_wiphy_regdom(wiphy); + if (regd) { + const struct ieee80211_reg_rule *reg_rule = + freq_reg_info_regd(MHZ_TO_KHZ(channel->center_freq), + regd, MHZ_TO_KHZ(20)); + + if (!IS_ERR(reg_rule)) + flags = reg_rule->flags; + } + /* * Please note that this assumes target bandwidth is 20 MHz, * if that ever changes we also need to change the below logic * to include that as well. */ - if (!is_ht40_allowed(channel_before)) + if (!is_ht40_allowed(channel_before) || + flags & NL80211_RRF_NO_HT40MINUS) channel->flags |= IEEE80211_CHAN_NO_HT40MINUS; else channel->flags &= ~IEEE80211_CHAN_NO_HT40MINUS; - if (!is_ht40_allowed(channel_after)) + if (!is_ht40_allowed(channel_after) || + flags & NL80211_RRF_NO_HT40PLUS) channel->flags |= IEEE80211_CHAN_NO_HT40PLUS; else channel->flags &= ~IEEE80211_CHAN_NO_HT40PLUS; @@ -3269,6 +3648,10 @@ int __init regulatory_init(void) { int err = 0; + err = load_builtin_regdb_keys(); + if (err) + return err; + reg_pdev = platform_device_register_simple("regulatory", 0, NULL, 0); if (IS_ERR(reg_pdev)) return PTR_ERR(reg_pdev); @@ -3277,8 +3660,6 @@ int __init regulatory_init(void) spin_lock_init(®_pending_beacons_lock); spin_lock_init(®_indoor_lock); - reg_regdb_size_check(); - rcu_assign_pointer(cfg80211_regdomain, cfg80211_world_regdom); user_alpha2[0] = '9'; @@ -3344,4 +3725,9 @@ void regulatory_exit(void) list_del(®_request->list); kfree(reg_request); } + + if (!IS_ERR_OR_NULL(regdb)) + kfree(regdb); + + free_regdb_keyring(); } diff --git a/net/wireless/reg.h b/net/wireless/reg.h index ca7fedf2e7a1..9ceeb5f3a7cb 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -1,5 +1,8 @@ #ifndef __NET_WIRELESS_REG_H #define __NET_WIRELESS_REG_H + +#include <net/cfg80211.h> + /* * Copyright 2008-2011 Luis R. Rodriguez <mcgrof@qca.qualcomm.com> * @@ -179,4 +182,15 @@ void regulatory_propagate_dfs_state(struct wiphy *wiphy, * @wiphy2 - wiphy it's dfs_region to be checked against that of wiphy1 */ bool reg_dfs_domain_same(struct wiphy *wiphy1, struct wiphy *wiphy2); + +/** + * reg_reload_regdb - reload the regulatory.db firmware file + */ +int reg_reload_regdb(void); + +extern const u8 shipped_regdb_certs[]; +extern unsigned int shipped_regdb_certs_len; +extern const u8 extra_regdb_certs[]; +extern unsigned int extra_regdb_certs_len; + #endif /* __NET_WIRELESS_REG_H */ diff --git a/net/wireless/regdb.h b/net/wireless/regdb.h deleted file mode 100644 index 3279cfcefb0c..000000000000 --- a/net/wireless/regdb.h +++ /dev/null @@ -1,23 +0,0 @@ -#ifndef __REGDB_H__ -#define __REGDB_H__ - -/* - * Copyright 2009 John W. Linville <linville@tuxdriver.com> - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR - * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN - * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF - * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. - */ - -extern const struct ieee80211_regdomain *reg_regdb[]; -extern int reg_regdb_size; - -#endif /* __REGDB_H__ */ diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 9f0901f3e42b..f6c5fe482506 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * cfg80211 scan result handling * diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 0a49b88070d0..fdb3646274a5 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * SME code for cfg80211 * both driver SME event handling and the SME implementation @@ -522,11 +523,6 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev, return -EOPNOTSUPP; if (wdev->current_bss) { - if (!prev_bssid) - return -EALREADY; - if (prev_bssid && - !ether_addr_equal(prev_bssid, wdev->current_bss->pub.bssid)) - return -ENOTCONN; cfg80211_unhold_bss(wdev->current_bss); cfg80211_put_bss(wdev->wiphy, &wdev->current_bss->pub); wdev->current_bss = NULL; @@ -960,7 +956,6 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info, ev->rm.resp_ie_len = info->resp_ie_len; memcpy((void *)ev->rm.resp_ie, info->resp_ie, info->resp_ie_len); ev->rm.bss = info->bss; - ev->rm.authorized = info->authorized; spin_lock_irqsave(&wdev->event_lock, flags); list_add_tail(&ev->list, &wdev->event_list); @@ -969,6 +964,50 @@ void cfg80211_roamed(struct net_device *dev, struct cfg80211_roam_info *info, } EXPORT_SYMBOL(cfg80211_roamed); +void __cfg80211_port_authorized(struct wireless_dev *wdev, const u8 *bssid) +{ + ASSERT_WDEV_LOCK(wdev); + + if (WARN_ON(wdev->iftype != NL80211_IFTYPE_STATION)) + return; + + if (WARN_ON(!wdev->current_bss) || + WARN_ON(!ether_addr_equal(wdev->current_bss->pub.bssid, bssid))) + return; + + nl80211_send_port_authorized(wiphy_to_rdev(wdev->wiphy), wdev->netdev, + bssid); +} + +void cfg80211_port_authorized(struct net_device *dev, const u8 *bssid, + gfp_t gfp) +{ + struct wireless_dev *wdev = dev->ieee80211_ptr; + struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy); + struct cfg80211_event *ev; + unsigned long flags; + + if (WARN_ON(!bssid)) + return; + + ev = kzalloc(sizeof(*ev), gfp); + if (!ev) + return; + + ev->type = EVENT_PORT_AUTHORIZED; + memcpy(ev->pa.bssid, bssid, ETH_ALEN); + + /* + * Use the wdev event list so that if there are pending + * connected/roamed events, they will be reported first. + */ + spin_lock_irqsave(&wdev->event_lock, flags); + list_add_tail(&ev->list, &wdev->event_list); + spin_unlock_irqrestore(&wdev->event_lock, flags); + queue_work(cfg80211_wq, &rdev->event_work); +} +EXPORT_SYMBOL(cfg80211_port_authorized); + void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, size_t ie_len, u16 reason, bool from_ap) { @@ -1063,11 +1102,35 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, ASSERT_WDEV_LOCK(wdev); - if (WARN_ON(wdev->connect_keys)) { - kzfree(wdev->connect_keys); - wdev->connect_keys = NULL; + /* + * If we have an ssid_len, we're trying to connect or are + * already connected, so reject a new SSID unless it's the + * same (which is the case for re-association.) + */ + if (wdev->ssid_len && + (wdev->ssid_len != connect->ssid_len || + memcmp(wdev->ssid, connect->ssid, wdev->ssid_len))) + return -EALREADY; + + /* + * If connected, reject (re-)association unless prev_bssid + * matches the current BSSID. + */ + if (wdev->current_bss) { + if (!prev_bssid) + return -EALREADY; + if (!ether_addr_equal(prev_bssid, wdev->current_bss->pub.bssid)) + return -ENOTCONN; } + /* + * Reject if we're in the process of connecting with WEP, + * this case isn't very interesting and trying to handle + * it would make the code much more complex. + */ + if (wdev->connect_keys) + return -EINPROGRESS; + cfg80211_oper_and_ht_capa(&connect->ht_capa_mask, rdev->wiphy.ht_capa_mod_mask); @@ -1118,7 +1181,12 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev, if (err) { wdev->connect_keys = NULL; - wdev->ssid_len = 0; + /* + * This could be reassoc getting refused, don't clear + * ssid_len in that case. + */ + if (!wdev->current_bss) + wdev->ssid_len = 0; return err; } @@ -1145,6 +1213,14 @@ int cfg80211_disconnect(struct cfg80211_registered_device *rdev, else if (wdev->ssid_len) err = rdev_disconnect(rdev, dev, reason); + /* + * Clear ssid_len unless we actually were fully connected, + * in which case cfg80211_disconnected() will take care of + * this later. + */ + if (!wdev->current_bss) + wdev->ssid_len = 0; + return err; } diff --git a/net/wireless/sysfs.h b/net/wireless/sysfs.h index b533ed71daff..7b454c2de9b7 100644 --- a/net/wireless/sysfs.h +++ b/net/wireless/sysfs.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef __WIRELESS_SYSFS_H #define __WIRELESS_SYSFS_H diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 0f8db41eaddb..f3353fe5b35b 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #undef TRACE_SYSTEM #define TRACE_SYSTEM cfg80211 diff --git a/net/wireless/util.c b/net/wireless/util.c index bcb1284c3415..c69160694b6c 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Wireless utility functions * @@ -157,32 +158,30 @@ static void set_mandatory_flags_band(struct ieee80211_supported_band *sband) case NL80211_BAND_2GHZ: want = 7; for (i = 0; i < sband->n_bitrates; i++) { - if (sband->bitrates[i].bitrate == 10) { + switch (sband->bitrates[i].bitrate) { + case 10: + case 20: + case 55: + case 110: sband->bitrates[i].flags |= IEEE80211_RATE_MANDATORY_B | IEEE80211_RATE_MANDATORY_G; want--; - } - - if (sband->bitrates[i].bitrate == 20 || - sband->bitrates[i].bitrate == 55 || - sband->bitrates[i].bitrate == 110 || - sband->bitrates[i].bitrate == 60 || - sband->bitrates[i].bitrate == 120 || - sband->bitrates[i].bitrate == 240) { + break; + case 60: + case 120: + case 240: sband->bitrates[i].flags |= IEEE80211_RATE_MANDATORY_G; want--; - } - - if (sband->bitrates[i].bitrate != 10 && - sband->bitrates[i].bitrate != 20 && - sband->bitrates[i].bitrate != 55 && - sband->bitrates[i].bitrate != 110) + /* fall through */ + default: sband->bitrates[i].flags |= IEEE80211_RATE_ERP_G; + break; + } } - WARN_ON(want != 0 && want != 3 && want != 6); + WARN_ON(want != 0 && want != 3); break; case NL80211_BAND_60GHZ: /* check for mandatory HT MCS 1..4 */ @@ -529,121 +528,6 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, } EXPORT_SYMBOL(ieee80211_data_to_8023_exthdr); -int ieee80211_data_from_8023(struct sk_buff *skb, const u8 *addr, - enum nl80211_iftype iftype, - const u8 *bssid, bool qos) -{ - struct ieee80211_hdr hdr; - u16 hdrlen, ethertype; - __le16 fc; - const u8 *encaps_data; - int encaps_len, skip_header_bytes; - int nh_pos, h_pos; - int head_need; - - if (unlikely(skb->len < ETH_HLEN)) - return -EINVAL; - - nh_pos = skb_network_header(skb) - skb->data; - h_pos = skb_transport_header(skb) - skb->data; - - /* convert Ethernet header to proper 802.11 header (based on - * operation mode) */ - ethertype = (skb->data[12] << 8) | skb->data[13]; - fc = cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_DATA); - - switch (iftype) { - case NL80211_IFTYPE_AP: - case NL80211_IFTYPE_AP_VLAN: - case NL80211_IFTYPE_P2P_GO: - fc |= cpu_to_le16(IEEE80211_FCTL_FROMDS); - /* DA BSSID SA */ - memcpy(hdr.addr1, skb->data, ETH_ALEN); - memcpy(hdr.addr2, addr, ETH_ALEN); - memcpy(hdr.addr3, skb->data + ETH_ALEN, ETH_ALEN); - hdrlen = 24; - break; - case NL80211_IFTYPE_STATION: - case NL80211_IFTYPE_P2P_CLIENT: - fc |= cpu_to_le16(IEEE80211_FCTL_TODS); - /* BSSID SA DA */ - memcpy(hdr.addr1, bssid, ETH_ALEN); - memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); - memcpy(hdr.addr3, skb->data, ETH_ALEN); - hdrlen = 24; - break; - case NL80211_IFTYPE_OCB: - case NL80211_IFTYPE_ADHOC: - /* DA SA BSSID */ - memcpy(hdr.addr1, skb->data, ETH_ALEN); - memcpy(hdr.addr2, skb->data + ETH_ALEN, ETH_ALEN); - memcpy(hdr.addr3, bssid, ETH_ALEN); - hdrlen = 24; - break; - default: - return -EOPNOTSUPP; - } - - if (qos) { - fc |= cpu_to_le16(IEEE80211_STYPE_QOS_DATA); - hdrlen += 2; - } - - hdr.frame_control = fc; - hdr.duration_id = 0; - hdr.seq_ctrl = 0; - - skip_header_bytes = ETH_HLEN; - if (ethertype == ETH_P_AARP || ethertype == ETH_P_IPX) { - encaps_data = bridge_tunnel_header; - encaps_len = sizeof(bridge_tunnel_header); - skip_header_bytes -= 2; - } else if (ethertype >= ETH_P_802_3_MIN) { - encaps_data = rfc1042_header; - encaps_len = sizeof(rfc1042_header); - skip_header_bytes -= 2; - } else { - encaps_data = NULL; - encaps_len = 0; - } - - skb_pull(skb, skip_header_bytes); - nh_pos -= skip_header_bytes; - h_pos -= skip_header_bytes; - - head_need = hdrlen + encaps_len - skb_headroom(skb); - - if (head_need > 0 || skb_cloned(skb)) { - head_need = max(head_need, 0); - if (head_need) - skb_orphan(skb); - - if (pskb_expand_head(skb, head_need, 0, GFP_ATOMIC)) - return -ENOMEM; - } - - if (encaps_data) { - memcpy(skb_push(skb, encaps_len), encaps_data, encaps_len); - nh_pos += encaps_len; - h_pos += encaps_len; - } - - memcpy(skb_push(skb, hdrlen), &hdr, hdrlen); - - nh_pos += hdrlen; - h_pos += hdrlen; - - /* Update skb pointers to various headers since this modified frame - * is going to go through Linux networking code that may potentially - * need things like pointer to IP header. */ - skb_reset_mac_header(skb); - skb_set_network_header(skb, nh_pos); - skb_set_transport_header(skb, h_pos); - - return 0; -} -EXPORT_SYMBOL(ieee80211_data_from_8023); - static void __frame_add_frag(struct sk_buff *skb, struct page *page, void *ptr, int len, int size) @@ -963,6 +847,9 @@ void cfg80211_process_wdev_events(struct wireless_dev *wdev) case EVENT_STOPPED: __cfg80211_leave(wiphy_to_rdev(wdev->wiphy), wdev); break; + case EVENT_PORT_AUTHORIZED: + __cfg80211_port_authorized(wdev, ev->pa.bssid); + break; } wdev_unlock(wdev); @@ -1367,13 +1254,29 @@ int cfg80211_get_p2p_attr(const u8 *ies, unsigned int len, } EXPORT_SYMBOL(cfg80211_get_p2p_attr); -static bool ieee80211_id_in_list(const u8 *ids, int n_ids, u8 id) +static bool ieee80211_id_in_list(const u8 *ids, int n_ids, u8 id, bool id_ext) { int i; - for (i = 0; i < n_ids; i++) - if (ids[i] == id) + /* Make sure array values are legal */ + if (WARN_ON(ids[n_ids - 1] == WLAN_EID_EXTENSION)) + return false; + + i = 0; + while (i < n_ids) { + if (ids[i] == WLAN_EID_EXTENSION) { + if (id_ext && (ids[i + 1] == id)) + return true; + + i += 2; + continue; + } + + if (ids[i] == id && !id_ext) return true; + + i++; + } return false; } @@ -1403,14 +1306,36 @@ size_t ieee80211_ie_split_ric(const u8 *ies, size_t ielen, { size_t pos = offset; - while (pos < ielen && ieee80211_id_in_list(ids, n_ids, ies[pos])) { + while (pos < ielen) { + u8 ext = 0; + + if (ies[pos] == WLAN_EID_EXTENSION) + ext = 2; + if ((pos + ext) >= ielen) + break; + + if (!ieee80211_id_in_list(ids, n_ids, ies[pos + ext], + ies[pos] == WLAN_EID_EXTENSION)) + break; + if (ies[pos] == WLAN_EID_RIC_DATA && n_after_ric) { pos = skip_ie(ies, ielen, pos); - while (pos < ielen && - !ieee80211_id_in_list(after_ric, n_after_ric, - ies[pos])) - pos = skip_ie(ies, ielen, pos); + while (pos < ielen) { + if (ies[pos] == WLAN_EID_EXTENSION) + ext = 2; + else + ext = 0; + + if ((pos + ext) >= ielen) + break; + + if (!ieee80211_id_in_list(after_ric, + n_after_ric, + ies[pos + ext], + ext == 2)) + pos = skip_ie(ies, ielen, pos); + } } else { pos = skip_ie(ies, ielen, pos); } diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 5d4a02c7979b..7ca04a7de85a 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * cfg80211 - wext compat code * diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index c434f193f39a..c67d7a82ab13 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * cfg80211 wext compat for managed mode. * diff --git a/net/x25/Makefile b/net/x25/Makefile index a2c34ab6f194..5dd544a231f2 100644 --- a/net/x25/Makefile +++ b/net/x25/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the Linux X.25 Packet layer. # diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 5a1a98df3499..ea87143314f3 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -74,7 +74,7 @@ DEFINE_RWLOCK(x25_list_lock); static const struct proto_ops x25_proto_ops; -static struct x25_address null_x25_address = {" "}; +static const struct x25_address null_x25_address = {" "}; #ifdef CONFIG_COMPAT struct compat_x25_subscrip_struct { @@ -374,9 +374,11 @@ static void __x25_destroy_socket(struct sock *); /* * handler for deferred kills. */ -static void x25_destroy_timer(unsigned long data) +static void x25_destroy_timer(struct timer_list *t) { - x25_destroy_socket_from_timer((struct sock *)data); + struct sock *sk = from_timer(sk, t, sk_timer); + + x25_destroy_socket_from_timer(sk); } /* @@ -413,8 +415,7 @@ static void __x25_destroy_socket(struct sock *sk) if (sk_has_allocations(sk)) { /* Defer: outstanding buffers */ sk->sk_timer.expires = jiffies + 10 * HZ; - sk->sk_timer.function = x25_destroy_timer; - sk->sk_timer.data = (unsigned long)sk; + sk->sk_timer.function = (TIMER_FUNC_TYPE)x25_destroy_timer; add_timer(&sk->sk_timer); } else { /* drop last reference so sock_put will free */ diff --git a/net/x25/sysctl_net_x25.c b/net/x25/sysctl_net_x25.c index ba078c85f0a1..e9802afa43d0 100644 --- a/net/x25/sysctl_net_x25.c +++ b/net/x25/sysctl_net_x25.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* -*- linux-c -*- * sysctl_net_x25.c: sysctl interface to net X.25 subsystem. * diff --git a/net/x25/x25_facilities.c b/net/x25/x25_facilities.c index 997ff7b2509b..ad1734d36ed7 100644 --- a/net/x25/x25_facilities.c +++ b/net/x25/x25_facilities.c @@ -103,7 +103,7 @@ int x25_parse_facilities(struct sk_buff *skb, struct x25_facilities *facilities, *vc_fac_mask |= X25_MASK_REVERSE; break; } - + /*fall through */ case X25_FAC_THROUGHPUT: facilities->throughput = p[1]; *vc_fac_mask |= X25_MASK_THROUGHPUT; diff --git a/net/x25/x25_in.c b/net/x25/x25_in.c index 7ac50098a375..3c12cae32001 100644 --- a/net/x25/x25_in.c +++ b/net/x25/x25_in.c @@ -345,6 +345,7 @@ static int x25_state4_machine(struct sock *sk, struct sk_buff *skb, int frametyp case X25_RESET_REQUEST: x25_write_internal(sk, X25_RESET_CONFIRMATION); + /* fall through */ case X25_RESET_CONFIRMATION: { x25_stop_timer(sk); x25->condition = 0x00; diff --git a/net/x25/x25_timer.c b/net/x25/x25_timer.c index 5c5db1a36399..1dfba3c23459 100644 --- a/net/x25/x25_timer.c +++ b/net/x25/x25_timer.c @@ -26,18 +26,17 @@ #include <net/tcp_states.h> #include <net/x25.h> -static void x25_heartbeat_expiry(unsigned long); -static void x25_timer_expiry(unsigned long); +static void x25_heartbeat_expiry(struct timer_list *t); +static void x25_timer_expiry(struct timer_list *t); void x25_init_timers(struct sock *sk) { struct x25_sock *x25 = x25_sk(sk); - setup_timer(&x25->timer, x25_timer_expiry, (unsigned long)sk); + timer_setup(&x25->timer, x25_timer_expiry, 0); /* initialized by sock_init_data */ - sk->sk_timer.data = (unsigned long)sk; - sk->sk_timer.function = &x25_heartbeat_expiry; + sk->sk_timer.function = (TIMER_FUNC_TYPE)x25_heartbeat_expiry; } void x25_start_heartbeat(struct sock *sk) @@ -93,9 +92,9 @@ unsigned long x25_display_timer(struct sock *sk) return x25->timer.expires - jiffies; } -static void x25_heartbeat_expiry(unsigned long param) +static void x25_heartbeat_expiry(struct timer_list *t) { - struct sock *sk = (struct sock *)param; + struct sock *sk = from_timer(sk, t, sk_timer); bh_lock_sock(sk); if (sock_owned_by_user(sk)) /* can currently only occur in state 3 */ @@ -160,9 +159,10 @@ static inline void x25_do_timer_expiry(struct sock * sk) } } -static void x25_timer_expiry(unsigned long param) +static void x25_timer_expiry(struct timer_list *t) { - struct sock *sk = (struct sock *)param; + struct x25_sock *x25 = from_timer(x25, t, timer); + struct sock *sk = &x25->sk; bh_lock_sock(sk); if (sock_owned_by_user(sk)) { /* can currently only occur in state 3 */ diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index 55b2ac300995..0bd2465a8c5a 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -1,3 +1,4 @@ +# SPDX-License-Identifier: GPL-2.0 # # Makefile for the XFRM subsystem. # diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 5f7e8bfa0c2d..30e5746085b8 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -63,7 +63,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, xfrm_address_t *daddr; if (!x->type_offload) - return 0; + return -EINVAL; /* We don't yet support UDP encapsulation, TFC padding and ESN. */ if (x->encap || x->tfcpad || (x->props.flags & XFRM_STATE_ESN)) @@ -79,7 +79,8 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, daddr = &x->props.saddr; } - dst = __xfrm_dst_lookup(net, 0, 0, saddr, daddr, x->props.family); + dst = __xfrm_dst_lookup(net, 0, 0, saddr, daddr, + x->props.family, x->props.output_mark); if (IS_ERR(dst)) return 0; @@ -90,6 +91,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, } if (!dev->xfrmdev_ops || !dev->xfrmdev_ops->xdo_dev_state_add) { + xso->dev = NULL; dev_put(dev); return 0; } @@ -153,6 +155,7 @@ static int xfrm_dev_register(struct net_device *dev) static int xfrm_dev_unregister(struct net_device *dev) { + xfrm_policy_cache_flush(); return NOTIFY_DONE; } @@ -175,8 +178,7 @@ static int xfrm_dev_down(struct net_device *dev) if (dev->features & NETIF_F_HW_ESP) xfrm_dev_state_flush(dev_net(dev), dev, true); - xfrm_garbage_collect(dev_net(dev)); - + xfrm_policy_cache_flush(); return NOTIFY_DONE; } diff --git a/net/xfrm/xfrm_hash.c b/net/xfrm/xfrm_hash.c index 1e98bc0fe0a5..2ad33ce1ea17 100644 --- a/net/xfrm/xfrm_hash.c +++ b/net/xfrm/xfrm_hash.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* xfrm_hash.c: Common hash table code. * * Copyright (C) 2006 David S. Miller (davem@davemloft.net) diff --git a/net/xfrm/xfrm_hash.h b/net/xfrm/xfrm_hash.h index eaea9c4fb3b0..61be810389d8 100644 --- a/net/xfrm/xfrm_hash.h +++ b/net/xfrm/xfrm_hash.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _XFRM_HASH_H #define _XFRM_HASH_H diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index 923205e279f7..347ab31574d5 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * xfrm_input.c * @@ -247,6 +248,11 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) goto drop; } + if (xo->status & CRYPTO_INVALID_PROTOCOL) { + XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEPROTOERROR); + goto drop; + } + XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); goto drop; } @@ -260,8 +266,6 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) goto lock; } - daddr = (xfrm_address_t *)(skb_network_header(skb) + - XFRM_SPI_SKB_CB(skb)->daddroff); family = XFRM_SPI_SKB_CB(skb)->family; /* if tunnel is present override skb->mark value with tunnel i_key */ @@ -288,6 +292,8 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) goto drop; } + daddr = (xfrm_address_t *)(skb_network_header(skb) + + XFRM_SPI_SKB_CB(skb)->daddroff); do { if (skb->sp->len == XFRM_MAX_DEPTH) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR); @@ -424,6 +430,8 @@ resume: nf_reset(skb); if (decaps) { + if (skb->sp) + skb->sp->olen = 0; skb_dst_drop(skb); gro_cells_receive(&gro_cells, skb); return 0; @@ -434,6 +442,8 @@ resume: err = x->inner_mode->afinfo->transport_finish(skb, xfrm_gro || async); if (xfrm_gro) { + if (skb->sp) + skb->sp->olen = 0; skb_dst_drop(skb); gro_cells_receive(&gro_cells, skb); return err; diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c index 8c0b6722aaa8..73ad8c8ef344 100644 --- a/net/xfrm/xfrm_output.c +++ b/net/xfrm/xfrm_output.c @@ -66,6 +66,9 @@ static int xfrm_output_one(struct sk_buff *skb, int err) goto error_nolock; } + if (x->props.output_mark) + skb->mark = x->props.output_mark; + err = x->outer_mode->output(x, skb); if (err) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEMODEERROR); @@ -102,6 +105,9 @@ static int xfrm_output_one(struct sk_buff *skb, int err) if (xfrm_offload(skb)) { x->type_offload->encap(x, skb); } else { + /* Inner headers are invalid now. */ + skb->encapsulation = 0; + err = x->type->output(x, skb); if (err == -EINPROGRESS) goto out; @@ -205,7 +211,6 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb) int err; secpath_reset(skb); - skb->encapsulation = 0; if (xfrm_dev_offload_ok(skb, x)) { struct sec_path *sp; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index ff61d8557929..9542975eb2f9 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -24,6 +24,7 @@ #include <linux/netfilter.h> #include <linux/module.h> #include <linux/cache.h> +#include <linux/cpu.h> #include <linux/audit.h> #include <net/dst.h> #include <net/flow.h> @@ -44,6 +45,8 @@ struct xfrm_flo { u8 flags; }; +static DEFINE_PER_CPU(struct xfrm_dst *, xfrm_last_dst); +static struct work_struct *xfrm_pcpu_work __read_mostly; static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock); static struct xfrm_policy_afinfo const __rcu *xfrm_policy_afinfo[AF_INET6 + 1] __read_mostly; @@ -54,7 +57,7 @@ static __read_mostly seqcount_t xfrm_policy_hash_generation; static void xfrm_init_pmtu(struct dst_entry *dst); static int stale_bundle(struct dst_entry *dst); static int xfrm_bundle_ok(struct xfrm_dst *xdst); -static void xfrm_policy_queue_process(unsigned long arg); +static void xfrm_policy_queue_process(struct timer_list *t); static void __xfrm_policy_link(struct xfrm_policy *pol, int dir); static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, @@ -119,7 +122,7 @@ static const struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short fa struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif, const xfrm_address_t *saddr, const xfrm_address_t *daddr, - int family) + int family, u32 mark) { const struct xfrm_policy_afinfo *afinfo; struct dst_entry *dst; @@ -128,7 +131,7 @@ struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, int oif, if (unlikely(afinfo == NULL)) return ERR_PTR(-EAFNOSUPPORT); - dst = afinfo->dst_lookup(net, tos, oif, saddr, daddr); + dst = afinfo->dst_lookup(net, tos, oif, saddr, daddr, mark); rcu_read_unlock(); @@ -140,7 +143,7 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos, int oif, xfrm_address_t *prev_saddr, xfrm_address_t *prev_daddr, - int family) + int family, u32 mark) { struct net *net = xs_net(x); xfrm_address_t *saddr = &x->props.saddr; @@ -156,7 +159,7 @@ static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, daddr = x->coaddr; } - dst = __xfrm_dst_lookup(net, tos, oif, saddr, daddr, family); + dst = __xfrm_dst_lookup(net, tos, oif, saddr, daddr, family, mark); if (!IS_ERR(dst)) { if (prev_saddr != saddr) @@ -176,9 +179,9 @@ static inline unsigned long make_jiffies(long secs) return secs*HZ; } -static void xfrm_policy_timer(unsigned long data) +static void xfrm_policy_timer(struct timer_list *t) { - struct xfrm_policy *xp = (struct xfrm_policy *)data; + struct xfrm_policy *xp = from_timer(xp, t, timer); unsigned long now = get_seconds(); long next = LONG_MAX; int warn = 0; @@ -246,36 +249,6 @@ expired: xfrm_pol_put(xp); } -static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo) -{ - struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo); - - if (unlikely(pol->walk.dead)) - flo = NULL; - else - xfrm_pol_hold(pol); - - return flo; -} - -static int xfrm_policy_flo_check(struct flow_cache_object *flo) -{ - struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo); - - return !pol->walk.dead; -} - -static void xfrm_policy_flo_delete(struct flow_cache_object *flo) -{ - xfrm_pol_put(container_of(flo, struct xfrm_policy, flo)); -} - -static const struct flow_cache_ops xfrm_policy_fc_ops = { - .get = xfrm_policy_flo_get, - .check = xfrm_policy_flo_check, - .delete = xfrm_policy_flo_delete, -}; - /* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2 * SPD calls. */ @@ -294,11 +267,9 @@ struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp) rwlock_init(&policy->lock); refcount_set(&policy->refcnt, 1); skb_queue_head_init(&policy->polq.hold_queue); - setup_timer(&policy->timer, xfrm_policy_timer, - (unsigned long)policy); - setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process, - (unsigned long)policy); - policy->flo.ops = &xfrm_policy_fc_ops; + timer_setup(&policy->timer, xfrm_policy_timer, 0); + timer_setup(&policy->polq.hold_timer, + xfrm_policy_queue_process, 0); } return policy; } @@ -798,7 +769,6 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) else hlist_add_head(&policy->bydst, chain); __xfrm_policy_link(policy, dir); - atomic_inc(&net->xfrm.flow_cache_genid); /* After previous checking, family can either be AF_INET or AF_INET6 */ if (policy->family == AF_INET) @@ -1004,6 +974,8 @@ int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) } if (!cnt) err = -ESRCH; + else + xfrm_policy_cache_flush(); out: spin_unlock_bh(&net->xfrm.xfrm_policy_lock); return err; @@ -1175,7 +1147,7 @@ fail: } static struct xfrm_policy * -__xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir) +xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir) { #ifdef CONFIG_XFRM_SUB_POLICY struct xfrm_policy *pol; @@ -1187,61 +1159,6 @@ __xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir); } -static int flow_to_policy_dir(int dir) -{ - if (XFRM_POLICY_IN == FLOW_DIR_IN && - XFRM_POLICY_OUT == FLOW_DIR_OUT && - XFRM_POLICY_FWD == FLOW_DIR_FWD) - return dir; - - switch (dir) { - default: - case FLOW_DIR_IN: - return XFRM_POLICY_IN; - case FLOW_DIR_OUT: - return XFRM_POLICY_OUT; - case FLOW_DIR_FWD: - return XFRM_POLICY_FWD; - } -} - -static struct flow_cache_object * -xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, - u8 dir, struct flow_cache_object *old_obj, void *ctx) -{ - struct xfrm_policy *pol; - - if (old_obj) - xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo)); - - pol = __xfrm_policy_lookup(net, fl, family, flow_to_policy_dir(dir)); - if (IS_ERR_OR_NULL(pol)) - return ERR_CAST(pol); - - /* Resolver returns two references: - * one for cache and one for caller of flow_cache_lookup() */ - xfrm_pol_hold(pol); - - return &pol->flo; -} - -static inline int policy_to_flow_dir(int dir) -{ - if (XFRM_POLICY_IN == FLOW_DIR_IN && - XFRM_POLICY_OUT == FLOW_DIR_OUT && - XFRM_POLICY_FWD == FLOW_DIR_FWD) - return dir; - switch (dir) { - default: - case XFRM_POLICY_IN: - return FLOW_DIR_IN; - case XFRM_POLICY_OUT: - return FLOW_DIR_OUT; - case XFRM_POLICY_FWD: - return FLOW_DIR_FWD; - } -} - static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, const struct flowi *fl, u16 family) { @@ -1261,7 +1178,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, } err = security_xfrm_policy_lookup(pol->security, fl->flowi_secid, - policy_to_flow_dir(dir)); + dir); if (!err) { if (!xfrm_pol_hold_rcu(pol)) goto again; @@ -1388,6 +1305,7 @@ static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir) newp->xfrm_nr = old->xfrm_nr; newp->index = old->index; newp->type = old->type; + newp->family = old->family; memcpy(newp->xfrm_vec, old->xfrm_vec, newp->xfrm_nr*sizeof(struct xfrm_tmpl)); spin_lock_bh(&net->xfrm.xfrm_policy_lock); @@ -1422,14 +1340,14 @@ int __xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk) static int xfrm_get_saddr(struct net *net, int oif, xfrm_address_t *local, - xfrm_address_t *remote, unsigned short family) + xfrm_address_t *remote, unsigned short family, u32 mark) { int err; const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); if (unlikely(afinfo == NULL)) return -EINVAL; - err = afinfo->get_saddr(net, oif, local, remote); + err = afinfo->get_saddr(net, oif, local, remote, mark); rcu_read_unlock(); return err; } @@ -1460,7 +1378,7 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl, if (xfrm_addr_any(local, tmpl->encap_family)) { error = xfrm_get_saddr(net, fl->flowi_oif, &tmp, remote, - tmpl->encap_family); + tmpl->encap_family, 0); if (error) goto fail; local = &tmp; @@ -1545,58 +1463,6 @@ static int xfrm_get_tos(const struct flowi *fl, int family) return tos; } -static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo) -{ - struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); - struct dst_entry *dst = &xdst->u.dst; - - if (xdst->route == NULL) { - /* Dummy bundle - if it has xfrms we were not - * able to build bundle as template resolution failed. - * It means we need to try again resolving. */ - if (xdst->num_xfrms > 0) - return NULL; - } else if (dst->flags & DST_XFRM_QUEUE) { - return NULL; - } else { - /* Real bundle */ - if (stale_bundle(dst)) - return NULL; - } - - dst_hold(dst); - return flo; -} - -static int xfrm_bundle_flo_check(struct flow_cache_object *flo) -{ - struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); - struct dst_entry *dst = &xdst->u.dst; - - if (!xdst->route) - return 0; - if (stale_bundle(dst)) - return 0; - - return 1; -} - -static void xfrm_bundle_flo_delete(struct flow_cache_object *flo) -{ - struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo); - struct dst_entry *dst = &xdst->u.dst; - - /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */ - dst->obsolete = DST_OBSOLETE_DEAD; - dst_release_immediate(dst); -} - -static const struct flow_cache_ops xfrm_bundle_fc_ops = { - .get = xfrm_bundle_flo_get, - .check = xfrm_bundle_flo_check, - .delete = xfrm_bundle_flo_delete, -}; - static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) { const struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); @@ -1624,7 +1490,6 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) struct dst_entry *dst = &xdst->u.dst; memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst)); - xdst->flo.ops = &xfrm_bundle_fc_ops; } else xdst = ERR_PTR(-ENOBUFS); @@ -1708,6 +1573,14 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, goto put_states; } + if (!dst_prev) + dst0 = dst1; + else + /* Ref count is taken during xfrm_alloc_dst() + * No need to do dst_clone() on dst1 + */ + dst_prev->child = dst1; + if (xfrm[i]->sel.family == AF_UNSPEC) { inner_mode = xfrm_ip2inner_mode(xfrm[i], xfrm_af2proto(family)); @@ -1719,21 +1592,14 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, } else inner_mode = xfrm[i]->inner_mode; - if (!dst_prev) - dst0 = dst1; - else - /* Ref count is taken during xfrm_alloc_dst() - * No need to do dst_clone() on dst1 - */ - dst_prev->child = dst1; - xdst->route = dst; dst_copy_metrics(dst1, dst); if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { family = xfrm[i]->props.family; dst = xfrm_dst_lookup(xfrm[i], tos, fl->flowi_oif, - &saddr, &daddr, family); + &saddr, &daddr, family, + xfrm[i]->props.output_mark); err = PTR_ERR(dst); if (IS_ERR(dst)) goto put_states; @@ -1840,6 +1706,106 @@ static int xfrm_expand_policies(const struct flowi *fl, u16 family, } +static void xfrm_last_dst_update(struct xfrm_dst *xdst, struct xfrm_dst *old) +{ + this_cpu_write(xfrm_last_dst, xdst); + if (old) + dst_release(&old->u.dst); +} + +static void __xfrm_pcpu_work_fn(void) +{ + struct xfrm_dst *old; + + old = this_cpu_read(xfrm_last_dst); + if (old && !xfrm_bundle_ok(old)) + xfrm_last_dst_update(NULL, old); +} + +static void xfrm_pcpu_work_fn(struct work_struct *work) +{ + local_bh_disable(); + rcu_read_lock(); + __xfrm_pcpu_work_fn(); + rcu_read_unlock(); + local_bh_enable(); +} + +void xfrm_policy_cache_flush(void) +{ + struct xfrm_dst *old; + bool found = 0; + int cpu; + + local_bh_disable(); + rcu_read_lock(); + for_each_possible_cpu(cpu) { + old = per_cpu(xfrm_last_dst, cpu); + if (old && !xfrm_bundle_ok(old)) { + if (smp_processor_id() == cpu) { + __xfrm_pcpu_work_fn(); + continue; + } + found = true; + break; + } + } + + rcu_read_unlock(); + local_bh_enable(); + + if (!found) + return; + + get_online_cpus(); + + for_each_possible_cpu(cpu) { + bool bundle_release; + + rcu_read_lock(); + old = per_cpu(xfrm_last_dst, cpu); + bundle_release = old && !xfrm_bundle_ok(old); + rcu_read_unlock(); + + if (!bundle_release) + continue; + + if (cpu_online(cpu)) { + schedule_work_on(cpu, &xfrm_pcpu_work[cpu]); + continue; + } + + rcu_read_lock(); + old = per_cpu(xfrm_last_dst, cpu); + if (old && !xfrm_bundle_ok(old)) { + per_cpu(xfrm_last_dst, cpu) = NULL; + dst_release(&old->u.dst); + } + rcu_read_unlock(); + } + + put_online_cpus(); +} + +static bool xfrm_xdst_can_reuse(struct xfrm_dst *xdst, + struct xfrm_state * const xfrm[], + int num) +{ + const struct dst_entry *dst = &xdst->u.dst; + int i; + + if (xdst->num_xfrms != num) + return false; + + for (i = 0; i < num; i++) { + if (!dst || dst->xfrm != xfrm[i]) + return false; + dst = dst->child; + } + + return xfrm_bundle_ok(xdst); +} + static struct xfrm_dst * xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, const struct flowi *fl, u16 family, @@ -1847,8 +1813,8 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, { struct net *net = xp_net(pols[0]); struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; + struct xfrm_dst *xdst, *old; struct dst_entry *dst; - struct xfrm_dst *xdst; int err; /* Try to instantiate a bundle */ @@ -1859,6 +1825,21 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, return ERR_PTR(err); } + xdst = this_cpu_read(xfrm_last_dst); + if (xdst && + xdst->u.dst.dev == dst_orig->dev && + xdst->num_pols == num_pols && + memcmp(xdst->pols, pols, + sizeof(struct xfrm_policy *) * num_pols) == 0 && + xfrm_xdst_can_reuse(xdst, xfrm, err)) { + dst_hold(&xdst->u.dst); + while (err > 0) + xfrm_state_put(xfrm[--err]); + return xdst; + } + + old = xdst; + dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig); if (IS_ERR(dst)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR); @@ -1871,15 +1852,18 @@ xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols, memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols); xdst->policy_genid = atomic_read(&pols[0]->genid); + atomic_set(&xdst->u.dst.__refcnt, 2); + xfrm_last_dst_update(xdst, old); + return xdst; } -static void xfrm_policy_queue_process(unsigned long arg) +static void xfrm_policy_queue_process(struct timer_list *t) { struct sk_buff *skb; struct sock *sk; struct dst_entry *dst; - struct xfrm_policy *pol = (struct xfrm_policy *)arg; + struct xfrm_policy *pol = from_timer(pol, t, polq.hold_timer); struct net *net = xp_net(pol); struct xfrm_policy_queue *pq = &pol->polq; struct flowi fl; @@ -2051,86 +2035,39 @@ free_dst: goto out; } -static struct flow_cache_object * -xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, - struct flow_cache_object *oldflo, void *ctx) +static struct xfrm_dst * +xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, struct xfrm_flo *xflo) { - struct xfrm_flo *xflo = (struct xfrm_flo *)ctx; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; - struct xfrm_dst *xdst, *new_xdst; - int num_pols = 0, num_xfrms = 0, i, err, pol_dead; - - /* Check if the policies from old bundle are usable */ - xdst = NULL; - if (oldflo) { - xdst = container_of(oldflo, struct xfrm_dst, flo); - num_pols = xdst->num_pols; - num_xfrms = xdst->num_xfrms; - pol_dead = 0; - for (i = 0; i < num_pols; i++) { - pols[i] = xdst->pols[i]; - pol_dead |= pols[i]->walk.dead; - } - if (pol_dead) { - /* Mark DST_OBSOLETE_DEAD to fail the next - * xfrm_dst_check() - */ - xdst->u.dst.obsolete = DST_OBSOLETE_DEAD; - dst_release_immediate(&xdst->u.dst); - xdst = NULL; - num_pols = 0; - num_xfrms = 0; - oldflo = NULL; - } - } + int num_pols = 0, num_xfrms = 0, err; + struct xfrm_dst *xdst; /* Resolve policies to use if we couldn't get them from * previous cache entry */ - if (xdst == NULL) { - num_pols = 1; - pols[0] = __xfrm_policy_lookup(net, fl, family, - flow_to_policy_dir(dir)); - err = xfrm_expand_policies(fl, family, pols, + num_pols = 1; + pols[0] = xfrm_policy_lookup(net, fl, family, dir); + err = xfrm_expand_policies(fl, family, pols, &num_pols, &num_xfrms); - if (err < 0) - goto inc_error; - if (num_pols == 0) - return NULL; - if (num_xfrms <= 0) - goto make_dummy_bundle; - } + if (err < 0) + goto inc_error; + if (num_pols == 0) + return NULL; + if (num_xfrms <= 0) + goto make_dummy_bundle; - new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, + xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, xflo->dst_orig); - if (IS_ERR(new_xdst)) { - err = PTR_ERR(new_xdst); + if (IS_ERR(xdst)) { + err = PTR_ERR(xdst); if (err != -EAGAIN) goto error; - if (oldflo == NULL) - goto make_dummy_bundle; - dst_hold(&xdst->u.dst); - return oldflo; - } else if (new_xdst == NULL) { + goto make_dummy_bundle; + } else if (xdst == NULL) { num_xfrms = 0; - if (oldflo == NULL) - goto make_dummy_bundle; - xdst->num_xfrms = 0; - dst_hold(&xdst->u.dst); - return oldflo; - } - - /* Kill the previous bundle */ - if (xdst) { - /* The policies were stolen for newly generated bundle */ - xdst->num_pols = 0; - /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */ - xdst->u.dst.obsolete = DST_OBSOLETE_DEAD; - dst_release_immediate(&xdst->u.dst); + goto make_dummy_bundle; } - /* We do need to return one reference for original caller */ - dst_hold(&new_xdst->u.dst); - return &new_xdst->flo; + return xdst; make_dummy_bundle: /* We found policies, but there's no bundles to instantiate: @@ -2145,18 +2082,12 @@ make_dummy_bundle: xdst->num_xfrms = num_xfrms; memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols); - dst_hold(&xdst->u.dst); - return &xdst->flo; + return xdst; inc_error: XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR); error: - if (xdst != NULL) { - /* Mark DST_OBSOLETE_DEAD to fail the next xfrm_dst_check() */ - xdst->u.dst.obsolete = DST_OBSOLETE_DEAD; - dst_release_immediate(&xdst->u.dst); - } else - xfrm_pols_put(pols, num_pols); + xfrm_pols_put(pols, num_pols); return ERR_PTR(err); } @@ -2187,11 +2118,10 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, const struct sock *sk, int flags) { struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; - struct flow_cache_object *flo; struct xfrm_dst *xdst; struct dst_entry *dst, *route; u16 family = dst_orig->ops->family; - u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); + u8 dir = XFRM_POLICY_OUT; int i, err, num_pols, num_xfrms = 0, drop_pols = 0; dst = NULL; @@ -2226,7 +2156,6 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, goto no_transform; } - dst_hold(&xdst->u.dst); route = xdst->route; } } @@ -2242,15 +2171,13 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, !net->xfrm.policy_count[XFRM_POLICY_OUT]) goto nopol; - flo = flow_cache_lookup(net, fl, family, dir, - xfrm_bundle_lookup, &xflo); - if (flo == NULL) + xdst = xfrm_bundle_lookup(net, fl, family, dir, &xflo); + if (xdst == NULL) goto nopol; - if (IS_ERR(flo)) { - err = PTR_ERR(flo); + if (IS_ERR(xdst)) { + err = PTR_ERR(xdst); goto dropdst; } - xdst = container_of(flo, struct xfrm_dst, flo); num_pols = xdst->num_pols; num_xfrms = xdst->num_xfrms; @@ -2449,12 +2376,10 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, int pi; int reverse; struct flowi fl; - u8 fl_dir; int xerr_idx = -1; reverse = dir & ~XFRM_POLICY_MASK; dir &= XFRM_POLICY_MASK; - fl_dir = policy_to_flow_dir(dir); if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); @@ -2486,16 +2411,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, } } - if (!pol) { - struct flow_cache_object *flo; - - flo = flow_cache_lookup(net, &fl, family, fl_dir, - xfrm_policy_lookup, NULL); - if (IS_ERR_OR_NULL(flo)) - pol = ERR_CAST(flo); - else - pol = container_of(flo, struct xfrm_policy, flo); - } + if (!pol) + pol = xfrm_policy_lookup(net, &fl, family, dir); if (IS_ERR(pol)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); @@ -2641,11 +2558,9 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) * notice. That's what we are validating here via the * stale_bundle() check. * - * When an xdst is removed from flow cache, DST_OBSOLETE_DEAD will - * be marked on it. * When a dst is removed from the fib tree, DST_OBSOLETE_DEAD will * be marked on it. - * Both will force stable_bundle() to fail on any xdst bundle with + * This will force stale_bundle() to fail on any xdst bundle with * this dst linked in it. */ if (dst->obsolete < 0 && !stale_bundle(dst)) @@ -2685,18 +2600,6 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst) return dst; } -void xfrm_garbage_collect(struct net *net) -{ - flow_cache_flush(net); -} -EXPORT_SYMBOL(xfrm_garbage_collect); - -void xfrm_garbage_collect_deferred(struct net *net) -{ - flow_cache_flush_deferred(net); -} -EXPORT_SYMBOL(xfrm_garbage_collect_deferred); - static void xfrm_init_pmtu(struct dst_entry *dst) { do { @@ -3034,14 +2937,9 @@ static int __net_init xfrm_net_init(struct net *net) rv = xfrm_sysctl_init(net); if (rv < 0) goto out_sysctl; - rv = flow_cache_init(net); - if (rv < 0) - goto out; return 0; -out: - xfrm_sysctl_fini(net); out_sysctl: xfrm_policy_fini(net); out_policy: @@ -3054,7 +2952,6 @@ out_statistics: static void __net_exit xfrm_net_exit(struct net *net) { - flow_cache_fini(net); xfrm_sysctl_fini(net); xfrm_policy_fini(net); xfrm_state_fini(net); @@ -3068,7 +2965,15 @@ static struct pernet_operations __net_initdata xfrm_net_ops = { void __init xfrm_init(void) { - flow_cache_hp_init(); + int i; + + xfrm_pcpu_work = kmalloc_array(NR_CPUS, sizeof(*xfrm_pcpu_work), + GFP_KERNEL); + BUG_ON(!xfrm_pcpu_work); + + for (i = 0; i < NR_CPUS; i++) + INIT_WORK(&xfrm_pcpu_work[i], xfrm_pcpu_work_fn); + register_pernet_subsys(&xfrm_net_ops); seqcount_init(&xfrm_policy_hash_generation); xfrm_input_init(); @@ -3308,9 +3213,15 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_state *x_new[XFRM_MAX_DEPTH]; struct xfrm_migrate *mp; + /* Stage 0 - sanity checks */ if ((err = xfrm_migrate_check(m, num_migrate)) < 0) goto out; + if (dir >= XFRM_POLICY_MAX) { + err = -EINVAL; + goto out; + } + /* Stage 1 - find policy */ if ((pol = xfrm_migrate_policy_find(sel, dir, type, net)) == NULL) { err = -ENOENT; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 6c0956d10db6..1f5cee2269af 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -296,12 +296,14 @@ int xfrm_unregister_type_offload(const struct xfrm_type_offload *type, } EXPORT_SYMBOL(xfrm_unregister_type_offload); -static const struct xfrm_type_offload *xfrm_get_type_offload(u8 proto, unsigned short family) +static const struct xfrm_type_offload * +xfrm_get_type_offload(u8 proto, unsigned short family, bool try_load) { struct xfrm_state_afinfo *afinfo; const struct xfrm_type_offload **typemap; const struct xfrm_type_offload *type; +retry: afinfo = xfrm_state_get_afinfo(family); if (unlikely(afinfo == NULL)) return NULL; @@ -311,6 +313,12 @@ static const struct xfrm_type_offload *xfrm_get_type_offload(u8 proto, unsigned if ((type && !try_module_get(type->owner))) type = NULL; + if (!type && try_load) { + request_module("xfrm-offload-%d-%d", family, proto); + try_load = 0; + goto retry; + } + rcu_read_unlock(); return type; } @@ -724,11 +732,12 @@ restart: } } } - if (cnt) - err = 0; - out: spin_unlock_bh(&net->xfrm.xfrm_state_lock); + if (cnt) { + err = 0; + xfrm_policy_cache_flush(); + } return err; } EXPORT_SYMBOL(xfrm_state_flush); @@ -1620,6 +1629,7 @@ int xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n, unsigned short family, struct net *net) { + int i; int err = 0; struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); if (!afinfo) @@ -1628,6 +1638,9 @@ xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n, spin_lock_bh(&net->xfrm.xfrm_state_lock); /*FIXME*/ if (afinfo->tmpl_sort) err = afinfo->tmpl_sort(dst, src, n); + else + for (i = 0; i < n; i++) + dst[i] = src[i]; spin_unlock_bh(&net->xfrm.xfrm_state_lock); rcu_read_unlock(); return err; @@ -1638,6 +1651,7 @@ int xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n, unsigned short family) { + int i; int err = 0; struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); struct net *net = xs_net(*src); @@ -1648,6 +1662,9 @@ xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n, spin_lock_bh(&net->xfrm.xfrm_state_lock); if (afinfo->state_sort) err = afinfo->state_sort(dst, src, n); + else + for (i = 0; i < n; i++) + dst[i] = src[i]; spin_unlock_bh(&net->xfrm.xfrm_state_lock); rcu_read_unlock(); return err; @@ -2052,6 +2069,7 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen if (err >= 0) { xfrm_sk_policy_insert(sk, err, pol); xfrm_pol_put(pol); + __sk_dst_reset(sk); err = 0; } @@ -2164,7 +2182,7 @@ int xfrm_state_mtu(struct xfrm_state *x, int mtu) return mtu - x->props.header_len; } -int __xfrm_init_state(struct xfrm_state *x, bool init_replay) +int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload) { struct xfrm_state_afinfo *afinfo; struct xfrm_mode *inner_mode; @@ -2229,7 +2247,7 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay) if (x->type == NULL) goto error; - x->type_offload = xfrm_get_type_offload(x->id.proto, family); + x->type_offload = xfrm_get_type_offload(x->id.proto, family, offload); err = x->type->init_state(x); if (err) @@ -2257,7 +2275,7 @@ EXPORT_SYMBOL(__xfrm_init_state); int xfrm_init_state(struct xfrm_state *x) { - return __xfrm_init_state(x, true); + return __xfrm_init_state(x, true, false); } EXPORT_SYMBOL(xfrm_init_state); diff --git a/net/xfrm/xfrm_sysctl.c b/net/xfrm/xfrm_sysctl.c index 35a7e794ad04..0c6c5ef65f9d 100644 --- a/net/xfrm/xfrm_sysctl.c +++ b/net/xfrm/xfrm_sysctl.c @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #include <linux/sysctl.h> #include <linux/slab.h> #include <net/net_namespace.h> diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 2be4c6af008a..983b0233767b 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -42,7 +42,7 @@ static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type) return 0; algp = nla_data(rt); - if (nla_len(rt) < xfrm_alg_len(algp)) + if (nla_len(rt) < (int)xfrm_alg_len(algp)) return -EINVAL; switch (type) { @@ -68,7 +68,7 @@ static int verify_auth_trunc(struct nlattr **attrs) return 0; algp = nla_data(rt); - if (nla_len(rt) < xfrm_alg_auth_len(algp)) + if (nla_len(rt) < (int)xfrm_alg_auth_len(algp)) return -EINVAL; algp->alg_name[sizeof(algp->alg_name) - 1] = '\0'; @@ -84,7 +84,7 @@ static int verify_aead(struct nlattr **attrs) return 0; algp = nla_data(rt); - if (nla_len(rt) < aead_len(algp)) + if (nla_len(rt) < (int)aead_len(algp)) return -EINVAL; algp->alg_name[sizeof(algp->alg_name) - 1] = '\0'; @@ -130,7 +130,7 @@ static inline int verify_replay(struct xfrm_usersa_info *p, if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8) return -EINVAL; - if (nla_len(rt) < xfrm_replay_state_esn_len(rs) && + if (nla_len(rt) < (int)xfrm_replay_state_esn_len(rs) && nla_len(rt) != sizeof(*rs)) return -EINVAL; } @@ -404,7 +404,7 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es struct nlattr *rp) { struct xfrm_replay_state_esn *up; - int ulen; + unsigned int ulen; if (!replay_esn || !rp) return 0; @@ -414,7 +414,7 @@ static inline int xfrm_replay_verify_len(struct xfrm_replay_state_esn *replay_es /* Check the overall length and the internal bitmap length to avoid * potential overflow. */ - if (nla_len(rp) < ulen || + if (nla_len(rp) < (int)ulen || xfrm_replay_state_esn_len(replay_esn) != ulen || replay_esn->bmp_len != up->bmp_len) return -EINVAL; @@ -430,14 +430,14 @@ static int xfrm_alloc_replay_state_esn(struct xfrm_replay_state_esn **replay_esn struct nlattr *rta) { struct xfrm_replay_state_esn *p, *pp, *up; - int klen, ulen; + unsigned int klen, ulen; if (!rta) return 0; up = nla_data(rta); klen = xfrm_replay_state_esn_len(up); - ulen = nla_len(rta) >= klen ? klen : sizeof(*up); + ulen = nla_len(rta) >= (int)klen ? klen : sizeof(*up); p = kzalloc(klen, GFP_KERNEL); if (!p) @@ -458,9 +458,9 @@ static int xfrm_alloc_replay_state_esn(struct xfrm_replay_state_esn **replay_esn return 0; } -static inline int xfrm_user_sec_ctx_size(struct xfrm_sec_ctx *xfrm_ctx) +static inline unsigned int xfrm_user_sec_ctx_size(struct xfrm_sec_ctx *xfrm_ctx) { - int len = 0; + unsigned int len = 0; if (xfrm_ctx) { len += sizeof(struct xfrm_user_sec_ctx); @@ -584,7 +584,10 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, xfrm_mark_get(attrs, &x->mark); - err = __xfrm_init_state(x, false); + if (attrs[XFRMA_OUTPUT_MARK]) + x->props.output_mark = nla_get_u32(attrs[XFRMA_OUTPUT_MARK]); + + err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV]); if (err) goto error; @@ -654,6 +657,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, if (err < 0) { x->km.state = XFRM_STATE_DEAD; + xfrm_dev_state_delete(x); __xfrm_state_put(x); goto out; } @@ -796,7 +800,7 @@ static int copy_user_offload(struct xfrm_state_offload *xso, struct sk_buff *skb return -EMSGSIZE; xuo = nla_data(attr); - + memset(xuo, 0, sizeof(*xuo)); xuo->ifindex = xso->dev->ifindex; xuo->flags = xso->flags; @@ -897,6 +901,11 @@ static int copy_to_user_state_extra(struct xfrm_state *x, ret = copy_user_offload(&x->xso, skb); if (ret) goto out; + if (x->props.output_mark) { + ret = nla_put_u32(skb, XFRMA_OUTPUT_MARK, x->props.output_mark); + if (ret) + goto out; + } if (x->security) ret = copy_sec_ctx(x->security, skb); out: @@ -1023,7 +1032,7 @@ static inline int xfrm_nlmsg_multicast(struct net *net, struct sk_buff *skb, return -1; } -static inline size_t xfrm_spdinfo_msgsize(void) +static inline unsigned int xfrm_spdinfo_msgsize(void) { return NLMSG_ALIGN(4) + nla_total_size(sizeof(struct xfrmu_spdinfo)) @@ -1138,18 +1147,19 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, u32 *flags = nlmsg_data(nlh); u32 sportid = NETLINK_CB(skb).portid; u32 seq = nlh->nlmsg_seq; + int err; r_skb = nlmsg_new(xfrm_spdinfo_msgsize(), GFP_ATOMIC); if (r_skb == NULL) return -ENOMEM; - if (build_spdinfo(r_skb, net, sportid, seq, *flags) < 0) - BUG(); + err = build_spdinfo(r_skb, net, sportid, seq, *flags); + BUG_ON(err < 0); return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); } -static inline size_t xfrm_sadinfo_msgsize(void) +static inline unsigned int xfrm_sadinfo_msgsize(void) { return NLMSG_ALIGN(4) + nla_total_size(sizeof(struct xfrmu_sadhinfo)) @@ -1196,13 +1206,14 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, u32 *flags = nlmsg_data(nlh); u32 sportid = NETLINK_CB(skb).portid; u32 seq = nlh->nlmsg_seq; + int err; r_skb = nlmsg_new(xfrm_sadinfo_msgsize(), GFP_ATOMIC); if (r_skb == NULL) return -ENOMEM; - if (build_sadinfo(r_skb, net, sportid, seq, *flags) < 0) - BUG(); + err = build_sadinfo(r_skb, net, sportid, seq, *flags); + BUG_ON(err < 0); return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); } @@ -1625,7 +1636,7 @@ static inline int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *s return copy_sec_ctx(xp->security, skb); return 0; } -static inline size_t userpolicy_type_attrsize(void) +static inline unsigned int userpolicy_type_attrsize(void) { #ifdef CONFIG_XFRM_SUB_POLICY return nla_total_size(sizeof(struct xfrm_userpolicy_type)); @@ -1684,32 +1695,34 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr static int xfrm_dump_policy_done(struct netlink_callback *cb) { - struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *) &cb->args[1]; + struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *)cb->args; struct net *net = sock_net(cb->skb->sk); xfrm_policy_walk_done(walk, net); return 0; } +static int xfrm_dump_policy_start(struct netlink_callback *cb) +{ + struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *)cb->args; + + BUILD_BUG_ON(sizeof(*walk) > sizeof(cb->args)); + + xfrm_policy_walk_init(walk, XFRM_POLICY_TYPE_ANY); + return 0; +} + static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb) { struct net *net = sock_net(skb->sk); - struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *) &cb->args[1]; + struct xfrm_policy_walk *walk = (struct xfrm_policy_walk *)cb->args; struct xfrm_dump_info info; - BUILD_BUG_ON(sizeof(struct xfrm_policy_walk) > - sizeof(cb->args) - sizeof(cb->args[0])); - info.in_skb = cb->skb; info.out_skb = skb; info.nlmsg_seq = cb->nlh->nlmsg_seq; info.nlmsg_flags = NLM_F_MULTI; - if (!cb->args[0]) { - cb->args[0] = 1; - xfrm_policy_walk_init(walk, XFRM_POLICY_TYPE_ANY); - } - (void) xfrm_policy_walk(net, walk, dump_one_policy, &info); return skb->len; @@ -1815,8 +1828,6 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, out: xfrm_pol_put(xp); - if (delete && err == 0) - xfrm_garbage_collect(net); return err; } @@ -1844,9 +1855,9 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, return 0; } -static inline size_t xfrm_aevent_msgsize(struct xfrm_state *x) +static inline unsigned int xfrm_aevent_msgsize(struct xfrm_state *x) { - size_t replay_size = x->replay_esn ? + unsigned int replay_size = x->replay_esn ? xfrm_replay_state_esn_len(x->replay_esn) : sizeof(struct xfrm_replay_state); @@ -1869,6 +1880,7 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct return -EMSGSIZE; id = nlmsg_data(nlh); + memset(&id->sa_id, 0, sizeof(id->sa_id)); memcpy(&id->sa_id.daddr, &x->id.daddr, sizeof(x->id.daddr)); id->sa_id.spi = x->id.spi; id->sa_id.family = x->props.family; @@ -1950,8 +1962,9 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, c.seq = nlh->nlmsg_seq; c.portid = nlh->nlmsg_pid; - if (build_aevent(r_skb, x, &c) < 0) - BUG(); + err = build_aevent(r_skb, x, &c); + BUG_ON(err < 0); + err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).portid); spin_unlock_bh(&x->lock); xfrm_state_put(x); @@ -2027,7 +2040,6 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, return 0; return err; } - xfrm_garbage_collect(net); c.data.type = type; c.event = nlh->nlmsg_type; @@ -2315,8 +2327,8 @@ static int copy_to_user_kmaddress(const struct xfrm_kmaddress *k, struct sk_buff return nla_put(skb, XFRMA_KMADDRESS, sizeof(uk), &uk); } -static inline size_t xfrm_migrate_msgsize(int num_migrate, int with_kma, - int with_encp) +static inline unsigned int xfrm_migrate_msgsize(int num_migrate, int with_kma, + int with_encp) { return NLMSG_ALIGN(sizeof(struct xfrm_userpolicy_id)) + (with_kma ? nla_total_size(sizeof(struct xfrm_kmaddress)) : 0) @@ -2379,6 +2391,7 @@ static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, { struct net *net = &init_net; struct sk_buff *skb; + int err; skb = nlmsg_new(xfrm_migrate_msgsize(num_migrate, !!k, !!encap), GFP_ATOMIC); @@ -2386,8 +2399,8 @@ static int xfrm_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, return -ENOMEM; /* build migrate */ - if (build_migrate(skb, m, num_migrate, k, sel, encap, dir, type) < 0) - BUG(); + err = build_migrate(skb, m, num_migrate, k, sel, encap, dir, type); + BUG_ON(err < 0); return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_MIGRATE); } @@ -2457,6 +2470,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_PROTO] = { .type = NLA_U8 }, [XFRMA_ADDRESS_FILTER] = { .len = sizeof(struct xfrm_address_filter) }, [XFRMA_OFFLOAD_DEV] = { .len = sizeof(struct xfrm_user_offload) }, + [XFRMA_OUTPUT_MARK] = { .len = NLA_U32 }, }; static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = { @@ -2466,6 +2480,7 @@ static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = { static const struct xfrm_link { int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **); + int (*start)(struct netlink_callback *); int (*dump)(struct sk_buff *, struct netlink_callback *); int (*done)(struct netlink_callback *); const struct nla_policy *nla_pol; @@ -2479,6 +2494,7 @@ static const struct xfrm_link { [XFRM_MSG_NEWPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_add_policy }, [XFRM_MSG_DELPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_get_policy }, [XFRM_MSG_GETPOLICY - XFRM_MSG_BASE] = { .doit = xfrm_get_policy, + .start = xfrm_dump_policy_start, .dump = xfrm_dump_policy, .done = xfrm_dump_policy_done }, [XFRM_MSG_ALLOCSPI - XFRM_MSG_BASE] = { .doit = xfrm_alloc_userspi }, @@ -2531,6 +2547,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, { struct netlink_dump_control c = { + .start = link->start, .dump = link->dump, .done = link->done, }; @@ -2559,7 +2576,7 @@ static void xfrm_netlink_rcv(struct sk_buff *skb) mutex_unlock(&net->xfrm.xfrm_cfg_mutex); } -static inline size_t xfrm_expire_msgsize(void) +static inline unsigned int xfrm_expire_msgsize(void) { return NLMSG_ALIGN(sizeof(struct xfrm_user_expire)) + nla_total_size(sizeof(struct xfrm_mark)); @@ -2578,6 +2595,8 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct ue = nlmsg_data(nlh); copy_to_user_state(x, &ue->state); ue->hard = (c->data.hard != 0) ? 1 : 0; + /* clear the padding bytes */ + memset(&ue->hard + 1, 0, sizeof(*ue) - offsetofend(typeof(*ue), hard)); err = xfrm_mark_put(skb, &x->mark); if (err) @@ -2608,13 +2627,14 @@ static int xfrm_aevent_state_notify(struct xfrm_state *x, const struct km_event { struct net *net = xs_net(x); struct sk_buff *skb; + int err; skb = nlmsg_new(xfrm_aevent_msgsize(x), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; - if (build_aevent(skb, x, c) < 0) - BUG(); + err = build_aevent(skb, x, c); + BUG_ON(err < 0); return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_AEVENTS); } @@ -2645,9 +2665,9 @@ static int xfrm_notify_sa_flush(const struct km_event *c) return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_SA); } -static inline size_t xfrm_sa_len(struct xfrm_state *x) +static inline unsigned int xfrm_sa_len(struct xfrm_state *x) { - size_t l = 0; + unsigned int l = 0; if (x->aead) l += nla_total_size(aead_len(x->aead)); if (x->aalg) { @@ -2676,6 +2696,8 @@ static inline size_t xfrm_sa_len(struct xfrm_state *x) l += nla_total_size(sizeof(x->props.extra_flags)); if (x->xso.dev) l += nla_total_size(sizeof(x->xso)); + if (x->props.output_mark) + l += nla_total_size(sizeof(x->props.output_mark)); /* Must count x->lastused as it may become non-zero behind our back. */ l += nla_total_size_64bit(sizeof(u64)); @@ -2690,8 +2712,9 @@ static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c) struct xfrm_usersa_id *id; struct nlmsghdr *nlh; struct sk_buff *skb; - int len = xfrm_sa_len(x); - int headlen, err; + unsigned int len = xfrm_sa_len(x); + unsigned int headlen; + int err; headlen = sizeof(*p); if (c->event == XFRM_MSG_DELSA) { @@ -2715,6 +2738,7 @@ static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c) struct nlattr *attr; id = nlmsg_data(nlh); + memset(id, 0, sizeof(*id)); memcpy(&id->daddr, &x->id.daddr, sizeof(id->daddr)); id->spi = x->id.spi; id->family = x->props.family; @@ -2764,8 +2788,8 @@ static int xfrm_send_state_notify(struct xfrm_state *x, const struct km_event *c } -static inline size_t xfrm_acquire_msgsize(struct xfrm_state *x, - struct xfrm_policy *xp) +static inline unsigned int xfrm_acquire_msgsize(struct xfrm_state *x, + struct xfrm_policy *xp) { return NLMSG_ALIGN(sizeof(struct xfrm_user_acquire)) + nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr) @@ -2817,13 +2841,14 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, { struct net *net = xs_net(x); struct sk_buff *skb; + int err; skb = nlmsg_new(xfrm_acquire_msgsize(x, xp), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; - if (build_acquire(skb, x, xt, xp) < 0) - BUG(); + err = build_acquire(skb, x, xt, xp); + BUG_ON(err < 0); return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_ACQUIRE); } @@ -2888,7 +2913,7 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt, return xp; } -static inline size_t xfrm_polexpire_msgsize(struct xfrm_policy *xp) +static inline unsigned int xfrm_polexpire_msgsize(struct xfrm_policy *xp) { return NLMSG_ALIGN(sizeof(struct xfrm_user_polexpire)) + nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr) @@ -2932,26 +2957,28 @@ static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, const struct { struct net *net = xp_net(xp); struct sk_buff *skb; + int err; skb = nlmsg_new(xfrm_polexpire_msgsize(xp), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; - if (build_polexpire(skb, xp, dir, c) < 0) - BUG(); + err = build_polexpire(skb, xp, dir, c); + BUG_ON(err < 0); return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_EXPIRE); } static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_event *c) { - int len = nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); + unsigned int len = nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr); struct net *net = xp_net(xp); struct xfrm_userpolicy_info *p; struct xfrm_userpolicy_id *id; struct nlmsghdr *nlh; struct sk_buff *skb; - int headlen, err; + unsigned int headlen; + int err; headlen = sizeof(*p); if (c->event == XFRM_MSG_DELPOLICY) { @@ -3058,7 +3085,7 @@ static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, const struct } -static inline size_t xfrm_report_msgsize(void) +static inline unsigned int xfrm_report_msgsize(void) { return NLMSG_ALIGN(sizeof(struct xfrm_user_report)); } @@ -3092,18 +3119,19 @@ static int xfrm_send_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr) { struct sk_buff *skb; + int err; skb = nlmsg_new(xfrm_report_msgsize(), GFP_ATOMIC); if (skb == NULL) return -ENOMEM; - if (build_report(skb, proto, sel, addr) < 0) - BUG(); + err = build_report(skb, proto, sel, addr); + BUG_ON(err < 0); return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_REPORT); } -static inline size_t xfrm_mapping_msgsize(void) +static inline unsigned int xfrm_mapping_msgsize(void) { return NLMSG_ALIGN(sizeof(struct xfrm_user_mapping)); } @@ -3139,6 +3167,7 @@ static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, { struct net *net = xs_net(x); struct sk_buff *skb; + int err; if (x->id.proto != IPPROTO_ESP) return -EINVAL; @@ -3150,8 +3179,8 @@ static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, if (skb == NULL) return -ENOMEM; - if (build_mapping(skb, x, ipaddr, sport) < 0) - BUG(); + err = build_mapping(skb, x, ipaddr, sport); + BUG_ON(err < 0); return xfrm_nlmsg_multicast(net, skb, 0, XFRMNLGRP_MAPPING); } |
